generated from felixm/defaultpy
Update ledgerai to read existing transactions from beancount file
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
# ---> Python
|
||||
uv.lock
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
@@ -7,35 +7,28 @@ name = "toldg"
|
||||
version = "0.1.0"
|
||||
description = "Tool to generate ledger files from csv"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12,<4.0"
|
||||
requires-python = ">=3.13,<4.0"
|
||||
license = {text = "MIT"}
|
||||
authors = [
|
||||
{name = "Felix Martin", email = "mail@felixm.de"}
|
||||
]
|
||||
dependencies = [
|
||||
"fava (>=1.30.1,<2.0.0)",
|
||||
"pydantic (>=2.10.6,<3.0.0)",
|
||||
"beancount (>=3.1.0,<4.0.0)",
|
||||
"rich (>=13.9.4,<14.0.0)",
|
||||
"numpy (>=2.2.3,<3.0.0)"
|
||||
"fava",
|
||||
"pydantic",
|
||||
"beancount",
|
||||
"rich",
|
||||
"numpy",
|
||||
"ty",
|
||||
"ruff",
|
||||
]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pre-commit = "^4.1.0"
|
||||
black = "^25.1.0"
|
||||
isort = "^6.0.1"
|
||||
pytest = "^8.3.4"
|
||||
|
||||
[project.scripts]
|
||||
toldg = "toldg.__main__:main"
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "src"}
|
||||
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
target-version = ["py312"]
|
||||
[tool.ruff]
|
||||
target-version = "py313"
|
||||
line-length = 100
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
line_length = 88
|
||||
|
||||
@@ -7,7 +7,8 @@ from rich.logging import RichHandler
|
||||
|
||||
from toldg.process import process_csv_files, process_ldg_files
|
||||
from toldg.train import train
|
||||
from toldg.utils import load_config, remove_if_exists, write_meta
|
||||
from toldg.utils import load_config, remove_if_exists
|
||||
from toldg.models import Config
|
||||
|
||||
|
||||
def init_logging():
|
||||
@@ -19,10 +20,28 @@ def init_logging():
|
||||
)
|
||||
|
||||
|
||||
def get_new_entries(entries: list, csv_entries: list) -> list:
|
||||
key_to_entry = {
|
||||
(entry.meta["source_file"], entry.meta["source_index"]): entry for entry in entries
|
||||
}
|
||||
assert len(entries) == len(key_to_entry), "Transaction keys must be unique"
|
||||
|
||||
def load_and_write_back(filename):
|
||||
entries, errors, options_map = beancount.loader.load_file(filename)
|
||||
new_entries = []
|
||||
for csv_entry in csv_entries:
|
||||
key = (csv_entry.csv_file, csv_entry.index)
|
||||
if key in key_to_entry:
|
||||
existing_entry = key_to_entry[key]
|
||||
if existing_entry.meta["source_row"] != csv_entry.row:
|
||||
msg = f"Consistency error: CSV entry {csv_entry} is different to {existing_entry}"
|
||||
logging.error(msg)
|
||||
raise SystemExit(1)
|
||||
else:
|
||||
new_entries.append(csv_entry)
|
||||
logging.info(f"Got {len(new_entries)} new and {len(entries)} existing transactions.")
|
||||
return new_entries
|
||||
|
||||
|
||||
def update_ledger(config: Config):
|
||||
def beancount_entry_to_string(entry) -> str:
|
||||
buf = io.StringIO()
|
||||
beancount.parser.printer.print_entry(entry, file=buf)
|
||||
@@ -31,15 +50,35 @@ def load_and_write_back(filename):
|
||||
def is_transaction(entry) -> bool:
|
||||
return isinstance(entry, beancount.core.data.Transaction)
|
||||
|
||||
prev_entry_was_transaction = False
|
||||
filename = config.output_file
|
||||
entries, errors, options_map = beancount.loader.load_file(filename)
|
||||
|
||||
if errors:
|
||||
print(f"errors in generated '{filename}'")
|
||||
logging.error(f"errors in '{filename}'")
|
||||
for err in errors:
|
||||
print(err)
|
||||
else:
|
||||
logging.error(err)
|
||||
raise SystemExit(1)
|
||||
|
||||
entries.sort(key=lambda e: e.date)
|
||||
with open(filename, "w") as f:
|
||||
f.write('option "operating_currency" "USD"\n')
|
||||
|
||||
# Note(felixm): Only write back transactions from the main beancount file.
|
||||
# The issue is that `beancount.loader.load_file` does not allow for a full
|
||||
# round trip; some of the entries get swallowed. Therefore, treat all files
|
||||
# that are not the main beancount file as input only files. This means
|
||||
# these input only files can only be edited by hand, but the user can use
|
||||
# them to set options for beancount and fava, and add other types of
|
||||
# entries that would otherwise disappear after the round trip. I have seen
|
||||
# tickets on GitHub about changing this API so that everything can be
|
||||
# written back as is, but until then, this works well for my use-case.
|
||||
entries = [e for e in entries if e.meta["filename"] == str(filename.absolute())]
|
||||
|
||||
csv_entries = process_csv_files(config, False)
|
||||
entries += get_new_entries(entries, csv_entries)
|
||||
remove_if_exists(config.output_file)
|
||||
process_ldg_files(config)
|
||||
|
||||
with open(filename, "a") as f:
|
||||
prev_entry_was_transaction = False
|
||||
for entry in entries:
|
||||
if prev_entry_was_transaction:
|
||||
f.write("\n")
|
||||
@@ -48,6 +87,7 @@ def load_and_write_back(filename):
|
||||
f.write(beancount_entry_to_string(entry))
|
||||
f.write("\n")
|
||||
prev_entry_was_transaction = is_transaction(entry)
|
||||
logging.info(f"Ledger file '{filename}' was written successfully.")
|
||||
|
||||
|
||||
def main():
|
||||
@@ -56,11 +96,7 @@ def main():
|
||||
if len(sys.argv) > 2 and sys.argv[2] == "train":
|
||||
train(config)
|
||||
else:
|
||||
remove_if_exists(config.output_file)
|
||||
write_meta(config)
|
||||
process_csv_files(config)
|
||||
load_and_write_back(config.output_file)
|
||||
process_ldg_files(config)
|
||||
update_ledger(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -8,9 +8,7 @@ EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
|
||||
def iterfzf(iterable, prompt="> "):
|
||||
cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
|
||||
encoding = sys.getdefaultencoding()
|
||||
proc = subprocess.Popen(
|
||||
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
|
||||
)
|
||||
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None)
|
||||
if proc.stdin is None:
|
||||
return None
|
||||
try:
|
||||
@@ -24,7 +22,10 @@ def iterfzf(iterable, prompt="> "):
|
||||
return None
|
||||
if proc.stdout is None:
|
||||
return None
|
||||
decode = lambda t: t.decode(encoding)
|
||||
|
||||
def decode(t):
|
||||
return t.decode(encoding)
|
||||
|
||||
output = [decode(ln.strip(b"\r\n\0")) for ln in iter(proc.stdout.readline, b"")]
|
||||
try:
|
||||
return output[0]
|
||||
|
||||
@@ -86,4 +86,3 @@ class Transaction(BaseModel):
|
||||
|
||||
def key(self):
|
||||
return self.csv_file + ", " + self.row
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
from collections import Counter, defaultdict
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -31,9 +30,7 @@ class Tokenizer:
|
||||
word_counts.update(tokens)
|
||||
|
||||
# Filter words by minimum count
|
||||
filtered_words = [
|
||||
word for word, count in word_counts.items() if count >= self.min_count
|
||||
]
|
||||
filtered_words = [word for word, count in word_counts.items() if count >= self.min_count]
|
||||
|
||||
# Build vocabulary
|
||||
self.vocab = {word: idx for idx, word in enumerate(filtered_words)}
|
||||
@@ -373,7 +370,6 @@ def get_sort_categories(model_path: Path):
|
||||
_classifier.sort_categories(row, categories)
|
||||
|
||||
try:
|
||||
|
||||
model_path = Path("transaction_classifier.pkl")
|
||||
_classifier = TransactionClassifier(model_path)
|
||||
if _classifier.model is None:
|
||||
@@ -385,9 +381,7 @@ def get_sort_categories(model_path: Path):
|
||||
return sort_categories
|
||||
|
||||
|
||||
def add_account2(
|
||||
model_path: Path, transactions: List[Transaction], categories: List[str]
|
||||
):
|
||||
def add_account2(model_path: Path, transactions: List[Transaction], categories: List[str]):
|
||||
"""Add account2 to unmapped transactions."""
|
||||
unmapped_transactions = list(filter(lambda t: t.mapping is None, transactions))
|
||||
if len(unmapped_transactions) == 0:
|
||||
|
||||
@@ -14,10 +14,9 @@ from toldg.models import Config, CsvConfig, Mapping, Transaction
|
||||
|
||||
def process_ldg_files(config: Config):
|
||||
with open(config.output_file, "a") as f_out:
|
||||
f_out.write("\n")
|
||||
for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
|
||||
ldg_rel = os.path.relpath(ldg_file, os.path.dirname(config.output_file))
|
||||
f_out.write(f"include \"{ldg_rel}\"\n")
|
||||
f_out.write(f'include "{ldg_rel}"\n')
|
||||
|
||||
|
||||
def get_csv_config(csv_file: str, csv_configs: list[CsvConfig]) -> CsvConfig:
|
||||
@@ -63,8 +62,7 @@ def get_transactions(csv_file: str, config: CsvConfig) -> list[Transaction]:
|
||||
for _ in range(config.skip):
|
||||
next(reader)
|
||||
rows = [row for row in reader if row]
|
||||
transactions = [row_to_transaction(i, row, fields)
|
||||
for i, row in enumerate(reversed(rows))]
|
||||
transactions = [row_to_transaction(i, row, fields) for i, row in enumerate(reversed(rows))]
|
||||
return transactions
|
||||
|
||||
|
||||
@@ -74,9 +72,7 @@ def apply_mappings(transactions: list[Transaction], mappings: dict[str, Mapping]
|
||||
if t.key() in mappings:
|
||||
mapping = mappings[t.key()]
|
||||
assert isinstance(mapping, Mapping)
|
||||
assert (
|
||||
mapping.count > 0
|
||||
), f"{mapping} used by {t} but count is not greater than '0'."
|
||||
assert mapping.count > 0, f"{mapping} used by {t} but count is not greater than '0'."
|
||||
mapping.count -= 1
|
||||
t.mapping = mapping
|
||||
else:
|
||||
@@ -86,7 +82,7 @@ def apply_mappings(transactions: list[Transaction], mappings: dict[str, Mapping]
|
||||
assert mapping.count == 0, f"{mapping} was not used as often as expected!"
|
||||
|
||||
|
||||
def process_csv_files(config: Config) -> list[Transaction]:
|
||||
def process_csv_files(config: Config, write_outputs: bool = True) -> list[Transaction]:
|
||||
csv_files = toldg.utils.get_csv_files(config.input_directory)
|
||||
transactions = []
|
||||
for csv_file in csv_files:
|
||||
@@ -97,6 +93,7 @@ def process_csv_files(config: Config) -> list[Transaction]:
|
||||
mappings = toldg.utils.read_mappings(config.mappings_file)
|
||||
apply_mappings(transactions, mappings)
|
||||
toldg.predict.add_account2(config.model, transactions, config.categories)
|
||||
if write_outputs:
|
||||
toldg.utils.write_mappings(transactions, config.mappings_file)
|
||||
toldg.write.render_to_file(transactions, config)
|
||||
return transactions
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from toldg.models import Config
|
||||
from toldg.predict import train_classifier
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Dict, List
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
||||
@@ -57,14 +57,6 @@ def category_to_bean(c: str) -> str:
|
||||
return ":".join(new_sections)
|
||||
|
||||
|
||||
def write_meta(config: Config):
|
||||
with open(config.output_file, "a") as f:
|
||||
for category in config.categories:
|
||||
f.write(f"2017-01-01 open {category_to_bean(category)}\n")
|
||||
f.write("\n")
|
||||
f.write('option "operating_currency" "USD"\n\n')
|
||||
|
||||
|
||||
def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
||||
"""Write transactions to the mappings file."""
|
||||
|
||||
@@ -74,10 +66,8 @@ def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
||||
pass
|
||||
else:
|
||||
mapping = Mapping(
|
||||
**{
|
||||
"account2": t.account2.strip(),
|
||||
"narration": t.description,
|
||||
}
|
||||
account2=t.account2.strip(),
|
||||
narration=t.description,
|
||||
)
|
||||
mappings[t.key()] = mapping
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from toldg.models import Config, Transaction
|
||||
@@ -6,11 +5,11 @@ from toldg.utils import category_to_bean
|
||||
|
||||
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
||||
{t.date} * {description}{tags}
|
||||
{account2:<40} {t.debit:<6} {t.currency}
|
||||
{account1:<40} {t.credit:<6} {t.currency}
|
||||
source_file: "{t.csv_file}"
|
||||
source_index: {t.index}
|
||||
source_row: "{t.row}"
|
||||
{account2:<40} {t.debit:<6} {t.currency}
|
||||
{account1:<40} {t.credit:<6} {t.currency}
|
||||
"""
|
||||
|
||||
|
||||
@@ -58,4 +57,3 @@ def render_to_file(transactions: List[Transaction], config: Config):
|
||||
content = "".join(format(t) for t in transactions)
|
||||
with open(config.output_file, "a") as f:
|
||||
f.write(content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user