generated from felixm/defaultpy
Update ledgerai to read existing transactions from beancount file
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
# ---> Python
|
# ---> Python
|
||||||
|
uv.lock
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|||||||
@@ -7,35 +7,28 @@ name = "toldg"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Tool to generate ledger files from csv"
|
description = "Tool to generate ledger files from csv"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.12,<4.0"
|
requires-python = ">=3.13,<4.0"
|
||||||
license = {text = "MIT"}
|
license = {text = "MIT"}
|
||||||
authors = [
|
authors = [
|
||||||
{name = "Felix Martin", email = "mail@felixm.de"}
|
{name = "Felix Martin", email = "mail@felixm.de"}
|
||||||
]
|
]
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fava (>=1.30.1,<2.0.0)",
|
"fava",
|
||||||
"pydantic (>=2.10.6,<3.0.0)",
|
"pydantic",
|
||||||
"beancount (>=3.1.0,<4.0.0)",
|
"beancount",
|
||||||
"rich (>=13.9.4,<14.0.0)",
|
"rich",
|
||||||
"numpy (>=2.2.3,<3.0.0)"
|
"numpy",
|
||||||
|
"ty",
|
||||||
|
"ruff",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
|
||||||
pre-commit = "^4.1.0"
|
|
||||||
black = "^25.1.0"
|
|
||||||
isort = "^6.0.1"
|
|
||||||
pytest = "^8.3.4"
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
toldg = "toldg.__main__:main"
|
toldg = "toldg.__main__:main"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
package-dir = {"" = "src"}
|
package-dir = {"" = "src"}
|
||||||
|
|
||||||
[tool.black]
|
[tool.ruff]
|
||||||
line-length = 88
|
target-version = "py313"
|
||||||
target-version = ["py312"]
|
line-length = 100
|
||||||
|
|
||||||
[tool.isort]
|
|
||||||
profile = "black"
|
|
||||||
line_length = 88
|
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ from rich.logging import RichHandler
|
|||||||
|
|
||||||
from toldg.process import process_csv_files, process_ldg_files
|
from toldg.process import process_csv_files, process_ldg_files
|
||||||
from toldg.train import train
|
from toldg.train import train
|
||||||
from toldg.utils import load_config, remove_if_exists, write_meta
|
from toldg.utils import load_config, remove_if_exists
|
||||||
|
from toldg.models import Config
|
||||||
|
|
||||||
|
|
||||||
def init_logging():
|
def init_logging():
|
||||||
@@ -19,10 +20,28 @@ def init_logging():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_new_entries(entries: list, csv_entries: list) -> list:
|
||||||
|
key_to_entry = {
|
||||||
|
(entry.meta["source_file"], entry.meta["source_index"]): entry for entry in entries
|
||||||
|
}
|
||||||
|
assert len(entries) == len(key_to_entry), "Transaction keys must be unique"
|
||||||
|
|
||||||
def load_and_write_back(filename):
|
new_entries = []
|
||||||
entries, errors, options_map = beancount.loader.load_file(filename)
|
for csv_entry in csv_entries:
|
||||||
|
key = (csv_entry.csv_file, csv_entry.index)
|
||||||
|
if key in key_to_entry:
|
||||||
|
existing_entry = key_to_entry[key]
|
||||||
|
if existing_entry.meta["source_row"] != csv_entry.row:
|
||||||
|
msg = f"Consistency error: CSV entry {csv_entry} is different to {existing_entry}"
|
||||||
|
logging.error(msg)
|
||||||
|
raise SystemExit(1)
|
||||||
|
else:
|
||||||
|
new_entries.append(csv_entry)
|
||||||
|
logging.info(f"Got {len(new_entries)} new and {len(entries)} existing transactions.")
|
||||||
|
return new_entries
|
||||||
|
|
||||||
|
|
||||||
|
def update_ledger(config: Config):
|
||||||
def beancount_entry_to_string(entry) -> str:
|
def beancount_entry_to_string(entry) -> str:
|
||||||
buf = io.StringIO()
|
buf = io.StringIO()
|
||||||
beancount.parser.printer.print_entry(entry, file=buf)
|
beancount.parser.printer.print_entry(entry, file=buf)
|
||||||
@@ -31,23 +50,44 @@ def load_and_write_back(filename):
|
|||||||
def is_transaction(entry) -> bool:
|
def is_transaction(entry) -> bool:
|
||||||
return isinstance(entry, beancount.core.data.Transaction)
|
return isinstance(entry, beancount.core.data.Transaction)
|
||||||
|
|
||||||
prev_entry_was_transaction = False
|
filename = config.output_file
|
||||||
|
entries, errors, options_map = beancount.loader.load_file(filename)
|
||||||
|
|
||||||
if errors:
|
if errors:
|
||||||
print(f"errors in generated '{filename}'")
|
logging.error(f"errors in '{filename}'")
|
||||||
for err in errors:
|
for err in errors:
|
||||||
print(err)
|
logging.error(err)
|
||||||
else:
|
raise SystemExit(1)
|
||||||
entries.sort(key=lambda e: e.date)
|
|
||||||
with open(filename, "w") as f:
|
entries.sort(key=lambda e: e.date)
|
||||||
f.write('option "operating_currency" "USD"\n')
|
|
||||||
for entry in entries:
|
# Note(felixm): Only write back transactions from the main beancount file.
|
||||||
if prev_entry_was_transaction:
|
# The issue is that `beancount.loader.load_file` does not allow for a full
|
||||||
f.write("\n")
|
# round trip; some of the entries get swallowed. Therefore, treat all files
|
||||||
elif not prev_entry_was_transaction and is_transaction(entry):
|
# that are not the main beancount file as input only files. This means
|
||||||
f.write("\n")
|
# these input only files can only be edited by hand, but the user can use
|
||||||
f.write(beancount_entry_to_string(entry))
|
# them to set options for beancount and fava, and add other types of
|
||||||
|
# entries that would otherwise disappear after the round trip. I have seen
|
||||||
|
# tickets on GitHub about changing this API so that everything can be
|
||||||
|
# written back as is, but until then, this works well for my use-case.
|
||||||
|
entries = [e for e in entries if e.meta["filename"] == str(filename.absolute())]
|
||||||
|
|
||||||
|
csv_entries = process_csv_files(config, False)
|
||||||
|
entries += get_new_entries(entries, csv_entries)
|
||||||
|
remove_if_exists(config.output_file)
|
||||||
|
process_ldg_files(config)
|
||||||
|
|
||||||
|
with open(filename, "a") as f:
|
||||||
|
prev_entry_was_transaction = False
|
||||||
|
for entry in entries:
|
||||||
|
if prev_entry_was_transaction:
|
||||||
f.write("\n")
|
f.write("\n")
|
||||||
prev_entry_was_transaction = is_transaction(entry)
|
elif not prev_entry_was_transaction and is_transaction(entry):
|
||||||
|
f.write("\n")
|
||||||
|
f.write(beancount_entry_to_string(entry))
|
||||||
|
f.write("\n")
|
||||||
|
prev_entry_was_transaction = is_transaction(entry)
|
||||||
|
logging.info(f"Ledger file '{filename}' was written successfully.")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -56,11 +96,7 @@ def main():
|
|||||||
if len(sys.argv) > 2 and sys.argv[2] == "train":
|
if len(sys.argv) > 2 and sys.argv[2] == "train":
|
||||||
train(config)
|
train(config)
|
||||||
else:
|
else:
|
||||||
remove_if_exists(config.output_file)
|
update_ledger(config)
|
||||||
write_meta(config)
|
|
||||||
process_csv_files(config)
|
|
||||||
load_and_write_back(config.output_file)
|
|
||||||
process_ldg_files(config)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -8,9 +8,7 @@ EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
|
|||||||
def iterfzf(iterable, prompt="> "):
|
def iterfzf(iterable, prompt="> "):
|
||||||
cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
|
cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
|
||||||
encoding = sys.getdefaultencoding()
|
encoding = sys.getdefaultencoding()
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None)
|
||||||
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
|
|
||||||
)
|
|
||||||
if proc.stdin is None:
|
if proc.stdin is None:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
@@ -24,7 +22,10 @@ def iterfzf(iterable, prompt="> "):
|
|||||||
return None
|
return None
|
||||||
if proc.stdout is None:
|
if proc.stdout is None:
|
||||||
return None
|
return None
|
||||||
decode = lambda t: t.decode(encoding)
|
|
||||||
|
def decode(t):
|
||||||
|
return t.decode(encoding)
|
||||||
|
|
||||||
output = [decode(ln.strip(b"\r\n\0")) for ln in iter(proc.stdout.readline, b"")]
|
output = [decode(ln.strip(b"\r\n\0")) for ln in iter(proc.stdout.readline, b"")]
|
||||||
try:
|
try:
|
||||||
return output[0]
|
return output[0]
|
||||||
|
|||||||
@@ -86,4 +86,3 @@ class Transaction(BaseModel):
|
|||||||
|
|
||||||
def key(self):
|
def key(self):
|
||||||
return self.csv_file + ", " + self.row
|
return self.csv_file + ", " + self.row
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -31,9 +30,7 @@ class Tokenizer:
|
|||||||
word_counts.update(tokens)
|
word_counts.update(tokens)
|
||||||
|
|
||||||
# Filter words by minimum count
|
# Filter words by minimum count
|
||||||
filtered_words = [
|
filtered_words = [word for word, count in word_counts.items() if count >= self.min_count]
|
||||||
word for word, count in word_counts.items() if count >= self.min_count
|
|
||||||
]
|
|
||||||
|
|
||||||
# Build vocabulary
|
# Build vocabulary
|
||||||
self.vocab = {word: idx for idx, word in enumerate(filtered_words)}
|
self.vocab = {word: idx for idx, word in enumerate(filtered_words)}
|
||||||
@@ -373,7 +370,6 @@ def get_sort_categories(model_path: Path):
|
|||||||
_classifier.sort_categories(row, categories)
|
_classifier.sort_categories(row, categories)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
model_path = Path("transaction_classifier.pkl")
|
model_path = Path("transaction_classifier.pkl")
|
||||||
_classifier = TransactionClassifier(model_path)
|
_classifier = TransactionClassifier(model_path)
|
||||||
if _classifier.model is None:
|
if _classifier.model is None:
|
||||||
@@ -385,9 +381,7 @@ def get_sort_categories(model_path: Path):
|
|||||||
return sort_categories
|
return sort_categories
|
||||||
|
|
||||||
|
|
||||||
def add_account2(
|
def add_account2(model_path: Path, transactions: List[Transaction], categories: List[str]):
|
||||||
model_path: Path, transactions: List[Transaction], categories: List[str]
|
|
||||||
):
|
|
||||||
"""Add account2 to unmapped transactions."""
|
"""Add account2 to unmapped transactions."""
|
||||||
unmapped_transactions = list(filter(lambda t: t.mapping is None, transactions))
|
unmapped_transactions = list(filter(lambda t: t.mapping is None, transactions))
|
||||||
if len(unmapped_transactions) == 0:
|
if len(unmapped_transactions) == 0:
|
||||||
|
|||||||
@@ -14,10 +14,9 @@ from toldg.models import Config, CsvConfig, Mapping, Transaction
|
|||||||
|
|
||||||
def process_ldg_files(config: Config):
|
def process_ldg_files(config: Config):
|
||||||
with open(config.output_file, "a") as f_out:
|
with open(config.output_file, "a") as f_out:
|
||||||
f_out.write("\n")
|
|
||||||
for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
|
for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
|
||||||
ldg_rel = os.path.relpath(ldg_file, os.path.dirname(config.output_file))
|
ldg_rel = os.path.relpath(ldg_file, os.path.dirname(config.output_file))
|
||||||
f_out.write(f"include \"{ldg_rel}\"\n")
|
f_out.write(f'include "{ldg_rel}"\n')
|
||||||
|
|
||||||
|
|
||||||
def get_csv_config(csv_file: str, csv_configs: list[CsvConfig]) -> CsvConfig:
|
def get_csv_config(csv_file: str, csv_configs: list[CsvConfig]) -> CsvConfig:
|
||||||
@@ -63,8 +62,7 @@ def get_transactions(csv_file: str, config: CsvConfig) -> list[Transaction]:
|
|||||||
for _ in range(config.skip):
|
for _ in range(config.skip):
|
||||||
next(reader)
|
next(reader)
|
||||||
rows = [row for row in reader if row]
|
rows = [row for row in reader if row]
|
||||||
transactions = [row_to_transaction(i, row, fields)
|
transactions = [row_to_transaction(i, row, fields) for i, row in enumerate(reversed(rows))]
|
||||||
for i, row in enumerate(reversed(rows))]
|
|
||||||
return transactions
|
return transactions
|
||||||
|
|
||||||
|
|
||||||
@@ -74,9 +72,7 @@ def apply_mappings(transactions: list[Transaction], mappings: dict[str, Mapping]
|
|||||||
if t.key() in mappings:
|
if t.key() in mappings:
|
||||||
mapping = mappings[t.key()]
|
mapping = mappings[t.key()]
|
||||||
assert isinstance(mapping, Mapping)
|
assert isinstance(mapping, Mapping)
|
||||||
assert (
|
assert mapping.count > 0, f"{mapping} used by {t} but count is not greater than '0'."
|
||||||
mapping.count > 0
|
|
||||||
), f"{mapping} used by {t} but count is not greater than '0'."
|
|
||||||
mapping.count -= 1
|
mapping.count -= 1
|
||||||
t.mapping = mapping
|
t.mapping = mapping
|
||||||
else:
|
else:
|
||||||
@@ -86,7 +82,7 @@ def apply_mappings(transactions: list[Transaction], mappings: dict[str, Mapping]
|
|||||||
assert mapping.count == 0, f"{mapping} was not used as often as expected!"
|
assert mapping.count == 0, f"{mapping} was not used as often as expected!"
|
||||||
|
|
||||||
|
|
||||||
def process_csv_files(config: Config) -> list[Transaction]:
|
def process_csv_files(config: Config, write_outputs: bool = True) -> list[Transaction]:
|
||||||
csv_files = toldg.utils.get_csv_files(config.input_directory)
|
csv_files = toldg.utils.get_csv_files(config.input_directory)
|
||||||
transactions = []
|
transactions = []
|
||||||
for csv_file in csv_files:
|
for csv_file in csv_files:
|
||||||
@@ -97,6 +93,7 @@ def process_csv_files(config: Config) -> list[Transaction]:
|
|||||||
mappings = toldg.utils.read_mappings(config.mappings_file)
|
mappings = toldg.utils.read_mappings(config.mappings_file)
|
||||||
apply_mappings(transactions, mappings)
|
apply_mappings(transactions, mappings)
|
||||||
toldg.predict.add_account2(config.model, transactions, config.categories)
|
toldg.predict.add_account2(config.model, transactions, config.categories)
|
||||||
toldg.utils.write_mappings(transactions, config.mappings_file)
|
if write_outputs:
|
||||||
toldg.write.render_to_file(transactions, config)
|
toldg.utils.write_mappings(transactions, config.mappings_file)
|
||||||
|
toldg.write.render_to_file(transactions, config)
|
||||||
return transactions
|
return transactions
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from toldg.models import Config
|
from toldg.models import Config
|
||||||
from toldg.predict import train_classifier
|
from toldg.predict import train_classifier
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Dict, List
|
||||||
|
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
@@ -57,14 +57,6 @@ def category_to_bean(c: str) -> str:
|
|||||||
return ":".join(new_sections)
|
return ":".join(new_sections)
|
||||||
|
|
||||||
|
|
||||||
def write_meta(config: Config):
|
|
||||||
with open(config.output_file, "a") as f:
|
|
||||||
for category in config.categories:
|
|
||||||
f.write(f"2017-01-01 open {category_to_bean(category)}\n")
|
|
||||||
f.write("\n")
|
|
||||||
f.write('option "operating_currency" "USD"\n\n')
|
|
||||||
|
|
||||||
|
|
||||||
def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
||||||
"""Write transactions to the mappings file."""
|
"""Write transactions to the mappings file."""
|
||||||
|
|
||||||
@@ -74,10 +66,8 @@ def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
mapping = Mapping(
|
mapping = Mapping(
|
||||||
**{
|
account2=t.account2.strip(),
|
||||||
"account2": t.account2.strip(),
|
narration=t.description,
|
||||||
"narration": t.description,
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
mappings[t.key()] = mapping
|
mappings[t.key()] = mapping
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
from pathlib import Path
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from toldg.models import Config, Transaction
|
from toldg.models import Config, Transaction
|
||||||
@@ -6,11 +5,11 @@ from toldg.utils import category_to_bean
|
|||||||
|
|
||||||
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
||||||
{t.date} * {description}{tags}
|
{t.date} * {description}{tags}
|
||||||
{account2:<40} {t.debit:<6} {t.currency}
|
|
||||||
{account1:<40} {t.credit:<6} {t.currency}
|
|
||||||
source_file: "{t.csv_file}"
|
source_file: "{t.csv_file}"
|
||||||
source_index: {t.index}
|
source_index: {t.index}
|
||||||
source_row: "{t.row}"
|
source_row: "{t.row}"
|
||||||
|
{account2:<40} {t.debit:<6} {t.currency}
|
||||||
|
{account1:<40} {t.credit:<6} {t.currency}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -58,4 +57,3 @@ def render_to_file(transactions: List[Transaction], config: Config):
|
|||||||
content = "".join(format(t) for t in transactions)
|
content = "".join(format(t) for t in transactions)
|
||||||
with open(config.output_file, "a") as f:
|
with open(config.output_file, "a") as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user