Update project structure and move to beancount

This commit is contained in:
2025-03-02 11:08:33 -05:00
parent 886bcdbdd1
commit 08c50e776e
17 changed files with 1844 additions and 296 deletions

1
src/toldg/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Version string of the toldg package.
__version__ = "0.1.0"

28
src/toldg/__main__.py Normal file
View File

@@ -0,0 +1,28 @@
import logging
from rich.logging import RichHandler
from toldg.process import process_csv_files, process_ldg_files
from toldg.utils import load_config, remove_if_exists, write_meta
def init_logging():
    """Configure the root logger to print INFO-and-above records via rich.

    Only the bare message is formatted here; the handler is a RichHandler
    and the date format handed to logging is ``[%X]``.
    """
    rich_handler = RichHandler()
    logging.basicConfig(
        handlers=[rich_handler],
        datefmt="[%X]",
        format="%(message)s",
        level=logging.INFO,
    )
def main():
    """Entry point: rebuild the output file from the configured inputs.

    Logging is set up first, then the configuration is loaded, any previous
    output file is removed, and finally the three writers run in order
    (metadata, ledger files, CSV files).
    """
    init_logging()
    settings = load_config()
    # Every later step opens the output file in append mode, so clear it first.
    remove_if_exists(settings.output_file)
    for step in (write_meta, process_ldg_files, process_csv_files):
        step(settings)
# Run the tool only when this module is executed directly, not on import.
if __name__ == "__main__":
    main()

32
src/toldg/fzf.py Normal file
View File

@@ -0,0 +1,32 @@
import errno
import subprocess
import sys
# Name of the fzf binary to launch; on win32 it carries the ".exe" suffix.
EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
def iterfzf(iterable, prompt="> "):
    """Let the user pick one entry of *iterable* interactively with fzf.

    The candidates are joined with newlines, UTF-8 encoded and written to the
    stdin of a freshly started fzf process; the first line fzf prints on
    stdout is returned.

    Args:
        iterable: Strings offered as candidates, one per line.
        prompt: Prompt text handed to fzf via ``--prompt=``.

    Returns:
        The first output line as a string, or ``None`` when fzf printed
        nothing, exited with a status other than 0 or 1, or one of its pipes
        is unavailable.

    Raises:
        OSError: If feeding the candidates to fzf fails for a reason other
            than a broken pipe (or if the executable cannot be started).
    """
    cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
    encoding = sys.getdefaultencoding()
    proc = subprocess.Popen(
        cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
    )
    if proc.stdin is None:
        return None

    # errno values that merely mean "fzf stopped reading before we were done".
    # The original test compared errno.EPIPE (a constant) against 32, which
    # made the re-raise unreachable wherever EPIPE == 32; compare the
    # exception's errno instead.
    broken_pipe_errnos = {errno.EPIPE, 32}
    if sys.platform == "win32":
        # On Windows a write to a pipe whose reader is gone can surface as
        # EINVAL (CPython's own subprocess module tolerates it the same way).
        broken_pipe_errnos.add(errno.EINVAL)

    try:
        lines = "\n".join(iterable)
        proc.stdin.write(lines.encode("utf-8"))
        proc.stdin.close()
    except IOError as e:
        if e.errno not in broken_pipe_errnos:
            raise

    if proc.wait() not in [0, 1]:
        return None
    if proc.stdout is None:
        return None

    # Close the pipe once everything has been read.
    with proc.stdout:
        output = [
            ln.strip(b"\r\n\0").decode(encoding)
            for ln in iter(proc.stdout.readline, b"")
        ]
    return output[0] if output else None

76
src/toldg/models.py Normal file
View File

@@ -0,0 +1,76 @@
from pathlib import Path
from typing import List, Optional
from pydantic import BaseModel
# Placeholder account2 value given to a transaction until a real category is
# assigned (via the mappings file or the interactive picker).
UNKNOWN_CATEGORY = "account2"
class CsvConfig(BaseModel):
    """
    Class to define how to parse a certain CSV file. We use the
    file_match_regex attribute to decide whether to apply a config for a file.
    If no config or more than one config matches a file, get_csv_config in
    toldg.process logs a critical error and exits.
    """

    class Config:
        # Reject unknown keys instead of silently ignoring them.
        extra = "forbid"

    # Account used as account1 for every transaction read from the file.
    account1: str
    # Pattern applied with re.match to the CSV file path.
    file_match_regex: str
    # Column names by position; empty entries are skipped. The columns
    # "date", "amount" and "description" are the ones read.
    fields: List[str]
    # strptime format of the date column in the CSV file.
    input_date_format: str = "%m/%d/%Y"
    # strftime format the date is converted to.
    output_date_format: str = "%Y/%m/%d"
    # Number of leading CSV rows to skip (e.g. a header line).
    skip: int = 1
    # Passed through to csv.reader.
    delimiter: str = ","
    quotechar: str = '"'
    # Currency copied onto every transaction of this file.
    currency: str = "USD"
class Config(BaseModel):
    """
    Configuration class for managing file search and data processing settings.

    Attributes:
        input_directory (Path): Where to search for 'ldg' and 'csv' files.
        mappings_file (Path): The path to a 'json' file that contains account2 mappings.
        descriptions_file (Optional[Path]): Optional file with replacement
            descriptions for individual CSV rows. Skipped when None.
        output_file (Path): Location to which to write the output 'ldg' file.
        csv_configs: List of CsvConfig which explains how to handle specific
            CSV files.
        categories (List[str]): A list of account2s. An account has to be defined here
            before it can be used in a mapping. Otherwise, ledger will complain.
        commodities (List[str]): A list of commodities relevant to the data processing.
        find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
            really useful.
    """

    class Config:
        # Reject unknown keys instead of silently ignoring them.
        extra = "forbid"

    input_directory: Path
    mappings_file: Path
    descriptions_file: Optional[Path] = None
    output_file: Path = Path("output.ldg")
    csv_configs: List[CsvConfig]
    categories: List[str]
    commodities: List[str]
    find_duplicates: bool = False
class Transaction(BaseModel):
    """
    Class for ledger transaction to render into ldg file.

    All values are kept as strings, exactly as they will be rendered.
    """

    class Config:
        # Reject unknown keys instead of silently ignoring them.
        extra = "forbid"

    currency: str
    # Amount posted to account2 (the CSV amount with its sign flipped).
    debit: str
    # Amount posted to account1 (the amount as found in the CSV).
    credit: str
    date: str
    account1: str
    account2: str
    description: str
    # Path of the CSV file the transaction was read from.
    csv_file: str
    # CSV file path plus the joined raw cells; used as the lookup key for
    # mappings and descriptions.
    row: str

49
src/toldg/predict.py Normal file
View File

@@ -0,0 +1,49 @@
from typing import List
from toldg.fzf import iterfzf
from toldg.models import UNKNOWN_CATEGORY, Transaction
def get_sort_categories():
    """Create the callable that ranks categories for a given CSV row.

    A fastai learner is loaded from ``export.pkl``. When the fastai module
    cannot be imported the user is asked whether to go on without it; any
    answer other than "yes" aborts with an exception. Without a learner the
    returned callable leaves the category list untouched.

    Returns:
        A function ``sort_categories(row, categories)`` that sorts
        *categories* in place, most probable category first.

    Raises:
        Exception: If fastai is missing and the user does not type "yes".
    """
    model = None
    try:
        from fastai.text.all import load_learner

        model = load_learner("export.pkl")
    except ModuleNotFoundError:
        answer = input("No fastai module. Type yes to continue anyway.")
        if answer.strip().lower() != "yes":
            raise Exception("fastai module missing")

    def sort_categories(row: str, categories: List[str]):
        if model is None:
            return
        _, _, probs = model.predict(row)
        probability_of = dict(zip(model.dls.vocab[1], probs.tolist()))

        def score(category):
            # Categories the model has never seen rank last.
            return probability_of.get(category, 0.0)

        categories.sort(key=score, reverse=True)

    return sort_categories
def add_account2(transactions: List[Transaction], categories: List[str]):
    """Interactively assign an account2 to every still-unmapped transaction.

    Transactions whose account2 equals UNKNOWN_CATEGORY are handled one by
    one: the categories are re-sorted for the transaction's row and the user
    then picks one. Nothing happens (and no model is loaded) when every
    transaction already has a category.
    """
    pending = [t for t in transactions if t.account2 == UNKNOWN_CATEGORY]
    if not pending:
        return
    rank = get_sort_categories()
    for transaction in pending:
        rank(transaction.row, categories)
        add_account2_interactive(transaction, categories)
def add_account2_interactive(transaction: Transaction, categories: List[str]):
    """Ask the user (via fzf) for the account2 of a single transaction.

    The picker is shown again until it returns a selection; the choice is
    stored on the transaction and echoed to stdout.
    """
    prompt = (
        f"{transaction.account1} {transaction.date} "
        f"{transaction.description} {transaction.debit} > "
    )
    while True:
        choice = iterfzf(categories, prompt=prompt)
        if choice is not None:
            break
    transaction.account2 = choice
    print(f"Assigned category '{choice}'.")

121
src/toldg/process.py Normal file
View File

@@ -0,0 +1,121 @@
import csv
import datetime
import logging
import re
import sys
from typing import Dict, List
import toldg.models
import toldg.predict
import toldg.utils
import toldg.write
from toldg.models import Config, CsvConfig, Transaction
def process_ldg_files(config: Config):
    """Append the content of every ledger file found in the input directory
    to the output file, unchanged."""
    for ldg_path in toldg.utils.get_ldg_files(config.input_directory):
        with open(ldg_path, "r") as source, open(config.output_file, "a") as sink:
            sink.write(source.read())
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
    """Return the one CsvConfig whose file_match_regex matches *csv_file*.

    The pattern is applied with re.match. If no config or more than one
    config matches, a critical message is logged and the program exits with
    status 1.
    """
    matching = []
    for candidate in csv_configs:
        if re.match(candidate.file_match_regex, csv_file):
            matching.append(candidate)

    if len(matching) == 1:
        return matching[0]

    if matching:
        logging.critical(f"Multiple CSV configs for {csv_file}.")
    else:
        logging.critical(f"No CSV config for {csv_file}.")
    sys.exit(1)
def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
    """Read *csv_file* and turn every non-empty row into a Transaction.

    Args:
        csv_file: Path of the CSV file to read.
        config: Parsing rules for the file (column names, date formats,
            rows to skip, delimiter, quote character, currency, account1).

    Returns:
        One Transaction per non-empty data row, in file order. account2 is
        set to the unknown-category placeholder; a file with fewer rows than
        ``config.skip`` yields an empty list.
    """

    def date_to_date(date: str) -> str:
        d = datetime.datetime.strptime(date, config.input_date_format)
        return d.strftime(config.output_date_format)

    def flip_sign(amount: str) -> str:
        return amount[1:] if amount.startswith("-") else "-" + amount

    def row_to_transaction(row, fields):
        """The user can configure the mapping of CSV fields to the three
        required fields date, amount and description via the CsvConfig."""
        t = {field: row[index] for index, field in fields}
        amount = t["amount"]
        return Transaction(
            currency=config.currency,
            debit=flip_sign(amount),
            credit=amount,
            date=date_to_date(t["date"]),
            account1=config.account1,
            account2=toldg.models.UNKNOWN_CATEGORY,
            description=t["description"],
            csv_file=csv_file,
            # The row string doubles as the key in the mappings and
            # descriptions files, so its format must stay stable.
            row=csv_file + ", " + ", ".join(row),
        )

    # Keep only named columns, remembering their position in the row.
    fields = [(i, f) for i, f in enumerate(config.fields) if f]
    # newline="" is what the csv module asks for: it lets the reader handle
    # line endings itself, so newlines inside quoted fields are parsed right.
    with open(csv_file, "r", newline="") as f:
        reader = csv.reader(f, delimiter=config.delimiter, quotechar=config.quotechar)
        for _ in range(config.skip):
            # A default avoids StopIteration on files shorter than `skip`.
            next(reader, None)
        transactions = [row_to_transaction(row, fields) for row in reader if row]
    return transactions
def find_duplicates(transactions: List[Transaction]):
    """Abort the program when two transactions share the same row string.

    The first repeated row is logged as critical and the process exits with
    status 1; the function returns normally when all rows are unique.
    """
    seen = set()
    for transaction in transactions:
        key = transaction.row
        if key not in seen:
            seen.add(key)
            continue
        logging.critical(f"'{key}' is duplicated.")
        logging.critical("Exit because of duplicated transactions.")
        sys.exit(1)
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
    """Set account2 on each transaction from the row -> category mapping.

    A warning is logged for every transaction without a mapping and for
    every mapping that no transaction used.
    """
    leftover = set(mappings)
    for transaction in transactions:
        key = transaction.row
        if key not in mappings:
            logging.warning(f"No mapping for '{transaction}'.")
            continue
        transaction.account2 = mappings[key]
        leftover.discard(key)

    for key in leftover:
        logging.warning(f"Unused mapping '{key}' -> {mappings[key]}.")
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """Replace transaction descriptions with user-provided ones.

    Args:
        transactions: Transactions to update in place.
        descriptions: Maps a transaction's row string to the description
            that should replace the one read from the CSV file.

    Transactions without an entry keep their description. A warning is
    logged for every entry that matched no transaction.
    """
    unused_descriptions = set(descriptions)
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        # These are descriptions, not mappings; say so in the log.
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config):
    """Convert all CSV files of the input directory into ledger entries.

    Pipeline: parse every CSV file with its matching CsvConfig, optionally
    check for duplicates, optionally override descriptions, apply the stored
    account2 mappings, ask the user for the missing ones, write the mappings
    back and finally append the rendered transactions to the output file.
    """
    transactions = []
    for path in toldg.utils.get_csv_files(config.input_directory):
        name = str(path)
        parser_config = get_csv_config(name, config.csv_configs)
        transactions.extend(get_transactions(name, parser_config))

    if config.find_duplicates:
        find_duplicates(transactions)

    if config.descriptions_file is not None:
        apply_descriptions(
            transactions, toldg.utils.read_descriptions(config.descriptions_file)
        )

    apply_mappings(transactions, toldg.utils.read_mappings(config.mappings_file))
    toldg.predict.add_account2(transactions, config.categories)
    # Persist the mappings before rendering so interactive choices are kept.
    toldg.utils.write_mappings(transactions, config.mappings_file)
    toldg.write.render_to_file(transactions, config)

113
src/toldg/utils.py Normal file
View File

@@ -0,0 +1,113 @@
import json
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List
from pydantic import ValidationError
from toldg.models import Config, Transaction
def get_files(directory: Path, ending="") -> List[Path]:
    """Gets files from directory recursively in lexicographic order.

    Args:
        directory: Root directory to walk.
        ending: Only files whose name ends with this string are returned;
            the default empty string matches every file.

    Returns:
        The matching file paths, sorted. os.walk itself gives no ordering
        guarantee, so the result is sorted explicitly to make the output
        independent of the file system.
    """
    return sorted(
        Path(subdir) / name
        for subdir, _, files in os.walk(directory)
        for name in files
        if name.endswith(ending)
    )
def get_csv_files(directory: Path) -> List[Path]:
    """Return all files below *directory* whose name ends with ``.csv``."""
    return get_files(directory, ending=".csv")
def get_ldg_files(directory: Path) -> List[Path]:
    """Return all files below *directory* whose name ends with ``.ldg``."""
    return get_files(directory, ending=".ldg")
def load_config() -> Config:
    """Load the configuration from the JSON file named by the first CLI argument.

    Returns:
        The validated Config.

    Every failure (no argument given, file missing, file not valid JSON,
    content failing validation) is logged as critical and ends the program
    with exit status 1.
    """
    try:
        config_file = Path(sys.argv[1])
    except IndexError:
        logging.critical("Provide configuration file as first argument.")
        sys.exit(1)

    try:
        with open(config_file, "r") as f:
            config = Config(**json.load(f))
    except json.JSONDecodeError as e:
        # Report malformed JSON like the other failure modes instead of
        # letting a raw traceback escape.
        logging.critical(f"Could not parse {config_file} as JSON.")
        logging.info(e)
        sys.exit(1)
    except ValidationError as e:
        logging.critical(f"Could not validate {config_file}.")
        logging.info(e)
        sys.exit(1)
    except FileNotFoundError:
        logging.critical(f"Could not find {config_file}.")
        sys.exit(1)
    return config
def category_to_bean(c: str) -> str:
    """Convert an account name to beancount style.

    The first character of every ``:``-separated section is upper-cased; the
    rest of each section is left untouched.

    Args:
        c: Account name such as ``"expenses:food"``.

    Returns:
        The converted name, e.g. ``"Expenses:Food"``. Empty sections (an
        empty string, ``"a::b"``, a trailing colon) are passed through
        unchanged instead of raising IndexError.
    """
    # Slicing with [:1] is safe on an empty section, unlike indexing [0].
    return ":".join(
        section[:1].upper() + section[1:] for section in c.split(":")
    )
def write_meta(config: Config):
    """Append the beancount preamble to the output file.

    One ``open`` directive dated 2017-01-01 is written per configured
    category (converted with category_to_bean), followed by a blank line and
    the ``operating_currency`` option set to USD.
    """
    with open(config.output_file, "a") as out:
        out.writelines(
            f"2017-01-01 open {category_to_bean(name)}\n"
            for name in config.categories
        )
        out.write("\n")
        out.write('option "operating_currency" "USD"\n\n')
    # Commodity section is not required for beancount
    # for commodity in config.commodities:
    # f.write(f"commodity {commodity}\n")
    # f.write("\n")
def write_mappings(transactions: List[Transaction], mappings_file: Path):
    """Store which rows belong to which account2 as JSON.

    The file maps every (whitespace-stripped) account2 to the sorted list of
    row strings assigned to it; the accounts themselves are written in
    sorted order with an indent of 4.
    """
    rows_by_account = {}
    for transaction in transactions:
        rows_by_account.setdefault(transaction.account2.strip(), []).append(
            transaction.row
        )

    with open(mappings_file, "w") as f:
        ordered = {
            account: sorted(rows)
            for account, rows in sorted(rows_by_account.items())
        }
        json.dump(ordered, f, indent=4)
def read_mappings(mappings_file: Path) -> Dict[str, str]:
    """Load the mappings JSON and invert it into a row -> category dict.

    The file stores ``{category: [row, ...]}``. If a row is listed under
    several categories, the category that appears last in the file wins.
    """
    with open(mappings_file, "r") as f:
        account2_to_rows = json.load(f)

    row_to_category = {}
    for category, rows in account2_to_rows.items():
        row_to_category.update(dict.fromkeys(rows, category))
    return row_to_category
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Parse the descriptions file into a row -> description dict.

    The format is pairs of lines: the first line of a pair is the CSV row,
    the second one the description to use for it. A trailing row line
    without a description line is ignored.
    """
    with open(descriptions_file, "r") as f:
        stripped = [line.rstrip("\n") for line in f]
    # Even positions are the rows, odd positions their descriptions.
    return dict(zip(stripped[0::2], stripped[1::2]))
def remove_if_exists(output_file: Path):
    """Delete *output_file*; a file that does not exist is not an error.

    Raises:
        OSError: If the file exists but cannot be removed (for example
            because of missing permissions or because it is a directory).
            Swallowing that would let later steps append to stale content.
    """
    try:
        os.remove(output_file)
    except FileNotFoundError:
        # Nothing to clean up.
        pass

32
src/toldg/write.py Normal file
View File

@@ -0,0 +1,32 @@
from pathlib import Path
from typing import List
from toldg.models import Config, Transaction
from toldg.utils import category_to_bean
# Layout of one rendered beancount transaction: a blank separator line, the
# header line (date, "*" flag, quoted description) and two postings. The
# posting lines must be indented, otherwise beancount does not treat them as
# part of the transaction.
BEANCOUNT_TRANSACTION_TEMPLATE = """
{t.date} * "{t.description}"
  {t.account2:<40} {t.debit:<6} {t.currency}
  {t.account1:<40} {t.credit:<6} {t.currency}
"""
def format(t):
    """Render a transaction as beancount text.

    Note that *t* is normalised in place before rendering: slashes in the
    date become dashes, double quotes in the description are escaped,
    amounts without a leading minus get a leading space, both accounts are
    converted with category_to_bean and, for EUR, the decimal and thousands
    separators of the amounts are swapped.
    """
    t.date = t.date.replace("/", "-")
    t.description = t.description.replace('"', '\\"')
    t.account1 = category_to_bean(t.account1)
    t.account2 = category_to_bean(t.account2)

    # "." <-> "," swap; a literal "|" ends up as "," as well.
    swap_separators = str.maketrans({".": ",", ",": ".", "|": ","})
    for side in ("debit", "credit"):
        amount = getattr(t, side)
        if not amount.startswith("-"):
            # Pad so positive and negative amounts line up.
            amount = " " + amount
        if t.currency == "EUR":
            amount = amount.translate(swap_separators)
        setattr(t, side, amount)

    return BEANCOUNT_TRANSACTION_TEMPLATE.format(t=t)
def render_to_file(transactions: List[Transaction], config: Config):
    """Render all transactions and append them to the configured output file."""
    rendered = [format(transaction) for transaction in transactions]
    with open(config.output_file, "a") as out:
        out.write("".join(rendered))