Update project structure and move to beancount

This commit is contained in:
2025-03-02 11:08:33 -05:00
parent 886bcdbdd1
commit 08c50e776e
17 changed files with 1844 additions and 296 deletions

View File

@@ -1,4 +0,0 @@
def hello():
    """Print a friendly greeting to stdout."""
    greeting = "Hello, seaman!"
    print(greeting)

1
src/toldg/__init__.py Normal file
View File

@@ -0,0 +1 @@
__version__ = "0.1.0"  # package version exposed as toldg.__version__

28
src/toldg/__main__.py Normal file
View File

@@ -0,0 +1,28 @@
import logging
from rich.logging import RichHandler
from toldg.process import process_csv_files, process_ldg_files
from toldg.utils import load_config, remove_if_exists, write_meta
def init_logging():
    """Configure the root logger to emit INFO-level messages via rich.

    The handler does the styling, so the format string carries only the
    message itself; datefmt is the rich-style bracketed clock time.
    """
    rich_handler = RichHandler()
    logging.basicConfig(
        format="%(message)s",
        datefmt="[%X]",
        level=logging.INFO,
        handlers=[rich_handler],
    )
def main():
    """Entry point: convert the input ledger/CSV files into one output file.

    The steps below are order-dependent: the output file is removed first,
    and every later stage appends to it.
    """
    init_logging()
    config = load_config()
    # Start from a clean slate — write_meta and the processors open
    # config.output_file in append mode.
    remove_if_exists(config.output_file)
    write_meta(config)
    process_ldg_files(config)
    process_csv_files(config)


if __name__ == "__main__":
    main()

View File

@@ -2,19 +2,20 @@ import errno
import subprocess
import sys
EXECUTABLE_NAME = 'fzf.exe' if sys.platform == 'win32' else 'fzf'
EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
def iterfzf(iterable, prompt='> '):
cmd = [EXECUTABLE_NAME, '--prompt=' + prompt]
def iterfzf(iterable, prompt="> "):
cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
encoding = sys.getdefaultencoding()
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None)
proc = subprocess.Popen(
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
)
if proc.stdin is None:
return None
try:
lines = "\n".join(iterable)
proc.stdin.write(lines.encode('utf-8'))
proc.stdin.write(lines.encode("utf-8"))
proc.stdin.close()
except IOError as e:
if e.errno != errno.EPIPE and errno.EPIPE != 32:
@@ -24,7 +25,7 @@ def iterfzf(iterable, prompt='> '):
if proc.stdout is None:
return None
decode = lambda t: t.decode(encoding)
output = [decode(ln.strip(b'\r\n\0')) for ln in iter(proc.stdout.readline, b'')]
output = [decode(ln.strip(b"\r\n\0")) for ln in iter(proc.stdout.readline, b"")]
try:
return output[0]
except IndexError:

View File

@@ -1,10 +1,9 @@
from pydantic import BaseModel
from typing import List
from typing import Optional
from pathlib import Path
from typing import List, Optional
from pydantic import BaseModel
UNKNOWN_CATEGORY = 'account2'
UNKNOWN_CATEGORY = "account2"
class CsvConfig(BaseModel):
@@ -13,8 +12,9 @@ class CsvConfig(BaseModel):
file_match_regex attribute to decide whether to apply a config for a file.
If multiple configs match a single file we raise an exception.
"""
class Config:
extra = 'forbid'
extra = "forbid"
account1: str
file_match_regex: str
@@ -23,8 +23,8 @@ class CsvConfig(BaseModel):
output_date_format: str = "%Y/%m/%d"
skip: int = 1
delimiter: str = ","
quotechar: str = "\""
currency: str = "$"
quotechar: str = '"'
currency: str = "USD"
class Config(BaseModel):
@@ -39,12 +39,13 @@ class Config(BaseModel):
CSV files.
categories (List[str]): A list of account2s. An account has to be defined here
before it can be used in a mapping. Otherwise, ledger will complain.
commodities (List[str]): A list of commodities relevant to the data processing.
commodities (List[str]): A list of commodities relevant to the data processing.
find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
really useful.
"""
class Config:
extra = 'forbid'
extra = "forbid"
input_directory: Path
mappings_file: Path
@@ -60,8 +61,9 @@ class Transaction(BaseModel):
"""
Class for ledger transaction to render into ldg file.
"""
class Config:
extra = 'forbid'
extra = "forbid"
currency: str
debit: str

View File

@@ -1,19 +1,23 @@
from src.models import Transaction, UNKNOWN_CATEGORY
from src.fzf import iterfzf
from typing import List
from toldg.fzf import iterfzf
from toldg.models import UNKNOWN_CATEGORY, Transaction
def get_sort_categories():
def sort_categories(row: str, categories: List[str]):
if learn is None:
return
_, _, probs = learn.predict(row)
cat_to_prob = dict(zip(learn.dls.vocab[1],probs.tolist()))
categories.sort(key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True)
cat_to_prob = dict(zip(learn.dls.vocab[1], probs.tolist()))
categories.sort(
key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True
)
learn = None
try:
from fastai.text.all import load_learner
learn = load_learner("export.pkl")
except ModuleNotFoundError:
user_input = input("No fastai module. Type yes to continue anyway.")
@@ -24,7 +28,9 @@ def get_sort_categories():
def add_account2(transactions: List[Transaction], categories: List[str]):
unmapped_transactions = list(filter(lambda t: t.account2 == UNKNOWN_CATEGORY, transactions))
unmapped_transactions = list(
filter(lambda t: t.account2 == UNKNOWN_CATEGORY, transactions)
)
if len(unmapped_transactions) == 0:
return
sort_categories = get_sort_categories()

View File

@@ -1,26 +1,26 @@
import csv
import datetime
import logging
import re
import sys
import datetime
import src.utils
import src.write
import src.models
import src.predict
from src.models import Config, CsvConfig, Transaction
from typing import List, Dict
from typing import Dict, List
import toldg.models
import toldg.predict
import toldg.utils
import toldg.write
from toldg.models import Config, CsvConfig, Transaction
def process_ldg_files(config: Config):
for ldg_file in src.utils.get_ldg_files(config.input_directory):
with open(ldg_file, 'r') as f_in:
with open(config.output_file, 'a') as f_out:
for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
with open(ldg_file, "r") as f_in:
with open(config.output_file, "a") as f_out:
f_out.write(f_in.read())
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
cs = [c for c in csv_configs
if re.match(c.file_match_regex, csv_file)]
cs = [c for c in csv_configs if re.match(c.file_match_regex, csv_file)]
if not cs:
logging.critical(f"No CSV config for {csv_file}.")
sys.exit(1)
@@ -39,29 +39,28 @@ def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
return amount[1:] if amount.startswith("-") else "-" + amount
def row_to_transaction(row, fields):
""" The user can configure the mapping of CSV fields to the three
required fields date, amount and description via the CsvConfig. """
"""The user can configure the mapping of CSV fields to the three
required fields date, amount and description via the CsvConfig."""
t = {field: row[index] for index, field in fields}
amount = t['amount']
amount = t["amount"]
return Transaction(
currency=config.currency,
debit=flip_sign(amount),
credit=amount,
date=date_to_date(t['date']),
account1=config.account1,
account2=src.models.UNKNOWN_CATEGORY,
description=t['description'],
csv_file=csv_file,
row=csv_file + ", " + ", ".join(row))
currency=config.currency,
debit=flip_sign(amount),
credit=amount,
date=date_to_date(t["date"]),
account1=config.account1,
account2=toldg.models.UNKNOWN_CATEGORY,
description=t["description"],
csv_file=csv_file,
row=csv_file + ", " + ", ".join(row),
)
fields = [(i, f) for i, f in enumerate(config.fields) if f]
with open(csv_file, 'r') as f:
reader = csv.reader(f, delimiter=config.delimiter,
quotechar=config.quotechar)
with open(csv_file, "r") as f:
reader = csv.reader(f, delimiter=config.delimiter, quotechar=config.quotechar)
for _ in range(config.skip):
next(reader)
transactions = [row_to_transaction(row, fields)
for row in reader if row]
transactions = [row_to_transaction(row, fields) for row in reader if row]
return transactions
@@ -100,7 +99,7 @@ def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str,
def process_csv_files(config: Config):
csv_files = src.utils.get_csv_files(config.input_directory)
csv_files = toldg.utils.get_csv_files(config.input_directory)
transactions = []
for csv_file in csv_files:
csv_file = str(csv_file)
@@ -111,13 +110,12 @@ def process_csv_files(config: Config):
find_duplicates(transactions)
if config.descriptions_file is not None:
descriptions = src.utils.read_descriptions(config.descriptions_file)
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
apply_descriptions(transactions, descriptions)
mappings = src.utils.read_mappings(config.mappings_file)
mappings = toldg.utils.read_mappings(config.mappings_file)
apply_mappings(transactions, mappings)
src.predict.add_account2(transactions, config.categories)
src.utils.write_mappings(transactions, config.mappings_file)
src.write.render_to_file(transactions, config)
toldg.predict.add_account2(transactions, config.categories)
toldg.utils.write_mappings(transactions, config.mappings_file)
toldg.write.render_to_file(transactions, config)

View File

@@ -1,20 +1,23 @@
import json
import logging
import os
import sys
import logging
import json
from pathlib import Path
from typing import List, Dict
from src.models import Config, Transaction
from typing import Dict, List
from pydantic import ValidationError
from toldg.models import Config, Transaction
def get_files(directory: Path, ending="") -> List[Path]:
""" Gets files from directory recursively in lexigraphic order. """
return [Path(os.path.join(subdir, f))
for subdir, _, files in os.walk(directory)
for f in files
if f.endswith(ending)]
"""Gets files from directory recursively in lexigraphic order."""
return [
Path(os.path.join(subdir, f))
for subdir, _, files in os.walk(directory)
for f in files
if f.endswith(ending)
]
def get_csv_files(directory: Path) -> List[Path]:
@@ -33,7 +36,7 @@ def load_config() -> Config:
sys.exit(1)
try:
with open(config_file, 'r') as f:
with open(config_file, "r") as f:
config = Config(**json.load(f))
except ValidationError as e:
logging.critical(f"Could not validate {config_file}.")
@@ -45,15 +48,26 @@ def load_config() -> Config:
return config
def write_meta(config: Config):
with open(config.output_file, 'a') as f:
for category in config.categories:
f.write(f"account {category}\n")
f.write("\n")
def category_to_bean(c: str) -> str:
    """Convert a ledger account name to beancount form.

    Upper-cases the first letter of every colon-separated section, e.g.
    ``"expenses:food" -> "Expenses:Food"``. Unlike ``str.capitalize`` the
    rest of each section is left untouched (``"expenses:FOOD"`` keeps
    ``"FOOD"``).

    Args:
        c: Colon-separated account name.

    Returns:
        The account name with each section's first letter upper-cased.
        Empty sections (e.g. from ``"a::b"``) are passed through unchanged
        instead of raising IndexError as the char-list version did.
    """
    return ":".join(s[:1].upper() + s[1:] for s in c.split(":"))
for commodity in config.commodities:
f.write(f"commodity {commodity}\n")
def write_meta(config: Config):
with open(config.output_file, "a") as f:
for category in config.categories:
f.write(f"2017-01-01 open {category_to_bean(category)}\n")
f.write("\n")
f.write('option "operating_currency" "USD"\n\n')
# Commodity section is not required for beancount
# for commodity in config.commodities:
# f.write(f"commodity {commodity}\n")
# f.write("\n")
def write_mappings(transactions: List[Transaction], mappings_file: Path):
@@ -69,20 +83,20 @@ def write_mappings(transactions: List[Transaction], mappings_file: Path):
def read_mappings(mappings_file: Path) -> Dict[str, str]:
with open(mappings_file, 'r') as f:
with open(mappings_file, "r") as f:
account2_to_rows = json.load(f)
return {row: category
for category, rows in account2_to_rows.items()
for row in rows}
return {
row: category for category, rows in account2_to_rows.items() for row in rows
}
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
""" I am basic so the description file is currently a double row based
"""I am basic so the description file is currently a double row based
format where the first row matches the CSV row and the second one is the
description. """
description."""
descriptions = {}
current_row = None
with open(descriptions_file, 'r') as f:
with open(descriptions_file, "r") as f:
for line in f.readlines():
if current_row is None:
current_row = line.rstrip("\n")

32
src/toldg/write.py Normal file
View File

@@ -0,0 +1,32 @@
from pathlib import Path
from typing import List
from toldg.models import Config, Transaction
from toldg.utils import category_to_bean
BEANCOUNT_TRANSACTION_TEMPLATE = """
{t.date} * "{t.description}"
{t.account2:<40} {t.debit:<6} {t.currency}
{t.account1:<40} {t.credit:<6} {t.currency}
"""


def format(t):  # NOTE: shadows the builtin; kept for compatibility with callers
    """Render one transaction as a beancount entry.

    NOTE(review): mutates ``t``'s fields in place before formatting.
    """
    # Beancount dates use dashes; escape quotes inside the description.
    t.date = t.date.replace("/", "-")
    t.description = t.description.replace('"', '\\"')
    # Prefix non-negative amounts with a space so they column-align
    # with negative ones in the padded template.
    for attr in ("debit", "credit"):
        amount = getattr(t, attr)
        if not amount.startswith("-"):
            setattr(t, attr, " " + amount)
    t.account1 = category_to_bean(t.account1)
    t.account2 = category_to_bean(t.account2)
    if t.currency == "EUR":
        # Swap European separators (',' <-> '.') using '|' as a temp marker.
        for attr in ("debit", "credit"):
            amount = getattr(t, attr)
            swapped = amount.replace(".", "|").replace(",", ".").replace("|", ",")
            setattr(t, attr, swapped)
    return BEANCOUNT_TRANSACTION_TEMPLATE.format(t=t)
def render_to_file(transactions: List[Transaction], config: Config):
    """Append every transaction, rendered via ``format``, to the output file."""
    with open(config.output_file, "a") as out:
        out.write("".join(format(txn) for txn in transactions))

View File

@@ -1,17 +0,0 @@
from pathlib import Path
from typing import List
from src.models import Transaction, Config
LEDGER_TRANSACTION_TEMPLATE = """
{t.date} {t.description} ; {t.row}
{t.account2} {t.currency} {t.debit}
{t.account1} {t.currency} {t.credit}
"""


def render_to_file(transactions: List[Transaction], config: Config):
    """Append each transaction, formatted with the ledger template, to the output file."""
    rendered = (LEDGER_TRANSACTION_TEMPLATE.format(t=txn) for txn in transactions)
    with open(config.output_file, 'a') as f:
        f.write("".join(rendered))