Update project structure and move to beancount

This commit is contained in:
2025-03-02 11:08:33 -05:00
parent 886bcdbdd1
commit 08c50e776e
17 changed files with 1844 additions and 296 deletions

View File

@@ -1,4 +0,0 @@
def hello():
    """Print a friendly greeting to stdout."""
    greeting = "Hello, seaman!"
    print(greeting)

1
src/toldg/__init__.py Normal file
View File

@@ -0,0 +1 @@
__version__ = "0.1.0"  # package version exposed as toldg.__version__

28
src/toldg/__main__.py Normal file
View File

@@ -0,0 +1,28 @@
import logging
from rich.logging import RichHandler
from toldg.process import process_csv_files, process_ldg_files
from toldg.utils import load_config, remove_if_exists, write_meta
def init_logging():
    """Configure the root logger to emit INFO-level messages via rich.

    The handler does the styling, so the format string carries only the
    message itself; datefmt is the rich-style bracketed clock time.
    """
    rich_handler = RichHandler()
    logging.basicConfig(
        format="%(message)s",
        datefmt="[%X]",
        level=logging.INFO,
        handlers=[rich_handler],
    )
def main():
    """Entry point: convert the input ledger/CSV files into one output file.

    The steps below are order-dependent: the output file is removed first,
    and every later stage appends to it.
    """
    init_logging()
    config = load_config()
    # Start from a clean slate — write_meta and the processors open
    # config.output_file in append mode.
    remove_if_exists(config.output_file)
    write_meta(config)
    process_ldg_files(config)
    process_csv_files(config)


if __name__ == "__main__":
    main()

View File

@@ -2,19 +2,20 @@ import errno
import subprocess
import sys
EXECUTABLE_NAME = 'fzf.exe' if sys.platform == 'win32' else 'fzf'
EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
def iterfzf(iterable, prompt='> '):
cmd = [EXECUTABLE_NAME, '--prompt=' + prompt]
def iterfzf(iterable, prompt="> "):
cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
encoding = sys.getdefaultencoding()
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None)
proc = subprocess.Popen(
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
)
if proc.stdin is None:
return None
try:
lines = "\n".join(iterable)
proc.stdin.write(lines.encode('utf-8'))
proc.stdin.write(lines.encode("utf-8"))
proc.stdin.close()
except IOError as e:
if e.errno != errno.EPIPE and errno.EPIPE != 32:
@@ -24,7 +25,7 @@ def iterfzf(iterable, prompt='> '):
if proc.stdout is None:
return None
decode = lambda t: t.decode(encoding)
output = [decode(ln.strip(b'\r\n\0')) for ln in iter(proc.stdout.readline, b'')]
output = [decode(ln.strip(b"\r\n\0")) for ln in iter(proc.stdout.readline, b"")]
try:
return output[0]
except IndexError:

View File

@@ -1,10 +1,9 @@
from pydantic import BaseModel
from typing import List
from typing import Optional
from pathlib import Path
from typing import List, Optional
from pydantic import BaseModel
UNKNOWN_CATEGORY = 'account2'
UNKNOWN_CATEGORY = "account2"
class CsvConfig(BaseModel):
@@ -13,8 +12,9 @@ class CsvConfig(BaseModel):
file_match_regex attribute to decide whether to apply a config for a file.
If multiple configs match a single file we raise an exception.
"""
class Config:
extra = 'forbid'
extra = "forbid"
account1: str
file_match_regex: str
@@ -23,8 +23,8 @@ class CsvConfig(BaseModel):
output_date_format: str = "%Y/%m/%d"
skip: int = 1
delimiter: str = ","
quotechar: str = "\""
currency: str = "$"
quotechar: str = '"'
currency: str = "USD"
class Config(BaseModel):
@@ -39,12 +39,13 @@ class Config(BaseModel):
CSV files.
categories (List[str]): A list of account2s. An account has to be defined here
before it can be used in a mapping. Otherwise, ledger will complain.
commodities (List[str]): A list of commodities relevant to the data processing.
commodities (List[str]): A list of commodities relevant to the data processing.
find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
really useful.
"""
class Config:
extra = 'forbid'
extra = "forbid"
input_directory: Path
mappings_file: Path
@@ -60,8 +61,9 @@ class Transaction(BaseModel):
"""
Class for ledger transaction to render into ldg file.
"""
class Config:
extra = 'forbid'
extra = "forbid"
currency: str
debit: str

View File

@@ -1,19 +1,23 @@
from src.models import Transaction, UNKNOWN_CATEGORY
from src.fzf import iterfzf
from typing import List
from toldg.fzf import iterfzf
from toldg.models import UNKNOWN_CATEGORY, Transaction
def get_sort_categories():
def sort_categories(row: str, categories: List[str]):
if learn is None:
return
_, _, probs = learn.predict(row)
cat_to_prob = dict(zip(learn.dls.vocab[1],probs.tolist()))
categories.sort(key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True)
cat_to_prob = dict(zip(learn.dls.vocab[1], probs.tolist()))
categories.sort(
key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True
)
learn = None
try:
from fastai.text.all import load_learner
learn = load_learner("export.pkl")
except ModuleNotFoundError:
user_input = input("No fastai module. Type yes to continue anyway.")
@@ -24,7 +28,9 @@ def get_sort_categories():
def add_account2(transactions: List[Transaction], categories: List[str]):
unmapped_transactions = list(filter(lambda t: t.account2 == UNKNOWN_CATEGORY, transactions))
unmapped_transactions = list(
filter(lambda t: t.account2 == UNKNOWN_CATEGORY, transactions)
)
if len(unmapped_transactions) == 0:
return
sort_categories = get_sort_categories()

View File

@@ -1,26 +1,26 @@
import csv
import datetime
import logging
import re
import sys
import datetime
import src.utils
import src.write
import src.models
import src.predict
from src.models import Config, CsvConfig, Transaction
from typing import List, Dict
from typing import Dict, List
import toldg.models
import toldg.predict
import toldg.utils
import toldg.write
from toldg.models import Config, CsvConfig, Transaction
def process_ldg_files(config: Config):
for ldg_file in src.utils.get_ldg_files(config.input_directory):
with open(ldg_file, 'r') as f_in:
with open(config.output_file, 'a') as f_out:
for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
with open(ldg_file, "r") as f_in:
with open(config.output_file, "a") as f_out:
f_out.write(f_in.read())
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
cs = [c for c in csv_configs
if re.match(c.file_match_regex, csv_file)]
cs = [c for c in csv_configs if re.match(c.file_match_regex, csv_file)]
if not cs:
logging.critical(f"No CSV config for {csv_file}.")
sys.exit(1)
@@ -39,29 +39,28 @@ def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
return amount[1:] if amount.startswith("-") else "-" + amount
def row_to_transaction(row, fields):
""" The user can configure the mapping of CSV fields to the three
required fields date, amount and description via the CsvConfig. """
"""The user can configure the mapping of CSV fields to the three
required fields date, amount and description via the CsvConfig."""
t = {field: row[index] for index, field in fields}
amount = t['amount']
amount = t["amount"]
return Transaction(
currency=config.currency,
debit=flip_sign(amount),
credit=amount,
date=date_to_date(t['date']),
account1=config.account1,
account2=src.models.UNKNOWN_CATEGORY,
description=t['description'],
csv_file=csv_file,
row=csv_file + ", " + ", ".join(row))
currency=config.currency,
debit=flip_sign(amount),
credit=amount,
date=date_to_date(t["date"]),
account1=config.account1,
account2=toldg.models.UNKNOWN_CATEGORY,
description=t["description"],
csv_file=csv_file,
row=csv_file + ", " + ", ".join(row),
)
fields = [(i, f) for i, f in enumerate(config.fields) if f]
with open(csv_file, 'r') as f:
reader = csv.reader(f, delimiter=config.delimiter,
quotechar=config.quotechar)
with open(csv_file, "r") as f:
reader = csv.reader(f, delimiter=config.delimiter, quotechar=config.quotechar)
for _ in range(config.skip):
next(reader)
transactions = [row_to_transaction(row, fields)
for row in reader if row]
transactions = [row_to_transaction(row, fields) for row in reader if row]
return transactions
@@ -100,7 +99,7 @@ def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str,
def process_csv_files(config: Config):
csv_files = src.utils.get_csv_files(config.input_directory)
csv_files = toldg.utils.get_csv_files(config.input_directory)
transactions = []
for csv_file in csv_files:
csv_file = str(csv_file)
@@ -111,13 +110,12 @@ def process_csv_files(config: Config):
find_duplicates(transactions)
if config.descriptions_file is not None:
descriptions = src.utils.read_descriptions(config.descriptions_file)
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
apply_descriptions(transactions, descriptions)
mappings = src.utils.read_mappings(config.mappings_file)
mappings = toldg.utils.read_mappings(config.mappings_file)
apply_mappings(transactions, mappings)
src.predict.add_account2(transactions, config.categories)
src.utils.write_mappings(transactions, config.mappings_file)
src.write.render_to_file(transactions, config)
toldg.predict.add_account2(transactions, config.categories)
toldg.utils.write_mappings(transactions, config.mappings_file)
toldg.write.render_to_file(transactions, config)

View File

@@ -1,20 +1,23 @@
import json
import logging
import os
import sys
import logging
import json
from pathlib import Path
from typing import List, Dict
from src.models import Config, Transaction
from typing import Dict, List
from pydantic import ValidationError
from toldg.models import Config, Transaction
def get_files(directory: Path, ending="") -> List[Path]:
""" Gets files from directory recursively in lexigraphic order. """
return [Path(os.path.join(subdir, f))
for subdir, _, files in os.walk(directory)
for f in files
if f.endswith(ending)]
"""Gets files from directory recursively in lexigraphic order."""
return [
Path(os.path.join(subdir, f))
for subdir, _, files in os.walk(directory)
for f in files
if f.endswith(ending)
]
def get_csv_files(directory: Path) -> List[Path]:
@@ -33,7 +36,7 @@ def load_config() -> Config:
sys.exit(1)
try:
with open(config_file, 'r') as f:
with open(config_file, "r") as f:
config = Config(**json.load(f))
except ValidationError as e:
logging.critical(f"Could not validate {config_file}.")
@@ -45,15 +48,26 @@ def load_config() -> Config:
return config
def write_meta(config: Config):
with open(config.output_file, 'a') as f:
for category in config.categories:
f.write(f"account {category}\n")
f.write("\n")
def category_to_bean(c: str) -> str:
    """Convert a ledger account name to beancount form.

    Upper-cases the first letter of every colon-separated section, e.g.
    ``"expenses:food" -> "Expenses:Food"``. Unlike ``str.capitalize`` the
    rest of each section is left untouched (``"expenses:FOOD"`` keeps
    ``"FOOD"``).

    Args:
        c: Colon-separated account name.

    Returns:
        The account name with each section's first letter upper-cased.
        Empty sections (e.g. from ``"a::b"``) are passed through unchanged
        instead of raising IndexError as the char-list version did.
    """
    return ":".join(s[:1].upper() + s[1:] for s in c.split(":"))
for commodity in config.commodities:
f.write(f"commodity {commodity}\n")
def write_meta(config: Config):
with open(config.output_file, "a") as f:
for category in config.categories:
f.write(f"2017-01-01 open {category_to_bean(category)}\n")
f.write("\n")
f.write('option "operating_currency" "USD"\n\n')
# Commodity section is not required for beancount
# for commodity in config.commodities:
# f.write(f"commodity {commodity}\n")
# f.write("\n")
def write_mappings(transactions: List[Transaction], mappings_file: Path):
@@ -69,20 +83,20 @@ def write_mappings(transactions: List[Transaction], mappings_file: Path):
def read_mappings(mappings_file: Path) -> Dict[str, str]:
with open(mappings_file, 'r') as f:
with open(mappings_file, "r") as f:
account2_to_rows = json.load(f)
return {row: category
for category, rows in account2_to_rows.items()
for row in rows}
return {
row: category for category, rows in account2_to_rows.items() for row in rows
}
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
""" I am basic so the description file is currently a double row based
"""I am basic so the description file is currently a double row based
format where the first row matches the CSV row and the second one is the
description. """
description."""
descriptions = {}
current_row = None
with open(descriptions_file, 'r') as f:
with open(descriptions_file, "r") as f:
for line in f.readlines():
if current_row is None:
current_row = line.rstrip("\n")

32
src/toldg/write.py Normal file
View File

@@ -0,0 +1,32 @@
from pathlib import Path
from typing import List
from toldg.models import Config, Transaction
from toldg.utils import category_to_bean
BEANCOUNT_TRANSACTION_TEMPLATE = """
{t.date} * "{t.description}"
{t.account2:<40} {t.debit:<6} {t.currency}
{t.account1:<40} {t.credit:<6} {t.currency}
"""


def format(t):  # NOTE: shadows the builtin; kept for compatibility with callers
    """Render one transaction as a beancount entry.

    NOTE(review): mutates ``t``'s fields in place before formatting.
    """
    # Beancount dates use dashes; escape quotes inside the description.
    t.date = t.date.replace("/", "-")
    t.description = t.description.replace('"', '\\"')
    # Prefix non-negative amounts with a space so they column-align
    # with negative ones in the padded template.
    for attr in ("debit", "credit"):
        amount = getattr(t, attr)
        if not amount.startswith("-"):
            setattr(t, attr, " " + amount)
    t.account1 = category_to_bean(t.account1)
    t.account2 = category_to_bean(t.account2)
    if t.currency == "EUR":
        # Swap European separators (',' <-> '.') using '|' as a temp marker.
        for attr in ("debit", "credit"):
            amount = getattr(t, attr)
            swapped = amount.replace(".", "|").replace(",", ".").replace("|", ",")
            setattr(t, attr, swapped)
    return BEANCOUNT_TRANSACTION_TEMPLATE.format(t=t)
def render_to_file(transactions: List[Transaction], config: Config):
    """Append every transaction, rendered via ``format``, to the output file."""
    with open(config.output_file, "a") as out:
        out.write("".join(format(txn) for txn in transactions))

View File

@@ -1,17 +0,0 @@
from pathlib import Path
from typing import List
from src.models import Transaction, Config
LEDGER_TRANSACTION_TEMPLATE = """
{t.date} {t.description} ; {t.row}
{t.account2} {t.currency} {t.debit}
{t.account1} {t.currency} {t.credit}
"""


def render_to_file(transactions: List[Transaction], config: Config):
    """Append each transaction, formatted with the ledger template, to the output file."""
    rendered = (LEDGER_TRANSACTION_TEMPLATE.format(t=txn) for txn in transactions)
    with open(config.output_file, 'a') as f:
        f.write("".join(rendered))