Implement new mapping format

2025-03-02 13:32:08 -05:00
parent 08c50e776e
commit 078bf07d0f
5 changed files with 76 additions and 65 deletions
--- a/README.md
+++ b/README.md
@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/
 ## Usage
-To transform CSV data into Beancount run `toldg` via `python-poetry`.
+To transform CSV data into Beancount, first install the package via
+`poetry install`.
+You can then run `toldg` from the directory where your configuration
+file and your ledger data are located.
 ```bash
-poetry -P ${LEDGER_DATA_ROOT} run toldg
+poetry -P ledgerai run toldg
 ```
-To visualize the data with [fava](https://beancount.github.io/fava/) install all
+To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.
-dependencies via `python-poetry`, enable the venv and run `fava` from there.
 ```bash
 poetry install
 eval "$(poetry env activate)"
 fava your_ledger.beancount
 ```
--- a/src/toldg/models.py
+++ b/src/toldg/models.py
@@ -49,18 +49,14 @@ class Config(BaseModel):
    input_directory: Path
    mappings_file: Path
    descriptions_file: Optional[Path] = None
    output_file: Path = Path("output.ldg")
    csv_configs: List[CsvConfig]
    categories: List[str]
    commodities: List[str]
    find_duplicates: bool = False
 class Transaction(BaseModel):
-    """
+    """Class for ledger transaction to render into ldg file."""
-    Class for ledger transaction to render into ldg file.
-    """
    class Config:
        extra = "forbid"
@@ -74,3 +70,16 @@ class Transaction(BaseModel):
    description: str
    csv_file: str
    row: str
    narration: Optional[str] = None
    payee: Optional[str] = None
 class Mapping(BaseModel):
    """Class for transaction mapping from mappings file."""
    class Config:
        extra = "forbid"
    account2: str
    narration: Optional[str] = None
    payee: Optional[str] = None
--- a/src/toldg/process.py
+++ b/src/toldg/process.py
@@ -3,13 +3,13 @@ import datetime
 import logging
 import re
 import sys
-from typing import Dict, List
+from typing import Any, Dict, List
 import toldg.models
 import toldg.predict
 import toldg.utils
 import toldg.write
-from toldg.models import Config, CsvConfig, Transaction
+from toldg.models import Config, CsvConfig, Mapping, Transaction
 def process_ldg_files(config: Config):
@@ -76,26 +76,33 @@ def find_duplicates(transactions: List[Transaction]):
            rows.add(row)
-def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
+def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
    """Apply mappings to transactions."""
    unused_mappings = set(mappings.keys())
    for t in transactions:
        if t.row in mappings:
-            t.account2 = mappings[t.row]
+            mapping = mappings[t.row]
+            assert isinstance(
+                mapping, Mapping
+            ), "Only new mappings format is supported."
+            t.account2 = mapping.account2
+            if mapping.narration:
+                t.narration = mapping.narration
+            if mapping.payee:
+                t.payee = mapping.payee
            unused_mappings.discard(t.row)
        else:
            logging.warning(f"No mapping for '{t}'.")
    for row in unused_mappings:
-        logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")
+        mapping_info = mappings[row]
-
+        # Mapping is a pydantic model, not a dict: read the field as an
+        # attribute (subscripting it would raise TypeError).
+        account2 = mapping_info.account2
-
+        logging.warning(f"Unused mapping '{row}' -> {account2}")
 def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    unused_descriptions = set(descriptions.keys())
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        logging.warning(f"Unused mapping '{row}' -> {descriptions[row]}.")
 def process_csv_files(config: Config):
@@ -109,13 +116,8 @@ def process_csv_files(config: Config):
    if config.find_duplicates:
        find_duplicates(transactions)
    if config.descriptions_file is not None:
        descriptions = toldg.utils.read_descriptions(config.descriptions_file)
        apply_descriptions(transactions, descriptions)
    mappings = toldg.utils.read_mappings(config.mappings_file)
    apply_mappings(transactions, mappings)
    toldg.predict.add_account2(transactions, config.categories)
    toldg.utils.write_mappings(transactions, config.mappings_file)
    toldg.write.render_to_file(transactions, config)
--- a/src/toldg/utils.py
+++ b/src/toldg/utils.py
@@ -3,11 +3,11 @@ import logging
 import os
 import sys
 from pathlib import Path
-from typing import Dict, List
+from typing import Any, Dict, List, Optional
 from pydantic import ValidationError
-from toldg.models import Config, Transaction
+from toldg.models import Config, Mapping, Transaction
 def get_files(directory: Path, ending="") -> List[Path]:
@@ -64,46 +64,33 @@ def write_meta(config: Config):
        f.write("\n")
        f.write('option "operating_currency" "USD"\n\n')
        # Commodity section is not required for beancount
        # for commodity in config.commodities:
        #     f.write(f"commodity {commodity}\n")
        # f.write("\n")
 def write_mappings(transactions: List[Transaction], mappings_file: Path):
    """Write transactions to the mappings file."""
    mappings = {}
    for t in transactions:
-        try:
+        mapping = Mapping(
-            mappings[t.account2.strip()].append(t.row)
+            **{
-        except KeyError:
+                "account2": t.account2.strip(),
-            mappings[t.account2.strip()] = [t.row]
+            }
        )
        if t.narration:
            mapping.narration = t.narration
        if t.payee:
            mapping.payee = t.payee
        mappings[t.row] = mapping.dict()
    with open(mappings_file, "w") as f:
-        json.dump({k: sorted(v) for k, v in sorted(mappings.items())}, f, indent=4)
+        json.dump(mappings, f, indent=4)
-def read_mappings(mappings_file: Path) -> Dict[str, str]:
+def read_mappings(mappings_file: Path) -> Dict[str, Mapping]:
    """Read mappings from file."""
    with open(mappings_file, "r") as f:
-        account2_to_rows = json.load(f)
+        data = json.load(f)
-    return {
+    for key, value in data.items():
-        row: category for category, rows in account2_to_rows.items() for row in rows
+        data[key] = Mapping(**value)
-    }
+    return data
 def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """I am basic so the description file is currently a double row based
    format where the first row matches the CSV row and the second one is the
    description."""
    descriptions = {}
    current_row = None
    with open(descriptions_file, "r") as f:
        for line in f.readlines():
            if current_row is None:
                current_row = line.rstrip("\n")
            else:
                descriptions[current_row] = line.rstrip("\n")
                current_row = None
    return descriptions
 def remove_if_exists(output_file: Path):
--- a/src/toldg/write.py
+++ b/src/toldg/write.py
@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
 from toldg.utils import category_to_bean
 BEANCOUNT_TRANSACTION_TEMPLATE = """
-{t.date} * "{t.description}"
+{t.date} * {t.description}
    {t.account2:<40}  {t.debit:<6} {t.currency}
    {t.account1:<40}  {t.credit:<6} {t.currency}
 """
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """
 def format(t):
    t.date = t.date.replace("/", "-")
-    t.description = t.description.replace('"', '\\"')
+    if t.narration and t.payee:
        # A transaction may have an optional “payee” and/or a “narration.”
        t.description = f'"{t.payee}" "{t.narration}"'
    elif t.narration:
        # If you place a single string on a transaction line, it becomes its narration:
        t.description = f'"{t.narration}"'
    elif t.payee:
        # If you want to set just a payee, put an empty narration string:
        t.description = f'"{t.payee}" ""'
    else:
        t.description = f'"{t.description}"'
    if not t.debit.startswith("-"):
        t.debit = " " + t.debit
    if not t.credit.startswith("-"):