From 078bf07d0f60b34677d7dd49d00140826353d5b3 Mon Sep 17 00:00:00 2001
From: Felix Martin <mail@felixm.de>
Date: Sun, 2 Mar 2025 13:32:08 -0500
Subject: [PATCH] Implement new mapping format

---
 README.md            | 12 +++++-----
 src/toldg/models.py  | 19 +++++++++++-----
 src/toldg/process.py | 42 ++++++++++++++++++-----------------
 src/toldg/utils.py   | 53 +++++++++++++++++---------------------------
 src/toldg/write.py   | 15 +++++++++++--
 5 files changed, 76 insertions(+), 65 deletions(-)

diff --git a/README.md b/README.md
index 4634a58..7ea8fe3 100644
--- a/README.md
+++ b/README.md
@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/
 
 ## Usage
 
-To transform CSV data into Beancount run `toldg` via `python-poetry`.
+To transform CSV data into Beancount, first install the package via
+`poetry install`.
+
+You can then run `toldg` from the directory where your configuration
+file and your ledger data are located.
 
 ```bash
-poetry -P ${LEDGER_DATA_ROOT} run toldg
+poetry -P ledgerai run toldg
 ```
 
-To visualize the data with [fava](https://beancount.github.io/fava/) install all
-dependencies via `python-poetry`, enable the venv and run `fava` from there.
+To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.
 
 ```bash
-poetry install
 eval "$(poetry env activate)"
 fava your_ledger.beancount
 ```
diff --git a/src/toldg/models.py b/src/toldg/models.py
index bed50e0..5a96df6 100644
--- a/src/toldg/models.py
+++ b/src/toldg/models.py
@@ -49,18 +49,14 @@ class Config(BaseModel):
 
     input_directory: Path
     mappings_file: Path
-    descriptions_file: Optional[Path] = None
     output_file: Path = Path("output.ldg")
     csv_configs: List[CsvConfig]
     categories: List[str]
-    commodities: List[str]
     find_duplicates: bool = False
 
 
 class Transaction(BaseModel):
-    """
-    Class for ledger transaction to render into ldg file.
-    """
+    """Class for ledger transaction to render into ldg file."""
 
     class Config:
         extra = "forbid"
@@ -74,3 +70,16 @@ class Transaction(BaseModel):
     description: str
     csv_file: str
     row: str
+    narration: Optional[str] = None
+    payee: Optional[str] = None
+
+
+class Mapping(BaseModel):
+    """Class for transaction mapping from mappings file."""
+
+    class Config:
+        extra = "forbid"
+
+    account2: str
+    narration: Optional[str] = None
+    payee: Optional[str] = None
diff --git a/src/toldg/process.py b/src/toldg/process.py
index 2571577..f9443b4 100644
--- a/src/toldg/process.py
+++ b/src/toldg/process.py
@@ -3,13 +3,13 @@ import datetime
 import logging
 import re
 import sys
-from typing import Dict, List
+from typing import Any, Dict, List
 
 import toldg.models
 import toldg.predict
 import toldg.utils
 import toldg.write
-from toldg.models import Config, CsvConfig, Transaction
+from toldg.models import Config, CsvConfig, Mapping, Transaction
 
 
 def process_ldg_files(config: Config):
@@ -76,26 +76,33 @@ def find_duplicates(transactions: List[Transaction]):
             rows.add(row)
 
 
-def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
+def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
+    """Copy account2 and any narration/payee from the mapping keyed by each row."""
     unused_mappings = set(mappings.keys())
+
     for t in transactions:
         if t.row in mappings:
-            t.account2 = mappings[t.row]
+            mapping = mappings[t.row]
+
+            assert isinstance(
+                mapping, Mapping
+            ), "Only new mappings format is supported."
+            t.account2 = mapping.account2
+
+            if mapping.narration:
+                t.narration = mapping.narration
+
+            if mapping.payee:
+                t.payee = mapping.payee
+
             unused_mappings.discard(t.row)
         else:
             logging.warning(f"No mapping for '{t}'.")
+
     for row in unused_mappings:
-        logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")
-
-
-def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
-    unused_descriptions = set(descriptions.keys())
-    for t in transactions:
-        if t.row in descriptions:
-            t.description = descriptions[t.row]
-            unused_descriptions.discard(t.row)
-    for row in unused_descriptions:
-        logging.warning(f"Unused mapping '{row}' -> {descriptions[row]}.")
+        mapping_info = mappings[row]
+        account2 = mapping_info.account2  # Mapping is a model, not a dict
+        logging.warning(f"Unused mapping '{row}' -> {account2}")
 
 
 def process_csv_files(config: Config):
@@ -109,13 +116,8 @@ def process_csv_files(config: Config):
     if config.find_duplicates:
         find_duplicates(transactions)
 
-    if config.descriptions_file is not None:
-        descriptions = toldg.utils.read_descriptions(config.descriptions_file)
-        apply_descriptions(transactions, descriptions)
-
     mappings = toldg.utils.read_mappings(config.mappings_file)
     apply_mappings(transactions, mappings)
-
     toldg.predict.add_account2(transactions, config.categories)
     toldg.utils.write_mappings(transactions, config.mappings_file)
     toldg.write.render_to_file(transactions, config)
diff --git a/src/toldg/utils.py b/src/toldg/utils.py
index 93aa8df..034964c 100644
--- a/src/toldg/utils.py
+++ b/src/toldg/utils.py
@@ -3,11 +3,11 @@ import logging
 import os
 import sys
 from pathlib import Path
-from typing import Dict, List
+from typing import Any, Dict, List, Optional
 
 from pydantic import ValidationError
 
-from toldg.models import Config, Transaction
+from toldg.models import Config, Mapping, Transaction
 
 
 def get_files(directory: Path, ending="") -> List[Path]:
@@ -64,46 +64,33 @@ def write_meta(config: Config):
         f.write("\n")
         f.write('option "operating_currency" "USD"\n\n')
 
-        # Commodity section is not required for beancount
-        # for commodity in config.commodities:
-        #     f.write(f"commodity {commodity}\n")
-        # f.write("\n")
-
 
 def write_mappings(transactions: List[Transaction], mappings_file: Path):
+    """Write transactions to the mappings file."""
     mappings = {}
     for t in transactions:
-        try:
-            mappings[t.account2.strip()].append(t.row)
-        except KeyError:
-            mappings[t.account2.strip()] = [t.row]
+        mapping = Mapping(
+            **{
+                "account2": t.account2.strip(),
+            }
+        )
+        if t.narration:
+            mapping.narration = t.narration
+        if t.payee:
+            mapping.payee = t.payee
+        mappings[t.row] = mapping.dict()
 
     with open(mappings_file, "w") as f:
-        json.dump({k: sorted(v) for k, v in sorted(mappings.items())}, f, indent=4)
+        json.dump(mappings, f, indent=4)
 
 
-def read_mappings(mappings_file: Path) -> Dict[str, str]:
+def read_mappings(mappings_file: Path) -> Dict[str, Mapping]:
+    """Read mappings from file."""
     with open(mappings_file, "r") as f:
-        account2_to_rows = json.load(f)
-    return {
-        row: category for category, rows in account2_to_rows.items() for row in rows
-    }
-
-
-def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
-    """I am basic so the description file is currently a double row based
-    format where the first row matches the CSV row and the second one is the
-    description."""
-    descriptions = {}
-    current_row = None
-    with open(descriptions_file, "r") as f:
-        for line in f.readlines():
-            if current_row is None:
-                current_row = line.rstrip("\n")
-            else:
-                descriptions[current_row] = line.rstrip("\n")
-                current_row = None
-    return descriptions
+        data = json.load(f)
+    for key, value in data.items():
+        data[key] = Mapping(**value)
+    return data
 
 
 def remove_if_exists(output_file: Path):
diff --git a/src/toldg/write.py b/src/toldg/write.py
index 7696265..e76e446 100644
--- a/src/toldg/write.py
+++ b/src/toldg/write.py
@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
 from toldg.utils import category_to_bean
 
 BEANCOUNT_TRANSACTION_TEMPLATE = """
-{t.date} * "{t.description}"
+{t.date} * {t.description}
     {t.account2:<40}  {t.debit:<6} {t.currency}
     {t.account1:<40}  {t.credit:<6} {t.currency}
 """
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """
 
 def format(t):
     t.date = t.date.replace("/", "-")
-    t.description = t.description.replace('"', '\\"')
+    # Escape embedded double quotes so each value stays one beancount string.
+    payee = (t.payee or "").replace('"', '\\"')
+    narration = (t.narration or "").replace('"', '\\"')
+    if payee and narration:
+        t.description = f'"{payee}" "{narration}"'
+    elif narration:
+        t.description = f'"{narration}"'
+    elif payee:
+        t.description = f'"{payee}" ""'  # payee only needs an empty narration
+    else:
+        t.description = '"' + t.description.replace('"', '\\"') + '"'
+
     if not t.debit.startswith("-"):
         t.debit = " " + t.debit
     if not t.credit.startswith("-"):