Add count to specify how often a mapping is used.

Implement new mapping format
2025-03-02 13:44:43 -05:00 · 2025-03-02 13:32:08 -05:00
5 changed files with 74 additions and 85 deletions
@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/

 ## Usage

-To transform CSV data into Beancount run `toldg` via `python-poetry`.
+To transform CSV data into Beancount, first install the package via
+`poetry install`.
+
+You can then run `toldg` from the directory where your configuration
+file and your ledger data are located.

 ```bash
-poetry -P ${LEDGER_DATA_ROOT} run toldg
+poetry -P ledgerai run toldg
 ```

-To visualize the data with [fava](https://beancount.github.io/fava/) install all
-dependencies via `python-poetry`, enable the venv and run `fava` from there.
+To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.

 ```bash
-poetry install
 eval "$(poetry env activate)"
 fava your_ledger.beancount
 ```
@@ -40,8 +40,6 @@ class Config(BaseModel):
        categories (List[str]):  A list of account2s. An account has to be defined here
                                 before it can be used in a mapping. Otherwise, ledger will complain.
        commodities (List[str]): A list of commodities relevant to the data processing.
-        find_duplicates (bool):  Flag to check and abort on duplicated transactions. Not
-                                 really useful.
    """

    class Config:
@@ -49,18 +47,13 @@ class Config(BaseModel):

    input_directory: Path
    mappings_file: Path
-    descriptions_file: Optional[Path] = None
    output_file: Path = Path("output.ldg")
    csv_configs: List[CsvConfig]
    categories: List[str]
-    commodities: List[str]
-    find_duplicates: bool = False


 class Transaction(BaseModel):
-    """
-    Class for ledger transaction to render into ldg file.
-    """
+    """Class for ledger transaction to render into ldg file."""

    class Config:
        extra = "forbid"
@@ -74,3 +67,17 @@ class Transaction(BaseModel):
    description: str
    csv_file: str
    row: str
+    narration: Optional[str] = None
+    payee: Optional[str] = None
+
+
+class Mapping(BaseModel):
+    """Class for transaction mapping from mappings file."""
+
+    class Config:
+        extra = "forbid"
+
+    account2: str
+    count: int = 1
+    narration: Optional[str] = None
+    payee: Optional[str] = None
@@ -3,13 +3,13 @@ import datetime
 import logging
 import re
 import sys
-from typing import Dict, List
+from typing import Any, Dict, List

 import toldg.models
 import toldg.predict
 import toldg.utils
 import toldg.write
-from toldg.models import Config, CsvConfig, Transaction
+from toldg.models import Config, CsvConfig, Mapping, Transaction


 def process_ldg_files(config: Config):
@@ -64,38 +64,28 @@ def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
    return transactions


-def find_duplicates(transactions: List[Transaction]):
-    rows = set()
-    for t in transactions:
-        row = t.row
-        if row in rows:
-            logging.critical(f"'{row}' is duplicated.")
-            logging.critical("Exit because of duplicated transactions.")
-            sys.exit(1)
-        else:
-            rows.add(row)
-
-
-def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
-    unused_mappings = set(mappings.keys())
+def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
+    """Apply mappings to transactions."""
    for t in transactions:
        if t.row in mappings:
-            t.account2 = mappings[t.row]
-            unused_mappings.discard(t.row)
+            mapping = mappings[t.row]
+            assert isinstance(mapping, Mapping)
+            assert (
+                mapping.count > 0
+            ), f"{mapping} used by {t} but count is not greater than '0'."
+            mapping.count -= 1
+            t.account2 = mapping.account2
+
+            if mapping.narration:
+                t.narration = mapping.narration
+
+            if mapping.payee:
+                t.payee = mapping.payee
        else:
            logging.warning(f"No mapping for '{t}'.")
-    for row in unused_mappings:
-        logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")

-
-def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
-    unused_descriptions = set(descriptions.keys())
-    for t in transactions:
-        if t.row in descriptions:
-            t.description = descriptions[t.row]
-            unused_descriptions.discard(t.row)
-    for row in unused_descriptions:
-        logging.warning(f"Unused mapping '{row}' -> {descriptions[row]}.")
+    for mapping in mappings.values():
+        assert mapping.count == 0, f"{mapping} was not used as often as expected!"


 def process_csv_files(config: Config):
@@ -106,16 +96,8 @@ def process_csv_files(config: Config):
        csv_config = get_csv_config(csv_file, config.csv_configs)
        transactions += get_transactions(csv_file, csv_config)

-    if config.find_duplicates:
-        find_duplicates(transactions)
-
-    if config.descriptions_file is not None:
-        descriptions = toldg.utils.read_descriptions(config.descriptions_file)
-        apply_descriptions(transactions, descriptions)
-
    mappings = toldg.utils.read_mappings(config.mappings_file)
    apply_mappings(transactions, mappings)
-
    toldg.predict.add_account2(transactions, config.categories)
    toldg.utils.write_mappings(transactions, config.mappings_file)
    toldg.write.render_to_file(transactions, config)
@@ -3,11 +3,11 @@ import logging
 import os
 import sys
 from pathlib import Path
-from typing import Dict, List
+from typing import Any, Dict, List, Optional

 from pydantic import ValidationError

-from toldg.models import Config, Transaction
+from toldg.models import Config, Mapping, Transaction


 def get_files(directory: Path, ending="") -> List[Path]:
@@ -64,46 +64,33 @@ def write_meta(config: Config):
        f.write("\n")
        f.write('option "operating_currency" "USD"\n\n')

-        # Commodity section is not required for beancount
-        # for commodity in config.commodities:
-        #     f.write(f"commodity {commodity}\n")
-        # f.write("\n")
-

 def write_mappings(transactions: List[Transaction], mappings_file: Path):
+    """Write transactions to the mappings file."""
    mappings = {}
    for t in transactions:
-        try:
-            mappings[t.account2.strip()].append(t.row)
-        except KeyError:
-            mappings[t.account2.strip()] = [t.row]
+        mapping = Mapping(
+            **{
+                "account2": t.account2.strip(),
+            }
+        )
+        if t.narration:
+            mapping.narration = t.narration
+        if t.payee:
+            mapping.payee = t.payee
+        mappings[t.row] = mapping.dict()

    with open(mappings_file, "w") as f:
-        json.dump({k: sorted(v) for k, v in sorted(mappings.items())}, f, indent=4)
+        json.dump(mappings, f, indent=4)


-def read_mappings(mappings_file: Path) -> Dict[str, str]:
+def read_mappings(mappings_file: Path) -> Dict[str, Mapping]:
+    """Read mappings from file."""
    with open(mappings_file, "r") as f:
-        account2_to_rows = json.load(f)
-    return {
-        row: category for category, rows in account2_to_rows.items() for row in rows
-    }
-
-
-def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
-    """I am basic so the description file is currently a double row based
-    format where the first row matches the CSV row and the second one is the
-    description."""
-    descriptions = {}
-    current_row = None
-    with open(descriptions_file, "r") as f:
-        for line in f.readlines():
-            if current_row is None:
-                current_row = line.rstrip("\n")
-            else:
-                descriptions[current_row] = line.rstrip("\n")
-                current_row = None
-    return descriptions
+        data = json.load(f)
+    for key, value in data.items():
+        data[key] = Mapping(**value)
+    return data


 def remove_if_exists(output_file: Path):
@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
 from toldg.utils import category_to_bean

 BEANCOUNT_TRANSACTION_TEMPLATE = """
-{t.date} * "{t.description}"
+{t.date} * {t.description}
    {t.account2:<40}  {t.debit:<6} {t.currency}
    {t.account1:<40}  {t.credit:<6} {t.currency}
 """
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """

 def format(t):
    t.date = t.date.replace("/", "-")
-    t.description = t.description.replace('"', '\\"')
+    if t.narration and t.payee:
+        # A transaction may have an optional “payee” and/or a “narration.”
+        t.description = f'"{t.payee}" "{t.narration}"'
+    elif t.narration:
+        # If you place a single string on a transaction line, it becomes its narration:
+        t.description = f'"{t.narration}"'
+    elif t.payee:
+        # If you want to set just a payee, put an empty narration string:
+        t.description = f'"{t.payee}" ""'
+    else:
+        t.description = f'"{t.description}"'
+
    if not t.debit.startswith("-"):
        t.debit = " " + t.debit
    if not t.credit.startswith("-"):
Author	SHA1	Message	Date
felixm	5d40838368	Add count to specify how often a mapping is used.	2025-03-02 13:44:43 -05:00
felixm	078bf07d0f	Implement new mapping format	2025-03-02 13:32:08 -05:00