From 078bf07d0f60b34677d7dd49d00140826353d5b3 Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Sun, 2 Mar 2025 13:32:08 -0500 Subject: [PATCH] Implement new mapping format --- README.md | 12 +++++----- src/toldg/models.py | 19 +++++++++++----- src/toldg/process.py | 42 ++++++++++++++++++----------------- src/toldg/utils.py | 53 +++++++++++++++++--------------------------- src/toldg/write.py | 15 +++++++++++-- 5 files changed, 76 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 4634a58..7ea8fe3 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/ ## Usage -To transform CSV data into Beancount run `toldg` via `python-poetry`. +To transform CSV data into Beancount first install the package via +`poetry install`. + +You can then run `toldg` from the directory where your configuration +file and your ledger data is located. ```bash -poetry -P ${LEDGER_DATA_ROOT} run toldg +poetry -P ledgerai run toldg ``` -To visualize the data with [fava](https://beancount.github.io/fava/) install all -dependencies via `python-poetry`, enable the venv and run `fava` from there. +To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there. ```bash -poetry install eval "$(poetry env activate)" fava your_ledger.beancount ``` diff --git a/src/toldg/models.py b/src/toldg/models.py index bed50e0..5a96df6 100644 --- a/src/toldg/models.py +++ b/src/toldg/models.py @@ -49,18 +49,14 @@ class Config(BaseModel): input_directory: Path mappings_file: Path - descriptions_file: Optional[Path] = None output_file: Path = Path("output.ldg") csv_configs: List[CsvConfig] categories: List[str] - commodities: List[str] find_duplicates: bool = False class Transaction(BaseModel): - """ - Class for ledger transaction to render into ldg file. - """ + """Class for ledger transaction to render into ldg file.""" class Config: extra = "forbid" @@ -74,3 +70,16 @@ class Transaction(BaseModel): description: str csv_file: str row: str + narration: Optional[str] = None + payee: Optional[str] = None + + +class Mapping(BaseModel): + """Class for transaction mapping from mappings file.""" + + class Config: + extra = "forbid" + + account2: str + narration: Optional[str] = None + payee: Optional[str] = None diff --git a/src/toldg/process.py b/src/toldg/process.py index 2571577..f9443b4 100644 --- a/src/toldg/process.py +++ b/src/toldg/process.py @@ -3,13 +3,13 @@ import datetime import logging import re import sys -from typing import Dict, List +from typing import Any, Dict, List import toldg.models import toldg.predict import toldg.utils import toldg.write -from toldg.models import Config, CsvConfig, Transaction +from toldg.models import Config, CsvConfig, Mapping, Transaction def process_ldg_files(config: Config): @@ -76,26 +76,33 @@ def find_duplicates(transactions: List[Transaction]): rows.add(row) -def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]): +def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]): + """Apply mappings to transactions.""" unused_mappings = set(mappings.keys()) + for t in transactions: if t.row in mappings: - t.account2 = mappings[t.row] + mapping = mappings[t.row] + + assert isinstance( + mapping, Mapping + ), "Only new mappings format is supported." + t.account2 = mapping.account2 + + if mapping.narration: + t.narration = mapping.narration + + if mapping.payee: + t.payee = mapping.payee + unused_mappings.discard(t.row) else: logging.warning(f"No mapping for '{t}'.") + for row in unused_mappings: - logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.") - - -def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]): - unused_descriptions = set(descriptions.keys()) - for t in transactions: - if t.row in descriptions: - t.description = descriptions[t.row] - unused_descriptions.discard(t.row) - for row in unused_descriptions: - logging.warning(f"Unused mapping '{row}' -> {descriptions[row]}.") + mapping_info = mappings[row] + account2 = mapping_info["account2"] + logging.warning(f"Unused mapping '{row}' -> {account2}") def process_csv_files(config: Config): @@ -109,13 +116,8 @@ def process_csv_files(config: Config): if config.find_duplicates: find_duplicates(transactions) - if config.descriptions_file is not None: - descriptions = toldg.utils.read_descriptions(config.descriptions_file) - apply_descriptions(transactions, descriptions) - mappings = toldg.utils.read_mappings(config.mappings_file) apply_mappings(transactions, mappings) - toldg.predict.add_account2(transactions, config.categories) toldg.utils.write_mappings(transactions, config.mappings_file) toldg.write.render_to_file(transactions, config) diff --git a/src/toldg/utils.py b/src/toldg/utils.py index 93aa8df..034964c 100644 --- a/src/toldg/utils.py +++ b/src/toldg/utils.py @@ -3,11 +3,11 @@ import logging import os import sys from pathlib import Path -from typing import Dict, List +from typing import Any, Dict, List, Optional from pydantic import ValidationError -from toldg.models import Config, Transaction +from toldg.models import Config, Mapping, Transaction def get_files(directory: Path, ending="") -> List[Path]: @@ -64,46 +64,33 @@ def write_meta(config: Config): f.write("\n") f.write('option "operating_currency" "USD"\n\n') - # Commodity section is not required for beancount - # for commodity in config.commodities: - # f.write(f"commodity {commodity}\n") - # f.write("\n") - def write_mappings(transactions: List[Transaction], mappings_file: Path): + """Write transactions to the mappings file.""" mappings = {} for t in transactions: - try: - mappings[t.account2.strip()].append(t.row) - except KeyError: - mappings[t.account2.strip()] = [t.row] + mapping = Mapping( + **{ + "account2": t.account2.strip(), + } + ) + if t.narration: + mapping.narration = t.narration + if t.payee: + mapping.payee = t.payee + mappings[t.row] = mapping.dict() with open(mappings_file, "w") as f: - json.dump({k: sorted(v) for k, v in sorted(mappings.items())}, f, indent=4) + json.dump(mappings, f, indent=4) -def read_mappings(mappings_file: Path) -> Dict[str, str]: +def read_mappings(mappings_file: Path) -> Dict[str, Mapping]: + """Read mappings from file.""" with open(mappings_file, "r") as f: - account2_to_rows = json.load(f) - return { - row: category for category, rows in account2_to_rows.items() for row in rows - } - - -def read_descriptions(descriptions_file: Path) -> Dict[str, str]: - """I am basic so the description file is currently a double row based - format where the first row matches the CSV row and the second one is the - description.""" - descriptions = {} - current_row = None - with open(descriptions_file, "r") as f: - for line in f.readlines(): - if current_row is None: - current_row = line.rstrip("\n") - else: - descriptions[current_row] = line.rstrip("\n") - current_row = None - return descriptions + data = json.load(f) + for key, value in data.items(): + data[key] = Mapping(**value) + return data def remove_if_exists(output_file: Path): diff --git a/src/toldg/write.py b/src/toldg/write.py index 7696265..e76e446 100644 --- a/src/toldg/write.py +++ b/src/toldg/write.py @@ -5,7 +5,7 @@ from toldg.models import Config, Transaction from toldg.utils import category_to_bean BEANCOUNT_TRANSACTION_TEMPLATE = """ -{t.date} * "{t.description}" +{t.date} * {t.description} {t.account2:<40} {t.debit:<6} {t.currency} {t.account1:<40} {t.credit:<6} {t.currency} """ @@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """ def format(t): t.date = t.date.replace("/", "-") - t.description = t.description.replace('"', '\\"') + if t.narration and t.payee: + # A transaction may have an optional “payee” and/or a “narration.” + t.description = f'"{t.payee}" "{t.narration}"' + elif t.narration: + # If you place a single string on a transaction line, it becomes its narration: + t.description = f'"{t.narration}"' + elif t.payee: + # If you want to set just a payee, put an empty narration string: + t.description = f'"{t.payee}" ""' + else: + t.description = f'"{t.description}"' + if not t.debit.startswith("-"): t.debit = " " + t.debit if not t.credit.startswith("-"):