Implement new mapping format

This commit is contained in:
2025-03-02 13:32:08 -05:00
parent 08c50e776e
commit 078bf07d0f
5 changed files with 76 additions and 65 deletions

View File

@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/
## Usage ## Usage
To transform CSV data into Beancount run `toldg` via `python-poetry`. To transform CSV data into Beancount first install the package via
`poetry install`.
You can then run `toldg` from the directory where your configuration
file and your ledger data are located.
```bash ```bash
poetry -P ${LEDGER_DATA_ROOT} run toldg poetry -P ledgerai run toldg
``` ```
To visualize the data with [fava](https://beancount.github.io/fava/) install all To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.
dependencies via `python-poetry`, enable the venv and run `fava` from there.
```bash ```bash
poetry install
eval "$(poetry env activate)" eval "$(poetry env activate)"
fava your_ledger.beancount fava your_ledger.beancount
``` ```

View File

@@ -49,18 +49,14 @@ class Config(BaseModel):
input_directory: Path input_directory: Path
mappings_file: Path mappings_file: Path
descriptions_file: Optional[Path] = None
output_file: Path = Path("output.ldg") output_file: Path = Path("output.ldg")
csv_configs: List[CsvConfig] csv_configs: List[CsvConfig]
categories: List[str] categories: List[str]
commodities: List[str]
find_duplicates: bool = False find_duplicates: bool = False
class Transaction(BaseModel): class Transaction(BaseModel):
""" """Class for ledger transaction to render into ldg file."""
Class for ledger transaction to render into ldg file.
"""
class Config: class Config:
extra = "forbid" extra = "forbid"
@@ -74,3 +70,16 @@ class Transaction(BaseModel):
description: str description: str
csv_file: str csv_file: str
row: str row: str
narration: Optional[str] = None
payee: Optional[str] = None
class Mapping(BaseModel):
    """One transaction mapping entry as stored in the mappings file."""

    class Config:
        # Reject any key that is not one of the fields declared below.
        extra = "forbid"

    # Account assigned to the mapped transaction.
    account2: str
    # Optional overrides; they stay None when the entry does not provide them.
    narration: Optional[str] = None
    payee: Optional[str] = None

View File

@@ -3,13 +3,13 @@ import datetime
import logging import logging
import re import re
import sys import sys
from typing import Dict, List from typing import Any, Dict, List
import toldg.models import toldg.models
import toldg.predict import toldg.predict
import toldg.utils import toldg.utils
import toldg.write import toldg.write
from toldg.models import Config, CsvConfig, Transaction from toldg.models import Config, CsvConfig, Mapping, Transaction
def process_ldg_files(config: Config): def process_ldg_files(config: Config):
@@ -76,26 +76,33 @@ def find_duplicates(transactions: List[Transaction]):
rows.add(row) rows.add(row)
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
    """Apply mappings to transactions.

    For every transaction whose ``row`` is a key of ``mappings``, the mapping's
    ``account2`` is copied onto the transaction, and ``narration`` / ``payee``
    are copied when the mapping sets them. Transactions without a mapping and
    mappings that matched no transaction are reported via ``logging.warning``.

    Args:
        transactions: Transactions to update in place.
        mappings: Mapping entries keyed by transaction row.

    Raises:
        AssertionError: If a matched entry is not a ``Mapping`` instance
            (i.e. the mappings were not loaded in the new format).
    """
    unused_mappings = set(mappings.keys())
    for t in transactions:
        if t.row in mappings:
            mapping = mappings[t.row]
            assert isinstance(
                mapping, Mapping
            ), "Only new mappings format is supported."
            t.account2 = mapping.account2
            # Only override when the mapping provides a non-empty value.
            if mapping.narration:
                t.narration = mapping.narration
            if mapping.payee:
                t.payee = mapping.payee
            unused_mappings.discard(t.row)
        else:
            logging.warning(f"No mapping for '{t}'.")

    for row in unused_mappings:
        # Mapping is a model object, not a dict: read the field by attribute.
        # Subscripting it (mapping["account2"]) raises TypeError.
        account2 = mappings[row].account2
        logging.warning(f"Unused mapping '{row}' -> {account2}")
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """Replace transaction descriptions with the entries keyed by their row.

    Transactions whose ``row`` is not a key of ``descriptions`` are left
    untouched. Entries that matched no transaction are logged as warnings.
    """
    leftover = set(descriptions.keys())
    for transaction in transactions:
        row = transaction.row
        if row not in descriptions:
            continue
        transaction.description = descriptions[row]
        leftover.discard(row)

    for row in leftover:
        logging.warning(f"Unused mapping '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config): def process_csv_files(config: Config):
@@ -109,13 +116,8 @@ def process_csv_files(config: Config):
if config.find_duplicates: if config.find_duplicates:
find_duplicates(transactions) find_duplicates(transactions)
if config.descriptions_file is not None:
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
apply_descriptions(transactions, descriptions)
mappings = toldg.utils.read_mappings(config.mappings_file) mappings = toldg.utils.read_mappings(config.mappings_file)
apply_mappings(transactions, mappings) apply_mappings(transactions, mappings)
toldg.predict.add_account2(transactions, config.categories) toldg.predict.add_account2(transactions, config.categories)
toldg.utils.write_mappings(transactions, config.mappings_file) toldg.utils.write_mappings(transactions, config.mappings_file)
toldg.write.render_to_file(transactions, config) toldg.write.render_to_file(transactions, config)

View File

@@ -3,11 +3,11 @@ import logging
import os import os
import sys import sys
from pathlib import Path from pathlib import Path
from typing import Dict, List from typing import Any, Dict, List, Optional
from pydantic import ValidationError from pydantic import ValidationError
from toldg.models import Config, Transaction from toldg.models import Config, Mapping, Transaction
def get_files(directory: Path, ending="") -> List[Path]: def get_files(directory: Path, ending="") -> List[Path]:
@@ -64,46 +64,33 @@ def write_meta(config: Config):
f.write("\n") f.write("\n")
f.write('option "operating_currency" "USD"\n\n') f.write('option "operating_currency" "USD"\n\n')
# Commodity section is not required for beancount
# for commodity in config.commodities:
# f.write(f"commodity {commodity}\n")
# f.write("\n")
def write_mappings(transactions: List[Transaction], mappings_file: Path):
    """Write one mapping entry per transaction to the mappings file.

    The file is written as a JSON object keyed by transaction row; each value
    is the dict form of a ``Mapping`` built from the transaction's stripped
    ``account2`` plus its ``narration`` / ``payee`` when those are set.
    If several transactions share a row, the last one wins.
    """
    entries = {}
    for transaction in transactions:
        fields = {"account2": transaction.account2.strip()}
        entry = Mapping(**fields)
        if transaction.narration:
            entry.narration = transaction.narration
        if transaction.payee:
            entry.payee = transaction.payee
        entries[transaction.row] = entry.dict()

    with open(mappings_file, "w") as out:
        json.dump(entries, out, indent=4)
def read_mappings(mappings_file: Path) -> Dict[str, Mapping]:
    """Load the mappings file and return its entries as ``Mapping`` objects.

    The file is parsed as JSON; every value of the top-level object is
    expanded into keyword arguments for ``Mapping``.
    """
    with open(mappings_file, "r") as source:
        raw_entries = json.load(source)
    return {row: Mapping(**fields) for row, fields in raw_entries.items()}
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Read a descriptions file made of alternating row/description lines.

    Lines are consumed in pairs: the first line of a pair is the CSV row used
    as key, the second is its description. A trailing line without a partner
    is ignored; a repeated row keeps the description read last.
    """
    with open(descriptions_file, "r") as handle:
        lines = [line.rstrip("\n") for line in handle]
    # Even positions hold the rows, odd positions the descriptions; zip stops
    # at the shorter slice, which drops an unpaired final row.
    return dict(zip(lines[0::2], lines[1::2]))
def remove_if_exists(output_file: Path): def remove_if_exists(output_file: Path):

View File

@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
from toldg.utils import category_to_bean from toldg.utils import category_to_bean
BEANCOUNT_TRANSACTION_TEMPLATE = """ BEANCOUNT_TRANSACTION_TEMPLATE = """
{t.date} * "{t.description}" {t.date} * {t.description}
{t.account2:<40} {t.debit:<6} {t.currency} {t.account2:<40} {t.debit:<6} {t.currency}
{t.account1:<40} {t.credit:<6} {t.currency} {t.account1:<40} {t.credit:<6} {t.currency}
""" """
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """
def format(t): def format(t):
t.date = t.date.replace("/", "-") t.date = t.date.replace("/", "-")
t.description = t.description.replace('"', '\\"') if t.narration and t.payee:
# A transaction may have an optional “payee” and/or a “narration.”
t.description = f'"{t.payee}" "{t.narration}"'
elif t.narration:
# If you place a single string on a transaction line, it becomes its narration:
t.description = f'"{t.narration}"'
elif t.payee:
# If you want to set just a payee, put an empty narration string:
t.description = f'"{t.payee}" ""'
else:
t.description = f'"{t.description}"'
if not t.debit.startswith("-"): if not t.debit.startswith("-"):
t.debit = " " + t.debit t.debit = " " + t.debit
if not t.credit.startswith("-"): if not t.credit.startswith("-"):