Implement new mapping format

This commit is contained in:
2025-03-02 13:32:08 -05:00
parent 08c50e776e
commit 078bf07d0f
5 changed files with 76 additions and 65 deletions

View File

@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/
## Usage
To transform CSV data into Beancount run `toldg` via `python-poetry`.
To transform CSV data into Beancount first install the package via
`poetry install`.
You can then run `toldg` from the directory where your configuration
file and your ledger data are located.
```bash
poetry -P ${LEDGER_DATA_ROOT} run toldg
poetry -P ledgerai run toldg
```
To visualize the data with [fava](https://beancount.github.io/fava/) install all
dependencies via `python-poetry`, enable the venv and run `fava` from there.
To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.
```bash
poetry install
eval "$(poetry env activate)"
fava your_ledger.beancount
```

View File

@@ -49,18 +49,14 @@ class Config(BaseModel):
input_directory: Path
mappings_file: Path
descriptions_file: Optional[Path] = None
output_file: Path = Path("output.ldg")
csv_configs: List[CsvConfig]
categories: List[str]
commodities: List[str]
find_duplicates: bool = False
class Transaction(BaseModel):
"""
Class for ledger transaction to render into ldg file.
"""
"""Class for ledger transaction to render into ldg file."""
class Config:
extra = "forbid"
@@ -74,3 +70,16 @@ class Transaction(BaseModel):
description: str
csv_file: str
row: str
narration: Optional[str] = None
payee: Optional[str] = None
class Mapping(BaseModel):
    """Class for transaction mapping from mappings file."""

    class Config:
        # Reject any field that is not declared on this model.
        extra = "forbid"

    # Account written to the matched transaction's ``account2``.
    account2: str
    # Optional narration; when set it replaces the transaction's narration.
    narration: Optional[str] = None
    # Optional payee; when set it replaces the transaction's payee.
    payee: Optional[str] = None

View File

@@ -3,13 +3,13 @@ import datetime
import logging
import re
import sys
from typing import Dict, List
from typing import Any, Dict, List
import toldg.models
import toldg.predict
import toldg.utils
import toldg.write
from toldg.models import Config, CsvConfig, Transaction
from toldg.models import Config, CsvConfig, Mapping, Transaction
def process_ldg_files(config: Config):
@@ -76,26 +76,33 @@ def find_duplicates(transactions: List[Transaction]):
rows.add(row)
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
    """Apply mappings to transactions.

    For every transaction whose CSV ``row`` is a key of ``mappings``, set
    ``account2`` from the mapping and override ``narration`` / ``payee`` when
    the mapping carries a non-empty value. Transactions without a mapping and
    mappings that match no transaction are reported with a warning.

    Args:
        transactions: Transactions to update in place.
        mappings: ``Mapping`` objects keyed by the transaction's CSV row.

    Raises:
        TypeError: If a mapping value is not a ``Mapping`` instance, i.e. the
            mappings were not loaded in the new format.
    """
    unused_mappings = set(mappings.keys())
    for t in transactions:
        if t.row not in mappings:
            logging.warning(f"No mapping for '{t}'.")
            continue

        mapping = mappings[t.row]
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if not isinstance(mapping, Mapping):
            raise TypeError("Only new mappings format is supported.")

        t.account2 = mapping.account2
        if mapping.narration:
            t.narration = mapping.narration
        if mapping.payee:
            t.payee = mapping.payee
        unused_mappings.discard(t.row)

    for row in unused_mappings:
        # ``Mapping`` is a pydantic model, not a dict: subscripting it raises
        # TypeError, so read the field as an attribute.
        account2 = mappings[row].account2
        logging.warning(f"Unused mapping '{row}' -> {account2}")


def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """Replace the description of every transaction that has an entry.

    Args:
        transactions: Transactions to update in place.
        descriptions: Description text keyed by the transaction's CSV row.
    """
    unused_descriptions = set(descriptions.keys())
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        # NOTE(review): the message says "mapping" although it reports an
        # unused description; text left unchanged here.
        logging.warning(f"Unused mapping '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config):
@@ -109,13 +116,8 @@ def process_csv_files(config: Config):
if config.find_duplicates:
find_duplicates(transactions)
if config.descriptions_file is not None:
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
apply_descriptions(transactions, descriptions)
mappings = toldg.utils.read_mappings(config.mappings_file)
apply_mappings(transactions, mappings)
toldg.predict.add_account2(transactions, config.categories)
toldg.utils.write_mappings(transactions, config.mappings_file)
toldg.write.render_to_file(transactions, config)

View File

@@ -3,11 +3,11 @@ import logging
import os
import sys
from pathlib import Path
from typing import Dict, List
from typing import Any, Dict, List, Optional
from pydantic import ValidationError
from toldg.models import Config, Transaction
from toldg.models import Config, Mapping, Transaction
def get_files(directory: Path, ending="") -> List[Path]:
@@ -64,46 +64,33 @@ def write_meta(config: Config):
f.write("\n")
f.write('option "operating_currency" "USD"\n\n')
# Commodity section is not required for beancount
# for commodity in config.commodities:
# f.write(f"commodity {commodity}\n")
# f.write("\n")
def write_mappings(transactions: List[Transaction], mappings_file: Path):
    """Write transactions to the mappings file.

    The file is a JSON object keyed by each transaction's CSV row. Every
    value is the dict form of a ``Mapping`` holding the stripped ``account2``
    plus the transaction's narration and payee when those are non-empty.

    Args:
        transactions: Transactions whose mapping data is persisted.
        mappings_file: Destination path; overwritten if it exists.
    """
    serialized = {}
    for txn in transactions:
        entry = Mapping(
            account2=txn.account2.strip(),
            # Empty values fall back to the model default (None).
            narration=txn.narration or None,
            payee=txn.payee or None,
        )
        serialized[txn.row] = entry.dict()

    with open(mappings_file, "w") as handle:
        json.dump(serialized, handle, indent=4)
def read_mappings(mappings_file: Path) -> Dict[str, Mapping]:
    """Read mappings from file.

    The file is expected to hold a JSON object whose values are the keyword
    arguments of ``Mapping``; each value is converted into a ``Mapping``.
    """
    with open(mappings_file, "r") as handle:
        raw = json.load(handle)
    return {row: Mapping(**fields) for row, fields in raw.items()}


def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Read descriptions from a file made of line pairs.

    The first line of each pair matches the CSV row and the second line is
    its description. A trailing line without a partner is ignored.
    """
    with open(descriptions_file, "r") as handle:
        lines = [line.rstrip("\n") for line in handle]
    # Even-indexed lines are rows, odd-indexed lines their descriptions.
    return dict(zip(lines[0::2], lines[1::2]))
def remove_if_exists(output_file: Path):

View File

@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
from toldg.utils import category_to_bean
BEANCOUNT_TRANSACTION_TEMPLATE = """
{t.date} * "{t.description}"
{t.date} * {t.description}
{t.account2:<40} {t.debit:<6} {t.currency}
{t.account1:<40} {t.credit:<6} {t.currency}
"""
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """
def format(t):
t.date = t.date.replace("/", "-")
t.description = t.description.replace('"', '\\"')
if t.narration and t.payee:
# A transaction may have an optional “payee” and/or a “narration.”
t.description = f'"{t.payee}" "{t.narration}"'
elif t.narration:
# If you place a single string on a transaction line, it becomes its narration:
t.description = f'"{t.narration}"'
elif t.payee:
# If you want to set just a payee, put an empty narration string:
t.description = f'"{t.payee}" ""'
else:
t.description = f'"{t.description}"'
if not t.debit.startswith("-"):
t.debit = " " + t.debit
if not t.credit.startswith("-"):