Compare commits

...

2 Commits

Author SHA1 Message Date
5d40838368 Add count to specify how often a mapping is used. 2025-03-02 13:44:43 -05:00
078bf07d0f Implement new mapping format 2025-03-02 13:32:08 -05:00
5 changed files with 74 additions and 85 deletions

View File

@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/
## Usage
To transform CSV data into Beancount run `toldg` via `python-poetry`.
To transform CSV data into Beancount first install the package via
`poetry install`.
You can then run `toldg` from the directory where your configuration
file and your ledger data are located.
```bash
poetry -P ${LEDGER_DATA_ROOT} run toldg
poetry -P ledgerai run toldg
```
To visualize the data with [fava](https://beancount.github.io/fava/) install all
dependencies via `python-poetry`, enable the venv and run `fava` from there.
To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.
```bash
poetry install
eval "$(poetry env activate)"
fava your_ledger.beancount
```

View File

@@ -40,8 +40,6 @@ class Config(BaseModel):
categories (List[str]): A list of account2s. An account has to be defined here
before it can be used in a mapping. Otherwise, ledger will complain.
commodities (List[str]): A list of commodities relevant to the data processing.
find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
really useful.
"""
class Config:
@@ -49,18 +47,13 @@ class Config(BaseModel):
input_directory: Path
mappings_file: Path
descriptions_file: Optional[Path] = None
output_file: Path = Path("output.ldg")
csv_configs: List[CsvConfig]
categories: List[str]
commodities: List[str]
find_duplicates: bool = False
class Transaction(BaseModel):
"""
Class for ledger transaction to render into ldg file.
"""
"""Class for ledger transaction to render into ldg file."""
class Config:
extra = "forbid"
@@ -74,3 +67,17 @@ class Transaction(BaseModel):
description: str
csv_file: str
row: str
narration: Optional[str] = None
payee: Optional[str] = None
class Mapping(BaseModel):
    """Class for transaction mapping from mappings file.

    Attributes:
        account2 (str): Account assigned to a transaction whose CSV row
            matches this mapping.
        count (int): How often the mapping is expected to be used, i.e. how
            many transactions share the mapped row. Defaults to 1.
        narration (Optional[str]): Narration copied onto a matching
            transaction when set.
        payee (Optional[str]): Payee copied onto a matching transaction
            when set.
    """

    class Config:
        # Reject keys that are not declared as fields below, so a typo in
        # the mappings file fails loudly instead of being ignored.
        extra = "forbid"

    account2: str
    count: int = 1
    narration: Optional[str] = None
    payee: Optional[str] = None

View File

@@ -3,13 +3,13 @@ import datetime
import logging
import re
import sys
from typing import Dict, List
from typing import Any, Dict, List
import toldg.models
import toldg.predict
import toldg.utils
import toldg.write
from toldg.models import Config, CsvConfig, Transaction
from toldg.models import Config, CsvConfig, Mapping, Transaction
def process_ldg_files(config: Config):
@@ -64,38 +64,28 @@ def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
return transactions
def find_duplicates(transactions: List["Transaction"]):
    """Abort the program when two transactions share the same CSV row.

    Args:
        transactions: Transactions to check; only their ``row`` attribute
            is inspected.

    Raises:
        SystemExit: With exit code 1 as soon as the first repeated row is
            found. The offending row is logged at CRITICAL level first.
    """
    seen_rows = set()
    for t in transactions:
        row = t.row
        if row in seen_rows:
            logging.critical(f"'{row}' is duplicated.")
            logging.critical("Exit because of duplicated transactions.")
            sys.exit(1)
        # sys.exit() never returns, so no else branch is needed here.
        seen_rows.add(row)
def apply_mappings(transactions: List["Transaction"], mappings: Dict[str, "Mapping"]):
    """Apply mappings to transactions.

    A transaction is matched to a mapping through its ``row``. A matching
    mapping sets ``account2`` and, when present, ``narration`` and ``payee``
    on the transaction. Each use decrements the mapping's ``count`` in place,
    so ``mappings`` is consumed by this call.

    Args:
        transactions: Transactions to update in place.
        mappings: Mappings keyed by CSV row.

    Raises:
        AssertionError: If a value in ``mappings`` is not a ``Mapping``, if
            a mapping is used more often than its ``count`` allows, or if a
            mapping was used less often than its ``count`` demands.
    """
    for t in transactions:
        if t.row not in mappings:
            logging.warning(f"No mapping for '{t}'.")
            continue

        mapping = mappings[t.row]
        # Raised explicitly instead of via `assert` so the checks survive
        # `python -O`; the exception type is unchanged.
        if not isinstance(mapping, Mapping):
            raise AssertionError(
                f"Expected a Mapping for row '{t.row}', "
                f"got {type(mapping).__name__}."
            )
        if mapping.count <= 0:
            raise AssertionError(
                f"{mapping} used by {t} but count is not greater than '0'."
            )

        mapping.count -= 1
        t.account2 = mapping.account2
        if mapping.narration:
            t.narration = mapping.narration
        if mapping.payee:
            t.payee = mapping.payee

    # Every mapping must have been used exactly `count` times.
    for mapping in mappings.values():
        if mapping.count != 0:
            raise AssertionError(f"{mapping} was not used as often as expected!")


def apply_descriptions(transactions: List["Transaction"], descriptions: Dict[str, str]):
    """Overwrite transaction descriptions with those from ``descriptions``.

    Args:
        transactions: Transactions to update in place.
        descriptions: Replacement descriptions keyed by CSV row.

    Descriptions whose row matches no transaction are reported with a
    warning, in sorted row order.
    """
    unused_descriptions = set(descriptions)
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)

    # Sorted so the warnings come out in a stable order between runs.
    for row in sorted(unused_descriptions):
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config):
@@ -106,16 +96,8 @@ def process_csv_files(config: Config):
csv_config = get_csv_config(csv_file, config.csv_configs)
transactions += get_transactions(csv_file, csv_config)
if config.find_duplicates:
find_duplicates(transactions)
if config.descriptions_file is not None:
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
apply_descriptions(transactions, descriptions)
mappings = toldg.utils.read_mappings(config.mappings_file)
apply_mappings(transactions, mappings)
toldg.predict.add_account2(transactions, config.categories)
toldg.utils.write_mappings(transactions, config.mappings_file)
toldg.write.render_to_file(transactions, config)

View File

@@ -3,11 +3,11 @@ import logging
import os
import sys
from pathlib import Path
from typing import Dict, List
from typing import Any, Dict, List, Optional
from pydantic import ValidationError
from toldg.models import Config, Transaction
from toldg.models import Config, Mapping, Transaction
def get_files(directory: Path, ending="") -> List[Path]:
@@ -64,46 +64,33 @@ def write_meta(config: Config):
f.write("\n")
f.write('option "operating_currency" "USD"\n\n')
# Commodity section is not required for beancount
# for commodity in config.commodities:
# f.write(f"commodity {commodity}\n")
# f.write("\n")
def write_mappings(transactions: List["Transaction"], mappings_file: Path):
    """Write transactions to the mappings file.

    The file is a JSON object keyed by CSV row; each value is the dict form
    of a ``Mapping``. When several transactions share the same row, the
    entry of the last one wins and its ``count`` records how many
    transactions used that row, so the file can be read back and applied to
    the same transactions without tripping the count check.

    Args:
        transactions: Transactions whose ``row``, ``account2``,
            ``narration`` and ``payee`` are persisted.
        mappings_file: Destination path; an existing file is overwritten.
    """
    mappings: Dict[str, "Mapping"] = {}
    for t in transactions:
        mapping = Mapping(account2=t.account2.strip())
        if t.narration:
            mapping.narration = t.narration
        if t.payee:
            mapping.payee = t.payee

        # A repeated row must be written with the number of transactions
        # that share it; otherwise the next run sees count=1 and fails on
        # the second transaction with that row.
        previous = mappings.get(t.row)
        if previous is not None:
            mapping.count = previous.count + 1
        mappings[t.row] = mapping

    with open(mappings_file, "w") as f:
        json.dump({row: m.dict() for row, m in mappings.items()}, f, indent=4)
def read_mappings(mappings_file: Path) -> Dict[str, "Mapping"]:
    """Read mappings from file.

    Args:
        mappings_file: JSON file holding an object keyed by CSV row whose
            values are the fields of a ``Mapping``.

    Returns:
        The mappings keyed by CSV row, in file order.
    """
    with open(mappings_file, "r") as f:
        data = json.load(f)
    # Build a fresh dict rather than replacing values of the parsed JSON
    # object while iterating over it.
    return {row: Mapping(**fields) for row, fields in data.items()}


def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Read row descriptions from a two-lines-per-entry text file.

    The file alternates between a line holding the CSV row and a line
    holding the description for that row. A trailing row line without a
    description line is ignored.

    Args:
        descriptions_file: Path of the descriptions file.

    Returns:
        Descriptions keyed by CSV row, with trailing newlines removed.
    """
    descriptions = {}
    with open(descriptions_file, "r") as f:
        # zip over the same iterator twice yields consecutive line pairs
        # without loading the whole file into memory.
        lines = iter(f)
        for row_line, description_line in zip(lines, lines):
            descriptions[row_line.rstrip("\n")] = description_line.rstrip("\n")
    return descriptions
def remove_if_exists(output_file: Path):

View File

@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
from toldg.utils import category_to_bean
BEANCOUNT_TRANSACTION_TEMPLATE = """
{t.date} * "{t.description}"
{t.date} * {t.description}
{t.account2:<40} {t.debit:<6} {t.currency}
{t.account1:<40} {t.credit:<6} {t.currency}
"""
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """
def format(t):
t.date = t.date.replace("/", "-")
t.description = t.description.replace('"', '\\"')
if t.narration and t.payee:
# A transaction may have an optional “payee” and/or a “narration.”
t.description = f'"{t.payee}" "{t.narration}"'
elif t.narration:
# If you place a single string on a transaction line, it becomes its narration:
t.description = f'"{t.narration}"'
elif t.payee:
# If you want to set just a payee, put an empty narration string:
t.description = f'"{t.payee}" ""'
else:
t.description = f'"{t.description}"'
if not t.debit.startswith("-"):
t.debit = " " + t.debit
if not t.credit.startswith("-"):