Files
ledgerai/src/toldg/process.py

124 lines
4.0 KiB
Python

import csv
import datetime
import logging
import re
import sys
from typing import Any, Dict, List
import toldg.models
import toldg.predict
import toldg.utils
import toldg.write
from toldg.models import Config, CsvConfig, Mapping, Transaction
def process_ldg_files(config: Config):
    """Append the content of each ledger file to the configured output file.

    The files are those returned by
    ``toldg.utils.get_ldg_files(config.input_directory)``. The output file
    ``config.output_file`` is opened in append mode once per input file, so
    existing content is kept and the inputs are concatenated in iteration
    order.
    """
    for ledger_path in toldg.utils.get_ldg_files(config.input_directory):
        # Input is opened before the output, mirroring the nesting order.
        with open(ledger_path, "r") as source, open(config.output_file, "a") as sink:
            content = source.read()
            sink.write(content)
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
    """Return the single CSV config whose regex matches ``csv_file``.

    A config matches when ``re.match(config.file_match_regex, csv_file)``
    succeeds, i.e. the pattern matches at the start of the file name.

    Logs a critical message and terminates the process with exit status 1
    when no config matches or when more than one does.
    """
    matching = []
    for candidate in csv_configs:
        if re.match(candidate.file_match_regex, csv_file):
            matching.append(candidate)

    if len(matching) == 0:
        logging.critical(f"No CSV config for {csv_file}.")
        sys.exit(1)
    if len(matching) > 1:
        logging.critical(f"Multiple CSV configs for {csv_file}.")
        sys.exit(1)

    (only_match,) = matching
    return only_match
def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
    """Parse one CSV file into a list of Transaction objects.

    The first ``config.skip`` rows are discarded, empty rows are ignored and
    every remaining row is converted using the column names in
    ``config.fields``.

    Args:
        csv_file: Path of the CSV file to read.
        config: CSV settings. This function reads ``fields``, ``delimiter``,
            ``quotechar``, ``skip``, ``currency``, ``account1``,
            ``input_date_format`` and ``output_date_format``.

    Returns:
        One Transaction per non-empty row after the skipped ones, in file
        order. The list is empty when the file has no rows beyond the
        skipped ones (including when it has fewer rows than ``config.skip``).
    """

    def date_to_date(date: str) -> str:
        """Re-format a date string from the input to the output format."""
        d = datetime.datetime.strptime(date, config.input_date_format)
        return d.strftime(config.output_date_format)

    def flip_sign(amount: str) -> str:
        """Negate an amount given as text: drop or prepend a leading '-'."""
        return amount[1:] if amount.startswith("-") else "-" + amount

    def row_to_transaction(row, fields):
        """The user can configure the mapping of CSV fields to the three
        required fields date, amount and description via the CsvConfig."""
        t = {field: row[index] for index, field in fields}
        amount = t["amount"]
        return Transaction(
            currency=config.currency,
            debit=flip_sign(amount),
            credit=amount,
            date=date_to_date(t["date"]),
            account1=config.account1,
            account2=toldg.models.UNKNOWN_CATEGORY,
            description=t["description"],
            csv_file=csv_file,
            # The joined raw row (prefixed with the file name) is stored
            # verbatim on the transaction as its ``row`` value.
            row=csv_file + ", " + ", ".join(row),
        )

    # Keep only columns that have a (truthy) field name, with their index.
    fields = [(i, f) for i, f in enumerate(config.fields) if f]
    with open(csv_file, "r") as f:
        reader = csv.reader(f, delimiter=config.delimiter, quotechar=config.quotechar)
        for _ in range(config.skip):
            # The default stops next() from raising StopIteration when the
            # file holds fewer rows than config.skip (e.g. an empty file).
            next(reader, None)
        transactions = [row_to_transaction(row, fields) for row in reader if row]
    return transactions
def find_duplicates(transactions: List[Transaction]):
    """Abort the program when two transactions share the same ``row`` value.

    On the first repeated ``row`` two critical messages are logged and the
    process exits with status 1. Returns ``None`` when all rows are unique.
    """
    seen = set()
    for transaction in transactions:
        key = transaction.row
        if key not in seen:
            seen.add(key)
            continue
        logging.critical(f"'{key}' is duplicated.")
        logging.critical("Exit because of duplicated transactions.")
        sys.exit(1)
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
    """Apply mappings to transactions, matched by each transaction's ``row``.

    For a transaction whose ``row`` is a key of ``mappings``, ``account2`` is
    overwritten with the mapping's value, and ``narration`` / ``payee`` are
    set when the mapping carries a truthy value for them. A warning is logged
    for every transaction without a mapping and for every mapping that
    matched no transaction.

    Args:
        transactions: Transactions to update in place.
        mappings: Mapping objects keyed by transaction ``row`` string.

    Raises:
        AssertionError: If a matched mapping is not a ``Mapping`` instance.
    """
    unused_mappings = set(mappings)
    for t in transactions:
        if t.row not in mappings:
            logging.warning(f"No mapping for '{t}'.")
            continue
        mapping = mappings[t.row]
        assert isinstance(
            mapping, Mapping
        ), "Only new mappings format is supported."
        t.account2 = mapping.account2
        if mapping.narration:
            t.narration = mapping.narration
        if mapping.payee:
            t.payee = mapping.payee
        unused_mappings.discard(t.row)

    # Sorted so the warnings come out in a reproducible order.
    for row in sorted(unused_mappings):
        mapping_info = mappings[row]
        # Read account2 as an attribute, as done for matched rows above.
        # Subscripting is kept only as a fallback for values that are not
        # Mapping instances.
        if isinstance(mapping_info, Mapping):
            account2 = mapping_info.account2
        else:
            account2 = mapping_info["account2"]
        logging.warning(f"Unused mapping '{row}' -> {account2}")
def process_csv_files(config: Config):
    """Run the CSV pipeline: read, check, categorize and render transactions.

    Steps, in order:
      1. Collect the transactions of every CSV file returned by
         ``toldg.utils.get_csv_files(config.input_directory)``.
      2. If ``config.find_duplicates`` is set, run the duplicate check.
      3. Read the mappings from ``config.mappings_file`` and apply them.
      4. Call ``toldg.predict.add_account2`` with ``config.categories``.
      5. Write the mappings back to ``config.mappings_file``.
      6. Render the transactions via ``toldg.write.render_to_file``.
    """
    all_transactions = []
    for path in toldg.utils.get_csv_files(config.input_directory):
        csv_path = str(path)
        matched_config = get_csv_config(csv_path, config.csv_configs)
        all_transactions.extend(get_transactions(csv_path, matched_config))

    if config.find_duplicates:
        find_duplicates(all_transactions)

    stored_mappings = toldg.utils.read_mappings(config.mappings_file)
    apply_mappings(all_transactions, stored_mappings)
    toldg.predict.add_account2(all_transactions, config.categories)
    toldg.utils.write_mappings(all_transactions, config.mappings_file)
    toldg.write.render_to_file(all_transactions, config)