Add support for transaction descriptions and clean up code.

felixm 2024-04-20 09:07:58 -04:00
parent e29d08e0d7
commit 5cf06b2031
5 changed files with 58 additions and 33 deletions

View File

@@ -17,14 +17,13 @@ The script takes a directory in which it recursively searches for CSV and LDG
files. From these files, it generates a single ledger accounting file that
includes all transactions.
For now, ledger files are simply appended to the output file without
modifications.
Ledger files are appended to the output file without modifications.
However, the transactions from the CSV files are extended with their *account2*
information, i.e., the category of the transaction. Optionally, these
transactions can also get a more meaningful description and tags.
The mapping information is stored in a file `mappings.json`. It maps a unique
identifier for each transaction (based on filename, line number) to the
respective *account2*, and (optionally) *tags* or *description*.
identifier for each transaction (based on the filename and full CSV row) to a
respective *account2*.
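
For illustration, a single mapping entry might look like the following sketch; the exact key format (built from the filename and the full CSV row) is an assumption based on the description above, not taken from the repository:

```python
# Hypothetical sketch of a mappings.json entry; key and account2 value
# are made-up examples.
import json

mapping = {"chase.csv:2024-04-01,COFFEE SHOP,-4.50": "expenses:dining"}
print(json.dumps(mapping, indent=2))
```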

View File

@@ -1,7 +1,7 @@
from pydantic import BaseModel, Extra
from pydantic import BaseModel
from typing import List
from typing import Optional
from pathlib import Path
from typing import List
UNKNOWN_CATEGORY = 'account2'
@@ -14,7 +14,7 @@ class CsvConfig(BaseModel):
    If multiple configs match a single file we raise an exception.
    """
    class Config:
        extra = Extra.forbid
        extra = 'forbid'
    account1: str
    file_match_regex: str
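
The string `'forbid'` behaves like the old `Extra.forbid` enum. A minimal sketch of the effect (the `Demo` model and its field are illustrative assumptions, not code from this repo):

```python
# Pydantic rejects unknown fields when extra = 'forbid'.
from pydantic import BaseModel, ValidationError

class Demo(BaseModel):
    class Config:
        extra = 'forbid'
    account1: str

Demo(account1="assets:checking")  # fine
try:
    Demo(account1="assets:checking", typo=1)  # unknown field
except ValidationError as e:
    print(e)  # reports that extra fields are not permitted
```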
@@ -29,22 +29,31 @@ class CsvConfig(BaseModel):
class Config(BaseModel):
    """
    Basic class for the configuration of this script.
    - input_directory: we search for ldg and csv files recursively here
    - output_directory: for all input files we do name.replace(input_directory,
      output_directory)
    - mappings_directory: directory of CSV mapping files
    - csv_configs: configuration for the different input files
    Configuration class for managing file search and data processing settings.
    Attributes:
        input_directory (Path): Where to search for 'ldg' and 'csv' files.
        mappings_file (Path): Path to a 'json' file that contains account2 mappings.
        output_file (Path): Location to which to write the output 'ldg' file.
        csv_configs (List[CsvConfig]): How to handle specific CSV files.
        categories (List[str]): A list of account2s. An account has to be defined
            here before it can be used in a mapping. Otherwise, ledger will complain.
        commodities (List[str]): A list of commodities relevant to the data processing.
        find_duplicates (bool): Flag to check for and abort on duplicated
            transactions. Not really useful.
    """
    class Config:
        extra = Extra.forbid
        extra = 'forbid'
    input_directory: Path
    mappings_file: Path
    descriptions_file: Optional[Path] = None
    output_file: Path = Path("output.ldg")
    csv_configs: List[CsvConfig]
    categories: List[str]
    commodities: List[str]
    find_duplicates: bool = False
class Transaction(BaseModel):
@@ -52,7 +61,7 @@ class Transaction(BaseModel):
    Class for ledger transaction to render into ldg file.
    """
    class Config:
        extra = Extra.forbid
        extra = 'forbid'
    currency: str
    debit: str
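
For reference, constructing the updated `Config` might look like the following sketch; all paths and values are made-up assumptions, and it assumes the model above is importable:

```python
# Hypothetical construction showing the new optional descriptions_file
# and the defaults for output_file and find_duplicates.
from pathlib import Path

config = Config(
    input_directory=Path("ledger/in"),
    mappings_file=Path("ledger/mappings.json"),
    # descriptions_file defaults to None, i.e. no descriptions are applied
    csv_configs=[],
    categories=["expenses:dining"],
    commodities=["USD"],
)
# output_file defaults to Path("output.ldg"); find_duplicates to False
```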

View File

@@ -89,6 +89,16 @@ def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
        logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """ Attach user-provided descriptions to transactions, keyed by CSV row. """
    unused_descriptions = set(descriptions.keys())
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config):
    csv_files = src.utils.get_csv_files(config.input_directory)
    transactions = []
@@ -96,9 +106,17 @@ def process_csv_files(config: Config):
        csv_file = str(csv_file)
        csv_config = get_csv_config(csv_file, config.csv_configs)
        transactions += get_transactions(csv_file, csv_config)
    find_duplicates(transactions)
    if config.find_duplicates:
        find_duplicates(transactions)
    if config.descriptions_file is not None:
        descriptions = src.utils.read_descriptions(config.descriptions_file)
        apply_descriptions(transactions, descriptions)
    mappings = src.utils.read_mappings(config.mappings_file)
    apply_mappings(transactions, mappings)
    src.predict.add_account2(transactions, config.categories)
    src.utils.write_mappings(transactions, config.mappings_file)
    src.write.render_to_file(transactions, config)

View File

@@ -76,6 +76,22 @@ def read_mappings(mappings_file: Path) -> Dict[str, str]:
            for row in rows}
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """ I am basic, so the descriptions file currently uses a two-line
    format: the first line matches the CSV row and the second line is the
    description. """
    descriptions = {}
    current_row = None
    with open(descriptions_file, 'r') as f:
        for line in f:
            if current_row is None:
                current_row = line.rstrip("\n")
            else:
                descriptions[current_row] = line.rstrip("\n")
                current_row = None
    return descriptions
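
To make the format concrete, here is a sketch of such a descriptions file and the dict `read_descriptions` produces from it; the file name, row, and description are made-up assumptions, and the function is assumed to be in scope:

```python
# Two-line format: a CSV row on the first line, its description on the
# second. The contents below are illustrative only.
from pathlib import Path

sample = (
    "2024-04-01,COFFEE SHOP,-4.50\n"
    "Espresso with a client\n"
)
with open("descriptions.txt", "w") as f:
    f.write(sample)

print(read_descriptions(Path("descriptions.txt")))
# -> {'2024-04-01,COFFEE SHOP,-4.50': 'Espresso with a client'}
```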
def remove_if_exists(output_file: Path):
    try:
        os.remove(output_file)

View File

@@ -1,24 +1,7 @@
import os.path
import csv
import logging
import src.utils
import src.process
from src.models import Transaction
from rich.logging import RichHandler
from typing import List
def write_mappings(unmatched_transactions: List[Transaction], mappings_directory: str):
    """ Write mappings for unmatched expenses for update by the user. """
    if not unmatched_transactions:
        return
    fn = os.path.join(mappings_directory, "unmatched.csv")
    with open(fn, 'a') as f:
        writer = csv.writer(f)
        for t in unmatched_transactions:
            e = ["expenses", t.description,
                 f"credit={t.credit};date={t.date}"]
            writer.writerow(e)
def init_logging():