Add feature to add descriptions and clean up code.

This commit is contained in:
felixm 2024-04-20 09:07:58 -04:00
parent e29d08e0d7
commit 5cf06b2031
5 changed files with 58 additions and 33 deletions

View File

@ -17,14 +17,13 @@ The script takes a directory in which it recursively searches for CSV and LDG
files. From these files, it generates a single ledger accounting file that files. From these files, it generates a single ledger accounting file that
includes all transactions. includes all transactions.
For now, ledger files are simply appended to the output file without Ledger files are appended to the output file without modifications.
modifications.
However, the transaction for the CSV files are extended with their *account2* However, the transaction for the CSV files are extended with their *account2*
information, i.e, the category of the transaction. Optionally, these information, i.e, the category of the transaction. Optionally, these
transactions can also get a more meaningful description and tags. transactions can also get a more meaningful description and tags.
The mapping information are stored in a file `mappings.json`. It maps a unique The mapping information are stored in a file `mappings.json`. It maps a unique
identifier for each transaction (based on filename, line number) to the identifier for each transaction (based on the filename and full CSV row) to a
respective *account2*, and (optionally) *tags* or *description*. respective *account2*.

View File

@ -1,7 +1,7 @@
from pydantic import BaseModel, Extra from pydantic import BaseModel
from typing import List from typing import List
from typing import Optional
from pathlib import Path from pathlib import Path
from typing import List
UNKNOWN_CATEGORY = 'account2' UNKNOWN_CATEGORY = 'account2'
@ -14,7 +14,7 @@ class CsvConfig(BaseModel):
If multiple configs match a single file we raise an exception. If multiple configs match a single file we raise an exception.
""" """
class Config: class Config:
extra = Extra.forbid extra = 'forbid'
account1: str account1: str
file_match_regex: str file_match_regex: str
@ -29,22 +29,31 @@ class CsvConfig(BaseModel):
class Config(BaseModel): class Config(BaseModel):
""" """
Basic class for the configuration of this script. Configuration class for managing file search and data processing settings.
- input_directory: we search for ldg and csv files recursively here
- output_directory: for all input files we do name.replace(input_directory, Attributes:
output_directory) input_directory (Path): Where to search for 'ldg' and 'csv' files.
- mappings_directory: directory of CSV mapping files mappings_file (Path): The path to a 'json' file that contains account2 mappings.
- csv_configs: configuration for the different input files output_file (Path): Location to which to write the output 'ldg' file.
csv_configs: List of CsvConfig which explains how to handle specific
CSV files.
categories (List[str]): A list of account2s. An account has to be defined here
before it can be used in a mapping. Otherwise, ledger will complain.
commodities (List[str]): A list of commodities relevant to the data processing.
find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
really useful.
""" """
class Config: class Config:
extra = Extra.forbid extra = 'forbid'
input_directory: Path input_directory: Path
mappings_file: Path mappings_file: Path
descriptions_file: Optional[Path] = None
output_file: Path = Path("output.ldg") output_file: Path = Path("output.ldg")
csv_configs: List[CsvConfig] csv_configs: List[CsvConfig]
categories: List[str] categories: List[str]
commodities: List[str] commodities: List[str]
find_duplicates: bool = False
class Transaction(BaseModel): class Transaction(BaseModel):
@ -52,7 +61,7 @@ class Transaction(BaseModel):
Class for ledger transaction to render into ldg file. Class for ledger transaction to render into ldg file.
""" """
class Config: class Config:
extra = Extra.forbid extra = 'forbid'
currency: str currency: str
debit: str debit: str

View File

@ -89,6 +89,16 @@ def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.") logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """Set a human-readable description on each transaction whose raw CSV
    row has an entry in *descriptions*.

    :param transactions: transactions to update in place
    :param descriptions: mapping from raw CSV row to description text

    Logs a warning for every description whose row matched no transaction
    so the user can spot and remove stale entries.
    """
    unused_descriptions = set(descriptions.keys())
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        # Bug fix: previously warned about an unused "mapping"; these are
        # descriptions, which would mislead the user about which file to edit.
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config): def process_csv_files(config: Config):
csv_files = src.utils.get_csv_files(config.input_directory) csv_files = src.utils.get_csv_files(config.input_directory)
transactions = [] transactions = []
@ -96,9 +106,17 @@ def process_csv_files(config: Config):
csv_file = str(csv_file) csv_file = str(csv_file)
csv_config = get_csv_config(csv_file, config.csv_configs) csv_config = get_csv_config(csv_file, config.csv_configs)
transactions += get_transactions(csv_file, csv_config) transactions += get_transactions(csv_file, csv_config)
find_duplicates(transactions)
if config.find_duplicates:
find_duplicates(transactions)
if config.descriptions_file is not None:
descriptions = src.utils.read_descriptions(config.descriptions_file)
apply_descriptions(transactions, descriptions)
mappings = src.utils.read_mappings(config.mappings_file) mappings = src.utils.read_mappings(config.mappings_file)
apply_mappings(transactions, mappings) apply_mappings(transactions, mappings)
src.predict.add_account2(transactions, config.categories) src.predict.add_account2(transactions, config.categories)
src.utils.write_mappings(transactions, config.mappings_file) src.utils.write_mappings(transactions, config.mappings_file)
src.write.render_to_file(transactions, config) src.write.render_to_file(transactions, config)

View File

@ -76,6 +76,22 @@ def read_mappings(mappings_file: Path) -> Dict[str, str]:
for row in rows} for row in rows}
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Read transaction descriptions from a simple line-pair file.

    The format is two lines per entry: the first line is the raw CSV row
    identifying a transaction, the second line is its description. A
    trailing unpaired line (odd line count) is ignored.

    :param descriptions_file: path to the descriptions file
    :return: mapping from CSV row to description
    """
    with open(descriptions_file, 'r') as f:
        lines = [line.rstrip("\n") for line in f]
    # Zipping an iterator with itself pairs consecutive lines:
    # (row, description), (row, description), ...
    pairs = iter(lines)
    return {row: description for row, description in zip(pairs, pairs)}
def remove_if_exists(output_file: Path): def remove_if_exists(output_file: Path):
try: try:
os.remove(output_file) os.remove(output_file)

View File

@ -1,24 +1,7 @@
import os.path
import csv
import logging import logging
import src.utils import src.utils
import src.process import src.process
from src.models import Transaction
from rich.logging import RichHandler from rich.logging import RichHandler
from typing import List
def write_mappings(unmatched_transactions: List[Transaction], mappings_directory: str):
""" Write mappings for unmatched expenses for update by the user. """
if not unmatched_transactions:
return
fn = os.path.join(mappings_directory, "unmatched.csv")
with open(fn, 'a') as f:
writer = csv.writer(f)
for t in unmatched_transactions:
e = ["expenses", t.description,
f"credit={t.credit};date={t.date}"]
writer.writerow(e)
def init_logging(): def init_logging():