Add support for transaction descriptions and clean up code.

felixm 2024-04-20 09:07:58 -04:00
parent e29d08e0d7
commit 5cf06b2031
5 changed files with 58 additions and 33 deletions

View File

@@ -17,14 +17,13 @@ The script takes a directory in which it recursively searches for CSV and LDG
files. From these files, it generates a single ledger accounting file that
includes all transactions.
For now, ledger files are simply appended to the output file without
modifications.
Ledger files are appended to the output file without modifications.
However, the transactions from the CSV files are extended with their *account2*
information, i.e., the category of the transaction. Optionally, these
transactions can also get a more meaningful description and tags.
The mapping information is stored in a file `mappings.json`. It maps a unique
identifier for each transaction (based on filename, line number) to the
respective *account2*, and (optionally) *tags* or *description*.
identifier for each transaction (based on the filename and full CSV row) to a
respective *account2*.
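
For illustration, a single mapping entry might look like the following sketch; the exact key format (built from the filename and the full CSV row) is an assumption based on the description above, not taken from the repository:

```python
# Hypothetical sketch of a mappings.json entry; key and account2 value
# are made-up examples.
import json

mapping = {"chase.csv:2024-04-01,COFFEE SHOP,-4.50": "expenses:dining"}
print(json.dumps(mapping, indent=2))
```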

View File

@@ -1,7 +1,7 @@
from pydantic import BaseModel, Extra
from pydantic import BaseModel
from typing import List
from typing import Optional
from pathlib import Path
from typing import List
UNKNOWN_CATEGORY = 'account2'
@@ -14,7 +14,7 @@ class CsvConfig(BaseModel):
    If multiple configs match a single file we raise an exception.
    """
    class Config:
        extra = Extra.forbid
        extra = 'forbid'
    account1: str
    file_match_regex: str
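
The string `'forbid'` behaves like the old `Extra.forbid` enum. A minimal sketch of the effect (the `Demo` model and its field are illustrative assumptions, not code from this repo):

```python
# Pydantic rejects unknown fields when extra = 'forbid'.
from pydantic import BaseModel, ValidationError

class Demo(BaseModel):
    class Config:
        extra = 'forbid'
    account1: str

Demo(account1="assets:checking")  # fine
try:
    Demo(account1="assets:checking", typo=1)  # unknown field
except ValidationError as e:
    print(e)  # reports that extra fields are not permitted
```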
@@ -29,22 +29,31 @@ class CsvConfig(BaseModel):
class Config(BaseModel):
    """
    Basic class for the configuration of this script.
    - input_directory: we search for ldg and csv files recursively here
    - output_directory: for all input files we do name.replace(input_directory,
      output_directory)
    - mappings_directory: directory of CSV mapping files
    - csv_configs: configuration for the different input files
    Configuration class for managing file search and data processing settings.
    Attributes:
        input_directory (Path): Where to search for 'ldg' and 'csv' files.
        mappings_file (Path): Path to a 'json' file that contains account2 mappings.
        output_file (Path): Location to which to write the output 'ldg' file.
        csv_configs (List[CsvConfig]): How to handle specific CSV files.
        categories (List[str]): A list of account2s. An account has to be defined
            here before it can be used in a mapping. Otherwise, ledger will complain.
        commodities (List[str]): A list of commodities relevant to the data processing.
        find_duplicates (bool): Flag to check for and abort on duplicated
            transactions. Not really useful.
    """
    class Config:
        extra = Extra.forbid
        extra = 'forbid'
    input_directory: Path
    mappings_file: Path
    descriptions_file: Optional[Path] = None
    output_file: Path = Path("output.ldg")
    csv_configs: List[CsvConfig]
    categories: List[str]
    commodities: List[str]
    find_duplicates: bool = False
class Transaction(BaseModel):
@@ -52,7 +61,7 @@ class Transaction(BaseModel):
    Class for ledger transaction to render into ldg file.
    """
    class Config:
        extra = Extra.forbid
        extra = 'forbid'
    currency: str
    debit: str
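
For reference, constructing the updated `Config` might look like the following sketch; all paths and values are made-up assumptions, and it assumes the model above is importable:

```python
# Hypothetical construction showing the new optional descriptions_file
# and the defaults for output_file and find_duplicates.
from pathlib import Path

config = Config(
    input_directory=Path("ledger/in"),
    mappings_file=Path("ledger/mappings.json"),
    # descriptions_file defaults to None, i.e. no descriptions are applied
    csv_configs=[],
    categories=["expenses:dining"],
    commodities=["USD"],
)
# output_file defaults to Path("output.ldg"); find_duplicates to False
```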

View File

@@ -89,6 +89,16 @@ def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
        logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """ Attach user-provided descriptions to transactions, keyed by CSV row. """
    unused_descriptions = set(descriptions.keys())
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
def process_csv_files(config: Config):
    csv_files = src.utils.get_csv_files(config.input_directory)
    transactions = []
@@ -96,9 +106,17 @@ def process_csv_files(config: Config):
        csv_file = str(csv_file)
        csv_config = get_csv_config(csv_file, config.csv_configs)
        transactions += get_transactions(csv_file, csv_config)
    find_duplicates(transactions)
    if config.find_duplicates:
        find_duplicates(transactions)
    if config.descriptions_file is not None:
        descriptions = src.utils.read_descriptions(config.descriptions_file)
        apply_descriptions(transactions, descriptions)
    mappings = src.utils.read_mappings(config.mappings_file)
    apply_mappings(transactions, mappings)
    src.predict.add_account2(transactions, config.categories)
    src.utils.write_mappings(transactions, config.mappings_file)
    src.write.render_to_file(transactions, config)

View File

@@ -76,6 +76,22 @@ def read_mappings(mappings_file: Path) -> Dict[str, str]:
            for row in rows}
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """ I am basic, so the descriptions file currently uses a two-line
    format: the first line matches the CSV row and the second line is the
    description. """
    descriptions = {}
    current_row = None
    with open(descriptions_file, 'r') as f:
        for line in f:
            if current_row is None:
                current_row = line.rstrip("\n")
            else:
                descriptions[current_row] = line.rstrip("\n")
                current_row = None
    return descriptions
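
To make the format concrete, here is a sketch of such a descriptions file and the dict `read_descriptions` produces from it; the file name, row, and description are made-up assumptions, and the function is assumed to be in scope:

```python
# Two-line format: a CSV row on the first line, its description on the
# second. The contents below are illustrative only.
from pathlib import Path

sample = (
    "2024-04-01,COFFEE SHOP,-4.50\n"
    "Espresso with a client\n"
)
with open("descriptions.txt", "w") as f:
    f.write(sample)

print(read_descriptions(Path("descriptions.txt")))
# -> {'2024-04-01,COFFEE SHOP,-4.50': 'Espresso with a client'}
```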
def remove_if_exists(output_file: Path):
    try:
        os.remove(output_file)

View File

@@ -1,24 +1,7 @@
import os.path
import csv
import logging
import src.utils
import src.process
from src.models import Transaction
from rich.logging import RichHandler
from typing import List
def write_mappings(unmatched_transactions: List[Transaction], mappings_directory: str):
    """ Write mappings for unmatched expenses for update by the user. """
    if not unmatched_transactions:
        return
    fn = os.path.join(mappings_directory, "unmatched.csv")
    with open(fn, 'a') as f:
        writer = csv.writer(f)
        for t in unmatched_transactions:
            e = ["expenses", t.description,
                 f"credit={t.credit};date={t.date}"]
            writer.writerow(e)
def init_logging():