generated from felixm/defaultpy
Refactor ledger processing to explicit mapping, which will make automated classification easy
This commit is contained in:
parent b6de0e5514
commit ba0c906e3c
2
Pipfile
@@ -4,6 +4,8 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
+rich = "*"
+pydantic = "*"
 
 [dev-packages]
 
104
Pipfile.lock
generated
Normal file
@@ -0,0 +1,104 @@
{
    "_meta": {
        "hash": {
            "sha256": "654c54f63f5623a4ee5945b77e4aed25a286f4264d9ff82eb5196e5f23336dca"
        },
        "pipfile-spec": 6,
        "requires": {
            "python_full_version": "3.11.3",
            "python_version": "3.11"
        },
        "sources": [
            {
                "name": "pypi",
                "url": "https://pypi.org/simple",
                "verify_ssl": true
            }
        ]
    },
    "default": {
        "markdown-it-py": {
            "hashes": [
                "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1",
                "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"
            ],
            "markers": "python_version >= '3.8'",
            "version": "==3.0.0"
        },
        "mdurl": {
            "hashes": [
                "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8",
                "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"
            ],
            "markers": "python_version >= '3.7'",
            "version": "==0.1.2"
        },
        "pydantic": {
            "hashes": [
                "sha256:07293ab08e7b4d3c9d7de4949a0ea571f11e4557d19ea24dd3ae0c524c0c334d",
                "sha256:0a2aabdc73c2a5960e87c3ffebca6ccde88665616d1fd6d3db3178ef427b267a",
                "sha256:0da48717dc9495d3a8f215e0d012599db6b8092db02acac5e0d58a65248ec5bc",
                "sha256:128d9453d92e6e81e881dd7e2484e08d8b164da5507f62d06ceecf84bf2e21d3",
                "sha256:2196c06484da2b3fded1ab6dbe182bdabeb09f6318b7fdc412609ee2b564c49a",
                "sha256:2e9aec8627a1a6823fc62fb96480abe3eb10168fd0d859ee3d3b395105ae19a7",
                "sha256:3283b574b01e8dbc982080d8287c968489d25329a463b29a90d4157de4f2baaf",
                "sha256:3c52eb595db83e189419bf337b59154bdcca642ee4b2a09e5d7797e41ace783f",
                "sha256:4b466a23009ff5cdd7076eb56aca537c745ca491293cc38e72bf1e0e00de5b91",
                "sha256:517a681919bf880ce1dac7e5bc0c3af1e58ba118fd774da2ffcd93c5f96eaece",
                "sha256:5f8bbaf4013b9a50e8100333cc4e3fa2f81214033e05ac5aa44fa24a98670a29",
                "sha256:6257bb45ad78abacda13f15bde5886efd6bf549dd71085e64b8dcf9919c38b60",
                "sha256:67195274fd27780f15c4c372f4ba9a5c02dad6d50647b917b6a92bf00b3d301a",
                "sha256:6cafde02f6699ce4ff643417d1a9223716ec25e228ddc3b436fe7e2d25a1f305",
                "sha256:73ef93e5e1d3c8e83f1ff2e7fdd026d9e063c7e089394869a6e2985696693766",
                "sha256:7845b31959468bc5b78d7b95ec52fe5be32b55d0d09983a877cca6aedc51068f",
                "sha256:7847ca62e581e6088d9000f3c497267868ca2fa89432714e21a4fb33a04d52e8",
                "sha256:7e1d5290044f620f80cf1c969c542a5468f3656de47b41aa78100c5baa2b8276",
                "sha256:7ee829b86ce984261d99ff2fd6e88f2230068d96c2a582f29583ed602ef3fc2c",
                "sha256:83fcff3c7df7adff880622a98022626f4f6dbce6639a88a15a3ce0f96466cb60",
                "sha256:939328fd539b8d0edf244327398a667b6b140afd3bf7e347cf9813c736211896",
                "sha256:95c70da2cd3b6ddf3b9645ecaa8d98f3d80c606624b6d245558d202cd23ea3be",
                "sha256:963671eda0b6ba6926d8fc759e3e10335e1dc1b71ff2a43ed2efd6996634dafb",
                "sha256:970b1bdc6243ef663ba5c7e36ac9ab1f2bfecb8ad297c9824b542d41a750b298",
                "sha256:9863b9420d99dfa9c064042304868e8ba08e89081428a1c471858aa2af6f57c4",
                "sha256:ad428e92ab68798d9326bb3e5515bc927444a3d71a93b4a2ca02a8a5d795c572",
                "sha256:b48d3d634bca23b172f47f2335c617d3fcb4b3ba18481c96b7943a4c634f5c8d",
                "sha256:b9cd67fb763248cbe38f0593cd8611bfe4b8ad82acb3bdf2b0898c23415a1f82",
                "sha256:d111a21bbbfd85c17248130deac02bbd9b5e20b303338e0dbe0faa78330e37e0",
                "sha256:e1aa5c2410769ca28aa9a7841b80d9d9a1c5f223928ca8bec7e7c9a34d26b1d4",
                "sha256:e692dec4a40bfb40ca530e07805b1208c1de071a18d26af4a2a0d79015b352ca",
                "sha256:e7c9900b43ac14110efa977be3da28931ffc74c27e96ee89fbcaaf0b0fe338e1",
                "sha256:eec39224b2b2e861259d6f3c8b6290d4e0fbdce147adb797484a42278a1a486f",
                "sha256:f0b7628fb8efe60fe66fd4adadd7ad2304014770cdc1f4934db41fe46cc8825f",
                "sha256:f50e1764ce9353be67267e7fd0da08349397c7db17a562ad036aa7c8f4adfdb6",
                "sha256:fab81a92f42d6d525dd47ced310b0c3e10c416bbfae5d59523e63ea22f82b31e"
            ],
            "index": "pypi",
            "version": "==1.10.9"
        },
        "pygments": {
            "hashes": [
                "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c",
                "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"
            ],
            "markers": "python_version >= '3.7'",
            "version": "==2.15.1"
        },
        "rich": {
            "hashes": [
                "sha256:8f87bc7ee54675732fa66a05ebfe489e27264caeeff3728c945d25971b6485ec",
                "sha256:d653d6bccede5844304c605d5aac802c7cf9621efd700b46c7ec2b51ea914898"
            ],
            "index": "pypi",
            "version": "==13.4.2"
        },
        "typing-extensions": {
            "hashes": [
                "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26",
                "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"
            ],
            "markers": "python_version >= '3.7'",
            "version": "==4.6.3"
        }
    },
    "develop": {}
}
24
README.md
@@ -1,6 +1,9 @@
-# defaultpy
+# ledgerai
 
-Default Python project.
+Script to transform CSV data into [ledger](https://ledger-cli.org/) accounting
+files.
 
+# Usage
+
 Run `pipenv install --dev` to install all packages.
 
@@ -8,3 +11,20 @@ Run `pipenv shell` to get venv shell.
 
 Run `pipenv install <package>` to install a package.
 
+# Architecture
+
+The script takes a directory in which it recursively searches for CSV and LDG
+files. From these files, it generates a single ledger accounting file that
+includes all transactions.
+
+For now, ledger files are simply appended to the output file without
+modifications.
+
+However, the transactions from the CSV files are extended with their *account2*
+information, i.e., the category of the transaction. Optionally, these
+transactions can also get a more meaningful description and tags.
+
+The mapping information is stored in a file `mappings.json`. It maps a unique
+identifier for each transaction (based on filename and line number) to the
+respective *account2* and (optionally) *tags* or *description*.
+
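To make the mapping file concrete: per `write_mappings` in `src/utils.py`, the JSON maps each *account2* category to a sorted list of row identifiers. A minimal sketch with invented account names and rows:

```json
{
    "Expenses:Groceries": [
        "bank/checking.csv, 06/15/2023, GROCERY STORE, -42.10"
    ],
    "Expenses:Rent": [
        "bank/checking.csv, 06/01/2023, ACME PROPERTY MGMT, -1200.00"
    ]
}
```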
60
src/models.py
Normal file
@ -0,0 +1,60 @@
|
||||
from pydantic import BaseModel, Extra
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
|
||||
class CsvConfig(BaseModel):
|
||||
"""
|
||||
Class to define how to parse a certain CSV file. We use the
|
||||
file_match_regex attribute to decide whether to apply a config for a file.
|
||||
If multiple configs match a single file we raise an exception.
|
||||
"""
|
||||
class Config:
|
||||
extra = Extra.forbid
|
||||
|
||||
account1: str
|
||||
file_match_regex: str
|
||||
fields: List[str]
|
||||
input_date_format: str = "%m/%d/%Y"
|
||||
output_date_format: str = "%Y/%m/%d"
|
||||
skip: int = 1
|
||||
delimiter: str = ","
|
||||
quotechar: str = "\""
|
||||
currency: str = "$"
|
||||
|
||||
|
||||
class Config(BaseModel):
|
||||
"""
|
||||
Basic class for the configuration of this script.
|
||||
- input_directory: we search for ldg and csv files recursively here
|
||||
- output_directory: for all input files we do name.replace(input_directory,
|
||||
output_directory)
|
||||
- mappings_directory: directory of CSV mapping files
|
||||
- csv_configs: configuration for the different input files
|
||||
"""
|
||||
class Config:
|
||||
extra = Extra.forbid
|
||||
|
||||
input_directory: Path
|
||||
mappings_file: Path
|
||||
output_file: Path = Path("output.ldg")
|
||||
csv_configs: List[CsvConfig]
|
||||
|
||||
|
||||
class Transaction(BaseModel):
|
||||
"""
|
||||
Class for ledger transaction to render into ldg file.
|
||||
"""
|
||||
class Config:
|
||||
extra = Extra.forbid
|
||||
|
||||
currency: str
|
||||
debit: str
|
||||
credit: str
|
||||
date: str
|
||||
account1: str
|
||||
account2: str
|
||||
description: str
|
||||
csv_file: str
|
||||
row: str
|
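A hypothetical `config.json` that would validate against these models; all paths, account names, and the regex are invented for illustration:

```json
{
    "input_directory": "bank",
    "mappings_file": "mappings.json",
    "output_file": "output.ldg",
    "csv_configs": [
        {
            "account1": "Assets:Checking",
            "file_match_regex": ".*checking.*\\.csv",
            "fields": ["date", "", "description", "amount"]
        }
    ]
}
```

The empty string in `fields` marks a CSV column to ignore (see the `if f` filter in `src/process.py`); `skip`, `delimiter`, `quotechar`, `currency`, and the date formats fall back to their defaults.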
102
src/process.py
Normal file
@ -0,0 +1,102 @@
|
||||
import csv
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import datetime
|
||||
import src.utils
|
||||
import src.write
|
||||
from src.models import Config, CsvConfig, Transaction
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
def process_ldg_files(config: Config):
|
||||
for ldg_file in src.utils.get_ldg_files(config.input_directory):
|
||||
with open(ldg_file, 'r') as f_in:
|
||||
with open(config.output_file, 'a') as f_out:
|
||||
f_out.write(f_in.read())
|
||||
|
||||
|
||||
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
|
||||
cs = [c for c in csv_configs
|
||||
if re.match(c.file_match_regex, csv_file)]
|
||||
if not cs:
|
||||
logging.critical(f"No CSV config for {csv_file}.")
|
||||
sys.exit(1)
|
||||
elif len(cs) > 1:
|
||||
logging.critical(f"Multiple CSV configs for {csv_file}.")
|
||||
sys.exit(1)
|
||||
return cs[0]
|
||||
|
||||
|
||||
def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
|
||||
def date_to_date(date: str) -> str:
|
||||
d = datetime.datetime.strptime(date, config.input_date_format)
|
||||
return d.strftime(config.output_date_format)
|
||||
|
||||
def flip_sign(amount: str) -> str:
|
||||
return amount[1:] if amount.startswith("-") else "-" + amount
|
||||
|
||||
def row_to_transaction(row, fields):
|
||||
""" The user can configure the mapping of CSV fields to the three
|
||||
required fields date, amount and description via the CsvConfig. """
|
||||
t = {field: row[index] for index, field in fields}
|
||||
amount = t['amount']
|
||||
return Transaction(
|
||||
currency=config.currency,
|
||||
debit=flip_sign(amount),
|
||||
credit=amount,
|
||||
date=date_to_date(t['date']),
|
||||
account1=config.account1,
|
||||
account2="account2",
|
||||
description=t['description'],
|
||||
csv_file=csv_file,
|
||||
row=csv_file + ", " + ", ".join(row))
|
||||
|
||||
fields = [(i, f) for i, f in enumerate(config.fields) if f]
|
||||
with open(csv_file, 'r') as f:
|
||||
reader = csv.reader(f, delimiter=config.delimiter,
|
||||
quotechar=config.quotechar)
|
||||
for _ in range(config.skip):
|
||||
next(reader)
|
||||
transactions = [row_to_transaction(row, fields)
|
||||
for row in reader if row]
|
||||
return transactions
|
||||
|
||||
|
||||
def find_duplicates(transactions: List[Transaction]):
|
||||
rows = set()
|
||||
for t in transactions:
|
||||
row = t.row
|
||||
if row in rows:
|
||||
logging.critical(f"'{row}' is duplicated.")
|
||||
logging.critical("Exit because of duplicated transactions.")
|
||||
sys.exit(1)
|
||||
else:
|
||||
rows.add(row)
|
||||
|
||||
|
||||
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
|
||||
unused_mappings = set(mappings.keys())
|
||||
for t in transactions:
|
||||
if t.row in mappings:
|
||||
t.account2 = mappings[t.row]
|
||||
unused_mappings.discard(t.row)
|
||||
else:
|
||||
logging.warning(f"No mapping for '{t}'.")
|
||||
for row in unused_mappings:
|
||||
logging.warning(f"Unused mapping '{row}' -> {mappings[row]}.")
|
||||
|
||||
|
||||
def process_csv_files(config: Config):
|
||||
csv_files = src.utils.get_csv_files(config.input_directory)
|
||||
transactions = []
|
||||
for csv_file in csv_files:
|
||||
csv_file = str(csv_file)
|
||||
csv_config = get_csv_config(csv_file, config.csv_configs)
|
||||
transactions += get_transactions(csv_file, csv_config)
|
||||
find_duplicates(transactions)
|
||||
mappings = src.utils.read_mappings(config.mappings_file)
|
||||
apply_mappings(transactions, mappings)
|
||||
src.utils.write_mappings(transactions, config.mappings_file)
|
||||
src.write.render_to_file(transactions, config.output_file)
|
||||
|
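To illustrate the `fields` indirection in `get_transactions`, a small sketch with invented data:

```python
# config.fields assigns a meaning to each CSV column; empty strings mark
# columns to ignore (hypothetical column layout).
config_fields = ["date", "", "description", "amount"]
fields = [(i, f) for i, f in enumerate(config_fields) if f]
# fields == [(0, "date"), (2, "description"), (3, "amount")]

row = ["06/15/2023", "POS", "GROCERY STORE", "-42.10"]
t = {field: row[index] for index, field in fields}
# t == {"date": "06/15/2023", "description": "GROCERY STORE",
#       "amount": "-42.10"}
```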
72
src/utils.py
Normal file
@ -0,0 +1,72 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
from src.models import Config, Transaction
|
||||
from pydantic import ValidationError
|
||||
|
||||
|
||||
def get_files(directory: Path, ending="") -> List[Path]:
|
||||
""" Gets files from directory recursively in lexigraphic order. """
|
||||
return [Path(os.path.join(subdir, f))
|
||||
for subdir, _, files in os.walk(directory)
|
||||
for f in files
|
||||
if f.endswith(ending)]
|
||||
|
||||
|
||||
def get_csv_files(directory: Path) -> List[Path]:
|
||||
return get_files(directory, ".csv")
|
||||
|
||||
|
||||
def get_ldg_files(directory: Path) -> List[Path]:
|
||||
return get_files(directory, ".ldg")
|
||||
|
||||
|
||||
def load_config() -> Config:
|
||||
try:
|
||||
config_file = Path(sys.argv[1])
|
||||
except IndexError:
|
||||
logging.critical("Provide configuration file as first argument.")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
with open(config_file, 'r') as f:
|
||||
config = Config(**json.load(f))
|
||||
except ValidationError as e:
|
||||
logging.critical(f"Could not validate {config_file}.")
|
||||
logging.info(e)
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
logging.critical(f"Could not find {config_file}.")
|
||||
sys.exit(1)
|
||||
return config
|
||||
|
||||
|
||||
def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
||||
mappings = {}
|
||||
for t in transactions:
|
||||
try:
|
||||
mappings[t.account2.strip()].append(t.row)
|
||||
except KeyError:
|
||||
mappings[t.account2.strip()] = [t.row]
|
||||
|
||||
with open(mappings_file, "w") as f:
|
||||
json.dump({k: sorted(v) for k, v in sorted(mappings.items())}, f, indent=4)
|
||||
|
||||
|
||||
def read_mappings(mappings_file: Path) -> Dict[str, str]:
|
||||
with open(mappings_file, 'r') as f:
|
||||
account2_to_rows = json.load(f)
|
||||
return {row: category
|
||||
for category, rows in account2_to_rows.items()
|
||||
for row in rows}
|
||||
|
||||
|
||||
def remove_if_exists(output_file: Path):
|
||||
try:
|
||||
os.remove(output_file)
|
||||
except OSError:
|
||||
pass
|
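The two mapping helpers are inverses of each other: `write_mappings` groups row identifiers under their *account2* category, and `read_mappings` flattens that back into a row-to-category dictionary. A sketch of the round trip with invented data:

```python
on_disk = {
    "Expenses:Groceries": [
        "bank/checking.csv, 06/15/2023, GROCERY STORE, -42.10"
    ],
}
in_memory = {row: category
             for category, rows in on_disk.items()
             for row in rows}
assert in_memory == {
    "bank/checking.csv, 06/15/2023, GROCERY STORE, -42.10":
        "Expenses:Groceries",
}
```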
17
src/write.py
Normal file
@ -0,0 +1,17 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from src.models import Transaction
|
||||
|
||||
|
||||
LEDGER_TRANSACTION_TEMPLATE = """
|
||||
{t.date} {t.description} ; {t.row}
|
||||
{t.account2} {t.currency} {t.debit}
|
||||
{t.account1} {t.currency} {t.credit}
|
||||
"""
|
||||
|
||||
|
||||
def render_to_file(transactions: List[Transaction], ledger_file: Path):
|
||||
content = "".join([LEDGER_TRANSACTION_TEMPLATE.format(t=t)
|
||||
for t in transactions])
|
||||
with open(ledger_file, 'a') as f:
|
||||
f.write(content)
|
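Rendered through this template, a single transaction would look roughly like the following (values invented; `debit` is the sign-flipped `credit` from `src/process.py`):

```
2023/06/15 GROCERY STORE ; bank/checking.csv, 06/15/2023, POS, GROCERY STORE, -42.10
    Expenses:Groceries    $ 42.10
    Assets:Checking    $ -42.10
```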
42
toldg.py
Normal file
@ -0,0 +1,42 @@
|
||||
import os.path
|
||||
import csv
|
||||
import logging
|
||||
import src.utils
|
||||
import src.process
|
||||
from src.models import Transaction
|
||||
from rich.logging import RichHandler
|
||||
from typing import List
|
||||
|
||||
|
||||
def write_mappings(unmatched_transactions: List[Transaction], mappings_directory: str):
|
||||
""" Write mappings for unmatched expenses for update by the user. """
|
||||
if not unmatched_transactions:
|
||||
return
|
||||
fn = os.path.join(mappings_directory, "unmatched.csv")
|
||||
with open(fn, 'a') as f:
|
||||
writer = csv.writer(f)
|
||||
for t in unmatched_transactions:
|
||||
e = ["expenses", t.description,
|
||||
f"credit={t.credit};date={t.date}"]
|
||||
writer.writerow(e)
|
||||
|
||||
|
||||
def init_logging():
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(message)s",
|
||||
datefmt="[%X]",
|
||||
handlers=[RichHandler()],
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
init_logging()
|
||||
config = src.utils.load_config()
|
||||
src.utils.remove_if_exists(config.output_file)
|
||||
src.process.process_ldg_files(config)
|
||||
src.process.process_csv_files(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
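Given a config file like the one sketched earlier, the entry point would presumably be invoked as follows, since `load_config` reads the config path from `sys.argv[1]` (filename hypothetical):

```
pipenv run python toldg.py config.json
```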