generated from felixm/defaultpy
Update project structure and move to beancount
This commit is contained in:
1
src/toldg/__init__.py
Normal file
1
src/toldg/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "0.1.0"
|
||||
28
src/toldg/__main__.py
Normal file
28
src/toldg/__main__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import logging
|
||||
|
||||
from rich.logging import RichHandler
|
||||
|
||||
from toldg.process import process_csv_files, process_ldg_files
|
||||
from toldg.utils import load_config, remove_if_exists, write_meta
|
||||
|
||||
|
||||
def init_logging():
    """Configure the root logger to emit INFO-level messages through rich.

    RichHandler renders records with colors and a [HH:MM:SS] timestamp,
    so the bare "%(message)s" format leaves all decoration to the handler.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(message)s",
        datefmt="[%X]",
        handlers=[RichHandler()],
    )
|
||||
|
||||
|
||||
def main():
    """Entry point: rebuild the output ledger file from scratch.

    Steps: set up logging, load the JSON config (path taken from argv[1]),
    delete any stale output file, write the beancount preamble, then append
    existing .ldg files and the transactions parsed from CSV files.
    """
    init_logging()
    config = load_config()
    # The steps below all append to config.output_file, so a previous
    # run's output must be removed first.
    remove_if_exists(config.output_file)
    write_meta(config)
    process_ldg_files(config)
    process_csv_files(config)
|
||||
|
||||
|
||||
# Allow running the package CLI via "python -m toldg".
if __name__ == "__main__":
    main()
|
||||
32
src/toldg/fzf.py
Normal file
32
src/toldg/fzf.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import errno
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Name of the fzf binary; Windows needs the .exe suffix for Popen to find it.
EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
|
||||
|
||||
|
||||
def iterfzf(iterable, prompt="> "):
    """Run fzf over the lines of *iterable* and return the selected line.

    Returns None when fzf exits abnormally (any status other than 0, or 1
    which means "no match") or when the user made no selection.
    """
    cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
    encoding = sys.getdefaultencoding()
    proc = subprocess.Popen(
        cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
    )
    if proc.stdin is None:
        return None
    try:
        lines = "\n".join(iterable)
        # Encode with the same codec used for decoding the output below so
        # the round-trip is consistent regardless of the platform default.
        proc.stdin.write(lines.encode(encoding))
        proc.stdin.close()
    except IOError as e:
        # fzf may close its stdin early (the user picked an entry before we
        # finished writing); that surfaces as EPIPE and is harmless.
        # BUG FIX: the old check was `e.errno != errno.EPIPE and
        # errno.EPIPE != 32`, whose second operand is a constant False, so
        # *every* IOError was silently swallowed.
        if e.errno != errno.EPIPE:
            raise
    # Exit status 0 = selection made, 1 = no match; anything else is failure.
    # (Popen never returns None, so the old `proc is None` guard was dead.)
    if proc.wait() not in [0, 1]:
        return None
    if proc.stdout is None:
        return None
    output = [
        line.strip(b"\r\n\0").decode(encoding)
        for line in iter(proc.stdout.readline, b"")
    ]
    try:
        return output[0]
    except IndexError:
        return None
|
||||
76
src/toldg/models.py
Normal file
76
src/toldg/models.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Sentinel account2 value for transactions that have not been categorized yet.
UNKNOWN_CATEGORY = "account2"
|
||||
|
||||
|
||||
class CsvConfig(BaseModel):
    """
    Class to define how to parse a certain CSV file. We use the
    file_match_regex attribute to decide whether to apply a config for a file.
    If multiple configs match a single file we raise an exception.
    """

    class Config:
        # Reject unknown keys in the JSON config instead of silently ignoring.
        extra = "forbid"

    # Ledger account the CSV file's rows belong to (e.g. "assets:checking").
    account1: str
    # Regex matched (via re.match) against the CSV file path.
    file_match_regex: str
    # Positional mapping of CSV columns to "date", "amount", "description";
    # empty strings mark columns that are ignored.
    fields: List[str]
    # strptime format of dates as they appear in the CSV.
    input_date_format: str = "%m/%d/%Y"
    # strftime format used when rendering dates into the output.
    output_date_format: str = "%Y/%m/%d"
    # Number of leading (header) rows to skip.
    skip: int = 1
    delimiter: str = ","
    quotechar: str = '"'
    currency: str = "USD"
|
||||
|
||||
|
||||
class Config(BaseModel):
    """
    Configuration class for managing file search and data processing settings.

    Attributes:
        input_directory (Path): Where to search for 'ldg' and 'csv' files.
        mappings_file (Path): The path to a 'json' file that contains account2 mappings.
        descriptions_file (Optional[Path]): Optional file with per-row
            description overrides; see utils.read_descriptions for the format.
        output_file (Path): Location to which to write the output 'ldg' file.
        csv_configs: List of CsvConfig which explains how to handle specific
            CSV files.
        categories (List[str]): A list of account2s. An account has to be defined here
            before it can be used in a mapping. Otherwise, ledger will complain.
        commodities (List[str]): A list of commodities relevant to the data processing.
        find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
            really useful.
    """

    class Config:
        # Reject unknown keys in the JSON config instead of silently ignoring.
        extra = "forbid"

    input_directory: Path
    mappings_file: Path
    descriptions_file: Optional[Path] = None
    output_file: Path = Path("output.ldg")
    csv_configs: List[CsvConfig]
    categories: List[str]
    commodities: List[str]
    find_duplicates: bool = False
|
||||
|
||||
|
||||
class Transaction(BaseModel):
    """
    Class for ledger transaction to render into ldg file.
    """

    class Config:
        # Reject unexpected keyword arguments on construction.
        extra = "forbid"

    currency: str
    # Amount with flipped sign, posted against account2.
    debit: str
    # Amount exactly as read from the CSV, posted against account1.
    credit: str
    date: str
    # Source account taken from the matching CsvConfig.
    account1: str
    # Category; starts as UNKNOWN_CATEGORY until mapped or picked via fzf.
    account2: str
    description: str
    # Source CSV file path, kept for diagnostics.
    csv_file: str
    # Stable identity of the source row (file name + joined CSV fields);
    # used as the key for mappings, descriptions, and duplicate detection.
    row: str
|
||||
49
src/toldg/predict.py
Normal file
49
src/toldg/predict.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from typing import List
|
||||
|
||||
from toldg.fzf import iterfzf
|
||||
from toldg.models import UNKNOWN_CATEGORY, Transaction
|
||||
|
||||
|
||||
def get_sort_categories():
    """Build a callable that ranks candidate categories for a CSV row.

    If fastai is importable and a trained model can be loaded from
    "export.pkl", the returned callable sorts a category list in place by
    the model's predicted probability (most likely first). When fastai is
    missing, the user is asked whether to continue; if so, the returned
    callable is a no-op and the category list keeps its original order.
    """

    def sort_categories(row: str, categories: List[str]):
        # Without a trained learner we cannot rank; leave the list untouched.
        if learn is None:
            return
        _, _, probs = learn.predict(row)
        # Pair each vocab label with its predicted probability.
        cat_to_prob = dict(zip(learn.dls.vocab[1], probs.tolist()))
        # Categories unknown to the model get probability 0.0 and sink to
        # the end of the list.
        categories.sort(
            key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True
        )

    learn = None
    try:
        # fastai is an optional dependency; import lazily so the tool still
        # works (in purely manual mode) when it is not installed.
        from fastai.text.all import load_learner

        # NOTE(review): assumes "export.pkl" exists in the working directory
        # when fastai is installed; a missing file would raise here — confirm.
        learn = load_learner("export.pkl")
    except ModuleNotFoundError:
        user_input = input("No fastai module. Type yes to continue anyway.")
        if user_input.strip().lower() != "yes":
            raise Exception("fastai module missing")

    return sort_categories
|
||||
|
||||
|
||||
def add_account2(transactions: List[Transaction], categories: List[str]):
    """Interactively assign an account2 to every uncategorized transaction.

    Transactions whose account2 is already known are left untouched. For
    the rest, the category list is re-ranked per row (model-assisted when
    available) and the user picks a category via fzf.
    """
    pending = [t for t in transactions if t.account2 == UNKNOWN_CATEGORY]
    if not pending:
        return
    sort_categories = get_sort_categories()
    for transaction in pending:
        sort_categories(transaction.row, categories)
        add_account2_interactive(transaction, categories)
|
||||
|
||||
|
||||
def add_account2_interactive(transaction: Transaction, categories: List[str]):
    """Ask the user to pick an account2 for *transaction* using fzf.

    Re-prompts until fzf yields a selection, then stores it on the
    transaction and confirms on stdout.
    """
    t = transaction
    prompt = f"{t.account1} {t.date} {t.description} {t.debit} > "
    # iterfzf returns None when the user aborts; keep asking until we get one.
    selection = None
    while selection is None:
        selection = iterfzf(categories, prompt=prompt)
    transaction.account2 = selection
    print(f"Assigned category '{selection}'.")
|
||||
121
src/toldg/process.py
Normal file
121
src/toldg/process.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import csv
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from typing import Dict, List
|
||||
|
||||
import toldg.models
|
||||
import toldg.predict
|
||||
import toldg.utils
|
||||
import toldg.write
|
||||
from toldg.models import Config, CsvConfig, Transaction
|
||||
|
||||
|
||||
def process_ldg_files(config: Config):
    """Append the contents of every .ldg file under the input directory to
    the output file, in the order returned by get_ldg_files."""
    for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
        with open(ldg_file, "r") as source, open(config.output_file, "a") as sink:
            sink.write(source.read())
|
||||
|
||||
|
||||
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
    """Return the single CsvConfig whose file_match_regex matches *csv_file*.

    Exits the program when zero or more than one config matches, since
    either case indicates a misconfiguration the user must resolve.
    """
    matching = [
        config for config in csv_configs if re.match(config.file_match_regex, csv_file)
    ]
    if len(matching) == 1:
        return matching[0]
    if matching:
        logging.critical(f"Multiple CSV configs for {csv_file}.")
    else:
        logging.critical(f"No CSV config for {csv_file}.")
    sys.exit(1)
|
||||
|
||||
|
||||
def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
    """Parse *csv_file* into Transaction objects according to *config*.

    The CSV date column is re-formatted, and the amount column is used both
    as the credit on account1 and (sign-flipped) as the debit on the
    yet-unknown account2.
    """

    def date_to_date(date: str) -> str:
        # Re-render the CSV date in the configured output format.
        d = datetime.datetime.strptime(date, config.input_date_format)
        return d.strftime(config.output_date_format)

    def flip_sign(amount: str) -> str:
        # Purely textual sign flip: "-12.34" -> "12.34", "12.34" -> "-12.34".
        return amount[1:] if amount.startswith("-") else "-" + amount

    def row_to_transaction(row, fields):
        """The user can configure the mapping of CSV fields to the three
        required fields date, amount and description via the CsvConfig."""
        # fields is a list of (column_index, field_name) pairs.
        t = {field: row[index] for index, field in fields}
        amount = t["amount"]
        return Transaction(
            currency=config.currency,
            debit=flip_sign(amount),
            credit=amount,
            date=date_to_date(t["date"]),
            account1=config.account1,
            account2=toldg.models.UNKNOWN_CATEGORY,
            description=t["description"],
            csv_file=csv_file,
            # The full original row, prefixed with the file name, serves as a
            # stable key for mappings and duplicate detection.
            row=csv_file + ", " + ", ".join(row),
        )

    # Keep only (index, name) pairs for columns that are mapped to a field;
    # empty strings in config.fields mark ignored columns.
    fields = [(i, f) for i, f in enumerate(config.fields) if f]
    with open(csv_file, "r") as f:
        reader = csv.reader(f, delimiter=config.delimiter, quotechar=config.quotechar)
        # Skip the configured number of header rows.
        for _ in range(config.skip):
            next(reader)
        transactions = [row_to_transaction(row, fields) for row in reader if row]
    return transactions
|
||||
|
||||
|
||||
def find_duplicates(transactions: List[Transaction]):
    """Abort the program if any two transactions share the same source row."""
    seen = set()
    for transaction in transactions:
        key = transaction.row
        if key not in seen:
            seen.add(key)
            continue
        logging.critical(f"'{key}' is duplicated.")
        logging.critical("Exit because of duplicated transactions.")
        sys.exit(1)
|
||||
|
||||
|
||||
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, str]):
    """Set account2 on every transaction whose row has a known mapping.

    Warns once per transaction without a mapping and once per mapping entry
    that no transaction used, so stale entries can be cleaned up.
    """
    unused = set(mappings)
    for transaction in transactions:
        key = transaction.row
        if key not in mappings:
            logging.warning(f"No mapping for '{transaction}'.")
            continue
        transaction.account2 = mappings[key]
        unused.discard(key)
    for key in unused:
        logging.warning(f"Unused mapping '{key}' -> {mappings[key]}.")
|
||||
|
||||
|
||||
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """Replace transaction descriptions with user-supplied overrides.

    *descriptions* maps a transaction's row key to a replacement
    description. Entries that match no transaction are reported so stale
    overrides can be cleaned up.
    """
    unused_descriptions = set(descriptions.keys())
    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)
    for row in unused_descriptions:
        # Fixed copy-paste from apply_mappings: this reports an unused
        # *description*, not an unused mapping.
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
|
||||
|
||||
|
||||
def process_csv_files(config: Config):
    """Convert all CSV files into beancount transactions and write them out.

    Pipeline: parse every CSV with its matching CsvConfig, optionally abort
    on duplicates, apply description overrides and account2 mappings, ask
    the user to categorize whatever is left, persist the (possibly extended)
    mappings, and append the rendered transactions to the output file.
    """
    csv_files = toldg.utils.get_csv_files(config.input_directory)
    transactions = []
    for csv_file in csv_files:
        csv_file = str(csv_file)
        csv_config = get_csv_config(csv_file, config.csv_configs)
        transactions += get_transactions(csv_file, csv_config)

    if config.find_duplicates:
        find_duplicates(transactions)

    if config.descriptions_file is not None:
        descriptions = toldg.utils.read_descriptions(config.descriptions_file)
        apply_descriptions(transactions, descriptions)

    mappings = toldg.utils.read_mappings(config.mappings_file)
    apply_mappings(transactions, mappings)

    # Interactive categorization runs last so the model/fzf only sees
    # transactions that no mapping covered.
    toldg.predict.add_account2(transactions, config.categories)
    # Persist mappings, including categories just assigned interactively.
    toldg.utils.write_mappings(transactions, config.mappings_file)
    toldg.write.render_to_file(transactions, config)
|
||||
113
src/toldg/utils.py
Normal file
113
src/toldg/utils.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
||||
from toldg.models import Config, Transaction
|
||||
|
||||
|
||||
def get_files(directory: Path, ending="") -> List[Path]:
    """Get files from directory recursively, in lexicographic order.

    The docstring always promised lexicographic order, but os.walk by
    itself yields names in arbitrary, platform-dependent order; we sort
    the directory and file lists so the result (and therefore the
    generated ledger) is deterministic.
    """
    result: List[Path] = []
    for subdir, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort steers os.walk's traversal order
        for f in sorted(files):
            if f.endswith(ending):
                result.append(Path(os.path.join(subdir, f)))
    return result
|
||||
|
||||
|
||||
def get_csv_files(directory: Path) -> List[Path]:
    """Return every .csv file under *directory*, recursively."""
    return get_files(directory, ending=".csv")
|
||||
|
||||
|
||||
def get_ldg_files(directory: Path) -> List[Path]:
    """Return every .ldg file under *directory*, recursively."""
    return get_files(directory, ending=".ldg")
|
||||
|
||||
|
||||
def load_config() -> Config:
    """Load and validate the JSON configuration given as argv[1].

    Exits with status 1 (after logging why) when the argument is missing,
    the file does not exist, or its contents fail pydantic validation.
    """
    try:
        config_file = Path(sys.argv[1])
    except IndexError:
        logging.critical("Provide configuration file as first argument.")
        sys.exit(1)

    try:
        with open(config_file, "r") as f:
            config = Config(**json.load(f))
    except ValidationError as e:
        logging.critical(f"Could not validate {config_file}.")
        # The full pydantic error is informative but verbose: log at INFO.
        logging.info(e)
        sys.exit(1)
    except FileNotFoundError:
        logging.critical(f"Could not find {config_file}.")
        sys.exit(1)
    return config
|
||||
|
||||
|
||||
def category_to_bean(c: str) -> str:
    """Convert a ledger account name to beancount style.

    Upper-cases the first character of every colon-separated section while
    leaving the rest of each section untouched:
    "expenses:food" -> "Expenses:Food", "assets:myBank" -> "Assets:MyBank".

    Unlike the previous list-based implementation, empty sections (and the
    empty string) no longer raise IndexError; they pass through unchanged.
    """
    return ":".join(s[:1].upper() + s[1:] for s in c.split(":"))
|
||||
|
||||
|
||||
def write_meta(config: Config):
    """Append the beancount preamble (account opens + options) to the output.

    Every configured category is opened with a fixed 2017-01-01 date so
    that transactions from any later date validate.
    """
    open_directives = "".join(
        f"2017-01-01 open {category_to_bean(category)}\n"
        for category in config.categories
    )
    with open(config.output_file, "a") as f:
        f.write(open_directives)
        f.write("\n")
        f.write('option "operating_currency" "USD"\n\n')
    # NOTE: beancount does not require commodity declarations, so
    # config.commodities is intentionally not written here.
|
||||
|
||||
|
||||
def write_mappings(transactions: List[Transaction], mappings_file: Path):
    """Persist the account2 -> [rows] mapping as pretty-printed JSON.

    Both the categories and each category's row list are sorted so the
    file diffs cleanly between runs.
    """
    grouped: Dict[str, List[str]] = {}
    for transaction in transactions:
        category = transaction.account2.strip()
        grouped.setdefault(category, []).append(transaction.row)

    with open(mappings_file, "w") as f:
        json.dump({k: sorted(v) for k, v in sorted(grouped.items())}, f, indent=4)
|
||||
|
||||
|
||||
def read_mappings(mappings_file: Path) -> Dict[str, str]:
    """Load the mappings JSON and invert it to a row -> account2 dict."""
    with open(mappings_file, "r") as f:
        account2_to_rows = json.load(f)
    inverted: Dict[str, str] = {}
    for category, rows in account2_to_rows.items():
        for row in rows:
            inverted[row] = category
    return inverted
|
||||
|
||||
|
||||
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Parse the description-override file into a row -> description dict.

    The file format is line pairs: the first line of each pair is the CSV
    row key, the second is the description to use for it. A trailing
    unpaired row line is silently ignored.
    """
    descriptions: Dict[str, str] = {}
    pending_row = None
    with open(descriptions_file, "r") as f:
        for raw_line in f:
            value = raw_line.rstrip("\n")
            if pending_row is None:
                pending_row = value
            else:
                descriptions[pending_row] = value
                pending_row = None
    return descriptions
|
||||
|
||||
|
||||
def remove_if_exists(output_file: Path) -> None:
    """Delete *output_file* if it exists; a missing file is not an error.

    Only FileNotFoundError is swallowed. The previous version caught every
    OSError, which silently hid real failures such as permission errors or
    the path being a directory.
    """
    try:
        os.remove(output_file)
    except FileNotFoundError:
        pass
|
||||
32
src/toldg/write.py
Normal file
32
src/toldg/write.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from toldg.models import Config, Transaction
|
||||
from toldg.utils import category_to_bean
|
||||
|
||||
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
||||
{t.date} * "{t.description}"
|
||||
{t.account2:<40} {t.debit:<6} {t.currency}
|
||||
{t.account1:<40} {t.credit:<6} {t.currency}
|
||||
"""
|
||||
|
||||
|
||||
def format(t):
    """Render one Transaction as a beancount entry string.

    Mutates *t* in place: normalizes the date separator, escapes quotes in
    the description, pads positive amounts for column alignment, converts
    account names to beancount style, and swaps decimal comma/point for
    EUR amounts.

    NOTE: the name shadows the builtin format(); kept for compatibility
    with existing callers.
    """

    def pad(amount: str) -> str:
        # Positive amounts get a leading space so digits line up with the
        # minus sign of negative amounts.
        return amount if amount.startswith("-") else " " + amount

    def swap_decimal_marks(amount: str) -> str:
        # European "1.234,56" -> "1,234.56" via a temporary placeholder.
        return amount.replace(".", "|").replace(",", ".").replace("|", ",")

    t.date = t.date.replace("/", "-")
    t.description = t.description.replace('"', '\\"')
    t.debit = pad(t.debit)
    t.credit = pad(t.credit)
    t.account1 = category_to_bean(t.account1)
    t.account2 = category_to_bean(t.account2)
    if t.currency == "EUR":
        t.debit = swap_decimal_marks(t.debit)
        t.credit = swap_decimal_marks(t.credit)
    return BEANCOUNT_TRANSACTION_TEMPLATE.format(t=t)
|
||||
|
||||
|
||||
def render_to_file(transactions: List[Transaction], config: Config):
    """Append the rendered form of every transaction to the output file."""
    rendered = [format(t) for t in transactions]
    with open(config.output_file, "a") as f:
        f.write("".join(rendered))
|
||||
Reference in New Issue
Block a user