generated from felixm/defaultpy
Update project structure and move to beancount
This commit is contained in:
@@ -1,4 +0,0 @@
|
||||
|
||||
|
||||
def hello():
    """Print a fixed greeting to standard output."""
    greeting = "Hello, seaman!"
    print(greeting)
|
||||
1
src/toldg/__init__.py
Normal file
1
src/toldg/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "0.1.0"
|
||||
28
src/toldg/__main__.py
Normal file
28
src/toldg/__main__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import logging
|
||||
|
||||
from rich.logging import RichHandler
|
||||
|
||||
from toldg.process import process_csv_files, process_ldg_files
|
||||
from toldg.utils import load_config, remove_if_exists, write_meta
|
||||
|
||||
|
||||
def init_logging():
    """Configure the root logger for the command line tool.

    Records at INFO level and above are handed to a single ``RichHandler``;
    the format string carries only the message itself.
    """
    rich_handler = RichHandler()
    logging.basicConfig(
        handlers=[rich_handler],
        datefmt="[%X]",
        format="%(message)s",
        level=logging.INFO,
    )
|
||||
|
||||
|
||||
def main():
    """Run the whole conversion: set up logging, load the config, write output.

    The output file is handled first via ``remove_if_exists`` (presumably so a
    previous run's output is discarded - confirm in ``toldg.utils``), then the
    meta section is written, followed by the ledger and CSV processing stages.
    """
    init_logging()
    config = load_config()
    remove_if_exists(config.output_file)
    write_meta(config)
    # Both processing stages take the same config; ledger files come first.
    for stage in (process_ldg_files, process_csv_files):
        stage(config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -2,19 +2,20 @@ import errno
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
EXECUTABLE_NAME = 'fzf.exe' if sys.platform == 'win32' else 'fzf'
|
||||
EXECUTABLE_NAME = "fzf.exe" if sys.platform == "win32" else "fzf"
|
||||
|
||||
|
||||
def iterfzf(iterable, prompt='> '):
|
||||
cmd = [EXECUTABLE_NAME, '--prompt=' + prompt]
|
||||
def iterfzf(iterable, prompt="> "):
|
||||
cmd = [EXECUTABLE_NAME, "--prompt=" + prompt]
|
||||
encoding = sys.getdefaultencoding()
|
||||
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None)
|
||||
proc = subprocess.Popen(
|
||||
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=None
|
||||
)
|
||||
if proc.stdin is None:
|
||||
return None
|
||||
try:
|
||||
lines = "\n".join(iterable)
|
||||
proc.stdin.write(lines.encode('utf-8'))
|
||||
proc.stdin.write(lines.encode("utf-8"))
|
||||
proc.stdin.close()
|
||||
except IOError as e:
|
||||
if e.errno != errno.EPIPE and errno.EPIPE != 32:
|
||||
@@ -24,7 +25,7 @@ def iterfzf(iterable, prompt='> '):
|
||||
if proc.stdout is None:
|
||||
return None
|
||||
decode = lambda t: t.decode(encoding)
|
||||
output = [decode(ln.strip(b'\r\n\0')) for ln in iter(proc.stdout.readline, b'')]
|
||||
output = [decode(ln.strip(b"\r\n\0")) for ln in iter(proc.stdout.readline, b"")]
|
||||
try:
|
||||
return output[0]
|
||||
except IndexError:
|
||||
@@ -1,10 +1,9 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
UNKNOWN_CATEGORY = 'account2'
|
||||
UNKNOWN_CATEGORY = "account2"
|
||||
|
||||
|
||||
class CsvConfig(BaseModel):
|
||||
@@ -13,8 +12,9 @@ class CsvConfig(BaseModel):
|
||||
file_match_regex attribute to decide whether to apply a config for a file.
|
||||
If multiple configs match a single file we raise an exception.
|
||||
"""
|
||||
|
||||
class Config:
|
||||
extra = 'forbid'
|
||||
extra = "forbid"
|
||||
|
||||
account1: str
|
||||
file_match_regex: str
|
||||
@@ -23,8 +23,8 @@ class CsvConfig(BaseModel):
|
||||
output_date_format: str = "%Y/%m/%d"
|
||||
skip: int = 1
|
||||
delimiter: str = ","
|
||||
quotechar: str = "\""
|
||||
currency: str = "$"
|
||||
quotechar: str = '"'
|
||||
currency: str = "USD"
|
||||
|
||||
|
||||
class Config(BaseModel):
|
||||
@@ -39,12 +39,13 @@ class Config(BaseModel):
|
||||
CSV files.
|
||||
categories (List[str]): A list of account2s. An account has to be defined here
|
||||
before it can be used in a mapping. Otherwise, ledger will complain.
|
||||
commodities (List[str]): A list of commodities relevant to the data processing.
|
||||
commodities (List[str]): A list of commodities relevant to the data processing.
|
||||
find_duplicates (bool): Flag to check and abort on duplicated transactions. Not
|
||||
really useful.
|
||||
"""
|
||||
|
||||
class Config:
|
||||
extra = 'forbid'
|
||||
extra = "forbid"
|
||||
|
||||
input_directory: Path
|
||||
mappings_file: Path
|
||||
@@ -60,8 +61,9 @@ class Transaction(BaseModel):
|
||||
"""
|
||||
Class for ledger transaction to render into ldg file.
|
||||
"""
|
||||
|
||||
class Config:
|
||||
extra = 'forbid'
|
||||
extra = "forbid"
|
||||
|
||||
currency: str
|
||||
debit: str
|
||||
@@ -1,19 +1,23 @@
|
||||
from src.models import Transaction, UNKNOWN_CATEGORY
|
||||
from src.fzf import iterfzf
|
||||
from typing import List
|
||||
|
||||
from toldg.fzf import iterfzf
|
||||
from toldg.models import UNKNOWN_CATEGORY, Transaction
|
||||
|
||||
|
||||
def get_sort_categories():
|
||||
def sort_categories(row: str, categories: List[str]):
|
||||
if learn is None:
|
||||
return
|
||||
_, _, probs = learn.predict(row)
|
||||
cat_to_prob = dict(zip(learn.dls.vocab[1],probs.tolist()))
|
||||
categories.sort(key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True)
|
||||
cat_to_prob = dict(zip(learn.dls.vocab[1], probs.tolist()))
|
||||
categories.sort(
|
||||
key=lambda c: cat_to_prob[c] if c in cat_to_prob else 0.0, reverse=True
|
||||
)
|
||||
|
||||
learn = None
|
||||
try:
|
||||
from fastai.text.all import load_learner
|
||||
|
||||
learn = load_learner("export.pkl")
|
||||
except ModuleNotFoundError:
|
||||
user_input = input("No fastai module. Type yes to continue anyway.")
|
||||
@@ -24,7 +28,9 @@ def get_sort_categories():
|
||||
|
||||
|
||||
def add_account2(transactions: List[Transaction], categories: List[str]):
|
||||
unmapped_transactions = list(filter(lambda t: t.account2 == UNKNOWN_CATEGORY, transactions))
|
||||
unmapped_transactions = list(
|
||||
filter(lambda t: t.account2 == UNKNOWN_CATEGORY, transactions)
|
||||
)
|
||||
if len(unmapped_transactions) == 0:
|
||||
return
|
||||
sort_categories = get_sort_categories()
|
||||
@@ -1,26 +1,26 @@
|
||||
import csv
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import datetime
|
||||
import src.utils
|
||||
import src.write
|
||||
import src.models
|
||||
import src.predict
|
||||
from src.models import Config, CsvConfig, Transaction
|
||||
from typing import List, Dict
|
||||
from typing import Dict, List
|
||||
|
||||
import toldg.models
|
||||
import toldg.predict
|
||||
import toldg.utils
|
||||
import toldg.write
|
||||
from toldg.models import Config, CsvConfig, Transaction
|
||||
|
||||
|
||||
def process_ldg_files(config: Config):
|
||||
for ldg_file in src.utils.get_ldg_files(config.input_directory):
|
||||
with open(ldg_file, 'r') as f_in:
|
||||
with open(config.output_file, 'a') as f_out:
|
||||
for ldg_file in toldg.utils.get_ldg_files(config.input_directory):
|
||||
with open(ldg_file, "r") as f_in:
|
||||
with open(config.output_file, "a") as f_out:
|
||||
f_out.write(f_in.read())
|
||||
|
||||
|
||||
def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
|
||||
cs = [c for c in csv_configs
|
||||
if re.match(c.file_match_regex, csv_file)]
|
||||
cs = [c for c in csv_configs if re.match(c.file_match_regex, csv_file)]
|
||||
if not cs:
|
||||
logging.critical(f"No CSV config for {csv_file}.")
|
||||
sys.exit(1)
|
||||
@@ -39,29 +39,28 @@ def get_transactions(csv_file: str, config: CsvConfig) -> List[Transaction]:
|
||||
return amount[1:] if amount.startswith("-") else "-" + amount
|
||||
|
||||
def row_to_transaction(row, fields):
|
||||
""" The user can configure the mapping of CSV fields to the three
|
||||
required fields date, amount and description via the CsvConfig. """
|
||||
"""The user can configure the mapping of CSV fields to the three
|
||||
required fields date, amount and description via the CsvConfig."""
|
||||
t = {field: row[index] for index, field in fields}
|
||||
amount = t['amount']
|
||||
amount = t["amount"]
|
||||
return Transaction(
|
||||
currency=config.currency,
|
||||
debit=flip_sign(amount),
|
||||
credit=amount,
|
||||
date=date_to_date(t['date']),
|
||||
account1=config.account1,
|
||||
account2=src.models.UNKNOWN_CATEGORY,
|
||||
description=t['description'],
|
||||
csv_file=csv_file,
|
||||
row=csv_file + ", " + ", ".join(row))
|
||||
currency=config.currency,
|
||||
debit=flip_sign(amount),
|
||||
credit=amount,
|
||||
date=date_to_date(t["date"]),
|
||||
account1=config.account1,
|
||||
account2=toldg.models.UNKNOWN_CATEGORY,
|
||||
description=t["description"],
|
||||
csv_file=csv_file,
|
||||
row=csv_file + ", " + ", ".join(row),
|
||||
)
|
||||
|
||||
fields = [(i, f) for i, f in enumerate(config.fields) if f]
|
||||
with open(csv_file, 'r') as f:
|
||||
reader = csv.reader(f, delimiter=config.delimiter,
|
||||
quotechar=config.quotechar)
|
||||
with open(csv_file, "r") as f:
|
||||
reader = csv.reader(f, delimiter=config.delimiter, quotechar=config.quotechar)
|
||||
for _ in range(config.skip):
|
||||
next(reader)
|
||||
transactions = [row_to_transaction(row, fields)
|
||||
for row in reader if row]
|
||||
transactions = [row_to_transaction(row, fields) for row in reader if row]
|
||||
return transactions
|
||||
|
||||
|
||||
@@ -100,7 +99,7 @@ def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str,
|
||||
|
||||
|
||||
def process_csv_files(config: Config):
|
||||
csv_files = src.utils.get_csv_files(config.input_directory)
|
||||
csv_files = toldg.utils.get_csv_files(config.input_directory)
|
||||
transactions = []
|
||||
for csv_file in csv_files:
|
||||
csv_file = str(csv_file)
|
||||
@@ -111,13 +110,12 @@ def process_csv_files(config: Config):
|
||||
find_duplicates(transactions)
|
||||
|
||||
if config.descriptions_file is not None:
|
||||
descriptions = src.utils.read_descriptions(config.descriptions_file)
|
||||
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
|
||||
apply_descriptions(transactions, descriptions)
|
||||
|
||||
mappings = src.utils.read_mappings(config.mappings_file)
|
||||
mappings = toldg.utils.read_mappings(config.mappings_file)
|
||||
apply_mappings(transactions, mappings)
|
||||
|
||||
src.predict.add_account2(transactions, config.categories)
|
||||
src.utils.write_mappings(transactions, config.mappings_file)
|
||||
src.write.render_to_file(transactions, config)
|
||||
|
||||
toldg.predict.add_account2(transactions, config.categories)
|
||||
toldg.utils.write_mappings(transactions, config.mappings_file)
|
||||
toldg.write.render_to_file(transactions, config)
|
||||
@@ -1,20 +1,23 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
from src.models import Config, Transaction
|
||||
from typing import Dict, List
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
||||
from toldg.models import Config, Transaction
|
||||
|
||||
|
||||
def get_files(directory: Path, ending="") -> List[Path]:
    """Get files from ``directory`` recursively in lexicographic order.

    Args:
        directory: Root of the directory tree to walk.
        ending: Only files whose name ends with this string are returned.
            The default empty string matches every file.

    Returns:
        The matching file paths, sorted. A directory that cannot be listed
        (for example one that does not exist) contributes no entries, because
        ``os.walk`` ignores listing errors by default.
    """
    # os.walk makes no promise about the order of directory entries, so the
    # result is sorted explicitly to honour the documented ordering.
    return sorted(
        Path(os.path.join(subdir, f))
        for subdir, _, files in os.walk(directory)
        for f in files
        if f.endswith(ending)
    )
|
||||
|
||||
|
||||
def get_csv_files(directory: Path) -> List[Path]:
|
||||
@@ -33,7 +36,7 @@ def load_config() -> Config:
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
with open(config_file, 'r') as f:
|
||||
with open(config_file, "r") as f:
|
||||
config = Config(**json.load(f))
|
||||
except ValidationError as e:
|
||||
logging.critical(f"Could not validate {config_file}.")
|
||||
@@ -45,15 +48,26 @@ def load_config() -> Config:
|
||||
return config
|
||||
|
||||
|
||||
def write_meta(config: Config):
|
||||
with open(config.output_file, 'a') as f:
|
||||
for category in config.categories:
|
||||
f.write(f"account {category}\n")
|
||||
f.write("\n")
|
||||
def category_to_bean(c: str) -> str:
    """Capitalise every colon-separated section of an account name.

    Only the first character of each section is upper-cased; the rest of the
    section is left untouched, e.g. ``"expenses:food"`` becomes
    ``"Expenses:Food"``.

    Empty sections (an empty string, or a doubled or trailing colon) are
    passed through unchanged rather than raising ``IndexError``.

    Args:
        c: Account name with sections separated by ``":"``.

    Returns:
        The account name with each section's first character upper-cased.
    """
    # Slicing with [:1] is safe on an empty section, unlike indexing with [0].
    return ":".join(
        section[:1].upper() + section[1:] for section in c.split(":")
    )
|
||||
|
||||
for commodity in config.commodities:
|
||||
f.write(f"commodity {commodity}\n")
|
||||
|
||||
def write_meta(config: Config):
    """Append the beancount preamble to ``config.output_file``.

    One ``open`` directive dated 2017-01-01 is written per configured
    category (converted with ``category_to_bean``), followed by a blank line
    and the ``operating_currency`` option set to USD.
    """
    with open(config.output_file, "a") as out:
        out.writelines(
            f"2017-01-01 open {category_to_bean(name)}\n"
            for name in config.categories
        )
        out.write("\n")
        out.write('option "operating_currency" "USD"\n\n')
        # NOTE: config.commodities is deliberately not written; a commodity
        # section is not required for beancount.
|
||||
|
||||
|
||||
def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
||||
@@ -69,20 +83,20 @@ def write_mappings(transactions: List[Transaction], mappings_file: Path):
|
||||
|
||||
|
||||
def read_mappings(mappings_file: Path) -> Dict[str, str]:
|
||||
with open(mappings_file, 'r') as f:
|
||||
with open(mappings_file, "r") as f:
|
||||
account2_to_rows = json.load(f)
|
||||
return {row: category
|
||||
for category, rows in account2_to_rows.items()
|
||||
for row in rows}
|
||||
return {
|
||||
row: category for category, rows in account2_to_rows.items() for row in rows
|
||||
}
|
||||
|
||||
|
||||
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
|
||||
""" I am basic so the description file is currently a double row based
|
||||
"""I am basic so the description file is currently a double row based
|
||||
format where the first row matches the CSV row and the second one is the
|
||||
description. """
|
||||
description."""
|
||||
descriptions = {}
|
||||
current_row = None
|
||||
with open(descriptions_file, 'r') as f:
|
||||
with open(descriptions_file, "r") as f:
|
||||
for line in f.readlines():
|
||||
if current_row is None:
|
||||
current_row = line.rstrip("\n")
|
||||
32
src/toldg/write.py
Normal file
32
src/toldg/write.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from toldg.models import Config, Transaction
|
||||
from toldg.utils import category_to_bean
|
||||
|
||||
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
||||
{t.date} * "{t.description}"
|
||||
{t.account2:<40} {t.debit:<6} {t.currency}
|
||||
{t.account1:<40} {t.credit:<6} {t.currency}
|
||||
"""
|
||||
|
||||
|
||||
def format(t):
    """Render one transaction as a beancount entry.

    The values are normalised before being substituted into
    ``BEANCOUNT_TRANSACTION_TEMPLATE``:

    * ``/`` in the date becomes ``-``;
    * double quotes in the description are backslash-escaped;
    * non-negative amounts get a leading space so they line up with
      negative ones;
    * account names are converted with ``category_to_bean``;
    * for EUR amounts ``.`` and ``,`` are swapped (and ``|`` becomes ``,``).

    The transaction passed in is NOT modified: all normalised values live in
    a detached view object, so formatting the same transaction twice gives
    the same text instead of double-escaping quotes and re-padding amounts.

    Note: the function name shadows the builtin ``format`` within this
    module; it is kept because ``render_to_file`` calls it by this name.

    Args:
        t: Object exposing the string attributes ``date``, ``description``,
            ``debit``, ``credit``, ``account1``, ``account2`` and
            ``currency``.

    Returns:
        The filled-in template text for this transaction.
    """
    # Local import keeps this block self-contained (no file-level change).
    from types import SimpleNamespace

    def pad_sign(amount):
        # Reserve the sign column for amounts without a leading minus.
        return amount if amount.startswith("-") else " " + amount

    debit = pad_sign(t.debit)
    credit = pad_sign(t.credit)
    if t.currency == "EUR":
        # Single-pass equivalent of the replace(".", "|") / replace(",", ".")
        # / replace("|", ",") chain, including its effect on a literal "|".
        swap_separators = str.maketrans({".": ",", ",": ".", "|": ","})
        debit = debit.translate(swap_separators)
        credit = credit.translate(swap_separators)

    view = SimpleNamespace(
        date=t.date.replace("/", "-"),
        description=t.description.replace('"', '\\"'),
        account1=category_to_bean(t.account1),
        account2=category_to_bean(t.account2),
        debit=debit,
        credit=credit,
        currency=t.currency,
    )
    return BEANCOUNT_TRANSACTION_TEMPLATE.format(t=view)
|
||||
|
||||
|
||||
def render_to_file(transactions: List[Transaction], config: Config):
    """Append the beancount text of every transaction to ``config.output_file``.

    All transactions are formatted before the file is opened, so a formatting
    error leaves the output file untouched.
    """
    rendered = [format(t) for t in transactions]
    with open(config.output_file, "a") as out:
        out.writelines(rendered)
|
||||
17
src/write.py
17
src/write.py
@@ -1,17 +0,0 @@
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from src.models import Transaction, Config
|
||||
|
||||
|
||||
LEDGER_TRANSACTION_TEMPLATE = """
|
||||
{t.date} {t.description} ; {t.row}
|
||||
{t.account2} {t.currency} {t.debit}
|
||||
{t.account1} {t.currency} {t.credit}
|
||||
"""
|
||||
|
||||
|
||||
def render_to_file(transactions: List[Transaction], config: Config):
    """Append every transaction, rendered with the ledger template, to the output file."""
    entries = []
    for transaction in transactions:
        entries.append(LEDGER_TRANSACTION_TEMPLATE.format(t=transaction))
    with open(config.output_file, "a") as out:
        out.write("".join(entries))
|
||||
Reference in New Issue
Block a user