Add scripts and update readme.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
# Ignore sensitive data
|
||||
gather.json
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
27
README.md
27
README.md
@@ -1,3 +1,28 @@
|
||||
# ledgerpy
|
||||
|
||||
Scripts to transform different input formats (CSV and OFX) into ledger accounting files. Includes mapping language to update transaction details automatically.
|
||||
Scripts to transform different input formats (CSV and OFX) into ledger
|
||||
accounting files. Includes mapping language to update transaction details
|
||||
automatically.
|
||||
|
||||
There are other [scripts](https://github.com/ledger/ledger/wiki/CSV-Import) that
|
||||
attempt to handle the same use-cases. I have tried a couple of them, as well as
|
||||
hledger's integrated CSV import, and ran into issues or didn't like the
|
||||
usability. That's why I wrote my own scripts for my workflow. Probably not too
|
||||
useful for anybody else, but I included an example workspace to showcase how I
|
||||
use the scripts.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- jinja2
|
||||
- ofxtools
|
||||
- python3.8 or higher
|
||||
|
||||
## Todo
|
||||
|
||||
- [ ] Write this readme
|
||||
- [ ] Create setup.py file
|
||||
- [ ] Use OFX parser from ofxtools instead of parsing the XML
|
||||
- [ ] Autoappend latest OFX data to CSV file
|
||||
- [ ] Include example workspace with mock data to demo my workflow
|
||||
|
||||
|
||||
|
||||
94
getofx.py
Normal file
94
getofx.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3

import csv
import datetime
import json
import logging
import sys
import xml.etree.ElementTree as ET
# Fixed: namedtuple lives in collections, not functools — the old
# "from functools import namedtuple" raised ImportError at startup.
from collections import namedtuple

import ofxtools
from ofxtools import OFXClient
from ofxtools.Client import StmtRq, CcStmtEndRq, CcStmtRq
|
||||
|
||||
|
||||
def get_transactions(data):
    """Parse an OFX response (as XML text) into a list of Transaction tuples.

    Each STMTTRN element yields one Transaction. NAME is prepended to and
    MEMO appended to the description; DTPOSTED is reformatted to MM/DD/YYYY.
    Fields the OFX data does not carry are filled with "-".
    """
    Transaction = namedtuple("Transaction",
                             ["details", "date", "description",
                              "amount", "type", "balance", "slip"])
    transactions = []
    for stmt in ET.fromstring(data).iter("STMTTRN"):
        desc, posted, amt = "", "", ""
        for elem in stmt:
            tag, text = elem.tag, elem.text
            if tag == "TRNAMT":
                amt = text
            elif tag == "DTPOSTED":
                # Only the leading YYYYMMDD part matters; drop the time.
                parsed = datetime.datetime.strptime(text[:8], "%Y%m%d")
                posted = parsed.strftime("%m/%d/%Y")
            elif tag == "NAME":
                desc = (text + " " + desc) if desc else text
            elif tag == "MEMO":
                desc = (desc + " " + text) if desc else text
        transactions.append(
            Transaction("-", posted, desc, amt, "-", "-", "-"))
    return transactions
|
||||
|
||||
|
||||
def process_account(client, secret, year, name, accttype, acctid, csv_file):
    """Download one calendar year of statements for a single account and
    write the transactions to csv_file.

    name is unused here but kept in the signature because callers splat the
    whole account config dict (which includes "name") into this function.
    """
    dtstart = datetime.datetime(int(year), 1, 1, tzinfo=ofxtools.utils.UTC)
    dtend = datetime.datetime(int(year), 12, 31, tzinfo=ofxtools.utils.UTC)

    # Bank accounts need a bank statement request; anything else is treated
    # as a credit-card account.
    if accttype.upper() in ("CHECKING", "SAVINGS"):
        rq = StmtRq(acctid=acctid, accttype=accttype.upper(),
                    dtstart=dtstart, dtend=dtend)
    else:
        rq = CcStmtRq(acctid=acctid, dtstart=dtstart, dtend=dtend)

    response = client.request_statements(secret, rq)
    data = response.read().decode()
    transactions = get_transactions(data)

    # newline="" is required for csv.writer file objects; without it the
    # csv module emits blank lines between rows on Windows.
    with open(csv_file, "w", newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["details", "date", "description",
                             "amount", "type", "balance", "slip"])
        csv_writer.writerows(transactions)
|
||||
|
||||
|
||||
def get_client(url, userid, org, fid, clientuid, bankid, version, **kwargs):
    """Build an OFXClient from the "client" section of the config.

    Extra keyword arguments are accepted (and ignored) so the whole config
    mapping can be splatted in without filtering its keys.
    """
    client = OFXClient(
        url,
        userid=userid,
        org=org,
        fid=fid,
        clientuid=clientuid,
        bankid=bankid,
        version=version,
        prettyprint=True,
    )
    return client
|
||||
|
||||
|
||||
def main(config):
    """Fetch and export transactions for every account in the config."""
    client = get_client(**config["client"])
    year, secret = config["year"], config["secret"]
    for account in config["accounts"]:
        name = account["name"]
        logging.info(f"Processing {name}.")
        process_account(client, secret, year, **account)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Config file may be passed as the first CLI argument; fall back to the
    # default gather.json next to the script otherwise.
    config_file = sys.argv[1] if len(sys.argv) > 1 else "gather.json"
    with open(config_file, 'r') as f:
        main(json.load(f))
|
||||
|
||||
316
toldg.py
Normal file
316
toldg.py
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import sys
|
||||
import csv
|
||||
import os.path
|
||||
import time
|
||||
import re
|
||||
import datetime
|
||||
import logging
|
||||
import jinja2
|
||||
import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Tuple
|
||||
|
||||
|
||||
@dataclass
class Config:
    """
    Basic class for the configuration of this script.
    - input_directory: we search for ldg and csv files recursively here
    - output_directory: for all input files we do name.replace(input_directory,
      output_directory)
    - mappings_directory: directory of CSV mapping files
    - csv_configs: configuration for the different input files
    """
    input_directory: str
    output_directory: str
    mappings_directory: str
    # Loaded from JSON as a list of dicts; main() replaces each dict with a
    # CsvConfig instance in place before any file is processed.
    csv_configs: List
|
||||
|
||||
|
||||
@dataclass
class CsvConfig:
    """
    Class to define how to parse a certain CSV file. We use the
    file_match_regex attribute to decide whether to apply a config for a file.
    If multiple configs match a single file we raise an exception.
    """
    account1: str                         # ledger account the CSV file posts against
    file_match_regex: str                 # tested with re.match against the file path
    fields: List[str]                     # per-column names; empty string = ignore column
    input_date_format: str = "%m/%d/%Y"   # strptime format of dates in the CSV
    output_date_format: str = "%Y/%m/%d"  # strftime format written to the ldg file
    skip: int = 1                         # number of header rows to skip
    delimiter: str = ","
    quotechar: str = "\""
    currency: str = "$"                   # currency symbol used in rendered postings
|
||||
|
||||
|
||||
@dataclass
class CsvMapping:
    """
    Class that defines the account2 attribute for a CSV transaction.

    Attributes:
        mapping_file: path of the mapping CSV file this rule was read from
        account2: ledger account assigned when the rule matches
        description_pattern: string or regex to match the description
        specifiers: additional conditions in the form
            transaction_attribute=value;another_attribute=value2
            parsed into (attribute, value) pairs
    """
    mapping_file: str
    account2: str
    description_pattern: str
    # default_factory=list replaces the needless `lambda: []` wrapper.
    specifiers: List[Tuple[str, str]] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class LdgTransaction:
    """
    Class for ledger transaction to render into ldg file.
    """
    currency: str     # currency symbol, e.g. "$"
    debit: str        # amount posted to account2 (sign-flipped CSV amount)
    credit: str       # amount posted to account1 (raw CSV amount)
    date: str         # already converted to the configured output format
    account1: str     # source account (CsvConfig.account1), space-padded
    account2: str     # destination account resolved via mappings, space-padded
    description: str
    csv_file: str     # CSV file the row came from
    row: str          # original CSV row joined with ", " (kept as ldg comment)
|
||||
|
||||
|
||||
LEDGER_TRANSACTION_TEMPLATE = """
|
||||
{{t.date}} {{t.description}} ; {{t.row}}
|
||||
{{t.account2}} {{t.currency}} {{t.debit}}
|
||||
{{t.account1}} {{t.currency}} {{t.credit}}
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def get_files(input_directory):
    """Return every file path under input_directory, recursively, sorted
    lexicographically."""
    found = []
    for subdir, _dirs, filenames in os.walk(input_directory):
        found.extend(os.path.join(subdir, name) for name in filenames)
    found.sort()
    return found
|
||||
|
||||
|
||||
def get_mappings(mappings_directory: str) -> List[CsvMapping]:
    """Load every mapping rule from all CSV files under mappings_directory.

    A mapping row is ``account2,description_pattern[,specifiers]``.  A
    pattern wrapped in slashes (``/.../``) is compiled as a case-insensitive
    regex; blank rows and rows whose first cell starts with ``#`` are
    skipped.
    """

    def split_specifiers(text):
        # "attr1=v1;attr2=v2" -> [("attr1", "v1"), ("attr2", "v2")]
        pairs = []
        for chunk in text.split(';'):
            attr, value = chunk.split("=")
            pairs.append((attr, value))
        return pairs

    def load_file(path):
        def build(row):
            pat = row[1]
            if pat.startswith("/") and pat.endswith("/"):
                row[1] = re.compile(pat[1:-1], re.IGNORECASE)
            if len(row) == 3 and row[2]:
                row[2] = split_specifiers(row[2])
            return CsvMapping(path, *row)

        with open(path, 'r') as f:
            rows = csv.reader(f, delimiter=',', quotechar='"')
            # skip empty lines and comment rows
            return [build(row) for row in rows
                    if row and not row[0].startswith("#")]

    mappings = []
    for mapping_file in get_files(mappings_directory):
        mappings.extend(load_file(mapping_file))
    return mappings
|
||||
|
||||
|
||||
def get_transactions(csv_file, config: CsvConfig, mappings: List[CsvMapping]):
    """Parse csv_file according to config and return the pair
    (transactions, unmatched_expenses).

    transactions is a list of LdgTransaction built from the CSV rows;
    unmatched_expenses is a list of ready-made mapping-file lines (strings)
    for descriptions no mapping matched, so the caller can append them to
    the mappings directory for later curation.
    """

    def date_to_date(date):
        # Convert from the CSV's input date format to the ldg output format.
        d = datetime.datetime.strptime(date, config.input_date_format)
        return d.strftime(config.output_date_format)

    def flip_sign(amount):
        # String-level negation: "-12.34" -> "12.34", "12.34" -> "-12.34".
        if amount.startswith("-"):
            return amount[1:]
        return "-" + amount

    def make_equal_len(str_1, str_2):
        # Pad the shorter string with spaces so amounts line up in the ldg.
        max_len = max(len(str_1), len(str_2))
        str_1 += " " * (max_len - len(str_1))
        str_2 += " " * (max_len - len(str_2))
        return (str_1, str_2)

    def get_account2(transaction):
        # Pick the account2 of the first mapping whose description pattern
        # (exact string, or regex match) and specifiers all match.
        t = transaction
        matching_mappings = []
        for mapping in mappings:
            pattern = mapping.description_pattern
            if type(pattern) is str and pattern == transaction.description:
                pass
            elif type(pattern) is re.Pattern and pattern.match(t.description):
                pass
            else:
                continue

            if all(getattr(t, attr) == value
                   for attr, value in mapping.specifiers):
                matching_mappings.append(mapping)

        if not matching_mappings:
            logging.info(f"No match for {transaction}.")
            # Emit a template mapping line so the user can classify it later.
            e = f"expenses,{t.description},credit={t.credit};date={t.date}\n"
            unmatched_expenses.append(e)
            return "expenses"
        elif len(matching_mappings) == 1:
            return matching_mappings[0].account2
        else:
            logging.info(
                f"\nMultiple matches for {transaction}. Picking first.")
            for m in matching_mappings:
                logging.info(f"  {m}")
            return matching_mappings[0].account2

    def row_to_transaction(row):
        values = {name: row[index] for index, name in fields}
        amount = values['amount']
        t = LdgTransaction(config.currency, flip_sign(amount), amount,
                           date_to_date(values['date']), config.account1,
                           "", values['description'], csv_file, ", ".join(row))
        t.account1, t.account2 = make_equal_len(t.account1, get_account2(t))
        return t

    # (index, name) pairs for the CSV columns we use; empty names in
    # config.fields mean "ignore this column". (Renamed from `field`, which
    # shadowed the dataclasses.field import.)
    fields = [(index, name)
              for index, name in enumerate(config.fields) if name]
    unmatched_expenses = []
    with open(csv_file, 'r') as f:
        reader = csv.reader(f, delimiter=config.delimiter,
                            quotechar=config.quotechar)
        # Skip header rows. Using next(reader, None) tolerates files shorter
        # than config.skip; the old side-effect list comprehension crashed
        # with StopIteration on such files.
        for _ in range(config.skip):
            next(reader, None)
        transactions = [row_to_transaction(row) for row in reader if row]
    return transactions, unmatched_expenses
|
||||
|
||||
|
||||
def render_to_file(transactions, csv_file, ledger_file, template_file=""):
    """Render transactions into ledger_file, writing only when the content
    actually changes (so mtimes stay stable for unchanged files).

    template_file: optional path to a Jinja2 template; when empty the
    built-in LEDGER_TRANSACTION_TEMPLATE is used.
    """
    if template_file:
        dirname = os.path.dirname(template_file)
        template_file = os.path.basename(template_file)
        template_loader = jinja2.FileSystemLoader(searchpath=dirname)
        template_env = jinja2.Environment(loader=template_loader)
        template = template_env.get_template(template_file)
    else:
        template_env = jinja2.Environment(loader=jinja2.BaseLoader)
        template = template_env.from_string(LEDGER_TRANSACTION_TEMPLATE)

    # Render into a spooled (in-memory) virtual file. We could build a plain
    # string, but this works more nicely with the Jinja API. The with-block
    # closes the temporary file — the old code leaked it.
    with tempfile.SpooledTemporaryFile(mode='w+') as tf:
        for t in transactions:
            tf.write(template.render(t=t))
        tf.seek(0)
        new_ledger_content = tf.read()

    status = "no change"
    if not os.path.isfile(ledger_file):
        with open(ledger_file, 'w') as f:
            f.write(new_ledger_content)
        status = "new"
    else:
        # (Removed a redundant f.close() that sat inside the with-block.)
        with open(ledger_file, 'r') as f:
            old_ledger_content = f.read()
        if new_ledger_content != old_ledger_content:
            with open(ledger_file, 'w') as f:
                f.write(new_ledger_content)
            status = "update"
    logging.info(f"{csv_file:30} -> {ledger_file:30} | {status}")
|
||||
|
||||
|
||||
def main(config):
    """Transform every file under config.input_directory into the mirrored
    path under config.output_directory.

    CSV files are converted to .ldg via their matching CsvConfig and the
    mapping rules; .ldg files are copied through when newer than their
    destination copy. Any other file type raises.
    """

    def file_age(file):
        # Seconds since the file was last modified.
        return time.time() - os.path.getmtime(file)

    def get_csv_config(csv_file: str, csv_configs: List[CsvConfig]) -> CsvConfig:
        # Exactly one config must match the file path; zero or several is a
        # configuration error worth failing loudly on.
        cs = [c for c in csv_configs
              if re.match(c.file_match_regex, csv_file)]
        if not cs:
            raise Exception(f"No config for {csv_file=}.")
        elif len(cs) > 1:
            raise Exception(f"More than one config for {csv_file=}.")
        return cs[0]

    def write_unmatched_expenses(unmatched_expenses, mappings_directory):
        # Append template mapping lines for unmatched transactions so the
        # user can curate them into real mapping files later. Note: appends,
        # so repeated runs can accumulate duplicates in unmatched.csv.
        if not unmatched_expenses:
            return
        fn = os.path.join(mappings_directory, "unmatched.csv")
        with open(fn, 'a') as f:
            for e in unmatched_expenses:
                f.write(e)

    def csv_to_ldg_filename(csv_file: str, config: Config):
        # Mirror the input path into the output directory, swapping the
        # .csv suffix for .ldg.
        r = csv_file
        r = r.replace(config.input_directory, config.output_directory)
        r = r.replace(".csv", ".ldg")
        return r

    def process_csv_file(csv_file, mappings: List[CsvMapping], config: Config):
        # Convert one CSV into its ldg counterpart and record any
        # transactions no mapping matched.
        ledger_file = csv_to_ldg_filename(csv_file, config)
        csv_config = get_csv_config(csv_file, config.csv_configs)

        transactions, unmatched = get_transactions(
            csv_file, csv_config, mappings)
        write_unmatched_expenses(unmatched, config.mappings_directory)
        render_to_file(transactions, csv_file, ledger_file)

    def process_ldg_file(ldg_file: str, config: Config):
        # Hand-written ldg files are copied verbatim — when missing at the
        # destination, or when the source is newer than the copy.
        dest_file = ldg_file.replace(
            config.input_directory, config.output_directory)
        status = "no change"
        if not os.path.isfile(dest_file):
            status = "new"
            shutil.copy(ldg_file, dest_file)
        if file_age(dest_file) > file_age(ldg_file):
            shutil.copy(ldg_file, dest_file)
            status = "update"
        logging.info(f"{ldg_file:30} -> {dest_file:30} | {status}")

    input_files = get_files(config.input_directory)
    # NOTE: mutates config in place — csv_configs dicts become CsvConfig
    # instances from here on.
    config.csv_configs = [CsvConfig(**c) for c in config.csv_configs]
    mappings = get_mappings(config.mappings_directory)
    for f in input_files:
        if f.endswith(".csv"):
            process_csv_file(f, mappings, config)
        elif f.endswith(".ldg"):
            process_ldg_file(f, config)
        else:
            m = f"Unsupported file type for '{f}'."
            raise Exception(m)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Log plain messages to stdout so the per-file status lines read cleanly.
    logging.basicConfig(stream=sys.stdout,
                        level=logging.DEBUG,
                        format='%(message)s')
    # Config file may be passed as the first CLI argument.
    config_file = sys.argv[1] if len(sys.argv) > 1 else "config.json"
    with open(config_file, 'r') as f:
        main(Config(**json.load(f)))
|
||||
Reference in New Issue
Block a user