generated from felixm/defaultpy
Implement new mapping format
This commit is contained in:
12
README.md
12
README.md
@@ -4,17 +4,19 @@ Script to transform CSV files into [beancount](https://beancount.github.io/docs/
|
|||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
To transform CSV data into Beancount run `toldg` via `python-poetry`.
|
To transform CSV data into Beancount, first install the package via
|
||||||
|
`poetry install`.
|
||||||
|
|
||||||
|
You can then run `toldg` from the directory where your configuration
|
||||||
|
file and your ledger data are located.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
poetry -P ${LEDGER_DATA_ROOT} run toldg
|
poetry -P ledgerai run toldg
|
||||||
```
|
```
|
||||||
|
|
||||||
To visualize the data with [fava](https://beancount.github.io/fava/) install all
|
To visualize the data with [fava](https://beancount.github.io/fava/), enable the venv and run `fava` from there.
|
||||||
dependencies via `python-poetry`, enable the venv and run `fava` from there.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
poetry install
|
|
||||||
eval "$(poetry env activate)"
|
eval "$(poetry env activate)"
|
||||||
fava your_ledger.beancount
|
fava your_ledger.beancount
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -49,18 +49,14 @@ class Config(BaseModel):
|
|||||||
|
|
||||||
input_directory: Path
|
input_directory: Path
|
||||||
mappings_file: Path
|
mappings_file: Path
|
||||||
descriptions_file: Optional[Path] = None
|
|
||||||
output_file: Path = Path("output.ldg")
|
output_file: Path = Path("output.ldg")
|
||||||
csv_configs: List[CsvConfig]
|
csv_configs: List[CsvConfig]
|
||||||
categories: List[str]
|
categories: List[str]
|
||||||
commodities: List[str]
|
|
||||||
find_duplicates: bool = False
|
find_duplicates: bool = False
|
||||||
|
|
||||||
|
|
||||||
class Transaction(BaseModel):
|
class Transaction(BaseModel):
|
||||||
"""
|
"""Class for ledger transaction to render into ldg file."""
|
||||||
Class for ledger transaction to render into ldg file.
|
|
||||||
"""
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
extra = "forbid"
|
extra = "forbid"
|
||||||
@@ -74,3 +70,16 @@ class Transaction(BaseModel):
|
|||||||
description: str
|
description: str
|
||||||
csv_file: str
|
csv_file: str
|
||||||
row: str
|
row: str
|
||||||
|
narration: Optional[str] = None
|
||||||
|
payee: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class Mapping(BaseModel):
    """A single entry of the mappings file.

    Holds the ``account2`` value for a row and, optionally, a ``narration``
    and a ``payee``.
    """

    # Field order is significant: it determines the key order of ``.dict()``.
    account2: str
    narration: Optional[str] = None
    payee: Optional[str] = None

    class Config:
        # Unknown keys in a mappings entry are rejected rather than ignored.
        extra = "forbid"
|
||||||
|
|||||||
@@ -3,13 +3,13 @@ import datetime
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from typing import Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
import toldg.models
|
import toldg.models
|
||||||
import toldg.predict
|
import toldg.predict
|
||||||
import toldg.utils
|
import toldg.utils
|
||||||
import toldg.write
|
import toldg.write
|
||||||
from toldg.models import Config, CsvConfig, Transaction
|
from toldg.models import Config, CsvConfig, Mapping, Transaction
|
||||||
|
|
||||||
|
|
||||||
def process_ldg_files(config: Config):
|
def process_ldg_files(config: Config):
|
||||||
@@ -76,26 +76,33 @@ def find_duplicates(transactions: List[Transaction]):
|
|||||||
rows.add(row)
|
rows.add(row)
|
||||||
|
|
||||||
|
|
||||||
def apply_mappings(transactions: List[Transaction], mappings: Dict[str, Mapping]):
    """Apply mappings to transactions.

    Each transaction whose ``row`` is a key of ``mappings`` gets its
    ``account2`` replaced by the mapping's ``account2``. ``narration`` and
    ``payee`` are replaced only when the mapping carries a non-empty value.
    Transactions are modified in place; nothing is returned.

    A warning is logged for every transaction without a mapping and for
    every mapping whose row matched no transaction.

    Raises:
        TypeError: If a mapping value is not a ``Mapping`` instance, i.e. the
            mappings were not loaded in the new format.
    """
    unused_mappings = set(mappings)

    for t in transactions:
        if t.row not in mappings:
            logging.warning(f"No mapping for '{t}'.")
            continue

        mapping = mappings[t.row]
        # Explicit check instead of ``assert`` so it is not stripped by
        # ``python -O``.
        if not isinstance(mapping, Mapping):
            raise TypeError("Only new mappings format is supported.")

        t.account2 = mapping.account2
        if mapping.narration:
            t.narration = mapping.narration
        if mapping.payee:
            t.payee = mapping.payee
        unused_mappings.discard(t.row)

    # Sorted so the warnings come out in a stable order between runs.
    for row in sorted(unused_mappings):
        # ``Mapping`` is a model object: fields are attributes, not dict
        # keys, so subscripting it would raise ``TypeError``.
        account2 = mappings[row].account2
        logging.warning(f"Unused mapping '{row}' -> {account2}")
|
||||||
def apply_descriptions(transactions: List[Transaction], descriptions: Dict[str, str]):
    """Replace transaction descriptions using a row-keyed lookup.

    Each transaction whose ``row`` is a key of ``descriptions`` gets its
    ``description`` overwritten in place. Every entry of ``descriptions``
    that matched no transaction is reported with a warning.
    """
    unused_descriptions = set(descriptions)

    for t in transactions:
        if t.row in descriptions:
            t.description = descriptions[t.row]
            unused_descriptions.discard(t.row)

    for row in unused_descriptions:
        # These entries are descriptions, not mappings; say so in the log.
        logging.warning(f"Unused description '{row}' -> {descriptions[row]}.")
|
|
||||||
|
|
||||||
|
|
||||||
def process_csv_files(config: Config):
|
def process_csv_files(config: Config):
|
||||||
@@ -109,13 +116,8 @@ def process_csv_files(config: Config):
|
|||||||
if config.find_duplicates:
|
if config.find_duplicates:
|
||||||
find_duplicates(transactions)
|
find_duplicates(transactions)
|
||||||
|
|
||||||
if config.descriptions_file is not None:
|
|
||||||
descriptions = toldg.utils.read_descriptions(config.descriptions_file)
|
|
||||||
apply_descriptions(transactions, descriptions)
|
|
||||||
|
|
||||||
mappings = toldg.utils.read_mappings(config.mappings_file)
|
mappings = toldg.utils.read_mappings(config.mappings_file)
|
||||||
apply_mappings(transactions, mappings)
|
apply_mappings(transactions, mappings)
|
||||||
|
|
||||||
toldg.predict.add_account2(transactions, config.categories)
|
toldg.predict.add_account2(transactions, config.categories)
|
||||||
toldg.utils.write_mappings(transactions, config.mappings_file)
|
toldg.utils.write_mappings(transactions, config.mappings_file)
|
||||||
toldg.write.render_to_file(transactions, config)
|
toldg.write.render_to_file(transactions, config)
|
||||||
|
|||||||
@@ -3,11 +3,11 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
from toldg.models import Config, Transaction
|
from toldg.models import Config, Mapping, Transaction
|
||||||
|
|
||||||
|
|
||||||
def get_files(directory: Path, ending="") -> List[Path]:
|
def get_files(directory: Path, ending="") -> List[Path]:
|
||||||
@@ -64,46 +64,33 @@ def write_meta(config: Config):
|
|||||||
f.write("\n")
|
f.write("\n")
|
||||||
f.write('option "operating_currency" "USD"\n\n')
|
f.write('option "operating_currency" "USD"\n\n')
|
||||||
|
|
||||||
# Commodity section is not required for beancount
|
|
||||||
# for commodity in config.commodities:
|
|
||||||
# f.write(f"commodity {commodity}\n")
|
|
||||||
# f.write("\n")
|
|
||||||
|
|
||||||
|
|
||||||
def write_mappings(transactions: List[Transaction], mappings_file: Path):
    """Write one mapping per transaction row to the mappings file as JSON.

    The file is overwritten. Keys are the transactions' ``row`` values; each
    value is the dict form of a ``Mapping`` built from the transaction's
    stripped ``account2`` plus its ``narration`` and ``payee`` when those are
    non-empty. If several transactions share a row, the last one wins.
    """
    serialized = {}
    for txn in transactions:
        entry = Mapping(account2=txn.account2.strip())
        # Copy the optional fields only when they hold a non-empty value;
        # otherwise the model default (None) is kept.
        for field in ("narration", "payee"):
            value = getattr(txn, field)
            if value:
                setattr(entry, field, value)
        serialized[txn.row] = entry.dict()

    with open(mappings_file, "w") as out:
        json.dump(serialized, out, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def read_mappings(mappings_file: Path) -> Dict[str, Mapping]:
    """Read mappings from file.

    The file is JSON: an object whose keys are rows and whose values are
    objects with the fields of ``Mapping``.

    Returns:
        A new dict from row to ``Mapping`` instance.

    Raises:
        TypeError: If an entry's value is not a JSON object, e.g. a file that
            stores a list of rows per account.
    """
    with open(mappings_file, "r") as f:
        data = json.load(f)

    # Build a fresh dict instead of overwriting the raw JSON dict while
    # iterating over it.
    mappings = {}
    for row, value in data.items():
        if not isinstance(value, dict):
            raise TypeError(
                f"Only new mappings format is supported (entry '{row}')."
            )
        mappings[row] = Mapping(**value)
    return mappings
|
||||||
|
|
||||||
|
|
||||||
def read_descriptions(descriptions_file: Path) -> Dict[str, str]:
    """Read row descriptions from a line-pair based file.

    The file is read as consecutive pairs of lines: the first line of a pair
    is the CSV row, the second line is the description for that row. Trailing
    newlines are removed from both. If a row occurs more than once, the last
    description wins.

    A final row without a description line is not added to the result; a
    warning is logged for it.

    Returns:
        A dict from CSV row to description.
    """
    descriptions = {}
    with open(descriptions_file, "r") as f:
        # Stream the file instead of loading every line up front.
        lines = (line.rstrip("\n") for line in f)
        for row in lines:
            description = next(lines, None)
            if description is None:
                logging.warning(f"Row without description '{row}'.")
                break
            descriptions[row] = description
    return descriptions
|
|
||||||
|
|
||||||
|
|
||||||
def remove_if_exists(output_file: Path):
|
def remove_if_exists(output_file: Path):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from toldg.models import Config, Transaction
|
|||||||
from toldg.utils import category_to_bean
|
from toldg.utils import category_to_bean
|
||||||
|
|
||||||
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
BEANCOUNT_TRANSACTION_TEMPLATE = """
|
||||||
{t.date} * "{t.description}"
|
{t.date} * {t.description}
|
||||||
{t.account2:<40} {t.debit:<6} {t.currency}
|
{t.account2:<40} {t.debit:<6} {t.currency}
|
||||||
{t.account1:<40} {t.credit:<6} {t.currency}
|
{t.account1:<40} {t.credit:<6} {t.currency}
|
||||||
"""
|
"""
|
||||||
@@ -13,7 +13,18 @@ BEANCOUNT_TRANSACTION_TEMPLATE = """
|
|||||||
|
|
||||||
def format(t):
|
def format(t):
|
||||||
t.date = t.date.replace("/", "-")
|
t.date = t.date.replace("/", "-")
|
||||||
t.description = t.description.replace('"', '\\"')
|
if t.narration and t.payee:
|
||||||
|
# A transaction may have an optional “payee” and/or a “narration.”
|
||||||
|
t.description = f'"{t.payee}" "{t.narration}"'
|
||||||
|
elif t.narration:
|
||||||
|
# If you place a single string on a transaction line, it becomes its narration:
|
||||||
|
t.description = f'"{t.narration}"'
|
||||||
|
elif t.payee:
|
||||||
|
# If you want to set just a payee, put an empty narration string:
|
||||||
|
t.description = f'"{t.payee}" ""'
|
||||||
|
else:
|
||||||
|
t.description = f'"{t.description}"'
|
||||||
|
|
||||||
if not t.debit.startswith("-"):
|
if not t.debit.startswith("-"):
|
||||||
t.debit = " " + t.debit
|
t.debit = " " + t.debit
|
||||||
if not t.credit.startswith("-"):
|
if not t.credit.startswith("-"):
|
||||||
|
|||||||
Reference in New Issue
Block a user