Skip to content

Commit

Permalink
feat! : add generic json reader
Browse files Browse the repository at this point in the history
BREAKING CHANGE: old jsonreader.py is renamed to schwabjsonreader.py
  • Loading branch information
DEVNODEREACT committed Oct 6, 2024
1 parent 67b09f4 commit 8194ac4
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 69 deletions.
106 changes: 37 additions & 69 deletions beancount_reds_importers/libreader/jsonreader.py
Original file line number Diff line number Diff line change
@@ -1,92 +1,60 @@
"""JSON importer module for beancount to be used along with investment/banking/other importer modules in
beancount_reds_importers.
#!/usr/bin/env python3

------------------------------
This is WIP and incomplete.
------------------------------
"""JSON reader for beancount-reds-importers.
JSON schemas vary widely. This one is based on Charles Schwab's json format. In the future, the
goal is to make this reader automatically "understand" the schema of any json given to it.
JSON files have widely varying specifications, and thus, this is a very generic reader, and most of
the logic will have to be in the institution-specific readers.
Until that happens, perhaps this file should be renamed to schwabjsonreader.py.
"""

import json

# import re
import warnings

# import datetime
# import ofxparse
# from collections import namedtuple
from beancount.ingest import importer
from bs4.builder import XMLParsedAsHTMLWarning

from beancount_reds_importers.libreader import reader

warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)


class Importer(reader.Reader, importer.ImporterProtocol):
FILE_EXTS = ["json"]

def initialize_reader(self, file):
if getattr(self, "file", None) != file:
self.file = file
self.reader_ready = False
with open(file.name, 'r') as f:
self.json_data = json.load(f)
self.reader_ready = self.deep_identify(file)
if self.reader_ready:
self.file_read_done = False
if self.reader_ready:
self.set_currency()

def deep_identify(self, file):
# identify based on filename
return True
"""For overriding by institution specific importer which can check if an account name
matches, and other such things."""
# Default to False; otherwise jsonreader.initialize_reader fails to execute because the "config" attribute is missing.
return False

def file_date(self, file):
"Get the maximum date from the file."
self.initialize(file) # self.date_format gets set via this
self.read_file(file)
return max(ot.date for ot in self.get_transactions()).date()
"""Get the ending date of the statement."""
if not getattr(self, "json_data", None):
self.initialize(file)
# TODO:
return None

def read_file(self, file):
with open(file.name) as fh:
self.rdr = json.load(fh)

# transactions = []
# for transaction in self.rdr['BrokerageTransactions']:
# raw_ot = Transaction(
# date = transaction['Date'],
# type = transaction['Action'],
# security = transaction['Symbol'],
# memo = transaction['Description'],
# unit_price = transaction['Price'],
# units = transaction['Quantity'],
# fees = transaction['Fees & Comm'],
# total = transaction['Amount']
# )

# def get_transactions(self):
# Transaction = namedtuple('Transaction', ['date', 'type', 'security', 'memo', 'unit_price',
# 'units', 'fees', 'total'])
# for transaction in self.rdr['BrokerageTransactions']:
# raw_ot = Transaction(
# date = transaction['Date'],
# type = transaction['Action'],
# security = transaction['Symbol'],
# memo = transaction['Description'],
# unit_price = transaction['Price'],
# units = transaction['Quantity'],
# fees = transaction['Fees & Comm'],
# total = transaction['Amount']
# )
# ot = self.fixup(ot)
# import pdb; pdb.set_trace()
# yield ot

def fixup(self, ot):
ot.date = self.convert_date(ot.date)

# def convert_date(d):
# return datetime.datetime.strptime(d, self.date_format)

def get_balance_assertion_date(self):
return None
with open(file.name, 'r') as f:
self.json_data = json.load(f)

def get_json_elements(self, json_path, json_interpreter=lambda x: x):
    """Yield the elements stored at a dotted path inside the loaded JSON data.

    Typically, transactions are stored as a list somewhere inside the JSON
    document; this walks down to that list and yields each entry.

    Args:
        json_path: Dot-separated keys to follow from the root of
            ``self.json_data``, e.g. ``"Transactions.Transaction"``.
        json_interpreter: Callable applied to each element before it is
            yielded. Defaults to the identity function.

    Yields:
        ``json_interpreter(elem)`` for each element found at ``json_path``.
        Nothing is yielded when the path cannot be followed, or when the
        value at the path is a JSON null.
    """
    node = self.json_data
    for key in json_path.split("."):
        # Only JSON objects can be descended into by key. Without the type
        # check, a list or string on the path would satisfy ``in`` through
        # membership/substring matching and then raise TypeError on lookup.
        if not isinstance(node, dict) or key not in node:
            return
        node = node[key]
    if node is None:
        # A JSON null at the target path means there is nothing to iterate.
        return
    for elem in node:
        yield json_interpreter(elem)

def get_transactions(self):
    """Yield each transaction found at the default JSON path.

    ``Transactions.Transaction`` is only a placeholder path; it needs to be
    overridden in the institution-specific importer.
    """
    default_path = "Transactions.Transaction"
    for transaction in self.get_json_elements(default_path):
        yield transaction
92 changes: 92 additions & 0 deletions beancount_reds_importers/libreader/schwabjsonreader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""JSON importer module for beancount to be used along with investment/banking/other importer modules in
beancount_reds_importers.
------------------------------
This is WIP and incomplete.
------------------------------
JSON schemas vary widely. This one is based on Charles Schwab's json format. In the future, the
goal is to make this reader automatically "understand" the schema of any json given to it.
Until that happens, this Schwab-specific reader lives in schwabjsonreader.py (it was previously named jsonreader.py).
"""

import json

# import re
import warnings

# import datetime
# import ofxparse
# from collections import namedtuple
from beancount.ingest import importer
from bs4.builder import XMLParsedAsHTMLWarning

from beancount_reds_importers.libreader import reader

warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)


class Importer(reader.Reader, importer.ImporterProtocol):
    """Work-in-progress reader for JSON files in Charles Schwab's format.

    The transaction extraction itself is still only sketched out in the
    commented-out code below; ``get_transactions`` and ``convert_date`` are
    therefore expected to come from elsewhere (base class or subclass).
    """

    FILE_EXTS = ["json"]

    def initialize_reader(self, file):
        """Remember ``file`` and record whether this reader accepts it.

        Calling this again with the same ``file`` is a no-op, so previously
        computed state is kept.
        """
        if getattr(self, "file", None) != file:
            self.file = file
            self.reader_ready = self.deep_identify(file)
            if self.reader_ready:
                self.file_read_done = False

    def deep_identify(self, file):
        """Return whether ``file`` belongs to this importer.

        Currently every file is accepted; identification is based on the
        filename only.
        """
        # identify based on filename
        return True

    def file_date(self, file):
        """Get the maximum transaction date from the file.

        Returns:
            The latest transaction date, or ``None`` when the file contains
            no transactions (instead of raising ``ValueError`` from ``max``).
        """
        self.initialize(file)  # self.date_format gets set via this
        self.read_file(file)
        latest = max((ot.date for ot in self.get_transactions()), default=None)
        if latest is None:
            return None
        return latest.date()

    def read_file(self, file):
        """Parse the JSON document at ``file.name`` into ``self.rdr``."""
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file.name, encoding="utf-8") as fh:
            self.rdr = json.load(fh)

        # transactions = []
        # for transaction in self.rdr['BrokerageTransactions']:
        #     raw_ot = Transaction(
        #             date = transaction['Date'],
        #             type = transaction['Action'],
        #             security = transaction['Symbol'],
        #             memo = transaction['Description'],
        #             unit_price = transaction['Price'],
        #             units = transaction['Quantity'],
        #             fees = transaction['Fees & Comm'],
        #             total = transaction['Amount']
        #             )

    # def get_transactions(self):
    #     Transaction = namedtuple('Transaction', ['date', 'type', 'security', 'memo', 'unit_price',
    #                                              'units', 'fees', 'total'])
    #     for transaction in self.rdr['BrokerageTransactions']:
    #         raw_ot = Transaction(
    #                 date = transaction['Date'],
    #                 type = transaction['Action'],
    #                 security = transaction['Symbol'],
    #                 memo = transaction['Description'],
    #                 unit_price = transaction['Price'],
    #                 units = transaction['Quantity'],
    #                 fees = transaction['Fees & Comm'],
    #                 total = transaction['Amount']
    #                 )
    #         ot = self.fixup(ot)
    #         import pdb; pdb.set_trace()
    #         yield ot

    def fixup(self, ot):
        """Convert ``ot.date`` in place and return ``ot``.

        The transaction is returned so that callers can write
        ``ot = self.fixup(ot)``, as the sketched ``get_transactions`` does.
        """
        ot.date = self.convert_date(ot.date)
        return ot

    # def convert_date(d):
    #     return datetime.datetime.strptime(d, self.date_format)

    def get_balance_assertion_date(self):
        """Return ``None``: this reader provides no balance assertion date."""
        return None

0 comments on commit 8194ac4

Please sign in to comment.