Adding an importer for Vanguard 529 CSV data
farktronix committed Sep 14, 2023 · 1 parent a6639f2 · commit 94b3ef2
Showing 5 changed files with 110 additions and 20 deletions.
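
For orientation, the Vanguard 529 download this commit parses is a multi-section CSV roughly along these lines. This sample is reconstructed from the header_identifier, header_map, and date_format in the new importer; the transaction column order and all values are illustrative, not taken from a real file.

Fund Account Number,Fund Name,Price,Shares,Total Value
1234567890,Some Portfolio,50.00,10.500,525.00
Account Number,Process Date,Trade Date,Transaction Type,Transaction Description,Shares,Share Price,Gross Amount,Net Amount
1234567890,09/01/2023,09/01/2023,Contribution AIP,AIP purchase,2.000,50.00,100.00,100.00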
15 changes: 6 additions & 9 deletions beancount_reds_importers/importers/schwab/schwab_csv_balances.py
@@ -48,21 +48,18 @@ def file_date(self, file):

def get_max_transaction_date(self):
return self.date.date()

def prepare_processed_table(self, rdr):
rdr = rdr.cut('memo', 'security', 'units', 'unit_price')
rdr = rdr.selectne('memo', '--') # we don't need total rows
rdr = rdr.addfield('date', self.date)
return rdr

def prepare_tables(self):
# first row has date
d = self.raw_rdr[0][0].rsplit(' ', 1)[1]
self.date = datetime.datetime.strptime(d, self.date_format)

for section, table in self.alltables.items():
if section in self.config['section_headers']:
table = table.rename(self.header_map)
table = self.convert_columns(table)
table = table.cut('memo', 'security', 'units', 'unit_price')
table = table.selectne('memo', '--') # we don't need total rows
table = table.addfield('date', self.date)
self.alltables[section] = table

def get_balance_positions(self):
for section in self.config['section_headers']:
yield from self.alltables[section].namedtuples()
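
The per-table cleanup that used to happen inside prepare_tables now lives in prepare_processed_table, which the shared reader applies to every section table after prepare_tables runs (see the csv_multitable_reader.py change below). A minimal petl sketch of those three steps, using made-up rows; the real importer stamps self.date (a parsed datetime) rather than a string:

import petl as etl

rdr = etl.wrap([
    ['memo', 'security', 'units', 'unit_price', 'other'],
    ['Money Market', 'ABCXX', '10', '1.0', 'ignored'],
    ['--', '', '', '', 'ignored'],                        # total row
])
rdr = rdr.cut('memo', 'security', 'units', 'unit_price')  # keep only the needed columns
rdr = rdr.selectne('memo', '--')                          # drop total rows
rdr = rdr.addfield('date', '2023-09-14')                  # the importer adds self.date here
print(list(rdr.dicts()))
# [{'memo': 'Money Market', 'security': 'ABCXX', 'units': '10', 'unit_price': '1.0', 'date': '2023-09-14'}]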
@@ -1,5 +1,3 @@
# flake8: noqa

from os import path
from beancount.ingest import regression_pytest as regtest
from beancount_reds_importers.importers.schwab import schwab_csv_balances
80 changes: 80 additions & 0 deletions beancount_reds_importers/importers/vanguard/vanguard_529.py
@@ -0,0 +1,80 @@
""" Vanguard 529 csv importer."""

import petl as etl
import sys
import re
import datetime

from beancount.core.number import D

from beancount_reds_importers.libreader import csv_multitable_reader
from beancount_reds_importers.libtransactionbuilder import investments

class Importer(investments.Importer, csv_multitable_reader.Importer):
IMPORTER_NAME = 'Vanguard 529'

def custom_init(self):
self.max_rounding_error = 0.04
# Vanguard only gives a csv download option for 529 accounts, but they name it "ofxdownload" to tease you
self.filename_pattern_def = '.*ofxdownload.*'
self.header_identifier = 'Fund Account Number,Fund Name,Price,Shares,Total Value.*'
self.get_ticker_info = self.get_ticker_info_from_id
self.date_format = '%m/%d/%Y'
self.funds_db_txt = 'funds_by_ticker'
self.header_map = {
"Process Date": 'date',
"Trade Date": 'tradeDate',
"Transaction Type": 'type',
"Transaction Description": 'memo',
"Shares": 'units',
"Share Price": 'unit_price',
"Gross Amount": 'amount',
"Net Amount": 'total',
"Price": 'unit_price',
}
self.transaction_type_map = {
'Contribution AIP': 'buystock',
'Contribution EBT': 'buystock',
}
self.skip_transaction_types = []
self.section_titles_are_headers = True
self.config['add_currency_precision'] = self.config.get('add_currency_precision', True)

def deep_identify(self, file):
account_number = self.config.get('account_number', '')
return super().deep_identify(file) and account_number in file.head()

def file_date(self, file):
return datetime.datetime.now()

def prepare_tables(self):
ticker_by_desc = {desc: ticker for ticker, _, desc in self.fund_data}

alltables = {}
maxdate = None
for section, table in self.alltables.items():
if section == 'Fund Account Number':
section = 'Balance Positions'
table = table.addfield('security', lambda x: ticker_by_desc.get(x['Fund Name'], x['Fund Name']))
# We need to add a date field here too, but we can't do it yet: the Transactions
# section has to be processed first so that maxdate is set.
elif section == 'Account Number':
section = 'Transactions'
table = table.addfield('security', lambda x: ticker_by_desc.get(x['Investment Name'], x['Investment Name']))
# We have to do our own finding of the max date because the table data hasn't been cleaned up yet
maxdate = max(datetime.datetime.strptime(d[0], self.date_format) for d in table.cut('Trade Date').rename('Trade Date', 'date').namedtuples()).date().strftime(self.date_format)
alltables[section] = table
self.alltables = alltables

self.alltables['Balance Positions'] = self.alltables['Balance Positions'].addfield('date', maxdate)

def is_section_title(self, row):
if len(row) == 0:
return False
return row[0] == 'Fund Account Number' or row[0] == 'Account Number'

def get_transactions(self):
yield from self.alltables['Transactions'].namedtuples()

def get_balance_positions(self):
yield from self.alltables['Balance Positions'].namedtuples()
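
For completeness, a hypothetical beancount import config wiring up the new importer could look like the sketch below. This is not part of the commit: only 'account_number' (checked in deep_identify above) and the (ticker, id, description) shape of fund_data (unpacked in prepare_tables) come from the diff; every other key name is an assumption about what the investments transaction builder expects.

from beancount_reds_importers.importers.vanguard import vanguard_529

fund_info = {
    # (ticker, id, description) three-tuples, matching the ticker_by_desc lookup above;
    # the 'fund_data' key name is an assumption.
    'fund_data': [('VG529X', '0000', 'Some Portfolio')],
}

CONFIG = [
    vanguard_529.Importer({
        'account_number': '1234567890',                    # matched against file.head() in deep_identify()
        'main_account': 'Assets:Vanguard:529:{ticker}',    # assumed key name
        'cash_account': 'Assets:Vanguard:529:Cash',        # assumed key name
        'currency': 'USD',                                 # assumed key name
        'fund_info': fund_info,                            # assumed key name
    }),
]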
16 changes: 11 additions & 5 deletions beancount_reds_importers/libreader/csv_multitable_reader.py
@@ -41,9 +41,6 @@ def file_date(self, file):
raise "Not yet implemented"
pass

def convert_columns(self, rdr):
pass

def is_section_title(self, row):
# Match against rows that contain section titles. Eg: 'section1', 'section2', ...
return len(row) == 1
@@ -59,6 +56,10 @@ def read_file(self, file):

self.raw_rdr = rdr = self.read_raw(file)

skip_offset = 1
if getattr(self, 'section_titles_are_headers', False):
skip_offset = 0

rdr = rdr.skip(getattr(self, 'skip_head_rows', 0)) # chop unwanted file header rows
rdr = rdr.head(len(rdr) - getattr(self, 'skip_tail_rows', 0) - 1) # chop unwanted file footer rows

@@ -73,8 +74,8 @@ def read_file(self, file):
for (s, e) in table_indexes:
if s == e:
continue
table = rdr.skip(s+1) # skip past start index and header row
table = table.head(e-s-1) # chop lines after table section data
table = rdr.skip(s+skip_offset) # skip past start index and header row
table = table.head(e-s-skip_offset) # chop lines after table section data
self.alltables[rdr[s][0]] = table

for section, table in self.alltables.items():
@@ -83,6 +84,11 @@ def read_file(self, file):
self.alltables[section] = table

self.prepare_tables() # to be overridden by importer

for section, table in self.alltables.items():
table = self.process_table(table)
self.alltables[section] = table

self.file_read_done = True

def get_transactions(self):
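The skip_offset logic exists because the Vanguard 529 file uses its section title rows ('Fund Account Number', 'Account Number') as the column headers themselves, while Schwab-style files have a title row followed by a separate header row. A rough plain-Python sketch of the idea; the reader does this with petl's skip/head and its exact index arithmetic may differ:

def slice_section(rows, s, e, section_titles_are_headers=False):
    # s points at the section title row, e just past the section's last data row.
    skip_offset = 0 if section_titles_are_headers else 1
    header = rows[s + skip_offset]        # with offset 0 the title row doubles as the header
    data = rows[s + skip_offset + 1:e]
    return header, data

rows = [
    ['Section One'],                        # Schwab-style: title row, then a header row
    ['memo', 'units'],
    ['Cash', '10'],
    ['Fund Account Number', 'Fund Name'],   # Vanguard-style: the title row is the header
    ['123456', 'Some Fund'],
]
print(slice_section(rows, 0, 3))                                   # (['memo', 'units'], [['Cash', '10']])
print(slice_section(rows, 3, 5, section_titles_are_headers=True))  # (['Fund Account Number', 'Fund Name'], [['123456', 'Some Fund']])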
17 changes: 13 additions & 4 deletions beancount_reds_importers/libreader/csvreader.py
@@ -192,14 +192,23 @@ def read_file(self, file):
rdr = self.prepare_table(rdr)

# process table
rdr = rdr.rename(self.header_map)
rdr = self.convert_columns(rdr)
rdr = self.fix_column_names(rdr)
rdr = self.prepare_processed_table(rdr)
rdr = self.process_table(rdr)
self.rdr = rdr
self.ifile = file
self.file_read_done = True

def process_table(self, rdr):
# Filter out any header mappings that don't exist in this table, since petl doesn't do this for us
# and will complain if we try to rename a header that doesn't exist
existing_headers = {key: value for key, value in self.header_map.items() if key in rdr.header()}
rdr = rdr.rename(existing_headers)

rdr = self.convert_columns(rdr)
rdr = self.fix_column_names(rdr)
rdr = self.prepare_processed_table(rdr)
return rdr


def get_transactions(self):
for ot in self.rdr.namedtuples():
if self.skip_transaction(ot):
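A small petl illustration of why process_table filters the header map first, assuming the strict rename behaviour the comment above describes; the columns here are made up:

import petl as etl

header_map = {'Price': 'unit_price', 'Process Date': 'date'}   # 'Process Date' is absent from this table
t = etl.wrap([['Price', 'Shares'], ['70.10', '10.5']])

existing = {k: v for k, v in header_map.items() if k in t.header()}
print(etl.rename(t, existing).header())                        # ('unit_price', 'Shares')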
