diff --git a/beancount_reds_importers/importers/schwab/schwab_csv_balances.py b/beancount_reds_importers/importers/schwab/schwab_csv_balances.py index 21661e6..789f3f5 100644 --- a/beancount_reds_importers/importers/schwab/schwab_csv_balances.py +++ b/beancount_reds_importers/importers/schwab/schwab_csv_balances.py @@ -48,21 +48,18 @@ def file_date(self, file): def get_max_transaction_date(self): return self.date.date() + + def prepare_processed_table(self, rdr): + rdr = rdr.cut('memo', 'security', 'units', 'unit_price') + rdr = rdr.selectne('memo', '--') # we don't need total rows + rdr = rdr.addfield('date', self.date) + return rdr def prepare_tables(self): # first row has date d = self.raw_rdr[0][0].rsplit(' ', 1)[1] self.date = datetime.datetime.strptime(d, self.date_format) - for section, table in self.alltables.items(): - if section in self.config['section_headers']: - table = table.rename(self.header_map) - table = self.convert_columns(table) - table = table.cut('memo', 'security', 'units', 'unit_price') - table = table.selectne('memo', '--') # we don't need total rows - table = table.addfield('date', self.date) - self.alltables[section] = table - def get_balance_positions(self): for section in self.config['section_headers']: yield from self.alltables[section].namedtuples() diff --git a/beancount_reds_importers/importers/schwab/tests/schwab_csv_balances/schwab_csv_Balances_test.py b/beancount_reds_importers/importers/schwab/tests/schwab_csv_balances/schwab_csv_Balances_test.py index 9138a25..efc5d18 100644 --- a/beancount_reds_importers/importers/schwab/tests/schwab_csv_balances/schwab_csv_Balances_test.py +++ b/beancount_reds_importers/importers/schwab/tests/schwab_csv_balances/schwab_csv_Balances_test.py @@ -1,5 +1,3 @@ -# flake8: noqa - from os import path from beancount.ingest import regression_pytest as regtest from beancount_reds_importers.importers.schwab import schwab_csv_balances diff --git a/beancount_reds_importers/importers/vanguard/vanguard_529.py b/beancount_reds_importers/importers/vanguard/vanguard_529.py new file mode 100644 index 0000000..9bb1da2 --- /dev/null +++ b/beancount_reds_importers/importers/vanguard/vanguard_529.py @@ -0,0 +1,80 @@ +""" Vanguard 529 csv importer.""" + +import petl as etl +import sys +import re +import datetime + +from beancount.core.number import D + +from beancount_reds_importers.libreader import csv_multitable_reader +from beancount_reds_importers.libtransactionbuilder import investments + +class Importer(investments.Importer, csv_multitable_reader.Importer): + IMPORTER_NAME = 'Vanguard 529' + + def custom_init(self): + self.max_rounding_error = 0.04 + # Vanguard only gives a csv download option for 529 accounts, but they name it "ofxdownload" to tease you + self.filename_pattern_def = '.*ofxdownload.*' + self.header_identifier = 'Fund Account Number,Fund Name,Price,Shares,Total Value.*' + self.get_ticker_info = self.get_ticker_info_from_id + self.date_format = '%m/%d/%Y' + self.funds_db_txt = 'funds_by_ticker' + self.header_map = { + "Process Date": 'date', + "Trade Date": 'tradeDate', + "Transaction Type": 'type', + "Transaction Description": 'memo', + "Shares": 'units', + "Share Price": 'unit_price', + "Gross Amount": 'amount', + "Net Amount": 'total', + "Price": 'unit_price', + } + self.transaction_type_map = { + 'Contribution AIP': 'buystock', + 'Contribution EBT': 'buystock', + } + self.skip_transaction_types = [] + self.section_titles_are_headers = True + self.config['add_currency_precision'] = self.config.get('add_currency_precision', True) + + def deep_identify(self, file): + account_number = self.config.get('account_number', '') + return super().deep_identify(file) and account_number in file.head() + + def file_date(self, file): + return datetime.datetime.now() + + def prepare_tables(self): + ticker_by_desc = {desc: ticker for ticker, _, desc in self.fund_data} + + alltables = {} + maxdate = None + for section, table in self.alltables.items(): + if section == 'Fund Account Number': + section = 'Balance Positions' + table = table.addfield('security', lambda x: ticker_by_desc.get(x['Fund Name'], x['Fund Name'])) + # We need to add a date field but we can't do that yet because we need to make sure + # the transactions section has been processed and set + elif section == 'Account Number': + section = 'Transactions' + table = table.addfield('security', lambda x: ticker_by_desc.get(x['Investment Name'], x['Investment Name'])) + # We have to do our own finding of the max date because the table data hasn't been cleaned up yet + maxdate = max(datetime.datetime.strptime(d[0], self.date_format) for d in table.cut('Trade Date').rename('Trade Date', 'date').namedtuples()).date().strftime(self.date_format) + alltables[section] = table + self.alltables = alltables + + self.alltables['Balance Positions'] = self.alltables['Balance Positions'].addfield('date', maxdate) + + def is_section_title(self, row): + if len(row) == 0: + return False + return row[0] == 'Fund Account Number' or row[0] == 'Account Number' + + def get_transactions(self): + yield from self.alltables['Transactions'].namedtuples() + + def get_balance_positions(self): + yield from self.alltables['Balance Positions'].namedtuples() diff --git a/beancount_reds_importers/libreader/csv_multitable_reader.py b/beancount_reds_importers/libreader/csv_multitable_reader.py index 8a8f4ef..bb31ce7 100644 --- a/beancount_reds_importers/libreader/csv_multitable_reader.py +++ b/beancount_reds_importers/libreader/csv_multitable_reader.py @@ -41,9 +41,6 @@ def file_date(self, file): raise "Not yet implemented" pass - def convert_columns(self, rdr): - pass - def is_section_title(self, row): # Match against rows that contain section titles. Eg: 'section1', 'section2', ... return len(row) == 1 @@ -59,6 +56,10 @@ def read_file(self, file): self.raw_rdr = rdr = self.read_raw(file) + skip_offset = 1 + if getattr(self, 'section_titles_are_headers', False): + skip_offset = 0 + rdr = rdr.skip(getattr(self, 'skip_head_rows', 0)) # chop unwanted file header rows rdr = rdr.head(len(rdr) - getattr(self, 'skip_tail_rows', 0) - 1) # chop unwanted file footer rows @@ -73,8 +74,8 @@ def read_file(self, file): for (s, e) in table_indexes: if s == e: continue - table = rdr.skip(s+1) # skip past start index and header row - table = table.head(e-s-1) # chop lines after table section data + table = rdr.skip(s+skip_offset) # skip past start index and header row + table = table.head(e-s-skip_offset) # chop lines after table section data self.alltables[rdr[s][0]] = table for section, table in self.alltables.items(): @@ -83,6 +84,11 @@ def read_file(self, file): self.alltables[section] = table self.prepare_tables() # to be overridden by importer + + for section, table in self.alltables.items(): + table = self.process_table(table) + self.alltables[section] = table + self.file_read_done = True def get_transactions(self): diff --git a/beancount_reds_importers/libreader/csvreader.py b/beancount_reds_importers/libreader/csvreader.py index 078fc1b..534d29a 100644 --- a/beancount_reds_importers/libreader/csvreader.py +++ b/beancount_reds_importers/libreader/csvreader.py @@ -192,14 +192,23 @@ def read_file(self, file): rdr = self.prepare_table(rdr) # process table - rdr = rdr.rename(self.header_map) - rdr = self.convert_columns(rdr) - rdr = self.fix_column_names(rdr) - rdr = self.prepare_processed_table(rdr) + rdr = self.process_table(rdr) self.rdr = rdr self.ifile = file self.file_read_done = True + def process_table(self, rdr): + # Filter out any header mappings that don't exist in this table, since petl doesn't do this for us + # and will complain if we try to rename a header that doesn't exist + existing_headers = {key: value for key, value in self.header_map.items() if key in rdr.header()} + rdr = rdr.rename(existing_headers) + + rdr = self.convert_columns(rdr) + rdr = self.fix_column_names(rdr) + rdr = self.prepare_processed_table(rdr) + return rdr + + def get_transactions(self): for ot in self.rdr.namedtuples(): if self.skip_transaction(ot):