Adding an importer for Vanguard 529 CSV data
farktronix committed Sep 14, 2023 · 1 parent a6639f2 · commit 94b3ef2
Showing 5 changed files with 110 additions and 20 deletions.
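
For orientation, the Vanguard 529 download this commit parses is a multi-section CSV roughly along these lines. This sample is reconstructed from the header_identifier, header_map, and date_format in the new importer; the transaction column order and all values are illustrative, not taken from a real file.

Fund Account Number,Fund Name,Price,Shares,Total Value
1234567890,Some Portfolio,50.00,10.500,525.00
Account Number,Process Date,Trade Date,Transaction Type,Transaction Description,Shares,Share Price,Gross Amount,Net Amount
1234567890,09/01/2023,09/01/2023,Contribution AIP,AIP purchase,2.000,50.00,100.00,100.00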
15 changes: 6 additions & 9 deletions beancount_reds_importers/importers/schwab/schwab_csv_balances.py
@@ -48,21 +48,18 @@ def file_date(self, file):

def get_max_transaction_date(self):
return self.date.date()

def prepare_processed_table(self, rdr):
rdr = rdr.cut('memo', 'security', 'units', 'unit_price')
rdr = rdr.selectne('memo', '--') # we don't need total rows
rdr = rdr.addfield('date', self.date)
return rdr

def prepare_tables(self):
# first row has date
d = self.raw_rdr[0][0].rsplit(' ', 1)[1]
self.date = datetime.datetime.strptime(d, self.date_format)

for section, table in self.alltables.items():
if section in self.config['section_headers']:
table = table.rename(self.header_map)
table = self.convert_columns(table)
table = table.cut('memo', 'security', 'units', 'unit_price')
table = table.selectne('memo', '--') # we don't need total rows
table = table.addfield('date', self.date)
self.alltables[section] = table

def get_balance_positions(self):
for section in self.config['section_headers']:
yield from self.alltables[section].namedtuples()
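
The per-table cleanup that used to happen inside prepare_tables now lives in prepare_processed_table, which the shared reader applies to every section table after prepare_tables runs (see the csv_multitable_reader.py change below). A minimal petl sketch of those three steps, using made-up rows; the real importer stamps self.date (a parsed datetime) rather than a string:

import petl as etl

rdr = etl.wrap([
    ['memo', 'security', 'units', 'unit_price', 'other'],
    ['Money Market', 'ABCXX', '10', '1.0', 'ignored'],
    ['--', '', '', '', 'ignored'],                        # total row
])
rdr = rdr.cut('memo', 'security', 'units', 'unit_price')  # keep only the needed columns
rdr = rdr.selectne('memo', '--')                          # drop total rows
rdr = rdr.addfield('date', '2023-09-14')                  # the importer adds self.date here
print(list(rdr.dicts()))
# [{'memo': 'Money Market', 'security': 'ABCXX', 'units': '10', 'unit_price': '1.0', 'date': '2023-09-14'}]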
@@ -1,5 +1,3 @@
# flake8: noqa

from os import path
from beancount.ingest import regression_pytest as regtest
from beancount_reds_importers.importers.schwab import schwab_csv_balances
80 changes: 80 additions & 0 deletions beancount_reds_importers/importers/vanguard/vanguard_529.py
@@ -0,0 +1,80 @@
""" Vanguard 529 csv importer."""

import petl as etl
import sys
import re
import datetime

from beancount.core.number import D

from beancount_reds_importers.libreader import csv_multitable_reader
from beancount_reds_importers.libtransactionbuilder import investments

class Importer(investments.Importer, csv_multitable_reader.Importer):
IMPORTER_NAME = 'Vanguard 529'

def custom_init(self):
self.max_rounding_error = 0.04
# Vanguard only gives a csv download option for 529 accounts, but they name it "ofxdownload" to tease you
self.filename_pattern_def = '.*ofxdownload.*'
self.header_identifier = 'Fund Account Number,Fund Name,Price,Shares,Total Value.*'
self.get_ticker_info = self.get_ticker_info_from_id
self.date_format = '%m/%d/%Y'
self.funds_db_txt = 'funds_by_ticker'
self.header_map = {
"Process Date": 'date',
"Trade Date": 'tradeDate',
"Transaction Type": 'type',
"Transaction Description": 'memo',
"Shares": 'units',
"Share Price": 'unit_price',
"Gross Amount": 'amount',
"Net Amount": 'total',
"Price": 'unit_price',
}
self.transaction_type_map = {
'Contribution AIP': 'buystock',
'Contribution EBT': 'buystock',
}
self.skip_transaction_types = []
self.section_titles_are_headers = True
self.config['add_currency_precision'] = self.config.get('add_currency_precision', True)

def deep_identify(self, file):
account_number = self.config.get('account_number', '')
return super().deep_identify(file) and account_number in file.head()

def file_date(self, file):
return datetime.datetime.now()

def prepare_tables(self):
ticker_by_desc = {desc: ticker for ticker, _, desc in self.fund_data}

alltables = {}
maxdate = None
for section, table in self.alltables.items():
if section == 'Fund Account Number':
section = 'Balance Positions'
table = table.addfield('security', lambda x: ticker_by_desc.get(x['Fund Name'], x['Fund Name']))
# We need to add a date field here too, but we can't do it yet: the Transactions
# section has to be processed first so that maxdate is set.
elif section == 'Account Number':
section = 'Transactions'
table = table.addfield('security', lambda x: ticker_by_desc.get(x['Investment Name'], x['Investment Name']))
# We have to do our own finding of the max date because the table data hasn't been cleaned up yet
maxdate = max(datetime.datetime.strptime(d[0], self.date_format) for d in table.cut('Trade Date').rename('Trade Date', 'date').namedtuples()).date().strftime(self.date_format)
alltables[section] = table
self.alltables = alltables

self.alltables['Balance Positions'] = self.alltables['Balance Positions'].addfield('date', maxdate)

def is_section_title(self, row):
if len(row) == 0:
return False
return row[0] == 'Fund Account Number' or row[0] == 'Account Number'

def get_transactions(self):
yield from self.alltables['Transactions'].namedtuples()

def get_balance_positions(self):
yield from self.alltables['Balance Positions'].namedtuples()
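
For completeness, a hypothetical beancount import config wiring up the new importer could look like the sketch below. This is not part of the commit: only 'account_number' (checked in deep_identify above) and the (ticker, id, description) shape of fund_data (unpacked in prepare_tables) come from the diff; every other key name is an assumption about what the investments transaction builder expects.

from beancount_reds_importers.importers.vanguard import vanguard_529

fund_info = {
    # (ticker, id, description) three-tuples, matching the ticker_by_desc lookup above;
    # the 'fund_data' key name is an assumption.
    'fund_data': [('VG529X', '0000', 'Some Portfolio')],
}

CONFIG = [
    vanguard_529.Importer({
        'account_number': '1234567890',                    # matched against file.head() in deep_identify()
        'main_account': 'Assets:Vanguard:529:{ticker}',    # assumed key name
        'cash_account': 'Assets:Vanguard:529:Cash',        # assumed key name
        'currency': 'USD',                                 # assumed key name
        'fund_info': fund_info,                            # assumed key name
    }),
]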
16 changes: 11 additions & 5 deletions beancount_reds_importers/libreader/csv_multitable_reader.py
@@ -41,9 +41,6 @@ def file_date(self, file):
raise "Not yet implemented"
pass

def convert_columns(self, rdr):
pass

def is_section_title(self, row):
# Match against rows that contain section titles. Eg: 'section1', 'section2', ...
return len(row) == 1
@@ -59,6 +56,10 @@ def read_file(self, file):

self.raw_rdr = rdr = self.read_raw(file)

skip_offset = 1
if getattr(self, 'section_titles_are_headers', False):
skip_offset = 0

rdr = rdr.skip(getattr(self, 'skip_head_rows', 0)) # chop unwanted file header rows
rdr = rdr.head(len(rdr) - getattr(self, 'skip_tail_rows', 0) - 1) # chop unwanted file footer rows

@@ -73,8 +74,8 @@ def read_file(self, file):
for (s, e) in table_indexes:
if s == e:
continue
table = rdr.skip(s+1) # skip past start index and header row
table = table.head(e-s-1) # chop lines after table section data
table = rdr.skip(s+skip_offset) # skip past start index and header row
table = table.head(e-s-skip_offset) # chop lines after table section data
self.alltables[rdr[s][0]] = table

for section, table in self.alltables.items():
@@ -83,6 +84,11 @@ def read_file(self, file):
self.alltables[section] = table

self.prepare_tables() # to be overridden by importer

for section, table in self.alltables.items():
table = self.process_table(table)
self.alltables[section] = table

self.file_read_done = True

def get_transactions(self):
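The skip_offset logic exists because the Vanguard 529 file uses its section title rows ('Fund Account Number', 'Account Number') as the column headers themselves, while Schwab-style files have a title row followed by a separate header row. A rough plain-Python sketch of the idea; the reader does this with petl's skip/head and its exact index arithmetic may differ:

def slice_section(rows, s, e, section_titles_are_headers=False):
    # s points at the section title row, e just past the section's last data row.
    skip_offset = 0 if section_titles_are_headers else 1
    header = rows[s + skip_offset]        # with offset 0 the title row doubles as the header
    data = rows[s + skip_offset + 1:e]
    return header, data

rows = [
    ['Section One'],                        # Schwab-style: title row, then a header row
    ['memo', 'units'],
    ['Cash', '10'],
    ['Fund Account Number', 'Fund Name'],   # Vanguard-style: the title row is the header
    ['123456', 'Some Fund'],
]
print(slice_section(rows, 0, 3))                                   # (['memo', 'units'], [['Cash', '10']])
print(slice_section(rows, 3, 5, section_titles_are_headers=True))  # (['Fund Account Number', 'Fund Name'], [['123456', 'Some Fund']])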
17 changes: 13 additions & 4 deletions beancount_reds_importers/libreader/csvreader.py
@@ -192,14 +192,23 @@ def read_file(self, file):
rdr = self.prepare_table(rdr)

# process table
rdr = rdr.rename(self.header_map)
rdr = self.convert_columns(rdr)
rdr = self.fix_column_names(rdr)
rdr = self.prepare_processed_table(rdr)
rdr = self.process_table(rdr)
self.rdr = rdr
self.ifile = file
self.file_read_done = True

def process_table(self, rdr):
# Filter out any header mappings that don't exist in this table, since petl doesn't do this for us
# and will complain if we try to rename a header that doesn't exist
existing_headers = {key: value for key, value in self.header_map.items() if key in rdr.header()}
rdr = rdr.rename(existing_headers)

rdr = self.convert_columns(rdr)
rdr = self.fix_column_names(rdr)
rdr = self.prepare_processed_table(rdr)
return rdr


def get_transactions(self):
for ot in self.rdr.namedtuples():
if self.skip_transaction(ot):
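A small petl illustration of why process_table filters the header map first, assuming the strict rename behaviour the comment above describes; the columns here are made up:

import petl as etl

header_map = {'Price': 'unit_price', 'Process Date': 'date'}   # 'Process Date' is absent from this table
t = etl.wrap([['Price', 'Shares'], ['70.10', '10.5']])

existing = {k: v for k, v in header_map.items() if k in t.header()}
print(etl.rename(t, existing).header())                        # ('unit_price', 'Shares')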
