Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pdfreader and bamboohr paycheck importer #94

Merged
merged 7 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/conventionalcommits.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: Conventional Commits
on:
pull_request:
branches: [ main ]
types: [opened, reopened, edited]

jobs:
build:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
branches: [ main ]
pull_request:
branches: [ main ]
types: [opened, reopened, edited]

jobs:
build:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
.debug-*

# Translations
*.mo
Expand Down
64 changes: 64 additions & 0 deletions beancount_reds_importers/importers/bamboohr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""BambooHR paycheck importer"""

import re

from dateparser.search import search_dates

from beancount_reds_importers.libreader import pdfreader
from beancount_reds_importers.libtransactionbuilder import paycheck

# BambooHR exports paycheck stubs as PDF files, with multiple tables spread
# across multiple pages. Call this importer with a config that looks like:
#
#   bamboohr.Importer({"desc": "Paycheck (My Company)",
#                      "main_account": "Income:Employment",
#                      # See beancount_reds_importers/libtransactionbuilder/paycheck.py
#                      # for a sample template.
#                      "paycheck_template": {},
#                      "currency": "PENNIES",
#                      }),
#


class Importer(paycheck.Importer, pdfreader.Importer):
    """Paycheck importer for PDF pay stubs exported from BambooHR.

    Combines the PDF table reader with the paycheck transaction builder:
    ``custom_init`` configures table extraction, ``prepare_tables`` normalizes
    the extracted column headers, and ``paycheck_date`` pulls the transaction
    date out of the text surrounding the tables.
    """

    IMPORTER_NAME = "BambooHR Paycheck"

    def custom_init(self):
        """Set the reader/builder settings specific to BambooHR pay stubs."""
        self.max_rounding_error = 0.04
        self.filename_pattern_def = r"PayStub.*\.pdf"
        self.pdf_table_extraction_settings = {"join_tolerance": 4, "snap_tolerance": 4}
        self.pdf_table_extraction_crop = (0, 40, 0, 0)
        self.debug = False

        # Headers that need an explicit target name; every other header is
        # normalized mechanically in prepare_tables().
        self.header_map = {
            "Deduction Type": "description",
            "Pay Type": "description",
            "Paycheck Total": "amount",
            "Tax Type": "description",
        }

        self.currency_fields = ["ytd_total", "amount"]

    def paycheck_date(self, input_file):
        """Return the paycheck date found in the stub's metadata text.

        Reads the file first if that has not happened yet, then returns the
        third date that ``search_dates`` finds in ``self.meta_text``.

        Raises:
            IndexError: if fewer than three dates are found.
        """
        if not self.file_read_done:
            self.read_file(input_file)
        # search_dates() returns None (not an empty list) when nothing matches.
        found = search_dates(self.meta_text) or []
        dates = [date for _, date in found]
        # NOTE(review): the third date is presumably the pay date, following
        # the pay-period start and end dates -- confirm against a sample stub.
        if len(dates) < 3:
            raise IndexError(
                f"expected at least 3 dates in the paystub text, found {len(dates)}"
            )
        return dates[2].date()

    def prepare_tables(self):
        """Rename every table's columns to normalized names and convert them.

        Each entry of ``self.alltables`` is replaced by a copy whose headers
        were passed through ``valid_header`` and which was then run through
        ``self.convert_columns``.
        """

        def valid_header(label):
            """Map a raw column header to its normalized field name."""
            if label in self.header_map:
                return self.header_map[label]

            # e.g. "2024 Total" -> "ytd_total": lower-case, underscores for
            # spaces, and a 20xx year replaced by "ytd".
            label = label.lower().replace(" ", "_")
            return re.sub(r"20\d{2}", "ytd", label)

        for section, table in self.alltables.items():
            # rename columns
            for header in table.header():
                table = table.rename(header, valid_header(header))
            # convert columns
            table = self.convert_columns(table)

            # Rebinding an existing key is safe while iterating items().
            self.alltables[section] = table

    def build_metadata(self, file, metatype=None, data=None):
        """Return the metadata attached to the generated transaction.

        ``file``, ``metatype`` and ``data`` are accepted for interface
        compatibility and are not used; ``data`` defaults to ``None`` rather
        than a shared mutable dict.
        """
        return {"filing_account": self.config["main_account"]}
72 changes: 72 additions & 0 deletions beancount_reds_importers/importers/genericpdf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Generic pdf paycheck importer"""

import datetime

from beancount_reds_importers.libreader import pdfreader
from beancount_reds_importers.libtransactionbuilder import paycheck

# Generic PDF paystub importer. Use this as a starting point for building your
# own PDF paystub importer. Call this importer with a config that looks like:
#
#   genericpdf.Importer({"desc": "Paycheck (My Company)",
#                        "main_account": "Income:Employment",
#                        # See beancount_reds_importers/libtransactionbuilder/paycheck.py
#                        # for a sample template.
#                        "paycheck_template": {},
#                        "currency": "PENNIES",
#                        }),
#


class Importer(paycheck.Importer, pdfreader.Importer):
    """Generic paycheck importer for PDF pay stubs.

    Combines the PDF table reader with the paycheck transaction builder:
    ``custom_init`` configures table extraction, ``prepare_tables`` normalizes
    the extracted column headers, and ``paycheck_date`` reads the transaction
    date from the header row of ``table_1``.
    """

    IMPORTER_NAME = "Generic PDF Paycheck"

    def custom_init(self):
        """Set the reader/builder settings for the generic PDF pay stub."""
        self.max_rounding_error = 0.04
        self.filename_pattern_def = r"paystub.*\.pdf"
        self.pdf_table_extraction_settings = {"join_tolerance": 4, "snap_tolerance": 4}
        self.pdf_table_extraction_crop = (0, 0, 0, 0)
        self.pdf_table_title_height = 0
        # Set this true as you play with the extraction settings and crop to
        # view images of what the pdf parser detects.
        # NOTE(review): shipped enabled -- confirm this is intended outside of
        # tuning sessions.
        self.debug = True

        # Headers that need an explicit target name; every other header is
        # normalized mechanically in prepare_tables().
        self.header_map = {
            "CURRENT": "amount",
            "CURRENT PAY": "amount",
            "PAY DESCRIPTION": "description",
            "DEDUCTIONS": "description",
            "TAX TYPE": "description",
            "TOTAL NET PAY": "description",
            "YTD": "ytd",
            "YTD PAY": "ytd",
        }

        self.currency_fields = ["ytd", "amount"]
        self.date_format = "%m/%d/%Y"

    def paycheck_date(self, input_file):
        """Return the paycheck date parsed from the last header of ``table_1``.

        Reads the file first if that has not happened yet. The parsed
        ``datetime`` is also stored on ``self.date``.

        Raises:
            ValueError: if the header cell does not match ``self.date_format``.
        """
        if not self.file_read_done:
            self.read_file(input_file)
        # Only the final header cell is needed; it holds the date string.
        *_, raw_date = self.alltables["table_1"].header()
        self.date = datetime.datetime.strptime(raw_date, self.date_format)
        return self.date.date()

    def prepare_tables(self):
        """Rename every table's columns to normalized names and convert them.

        Each entry of ``self.alltables`` is replaced by a copy whose headers
        were passed through ``valid_header`` and which was then run through
        ``self.convert_columns``.
        """

        def valid_header(label):
            """Map a raw column header to its normalized field name."""
            if label in self.header_map:
                return self.header_map[label]

            return label.lower().replace(" ", "_")

        for section, table in self.alltables.items():
            # rename columns
            for header in table.header():
                if section == "table_6" and header == "":
                    # table_6 has an unlabeled column; treat it as the amount.
                    table = table.rename(header, "amount")
                else:
                    table = table.rename(header, valid_header(header))
            # convert columns
            table = self.convert_columns(table)

            # Rebinding an existing key is safe while iterating items().
            self.alltables[section] = table

    def build_metadata(self, file, metatype=None, data=None):
        """Return the metadata attached to the generated transaction.

        ``file``, ``metatype`` and ``data`` are accepted for interface
        compatibility and are not used; ``data`` defaults to ``None`` rather
        than a shared mutable dict.
        """
        return {"filing_account": self.config["main_account"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from os import path

from beancount.ingest import regression_pytest as regtest

from beancount_reds_importers.importers import genericpdf


@regtest.with_importer(
    genericpdf.Importer(
        {
            "desc": "Paycheck",
            "main_account": "Income:Salary:FakeCompany",
            # Maps each extracted table's row descriptions to posting accounts.
            "paycheck_template": {
                "table_4": {
                    "Bonus": "Income:Bonus:FakeCompany",
                    "Overtime": "Income:Overtime:FakeCompany",
                    "Regular": "Income:Salary:FakeCompany",
                },
                "table_5": {
                    "Federal MED/EE": "Expenses:Taxes:Medicare",
                    "Federal OASDI/EE": "Expenses:Taxes:SocialSecurity",
                    "Federal Withholding": "Expenses:Taxes:FederalIncome",
                    "State Withholding": "Expenses:Taxes:StateIncome",
                },
                "table_6": {"CURRENT": "Assets:Checking:ABCBank"},
            },
            "currency": "USD",
        }
    )
)
@regtest.with_testdir(path.dirname(__file__))
class TestGenericPDF(regtest.ImporterTestBase):
    """Regression test for the generic PDF paycheck importer.

    Runs against the sample files that sit next to this module.
    """
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

2023-12-03 * "Paycheck"
filing_account: "Income:Salary:FakeCompany"
Assets:Checking:ABCBank 4228.00 USD
Expenses:Taxes:FederalIncome 416.00 USD
Expenses:Taxes:Medicare 128.00 USD
Expenses:Taxes:SocialSecurity 96.00 USD
Expenses:Taxes:StateIncome 32.00 USD
Income:Bonus:FakeCompany -3000.00 USD
Income:Overtime:FakeCompany -300.00 USD
Income:Salary:FakeCompany -1600.00 USD
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Income:Salary:FakeCompany
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2023-12-03
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
paystub.sample.pdf
Loading