Skip to content

Commit

Permalink
[Issue #2699] Skip assistance listing transform processing with empty…
Browse files Browse the repository at this point in the history
… fields (#2711)

Summary
Fixes #2699

Time to review: 30 mins

Changes proposed
Modify `transform_assistance_listings` to skip listings whose
`programtitle` or `cfdanumber` is empty or null.

Context for reviewers
We want the process that transforms assistance listings from the legacy
system to stop creating a record in our assistance listings table when
the program_title/assistance_listing_number is null or empty. The legacy
system uses these "empty" records to connect other pieces of data in a
way that is confusing; that linkage can be implemented differently when
we get to it.

Additional information
See attached unit tests
  • Loading branch information
mikehgrantsgov authored Nov 4, 2024
1 parent f5066c5 commit 7160cf3
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ def process_assistance_listing(
)
source_assistance_listing.transformation_notes = transform_constants.ORPHANED_CFDA

elif not source_assistance_listing.programtitle or not source_assistance_listing.cfdanumber:
self.increment(
transform_constants.Metrics.TOTAL_RECORDS_SKIPPED,
prefix=transform_constants.ASSISTANCE_LISTING,
)
logger.info(
"Skipping assistance listing with empty required fields",
extra={
**extra,
"programtitle": source_assistance_listing.programtitle,
"cfdanumber": source_assistance_listing.cfdanumber,
},
)
source_assistance_listing.transformation_notes = "empty_assistance_listing"
source_assistance_listing.transformed_at = self.transform_time

else:
# To avoid incrementing metrics for records we fail to transform, record
# here whether it's an insert/update and we'll increment after transforming
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class Metrics(StrEnum):
TOTAL_RECORDS_DELETED = "total_records_deleted"
TOTAL_RECORDS_INSERTED = "total_records_inserted"
TOTAL_RECORDS_UPDATED = "total_records_updated"
TOTAL_RECORDS_SKIPPED = "total_records_skipped"
TOTAL_RECORDS_ORPHANED = "total_records_orphaned"
TOTAL_DUPLICATE_RECORDS_SKIPPED = "total_duplicate_records_skipped"
TOTAL_HISTORICAL_ORPHANS_SKIPPED = "total_historical_orphans_skipped"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,101 @@ def test_process_assistance_listing_delete_but_current_missing(
validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False)
assert delete_but_current_missing.transformed_at is not None
assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"

def test_process_empty_assistance_listings(self, db_session, transform_assistance_listing):
    """Source listings missing a program title or CFDA number are skipped, not inserted."""
    # A single opportunity with no pre-existing assistance listings hosts every source row.
    parent_opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[])

    # Three source rows that each lack at least one required field:
    # blank title, blank listing number, and both blank.
    blank_title = setup_cfda(
        create_existing=False,
        opportunity=parent_opportunity,
        source_values={"programtitle": "", "cfdanumber": "12.345"},
    )
    blank_number = setup_cfda(
        create_existing=False,
        opportunity=parent_opportunity,
        source_values={"programtitle": "Test Program", "cfdanumber": ""},
    )
    blank_both = setup_cfda(
        create_existing=False,
        opportunity=parent_opportunity,
        source_values={"programtitle": "", "cfdanumber": ""},
    )

    # Control row carrying both required fields; it should go through normally.
    control_row = setup_cfda(
        create_existing=False,
        opportunity=parent_opportunity,
        source_values={"programtitle": "Valid Program", "cfdanumber": "67.890"},
    )

    transform_assistance_listing.run_subtask()

    for skipped in (blank_title, blank_number, blank_both):
        # The source row is stamped as handled and tagged with the skip note.
        assert skipped.transformed_at is not None
        assert skipped.transformation_notes == "empty_assistance_listing"

        # No row with the matching id may exist in the target table.
        target_lookup = db_session.query(OpportunityAssistanceListing).filter(
            OpportunityAssistanceListing.opportunity_assistance_listing_id
            == skipped.opp_cfda_id
        )
        assert target_lookup.one_or_none() is None

    # The fully populated row must have been transformed.
    validate_assistance_listing(db_session, control_row)

    # Four rows seen in total: three skipped, one inserted.
    recorded = transform_assistance_listing.metrics
    assert recorded[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 4
    assert recorded[transform_constants.Metrics.TOTAL_RECORDS_SKIPPED] == 3
    assert recorded[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 1

def test_process_empty_assistance_listing_update(
    self, db_session, transform_assistance_listing
):
    """An empty source row is skipped and leaves the already-existing listing populated."""
    parent_opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[])

    # Source row with both required fields blank.
    # NOTE(review): create_existing=True presumably makes setup_cfda also create the
    # matching target-table row; confirm against the helper.
    blank_update = setup_cfda(
        create_existing=True,
        opportunity=parent_opportunity,
        source_values={"programtitle": "", "cfdanumber": ""},
    )

    transform_assistance_listing.run_subtask()

    # The source row is stamped as handled and tagged with the skip note.
    assert blank_update.transformed_at is not None
    assert blank_update.transformation_notes == "empty_assistance_listing"

    # Fetch the target row; .one() itself fails unless exactly one row matches.
    target_lookup = db_session.query(OpportunityAssistanceListing).filter(
        OpportunityAssistanceListing.opportunity_assistance_listing_id
        == blank_update.opp_cfda_id
    )
    existing_listing = target_lookup.one()
    assert existing_listing is not None
    # The blank source values must not have been copied onto the target row.
    assert existing_listing.program_title != ""
    assert existing_listing.assistance_listing_number != ""

    # Exactly one row was seen, and it was counted as skipped.
    recorded = transform_assistance_listing.metrics
    assert recorded[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 1
    assert recorded[transform_constants.Metrics.TOTAL_RECORDS_SKIPPED] == 1

0 comments on commit 7160cf3

Please sign in to comment.