# Patch summary: skip assistance listings whose required source fields are empty.
#
# 1. transform_assistance_listing.py (process_assistance_listing)
#    Adds an `elif` branch between the branch that sets
#    transform_constants.ORPHANED_CFDA and the final `else` transform path.
#    When the source row's `programtitle` or `cfdanumber` is falsy, the branch:
#      - increments Metrics.TOTAL_RECORDS_SKIPPED with the ASSISTANCE_LISTING prefix,
#      - logs an info message carrying both field values,
#      - sets transformation_notes to "empty_assistance_listing",
#      - sets transformed_at to self.transform_time.
#    Because it is an `elif`, such rows never reach the `else` branch.
#
# 2. transform_constants.py (class Metrics)
#    Adds the enum member TOTAL_RECORDS_SKIPPED = "total_records_skipped".
#
# 3. test_transform_assistance_listing.py
#    Adds test_process_empty_assistance_listings (three skipped inserts plus one
#    valid control record) and test_process_empty_assistance_listing_update
#    (an existing target row whose source row has empty values).
#
# NOTE(review): the skip note is an inline string literal, while the preceding
#   branch uses the named constant transform_constants.ORPHANED_CFDA. Consider
#   a named constant for "empty_assistance_listing" as well.
# NOTE(review): the emptiness check is a truthiness test, so whitespace-only
#   values are not treated as empty. Confirm that is intended.
# NOTE(review): in the update test, `.one()` is followed by an
#   `is not None` assertion on its result; presumably redundant, since `.one()`
#   is expected to raise when no row matches. Confirm against SQLAlchemy docs.
# NOTE(review): line breaks and indentation below were reconstructed from a
#   whitespace-collapsed copy of this patch. Verify the context lines against
#   the target files before applying.
diff --git a/api/src/data_migration/transformation/subtask/transform_assistance_listing.py b/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
index c429c5146..dfe773a0f 100644
--- a/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
+++ b/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
@@ -86,6 +86,22 @@ def process_assistance_listing(
             )
             source_assistance_listing.transformation_notes = transform_constants.ORPHANED_CFDA
 
+        elif not source_assistance_listing.programtitle or not source_assistance_listing.cfdanumber:
+            self.increment(
+                transform_constants.Metrics.TOTAL_RECORDS_SKIPPED,
+                prefix=transform_constants.ASSISTANCE_LISTING,
+            )
+            logger.info(
+                "Skipping assistance listing with empty required fields",
+                extra={
+                    **extra,
+                    "programtitle": source_assistance_listing.programtitle,
+                    "cfdanumber": source_assistance_listing.cfdanumber,
+                },
+            )
+            source_assistance_listing.transformation_notes = "empty_assistance_listing"
+            source_assistance_listing.transformed_at = self.transform_time
+
         else:
             # To avoid incrementing metrics for records we fail to transform, record
             # here whether it's an insert/update and we'll increment after transforming
diff --git a/api/src/data_migration/transformation/transform_constants.py b/api/src/data_migration/transformation/transform_constants.py
index 16d023fbd..7bcb1f74a 100644
--- a/api/src/data_migration/transformation/transform_constants.py
+++ b/api/src/data_migration/transformation/transform_constants.py
@@ -42,6 +42,7 @@ class Metrics(StrEnum):
     TOTAL_RECORDS_DELETED = "total_records_deleted"
     TOTAL_RECORDS_INSERTED = "total_records_inserted"
     TOTAL_RECORDS_UPDATED = "total_records_updated"
+    TOTAL_RECORDS_SKIPPED = "total_records_skipped"
     TOTAL_RECORDS_ORPHANED = "total_records_orphaned"
     TOTAL_DUPLICATE_RECORDS_SKIPPED = "total_duplicate_records_skipped"
     TOTAL_HISTORICAL_ORPHANS_SKIPPED = "total_historical_orphans_skipped"
diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py b/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py
index 91c6572e6..dcc766a6c 100644
--- a/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py
+++ b/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py
@@ -155,3 +155,101 @@ def test_process_assistance_listing_delete_but_current_missing(
         validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False)
         assert delete_but_current_missing.transformed_at is not None
         assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
+
+    def test_process_empty_assistance_listings(self, db_session, transform_assistance_listing):
+        """Test that assistance listings with empty required fields are skipped"""
+        # Create opportunities with empty assistance listings
+        opportunity1 = f.OpportunityFactory.create(opportunity_assistance_listings=[])
+
+        # Empty program title
+        empty_program_title = setup_cfda(
+            create_existing=False,
+            opportunity=opportunity1,
+            source_values={"programtitle": "", "cfdanumber": "12.345"},
+        )
+
+        # Empty assistance listing number
+        empty_listing_number = setup_cfda(
+            create_existing=False,
+            opportunity=opportunity1,
+            source_values={"programtitle": "Test Program", "cfdanumber": ""},
+        )
+
+        # Both empty
+        both_empty = setup_cfda(
+            create_existing=False,
+            opportunity=opportunity1,
+            source_values={"programtitle": "", "cfdanumber": ""},
+        )
+
+        # Control - valid record
+        valid_record = setup_cfda(
+            create_existing=False,
+            opportunity=opportunity1,
+            source_values={"programtitle": "Valid Program", "cfdanumber": "67.890"},
+        )
+
+        transform_assistance_listing.run_subtask()
+
+        # Verify empty records were skipped and marked appropriately
+        for record in [empty_program_title, empty_listing_number, both_empty]:
+            assert record.transformed_at is not None
+            assert record.transformation_notes == "empty_assistance_listing"
+
+            # Verify no record was created in the target table
+            assistance_listing = (
+                db_session.query(OpportunityAssistanceListing)
+                .filter(
+                    OpportunityAssistanceListing.opportunity_assistance_listing_id
+                    == record.opp_cfda_id
+                )
+                .one_or_none()
+            )
+            assert assistance_listing is None
+
+        # Verify valid record was processed
+        validate_assistance_listing(db_session, valid_record)
+
+        # Verify metrics
+        metrics = transform_assistance_listing.metrics
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 4
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_SKIPPED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 1
+
+    def test_process_empty_assistance_listing_update(
+        self, db_session, transform_assistance_listing
+    ):
+        """Test that empty assistance listings are skipped even for updates"""
+        opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[])
+
+        # Create a record that exists but will be updated with empty values
+        empty_update = setup_cfda(
+            create_existing=True,
+            opportunity=opportunity,
+            source_values={"programtitle": "", "cfdanumber": ""},
+        )
+
+        transform_assistance_listing.run_subtask()
+
+        # Verify record was marked as processed but not updated
+        assert empty_update.transformed_at is not None
+        assert empty_update.transformation_notes == "empty_assistance_listing"
+
+        # Verify original record in target table remains unchanged
+        assistance_listing = (
+            db_session.query(OpportunityAssistanceListing)
+            .filter(
+                OpportunityAssistanceListing.opportunity_assistance_listing_id
+                == empty_update.opp_cfda_id
+            )
+            .one()
+        )
+        assert assistance_listing is not None
+        # Verify the values weren't updated to empty
+        assert assistance_listing.program_title != ""
+        assert assistance_listing.assistance_listing_number != ""
+
+        # Verify metrics
+        metrics = transform_assistance_listing.metrics
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 1
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_SKIPPED] == 1