[Issue #2697] Enable agency data load in ELT process (#2698)
## Summary
Fixes #2697

### Time to review: __5 mins__

## Changes proposed
- Enable the extract-load process on the TGROUPS table (agency data); see the sketch below
- Enable the Agency transform steps
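
A minimal sketch of what that table configuration looks like, assuming a plain module-level list (the variable name here is illustrative; the table entries and the `tgroups` addition come from the diff further down):

```python
# Illustrative sketch only - the variable name is assumed, the entries mirror the diff.
TABLES_TO_LOAD: list[str] = [
    # ... other Oracle staging tables ...
    "tfundinstr_synopsis",
    "tfundinstr_synopsis_hist",
    # Previously commented out while we waited on permissions:
    "tgroups",  # agency data
]
```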

## Context for reviewers
I've tested this in the lower environments and in prod by manually
configuring the job to load the data, so all of the agency data is
already there. This change just picks up updates going forward.

The extract-load process has a fixed set of tables it is configured to
load, and the transformation process has per-table transform classes
that are toggled via env vars, so the configuration change is minor.
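
To make the env-var wiring concrete, here is a minimal sketch of the transform config, using pydantic-settings as a stand-in for the project's `PydanticBaseEnvConfig` base class (the `env_prefix` mapping is an assumption about how that base class behaves; the field names and defaults mirror the diff below):

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class TransformOracleDataTaskConfig(BaseSettings):
    # Assumed prefix so each field maps to a TRANSFORM_ORACLE_DATA_* env var;
    # the real PydanticBaseEnvConfig presumably provides equivalent behavior.
    model_config = SettingsConfigDict(env_prefix="TRANSFORM_ORACLE_DATA_")

    enable_applicant_type: bool = True      # TRANSFORM_ORACLE_DATA_ENABLE_APPLICANT_TYPE
    enable_funding_category: bool = True    # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
    enable_funding_instrument: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
    enable_agency: bool = True              # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY (was False)
```

With the default flipped to `True`, the agency transform runs everywhere unless explicitly disabled, e.g. by setting `TRANSFORM_ORACLE_DATA_ENABLE_AGENCY=false`.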

The unit test changes I made show that the agency transforms now run
when the "full" job is executed.
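
Roughly, the test additions look like this (abridged sketch: `setup_agency`, `validate_agency`, and the subset assertion on metrics are taken from the diff below; the rest of the test body is elided):

```python
def test_all_inserts(self, db_session, transform_oracle_data_task):
    # Stage a parent agency and a subagency alongside the opportunity data.
    parent_agency = setup_agency("INSERTAGENCY", create_existing=False)
    subagency = setup_agency("INSERTAGENCY-ABC", create_existing=False)

    # ... existing setup and task execution, unchanged and omitted here ...

    # Confirm both staged agencies were transformed into API-side records.
    validate_agency(db_session, parent_agency)
    validate_agency(db_session, subagency)

    # Metrics are asserted as a subset, so the two extra agency records show up
    # in the processed/inserted totals (37 -> 39 and 31 -> 33).
    assert {
        transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 39,
        transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED: 33,
    }.items() <= transform_oracle_data_task.metrics.items()
```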
chouinar authored Nov 4, 2024
1 parent 7160cf3 commit 785079e
Showing 3 changed files with 36 additions and 12 deletions.
api/src/data_migration/load/load_oracle_data_task.py (1 addition, 1 deletion)
@@ -37,7 +37,7 @@
"tfundinstr_forecast_hist",
"tfundinstr_synopsis",
"tfundinstr_synopsis_hist",
# tgroups, # Want to hold on this until we have permissions
"tgroups",
]


@@ -41,7 +41,7 @@ class TransformOracleDataTaskConfig(PydanticBaseEnvConfig):
enable_applicant_type: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_APPLICANT_TYPE
enable_funding_category: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
enable_funding_instrument: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
enable_agency: bool = False # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY
enable_agency: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY


class TransformOracleDataTask(Task):
@@ -8,9 +8,11 @@
from tests.conftest import BaseTestClass
from tests.src.data_migration.transformation.conftest import (
get_summary_from_source,
setup_agency,
setup_cfda,
setup_opportunity,
setup_synopsis_forecast,
validate_agency,
validate_applicant_type,
validate_assistance_listing,
validate_funding_category,
@@ -40,7 +42,12 @@ def transform_oracle_data_task(

def test_all_inserts(self, db_session, transform_oracle_data_task):
# Test that we're fully capable of processing inserts across an entire opportunity record
opportunity = setup_opportunity(create_existing=False)
parent_agency = setup_agency("INSERTAGENCY", create_existing=False)
subagency = setup_agency("INSERTAGENCY-ABC", create_existing=False)

opportunity = setup_opportunity(
create_existing=False, source_values={"owningagency": "INSERTAGENCY-ABC"}
)

cfda1 = setup_cfda(create_existing=False, opportunity=opportunity)
cfda2 = setup_cfda(create_existing=False, opportunity=opportunity)
@@ -178,9 +185,12 @@ def test_all_inserts(self, db_session, transform_oracle_data_task):
)
validate_summary_and_nested(db_session, synopsis_hist, [], [], [])

validate_agency(db_session, parent_agency)
validate_agency(db_session, subagency)

assert {
transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 37,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED: 31,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 39,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED: 33,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED: 0,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED: 0,
transform_oracle_data_task.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED: 3,
}.items() <= transform_oracle_data_task.metrics.items()

def test_mix_of_inserts_updates_deletes(self, db_session, transform_oracle_data_task):
parent_agency = setup_agency("UPDATEAGENCY", create_existing=True)
subagency = setup_agency(
"UPDATEAGENCY-XYZ",
create_existing=True,
deleted_fields={"AgencyContactEMail2", "ldapGp", "description"},
)

existing_opportunity = f.OpportunityFactory(
no_current_summary=True, opportunity_assistance_listings=[]
no_current_summary=True, opportunity_assistance_listings=[], agency="UPDATEAGENCY"
)
opportunity = f.StagingTopportunityFactory(
opportunity_id=existing_opportunity.opportunity_id, cfdas=[]
@@ -428,23 +445,28 @@ def test_mix_of_inserts_updates_deletes(self, db_session, transform_oracle_data_task):
)
validate_summary_and_nested(db_session, synopsis_hist_insert, [], [], [])

print(transform_oracle_data_task.metrics)
validate_agency(db_session, parent_agency)
validate_agency(db_session, subagency, deleted_fields={"ldapGp", "description"})

assert {
transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 41,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 43,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED: 8,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED: 9,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED: 11,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED: 8,
transform_oracle_data_task.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED: 15,
transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED: 0,
transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED: 1,
}.items() <= transform_oracle_data_task.metrics.items()

def test_delete_opportunity_with_deleted_children(self, db_session, transform_oracle_data_task):
agency = setup_agency("AGENCYXYZ", create_existing=True)

# We create an opportunity with a synopsis/forecast record, and various other child values
# We then delete all of them at once. Deleting the opportunity will recursively delete the others
# but we'll still have delete events for the others - this verifies how we handle that.

existing_opportunity = f.OpportunityFactory(
no_current_summary=True, opportunity_assistance_listings=[]
no_current_summary=True, opportunity_assistance_listings=[], agency="AGENCYXYZ"
)
opportunity = f.StagingTopportunityFactory(
opportunity_id=existing_opportunity.opportunity_id, cfdas=[], is_deleted=True
@@ -532,9 +554,11 @@ def test_delete_opportunity_with_deleted_children(self, db_session, transform_oracle_data_task):
validate_funding_instrument(db_session, forecast_funding_instrument, expect_in_db=False)
validate_funding_instrument(db_session, synopsis_funding_instrument, expect_in_db=False)

validate_agency(db_session, agency)

assert {
transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 10,
# Despite processing 10 records, only the opportunity is actually deleted directly
transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED: 11,
# Despite processing 11 records, only the opportunity is actually deleted directly
transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED: 1,
f"opportunity.{transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED}": 1,
transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED: 9,
