diff --git a/datasets/nlm_rxnorm/pipelines/_images/run_csv_transform_kub/csv_transform.py b/datasets/nlm_rxnorm/pipelines/_images/run_csv_transform_kub/csv_transform.py index 08be1237b..36e13d8a6 100644 --- a/datasets/nlm_rxnorm/pipelines/_images/run_csv_transform_kub/csv_transform.py +++ b/datasets/nlm_rxnorm/pipelines/_images/run_csv_transform_kub/csv_transform.py @@ -265,7 +265,7 @@ def create_dest_table( project_id: str, dataset_id: str, table_id: str, - schema_filepath: list, + schema_filepath: str, bucket_name: str, ) -> bool: table_ref = f"{project_id}.{dataset_id}.{table_id}" @@ -281,9 +281,11 @@ def create_dest_table( if not table: logging.info( ( - f"Table {table_ref} currently does not exist. Attempting to create table." + f"Table {table_ref} currently does not exist. Attempting to create table from filepath {schema_filepath}." ) ) + file_name = os.path.split(schema_filepath)[1] + file_path = os.path.split(schema_filepath)[0] if check_gcs_file_exists(schema_filepath, bucket_name): schema = create_table_schema([], bucket_name, schema_filepath) table = bigquery.Table(table_ref, schema=schema) @@ -291,8 +293,6 @@ def create_dest_table( logging.info(f"Table {table_ref} was created".format(table_id)) table_exists = True else: - file_name = os.path.split(schema_filepath)[1] - file_path = os.path.split(schema_filepath)[0] logging.info( f"Error: Unable to create table {table_ref} because schema file {file_name} does not exist in location {file_path} in bucket {bucket_name}" ) diff --git a/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/nlm_rxnorm_dag.py b/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/nlm_rxnorm_dag.py index aad933251..1db2df5cc 100644 --- a/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/nlm_rxnorm_dag.py +++ b/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/nlm_rxnorm_dag.py @@ -93,7 +93,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxnatomarchive_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -118,7 +118,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxnconso_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -143,7 +143,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxncui_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -168,7 +168,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxncuichange_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -193,7 +193,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxndoc_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -218,7 +218,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxnrel_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -243,7 +243,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxnsab_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, @@ -293,7 +293,7 @@ "CHUNKSIZE": "750000", "TARGET_GCS_BUCKET": "{{ var.value.composer_bucket }}", "TARGET_GCS_PATH": "data/nlm_rxnorm", - "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema/rxnsty_schema.json", + "SCHEMA_FILEPATH": "data/nlm_rxnorm/schema", "PROJECT_ID": "{{ var.value.gcp_project }}", "DATASET_ID": "nlm_rxnorm", }, diff --git a/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/pipeline.yaml b/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/pipeline.yaml index 305433f2d..0ad0c17cb 100644 --- a/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/pipeline.yaml +++ b/datasets/nlm_rxnorm/pipelines/nlm_rxnorm/pipeline.yaml @@ -88,7 +88,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxnatomarchive_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -112,7 +112,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxnconso_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -136,7 +136,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxncui_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -160,7 +160,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxncuichange_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -184,7 +184,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxndoc_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -208,7 +208,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxnrel_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -232,7 +232,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxnsab_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm" @@ -280,7 +280,7 @@ dag: CHUNKSIZE: "750000" TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}" TARGET_GCS_PATH: "data/nlm_rxnorm" - SCHEMA_FILEPATH: "data/nlm_rxnorm/schema/rxnsty_schema.json" + SCHEMA_FILEPATH: "data/nlm_rxnorm/schema" PROJECT_ID: "{{ var.value.gcp_project }}" DATASET_ID: "nlm_rxnorm"