From 31a38882f69f6843910f8abf0e173ba58589c566 Mon Sep 17 00:00:00 2001 From: matt garber Date: Wed, 16 Aug 2023 08:30:30 -0400 Subject: [PATCH] flattened conditions, added new code table (#105) * flattened conditions, added new code table * moved var, included all as default * docs * tweak to column name --- .../core/builder_condition_codeableconcept.py | 33 +++++--- .../studies/core/builder_core_medication.py | 1 - .../studies/core/builder_encounter_coding.py | 22 +++-- cumulus_library/studies/core/condition.sql | 32 +++++--- .../core/condition_codeable_concept.sql | 80 +++++++++++++------ .../codeable_concept_denormalize.sql.jinja | 34 +++++--- cumulus_library/template_sql/templates.py | 44 +++++++--- tests/test_cli.py | 4 +- tests/test_templates.py | 65 ++++++++++++--- 9 files changed, 224 insertions(+), 91 deletions(-) diff --git a/cumulus_library/studies/core/builder_condition_codeableconcept.py b/cumulus_library/studies/core/builder_condition_codeableconcept.py index 15adc130..1661d4a9 100644 --- a/cumulus_library/studies/core/builder_condition_codeableconcept.py +++ b/cumulus_library/studies/core/builder_condition_codeableconcept.py @@ -17,18 +17,27 @@ def prepare_queries(self, cursor: object, schema: str): :param schema: the schema/db name, matching the cursor """ - config = CodeableConceptConfig( + preferred_config = CodeableConceptConfig( source_table="condition", source_id="id", - cc_column={ - "name": "code", - "is_array": False, - "code_systems": [ - "http://snomed.info/sct", - "http://hl7.org/fhir/sid/icd-10-cm", - "http://hl7.org/fhir/sid/icd-9-cm", - ], - }, - target_table="core__condition_codable_concepts", + column_name="code", + is_array=False, + target_table="core__condition_codable_concepts_display", + filter_priority=True, + code_systems=[ + "http://snomed.info/sct", + "http://hl7.org/fhir/sid/icd-10-cm", + "http://hl7.org/fhir/sid/icd-9-cm", + ], ) - self.queries.append(get_codeable_concept_denormalize_query(config)) + 
self.queries.append(get_codeable_concept_denormalize_query(preferred_config)) + + all_config = CodeableConceptConfig( + source_table="condition", + source_id="id", + column_name="code", + is_array=False, + target_table="core__condition_codable_concepts_all", + filter_priority=False, + ) + self.queries.append(get_codeable_concept_denormalize_query(all_config)) diff --git a/cumulus_library/studies/core/builder_core_medication.py b/cumulus_library/studies/core/builder_core_medication.py index 10106779..951eda4d 100644 --- a/cumulus_library/studies/core/builder_core_medication.py +++ b/cumulus_library/studies/core/builder_core_medication.py @@ -3,7 +3,6 @@ from cumulus_library.base_table_builder import BaseTableBuilder from cumulus_library.helper import get_progress_bar, query_console_output from cumulus_library.template_sql.templates import ( - CodeableConceptConfig, get_core_medication_query, get_is_table_not_empty_query, get_column_datatype_query, diff --git a/cumulus_library/studies/core/builder_encounter_coding.py b/cumulus_library/studies/core/builder_encounter_coding.py index 6f1cc30f..314c61ff 100644 --- a/cumulus_library/studies/core/builder_encounter_coding.py +++ b/cumulus_library/studies/core/builder_encounter_coding.py @@ -51,11 +51,11 @@ def _check_data_in_fields(self, code_sources: list[dict], schema, cursor) -> dic for code_source in code_sources: if code_source["is_array"]: code_source["has_data"] = is_codeable_concept_array_populated( - schema, "encounter", code_source["name"], cursor + schema, "encounter", code_source["column_name"], cursor ) else: code_source["has_data"] = is_codeable_concept_populated( - schema, "encounter", code_source["name"], cursor + schema, "encounter", code_source["column_name"], cursor ) progress.advance(task) return code_sources @@ -70,8 +70,9 @@ def prepare_queries(self, cursor: object, schema: str): code_sources = [ { - "name": "type", + "column_name": "type", "is_array": True, + "filter_priority": True, "code_systems": [ 
"http://terminology.hl7.org/CodeSystem/encounter-type", "http://terminology.hl7.org/CodeSystem/v2-0004", @@ -81,8 +82,9 @@ def prepare_queries(self, cursor: object, schema: str): "has_data": False, }, { - "name": "servicetype", + "column_name": "servicetype", "is_array": False, + "filter_priority": True, "code_systems": [ "http://terminology.hl7.org/CodeSystem/service-type", "urn:oid:2.16.840.1.113883.4.642.3.518", @@ -91,8 +93,9 @@ def prepare_queries(self, cursor: object, schema: str): "has_data": False, }, { - "name": "priority", + "column_name": "priority", "is_array": False, + "filter_priority": True, "code_systems": [ "http://terminology.hl7.org/CodeSystem/v3-ActPriority", "http://snomed.info/sct", @@ -106,15 +109,18 @@ def prepare_queries(self, cursor: object, schema: str): config = CodeableConceptConfig( source_table="encounter", source_id="id", - cc_column=code_source, - target_table=f"core__encounter_dn_{code_source['name']}", + column_name=code_source["column_name"], + is_array=code_source["is_array"], + filter_priority=code_source["filter_priority"], + code_systems=code_source["code_systems"], + target_table=f"core__encounter_dn_{code_source['column_name']}", ) self.queries.append(get_codeable_concept_denormalize_query(config)) else: self.queries.append( get_ctas_empty_query( schema_name=schema, - table_name=f"core__encounter_dn_{code_source['name']}", + table_name=f"core__encounter_dn_{code_source['column_name']}", table_cols=["id", "code", "code_system", "display"], ) ) diff --git a/cumulus_library/studies/core/condition.sql b/cumulus_library/studies/core/condition.sql index ac8d1b76..fb3c0cec 100644 --- a/cumulus_library/studies/core/condition.sql +++ b/cumulus_library/studies/core/condition.sql @@ -2,11 +2,12 @@ -- Condition -- https://build.fhir.org/ig/HL7/US-Core/StructureDefinition-us-core-condition-encounter-diagnosis.html -- ---Each Condition must have: +-- Each Condition must have: -- a category code of “problem-list-item” or “health-concern” 
--- a code that identifies the condition +-- a code that identifies the condition (this is available in +-- core__condition_codable_concepts_all) -- a patient ---Each Condition must support: +-- Each Condition must support: -- a clinical status of the condition (e.g., active or resolved) -- a verification status -- a category code of ‘sdoh’ @@ -14,13 +15,14 @@ -- abatement date (in other words, date of resolution or remission) -- a date when recorded - CREATE TABLE core__condition AS WITH temp_condition AS ( SELECT c.category, - c.code, c.clinicalstatus, + cca.code, + cca.code_system, + cca.display, c.verificationstatus, c.subject.reference AS subject_ref, c.encounter.reference AS encounter_ref, @@ -28,11 +30,15 @@ WITH temp_condition AS ( date(from_iso8601_timestamp(c.recordeddate)) AS recordeddate, concat('Condition/', c.id) AS condition_ref FROM condition AS c + INNER JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id ) SELECT - t_category_coding.category_row AS category, - tc.code AS cond_code, + t_category_coding.category_row.code AS category_code, + t_category_coding.category_row.display AS category_display, + tc.code, + tc.code_system, + tc.display AS code_display, tc.subject_ref, tc.encounter_ref, tc.condition_id, @@ -42,9 +48,9 @@ SELECT date_trunc('month', date(tc.recordeddate)) AS recorded_month, date_trunc('year', date(tc.recordeddate)) AS recorded_year FROM temp_condition AS tc, - unnest(category) AS t_category (category_coding), --noqa - unnest(category_coding.coding) AS t_category_coding (category_row), --noqa - unnest(code.coding) AS t_coding (code_row) --noqa + unnest(category) AS t_category (category_coding), + unnest(category_coding.coding) AS t_category_coding (category_row) + WHERE tc.recordeddate BETWEEN date('2016-01-01') AND current_date; -- ########################################################################### @@ -53,14 +59,14 @@ WHERE tc.recordeddate BETWEEN date('2016-01-01') AND current_date; CREATE TABLE
core__count_condition_month AS WITH concept_map AS ( - SELECT + SELECT DISTINCT c.recorded_month AS cond_month, c.subject_ref, coalesce(c.encounter_ref, 'None') AS encounter_ref, coalesce(mapping.display, 'None') AS cond_code_display, - c.category.code AS cond_category_code + c.category_code AS cond_category_code FROM core__condition AS c - LEFT JOIN core__condition_codable_concepts AS mapping + LEFT JOIN core__condition_codable_concepts_display AS mapping ON c.condition_id = mapping.id ), diff --git a/cumulus_library/studies/core/condition_codeable_concept.sql b/cumulus_library/studies/core/condition_codeable_concept.sql index 2d39173a..a48b924f 100644 --- a/cumulus_library/studies/core/condition_codeable_concept.sql +++ b/cumulus_library/studies/core/condition_codeable_concept.sql @@ -7,10 +7,11 @@ in the future change the priority order of concept systems, or add additional systems to support other implementations if we run into unusual data in the wild. */ -CREATE TABLE core__condition_codable_concepts AS ( - WITH - system_0 AS ( +# filtering case +CREATE TABLE core__condition_codable_concepts_preferred AS ( + WITH + system_code_0 AS ( SELECT DISTINCT s.id AS id, '0' AS priority, @@ -19,12 +20,11 @@ CREATE TABLE core__condition_codable_concepts AS ( u.codeable_concept.system AS code_system FROM condition AS s, - UNNEST(s.code.coding) AS u (codeable_concept) --noqa: AL05 + UNNEST(s.code.coding) AS u (codeable_concept) WHERE u.codeable_concept.system = 'http://snomed.info/sct' ), --noqa: LT07 - - system_1 AS ( + system_code_1 AS ( SELECT DISTINCT s.id AS id, '1' AS priority, @@ -33,12 +33,11 @@ CREATE TABLE core__condition_codable_concepts AS ( u.codeable_concept.system AS code_system FROM condition AS s, - UNNEST(s.code.coding) AS u (codeable_concept) --noqa: AL05 + UNNEST(s.code.coding) AS u (codeable_concept) WHERE u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-10-cm' ), --noqa: LT07 - - system_2 AS ( + system_code_2 AS ( SELECT DISTINCT s.id AS id, 
'2' AS priority, @@ -47,7 +46,7 @@ CREATE TABLE core__condition_codable_concepts AS ( u.codeable_concept.system AS code_system FROM condition AS s, - UNNEST(s.code.coding) AS u (codeable_concept) --noqa: AL05 + UNNEST(s.code.coding) AS u (codeable_concept) WHERE u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-9-cm' ), --noqa: LT07 @@ -59,7 +58,7 @@ CREATE TABLE core__condition_codable_concepts AS ( code_system, code, display - FROM system_0 + FROM system_code_0 UNION SELECT id, @@ -67,7 +66,7 @@ CREATE TABLE core__condition_codable_concepts AS ( code_system, code, display - FROM system_1 + FROM system_code_1 UNION SELECT id, @@ -75,27 +74,62 @@ CREATE TABLE core__condition_codable_concepts AS ( code_system, code, display - FROM system_2 - ORDER BY id, priority - ) + FROM system_code_2 + ), - SELECT - id, - code, - code_system, - display - FROM ( + partitioned_table AS ( SELECT id, code, code_system, display, + priority, ROW_NUMBER() OVER ( PARTITION BY id ) AS available_priority FROM union_table - GROUP BY id, code_system, code, display + GROUP BY id, priority, code_system, code, display + ORDER BY priority ASC ) + + SELECT + id, + code, + code_system, + display + FROM partitioned_table WHERE available_priority = 1 -); \ No newline at end of file +); + +#non-filtering case + +CREATE TABLE target__concepts AS ( + WITH + system_code_col_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + source AS s, + UNNEST(s.code_col) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_code_col_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); diff --git a/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja b/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja index 1d9193be..b984417f 
100644 --- a/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja +++ b/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja @@ -1,40 +1,47 @@ CREATE TABLE {{ target_table }} AS ( WITH - {%- for system in cc_column.code_systems %} - system_{{ cc_column.name }}_{{ loop.index0 }} AS ( + {%- for system in code_systems %} + system_{{ column_name }}_{{ loop.index0 }} AS ( SELECT DISTINCT s.{{ source_id }} AS id, + {%- if filter_priority %} '{{ loop.index0 }}' AS priority, + {%- endif %} u.codeable_concept.code AS code, u.codeable_concept.display AS display, u.codeable_concept.system AS code_system FROM {{ source_table }} AS s, - {% if cc_column.is_array %} - UNNEST(s.{{ cc_column.name }}) AS cc (cc_row), + {%- if is_array %} + UNNEST(s.{{ column_name }}) AS cc (cc_row), UNNEST(cc.cc_row.coding) AS u (codeable_concept) - {% else %} - UNNEST(s.{{ cc_column.name }}.coding) AS u (codeable_concept) + {%- else %} + UNNEST(s.{{ column_name }}.coding) AS u (codeable_concept) {%- endif %} + {%- if filter_priority %} WHERE u.codeable_concept.system = '{{ system }}' + {%- endif %} ), --noqa: LT07 {%- endfor %} union_table AS ( - {%- for system in cc_column.code_systems %} + {%- for system in code_systems %} SELECT id, + {%- if filter_priority %} priority, + {%- endif %} code_system, code, display - FROM system_{{ cc_column.name }}_{{ loop.index0 }} + FROM system_{{ column_name }}_{{ loop.index0 }} {%- if not loop.last %} UNION {%- endif -%} {%- endfor %} - ), + ) +{%- if filter_priority -%}, partitioned_table AS ( SELECT @@ -60,3 +67,12 @@ CREATE TABLE {{ target_table }} AS ( FROM partitioned_table WHERE available_priority = 1 ); +{% else %} + SELECT + id, + code, + code_system, + display + FROM union_table +); +{% endif %} \ No newline at end of file diff --git a/cumulus_library/template_sql/templates.py b/cumulus_library/template_sql/templates.py index 76d73b61..36be5e07 100644 --- a/cumulus_library/template_sql/templates.py +++ 
b/cumulus_library/template_sql/templates.py @@ -1,7 +1,7 @@ """ Collection of jinja template getters for common SQL queries """ from enum import Enum from pathlib import Path -from typing import Dict, List +from typing import Dict, List, TypedDict from jinja2 import Template @@ -194,24 +194,37 @@ class CodeableConceptConfig: :param source_table: the table to extract extensions from :param source_id: the id field to use in the new table - :param cc_columns: the column containing the codeableConcept you want to extract. - Format: - {'name':[column], - 'is_array': [boolean], - 'code_systems':[List of code system strings, in priority order]} - is_array relates to the FHIR spec - if the field is specified - as 0...*, set this to be true. + :param column_name: the column containing the codeableConcept you want to extract. + :param is_array: whether the codeableConcept is 0..1 or 0..* in the FHIR spec :param target_table: the name of the table to create + :param filter_priority: If true, will use code systems to select a single code, + in preference order, for use as a display value. :param code_systems: a list of systems, in preference order, for selecting data + for filtering. This should not be set if filter_priority is false. """ def __init__( - self, source_table: str, source_id: str, cc_column: dict, target_table: str + self, + source_table: str, + source_id: str, + column_name: str, + is_array: bool, + target_table: str, + filter_priority: bool = False, + code_systems: list = None, ): + if not filter_priority and code_systems is not None: + raise Exception( + "CodeableConceptConfig cannot have non-default value assigned to " + "code_systems unless filter_priority is true."
+ ) self.source_table = source_table self.source_id = source_id - self.cc_column = cc_column + self.column_name = column_name + self.is_array = is_array self.target_table = target_table + self.filter_priority = filter_priority + self.code_systems = code_systems def get_codeable_concept_denormalize_query(config: CodeableConceptConfig) -> str: @@ -227,12 +240,21 @@ def get_codeable_concept_denormalize_query(config: CodeableConceptConfig) -> str :param config: a CodableConeptConfig """ path = Path(__file__).parent + + # If we get a None for code systems, we want one dummy value so the jinja + # for loop will do a single pass. This implicitly means that we're not + # filtering, so this parameter will be otherwise ignored + code_systems = config.code_systems or ["all"] + with open(f"{path}/codeable_concept_denormalize.sql.jinja") as codable_concept: return Template(codable_concept.read()).render( source_table=config.source_table, source_id=config.source_id, - cc_column=config.cc_column, + column_name=config.column_name, + is_array=config.is_array, target_table=config.target_table, + filter_priority=config.filter_priority, + code_systems=code_systems, ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 690af361..d077c089 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -76,7 +76,7 @@ def test_cli_path_mapping( "args,cursor_calls,pandas_cursor_calls", [ (["build", "-t", "vocab", "--database", "test"], 344, 0), - (["build", "-t", "core", "--database", "test"], 46, 0), + (["build", "-t", "core", "--database", "test"], 47, 0), (["export", "-t", "core", "--database", "test"], 1, 10), ( [ @@ -106,7 +106,7 @@ def test_cli_path_mapping( ), ( ["build", "-t", "core", "-s", "tests/test_data/", "--database", "test"], - 46, + 47, 0, ), ( diff --git a/tests/test_templates.py b/tests/test_templates.py index 46a592a8..2bc9db38 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -158,7 +158,7 @@ def test_extension_denormalize_creation():
assert query == expected -def test_codeable_concept_denormalize_creation(): +def test_codeable_concept_denormalize_filter_creation(): expected = """CREATE TABLE target__concepts AS ( WITH system_code_col_0 AS ( @@ -170,7 +170,6 @@ def test_codeable_concept_denormalize_creation(): u.codeable_concept.system AS code_system FROM source AS s, - UNNEST(s.code_col.coding) AS u (codeable_concept) WHERE u.codeable_concept.system = 'http://snomed.info/sct' @@ -184,7 +183,6 @@ def test_codeable_concept_denormalize_creation(): u.codeable_concept.system AS code_system FROM source AS s, - UNNEST(s.code_col.coding) AS u (codeable_concept) WHERE u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-10-cm' @@ -231,20 +229,63 @@ def test_codeable_concept_denormalize_creation(): display FROM partitioned_table WHERE available_priority = 1 -);""" +); +""" config = CodeableConceptConfig( source_table="source", source_id="id", - cc_column={ - "name": "code_col", - "is_array": False, - "code_systems": [ - "http://snomed.info/sct", - "http://hl7.org/fhir/sid/icd-10-cm", - ], - }, + column_name="code_col", + target_table="target__concepts", + is_array=False, + filter_priority=True, + code_systems=[ + "http://snomed.info/sct", + "http://hl7.org/fhir/sid/icd-10-cm", + ], + ) + query = get_codeable_concept_denormalize_query(config) + + assert query == expected + + +def test_codeable_concept_denormalize_all_creation(): + expected = """CREATE TABLE target__concepts AS ( + WITH + system_code_col_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + source AS s, + UNNEST(s.code_col) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_code_col_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); +""" + config = CodeableConceptConfig( + 
source_table="source", + source_id="id", + column_name="code_col", target_table="target__concepts", + is_array=True, ) query = get_codeable_concept_denormalize_query(config) assert query == expected