From f3c18ba7bf1b3c525c2b68390efda2b7180c2041 Mon Sep 17 00:00:00 2001 From: genkey6 Date: Sat, 30 Nov 2024 14:50:42 +0900 Subject: [PATCH 1/3] feat: remove prefix provided by osmosis_prefix meta before get_prior_knowledge --- .../core/column_level_knowledge_propagator.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/dbt_osmosis/core/column_level_knowledge_propagator.py b/src/dbt_osmosis/core/column_level_knowledge_propagator.py index e1efd63..c4b4c8b 100644 --- a/src/dbt_osmosis/core/column_level_knowledge_propagator.py +++ b/src/dbt_osmosis/core/column_level_knowledge_propagator.py @@ -209,7 +209,13 @@ def update_undocumented_columns_with_prior_knowledge( changes_committed = 0 for column in undocumented_columns: - prior_knowledge: ColumnLevelKnowledge = get_prior_knowledge(knowledge, column) + original_knowledge = ColumnLevelKnowledgePropagator._get_original_knowledge(node, column) + if original_knowledge["meta"].get("osmosis_prefix", None): + column_without_prefix = column.removeprefix(original_knowledge["meta"]["osmosis_prefix"]) + else: + column_without_prefix = column + + prior_knowledge: ColumnLevelKnowledge = get_prior_knowledge(knowledge, column_without_prefix) progenitor = prior_knowledge.pop("progenitor", None) prior_knowledge: ColumnLevelKnowledge = { k: v for k, v in prior_knowledge.items() if k in inheritables @@ -217,7 +223,7 @@ def update_undocumented_columns_with_prior_knowledge( ColumnLevelKnowledgePropagator._merge_prior_knowledge_with_original_knowledge( prior_knowledge, - ColumnLevelKnowledgePropagator._get_original_knowledge(node, column), + original_knowledge, add_progenitor_to_meta, progenitor, ) From cb07944e93759fdca41968f22ecfa108532b70cc Mon Sep 17 00:00:00 2001 From: genkey6 Date: Sat, 30 Nov 2024 14:51:45 +0900 Subject: [PATCH 2/3] test: add unit tests for update_undocumented_columns_with_prior_knowledge with osmosis_prefix meta provided --- demo_duckdb/models/schema.yml | 4 + demo_duckdb/models/staging/schema.yml | 3 + .../test_column_level_knowledge_propagator.py | 152 ++++++++++++++++++ 3 files changed, 159 insertions(+) diff --git a/demo_duckdb/models/schema.yml b/demo_duckdb/models/schema.yml index 5c244a2..7b69aff 100644 --- a/demo_duckdb/models/schema.yml +++ b/demo_duckdb/models/schema.yml @@ -29,6 +29,10 @@ models: - name: customer_lifetime_value data_type: DOUBLE description: '' + + - name: customer_rank + data_type: VARCHAR + description: '' - name: orders description: This table has basic information about orders, as well as some derived facts based on payments diff --git a/demo_duckdb/models/staging/schema.yml b/demo_duckdb/models/staging/schema.yml index e3c46fd..d16bf70 100644 --- a/demo_duckdb/models/staging/schema.yml +++ b/demo_duckdb/models/staging/schema.yml @@ -15,6 +15,9 @@ models: - name: last_name data_type: VARCHAR description: '' + - name: rank + data_type: VARCHAR + description: '' - name: stg_orders columns: - name: order_id diff --git a/tests/test_column_level_knowledge_propagator.py b/tests/test_column_level_knowledge_propagator.py index efc6fe7..ec5a087 100644 --- a/tests/test_column_level_knowledge_propagator.py +++ b/tests/test_column_level_knowledge_propagator.py @@ -77,6 +77,14 @@ def test_inherit_column_level_knowledge(): "constraints": [], "quote": None, }, + "rank": { + "progenitor": "model.jaffle_shop_duckdb.stg_customers", + "generation": "generation_0", + "name": "rank", + "data_type": "VARCHAR", + "constraints": [], + "quote": None, + }, "order_id": { "progenitor": "model.jaffle_shop_duckdb.stg_orders", "generation": "generation_0", @@ -561,6 +569,150 @@ def test_update_undocumented_columns_with_prior_knowledge_with_add_inheritance_f assert set(target_node.columns["customer_id"]._extra["policy_tags"]) == set(["my_policy_tag1"]) +def test_update_undocumented_columns_with_osmosis_prefix_meta_with_prior_knowledge(): + manifest = load_manifest() + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns[ + "rank" + ].description = "THIS COLUMN IS UPDATED FOR TESTING" + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["rank"].meta = { + "my_key": "my_value", + } + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["rank"].tags = [ + "my_tag1", + "my_tag2", + ] + + target_node_name = "model.jaffle_shop_duckdb.customers" + manifest.nodes[target_node_name].columns["customer_rank"].tags = set( + [ + "my_tag3", + "my_tag4", + ] + ) + manifest.nodes[target_node_name].columns["customer_rank"].meta = { + "my_key": "my_old_value", + "my_new_key": "my_new_value", + "osmosis_prefix": "customer_", + } + target_node = manifest.nodes[target_node_name] + knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge( + manifest, target_node, placeholders=[""] + ) + yaml_file_model_section = { + "columns": [ + { + "name": "customer_rank", + } + ] + } + undocumented_columns = target_node.columns.keys() + ColumnLevelKnowledgePropagator.update_undocumented_columns_with_prior_knowledge( + undocumented_columns, + target_node, + yaml_file_model_section, + knowledge, + skip_add_tags=False, + skip_merge_meta=False, + add_progenitor_to_meta=False, + ) + + assert yaml_file_model_section["columns"][0]["name"] == "customer_rank" + assert ( + yaml_file_model_section["columns"][0]["description"] == "THIS COLUMN IS UPDATED FOR TESTING" + ) + assert yaml_file_model_section["columns"][0]["meta"] == { + "my_key": "my_value", + "my_new_key": "my_new_value", + "osmosis_prefix": "customer_", + } + assert set(yaml_file_model_section["columns"][0]["tags"]) == set( + ["my_tag1", "my_tag2", "my_tag3", "my_tag4"] + ) + + assert target_node.columns["customer_rank"].description == "THIS COLUMN IS UPDATED FOR TESTING" + assert target_node.columns["customer_rank"].meta == { + "my_key": "my_value", + "my_new_key": "my_new_value", + "osmosis_prefix": "customer_", + } + assert set(target_node.columns["customer_rank"].tags) == set( + ["my_tag1", "my_tag2", "my_tag3", "my_tag4"] + ) + + +def test_update_undocumented_columns_with_osmosis_prefix_meta_with_prior_knowledge_with_osmosis_keep_description(): + manifest = load_manifest() + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns[ + "rank" + ].description = "THIS COLUMN IS UPDATED FOR TESTING" + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["rank"].meta = { + "my_key": "my_value", + } + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["rank"].tags = [ + "my_tag1", + "my_tag2", + ] + + column_description_not_updated = ( + "This column will not be updated as it has the 'osmosis_keep_description' attribute" + ) + target_node_name = "model.jaffle_shop_duckdb.customers" + + manifest.nodes[target_node_name].columns[ + "customer_rank" + ].description = column_description_not_updated + manifest.nodes[target_node_name].columns["customer_rank"].tags = set( + [ + "my_tag3", + "my_tag4", + ] + ) + manifest.nodes[target_node_name].columns["customer_rank"].meta = { + "my_key": "my_value", + "osmosis_prefix": "customer_", + "osmosis_keep_description": True, + } + + target_node = manifest.nodes[target_node_name] + knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge( + manifest, target_node, placeholders=[""] + ) + yaml_file_model_section = { + "columns": [ + { + "name": "customer_rank", + } + ] + } + undocumented_columns = target_node.columns.keys() + ColumnLevelKnowledgePropagator.update_undocumented_columns_with_prior_knowledge( + undocumented_columns, + target_node, + yaml_file_model_section, + knowledge, + skip_add_tags=True, + skip_merge_meta=True, + add_progenitor_to_meta=False, + ) + + assert yaml_file_model_section["columns"][0]["name"] == "customer_rank" + assert yaml_file_model_section["columns"][0]["description"] == column_description_not_updated + assert yaml_file_model_section["columns"][0]["meta"] == { + "my_key": "my_value", + "osmosis_keep_description": True, + "osmosis_prefix": "customer_", + } + assert set(yaml_file_model_section["columns"][0]["tags"]) == set(["my_tag3", "my_tag4"]) + + assert target_node.columns["customer_rank"].description == column_description_not_updated + assert target_node.columns["customer_rank"].meta == { + "my_key": "my_value", + "osmosis_keep_description": True, + "osmosis_prefix": "customer_", + } + assert set(target_node.columns["customer_rank"].tags) == set(["my_tag3", "my_tag4"]) + + @pytest.mark.parametrize("use_unrendered_descriptions", [True, False]) def test_use_unrendered_descriptions(use_unrendered_descriptions): manifest = load_manifest() From 9c3296e6eac7107dcfbd1251c3da1c7855206a76 Mon Sep 17 00:00:00 2001 From: genkey6 Date: Tue, 10 Dec 2024 22:31:24 +0900 Subject: [PATCH 3/3] chore: fix format by black --- .../core/column_level_knowledge_propagator.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/dbt_osmosis/core/column_level_knowledge_propagator.py b/src/dbt_osmosis/core/column_level_knowledge_propagator.py index c4b4c8b..998986d 100644 --- a/src/dbt_osmosis/core/column_level_knowledge_propagator.py +++ b/src/dbt_osmosis/core/column_level_knowledge_propagator.py @@ -209,13 +209,19 @@ def update_undocumented_columns_with_prior_knowledge( changes_committed = 0 for column in undocumented_columns: - original_knowledge = ColumnLevelKnowledgePropagator._get_original_knowledge(node, column) + original_knowledge = ColumnLevelKnowledgePropagator._get_original_knowledge( + node, column + ) if original_knowledge["meta"].get("osmosis_prefix", None): - column_without_prefix = column.removeprefix(original_knowledge["meta"]["osmosis_prefix"]) + column_without_prefix = column.removeprefix( + original_knowledge["meta"]["osmosis_prefix"] + ) else: column_without_prefix = column - prior_knowledge: ColumnLevelKnowledge = get_prior_knowledge(knowledge, column_without_prefix) + prior_knowledge: ColumnLevelKnowledge = get_prior_knowledge( + knowledge, column_without_prefix + ) progenitor = prior_knowledge.pop("progenitor", None) prior_knowledge: ColumnLevelKnowledge = { k: v for k, v in prior_knowledge.items() if k in inheritables