From 8bb519447ff8f3908556a0be0d53a7db53eb2cbc Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 25 Jun 2024 10:02:21 -0700 Subject: [PATCH 1/3] Remove TODO expr in filters --- semantic_model_generator/generate_model.py | 1 - semantic_model_generator/tests/generate_model_test.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/semantic_model_generator/generate_model.py b/semantic_model_generator/generate_model.py index 8e924a5b..5271069c 100644 --- a/semantic_model_generator/generate_model.py +++ b/semantic_model_generator/generate_model.py @@ -30,7 +30,6 @@ def _get_placeholder_filter() -> List[semantic_model_pb2.NamedFilter]: name=_PLACEHOLDER_COMMENT, synonyms=[_PLACEHOLDER_COMMENT], description=_PLACEHOLDER_COMMENT, - expr=_PLACEHOLDER_COMMENT, ) ] diff --git a/semantic_model_generator/tests/generate_model_test.py b/semantic_model_generator/tests/generate_model_test.py index d5362b3d..5bb2089d 100644 --- a/semantic_model_generator/tests/generate_model_test.py +++ b/semantic_model_generator/tests/generate_model_test.py @@ -314,7 +314,7 @@ def mock_dependencies_exceed_context(mock_snowflake_connection): def test_raw_schema_to_semantic_context( mock_dependencies, mock_snowflake_connection, mock_snowflake_connection_env ): - want_yaml = "name: this is the best semantic model ever\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' '\n synonyms:\n - ' '\n description: ' '\n expr: ' '\n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' '\n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' '\n description: ' '\n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' '\n description: ' '\n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' '\n description: ' '\n expr: CBSA\n data_type: NUMBER\n" + want_yaml = "name: this is the best semantic model ever\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' '\n synonyms:\n - ' '\n description: ' '\n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' '\n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' '\n description: ' '\n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' '\n description: ' '\n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' '\n description: ' '\n expr: CBSA\n data_type: NUMBER\n" snowflake_account = "test_account" base_tables = ["test_db.schema_test.ALIAS"] @@ -365,7 +365,7 @@ def test_generate_base_context_with_placeholder_comments( mock_file.assert_called_once_with(output_path, "w") # Assert file save called with placeholder comments added. mock_file().write.assert_called_once_with( - "name: my awesome semantic model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n expr: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n" + "name: my awesome semantic model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n" ) @@ -395,7 +395,7 @@ def test_generate_base_context_with_placeholder_comments_cross_database_cross_sc # Assert file save called with placeholder comments added along with sample values and cross-database mock_file().write.assert_called_once_with( - "name: Another Incredible Semantic Model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n expr: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n - name: PRODUCTS\n description: ' ' # \n base_table:\n database: A_DIFFERENT_DATABASE\n schema: A_DIFFERENT_SCHEMA\n table: PRODUCTS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n expr: ' ' # \n measures:\n - name: SKU\n synonyms:\n - ' ' # \n description: ' ' # \n expr: SKU\n data_type: NUMBER\n sample_values:\n - '1'\n - '2'\n - '3'\n" + "name: Another Incredible Semantic Model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n - name: PRODUCTS\n description: ' ' # \n base_table:\n database: A_DIFFERENT_DATABASE\n schema: A_DIFFERENT_SCHEMA\n table: PRODUCTS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n measures:\n - name: SKU\n synonyms:\n - ' ' # \n description: ' ' # \n expr: SKU\n data_type: NUMBER\n sample_values:\n - '1'\n - '2'\n - '3'\n" ) @@ -494,7 +494,7 @@ def test_generate_base_context_from_table_that_has_too_long_context( mock_file.assert_called_once_with(output_path, "w") mock_logger.warning.assert_called_once_with( - "WARNING 🚨: The Semantic model is too large. \n Passed size is 26867 characters. We need you to remove 784 characters in your semantic model. Please check: \n (1) If you have long descriptions that can be truncated. \n (2) If you can remove some columns that are not used within your tables. \n (3) If you have extra tables you do not need. \n (4) If you can remove sample values. \n Once you've finished updating, please validate your semantic model." + "WARNING 🚨: The Semantic model is too large. \n Passed size is 26848 characters. We need you to remove 768 characters in your semantic model. Please check: \n (1) If you have long descriptions that can be truncated. \n (2) If you can remove some columns that are not used within your tables. \n (3) If you have extra tables you do not need. \n (4) If you can remove sample values. \n Once you've finished updating, please validate your semantic model." ) mock_file.assert_called_once_with(output_path, "w") From beb29aa73e2d89428bbf4eb7ad88f9bcab79180a Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 25 Jun 2024 10:14:33 -0700 Subject: [PATCH 2/3] Update test for valid filter --- semantic_model_generator/tests/samples/validate_yamls.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/semantic_model_generator/tests/samples/validate_yamls.py b/semantic_model_generator/tests/samples/validate_yamls.py index 893688a0..5ae8796b 100644 --- a/semantic_model_generator/tests/samples/validate_yamls.py +++ b/semantic_model_generator/tests/samples/validate_yamls.py @@ -5,6 +5,12 @@ database: AUTOSQL_DATASET_BIRD_V2 schema: ADDRESS table: ALIAS + filters: + - name: ' ' # + synonyms: + - ' ' # + description: ' ' # + expr: ' ' # dimensions: - name: ALIAS synonyms: From 2755b1711047be3039458b62b28c2d5222111da8 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Tue, 25 Jun 2024 10:28:15 -0700 Subject: [PATCH 3/3] update --- semantic_model_generator/generate_model.py | 1 + semantic_model_generator/tests/generate_model_test.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/semantic_model_generator/generate_model.py b/semantic_model_generator/generate_model.py index 5271069c..8e924a5b 100644 --- a/semantic_model_generator/generate_model.py +++ b/semantic_model_generator/generate_model.py @@ -30,6 +30,7 @@ def _get_placeholder_filter() -> List[semantic_model_pb2.NamedFilter]: name=_PLACEHOLDER_COMMENT, synonyms=[_PLACEHOLDER_COMMENT], description=_PLACEHOLDER_COMMENT, + expr=_PLACEHOLDER_COMMENT, ) ] diff --git a/semantic_model_generator/tests/generate_model_test.py b/semantic_model_generator/tests/generate_model_test.py index 5bb2089d..d5362b3d 100644 --- a/semantic_model_generator/tests/generate_model_test.py +++ b/semantic_model_generator/tests/generate_model_test.py @@ -314,7 +314,7 @@ def mock_dependencies_exceed_context(mock_snowflake_connection): def test_raw_schema_to_semantic_context( mock_dependencies, mock_snowflake_connection, mock_snowflake_connection_env ): - want_yaml = "name: this is the best semantic model ever\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' '\n synonyms:\n - ' '\n description: ' '\n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' '\n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' '\n description: ' '\n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' '\n description: ' '\n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' '\n description: ' '\n expr: CBSA\n data_type: NUMBER\n" + want_yaml = "name: this is the best semantic model ever\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' '\n synonyms:\n - ' '\n description: ' '\n expr: ' '\n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' '\n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' '\n description: ' '\n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' '\n description: ' '\n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' '\n description: ' '\n expr: CBSA\n data_type: NUMBER\n" snowflake_account = "test_account" base_tables = ["test_db.schema_test.ALIAS"] @@ -365,7 +365,7 @@ def test_generate_base_context_with_placeholder_comments( mock_file.assert_called_once_with(output_path, "w") # Assert file save called with placeholder comments added. mock_file().write.assert_called_once_with( - "name: my awesome semantic model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n" + "name: my awesome semantic model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n expr: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n" ) @@ -395,7 +395,7 @@ def test_generate_base_context_with_placeholder_comments_cross_database_cross_sc # Assert file save called with placeholder comments added along with sample values and cross-database mock_file().write.assert_called_once_with( - "name: Another Incredible Semantic Model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n - name: PRODUCTS\n description: ' ' # \n base_table:\n database: A_DIFFERENT_DATABASE\n schema: A_DIFFERENT_SCHEMA\n table: PRODUCTS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n measures:\n - name: SKU\n synonyms:\n - ' ' # \n description: ' ' # \n expr: SKU\n data_type: NUMBER\n sample_values:\n - '1'\n - '2'\n - '3'\n" + "name: Another Incredible Semantic Model\ntables:\n - name: ALIAS\n description: some table comment\n base_table:\n database: TEST_DB\n schema: SCHEMA_TEST\n table: ALIAS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n expr: ' ' # \n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' ' # \n description: some column comment\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' ' # \n description: ' ' # \n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' ' # \n description: ' ' # \n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' ' # \n description: ' ' # \n expr: CBSA\n data_type: NUMBER\n - name: PRODUCTS\n description: ' ' # \n base_table:\n database: A_DIFFERENT_DATABASE\n schema: A_DIFFERENT_SCHEMA\n table: PRODUCTS\n filters:\n - name: ' ' # \n synonyms:\n - ' ' # \n description: ' ' # \n expr: ' ' # \n measures:\n - name: SKU\n synonyms:\n - ' ' # \n description: ' ' # \n expr: SKU\n data_type: NUMBER\n sample_values:\n - '1'\n - '2'\n - '3'\n" ) @@ -494,7 +494,7 @@ def test_generate_base_context_from_table_that_has_too_long_context( mock_file.assert_called_once_with(output_path, "w") mock_logger.warning.assert_called_once_with( - "WARNING 🚨: The Semantic model is too large. \n Passed size is 26848 characters. We need you to remove 768 characters in your semantic model. Please check: \n (1) If you have long descriptions that can be truncated. \n (2) If you can remove some columns that are not used within your tables. \n (3) If you have extra tables you do not need. \n (4) If you can remove sample values. \n Once you've finished updating, please validate your semantic model." + "WARNING 🚨: The Semantic model is too large. \n Passed size is 26867 characters. We need you to remove 784 characters in your semantic model. Please check: \n (1) If you have long descriptions that can be truncated. \n (2) If you can remove some columns that are not used within your tables. \n (3) If you have extra tables you do not need. \n (4) If you can remove sample values. \n Once you've finished updating, please validate your semantic model." ) mock_file.assert_called_once_with(output_path, "w")