From 10d0abc5f79645cf3039ef4b322765c5fb829897 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Mon, 29 Apr 2024 12:31:27 -0700 Subject: [PATCH] Check mismatched quotes, test for uppercase --- .../sqlgen/generate_sql.py | 8 +- .../tests/validate_model_test.py | 146 ++++++++++++++++++ 2 files changed, 152 insertions(+), 2 deletions(-) diff --git a/semantic_model_generator/sqlgen/generate_sql.py b/semantic_model_generator/sqlgen/generate_sql.py index 6006274a..35823f71 100644 --- a/semantic_model_generator/sqlgen/generate_sql.py +++ b/semantic_model_generator/sqlgen/generate_sql.py @@ -51,6 +51,10 @@ def _return_col_or_expr( raise ValueError( f"Column names should not have spaces in them. Passed = {col.name}" ) + if col.name.count('"') % 2 != 0: # Odd number of quotes indicates an issue + raise ValueError( + f"Invalid column name '{col.name}'. Mismatched quotes detected." + ) expr = ( f"{col.expr} as {col.name}" if col.expr.lower() != col.name.lower() @@ -63,8 +67,8 @@ def _return_col_or_expr( columns = [] for dim_col in table.dimensions: columns.append(_return_col_or_expr(dim_col)) - for time_col in table.measures: - columns.append(_return_col_or_expr(time_col)) + for measure_col in table.measures: + columns.append(_return_col_or_expr(measure_col)) for time_dim_col in table.time_dimensions: columns.append(_return_col_or_expr(time_dim_col)) diff --git a/semantic_model_generator/tests/validate_model_test.py b/semantic_model_generator/tests/validate_model_test.py index bd71466e..90ea079c 100644 --- a/semantic_model_generator/tests/validate_model_test.py +++ b/semantic_model_generator/tests/validate_model_test.py @@ -104,6 +104,96 @@ def mock_snowflake_connection(): """ +_INVALID_YAML_UPPERCASE_DEFAULT_AGG = """name: my test semantic model +tables: + - name: ALIAS + base_table: + database: AUTOSQL_DATASET_BIRD_V2 + schema: ADDRESS + table: ALIAS + dimensions: + - name: ALIAS + synonyms: + - 'an alias for something' + expr: ALIAS + data_type: TEXT + sample_values: + - Holtsville + - Adjuntas + - Boqueron + measures: + - name: ZIP_CODE + synonyms: + - 'another synonym' + expr: ZIP_CODE + data_type: NUMBER + sample_values: + - '501' + default_aggregation: AVG + - name: AREA_CODE + base_table: + database: AUTOSQL_DATASET_BIRD_V2 + schema: ADDRESS + table: AREA_CODE + measures: + - name: ZIP_CODE + expr: ZIP_CODE + data_type: NUMBER + sample_values: + - '501' + - '544' + - name: AREA_CODE + expr: AREA_CODE + data_type: NUMBER + sample_values: + - '631' +""" + +_INVALID_YAML_UNMATCHED_QUOTE = """name: my test semantic model +tables: + - name: ALIAS + base_table: + database: AUTOSQL_DATASET_BIRD_V2 + schema: ADDRESS + table: ALIAS + dimensions: + - name: ALIAS + synonyms: + - 'an alias for something' + expr: ALIAS + data_type: TEXT + sample_values: + - Holtsville + - Adjuntas + - Boqueron + measures: + - name: ZIP_CODE + synonyms: + - 'another synonym' + expr: ZIP_CODE + data_type: NUMBER + sample_values: + - '501' + - name: AREA_CODE + base_table: + database: AUTOSQL_DATASET_BIRD_V2 + schema: ADDRESS + table: AREA_CODE + measures: + - name: ZIP_CODE" + expr: ZIP_CODE + data_type: NUMBER + sample_values: + - '501' + - '544' + - name: AREA_CODE + expr: AREA_CODE + data_type: NUMBER + sample_values: + - '631' +""" + + @pytest.fixture def temp_valid_yaml_file(): """Create a temporary YAML file with the test data.""" @@ -122,6 +212,24 @@ def temp_invalid_yaml_formatting_file(): yield tmp.name +@pytest.fixture +def temp_invalid_yaml_uppercase_file(): + """Create a temporary YAML file with the test data.""" + with tempfile.NamedTemporaryFile(mode="w", delete=True) as tmp: + tmp.write(_INVALID_YAML_UPPERCASE_DEFAULT_AGG) + tmp.flush() + yield tmp.name + + +@pytest.fixture +def temp_invalid_yaml_unmatched_quote_file(): + """Create a temporary YAML file with the test data.""" + with tempfile.NamedTemporaryFile(mode="w", delete=True) as tmp: + tmp.write(_INVALID_YAML_UNMATCHED_QUOTE) + tmp.flush() + yield tmp.name + + @mock.patch("semantic_model_generator.validate_model.logger") def test_valid_yaml(mock_logger, temp_valid_yaml_file, mock_snowflake_connection): account_name = "snowflake test" @@ -176,3 +284,41 @@ def test_invalid_yaml_formatting(mock_logger, temp_invalid_yaml_formatting_file) assert ( expected_log_call not in mock_logger.mock_calls ), "Unexpected log message found in logger calls" + + +@mock.patch("semantic_model_generator.validate_model.logger") +def test_invalid_yaml_uppercase(mock_logger, temp_invalid_yaml_uppercase_file): + account_name = "snowflake test" + with pytest.raises(ValueError) as exc_info: + validate(temp_invalid_yaml_uppercase_file, account_name) + + expected_error_fragment = "Unable to parse yaml to protobuf. Error: Failed to parse tables field: Failed to parse measures field: Failed to parse default_aggregation field: Invalid enum value AVG for enum type semantic_model_generator.AggregationType at SemanticModel.tables[0].measures[0].default_aggregation..." + assert expected_error_fragment in str(exc_info.value), "Unexpected error message" + + expected_log_call = mock.call.info( + f"Successfully validated {temp_invalid_yaml_uppercase_file}" + ) + assert ( + expected_log_call not in mock_logger.mock_calls + ), "Unexpected log message found in logger calls" + + +@mock.patch("semantic_model_generator.validate_model.logger") +def test_invalid_yaml_missing_quote( + mock_logger, temp_invalid_yaml_unmatched_quote_file +): + account_name = "snowflake test" + with pytest.raises(ValueError) as exc_info: + validate(temp_invalid_yaml_unmatched_quote_file, account_name) + + expected_error_fragment = "Unable to execute query with your logical table against physical tables on Snowflake. Query = SELECT ALIAS, ZIP_CODE FROM AUTOSQL_DATASET_BIRD_V2.ADDRESS.ALIAS LIMIT 100. Error = Invalid column name 'ZIP_CODE\"'. Mismatched quotes detected." + + assert expected_error_fragment in str(exc_info.value), "Unexpected error message" + + expected_log_call = mock.call.info( + f"Successfully validated {temp_invalid_yaml_unmatched_quote_file}" + ) + + assert ( + expected_log_call not in mock_logger.mock_calls + ), "Unexpected log message found in logger calls"