From e56bf26343f22b83b69212232e5c341d8328f86e Mon Sep 17 00:00:00 2001
From: Jonathan
Date: Fri, 3 May 2024 10:39:49 -0700
Subject: [PATCH] physical to base and sample values

---
 README.md                                  | 45 +++++++++++++++++--
 semantic_model_generator/generate_model.py | 18 ++++----
 .../tests/generate_model_test.py           | 16 +++----
 3 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 41d387d7..7adde515 100644
--- a/README.md
+++ b/README.md
@@ -73,12 +73,12 @@ python
 ```python
 from semantic_model_generator.generate_model import generate_base_semantic_model_from_snowflake
 
-PHYSICAL_TABLES = ['..','..']
+BASE_TABLES = ['..','..']
 SNOWFLAKE_ACCOUNT = ""
 SEMANTIC_MODEL_NAME = ""
 generate_base_semantic_model_from_snowflake(
-    physical_tables=PHYSICAL_TABLES,
+    base_tables=BASE_TABLES,
     snowflake_account=SNOWFLAKE_ACCOUNT,
     semantic_model_name=SEMANTIC_MODEL_NAME
 )
 ```
@@ -99,7 +99,7 @@ This is the script version run on the command line.
 2. Run on your command line.
 ```bash
 python -m semantic_model_generator.generate_model \
-    --physical_tables "['..','..']" \
+    --base_tables "['..','..']" \
     --semantic_model_name "" \
     --snowflake_account=""
 ```
@@ -181,7 +181,7 @@ tables:
     description: A logical table capturing daily sales information across different store locations and product categories.
 
     # The fully qualified name of the underlying physical table.
-    physical_table:
+    base_table:
       database: sales
       schema: public
       table: sd_data
@@ -195,11 +195,19 @@ tables:
         description: The category of the product sold.
         expr: cat
         unique: false
+        data_type: NUMBER
+        sample_values:
+          - '501'
+          - '544'
 
       - name: store_country
        description: The country where the sale took place.
        expr: cntry
        unique: false
+       data_type: TEXT
+       sample_values:
+         - 'USA'
+         - 'GBR'
 
      - name: sales_channel
        synonyms:
@@ -208,6 +216,10 @@ tables:
        description: The channel through which the sale was made.
        expr: chn
        unique: false
+       data_type: TEXT
+       sample_values:
+         - 'FB'
+         - 'GOOGLE'
 
    time_dimensions:
      - name: sale_timestamp
@@ -217,6 +229,11 @@ tables:
        description: The time when the sale occurred. In UTC.
        expr: dt
        unique: false
+       data_type: TIMESTAMP
+       sample_values:
+         - '2016-09-01 07:30:00'
+         - '2016-09-01 14:16:00'
+         - '2016-09-04 09:20:00'
 
    measures:
      - name: sales_amount
@@ -226,11 +243,19 @@ tables:
        description: The total amount of money generated from the sale.
        expr: amt
        default_aggregation: sum
+       data_type: NUMBER
+       sample_values:
+         - '11.650000'
+         - '50.880000'
 
      - name: sales_tax
        description: The sales tax paid for this sale.
        expr: amt * 0.0975
        default_aggregation: sum
+       data_type: NUMBER
+       sample_values:
+         - '51.650000'
+         - '57.800'
 
      - name: units_sold
        synonyms:
@@ -239,11 +264,19 @@ tables:
        description: The number of units sold in the transaction.
        expr: unts
        default_aggregation: sum
+       data_type: NUMBER
+       sample_values:
+         - '1'
+         - '3'
 
      - name: cost
        description: The cost of the product sold.
        expr: cst
        default_aggregation: sum
+       data_type: NUMBER
+       sample_values:
+         - '10'
+         - '33'
 
      - name: profit
        synonyms:
@@ -252,6 +285,10 @@ tables:
        description: The profit generated from a sale.
        expr: amt - cst
        default_aggregation: sum
+       data_type: NUMBER
+       sample_values:
+         - '15'
+         - '37'
 
    # A table can define commonly used filters over it. These filters can then be referenced in user questions directly.
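
Note on the README hunks above: the rename only touches the public argument name, `physical_tables` becoming `base_tables`; the values themselves are unchanged fully qualified table names. A minimal sketch of the updated call from the README snippet, using hypothetical placeholder table names, account, and model name (none of these values come from the patch):

```python
from semantic_model_generator.generate_model import generate_base_semantic_model_from_snowflake

# Hypothetical fully qualified names in {database}.{schema}.{table} form.
BASE_TABLES = ["my_db.my_schema.sd_data"]

generate_base_semantic_model_from_snowflake(
    base_tables=BASE_TABLES,               # renamed from physical_tables=
    snowflake_account="my_account",        # placeholder account identifier
    semantic_model_name="my sales model",  # placeholder model name
)
```
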
diff --git a/semantic_model_generator/generate_model.py b/semantic_model_generator/generate_model.py
index ea590885..ec159080 100644
--- a/semantic_model_generator/generate_model.py
+++ b/semantic_model_generator/generate_model.py
@@ -127,13 +127,13 @@ def _raw_table_to_semantic_context_table(
 
 
 def raw_schema_to_semantic_context(
-    physical_tables: List[str], snowflake_account: str, semantic_model_name: str
+    base_tables: List[str], snowflake_account: str, semantic_model_name: str
 ) -> semantic_model_pb2.SemanticModel:
     """
     Converts a list of fully qualified Snowflake table names into a semantic model.
 
     Parameters:
-    - physical_tables (list[str]): Fully qualified table names to include in the semantic model.
+    - base_tables (list[str]): Fully qualified table names to include in the semantic model.
     - snowflake_account (str): Snowflake account identifier.
     - semantic_model_name (str): A meaningful semantic model name.
 
@@ -155,7 +155,7 @@ def raw_schema_to_semantic_context(
     # For FQN tables, create a new snowflake connection per table in case the db/schema is different.
     table_objects = []
     unique_database_schema: List[str] = []
-    for table in physical_tables:
+    for table in base_tables:
         # Verify this is a valid FQN table. For now, we check that the table follows the following format.
         # {database}.{schema}.{table}
         fqn_table = create_fqn_table(table)
@@ -253,7 +253,7 @@ def _to_snake_case(s: str) -> str:
 
 
 def generate_base_semantic_model_from_snowflake(
-    physical_tables: List[str],
+    base_tables: List[str],
     snowflake_account: str,
     semantic_model_name: str,
     output_yaml_path: Optional[str] = None,
@@ -262,7 +262,7 @@ def generate_base_semantic_model_from_snowflake(
     Generates a base semantic context from specified Snowflake tables and exports it to a YAML file.
 
     Parameters:
-    physical_tables : Fully qualified names of Snowflake tables to include in the semantic context.
+    base_tables : Fully qualified names of Snowflake tables to include in the semantic context.
     snowflake_account: Identifier of the Snowflake account.
     semantic_model_name: The human readable model name. This should be semantically meaningful to an organization.
     output_yaml_path: Path for the output YAML file. If None, defaults to 'semantic_model_generator/output_models/YYYYMMDDHHMMSS_.yaml'.
@@ -280,7 +280,7 @@ def generate_base_semantic_model_from_snowflake(
     else:  # Assume user gives correct path.
         write_path = output_yaml_path
     context = raw_schema_to_semantic_context(
-        physical_tables,
+        base_tables,
         snowflake_account=snowflake_account,
         semantic_model_name=semantic_model_name,
     )
@@ -301,10 +301,10 @@ def generate_base_semantic_model_from_snowflake(
 
     )
     parser.add_argument(
-        "--physical_tables",
+        "--base_tables",
         type=list,
         required=True,
-        help="The list of fully qualified table names all following the format {database_name}.{schema_name}{table_name}",
+        help="The list of fully qualified table names all following the format {database_name}.{schema_name}.{table_name}",
     )
     parser.add_argument(
         "--snowflake_account",
@@ -328,7 +328,7 @@ def generate_base_semantic_model_from_snowflake(
     args = parser.parse_args()
 
     generate_base_semantic_model_from_snowflake(
-        physical_tables=args.physical_tables,
+        base_tables=args.base_tables,
         snowflake_account=args.snowflake_account,
        semantic_model_name=args.semantic_model_name,
        output_yaml_path=args.output_yaml_path,
diff --git a/semantic_model_generator/tests/generate_model_test.py b/semantic_model_generator/tests/generate_model_test.py
index e81b72d8..42b13a0d 100644
--- a/semantic_model_generator/tests/generate_model_test.py
+++ b/semantic_model_generator/tests/generate_model_test.py
@@ -217,11 +217,11 @@ def test_raw_schema_to_semantic_context(
     want_yaml = "name: this is the best semantic model ever\ntables:\n - name: ALIAS\n description: ' '\n base_table:\n database: test_db\n schema: schema_test\n table: ALIAS\n filters:\n - name: ' '\n synonyms:\n - ' '\n description: ' '\n expr: ' '\n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' '\n description: ' '\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' '\n description: ' '\n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' '\n description: ' '\n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' '\n description: ' '\n expr: CBSA\n data_type: NUMBER\n"
 
     snowflake_account = "test_account"
-    physical_tables = ["test_db.schema_test.ALIAS"]
+    base_tables = ["test_db.schema_test.ALIAS"]
     semantic_model_name = "this is the best semantic model ever"
 
     semantic_model = raw_schema_to_semantic_context(
-        physical_tables=physical_tables,
+        base_tables=base_tables,
         snowflake_account=snowflake_account,
         semantic_model_name=semantic_model_name,
     )
@@ -251,13 +251,13 @@ def test_generate_base_context_with_placeholder_comments(
     mock_snowflake_connection_env,
 ):
 
-    physical_tables = ["test_db.schema_test.ALIAS"]
+    base_tables = ["test_db.schema_test.ALIAS"]
     snowflake_account = "test_account"
     output_path = "output_model_path.yaml"
     semantic_model_name = "my awesome semantic model"
 
     generate_base_semantic_model_from_snowflake(
-        physical_tables=physical_tables,
+        base_tables=base_tables,
         snowflake_account=snowflake_account,
         output_yaml_path=output_path,
         semantic_model_name=semantic_model_name,
@@ -278,7 +278,7 @@ def test_generate_base_context_with_placeholder_comments_cross_database_cross_sc
     mock_snowflake_connection_env,
 ):
 
-    physical_tables = [
+    base_tables = [
         "test_db.schema_test.ALIAS",
         "a_different_database.a_different_schema.PRODUCTS",
     ]
     snowflake_account = "test_account"
     output_path = "output_model_path.yaml"
     semantic_model_name = "Another Incredible Semantic Model"
 
     generate_base_semantic_model_from_snowflake(
-        physical_tables=physical_tables,
+        base_tables=base_tables,
         snowflake_account=snowflake_account,
         output_yaml_path=output_path,
         semantic_model_name=semantic_model_name,
     )
@@ -310,13 +310,13 @@ def test_generate_base_context_with_placeholder_comments_missing_datatype(
     mock_snowflake_connection_env,
 ):
 
-    physical_tables = ["test_db.schema_test.ALIAS"]
+    base_tables = ["test_db.schema_test.ALIAS"]
     snowflake_account = "test_account"
     output_path = "output_model_path.yaml"
     semantic_model_name = "Another Incredible Semantic Model with new dtypes"
 
     generate_base_semantic_model_from_snowflake(
-        physical_tables=physical_tables,
+        base_tables=base_tables,
         snowflake_account=snowflake_account,
         output_yaml_path=output_path,
         semantic_model_name=semantic_model_name,
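
Beyond the rename, the README example above now records a `data_type` and a handful of `sample_values` for every dimension, time dimension, and measure. A minimal sketch of reading those new fields out of such a YAML document, assuming PyYAML is installed; the fragment below is adapted from the README example in this patch rather than taken from the repository's own code:

```python
import yaml

# A fragment shaped like the updated README example.
fragment = """
dimensions:
  - name: store_country
    description: The country where the sale took place.
    expr: cntry
    unique: false
    data_type: TEXT
    sample_values:
      - 'USA'
      - 'GBR'
"""

doc = yaml.safe_load(fragment)
for dim in doc["dimensions"]:
    # Each column now carries its Snowflake data type and a few example values.
    print(dim["name"], dim["data_type"], dim["sample_values"])
```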