Skip to content

Commit

Permalink
physical to base and sample values
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jhilgart committed May 3, 2024
1 parent da82cd3 commit e56bf26
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 21 deletions.
45 changes: 41 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ python
```python
from semantic_model_generator.generate_model import generate_base_semantic_model_from_snowflake

PHYSICAL_TABLES = ['<your-database-name-1>.<your-schema-name-1>.<your-physical-table-or-view-name-1>','<your-database-name-2>.<your-schema-name-2>.<your-physical-table-or-view-name-2>']
BASE_TABLES = ['<your-database-name-1>.<your-schema-name-1>.<your-base-table-or-view-name-1>','<your-database-name-2>.<your-schema-name-2>.<your-base-table-or-view-name-2>']
SNOWFLAKE_ACCOUNT = "<your-snowflake-account>"
SEMANTIC_MODEL_NAME = "<a-meaningful-semantic-model-name>"

generate_base_semantic_model_from_snowflake(
physical_tables=PHYSICAL_TABLES,
base_tables=BASE_TABLES,
snowflake_account=SNOWFLAKE_ACCOUNT,
semantic_model_name=SEMANTIC_MODEL_NAME
)
Expand All @@ -99,7 +99,7 @@ This is the script version run on the command line.
2. Run on your command line.
```bash
python -m semantic_model_generator.generate_model \
--physical_tables "['<your-database-name-1>.<your-schema-name-1>.<your-physical-table-or-view-name-1>','<your-database-name-2>.<your-schema-name-2>.<your-physical-table-or-view-name-2>']" \
--base_tables "['<your-database-name-1>.<your-schema-name-1>.<your-base-table-or-view-name-1>','<your-database-name-2>.<your-schema-name-2>.<your-base-table-or-view-name-2>']" \
--semantic_model_name "<a-meaningful-semantic-model-name>" \
--snowflake_account="<your-snowflake-account>"
```
Expand Down Expand Up @@ -181,7 +181,7 @@ tables:
description: A logical table capturing daily sales information across different store locations and product categories.
# The fully qualified name of the underlying base (physical) table.
physical_table:
base_table:
database: sales
schema: public
table: sd_data
Expand All @@ -195,11 +195,19 @@ tables:
description: The category of the product sold.
expr: cat
unique: false
data_type: NUMBER
sample_values:
- '501'
- '544'
- name: store_country
description: The country where the sale took place.
expr: cntry
unique: false
data_type: TEXT
sample_values:
- 'USA'
- 'GBR'
- name: sales_channel
synonyms:
Expand All @@ -208,6 +216,10 @@ tables:
description: The channel through which the sale was made.
expr: chn
unique: false
data_type: TEXT
sample_values:
- 'FB'
- 'GOOGLE'
time_dimensions:
- name: sale_timestamp
Expand All @@ -217,6 +229,11 @@ tables:
description: The time when the sale occurred, in UTC.
expr: dt
unique: false
data_type: TIMESTAMP
sample_values:
- '2016-09-01 07:30:00'
- '2016-09-01 14:16:00'
- '2016-09-04 09:20:00'
measures:
- name: sales_amount
Expand All @@ -226,11 +243,19 @@ tables:
description: The total amount of money generated from the sale.
expr: amt
default_aggregation: sum
data_type: NUMBER
sample_values:
- '11.650000'
- '50.880000'
- name: sales_tax
description: The sales tax paid for this sale.
expr: amt * 0.0975
default_aggregation: sum
data_type: NUMBER
sample_values:
- '51.650000'
- '57.800'
- name: units_sold
synonyms:
Expand All @@ -239,11 +264,19 @@ tables:
description: The number of units sold in the transaction.
expr: unts
default_aggregation: sum
data_type: NUMBER
sample_values:
- '1'
- '3'
- name: cost
description: The cost of the product sold.
expr: cst
default_aggregation: sum
data_type: NUMBER
sample_values:
- '10'
- '33'
- name: profit
synonyms:
Expand All @@ -252,6 +285,10 @@ tables:
description: The profit generated from a sale.
expr: amt - cst
default_aggregation: sum
data_type: NUMBER
sample_values:
- '15'
- '37'
# A table can define commonly used filters over it. These filters can then be referenced in user questions directly.
Expand Down
18 changes: 9 additions & 9 deletions semantic_model_generator/generate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,13 @@ def _raw_table_to_semantic_context_table(


def raw_schema_to_semantic_context(
physical_tables: List[str], snowflake_account: str, semantic_model_name: str
base_tables: List[str], snowflake_account: str, semantic_model_name: str
) -> semantic_model_pb2.SemanticModel:
"""
Converts a list of fully qualified Snowflake table names into a semantic model.
Parameters:
- physical_tables (list[str]): Fully qualified table names to include in the semantic model.
- base_tables (list[str]): Fully qualified table names to include in the semantic model.
- snowflake_account (str): Snowflake account identifier.
- semantic_model_name (str): A meaningful semantic model name.
Expand All @@ -155,7 +155,7 @@ def raw_schema_to_semantic_context(
# For FQN tables, create a new snowflake connection per table in case the db/schema is different.
table_objects = []
unique_database_schema: List[str] = []
for table in physical_tables:
for table in base_tables:
# Verify this is a valid FQN table. For now, we only check that the name has the following format:
# {database}.{schema}.{table}
fqn_table = create_fqn_table(table)
Expand Down Expand Up @@ -253,7 +253,7 @@ def _to_snake_case(s: str) -> str:


def generate_base_semantic_model_from_snowflake(
physical_tables: List[str],
base_tables: List[str],
snowflake_account: str,
semantic_model_name: str,
output_yaml_path: Optional[str] = None,
Expand All @@ -262,7 +262,7 @@ def generate_base_semantic_model_from_snowflake(
Generates a base semantic context from specified Snowflake tables and exports it to a YAML file.
Parameters:
physical_tables: Fully qualified names of Snowflake tables to include in the semantic context.
base_tables: Fully qualified names of Snowflake tables to include in the semantic context.
snowflake_account: Identifier of the Snowflake account.
semantic_model_name: The human readable model name. This should be semantically meaningful to an organization.
output_yaml_path: Path for the output YAML file. If None, defaults to 'semantic_model_generator/output_models/YYYYMMDDHHMMSS_<semantic_model_name>.yaml'.
Expand All @@ -280,7 +280,7 @@ def generate_base_semantic_model_from_snowflake(
else: # Assume user gives correct path.
write_path = output_yaml_path
context = raw_schema_to_semantic_context(
physical_tables,
base_tables,
snowflake_account=snowflake_account,
semantic_model_name=semantic_model_name,
)
Expand All @@ -301,10 +301,10 @@ def generate_base_semantic_model_from_snowflake(
)

parser.add_argument(
"--physical_tables",
"--base_tables",
type=list,
required=True,
help="The list of fully qualified table names all following the format {database_name}.{schema_name}{table_name}",
help="The list of fully qualified table names all following the format {database_name}.{schema_name}.{table_name}",
)
parser.add_argument(
"--snowflake_account",
Expand All @@ -328,7 +328,7 @@ def generate_base_semantic_model_from_snowflake(
args = parser.parse_args()

generate_base_semantic_model_from_snowflake(
physical_tables=args.physical_tables,
base_tables=args.base_tables,
snowflake_account=args.snowflake_account,
semantic_model_name=args.semantic_model_name,
output_yaml_path=args.output_yaml_path,
Expand Down
16 changes: 8 additions & 8 deletions semantic_model_generator/tests/generate_model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ def test_raw_schema_to_semantic_context(
want_yaml = "name: this is the best semantic model ever\ntables:\n - name: ALIAS\n description: ' '\n base_table:\n database: test_db\n schema: schema_test\n table: ALIAS\n filters:\n - name: ' '\n synonyms:\n - ' '\n description: ' '\n expr: ' '\n dimensions:\n - name: ZIP_CODE\n synonyms:\n - ' '\n description: ' '\n expr: ZIP_CODE\n data_type: TEXT\n time_dimensions:\n - name: BAD_ALIAS\n synonyms:\n - ' '\n description: ' '\n expr: BAD_ALIAS\n data_type: TIMESTAMP\n measures:\n - name: AREA_CODE\n synonyms:\n - ' '\n description: ' '\n expr: AREA_CODE\n data_type: NUMBER\n - name: CBSA\n synonyms:\n - ' '\n description: ' '\n expr: CBSA\n data_type: NUMBER\n"

snowflake_account = "test_account"
physical_tables = ["test_db.schema_test.ALIAS"]
base_tables = ["test_db.schema_test.ALIAS"]
semantic_model_name = "this is the best semantic model ever"

semantic_model = raw_schema_to_semantic_context(
physical_tables=physical_tables,
base_tables=base_tables,
snowflake_account=snowflake_account,
semantic_model_name=semantic_model_name,
)
Expand Down Expand Up @@ -251,13 +251,13 @@ def test_generate_base_context_with_placeholder_comments(
mock_snowflake_connection_env,
):

physical_tables = ["test_db.schema_test.ALIAS"]
base_tables = ["test_db.schema_test.ALIAS"]
snowflake_account = "test_account"
output_path = "output_model_path.yaml"
semantic_model_name = "my awesome semantic model"

generate_base_semantic_model_from_snowflake(
physical_tables=physical_tables,
base_tables=base_tables,
snowflake_account=snowflake_account,
output_yaml_path=output_path,
semantic_model_name=semantic_model_name,
Expand All @@ -278,7 +278,7 @@ def test_generate_base_context_with_placeholder_comments_cross_database_cross_sc
mock_snowflake_connection_env,
):

physical_tables = [
base_tables = [
"test_db.schema_test.ALIAS",
"a_different_database.a_different_schema.PRODUCTS",
]
Expand All @@ -287,7 +287,7 @@ def test_generate_base_context_with_placeholder_comments_cross_database_cross_sc
semantic_model_name = "Another Incredible Semantic Model"

generate_base_semantic_model_from_snowflake(
physical_tables=physical_tables,
base_tables=base_tables,
snowflake_account=snowflake_account,
output_yaml_path=output_path,
semantic_model_name=semantic_model_name,
Expand All @@ -310,13 +310,13 @@ def test_generate_base_context_with_placeholder_comments_missing_datatype(
mock_snowflake_connection_env,
):

physical_tables = ["test_db.schema_test.ALIAS"]
base_tables = ["test_db.schema_test.ALIAS"]
snowflake_account = "test_account"
output_path = "output_model_path.yaml"
semantic_model_name = "Another Incredible Semantic Model with new dtypes"

generate_base_semantic_model_from_snowflake(
physical_tables=physical_tables,
base_tables=base_tables,
snowflake_account=snowflake_account,
output_yaml_path=output_path,
semantic_model_name=semantic_model_name,
Expand Down

0 comments on commit e56bf26

Please sign in to comment.