From 87612e96af6be5b6c1865e4d992b5b0cc901528a Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 31 Jul 2024 15:34:36 -0500 Subject: [PATCH 01/16] Initial commit --- admin_apps/curate_semantic.py | 77 ++++++++++++++++++++++ admin_apps/journeys/builder.py | 114 ++++++++++++++++++++++----------- 2 files changed, 154 insertions(+), 37 deletions(-) create mode 100644 admin_apps/curate_semantic.py diff --git a/admin_apps/curate_semantic.py b/admin_apps/curate_semantic.py new file mode 100644 index 00000000..872afb73 --- /dev/null +++ b/admin_apps/curate_semantic.py @@ -0,0 +1,77 @@ +from typing import Optional +from snowflake.connector import SnowflakeConnection + + +revise_semantic_prompt = """You are a data analyst tasked with revising a semantic file for your enterprise. +You will receive an initial shell of a semantic file for Cortex Analyst and must update the semantic file using additional metadata files. +The generated Cortex Analyst semantic file MUST adhere to the following documentation: + +{docs} + +Follow the rules below. + +1. Generated descriptions should be concise. +2. Each tablename should correspond to a single logical table in the semantic file. Do not create multiple logical tables for a single tablename. +3. Do not make assumptions about filters. Table samples are not exhaustive of values. + + +{initial_semantic_file} + + +{metadata_files} + +Revised Semantic File: +""" + +def run_cortex_complete(conn: SnowflakeConnection, + model: str, + prompt: str, + prompt_args: Optional[dict] = None) -> str: + + if prompt_args: + prompt = prompt.format(**prompt_args)# .replace("'", "\\'") + complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" + # response = conn.cursor().execute(complete_sql).fetchone()[0] + + # return response + return prompt + +def format_metadata_files(metadata_files: dict) -> str: + metadata_str = "" + for fname, metadata in metadata_files.items(): + metadata_str += f"Filename: {metadata['filename']}\n" + metadata_str += f"Platform: {metadata['platform']}\n" + metadata_str += f"Contents: {metadata['contents']}\n" + return metadata_str + + +def get_cortex_analyst_docs(webpage: str = "https://docs.snowflake.com/LIMITEDACCESS/snowflake-cortex/semantic-model-spec") -> str: + # TODO: Slice the webpage to get the relevant documentation + import requests + from bs4 import BeautifulSoup + + r = requests.get(webpage) + soup = BeautifulSoup(r.text, "html.parser") + article = soup.find("article") + sections = article.find_all('section') + docs = "" + for section in sections: + section_id = section.get('id') + for copybutton in section.find_all(class_='copybutton'): + copybutton.decompose() + if section_id in ['key-concepts', + 'tips-for-creating-a-semantic-model', + 'specification', + 'example-yaml']: + section_text = section.get_text(strip=False) + docs += f"{section_text}\n" + docs = docs.replace("\n\n", "\n") + return docs + +def refine_with_other_metadata(conn: SnowflakeConnection, + model: str = 'mistral-large', + prompt: str = revise_semantic_prompt, + prompt_args: Optional[dict] = None) -> str: + + return run_cortex_complete(conn, model=model, prompt = prompt, prompt_args=prompt_args) + \ No newline at end of file diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index 0b6cf78d..248426d6 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -5,6 +5,7 @@ from semantic_model_generator.snowflake_utils.snowflake_connector import ( fetch_table_names, ) +from admin_apps.curate_semantic import 
format_metadata_files, get_cortex_analyst_docs, refine_with_other_metadata @st.cache_resource(show_spinner=False) @@ -27,46 +28,85 @@ def table_selector_dialog() -> None: st.write( "Please fill out the following fields to start building your semantic model." ) - with st.form("table_selector_form"): - model_name = st.text_input("Semantic Model Name") - sample_values = st.selectbox( - "Number of sample values", list(range(1, 11)), index=0 - ) - st.markdown("") + # with st.form("table_selector_form"): + model_name = st.text_input("Semantic Model Name") + sample_values = st.selectbox( + "Number of sample values", list(range(1, 11)), index=0 + ) + st.markdown("") + + if "available_tables" not in st.session_state: + with st.spinner("Loading table definitions..."): + st.session_state["available_tables"] = get_available_tables() - if "available_tables" not in st.session_state: - with st.spinner("Loading table definitions..."): - st.session_state["available_tables"] = get_available_tables() + tables = st.multiselect( + label="Tables", + options=st.session_state["available_tables"], + placeholder="Select the tables you'd like to include in your semantic model.", + ) - tables = st.multiselect( - label="Tables", - options=st.session_state["available_tables"], - placeholder="Select the tables you'd like to include in your semantic model.", - ) - st.markdown("
", unsafe_allow_html=True) - submit = st.form_submit_button( - "Submit", use_container_width=True, type="primary" - ) - if submit: - if not model_name: - st.error("Please provide a name for your semantic model.") - elif not tables: - st.error("Please select at least one table to proceed.") - else: - with st.spinner("Generating model..."): - yaml_str = generate_model_str_from_snowflake( - base_tables=tables, - snowflake_account=st.session_state["account_name"], - semantic_model_name=model_name, - n_sample_values=sample_values, # type: ignore - conn=get_snowflake_connection(), - ) + has_semantic = st.radio("Do you have existing semantic or reporting layers?", ("No", "Yes")) + if has_semantic == "Yes": + metadata_files = st.file_uploader('Upload metadata files', + accept_multiple_files=True) + if metadata_files: + metadata = {} + for uploaded_file in metadata_files: + fname = uploaded_file.name + metadata[fname] = {} + c1, c2, c3 = st.columns(3) + with c1: + st.write(fname) + with c2: + metadata[fname]['filename'] = st.text_input(f"filename_{fname}", + placeholder = "Descriptive filename", + label_visibility="collapsed") + with c3: + metadata[fname]['platform'] = st.text_input(f"platform_{fname}", + placeholder = "Enter source platform", + label_visibility="collapsed") + metadata[fname]['contents'] = uploaded_file.read().decode('utf-8') + st.session_state["metadata_files"] = metadata + else: + st.session_state["metadata_files"] = None + + st.markdown("
", unsafe_allow_html=True) + submit = st.button( + "Submit", use_container_width=True, type="primary" + ) + if submit: + if not model_name: + st.error("Please provide a name for your semantic model.") + elif not tables: + st.error("Please select at least one table to proceed.") + else: + with st.spinner("Generating model..."): + # yaml_str = generate_model_str_from_snowflake( + # base_tables=tables, + # snowflake_account=st.session_state["account_name"], + # semantic_model_name=model_name, + # n_sample_values=sample_values, # type: ignore + # conn=get_snowflake_connection(), + # ) + yaml_str = 'abc' + # Set the YAML session state so that the iteration app has access to the generated contents, + # then proceed to the iteration screen. + # st.session_state["yaml"] = yaml_str + # st.session_state["page"] = GeneratorAppScreen.ITERATION + + if st.session_state["metadata_files"]: + metadata_str = format_metadata_files(st.session_state["metadata_files"]) + prompt_args = { + "docs": get_cortex_analyst_docs(), + "initial_semantic_file": yaml_str, + "metadata_files": metadata_str + } + response = refine_with_other_metadata(conn = get_snowflake_connection(), + prompt_args = prompt_args) - # Set the YAML session state so that the iteration app has access to the generated contents, - # then proceed to the iteration screen. - st.session_state["yaml"] = yaml_str - st.session_state["page"] = GeneratorAppScreen.ITERATION - st.rerun() + # st.write(response) + st.text_area("Prompt", response, height=500) + # st.rerun() def show() -> None: From 722c02317bffafc49d801ae9fc84537420b5ddd4 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Thu, 1 Aug 2024 11:43:20 -0500 Subject: [PATCH 02/16] Add initial curation process --- admin_apps/curate_semantic.py | 17 ++++++++----- admin_apps/journeys/builder.py | 46 +++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/admin_apps/curate_semantic.py b/admin_apps/curate_semantic.py index 872afb73..d0aa720b 100644 --- a/admin_apps/curate_semantic.py +++ b/admin_apps/curate_semantic.py @@ -1,5 +1,6 @@ from typing import Optional from snowflake.connector import SnowflakeConnection +from snowflake.snowpark.exceptions import SnowparkSQLException revise_semantic_prompt = """You are a data analyst tasked with revising a semantic file for your enterprise. 
@@ -29,12 +30,11 @@ def run_cortex_complete(conn: SnowflakeConnection, prompt_args: Optional[dict] = None) -> str: if prompt_args: - prompt = prompt.format(**prompt_args)# .replace("'", "\\'") + prompt = prompt.format(**prompt_args).replace("'", "\\'") complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" - # response = conn.cursor().execute(complete_sql).fetchone()[0] + response = conn.cursor().execute(complete_sql).fetchone()[0] - # return response - return prompt + return response def format_metadata_files(metadata_files: dict) -> str: metadata_str = "" @@ -73,5 +73,10 @@ def refine_with_other_metadata(conn: SnowflakeConnection, prompt: str = revise_semantic_prompt, prompt_args: Optional[dict] = None) -> str: - return run_cortex_complete(conn, model=model, prompt = prompt, prompt_args=prompt_args) - \ No newline at end of file + error = '' # Used as a flag to enable builder workflow to continue with prior state + try: + response = run_cortex_complete(conn, model=model, prompt = prompt, prompt_args=prompt_args) + return response, error + except Exception as e: + error = f'Error encountered: {str(e)}' + return '', error \ No newline at end of file diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index 248426d6..367b4b88 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -81,32 +81,38 @@ def table_selector_dialog() -> None: st.error("Please select at least one table to proceed.") else: with st.spinner("Generating model..."): - # yaml_str = generate_model_str_from_snowflake( - # base_tables=tables, - # snowflake_account=st.session_state["account_name"], - # semantic_model_name=model_name, - # n_sample_values=sample_values, # type: ignore - # conn=get_snowflake_connection(), - # ) - yaml_str = 'abc' - # Set the YAML session state so that the iteration app has access to the generated contents, - # then proceed to the iteration screen. - # st.session_state["yaml"] = yaml_str - # st.session_state["page"] = GeneratorAppScreen.ITERATION - + yaml_str = generate_model_str_from_snowflake( + base_tables=tables, + snowflake_account=st.session_state["account_name"], + semantic_model_name=model_name, + n_sample_values=sample_values, # type: ignore + conn=get_snowflake_connection(), + ) if st.session_state["metadata_files"]: - metadata_str = format_metadata_files(st.session_state["metadata_files"]) + metadata_str = format_metadata_files(st.session_state["metadata_files"]) # Make metadata string format-friendly for prompt prompt_args = { - "docs": get_cortex_analyst_docs(), + "docs": get_cortex_analyst_docs(), # Scrape semantic file docs and pass as string "initial_semantic_file": yaml_str, "metadata_files": metadata_str } - response = refine_with_other_metadata(conn = get_snowflake_connection(), - prompt_args = prompt_args) + response, curate_error = refine_with_other_metadata(conn = get_snowflake_connection(), + prompt_args = prompt_args) + if curate_error: + st.warning(f"There was an error curating the semantic model. {curate_error}") + st.session_state["yaml"] = yaml_str + else: + st.session_state["yaml"] = response + st.text_area(response, response, height=500) + + else: + st.session_state["yaml"] = yaml_str + + # Set the YAML session state so that the iteration app has access to the generated contents, + # then proceed to the iteration screen. 
+ st.session_state["page"] = GeneratorAppScreen.ITERATION + - # st.write(response) - st.text_area("Prompt", response, height=500) - # st.rerun() + st.rerun() def show() -> None: From c7a5368beb6c2d7ed1e0829a68258b7f893c2318 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Sat, 10 Aug 2024 13:07:29 -0500 Subject: [PATCH 03/16] Pivot to programmatic translation for dbt --- admin_apps/curate_semantic.py | 82 ------------------------------ admin_apps/journeys/builder.py | 55 +++++--------------- admin_apps/journeys/iteration.py | 43 +++++++++++++++- admin_apps/partner_semantic.py | 86 ++++++++++++++++++++++++++++++++ admin_apps/shared_utils.py | 19 +++++++ 5 files changed, 158 insertions(+), 127 deletions(-) delete mode 100644 admin_apps/curate_semantic.py create mode 100644 admin_apps/partner_semantic.py diff --git a/admin_apps/curate_semantic.py b/admin_apps/curate_semantic.py deleted file mode 100644 index d0aa720b..00000000 --- a/admin_apps/curate_semantic.py +++ /dev/null @@ -1,82 +0,0 @@ -from typing import Optional -from snowflake.connector import SnowflakeConnection -from snowflake.snowpark.exceptions import SnowparkSQLException - - -revise_semantic_prompt = """You are a data analyst tasked with revising a semantic file for your enterprise. -You will receive an initial shell of a semantic file for Cortex Analyst and must update the semantic file using additional metadata files. -The generated Cortex Analyst semantic file MUST adhere to the following documentation: - -{docs} - -Follow the rules below. - -1. Generated descriptions should be concise. -2. Each tablename should correspond to a single logical table in the semantic file. Do not create multiple logical tables for a single tablename. -3. Do not make assumptions about filters. Table samples are not exhaustive of values. 
- - -{initial_semantic_file} - - -{metadata_files} - -Revised Semantic File: -""" - -def run_cortex_complete(conn: SnowflakeConnection, - model: str, - prompt: str, - prompt_args: Optional[dict] = None) -> str: - - if prompt_args: - prompt = prompt.format(**prompt_args).replace("'", "\\'") - complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" - response = conn.cursor().execute(complete_sql).fetchone()[0] - - return response - -def format_metadata_files(metadata_files: dict) -> str: - metadata_str = "" - for fname, metadata in metadata_files.items(): - metadata_str += f"Filename: {metadata['filename']}\n" - metadata_str += f"Platform: {metadata['platform']}\n" - metadata_str += f"Contents: {metadata['contents']}\n" - return metadata_str - - -def get_cortex_analyst_docs(webpage: str = "https://docs.snowflake.com/LIMITEDACCESS/snowflake-cortex/semantic-model-spec") -> str: - # TODO: Slice the webpage to get the relevant documentation - import requests - from bs4 import BeautifulSoup - - r = requests.get(webpage) - soup = BeautifulSoup(r.text, "html.parser") - article = soup.find("article") - sections = article.find_all('section') - docs = "" - for section in sections: - section_id = section.get('id') - for copybutton in section.find_all(class_='copybutton'): - copybutton.decompose() - if section_id in ['key-concepts', - 'tips-for-creating-a-semantic-model', - 'specification', - 'example-yaml']: - section_text = section.get_text(strip=False) - docs += f"{section_text}\n" - docs = docs.replace("\n\n", "\n") - return docs - -def refine_with_other_metadata(conn: SnowflakeConnection, - model: str = 'mistral-large', - prompt: str = revise_semantic_prompt, - prompt_args: Optional[dict] = None) -> str: - - error = '' # Used as a flag to enable builder workflow to continue with prior state - try: - response = run_cortex_complete(conn, model=model, prompt = prompt, prompt_args=prompt_args) - return response, error - except Exception as e: - error = f'Error encountered: {str(e)}' - return '', error \ No newline at end of file diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index 367b4b88..3f402528 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -1,11 +1,17 @@ import streamlit as st -from admin_apps.shared_utils import GeneratorAppScreen, get_snowflake_connection +from admin_apps.shared_utils import GeneratorAppScreen, get_snowflake_connection, upload_partner_semantic from semantic_model_generator.generate_model import generate_model_str_from_snowflake from semantic_model_generator.snowflake_utils.snowflake_connector import ( fetch_table_names, ) -from admin_apps.curate_semantic import format_metadata_files, get_cortex_analyst_docs, refine_with_other_metadata +from semantic_model_generator.data_processing.proto_utils import ( + yaml_to_semantic_model, +) +from admin_apps.partner_semantic import ( + load_yaml_file, + extract_key_values +) @st.cache_resource(show_spinner=False) @@ -45,31 +51,11 @@ def table_selector_dialog() -> None: placeholder="Select the tables you'd like to include in your semantic model.", ) - has_semantic = st.radio("Do you have existing semantic or reporting layers?", ("No", "Yes")) + has_semantic = st.radio("Do you have an existing semantic for these tables(s) in a partner tool?", ("No", "Yes")) if has_semantic == "Yes": - metadata_files = st.file_uploader('Upload metadata files', - accept_multiple_files=True) - if metadata_files: - metadata = {} - for uploaded_file in metadata_files: - fname = 
uploaded_file.name - metadata[fname] = {} - c1, c2, c3 = st.columns(3) - with c1: - st.write(fname) - with c2: - metadata[fname]['filename'] = st.text_input(f"filename_{fname}", - placeholder = "Descriptive filename", - label_visibility="collapsed") - with c3: - metadata[fname]['platform'] = st.text_input(f"platform_{fname}", - placeholder = "Enter source platform", - label_visibility="collapsed") - metadata[fname]['contents'] = uploaded_file.read().decode('utf-8') - st.session_state["metadata_files"] = metadata + upload_partner_semantic() else: - st.session_state["metadata_files"] = None - + st.session_state["partner_semantic"] = None st.markdown("
", unsafe_allow_html=True) submit = st.button( "Submit", use_container_width=True, type="primary" @@ -88,30 +74,13 @@ def table_selector_dialog() -> None: n_sample_values=sample_values, # type: ignore conn=get_snowflake_connection(), ) - if st.session_state["metadata_files"]: - metadata_str = format_metadata_files(st.session_state["metadata_files"]) # Make metadata string format-friendly for prompt - prompt_args = { - "docs": get_cortex_analyst_docs(), # Scrape semantic file docs and pass as string - "initial_semantic_file": yaml_str, - "metadata_files": metadata_str - } - response, curate_error = refine_with_other_metadata(conn = get_snowflake_connection(), - prompt_args = prompt_args) - if curate_error: - st.warning(f"There was an error curating the semantic model. {curate_error}") - st.session_state["yaml"] = yaml_str - else: - st.session_state["yaml"] = response - st.text_area(response, response, height=500) - else: - st.session_state["yaml"] = yaml_str + st.session_state["yaml"] = yaml_str # Set the YAML session state so that the iteration app has access to the generated contents, # then proceed to the iteration screen. st.session_state["page"] = GeneratorAppScreen.ITERATION - st.rerun() diff --git a/admin_apps/journeys/iteration.py b/admin_apps/journeys/iteration.py index 00632d55..58f0a6c1 100644 --- a/admin_apps/journeys/iteration.py +++ b/admin_apps/journeys/iteration.py @@ -20,6 +20,7 @@ init_session_states, upload_yaml, validate_and_upload_tmp_yaml, + upload_partner_semantic, ) from semantic_model_generator.data_processing.cte_utils import ( context_to_column_format, @@ -43,6 +44,13 @@ ) from semantic_model_generator.validate_model import validate +from admin_apps.partner_semantic import ( + load_yaml_file, + extract_key_values, + extract_expressions_from_sections, + make_field_df +) + def get_file_name() -> str: return st.session_state.file_name # type: ignore @@ -382,6 +390,28 @@ def upload_handler(file_name: str) -> None: ) upload_handler(new_name) +@st.experimental_dialog(f"Integrate partner tool semantic specs", width="large") +def integrate_partner_semantics() -> None: + # User either came right to iteration app or did not upload partner semantic in builder + if 'partner_semantic' not in st.session_state: + upload_partner_semantic() + # User uploaded in builder or just uploaded while in iteration + if 'partner_semantic' in st.session_state: + st.write("Select which semantic layers to compare.") + c1, c2 = st.columns(2) + with c1: # TODO: Need to parser a semantic file for cortex analyst to run comparison + semantic_snowflake_tbl = st.selectbox("Snowflake", + list(st.session_state.ctx_table_col_expr_dict.keys())) + with c2: + semantic_partner_tbl = st.selectbox("Partner", + extract_key_values(st.session_state["partner_semantic"], 'name')) + if st.button("Compare"): + partner_view = [x for x in st.session_state["partner_semantic"] if x.get('name') == semantic_partner_tbl][0] + partner_fields = extract_expressions_from_sections(partner_view, ['dimensions', 'measures', 'entities']) + partner_fields_df = make_field_df(partner_fields) + st.dataframe(partner_fields_df, hide_index=True, use_container_width=True) + + def update_container( container: DeltaGenerator, content: str, prefix: Optional[str] @@ -441,7 +471,7 @@ def yaml_editor(yaml_str: str) -> None: status_container = st.empty() with button_container: - left, right, _ = st.columns((1, 1, 2)) + left, center, right = st.columns(3) if left.button("Save", use_container_width=True, help=SAVE_HELP): # Validate new content try: 

@@ -464,12 +494,18 @@ def yaml_editor(yaml_str: str) -> None:
                 )
                 exception_as_dialog(e)
 
-        if right.button(
+        if center.button(
             "Upload",
             use_container_width=True,
             help=UPLOAD_HELP,
         ):
             upload_dialog(content)
+        if right.button(
+            "Translate",
+            use_container_width=True,
+            help=TRANSLATE_HELP,
+        ):
+            integrate_partner_semantics()
 
     # Render the validation state (success=True, failed=False, editing=None) in the editor.
     if st.session_state.validated:
@@ -516,6 +552,9 @@ def set_up_requirements() -> None:
     you think your semantic model is doing great and should be pushed to prod! Note that the semantic model must be validated to be uploaded."""
 
+TRANSLATE_HELP = """Have an existing semantic layer in a partner tool that's integrated
+with Snowflake? Use this feature to integrate partner semantic specs into Cortex Analyst's spec."""
+
 
 def show() -> None:
     init_session_states()
diff --git a/admin_apps/partner_semantic.py b/admin_apps/partner_semantic.py
new file mode 100644
index 00000000..7d50c2c4
--- /dev/null
+++ b/admin_apps/partner_semantic.py
@@ -0,0 +1,86 @@
+from typing import Optional
+import yaml
+
+import pandas as pd
+from snowflake.connector import SnowflakeConnection
+# from snowflake.snowpark.exceptions import SnowparkSQLException
+
+def unpack_yaml(data):
+    """
+    Recursively unpacks a YAML structure into plain Python dicts, lists, and scalars.
+    """
+    if isinstance(data, dict):
+        return {key: unpack_yaml(value) for key, value in data.items()}
+    elif isinstance(data, list):
+        return [unpack_yaml(item) for item in data]
+    else:
+        return data
+
+def load_yaml_file(file_paths) -> list[dict]:
+    """
+    Loads one or more YAML files and combines their contents into a single list.
+    """
+    combined_yaml = []
+    for file_path in file_paths:
+        yaml_content = yaml.safe_load(file_path)
+        combined_yaml.append(unpack_yaml(yaml_content))
+    return combined_yaml
+
+def extract_key_values(data: list[dict], key: str) -> list[dict]:
+    """
+    Extracts the values stored under the given key from each dictionary in a list.
+    """
+    result = []
+    for item in data:
+        values = item.get(key, [])
+        if isinstance(values, list):
+            result.extend(values)
+        else:
+            result.append(values)
+    return result
+
+def extract_dbt_models(yaml_data: list[dict]) -> list:
+    """
+    Extracts dbt semantic model names from a list of parsed YAML files.
+    """
+
+    return [x.get('name', None) for model in yaml_data for x in model.get('semantic_models', [])]
+
+def extract_expressions_from_sections(data_dict: dict, section_names: list[str]) -> dict:
+    """
+    Extracts the fields under each of section_names into a nested dictionary,
+    keyed by each field's lowercased expr (falling back to its name).
+    """
+    def extract_key(obj):
+        return obj.get('expr', obj['name']).lower()
+
+    d = {}
+    for i in section_names:
+        d[i] = {extract_key(obj): obj for obj in data_dict.get(i, [])}
+
+    return d
+
+def make_field_df(fields):
+    """
+    Converts a nested dictionary of fields into a DataFrame.
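
+    A sketch of the expected shape, using hypothetical input: passing
+    {'dimensions': {'region': {'name': 'Region'}}} yields a single row with
+    section='dimensions', field_key='region', and field_details={'name': 'Region'}.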
+ """ + rows = [] + for section, entity_list in fields.items(): + for field_key, field_details in entity_list.items(): + rows.append({'section': section, + 'field_key': field_key, + 'field_details': field_details + }) + return pd.DataFrame(rows) + + +def run_cortex_complete(conn: SnowflakeConnection, + model: str, + prompt: str, + prompt_args: Optional[dict] = None) -> str: + + if prompt_args: + prompt = prompt.format(**prompt_args).replace("'", "\\'") + complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" + response = conn.cursor().execute(complete_sql).fetchone()[0] + + return response diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 1a37c255..f576ab8b 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -25,6 +25,11 @@ set_schema, ) +from admin_apps.partner_semantic import ( + load_yaml_file, + extract_key_values +) + SNOWFLAKE_ACCOUNT = os.environ.get("SNOWFLAKE_ACCOUNT_LOCATOR", "") _TMP_FILE_NAME = f"admin_app_temp_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}" @@ -823,6 +828,20 @@ def download_yaml(file_name: str, conn: SnowflakeConnection) -> str: # Read the raw contents from {temp_dir}/{file_name} and return it as a string. yaml_str = temp_file.read() return yaml_str + +def upload_partner_semantic() -> None: + """ + Upload the semantic model to a stage. + """ + partners = [None, "dbt"] + + st.session_state["partner_tool"] = st.selectbox("Select the partner tool", partners) + if st.session_state["partner_tool"] == "dbt": + uploaded_files = st.file_uploader(f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)', + type=['yaml', 'yml'], + accept_multiple_files=True) + if uploaded_files: + st.session_state["partner_semantic"] = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') @dataclass From 860bed9b19d9015fe0a1208898df39dae4cdf2ed Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Tue, 13 Aug 2024 12:40:33 -0500 Subject: [PATCH 04/16] Add working draft of partner merge functionality --- admin_apps/app.py | 9 +- admin_apps/journeys/builder.py | 2 +- admin_apps/journeys/iteration.py | 100 +++++++++++++++--- admin_apps/partner_semantic.py | 83 +++++++++++++++ admin_apps/shared_utils.py | 80 ++++++++++++-- .../data_processing/proto_utils.py | 21 ++++ 6 files changed, 267 insertions(+), 28 deletions(-) diff --git a/admin_apps/app.py b/admin_apps/app.py index bcdce8e4..3b64fd8b 100644 --- a/admin_apps/app.py +++ b/admin_apps/app.py @@ -1,6 +1,9 @@ import streamlit as st from snowflake.connector import DatabaseError +# set_page_config must be run as the first Streamlit command on the page, before any other streamlit imports. +st.set_page_config(layout="wide", page_icon="💬", page_title="Semantic Model Generator") + from admin_apps.shared_utils import GeneratorAppScreen, get_snowflake_connection from semantic_model_generator.snowflake_utils.env_vars import ( SNOWFLAKE_ACCOUNT_LOCATOR, @@ -9,11 +12,9 @@ assert_required_env_vars, ) -# set_page_config must be run as the first Streamlit command on the page, before any other streamlit imports. -st.set_page_config(layout="wide", page_icon="💬", page_title="Semantic Model Generator") -@st.experimental_dialog(title="Setup") +@st.dialog(title="Setup") def env_setup_popup(missing_env_vars: list[str]) -> None: """ Renders a dialog box to prompt the user to set the required environment variables. 
@@ -28,7 +29,7 @@ def env_setup_popup(missing_env_vars: list[str]) -> None: st.stop() -@st.experimental_dialog(title="Connection Error") +@st.dialog(title="Connection Error") def failed_connection_popup() -> None: """ Renders a dialog box detailing that the credentials provided could not be used to connect to Snowflake. diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index 3f402528..914bd936 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -25,7 +25,7 @@ def get_available_tables() -> list[str]: return fetch_table_names(get_snowflake_connection()) -@st.experimental_dialog("Selecting your tables", width="large") +@st.dialog("Selecting your tables", width="large") def table_selector_dialog() -> None: """ Renders a dialog box for the user to input the tables they want to use in their semantic model. diff --git a/admin_apps/journeys/iteration.py b/admin_apps/journeys/iteration.py index 58f0a6c1..3dd98fdf 100644 --- a/admin_apps/journeys/iteration.py +++ b/admin_apps/journeys/iteration.py @@ -1,8 +1,10 @@ import json import time from typing import Any, Dict, List, Optional +import yaml import pandas as pd +import numpy as np import requests import sqlglot import streamlit as st @@ -21,6 +23,7 @@ upload_yaml, validate_and_upload_tmp_yaml, upload_partner_semantic, + PartnerCompareRow ) from semantic_model_generator.data_processing.cte_utils import ( context_to_column_format, @@ -31,6 +34,7 @@ from semantic_model_generator.data_processing.proto_utils import ( proto_to_yaml, yaml_to_semantic_model, + proto_to_dict, ) from semantic_model_generator.protos import semantic_model_pb2 from semantic_model_generator.snowflake_utils.env_vars import ( @@ -48,7 +52,8 @@ load_yaml_file, extract_key_values, extract_expressions_from_sections, - make_field_df + make_field_df, + determine_field_section ) @@ -140,7 +145,7 @@ def show_expr_for_ref(message_index: int) -> None: st.dataframe(col_df, hide_index=True, use_container_width=True, height=250) -@st.experimental_dialog("Edit", width="large") +@st.dialog("Edit", width="large") def edit_verified_query( conn: SnowflakeConnection, sql: str, question: str, message_index: int ) -> None: @@ -339,7 +344,7 @@ def chat_and_edit_vqr(_conn: SnowflakeConnection) -> None: st.session_state.active_suggestion = None -@st.experimental_dialog("Upload", width="small") +@st.dialog("Upload", width="small") def upload_dialog(content: str) -> None: def upload_handler(file_name: str) -> None: if not st.session_state.validated and changed_from_last_validated_model(): @@ -390,27 +395,92 @@ def upload_handler(file_name: str) -> None: ) upload_handler(new_name) -@st.experimental_dialog(f"Integrate partner tool semantic specs", width="large") +@st.dialog(f"Integrate partner tool semantic specs", width="large") def integrate_partner_semantics() -> None: + # User either came right to iteration app or did not upload partner semantic in builder if 'partner_semantic' not in st.session_state: upload_partner_semantic() # User uploaded in builder or just uploaded while in iteration if 'partner_semantic' in st.session_state: - st.write("Select which semantic layers to compare.") + # Get cortex semantic file as dictionary + cortex_semantic = proto_to_dict(st.session_state['semantic_model']) + cortex_tables = [i.get('name', None) for i in cortex_semantic['tables']] + partner_tables = extract_key_values(st.session_state["partner_semantic"], 'name') + st.write("Select which logical views to compare.") c1, c2 = st.columns(2) - with c1: # 
TODO: Need to parser a semantic file for cortex analyst to run comparison - semantic_snowflake_tbl = st.selectbox("Snowflake", - list(st.session_state.ctx_table_col_expr_dict.keys())) + with c1: + semantic_cortex_tbl = st.selectbox("Snowflake", cortex_tables) with c2: - semantic_partner_tbl = st.selectbox("Partner", - extract_key_values(st.session_state["partner_semantic"], 'name')) - if st.button("Compare"): + semantic_partner_tbl = st.selectbox("Partner", partner_tables) + # TO DO add mass selection options + st.session_state['partner_metadata_preference'] = st.selectbox( + "For fields shared in both, select default", + ["Partner", "Cortex"], + index = 0, + help = "Which semantic file should be checked first for necessary metadata. Where metadata is missing, the other semantic file will be checked." + ) + st.session_state['keep_extra_cortex'] = st.toggle("Keep unmatched Cortex fields", + value = True + ) + st.session_state['keep_extra_partner'] = st.toggle("Keep unmatched Partner fields", + value = True + ) + # if st.toggle("Compare Fields"): + with st.expander("Compare Fields", expanded=False): partner_view = [x for x in st.session_state["partner_semantic"] if x.get('name') == semantic_partner_tbl][0] partner_fields = extract_expressions_from_sections(partner_view, ['dimensions', 'measures', 'entities']) partner_fields_df = make_field_df(partner_fields) - st.dataframe(partner_fields_df, hide_index=True, use_container_width=True) + + cortex_view = [x for x in cortex_semantic['tables'] if x.get('name') == semantic_cortex_tbl][0] + cortex_fields = extract_expressions_from_sections(cortex_view, ['dimensions', 'time_dimensions', 'measures']) + cortex_fields_df = make_field_df(cortex_fields) + combined_fields_df = cortex_fields_df.merge(partner_fields_df, on='field_key', how='outer', suffixes=('_cortex', '_partner')).replace(np.nan, None) + # Convert json strings to dict for easier extraction later + for col in ['field_details_cortex', 'field_details_partner']: + combined_fields_df[col] = combined_fields_df[col].apply(lambda x: json.loads(x) if not pd.isnull(x) and not isinstance(x, dict) else x) + + dimensions, measures, time_dimensions = st.container(border=True), st.container(border=True), st.container(border=True) + dimensions.write("Dimensions") + measures.write("Measures") + time_dimensions.write("Time_dimensions") + + dimensions_section, measures_sections, time_dimensions_section = [], [], [] + + for k,v in combined_fields_df.iterrows(): + # Get destination section for cortex analyst semantic file + target_section, target_data_type = determine_field_section( + v['section_cortex'], + v['section_partner'], + v['field_details_cortex'], + v['field_details_partner']) + if target_section == 'dimensions': + with dimensions: + dimensions_section.append({**PartnerCompareRow(row_data=v).render_row(), 'data_type': target_data_type}) + if target_section == 'measures': + with measures: + measures_sections.append({**PartnerCompareRow(row_data=v).render_row(), 'data_type': target_data_type}) + if target_section == 'time_dimensions': + with time_dimensions: + time_dimensions_section.append({**PartnerCompareRow(row_data=v).render_row(), 'data_type': target_data_type}) + if st.button("Integrate"): + # Update fields in cortex semantic model + for i, tbl in enumerate(cortex_semantic['tables']): + if tbl.get('name', None) == semantic_cortex_tbl: + cortex_semantic['tables'][i]['dimensions'] = dimensions_section + cortex_semantic['tables'][i]['measures'] = measures_sections + 
cortex_semantic['tables'][i]['time_dimensions'] = time_dimensions_section
+            # Submitted changes to fields will be captured in the yaml editor
+            # User will need to make necessary modifications there before validating/uploading
+            try:
+                st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False)
+                st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"])
+                st.success("Integration complete! Please validate your semantic model before uploading.")
+                st.rerun()
+            except Exception as e:
+                st.error(f"Integration failed: {e}")
+
 
 
 def update_container(
@@ -441,7 +511,7 @@ def update_container(
     container.markdown(content)
 
 
-@st.experimental_dialog("Error", width="small")
+@st.dialog("Error", width="small")
 def exception_as_dialog(e: Exception) -> None:
     st.error(f"An error occurred: {e}")
 
@@ -486,7 +556,7 @@ def yaml_editor(yaml_str: str) -> None:
             )
             st.session_state.semantic_model = yaml_to_semantic_model(content)
             st.session_state.last_saved_yaml = content
-            st.rerun()
+            st.rerun()  # TODO: Troubleshoot why this causes the RerunData(page_script_hash) error
         except Exception as e:
             st.session_state["validated"] = False
             update_container(
@@ -516,7 +586,7 @@ def yaml_editor(yaml_str: str) -> None:
         update_container(status_container, "editing", prefix=status_container_title)
 
 
-@st.experimental_dialog("Welcome to the Iteration app! 💬", width="large")
+@st.dialog("Welcome to the Iteration app! 💬", width="large")
 def set_up_requirements() -> None:
     """
     Collects existing YAML location from the user so that we can download it.
diff --git a/admin_apps/partner_semantic.py b/admin_apps/partner_semantic.py
index 7d50c2c4..2c8d522e 100644
--- a/admin_apps/partner_semantic.py
+++ b/admin_apps/partner_semantic.py
@@ -1,5 +1,6 @@
 from typing import Optional
 import yaml
+import json
 
 import pandas as pd
 from snowflake.connector import SnowflakeConnection
@@ -72,6 +73,87 @@ def make_field_df(fields):
         })
     return pd.DataFrame(rows)
 
+def determine_field_section(section_cortex: str,
+                            section_partner: str,
+                            field_details_cortex: dict,
+                            field_details_partner: dict) -> tuple[str, str]:
+    """
+    Derives the intended section and data type of a field in the Cortex Analyst model.
+
+    Currently expects dbt as the source.
+    """
+
+    if section_cortex and field_details_cortex:
+        try:
+            # field_details_cortex = json.loads(field_details_cortex)
+            data_type = field_details_cortex.get('data_type', None)
+        except TypeError:
+            data_type = 'TEXT'
+        return (section_cortex, data_type)
+    else:  # No matching cortex field found; the field in partner is a novel logical field
+        if section_partner == 'entities':
+            section_cortex = 'dimensions'
+            data_type = 'TEXT'
+        elif section_partner == 'measures':
+            section_cortex = 'measures'
+            data_type = 'NUMBER'
+        else:  # section_partner == 'dimensions'
+            try:
+                # field_details_partner = json.loads(field_details_partner)
+                if field_details_partner.get('type') == 'time':
+                    section_cortex = 'time_dimensions'
+                    data_type = 'DATE'
+                else:
+                    section_cortex = 'dimensions'
+                    data_type = 'TEXT'
+            except TypeError:
+                # Fall back to a plain text dimension if the partner details are malformed
+                section_cortex = 'dimensions'
+                data_type = 'TEXT'
+        return (section_cortex, data_type)
+
+
+def merge_fields(field_key: str,
+                 section_cortex: str,
+                 section_dbt: str,
+                 field_details_cortex: dict,
+                 field_details_dbt: dict) -> tuple[str, dict]:  # (section, field_details)
+    """
+    Merges field details from Cortex and dbt into a single field, returning the target section and field details.
+ """ + # If the field is present in both models, we keep cortex details and add dbt details + if section_cortex and section_dbt: + selected_details = field_details_cortex + for k in ['description', 'name']: + selected_details[k] = field_details_dbt.get(k, field_details_cortex.get(k, None)) + selected_details['expr'] = field_key # Unique key will become expr for cortex + return (section_cortex, selected_details) + + # If field exists in dbt but not in cortex, we add shell to cortex to keep field + elif section_dbt and not section_cortex: + if section_dbt == 'entities': + section_cortex = 'dimensions' + data_type = 'TEXT' + elif section_dbt == 'measures': + section_cortex = 'measures' + data_type = 'NUMBER' + else: # section_dbt == 'dimensions' + if field_details_dbt.get('type') == 'time': + section_cortex = 'time_dimensions' + data_type = 'DATE' + else: + section_cortex = 'dimensions' + data_type = 'TEXT' + return (section_cortex, { + 'name': field_details_dbt.get('name', None), + 'synonyms': [' '], + 'description': field_details_dbt.get('description', None), + 'expr': field_key, + 'data_type': data_type + }) + + else: + return (section_cortex, field_details_cortex) + def run_cortex_complete(conn: SnowflakeConnection, model: str, @@ -84,3 +166,4 @@ def run_cortex_complete(conn: SnowflakeConnection, response = conn.cursor().execute(complete_sql).fetchone()[0] return response + diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index f576ab8b..9e7d791f 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -2,6 +2,7 @@ import os import time +import json from dataclasses import dataclass from datetime import datetime from enum import Enum @@ -130,7 +131,7 @@ def init_session_states() -> None: st.session_state.confirmed_edits = False -@st.experimental_dialog("Edit Dimension") # type: ignore[misc] +@st.dialog("Edit Dimension") # type: ignore[misc] def edit_dimension(table_name: str, dim: semantic_model_pb2.Dimension) -> None: """ Renders a dialog box to edit an existing dimension. @@ -180,7 +181,7 @@ def edit_dimension(table_name: str, dim: semantic_model_pb2.Dimension) -> None: st.rerun() -@st.experimental_dialog("Add Dimension") # type: ignore[misc] +@st.dialog("Add Dimension") # type: ignore[misc] def add_dimension(table: semantic_model_pb2.Table) -> None: """ Renders a dialog box to add a new dimension. @@ -219,7 +220,7 @@ def add_dimension(table: semantic_model_pb2.Table) -> None: st.rerun() -@st.experimental_dialog("Edit Measure") # type: ignore[misc] +@st.dialog("Edit Measure") # type: ignore[misc] def edit_measure(table_name: str, measure: semantic_model_pb2.Measure) -> None: """ Renders a dialog box to edit an existing measure. @@ -292,7 +293,7 @@ def edit_measure(table_name: str, measure: semantic_model_pb2.Measure) -> None: st.rerun() -@st.experimental_dialog("Add Measure") # type: ignore[misc] +@st.dialog("Add Measure") # type: ignore[misc] def add_measure(table: semantic_model_pb2.Table) -> None: """ Renders a dialog box to add a new measure. 
@@ -352,7 +353,7 @@ def add_measure(table: semantic_model_pb2.Table) -> None: st.rerun() -@st.experimental_dialog("Edit Time Dimension") # type: ignore[misc] +@st.dialog("Edit Time Dimension") # type: ignore[misc] def edit_time_dimension( table_name: str, tdim: semantic_model_pb2.TimeDimension ) -> None: @@ -397,7 +398,7 @@ def edit_time_dimension( st.rerun() -@st.experimental_dialog("Add Time Dimension") # type: ignore[misc] +@st.dialog("Add Time Dimension") # type: ignore[misc] def add_time_dimension(table: semantic_model_pb2.Table) -> None: """ Renders a dialog box to add a new time dimension. @@ -595,7 +596,7 @@ def display_table(table_name: str) -> None: add_time_dimension(table) -@st.experimental_dialog("Add Table") # type: ignore[misc] +@st.dialog("Add Table") # type: ignore[misc] def add_new_table() -> None: """ Renders a dialog box to add a new logical table. @@ -713,7 +714,7 @@ def import_yaml() -> None: st.rerun() -@st.experimental_dialog("Model YAML", width="large") # type: ignore +@st.dialog("Model YAML", width="large") # type: ignore def show_yaml_in_dialog() -> None: yaml = proto_to_yaml(st.session_state.semantic_model) st.code( @@ -843,6 +844,69 @@ def upload_partner_semantic() -> None: if uploaded_files: st.session_state["partner_semantic"] = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') +class PartnerCompareRow: + def __init__(self, row_data:pd.Series) -> dict: + self.row_data = row_data + + def render_row(self): + radio_options = { + "field_details_cortex": "cortex", + "field_details_partner": "partner", + "remove": "remove"} + with st.container(border=True, height=150): + key_col, detail_col = st.columns((.5, 1)) + if self.row_data["field_details_cortex"]: + cortex_metadata = self.row_data["field_details_cortex"] + else: + cortex_metadata = {} + if self.row_data["field_details_partner"]: + partner_metadata = self.row_data["field_details_partner"] + else: + partner_metadata = {} + with key_col: + st.write(self.row_data["field_key"]) + if cortex_metadata and partner_metadata: + if st.session_state['partner_metadata_preference'] == "Partner": + toggle = "field_details_partner" + elif cortex_metadata: + if st.session_state['keep_extra_cortex']: + toggle = "field_details_cortex" + else: + toggle = "remove" + else: + if st.session_state['keep_extra_partner']: + toggle = "field_details_partner" + else: + toggle = "remove" + toggle_options = ["field_details_cortex","field_details_partner", "remove"] + detail_selection = st.radio("Keep", + index = toggle_options.index(toggle), + options=toggle_options, + format_func= lambda x: radio_options[x], + key=f'row_{self.row_data["field_key"]}', + label_visibility='collapsed') + with detail_col: + if detail_selection == "field_details_cortex": + st.json({k:v for k,v in cortex_metadata.items() if k in ['name', 'description']}) + elif detail_selection == "field_details_partner": + st.json({k:v for k,v in partner_metadata.items() if k in ['name', 'description']}) + else: + pass + # Extract the selected metadata + if detail_selection == "field_details_cortex": + metadata = cortex_metadata + else: # Data type will come after + metadata = dict( + name = partner_metadata.get('name'), + description = partner_metadata.get('description', None), + expr = self.row_data["field_key"], + ) + if metadata.get('description', None): + if cortex_metadata.get('description', None): + metadata['description'] = cortex_metadata.get('description', None) + else: + metadata['description'] = '' # TO DO: Use Cortex to generate description if 
not found + return metadata @dataclass class AppMetadata: diff --git a/semantic_model_generator/data_processing/proto_utils.py b/semantic_model_generator/data_processing/proto_utils.py index 93bf6346..768b0909 100644 --- a/semantic_model_generator/data_processing/proto_utils.py +++ b/semantic_model_generator/data_processing/proto_utils.py @@ -39,6 +39,27 @@ def proto_to_yaml(message: ProtoMsg) -> str: return yaml_str except Exception as e: raise ValueError(f"Failed to convert protobuf message to YAML: {e}") + +def proto_to_dict(message: ProtoMsg) -> dict: + """Serializes the input proto into a dictionary. + + Args: + message: Protobuf message to be serialized. + + Returns: + The serialized dictionary, or None if an error occurs. + """ + try: + # Convert the Protobuf message to JSON string. + json_str = json_format.MessageToJson(message, preserving_proto_field_name=True) + + # Convert the JSON string to a Python dictionary. + json_data = json.loads(json_str) + + assert isinstance(json_data, dict) + return json_data + except Exception as e: + raise ValueError(f"Failed to convert protobuf message to dictionary: {e}") def yaml_to_semantic_model(yaml_str: str) -> semantic_model_pb2.SemanticModel: From 870bf1131818c43147c2f60251712508f3cc8f90 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Tue, 13 Aug 2024 18:53:42 -0500 Subject: [PATCH 05/16] Refactor comparison module and operation --- admin_apps/journeys/iteration.py | 130 ++++++++++++++++++------------- admin_apps/partner_semantic.py | 12 +++ admin_apps/shared_utils.py | 103 ++++++++++++------------ 3 files changed, 142 insertions(+), 103 deletions(-) diff --git a/admin_apps/journeys/iteration.py b/admin_apps/journeys/iteration.py index 3dd98fdf..458d65b7 100644 --- a/admin_apps/journeys/iteration.py +++ b/admin_apps/journeys/iteration.py @@ -53,7 +53,8 @@ extract_key_values, extract_expressions_from_sections, make_field_df, - determine_field_section + determine_field_section, + create_table_field_df ) @@ -405,7 +406,7 @@ def integrate_partner_semantics() -> None: if 'partner_semantic' in st.session_state: # Get cortex semantic file as dictionary cortex_semantic = proto_to_dict(st.session_state['semantic_model']) - cortex_tables = [i.get('name', None) for i in cortex_semantic['tables']] + cortex_tables = extract_key_values(cortex_semantic['tables'], 'name') partner_tables = extract_key_values(st.session_state["partner_semantic"], 'name') st.write("Select which logical views to compare.") c1, c2 = st.columns(2) @@ -413,40 +414,58 @@ def integrate_partner_semantics() -> None: semantic_cortex_tbl = st.selectbox("Snowflake", cortex_tables) with c2: semantic_partner_tbl = st.selectbox("Partner", partner_tables) - # TO DO add mass selection options + st.session_state['partner_metadata_preference'] = st.selectbox( - "For fields shared in both, select default", + "For fields shared in both, select default source", ["Partner", "Cortex"], index = 0, - help = "Which semantic file should be checked first for necessary metadata. Where metadata is missing, the other semantic file will be checked." 
+ help = COMPARE_SEMANTICS_HELP + ) + orphan_label, orphan_col1, orphan_col2 = st.columns(3) + with orphan_label: + st.write("Keep unmatched fields:") + with orphan_col1: + st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True) + with orphan_col2: + st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True) + + with st.expander("Customize by field", expanded=False): + st.caption("Only common metadata fields displayed") + # Create dataframe of each semantic file's fields with mergeable keys + partner_fields_df = create_table_field_df( + semantic_partner_tbl, + ['dimensions', 'measures', 'entities'], + st.session_state["partner_semantic"] + ) + cortex_fields_df = create_table_field_df( + semantic_cortex_tbl, + ['dimensions', 'time_dimensions', 'measures'], + cortex_semantic['tables'] ) - st.session_state['keep_extra_cortex'] = st.toggle("Keep unmatched Cortex fields", - value = True - ) - st.session_state['keep_extra_partner'] = st.toggle("Keep unmatched Partner fields", - value = True - ) - # if st.toggle("Compare Fields"): - with st.expander("Compare Fields", expanded=False): - partner_view = [x for x in st.session_state["partner_semantic"] if x.get('name') == semantic_partner_tbl][0] - partner_fields = extract_expressions_from_sections(partner_view, ['dimensions', 'measures', 'entities']) - partner_fields_df = make_field_df(partner_fields) - - cortex_view = [x for x in cortex_semantic['tables'] if x.get('name') == semantic_cortex_tbl][0] - cortex_fields = extract_expressions_from_sections(cortex_view, ['dimensions', 'time_dimensions', 'measures']) - cortex_fields_df = make_field_df(cortex_fields) - combined_fields_df = cortex_fields_df.merge(partner_fields_df, on='field_key', how='outer', suffixes=('_cortex', '_partner')).replace(np.nan, None) + combined_fields_df = cortex_fields_df.merge( + partner_fields_df, + on='field_key', + how='outer', + suffixes=('_cortex', '_partner')).replace(np.nan, None) # Convert json strings to dict for easier extraction later for col in ['field_details_cortex', 'field_details_partner']: - combined_fields_df[col] = combined_fields_df[col].apply(lambda x: json.loads(x) if not pd.isnull(x) and not isinstance(x, dict) else x) - - dimensions, measures, time_dimensions = st.container(border=True), st.container(border=True), st.container(border=True) - dimensions.write("Dimensions") - measures.write("Measures") - time_dimensions.write("Time_dimensions") + combined_fields_df[col] = combined_fields_df[col].apply(lambda x: + json.loads(x) if not pd.isnull(x) and + not isinstance(x, dict) else x) + # Create containers and store them in a dictionary + containers = { + 'dimensions': st.container(border=True), + 'measures': st.container(border=True), + 'time_dimensions': st.container(border=True) + } + + # Assign labels to the containers + for key in containers.keys(): + containers[key].write(key.replace('_',' ').title()) - dimensions_section, measures_sections, time_dimensions_section = [], [], [] + # Initialize sections as empty lists + sections = {key: [] for key in containers.keys()} for k,v in combined_fields_df.iterrows(): # Get destination section for cortex analyst semantic file @@ -455,31 +474,30 @@ def integrate_partner_semantics() -> None: v['section_partner'], v['field_details_cortex'], v['field_details_partner']) - if target_section == 'dimensions': - with dimensions: - dimensions_section.append({**PartnerCompareRow(row_data=v).render_row(), 'data_type': target_data_type}) - if target_section == 'measures': - with measures: 
- measures_sections.append({**PartnerCompareRow(row_data=v).render_row(), 'data_type': target_data_type}) - if target_section == 'time_dimensions': - with time_dimensions: - time_dimensions_section.append({**PartnerCompareRow(row_data=v).render_row(), 'data_type': target_data_type}) - if st.button("Integrate"): - # Update fields in cortex semantic model - for i, tbl in enumerate(cortex_semantic['tables']): - if tbl.get('name', None) == semantic_cortex_tbl: - cortex_semantic['tables'][i]['dimensions'] = dimensions_section - cortex_semantic['tables'][i]['measures'] = measures_sections - cortex_semantic['tables'][i]['time_dimensions'] = time_dimensions_section - # Submitted changes to fields will be captured in the yaml editor - # User will need to make necessary modifications there before validating/uploading - try: - st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) - st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) - st.success("Integration complete! Please validate your semantic model before uploading.") - st.rerun() - except Exception as e: - st.error(f"Integration failed: {e}") + with containers[target_section]: + sections[target_section].append({**PartnerCompareRow(row_data=v).render_row(), + 'data_type': target_data_type}) + + integrate_col, reset_col, _ = st.columns((.2,.2, 1)) + with integrate_col: + if st.button("Integrate", help=INTEGRATE_HELP): + # Update fields in cortex semantic model + for i, tbl in enumerate(cortex_semantic['tables']): + if tbl.get('name', None) == semantic_cortex_tbl: + for k in sections.keys(): + cortex_semantic['tables'][i][k] = sections[k] + # Submitted changes to fields will be captured in the yaml editor + # User will need to make necessary modifications there before validating/uploading + try: + st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) + st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) + st.success("Integration complete! Please validate your semantic model before uploading.") + st.rerun() + except Exception as e: + st.error(f"Integration failed: {e}") + with reset_col: + if st.button("Back", help="Return to the main iteration screen"): + st.rerun() # Lazy alternative to resetting all configurations @@ -625,6 +643,12 @@ def set_up_requirements() -> None: TRANSLATE_HELP = """Have an existing semantic layer in a partner tool that's integrated with Snowflake? Use this feature to integrate partner semantic specs into Cortex Analyst's spec.""" +COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. +Where metadata is missing, the other semantic file will be checked.""" + +INTEGRATE_HELP = """Merge the Cortex Analyst semantic file and Partner semantic file into the +primary Cortex Analyst yaml editor.""" + def show() -> None: init_session_states() diff --git a/admin_apps/partner_semantic.py b/admin_apps/partner_semantic.py index 2c8d522e..c7690f9e 100644 --- a/admin_apps/partner_semantic.py +++ b/admin_apps/partner_semantic.py @@ -73,6 +73,18 @@ def make_field_df(fields): }) return pd.DataFrame(rows) +def create_table_field_df(table_name: str, + sections: list[str], + yaml_data: list[dict]) -> pd.DataFrame: + """ + Extracts sections of table_name in yaml_data dictionary as a DataFrame. 
+ """ + view = [x for x in yaml_data if x.get('name') == table_name][0] + fields = extract_expressions_from_sections(view, sections) + fields_df = make_field_df(fields) + + return fields_df + def determine_field_section(section_cortex: str, section_partner: str, field_details_cortex: str, diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 9e7d791f..8da3b7ee 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -847,66 +847,69 @@ def upload_partner_semantic() -> None: class PartnerCompareRow: def __init__(self, row_data:pd.Series) -> dict: self.row_data = row_data + self.key = row_data["field_key"] + self.cortex_metadata = self.row_data["field_details_cortex"] if self.row_data["field_details_cortex"] else {} + self.partner_metadata = self.row_data["field_details_partner"] if self.row_data["field_details_partner"] else {} + def render_row(self): - radio_options = { - "field_details_cortex": "cortex", - "field_details_partner": "partner", - "remove": "remove"} - with st.container(border=True, height=150): - key_col, detail_col = st.columns((.5, 1)) - if self.row_data["field_details_cortex"]: - cortex_metadata = self.row_data["field_details_cortex"] + toggle_options = [ + "merged", + "cortex", + "partner", + "remove"] + metadata = {} + + # Create displays for each metadata combination + # Hybrid will merge the 2 based on preference + common_fields = ['name', 'description'] + if self.cortex_metadata and self.partner_metadata: + metadata['merged'] = self.cortex_metadata.copy() + if st.session_state['partner_metadata_preference'] == "Partner": + for n in common_fields: + metadata['merged'][n] = self.partner_metadata.get(n, self.cortex_metadata.get(n, None)) + else: + for n in common_fields: + metadata['merged'][n] = self.cortex_metadata.get(n, self.partner_metadata.get(n, None)) + + else: + metadata['merged'] = {} + metadata['partner'] = {field: self.partner_metadata.get(field) for field in common_fields} if self.partner_metadata else {} + metadata['cortex'] = self.cortex_metadata if self.cortex_metadata else {} + metadata['remove'] = {} + + if metadata['merged']: + toggle_default = 'merged' + elif metadata['partner']: + if st.session_state['keep_extra_partner']: + toggle_default = 'partner' else: - cortex_metadata = {} - if self.row_data["field_details_partner"]: - partner_metadata = self.row_data["field_details_partner"] + toggle_default = 'remove' + elif metadata['cortex']: + if st.session_state['keep_extra_cortex']: + toggle_default = 'cortex' else: - partner_metadata = {} + toggle_default = 'remove' + else: + toggle_default = 'remove' + with st.container(border=True, height=175): + key_col, detail_col = st.columns((.5, 1)) with key_col: - st.write(self.row_data["field_key"]) - if cortex_metadata and partner_metadata: - if st.session_state['partner_metadata_preference'] == "Partner": - toggle = "field_details_partner" - elif cortex_metadata: - if st.session_state['keep_extra_cortex']: - toggle = "field_details_cortex" - else: - toggle = "remove" - else: - if st.session_state['keep_extra_partner']: - toggle = "field_details_partner" - else: - toggle = "remove" - toggle_options = ["field_details_cortex","field_details_partner", "remove"] + st.write(self.key) + # We want to disable non-options but always keep remove option + revised_options = [i for i in toggle_options if metadata[i] or i == 'remove'] detail_selection = st.radio("Keep", - index = toggle_options.index(toggle), - options=toggle_options, - format_func= lambda x: radio_options[x], - 
key=f'row_{self.row_data["field_key"]}', + index = revised_options.index(toggle_default), + options=revised_options, + key=f'row_{self.key}', label_visibility='collapsed') with detail_col: - if detail_selection == "field_details_cortex": - st.json({k:v for k,v in cortex_metadata.items() if k in ['name', 'description']}) - elif detail_selection == "field_details_partner": - st.json({k:v for k,v in partner_metadata.items() if k in ['name', 'description']}) + if metadata[detail_selection]: + st.json({k:v for k,v in metadata[detail_selection].items() if k in common_fields and v is not None}) else: - pass + st.write("NA") # Extract the selected metadata - if detail_selection == "field_details_cortex": - metadata = cortex_metadata - else: # Data type will come after - metadata = dict( - name = partner_metadata.get('name'), - description = partner_metadata.get('description', None), - expr = self.row_data["field_key"], - ) - if metadata.get('description', None): - if cortex_metadata.get('description', None): - metadata['description'] = cortex_metadata.get('description', None) - else: - metadata['description'] = '' # TO DO: Use Cortex to generate description if not found - return metadata + return metadata[detail_selection] @dataclass class AppMetadata: From 2a063c6f88296cc16014ee038c623364604af584 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 14 Aug 2024 08:37:52 -0500 Subject: [PATCH 06/16] Upgrade streamlit to current to mitigate dialog rerun errors and dialog warnings --- admin_apps/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/admin_apps/requirements.txt b/admin_apps/requirements.txt index e99bb6e9..be37a7e5 100644 --- a/admin_apps/requirements.txt +++ b/admin_apps/requirements.txt @@ -2,7 +2,7 @@ numpy==1.24.4 urllib3==1.26.19 requests==2.32.3 sqlglot==23.17.0 -streamlit==1.36.0 +streamlit==1.37.1 streamlit-extras==0.4.3 streamlit-monaco==0.1.3 snowflake-connector-python[secure-local-storage]==3.11.0 \ No newline at end of file From 99c18d4720fc7178d22ec74e7bdc66400388b6ed Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 14 Aug 2024 08:38:33 -0500 Subject: [PATCH 07/16] Move modular objects into shared_utils --- admin_apps/journeys/builder.py | 4 - admin_apps/journeys/iteration.py | 132 +----------------- admin_apps/partner_semantic.py | 99 +------------- admin_apps/shared_utils.py | 224 ++++++++++++++++++++++++++++++- 4 files changed, 229 insertions(+), 230 deletions(-) diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index 914bd936..48673761 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -8,10 +8,6 @@ from semantic_model_generator.data_processing.proto_utils import ( yaml_to_semantic_model, ) -from admin_apps.partner_semantic import ( - load_yaml_file, - extract_key_values -) @st.cache_resource(show_spinner=False) diff --git a/admin_apps/journeys/iteration.py b/admin_apps/journeys/iteration.py index 458d65b7..c3cc5e88 100644 --- a/admin_apps/journeys/iteration.py +++ b/admin_apps/journeys/iteration.py @@ -22,8 +22,7 @@ init_session_states, upload_yaml, validate_and_upload_tmp_yaml, - upload_partner_semantic, - PartnerCompareRow + integrate_partner_semantics ) from semantic_model_generator.data_processing.cte_utils import ( context_to_column_format, @@ -34,7 +33,6 @@ from semantic_model_generator.data_processing.proto_utils import ( proto_to_yaml, yaml_to_semantic_model, - proto_to_dict, ) from semantic_model_generator.protos import semantic_model_pb2 from 
semantic_model_generator.snowflake_utils.env_vars import ( @@ -48,15 +46,6 @@ ) from semantic_model_generator.validate_model import validate -from admin_apps.partner_semantic import ( - load_yaml_file, - extract_key_values, - extract_expressions_from_sections, - make_field_df, - determine_field_section, - create_table_field_df -) - def get_file_name() -> str: return st.session_state.file_name # type: ignore @@ -396,109 +385,6 @@ def upload_handler(file_name: str) -> None: ) upload_handler(new_name) -@st.dialog(f"Integrate partner tool semantic specs", width="large") -def integrate_partner_semantics() -> None: - - # User either came right to iteration app or did not upload partner semantic in builder - if 'partner_semantic' not in st.session_state: - upload_partner_semantic() - # User uploaded in builder or just uploaded while in iteration - if 'partner_semantic' in st.session_state: - # Get cortex semantic file as dictionary - cortex_semantic = proto_to_dict(st.session_state['semantic_model']) - cortex_tables = extract_key_values(cortex_semantic['tables'], 'name') - partner_tables = extract_key_values(st.session_state["partner_semantic"], 'name') - st.write("Select which logical views to compare.") - c1, c2 = st.columns(2) - with c1: - semantic_cortex_tbl = st.selectbox("Snowflake", cortex_tables) - with c2: - semantic_partner_tbl = st.selectbox("Partner", partner_tables) - - st.session_state['partner_metadata_preference'] = st.selectbox( - "For fields shared in both, select default source", - ["Partner", "Cortex"], - index = 0, - help = COMPARE_SEMANTICS_HELP - ) - orphan_label, orphan_col1, orphan_col2 = st.columns(3) - with orphan_label: - st.write("Keep unmatched fields:") - with orphan_col1: - st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True) - with orphan_col2: - st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True) - - with st.expander("Customize by field", expanded=False): - st.caption("Only common metadata fields displayed") - # Create dataframe of each semantic file's fields with mergeable keys - partner_fields_df = create_table_field_df( - semantic_partner_tbl, - ['dimensions', 'measures', 'entities'], - st.session_state["partner_semantic"] - ) - cortex_fields_df = create_table_field_df( - semantic_cortex_tbl, - ['dimensions', 'time_dimensions', 'measures'], - cortex_semantic['tables'] - ) - - combined_fields_df = cortex_fields_df.merge( - partner_fields_df, - on='field_key', - how='outer', - suffixes=('_cortex', '_partner')).replace(np.nan, None) - # Convert json strings to dict for easier extraction later - for col in ['field_details_cortex', 'field_details_partner']: - combined_fields_df[col] = combined_fields_df[col].apply(lambda x: - json.loads(x) if not pd.isnull(x) and - not isinstance(x, dict) else x) - # Create containers and store them in a dictionary - containers = { - 'dimensions': st.container(border=True), - 'measures': st.container(border=True), - 'time_dimensions': st.container(border=True) - } - - # Assign labels to the containers - for key in containers.keys(): - containers[key].write(key.replace('_',' ').title()) - - # Initialize sections as empty lists - sections = {key: [] for key in containers.keys()} - - for k,v in combined_fields_df.iterrows(): - # Get destination section for cortex analyst semantic file - target_section, target_data_type = determine_field_section( - v['section_cortex'], - v['section_partner'], - v['field_details_cortex'], - v['field_details_partner']) - with containers[target_section]: - 
sections[target_section].append({**PartnerCompareRow(row_data=v).render_row(), - 'data_type': target_data_type}) - - integrate_col, reset_col, _ = st.columns((.2,.2, 1)) - with integrate_col: - if st.button("Integrate", help=INTEGRATE_HELP): - # Update fields in cortex semantic model - for i, tbl in enumerate(cortex_semantic['tables']): - if tbl.get('name', None) == semantic_cortex_tbl: - for k in sections.keys(): - cortex_semantic['tables'][i][k] = sections[k] - # Submitted changes to fields will be captured in the yaml editor - # User will need to make necessary modifications there before validating/uploading - try: - st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) - st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) - st.success("Integration complete! Please validate your semantic model before uploading.") - st.rerun() - except Exception as e: - st.error(f"Integration failed: {e}") - with reset_col: - if st.button("Back", help="Return to the main iteration screen"): - st.rerun() # Lazy alternative to resetting all configurations - def update_container( @@ -589,9 +475,10 @@ def yaml_editor(yaml_str: str) -> None: ): upload_dialog(content) if right.button( - "Translate", + "Partner Semantic", use_container_width=True, - help=TRANSLATE_HELP, + help=PARTNER_SEMANTIC_HELP, + disabled = not st.session_state["validated"] ): integrate_partner_semantics() @@ -640,14 +527,9 @@ def set_up_requirements() -> None: you think your semantic model is doing great and should be pushed to prod! Note that the semantic model must be validated to be uploaded.""" -TRANSLATE_HELP = """Have an existing semantic layer in a partner tool that's integrated -with Snowflake? Use this feature to integrate partner semantic specs into Cortex Analyst's spec.""" - -COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. -Where metadata is missing, the other semantic file will be checked.""" - -INTEGRATE_HELP = """Merge the Cortex Analyst semantic file and Partner semantic file into the -primary Cortex Analyst yaml editor.""" +PARTNER_SEMANTIC_HELP = """Have an existing semantic layer in a partner tool that's integrated +with Snowflake? Use this feature to integrate partner semantic specs into Cortex Analyst's spec. +Note that the Cortex Analyst semantic model must be validated before integrating partner semantics.""" def show() -> None: diff --git a/admin_apps/partner_semantic.py b/admin_apps/partner_semantic.py index c7690f9e..314577b4 100644 --- a/admin_apps/partner_semantic.py +++ b/admin_apps/partner_semantic.py @@ -1,64 +1,14 @@ from typing import Optional import yaml -import json import pandas as pd from snowflake.connector import SnowflakeConnection # from snowflake.snowpark.exceptions import SnowparkSQLException -def unpack_yaml(data): - """ - Recursively unpacks a YAML structure into a Python dictionary. - """ - if isinstance(data, dict): - return {key: unpack_yaml(value) for key, value in data.items()} - elif isinstance(data, list): - return [unpack_yaml(item) for item in data] - else: - return data - -def load_yaml_file(file_paths) -> list[dict]: - """ - Loads one or more YAML files and combines them into a single list. - """ - combined_yaml = [] - for file_path in file_paths: - yaml_content = yaml.safe_load(file_path) - combined_yaml.append(unpack_yaml(yaml_content)) - return combined_yaml -def extract_key_values(data: list[dict], key: str) -> list[dict]: - """ - Extracts key's value from a list of dictionaries. 
- """ - result = [] - for item in data: - values = item.get(key, []) - if isinstance(values, list): - result.extend(values) - else: - result.append(values) - return result - -def extract_dbt_models(yaml_data: list[dict]) -> list: - """ - Extracts dbt models from a dictionary of YAML data. - """ - - return [x.get('name', None) for model in yaml_data for x in model.get('semantic_models', None)] -def extract_expressions_from_sections(data_dict, section_names): - """ - Extracts data in section_names from a dictionary into a nested dictionary: - """ - def extract_key(obj): - return obj.get('expr', obj['name']).lower() - - d = {} - for i in section_names: - d[i] = {extract_key(obj): obj for obj in data_dict.get(i, [])} - - return d + + def make_field_df(fields): """ @@ -97,7 +47,6 @@ def determine_field_section(section_cortex: str, if section_cortex and field_details_cortex: try: - # field_details_cortex = json.loads(field_details_cortex) data_type = field_details_cortex.get('data_type', None) except TypeError: data_type = 'TEXT' @@ -111,7 +60,6 @@ def determine_field_section(section_cortex: str, data_type = 'NUMBER' else: # field_details_partner == 'dimensions' try: - # field_details_partner = json.loads(field_details_partner) if field_details_partner.get('type') == 'time': section_cortex = 'time_dimensions' data_type = 'DATE' @@ -124,49 +72,6 @@ def determine_field_section(section_cortex: str, return (section_cortex, data_type) -def merge_fields(field_key: str, - section_cortex: str, - section_dbt: str, - field_details_cortex: str, - field_details_dbt: str) -> tuple[str, str]: # (section, field_details) - """ - Merges field details from cortex and dbt into a single field returning target section and field details. - """ - # If the field is present in both models, we keep cortex details and add dbt details - if section_cortex and section_dbt: - selected_details = field_details_cortex - for k in ['description', 'name']: - selected_details[k] = field_details_dbt.get(k, field_details_cortex.get(k, None)) - selected_details['expr'] = field_key # Unique key will become expr for cortex - return (section_cortex, selected_details) - - # If field exists in dbt but not in cortex, we add shell to cortex to keep field - elif section_dbt and not section_cortex: - if section_dbt == 'entities': - section_cortex = 'dimensions' - data_type = 'TEXT' - elif section_dbt == 'measures': - section_cortex = 'measures' - data_type = 'NUMBER' - else: # section_dbt == 'dimensions' - if field_details_dbt.get('type') == 'time': - section_cortex = 'time_dimensions' - data_type = 'DATE' - else: - section_cortex = 'dimensions' - data_type = 'TEXT' - return (section_cortex, { - 'name': field_details_dbt.get('name', None), - 'synonyms': [' '], - 'description': field_details_dbt.get('description', None), - 'expr': field_key, - 'data_type': data_type - }) - - else: - return (section_cortex, field_details_cortex) - - def run_cortex_complete(conn: SnowflakeConnection, model: str, prompt: str, diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 8da3b7ee..dacffa83 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -3,6 +3,7 @@ import os import time import json +import yaml from dataclasses import dataclass from datetime import datetime from enum import Enum @@ -10,12 +11,14 @@ from typing import Optional import pandas as pd +import numpy as np import streamlit as st from snowflake.connector import SnowflakeConnection from semantic_model_generator.data_processing.proto_utils import ( 
proto_to_yaml, yaml_to_semantic_model, + proto_to_dict, ) from semantic_model_generator.generate_model import raw_schema_to_semantic_context from semantic_model_generator.protos import semantic_model_pb2 @@ -26,10 +29,6 @@ set_schema, ) -from admin_apps.partner_semantic import ( - load_yaml_file, - extract_key_values -) SNOWFLAKE_ACCOUNT = os.environ.get("SNOWFLAKE_ACCOUNT_LOCATOR", "") _TMP_FILE_NAME = f"admin_app_temp_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}" @@ -830,6 +829,53 @@ def download_yaml(file_name: str, conn: SnowflakeConnection) -> str: yaml_str = temp_file.read() return yaml_str +def unpack_yaml(data): + """ + Recursively unpacks a YAML structure into a Python dictionary. + """ + if isinstance(data, dict): + return {key: unpack_yaml(value) for key, value in data.items()} + elif isinstance(data, list): + return [unpack_yaml(item) for item in data] + else: + return data + +def load_yaml_file(file_paths) -> list[dict]: + """ + Loads one or more YAML files and combines them into a single list. + """ + combined_yaml = [] + for file_path in file_paths: + yaml_content = yaml.safe_load(file_path) + combined_yaml.append(unpack_yaml(yaml_content)) + return combined_yaml + +def extract_key_values(data: list[dict], key: str) -> list[dict]: + """ + Extracts key's value from a list of dictionaries. + """ + result = [] + for item in data: + values = item.get(key, []) + if isinstance(values, list): + result.extend(values) + else: + result.append(values) + return result + +def extract_expressions_from_sections(data_dict, section_names): + """ + Extracts data in section_names from a dictionary into a nested dictionary: + """ + def extract_key(obj): + return obj.get('expr', obj['name']).lower() + + d = {} + for i in section_names: + d[i] = {extract_key(obj): obj for obj in data_dict.get(i, [])} + + return d + def upload_partner_semantic() -> None: """ Upload the semantic model to a stage. @@ -910,6 +956,176 @@ def render_row(self): st.write("NA") # Extract the selected metadata return metadata[detail_selection] + +def make_field_df(fields): + """ + Converts a nested dictionary of fields into a DataFrame. + """ + rows = [] + for section, entity_list in fields.items(): + for field_key, field_details in entity_list.items(): + rows.append({'section': section, + 'field_key': field_key, + 'field_details': field_details + }) + return pd.DataFrame(rows) + +def create_table_field_df(table_name: str, + sections: list[str], + yaml_data: list[dict]) -> pd.DataFrame: + """ + Extracts sections of table_name in yaml_data dictionary as a DataFrame. + """ + view = [x for x in yaml_data if x.get('name') == table_name][0] + fields = extract_expressions_from_sections(view, sections) + fields_df = make_field_df(fields) + + return fields_df + +def determine_field_section(section_cortex: str, + section_partner: str, + field_details_cortex: str, + field_details_partner: str): + """ + Derives intended section of field in cortex analyst model. + + Currently expects dbt as source. 
+    """
+
+    if section_cortex and field_details_cortex:
+        try:
+            data_type = field_details_cortex.get('data_type', None)
+        except TypeError:
+            data_type = 'TEXT'
+        return (section_cortex, data_type)
+    else: # No matching cortex field found; field in partner is a novel logical field
+        if section_partner == 'entities':
+            section_cortex = 'dimensions'
+            data_type = 'TEXT'
+        elif section_partner == 'measures':
+            section_cortex = 'measures'
+            data_type = 'NUMBER'
+        else: # section_partner == 'dimensions'
+            try:
+                if field_details_partner.get('type') == 'time':
+                    section_cortex = 'time_dimensions'
+                    data_type = 'DATE'
+            except TypeError:
+                section_cortex = 'dimensions'
+                data_type = 'TEXT'
+            else:
+                section_cortex = 'dimensions'
+                data_type = 'TEXT'
+        return (section_cortex, data_type)
+
+@st.dialog(f"Integrate partner tool semantic specs", width="large")
+def integrate_partner_semantics() -> None:
+
+    COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata.
+    Where metadata is missing, the other semantic file will be checked."""
+
+    INTEGRATE_HELP = """Merge the Cortex Analyst semantic file and Partner semantic file into the
+    primary Cortex Analyst yaml editor."""
+
+    # User either came right to iteration app or did not upload partner semantic in builder
+    if 'partner_semantic' not in st.session_state:
+        upload_partner_semantic()
+    # User uploaded in builder or just uploaded while in iteration
+    if 'partner_semantic' in st.session_state:
+        # Get cortex semantic file as dictionary
+        cortex_semantic = proto_to_dict(st.session_state['semantic_model'])
+        cortex_tables = extract_key_values(cortex_semantic['tables'], 'name')
+        partner_tables = extract_key_values(st.session_state["partner_semantic"], 'name')
+        st.write("Select which logical views to compare.")
+        c1, c2 = st.columns(2)
+        with c1:
+            semantic_cortex_tbl = st.selectbox("Snowflake", cortex_tables)
+        with c2:
+            semantic_partner_tbl = st.selectbox("Partner", partner_tables)
+
+        st.session_state['partner_metadata_preference'] = st.selectbox(
+            "For fields shared in both, select default source",
+            ["Partner", "Cortex"],
+            index = 0,
+            help = COMPARE_SEMANTICS_HELP
+            )
+        orphan_label, orphan_col1, orphan_col2 = st.columns(3, vertical_alignment="center", gap="small")
+        with orphan_label:
+            st.write("Keep unmatched fields:")
+        with orphan_col1:
+            st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True)
+        with orphan_col2:
+            st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True)
+        with st.expander("Advanced configuration", expanded=False):
+            st.caption("Only common metadata fields displayed")
+            # Create dataframe of each semantic file's fields with mergeable keys
+            partner_fields_df = create_table_field_df(
+                semantic_partner_tbl,
+                ['dimensions', 'measures', 'entities'],
+                st.session_state["partner_semantic"]
+            )
+            cortex_fields_df = create_table_field_df(
+                semantic_cortex_tbl,
+                ['dimensions', 'time_dimensions', 'measures'],
+                cortex_semantic['tables']
+            )
+            combined_fields_df = cortex_fields_df.merge(
+                partner_fields_df,
+                on='field_key',
+                how='outer',
+                suffixes=('_cortex', '_partner')).replace(np.nan, None)
+            # Convert json strings to dict for easier extraction later
+            for col in ['field_details_cortex', 'field_details_partner']:
+                combined_fields_df[col] = combined_fields_df[col].apply(lambda x:
+                    json.loads(x) if not pd.isnull(x) and
+                    not isinstance(x, dict) else x)
+            # Create containers and store them in a dictionary
+            containers = {
+                'dimensions': 
st.container(), + 'measures': st.container(), + 'time_dimensions': st.container() + } + + # Assign labels to the containers + for key in containers.keys(): + containers[key].write(key.replace('_',' ').title()) + + # Initialize sections as empty lists + sections = {key: [] for key in containers.keys()} + + for k,v in combined_fields_df.iterrows(): + # Get destination section for cortex analyst semantic file + target_section, target_data_type = determine_field_section( + v['section_cortex'], + v['section_partner'], + v['field_details_cortex'], + v['field_details_partner']) + with containers[target_section]: + sections[target_section].append({**PartnerCompareRow(row_data=v).render_row(), + 'data_type': target_data_type}) + + integrate_col, reset_col, _ = st.columns((1, 1, 5), gap = "small") + with integrate_col: + if st.button("Merge", help=INTEGRATE_HELP, use_container_width=True): + # Update fields in cortex semantic model + for i, tbl in enumerate(cortex_semantic['tables']): + if tbl.get('name', None) == semantic_cortex_tbl: + for k in sections.keys(): + cortex_semantic['tables'][i][k] = sections[k] + # Submitted changes to fields will be captured in the yaml editor + # User will need to make necessary modifications there before validating/uploading + try: + st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) + st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) + st.success("Integration complete! Please validate your semantic model before uploading.") + st.rerun() + except Exception as e: + st.error(f"Integration failed: {e}") + with reset_col: + if st.button("Back", help="Return to the main iteration screen", use_container_width=True): + st.rerun() # Lazy alternative to resetting all configurations @dataclass class AppMetadata: From 68ebc306fce369bad32f5c044ddfcd456bf7bba3 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 14 Aug 2024 08:40:06 -0500 Subject: [PATCH 08/16] Remove partner_semantic sandbox --- admin_apps/partner_semantic.py | 86 ---------------------------------- admin_apps/shared_utils.py | 12 +++++ 2 files changed, 12 insertions(+), 86 deletions(-) delete mode 100644 admin_apps/partner_semantic.py diff --git a/admin_apps/partner_semantic.py b/admin_apps/partner_semantic.py deleted file mode 100644 index 314577b4..00000000 --- a/admin_apps/partner_semantic.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import Optional -import yaml - -import pandas as pd -from snowflake.connector import SnowflakeConnection -# from snowflake.snowpark.exceptions import SnowparkSQLException - - - - - - -def make_field_df(fields): - """ - Converts a nested dictionary of fields into a DataFrame. - """ - rows = [] - for section, entity_list in fields.items(): - for field_key, field_details in entity_list.items(): - rows.append({'section': section, - 'field_key': field_key, - 'field_details': field_details - }) - return pd.DataFrame(rows) - -def create_table_field_df(table_name: str, - sections: list[str], - yaml_data: list[dict]) -> pd.DataFrame: - """ - Extracts sections of table_name in yaml_data dictionary as a DataFrame. - """ - view = [x for x in yaml_data if x.get('name') == table_name][0] - fields = extract_expressions_from_sections(view, sections) - fields_df = make_field_df(fields) - - return fields_df - -def determine_field_section(section_cortex: str, - section_partner: str, - field_details_cortex: str, - field_details_partner: str): - """ - Derives intended section of field in cortex analyst model. 
- - Currently expects dbt as source. - """ - - if section_cortex and field_details_cortex: - try: - data_type = field_details_cortex.get('data_type', None) - except TypeError: - data_type = 'TEXT' - return (section_cortex, data_type) - else: # No matching cortex field found; field is partner is a novel logical field - if section_partner == 'entities': - section_cortex = 'dimensions' - data_type = 'TEXT' - elif section_partner == 'measures': - section_cortex = 'measures' - data_type = 'NUMBER' - else: # field_details_partner == 'dimensions' - try: - if field_details_partner.get('type') == 'time': - section_cortex = 'time_dimensions' - data_type = 'DATE' - except TypeError: - section_cortex = 'dimensions' - data_type = 'TEXT' - else: - section_cortex = 'dimensions' - data_type = 'TEXT' - return (section_cortex, data_type) - - -def run_cortex_complete(conn: SnowflakeConnection, - model: str, - prompt: str, - prompt_args: Optional[dict] = None) -> str: - - if prompt_args: - prompt = prompt.format(**prompt_args).replace("'", "\\'") - complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" - response = conn.cursor().execute(complete_sql).fetchone()[0] - - return response - diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index dacffa83..606f7b25 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -1017,6 +1017,18 @@ def determine_field_section(section_cortex: str, section_cortex = 'dimensions' data_type = 'TEXT' return (section_cortex, data_type) + +def run_cortex_complete(conn: SnowflakeConnection, + model: str, + prompt: str, + prompt_args: Optional[dict] = None) -> str: + + if prompt_args: + prompt = prompt.format(**prompt_args).replace("'", "\\'") + complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" + response = conn.cursor().execute(complete_sql).fetchone()[0] + + return response @st.dialog(f"Integrate partner tool semantic specs", width="large") def integrate_partner_semantics() -> None: From 5e7d97130c813113cebaf5521e554ec5bd48a328 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 14 Aug 2024 09:29:00 -0500 Subject: [PATCH 09/16] Update tooltips and labels --- admin_apps/journeys/iteration.py | 2 +- admin_apps/shared_utils.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/admin_apps/journeys/iteration.py b/admin_apps/journeys/iteration.py index c3cc5e88..d4ffa1b7 100644 --- a/admin_apps/journeys/iteration.py +++ b/admin_apps/journeys/iteration.py @@ -446,7 +446,7 @@ def yaml_editor(yaml_str: str) -> None: with button_container: left, center, right = st.columns(3) - if left.button("Save", use_container_width=True, help=SAVE_HELP): + if left.button("Validate", use_container_width=True, help=SAVE_HELP): # Validate new content try: validate( diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 606f7b25..96d75d79 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -1032,6 +1032,7 @@ def run_cortex_complete(conn: SnowflakeConnection, @st.dialog(f"Integrate partner tool semantic specs", width="large") def integrate_partner_semantics() -> None: + st.write("Upload semantic files from supported partners to merge with Cortex Analyst semantic model.") COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. 
Where metadata is missing, the other semantic file will be checked.""" @@ -1039,6 +1040,12 @@ def integrate_partner_semantics() -> None: INTEGRATE_HELP = """Merge the Cortex Analyst semantic file and Partner semantic file into the primary Cortex Analyst yaml editor.""" + KEEP_CORTEX_HELP = """Retain fields that are found in Cortex Analyst semantic model + but not in Partner semantic model.""" + + KEEP_PARTNER_HELP = """Retain fields that are found in Partner semantic model + but not in Cortex Analyst semantic model.""" + # User either came right to iteration app or did not upload partner semantic in builder if 'partner_semantic' not in st.session_state: upload_partner_semantic() @@ -1065,9 +1072,9 @@ def integrate_partner_semantics() -> None: with orphan_label: st.write("Keep unmatched fields:") with orphan_col1: - st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True) + st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True, help = KEEP_CORTEX_HELP) with orphan_col2: - st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True) + st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True, help = KEEP_PARTNER_HELP) with st.expander("Advanced configuration", expanded=False): st.caption("Only common metadata fields displayed") From 788f28442f06b2c294539664be809704d6c691c5 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 14 Aug 2024 11:18:03 -0500 Subject: [PATCH 10/16] Block activity if user deletes semantic file in dialog --- admin_apps/shared_utils.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 96d75d79..b16ace33 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -881,14 +881,20 @@ def upload_partner_semantic() -> None: Upload the semantic model to a stage. 
""" partners = [None, "dbt"] + # User may have specified a partner tool in builder module or returning to module + selected_partner = st.session_state.get("partner_tool", None) - st.session_state["partner_tool"] = st.selectbox("Select the partner tool", partners) + st.session_state["partner_tool"] = st.selectbox("Select the partner tool", + partners, + index = partners.index(selected_partner)) if st.session_state["partner_tool"] == "dbt": uploaded_files = st.file_uploader(f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)', type=['yaml', 'yml'], accept_multiple_files=True) if uploaded_files: st.session_state["partner_semantic"] = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') + else: + st.session_state["partner_semantic"] = None class PartnerCompareRow: def __init__(self, row_data:pd.Series) -> dict: @@ -1046,11 +1052,9 @@ def integrate_partner_semantics() -> None: KEEP_PARTNER_HELP = """Retain fields that are found in Partner semantic model but not in Cortex Analyst semantic model.""" - # User either came right to iteration app or did not upload partner semantic in builder - if 'partner_semantic' not in st.session_state: - upload_partner_semantic() - # User uploaded in builder or just uploaded while in iteration - if 'partner_semantic' in st.session_state: + upload_partner_semantic() # Give user another chance to add/change partner semantic files besides builder + if st.session_state.get('partner_semantic', None) and st.session_state.get('partner_tool', None): + # upload_partner_semantic() # Get cortex semantic file as dictionary cortex_semantic = proto_to_dict(st.session_state['semantic_model']) cortex_tables = extract_key_values(cortex_semantic['tables'], 'name') @@ -1076,7 +1080,8 @@ def integrate_partner_semantics() -> None: with orphan_col2: st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True, help = KEEP_PARTNER_HELP) - with st.expander("Advanced configuration", expanded=False): + with st.expander("Advanced configuration", + expanded=False): st.caption("Only common metadata fields displayed") # Create dataframe of each semantic file's fields with mergeable keys partner_fields_df = create_table_field_df( From 0d35c1c78ffc4cdd462607b682760988e140d8a0 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Wed, 14 Aug 2024 16:56:53 -0500 Subject: [PATCH 11/16] Minimize expander bug on partner semantic upload --- admin_apps/journeys/builder.py | 10 +-- admin_apps/shared_utils.py | 131 ++++++++++++++++++++------------- 2 files changed, 84 insertions(+), 57 deletions(-) diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index 48673761..0c3fb148 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -47,11 +47,11 @@ def table_selector_dialog() -> None: placeholder="Select the tables you'd like to include in your semantic model.", ) - has_semantic = st.radio("Do you have an existing semantic for these tables(s) in a partner tool?", ("No", "Yes")) - if has_semantic == "Yes": - upload_partner_semantic() - else: - st.session_state["partner_semantic"] = None + # has_semantic = st.radio("Do you have an existing semantic for these tables(s) in a partner tool?", ("No", "Yes")) + # if has_semantic == "Yes": + # upload_partner_semantic() + # else: + # st.session_state["partner_semantic"] = None st.markdown("
", unsafe_allow_html=True) submit = st.button( "Submit", use_container_width=True, type="primary" diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index b16ace33..ee5154e3 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -875,12 +875,22 @@ def extract_key(obj): d[i] = {extract_key(obj): obj for obj in data_dict.get(i, [])} return d - -def upload_partner_semantic() -> None: + +def upload_label(): + if st.session_state.get('my_file_uploader', None): + st.session_state['upload_label'] = ', '.join([i.name for i in st.session_state['my_file_uploader']]) + elif st.session_state.get('uploaded_semantic_files', None): + st.session_state['upload_label'] = ', '.join([i for i in st.session_state['uploaded_semantic_files']]) + else: + st.session_state['upload_label'] = f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)' + + +def upload_partner_semantic() -> bool: """ Upload the semantic model to a stage. """ partners = [None, "dbt"] + uploaded_files = [] # User may have specified a partner tool in builder module or returning to module selected_partner = st.session_state.get("partner_tool", None) @@ -890,11 +900,14 @@ def upload_partner_semantic() -> None: if st.session_state["partner_tool"] == "dbt": uploaded_files = st.file_uploader(f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)', type=['yaml', 'yml'], - accept_multiple_files=True) + accept_multiple_files=True, + key = 'myfile') if uploaded_files: st.session_state["partner_semantic"] = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') + st.session_state["uploaded_semantic_files"] = [i.name for i in uploaded_files] else: st.session_state["partner_semantic"] = None + return bool(uploaded_files) class PartnerCompareRow: def __init__(self, row_data:pd.Series) -> dict: @@ -903,7 +916,7 @@ def __init__(self, row_data:pd.Series) -> dict: self.cortex_metadata = self.row_data["field_details_cortex"] if self.row_data["field_details_cortex"] else {} self.partner_metadata = self.row_data["field_details_partner"] if self.row_data["field_details_partner"] else {} - + def render_row(self): toggle_options = [ "merged", @@ -912,8 +925,8 @@ def render_row(self): "remove"] metadata = {} - # Create displays for each metadata combination - # Hybrid will merge the 2 based on preference + # Create metadata based for each field given merging or singular semantic file useage of the field + # Merge will merge the 2 based on user-selected preference common_fields = ['name', 'description'] if self.cortex_metadata and self.partner_metadata: metadata['merged'] = self.cortex_metadata.copy() @@ -944,24 +957,31 @@ def render_row(self): toggle_default = 'remove' else: toggle_default = 'remove' - with st.container(border=True, height=175): - key_col, detail_col = st.columns((.5, 1)) - with key_col: - st.write(self.key) - # We want to disable non-options but always keep remove option - revised_options = [i for i in toggle_options if metadata[i] or i == 'remove'] - detail_selection = st.radio("Keep", - index = revised_options.index(toggle_default), - options=revised_options, - key=f'row_{self.key}', - label_visibility='collapsed') - with detail_col: - if metadata[detail_selection]: - st.json({k:v for k,v in metadata[detail_selection].items() if k in common_fields and v is not None}) - else: - st.write("NA") + + # with st.container(border=True, + # height=175): + key_col, detail_col = st.columns((.5, 1)) + with key_col: + st.write(self.key) + # We want to disable non-options but always 
keep remove option + revised_options = [i for i in toggle_options if metadata[i] or i == 'remove'] + detail_selection = st.radio("Keep", + index = revised_options.index(toggle_default), + options=revised_options, + key=f'row_{self.key}', + format_func=lambda x: x.capitalize(), + label_visibility='collapsed') + with detail_col: + if metadata[detail_selection]: + st.json({k:v for k,v in metadata[detail_selection].items() if k in common_fields and v is not None}) + else: + st.write("NA") + st.divider() # Extract the selected metadata - return metadata[detail_selection] + selected_metadata = metadata[detail_selection] + # Add expr to selected metadata if it's not included which is the case for dbt + selected_metadata['expr'] = self.key + return selected_metadata def make_field_df(fields): """ @@ -1051,15 +1071,20 @@ def integrate_partner_semantics() -> None: KEEP_PARTNER_HELP = """Retain fields that are found in Partner semantic model but not in Cortex Analyst semantic model.""" + + uploaded_files = upload_partner_semantic() # Give user another chance to add/change partner semantic files besides builder + st.divider() + if ( + st.session_state.get('partner_semantic', None) and + st.session_state.get('partner_tool', None) and + st.session_state.get('uploaded_semantic_files', None) + ): - upload_partner_semantic() # Give user another chance to add/change partner semantic files besides builder - if st.session_state.get('partner_semantic', None) and st.session_state.get('partner_tool', None): - # upload_partner_semantic() # Get cortex semantic file as dictionary cortex_semantic = proto_to_dict(st.session_state['semantic_model']) cortex_tables = extract_key_values(cortex_semantic['tables'], 'name') partner_tables = extract_key_values(st.session_state["partner_semantic"], 'name') - st.write("Select which logical views to compare.") + st.write("Select which logical views to compare and merge.") c1, c2 = st.columns(2) with c1: semantic_cortex_tbl = st.selectbox("Snowflake", cortex_tables) @@ -1067,22 +1092,20 @@ def integrate_partner_semantics() -> None: semantic_partner_tbl = st.selectbox("Partner", partner_tables) st.session_state['partner_metadata_preference'] = st.selectbox( - "For fields shared in both, select default source", + "For fields shared in both sources, which source should be checked first for common metadata?", ["Partner", "Cortex"], index = 0, help = COMPARE_SEMANTICS_HELP ) orphan_label, orphan_col1, orphan_col2 = st.columns(3, vertical_alignment="center", gap="small") with orphan_label: - st.write("Keep unmatched fields:") + st.write("Retain unmatched fields:") with orphan_col1: st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True, help = KEEP_CORTEX_HELP) with orphan_col2: st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True, help = KEEP_PARTNER_HELP) - - with st.expander("Advanced configuration", - expanded=False): - st.caption("Only common metadata fields displayed") + with st.expander("Advanced configuration", expanded=False): + st.caption("Only shared metadata information displayed") # Create dataframe of each semantic file's fields with mergeable keys partner_fields_df = create_table_field_df( semantic_partner_tbl, @@ -1094,7 +1117,6 @@ def integrate_partner_semantics() -> None: ['dimensions', 'time_dimensions', 'measures'], cortex_semantic['tables'] ) - combined_fields_df = cortex_fields_df.merge( partner_fields_df, on='field_key', @@ -1112,7 +1134,7 @@ def integrate_partner_semantics() -> None: 'time_dimensions': st.container() } - # 
Assign labels to the containers + # Assign labels to the containers for key in containers.keys(): containers[key].write(key.replace('_',' ').title()) @@ -1132,24 +1154,29 @@ def integrate_partner_semantics() -> None: integrate_col, reset_col, _ = st.columns((1, 1, 5), gap = "small") with integrate_col: - if st.button("Merge", help=INTEGRATE_HELP, use_container_width=True): - # Update fields in cortex semantic model - for i, tbl in enumerate(cortex_semantic['tables']): - if tbl.get('name', None) == semantic_cortex_tbl: - for k in sections.keys(): - cortex_semantic['tables'][i][k] = sections[k] - # Submitted changes to fields will be captured in the yaml editor - # User will need to make necessary modifications there before validating/uploading - try: - st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) - st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) - st.success("Integration complete! Please validate your semantic model before uploading.") - st.rerun() - except Exception as e: - st.error(f"Integration failed: {e}") + merge_button = st.button("Merge", help=INTEGRATE_HELP, use_container_width=True) with reset_col: - if st.button("Back", help="Return to the main iteration screen", use_container_width=True): - st.rerun() # Lazy alternative to resetting all configurations + reset_button = st.button("Back", help="Return to the main iteration screen", use_container_width=True) + + if merge_button: + # Update fields in cortex semantic model + for i, tbl in enumerate(cortex_semantic['tables']): + if tbl.get('name', None) == semantic_cortex_tbl: + for k in sections.keys(): + cortex_semantic['tables'][i][k] = sections[k] + # Submitted changes to fields will be captured in the yaml editor + # User will need to make necessary modifications there before validating/uploading + try: + st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) + st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) + st.success("Integration complete! 
Please validate your semantic model before uploading.") + st.rerun() + except Exception as e: + st.error(f"Integration failed: {e}") + + if reset_button: + st.rerun() # Lazy alternative to resetting all configurations + @dataclass class AppMetadata: From c07387248cb14c9ce4d9f689b793831fe0719cc6 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Thu, 15 Aug 2024 11:25:46 -0500 Subject: [PATCH 12/16] Fix bug for fields set to remove --- admin_apps/shared_utils.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index ee5154e3..68239a40 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -891,7 +891,7 @@ def upload_partner_semantic() -> bool: """ partners = [None, "dbt"] uploaded_files = [] - # User may have specified a partner tool in builder module or returning to module + selected_partner = st.session_state.get("partner_tool", None) st.session_state["partner_tool"] = st.selectbox("Select the partner tool", @@ -977,11 +977,12 @@ def render_row(self): else: st.write("NA") st.divider() - # Extract the selected metadata - selected_metadata = metadata[detail_selection] - # Add expr to selected metadata if it's not included which is the case for dbt - selected_metadata['expr'] = self.key - return selected_metadata + # Extract the selected metadata if not set to remove + if detail_selection != 'remove': + selected_metadata = metadata[detail_selection] + # Add expr to selected metadata if it's not included which is the case for dbt + selected_metadata['expr'] = self.key + return selected_metadata def make_field_df(fields): """ @@ -1071,9 +1072,10 @@ def integrate_partner_semantics() -> None: KEEP_PARTNER_HELP = """Retain fields that are found in Partner semantic model but not in Cortex Analyst semantic model.""" - + uploaded_files = upload_partner_semantic() # Give user another chance to add/change partner semantic files besides builder st.divider() + if ( st.session_state.get('partner_semantic', None) and st.session_state.get('partner_tool', None) and @@ -1149,8 +1151,10 @@ def integrate_partner_semantics() -> None: v['field_details_cortex'], v['field_details_partner']) with containers[target_section]: - sections[target_section].append({**PartnerCompareRow(row_data=v).render_row(), - 'data_type': target_data_type}) + selected_metadata = PartnerCompareRow(v).render_row() + if selected_metadata: + selected_metadata['data_type'] = target_data_type + sections[target_section].append(selected_metadata) integrate_col, reset_col, _ = st.columns((1, 1, 5), gap = "small") with integrate_col: @@ -1170,6 +1174,7 @@ def integrate_partner_semantics() -> None: st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) st.session_state["semantic_model"] = yaml_to_semantic_model(st.session_state["yaml"]) st.success("Integration complete! 
Please validate your semantic model before uploading.") + time.sleep(1.5) st.rerun() except Exception as e: st.error(f"Integration failed: {e}") From ce4c3cb954eb909ab6caf0d81fafe13415dacab2 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Thu, 15 Aug 2024 12:18:24 -0500 Subject: [PATCH 13/16] Add exit if incorrect yaml uploaded --- admin_apps/shared_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 68239a40..ccdef1fc 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -903,8 +903,12 @@ def upload_partner_semantic() -> bool: accept_multiple_files=True, key = 'myfile') if uploaded_files: - st.session_state["partner_semantic"] = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') - st.session_state["uploaded_semantic_files"] = [i.name for i in uploaded_files] + partner_semantic = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') + if not partner_semantic: + st.error("Upload file does not contain required semantic_models section.") + else: + st.session_state["partner_semantic"] = partner_semantic + st.session_state["uploaded_semantic_files"] = [i.name for i in uploaded_files] else: st.session_state["partner_semantic"] = None return bool(uploaded_files) From 3967d3d2091316e6795ab94ca9a35f9545f92080 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Mon, 26 Aug 2024 10:43:00 -0500 Subject: [PATCH 14/16] Fix all linting errors --- admin_apps/app.py | 8 +- admin_apps/journeys/builder.py | 5 +- admin_apps/journeys/iteration.py | 16 +- admin_apps/shared_utils.py | 432 ++++++++++-------- .../data_processing/proto_utils.py | 11 +- 5 files changed, 268 insertions(+), 204 deletions(-) diff --git a/admin_apps/app.py b/admin_apps/app.py index 3b64fd8b..498686d3 100644 --- a/admin_apps/app.py +++ b/admin_apps/app.py @@ -4,8 +4,11 @@ # set_page_config must be run as the first Streamlit command on the page, before any other streamlit imports. 
st.set_page_config(layout="wide", page_icon="💬", page_title="Semantic Model Generator") -from admin_apps.shared_utils import GeneratorAppScreen, get_snowflake_connection -from semantic_model_generator.snowflake_utils.env_vars import ( +from admin_apps.shared_utils import ( # noqa: E402 + GeneratorAppScreen, + get_snowflake_connection, +) +from semantic_model_generator.snowflake_utils.env_vars import ( # noqa: E402 SNOWFLAKE_ACCOUNT_LOCATOR, SNOWFLAKE_HOST, SNOWFLAKE_USER, @@ -13,7 +16,6 @@ ) - @st.dialog(title="Setup") def env_setup_popup(missing_env_vars: list[str]) -> None: """ diff --git a/admin_apps/journeys/builder.py b/admin_apps/journeys/builder.py index f2e893d3..c6ca6d86 100644 --- a/admin_apps/journeys/builder.py +++ b/admin_apps/journeys/builder.py @@ -2,16 +2,13 @@ from loguru import logger from snowflake.connector import ProgrammingError -from admin_apps.shared_utils import GeneratorAppScreen, get_snowflake_connection, upload_partner_semantic +from admin_apps.shared_utils import GeneratorAppScreen, get_snowflake_connection from semantic_model_generator.generate_model import generate_model_str_from_snowflake from semantic_model_generator.snowflake_utils.snowflake_connector import ( fetch_databases, fetch_schemas_in_database, fetch_tables_views_in_schema, ) -from semantic_model_generator.data_processing.proto_utils import ( - yaml_to_semantic_model, -) @st.cache_resource(show_spinner=False) diff --git a/admin_apps/journeys/iteration.py b/admin_apps/journeys/iteration.py index 61f6efd0..c68b3bd3 100644 --- a/admin_apps/journeys/iteration.py +++ b/admin_apps/journeys/iteration.py @@ -1,10 +1,8 @@ import json import time from typing import Any, Dict, List, Optional -import yaml import pandas as pd -import numpy as np import requests import sqlglot import streamlit as st @@ -21,9 +19,9 @@ download_yaml, get_snowflake_connection, init_session_states, + integrate_partner_semantics, upload_yaml, validate_and_upload_tmp_yaml, - integrate_partner_semantics ) from semantic_model_generator.data_processing.cte_utils import ( context_to_column_format, @@ -395,7 +393,6 @@ def upload_handler(file_name: str) -> None: upload_handler(new_name) - def update_container( container: DeltaGenerator, content: str, prefix: Optional[str] ) -> None: @@ -454,12 +451,7 @@ def yaml_editor(yaml_str: str) -> None: status_container = st.empty() with button_container: - ( - one, - two, - three, - four - ) = st.columns(4) + (one, two, three, four) = st.columns(4) if one.button("Validate", use_container_width=True, help=VALIDATE_HELP): # Validate new content try: @@ -508,8 +500,8 @@ def yaml_editor(yaml_str: str) -> None: if four.button( "Partner Semantic", use_container_width=True, - help=PARTNER_SEMANTIC_HELP, - disabled = not st.session_state["validated"] + help=PARTNER_SEMANTIC_HELP, + disabled=not st.session_state["validated"], ): integrate_partner_semantics() diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index ccdef1fc..508093b5 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -1,24 +1,24 @@ from __future__ import annotations +import json import os import time -import json -import yaml from dataclasses import dataclass from datetime import datetime from enum import Enum from io import StringIO -from typing import Optional +from typing import Any, Optional -import pandas as pd import numpy as np +import pandas as pd import streamlit as st +import yaml from snowflake.connector import SnowflakeConnection from semantic_model_generator.data_processing.proto_utils 
import ( + proto_to_dict, proto_to_yaml, yaml_to_semantic_model, - proto_to_dict, ) from semantic_model_generator.generate_model import raw_schema_to_semantic_context from semantic_model_generator.protos import semantic_model_pb2 @@ -29,7 +29,6 @@ set_schema, ) - SNOWFLAKE_ACCOUNT = os.environ.get("SNOWFLAKE_ACCOUNT_LOCATOR", "") _TMP_FILE_NAME = f"admin_app_temp_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}" @@ -828,8 +827,11 @@ def download_yaml(file_name: str, conn: SnowflakeConnection) -> str: # Read the raw contents from {temp_dir}/{file_name} and return it as a string. yaml_str = temp_file.read() return yaml_str - -def unpack_yaml(data): + + +def unpack_yaml( + data: Any | dict[str, Any] | list[Any] +) -> Any | dict[str, Any] | list[str]: """ Recursively unpacks a YAML structure into a Python dictionary. """ @@ -840,7 +842,8 @@ def unpack_yaml(data): else: return data -def load_yaml_file(file_paths) -> list[dict]: + +def load_yaml_file(file_paths: list[Any]) -> list[Any]: # type: ignore """ Loads one or more YAML files and combines them into a single list. """ @@ -850,7 +853,8 @@ def load_yaml_file(file_paths) -> list[dict]: combined_yaml.append(unpack_yaml(yaml_content)) return combined_yaml -def extract_key_values(data: list[dict], key: str) -> list[dict]: + +def extract_key_values(data: list[dict[str, Any]], key: str) -> list[Any]: """ Extracts key's value from a list of dictionaries. """ @@ -863,26 +867,22 @@ def extract_key_values(data: list[dict], key: str) -> list[dict]: result.append(values) return result -def extract_expressions_from_sections(data_dict, section_names): + +def extract_expressions_from_sections( + data_dict: dict[str, Any], section_names: list[str] +) -> dict[str, dict[str, Any]]: """ Extracts data in section_names from a dictionary into a nested dictionary: """ - def extract_key(obj): - return obj.get('expr', obj['name']).lower() - + + def extract_key(obj: dict[str, Any]) -> str | Any: + return obj.get("expr", obj["name"]).lower() + d = {} for i in section_names: d[i] = {extract_key(obj): obj for obj in data_dict.get(i, [])} - - return d -def upload_label(): - if st.session_state.get('my_file_uploader', None): - st.session_state['upload_label'] = ', '.join([i.name for i in st.session_state['my_file_uploader']]) - elif st.session_state.get('uploaded_semantic_files', None): - st.session_state['upload_label'] = ', '.join([i for i in st.session_state['uploaded_semantic_files']]) - else: - st.session_state['upload_label'] = f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)' + return d def upload_partner_semantic() -> bool: @@ -890,133 +890,169 @@ def upload_partner_semantic() -> bool: Upload the semantic model to a stage. 
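+
+    Collects dbt semantic file(s) from the user, stores the parsed
+    'semantic_models' contents in session state, and returns True if at
+    least one file was uploaded.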
""" partners = [None, "dbt"] - uploaded_files = [] - + uploaded_files = None + selected_partner = st.session_state.get("partner_tool", None) - st.session_state["partner_tool"] = st.selectbox("Select the partner tool", - partners, - index = partners.index(selected_partner)) + st.session_state["partner_tool"] = st.selectbox( + "Select the partner tool", partners, index=partners.index(selected_partner) + ) if st.session_state["partner_tool"] == "dbt": - uploaded_files = st.file_uploader(f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)', - type=['yaml', 'yml'], - accept_multiple_files=True, - key = 'myfile') + uploaded_files = st.file_uploader( + f'Upload {st.session_state["partner_tool"]} semantic yaml file(s)', + type=["yaml", "yml"], + accept_multiple_files=True, + key="myfile", + ) if uploaded_files: - partner_semantic = extract_key_values(load_yaml_file(uploaded_files), 'semantic_models') + partner_semantic = extract_key_values( + load_yaml_file(uploaded_files), "semantic_models" + ) if not partner_semantic: - st.error("Upload file does not contain required semantic_models section.") + st.error( + "Upload file does not contain required semantic_models section." + ) else: st.session_state["partner_semantic"] = partner_semantic - st.session_state["uploaded_semantic_files"] = [i.name for i in uploaded_files] + st.session_state["uploaded_semantic_files"] = [ + i.name for i in uploaded_files + ] else: st.session_state["partner_semantic"] = None return bool(uploaded_files) + class PartnerCompareRow: - def __init__(self, row_data:pd.Series) -> dict: + def __init__(self, row_data: pd.Series) -> None: # type: ignore self.row_data = row_data self.key = row_data["field_key"] - self.cortex_metadata = self.row_data["field_details_cortex"] if self.row_data["field_details_cortex"] else {} - self.partner_metadata = self.row_data["field_details_partner"] if self.row_data["field_details_partner"] else {} - - - def render_row(self): - toggle_options = [ - "merged", - "cortex", - "partner", - "remove"] + self.cortex_metadata = ( + self.row_data["field_details_cortex"] + if self.row_data["field_details_cortex"] + else {} + ) + self.partner_metadata = ( + self.row_data["field_details_partner"] + if self.row_data["field_details_partner"] + else {} + ) + + def render_row(self) -> None | dict[str, Any]: # type: ignore + toggle_options = ["merged", "cortex", "partner", "remove"] metadata = {} # Create metadata based for each field given merging or singular semantic file useage of the field # Merge will merge the 2 based on user-selected preference - common_fields = ['name', 'description'] + common_fields = ["name", "description"] if self.cortex_metadata and self.partner_metadata: - metadata['merged'] = self.cortex_metadata.copy() - if st.session_state['partner_metadata_preference'] == "Partner": + metadata["merged"] = self.cortex_metadata.copy() + if st.session_state["partner_metadata_preference"] == "Partner": for n in common_fields: - metadata['merged'][n] = self.partner_metadata.get(n, self.cortex_metadata.get(n, None)) + metadata["merged"][n] = self.partner_metadata.get( + n, self.cortex_metadata.get(n, None) + ) else: for n in common_fields: - metadata['merged'][n] = self.cortex_metadata.get(n, self.partner_metadata.get(n, None)) + metadata["merged"][n] = self.cortex_metadata.get( + n, self.partner_metadata.get(n, None) + ) else: - metadata['merged'] = {} - metadata['partner'] = {field: self.partner_metadata.get(field) for field in common_fields} if self.partner_metadata else {} - 
metadata['cortex'] = self.cortex_metadata if self.cortex_metadata else {} - metadata['remove'] = {} - - if metadata['merged']: - toggle_default = 'merged' - elif metadata['partner']: - if st.session_state['keep_extra_partner']: - toggle_default = 'partner' + metadata["merged"] = {} + metadata["partner"] = ( + {field: self.partner_metadata.get(field) for field in common_fields} + if self.partner_metadata + else {} + ) + metadata["cortex"] = self.cortex_metadata if self.cortex_metadata else {} + metadata["remove"] = {} + + if metadata["merged"]: + toggle_default = "merged" + elif metadata["partner"]: + if st.session_state["keep_extra_partner"]: + toggle_default = "partner" else: - toggle_default = 'remove' - elif metadata['cortex']: - if st.session_state['keep_extra_cortex']: - toggle_default = 'cortex' + toggle_default = "remove" + elif metadata["cortex"]: + if st.session_state["keep_extra_cortex"]: + toggle_default = "cortex" else: - toggle_default = 'remove' + toggle_default = "remove" else: - toggle_default = 'remove' + toggle_default = "remove" - # with st.container(border=True, - # height=175): - key_col, detail_col = st.columns((.5, 1)) + key_col, detail_col = st.columns((0.5, 1)) with key_col: st.write(self.key) # We want to disable non-options but always keep remove option - revised_options = [i for i in toggle_options if metadata[i] or i == 'remove'] - detail_selection = st.radio("Keep", - index = revised_options.index(toggle_default), - options=revised_options, - key=f'row_{self.key}', - format_func=lambda x: x.capitalize(), - label_visibility='collapsed') + revised_options = [ + i for i in toggle_options if metadata[i] or i == "remove" + ] + detail_selection: str = st.radio( + "Keep", # type: ignore + index=revised_options.index(toggle_default), + options=revised_options, + key=f"row_{self.key}", + format_func=lambda x: x.capitalize(), + label_visibility="collapsed", + ) with detail_col: if metadata[detail_selection]: - st.json({k:v for k,v in metadata[detail_selection].items() if k in common_fields and v is not None}) + st.json( + { + k: v + for k, v in metadata[detail_selection].items() + if k in common_fields and v is not None + } + ) else: st.write("NA") st.divider() # Extract the selected metadata if not set to remove - if detail_selection != 'remove': - selected_metadata = metadata[detail_selection] + if detail_selection != "remove": + selected_metadata: dict[str, Any] = metadata[detail_selection] # Add expr to selected metadata if it's not included which is the case for dbt - selected_metadata['expr'] = self.key + selected_metadata["expr"] = self.key return selected_metadata - -def make_field_df(fields): + + +def make_field_df(fields: dict[str, Any]) -> pd.DataFrame: """ Converts a nested dictionary of fields into a DataFrame. """ rows = [] for section, entity_list in fields.items(): for field_key, field_details in entity_list.items(): - rows.append({'section': section, - 'field_key': field_key, - 'field_details': field_details - }) + rows.append( + { + "section": section, + "field_key": field_key, + "field_details": field_details, + } + ) return pd.DataFrame(rows) -def create_table_field_df(table_name: str, - sections: list[str], - yaml_data: list[dict]) -> pd.DataFrame: + +def create_table_field_df( + table_name: str, sections: list[str], yaml_data: list[dict[str, Any]] +) -> pd.DataFrame: """ Extracts sections of table_name in yaml_data dictionary as a DataFrame. 
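+
+    Partner-only fields map as follows (an illustrative summary of the logic
+    below): 'entities' -> dimensions (TEXT); 'measures' -> measures (NUMBER);
+    dimensions of type 'time' -> time_dimensions (DATE); all other
+    dimensions -> dimensions (TEXT). For example (hypothetical values),
+    determine_field_section(None, 'entities', None, {'name': 'user_id'})
+    returns ('dimensions', 'TEXT').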
""" - view = [x for x in yaml_data if x.get('name') == table_name][0] + view = [x for x in yaml_data if x.get("name") == table_name][0] fields = extract_expressions_from_sections(view, sections) fields_df = make_field_df(fields) return fields_df -def determine_field_section(section_cortex: str, - section_partner: str, - field_details_cortex: str, - field_details_partner: str): + +def determine_field_section( + section_cortex: str, + section_partner: str, + field_details_cortex: dict[str, str], + field_details_partner: dict[str, str], +) -> tuple[str, str | None]: """ Derives intended section of field in cortex analyst model. @@ -1025,166 +1061,202 @@ def determine_field_section(section_cortex: str, if section_cortex and field_details_cortex: try: - data_type = field_details_cortex.get('data_type', None) + data_type = field_details_cortex.get("data_type", None) except TypeError: - data_type = 'TEXT' + data_type = "TEXT" return (section_cortex, data_type) - else: # No matching cortex field found; field is partner is a novel logical field - if section_partner == 'entities': - section_cortex = 'dimensions' - data_type = 'TEXT' - elif section_partner == 'measures': - section_cortex = 'measures' - data_type = 'NUMBER' - else: # field_details_partner == 'dimensions' + else: # No matching cortex field found; field is partner is a novel logical field + if section_partner == "entities": + section_cortex = "dimensions" + data_type = "TEXT" + elif section_partner == "measures": + section_cortex = "measures" + data_type = "NUMBER" + else: # field_details_partner == 'dimensions' try: - if field_details_partner.get('type') == 'time': - section_cortex = 'time_dimensions' - data_type = 'DATE' + if field_details_partner.get("type") == "time": + section_cortex = "time_dimensions" + data_type = "DATE" except TypeError: - section_cortex = 'dimensions' - data_type = 'TEXT' + section_cortex = "dimensions" + data_type = "TEXT" else: - section_cortex = 'dimensions' - data_type = 'TEXT' + section_cortex = "dimensions" + data_type = "TEXT" return (section_cortex, data_type) - -def run_cortex_complete(conn: SnowflakeConnection, - model: str, - prompt: str, - prompt_args: Optional[dict] = None) -> str: - + + +def run_cortex_complete( + conn: SnowflakeConnection, + model: str, + prompt: str, + prompt_args: Optional[dict[str, Any]] = None, +) -> str | None: + if prompt_args: prompt = prompt.format(**prompt_args).replace("'", "\\'") complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')" - response = conn.cursor().execute(complete_sql).fetchone()[0] + response = conn.cursor().execute(complete_sql) + + if response: + output: str = response.fetchone()[0] # type: ignore + return output + else: + return None - return response -@st.dialog(f"Integrate partner tool semantic specs", width="large") +@st.dialog("Integrate partner tool semantic specs", width="large") def integrate_partner_semantics() -> None: - st.write("Upload semantic files from supported partners to merge with Cortex Analyst semantic model.") + st.write( + "Upload semantic files from supported partners to merge with Cortex Analyst semantic model." + ) - COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. + COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. 
+
     if prompt_args:
+        # Escape single quotes so the formatted prompt survives being embedded
+        # in the single-quoted SQL string literal below.
         prompt = prompt.format(**prompt_args).replace("'", "\\'")
     complete_sql = f"SELECT snowflake.cortex.complete('{model}', '{prompt}')"
-    response = conn.cursor().execute(complete_sql).fetchone()[0]
+    response = conn.cursor().execute(complete_sql)
+
+    if response:
+        output: str = response.fetchone()[0]  # type: ignore
+        return output
+    else:
+        return None
 
-    return response
 
-@st.dialog(f"Integrate partner tool semantic specs", width="large")
+@st.dialog("Integrate partner tool semantic specs", width="large")
 def integrate_partner_semantics() -> None:
-    st.write("Upload semantic files from supported partners to merge with Cortex Analyst semantic model.")
+    st.write(
+        "Upload semantic files from supported partners to merge with the Cortex Analyst semantic model."
+    )
 
-    COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. 
+    COMPARE_SEMANTICS_HELP = """Which semantic file should be checked first for necessary metadata. 
Where metadata is missing, the other semantic file will be checked."""
 
     INTEGRATE_HELP = """Merge the Cortex Analyst semantic file and Partner semantic file into the primary Cortex Analyst yaml editor."""
 
-    KEEP_CORTEX_HELP = """Retain fields that are found in Cortex Analyst semantic model
+    KEEP_CORTEX_HELP = """Retain fields that are found in Cortex Analyst semantic model 
 but not in Partner semantic model."""
 
-    KEEP_PARTNER_HELP = """Retain fields that are found in Partner semantic model
+    KEEP_PARTNER_HELP = """Retain fields that are found in Partner semantic model 
 but not in Cortex Analyst semantic model."""
 
-    uploaded_files = upload_partner_semantic() # Give user another chance to add/change partner semantic files besides builder
+    upload_partner_semantic()
     st.divider()
     if (
-        st.session_state.get('partner_semantic', None) and
-        st.session_state.get('partner_tool', None) and
-        st.session_state.get('uploaded_semantic_files', None)
-        ):
+        st.session_state.get("partner_semantic", None)
+        and st.session_state.get("partner_tool", None)
+        and st.session_state.get("uploaded_semantic_files", None)
+    ):
         # Get cortex semantic file as dictionary
-        cortex_semantic = proto_to_dict(st.session_state['semantic_model'])
-        cortex_tables = extract_key_values(cortex_semantic['tables'], 'name')
-        partner_tables = extract_key_values(st.session_state["partner_semantic"], 'name')
+        cortex_semantic = proto_to_dict(st.session_state["semantic_model"])
+        cortex_tables = extract_key_values(cortex_semantic["tables"], "name")
+        partner_tables = extract_key_values(
+            st.session_state["partner_semantic"], "name"
+        )
         st.write("Select which logical views to compare and merge.")
         c1, c2 = st.columns(2)
         with c1:
-            semantic_cortex_tbl = st.selectbox("Snowflake", cortex_tables)
+            semantic_cortex_tbl: str = st.selectbox("Snowflake", cortex_tables)  # type: ignore
         with c2:
-            semantic_partner_tbl = st.selectbox("Partner", partner_tables)
-
-        st.session_state['partner_metadata_preference'] = st.selectbox(
+            semantic_partner_tbl: str = st.selectbox("Partner", partner_tables)  # type: ignore
+
+        st.session_state["partner_metadata_preference"] = st.selectbox(
             "For fields shared in both sources, which source should be checked first for common metadata?",
             ["Partner", "Cortex"],
-            index = 0,
-            help = COMPARE_SEMANTICS_HELP
-            )
-        orphan_label, orphan_col1, orphan_col2 = st.columns(3, vertical_alignment="center", gap="small")
+            index=0,
+            help=COMPARE_SEMANTICS_HELP,
+        )
+        orphan_label, orphan_col1, orphan_col2 = st.columns(
+            3, vertical_alignment="center", gap="small"
+        )
        with orphan_label:
             st.write("Retain unmatched fields:")
         with orphan_col1:
-            st.session_state['keep_extra_cortex'] = st.toggle("Cortex",value = True, help = KEEP_CORTEX_HELP)
+            st.session_state["keep_extra_cortex"] = st.toggle(
+                "Cortex", value=True, help=KEEP_CORTEX_HELP
+            )
         with orphan_col2:
-            st.session_state['keep_extra_partner'] = st.toggle("Partner",value = True, help = KEEP_PARTNER_HELP)
+            st.session_state["keep_extra_partner"] = st.toggle(
+                "Partner", value=True, help=KEEP_PARTNER_HELP
+            )
         with st.expander("Advanced configuration", expanded=False):
             st.caption("Only shared metadata information is displayed")
             # Create dataframe of each semantic file's fields with mergeable keys
             partner_fields_df = create_table_field_df(
-                semantic_partner_tbl,
-                ['dimensions', 'measures', 'entities'],
-                st.session_state["partner_semantic"]
+                semantic_partner_tbl,  # type: ignore
+                ["dimensions", "measures", "entities"],
+                st.session_state["partner_semantic"],
             )
             cortex_fields_df = 
create_table_field_df( - semantic_cortex_tbl, - ['dimensions', 'time_dimensions', 'measures'], - cortex_semantic['tables'] + semantic_cortex_tbl, # type: ignore + ["dimensions", "time_dimensions", "measures"], + cortex_semantic["tables"], ) combined_fields_df = cortex_fields_df.merge( - partner_fields_df, - on='field_key', - how='outer', - suffixes=('_cortex', '_partner')).replace(np.nan, None) + partner_fields_df, + on="field_key", + how="outer", + suffixes=("_cortex", "_partner"), + ).replace(np.nan, None) # Convert json strings to dict for easier extraction later - for col in ['field_details_cortex', 'field_details_partner']: - combined_fields_df[col] = combined_fields_df[col].apply(lambda x: - json.loads(x) if not pd.isnull(x) and - not isinstance(x, dict) else x) + for col in ["field_details_cortex", "field_details_partner"]: + combined_fields_df[col] = combined_fields_df[col].apply( + lambda x: ( + json.loads(x) + if not pd.isnull(x) and not isinstance(x, dict) + else x + ) + ) # Create containers and store them in a dictionary containers = { - 'dimensions': st.container(), - 'measures': st.container(), - 'time_dimensions': st.container() + "dimensions": st.container(), + "measures": st.container(), + "time_dimensions": st.container(), } - + # Assign labels to the containers for key in containers.keys(): - containers[key].write(key.replace('_',' ').title()) + containers[key].write(key.replace("_", " ").title()) # Initialize sections as empty lists - sections = {key: [] for key in containers.keys()} + sections: dict[str, list[dict[str, Any]]] = { + key: [] for key in containers.keys() + } - for k,v in combined_fields_df.iterrows(): + for k, v in combined_fields_df.iterrows(): # Get destination section for cortex analyst semantic file target_section, target_data_type = determine_field_section( - v['section_cortex'], - v['section_partner'], - v['field_details_cortex'], - v['field_details_partner']) + v["section_cortex"], + v["section_partner"], + v["field_details_cortex"], + v["field_details_partner"], + ) with containers[target_section]: selected_metadata = PartnerCompareRow(v).render_row() if selected_metadata: - selected_metadata['data_type'] = target_data_type + selected_metadata["data_type"] = target_data_type sections[target_section].append(selected_metadata) - - integrate_col, reset_col, _ = st.columns((1, 1, 5), gap = "small") + + integrate_col, reset_col, _ = st.columns((1, 1, 5), gap="small") with integrate_col: - merge_button = st.button("Merge", help=INTEGRATE_HELP, use_container_width=True) + merge_button = st.button( + "Merge", help=INTEGRATE_HELP, use_container_width=True + ) with reset_col: - reset_button = st.button("Back", help="Return to the main iteration screen", use_container_width=True) + reset_button = st.button( + "Back", + help="Return to the main iteration screen", + use_container_width=True, + ) if merge_button: # Update fields in cortex semantic model - for i, tbl in enumerate(cortex_semantic['tables']): - if tbl.get('name', None) == semantic_cortex_tbl: + for i, tbl in enumerate(cortex_semantic["tables"]): + if tbl.get("name", None) == semantic_cortex_tbl: for k in sections.keys(): - cortex_semantic['tables'][i][k] = sections[k] + cortex_semantic["tables"][i][k] = sections[k] # Submitted changes to fields will be captured in the yaml editor # User will need to make necessary modifications there before validating/uploading try: st.session_state["yaml"] = yaml.dump(cortex_semantic, sort_keys=False) - st.session_state["semantic_model"] = 
yaml_to_semantic_model(st.session_state["yaml"]) - st.success("Integration complete! Please validate your semantic model before uploading.") + st.session_state["semantic_model"] = yaml_to_semantic_model( + st.session_state["yaml"] + ) + st.success( + "Integration complete! Please validate your semantic model before uploading." + ) time.sleep(1.5) st.rerun() except Exception as e: st.error(f"Integration failed: {e}") - + if reset_button: - st.rerun() # Lazy alternative to resetting all configurations + st.rerun() # Lazy alternative to resetting all configurations @dataclass diff --git a/semantic_model_generator/data_processing/proto_utils.py b/semantic_model_generator/data_processing/proto_utils.py index 768b0909..6b066d68 100644 --- a/semantic_model_generator/data_processing/proto_utils.py +++ b/semantic_model_generator/data_processing/proto_utils.py @@ -1,6 +1,6 @@ import io import json -from typing import TypeVar +from typing import Any, TypeVar import ruamel.yaml from google.protobuf import json_format @@ -39,8 +39,9 @@ def proto_to_yaml(message: ProtoMsg) -> str: return yaml_str except Exception as e: raise ValueError(f"Failed to convert protobuf message to YAML: {e}") - -def proto_to_dict(message: ProtoMsg) -> dict: + + +def proto_to_dict(message: ProtoMsg) -> dict[str, Any]: """Serializes the input proto into a dictionary. Args: @@ -52,10 +53,10 @@ def proto_to_dict(message: ProtoMsg) -> dict: try: # Convert the Protobuf message to JSON string. json_str = json_format.MessageToJson(message, preserving_proto_field_name=True) - + # Convert the JSON string to a Python dictionary. json_data = json.loads(json_str) - + assert isinstance(json_data, dict) return json_data except Exception as e: From f3fcbb544f60f4fa267acbccbb043cab4992f496 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Mon, 26 Aug 2024 11:22:16 -0500 Subject: [PATCH 15/16] Add save button to replace reset button --- admin_apps/shared_utils.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 508093b5..8d680dce 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -1222,15 +1222,17 @@ def integrate_partner_semantics() -> None: selected_metadata["data_type"] = target_data_type sections[target_section].append(selected_metadata) - integrate_col, reset_col, _ = st.columns((1, 1, 5), gap="small") + integrate_col, commit_col, _ = st.columns((1, 1, 5), gap="small") with integrate_col: merge_button = st.button( - "Merge", help=INTEGRATE_HELP, use_container_width=True + "Merge", + help=INTEGRATE_HELP, + use_container_width=True ) - with reset_col: + with commit_col: reset_button = st.button( - "Back", - help="Return to the main iteration screen", + "Save", + help="Save the merged results and return to the main iteration screen", use_container_width=True, ) @@ -1247,15 +1249,19 @@ def integrate_partner_semantics() -> None: st.session_state["semantic_model"] = yaml_to_semantic_model( st.session_state["yaml"] ) - st.success( - "Integration complete! Please validate your semantic model before uploading." + merge_msg = st.success( + "Merging..." ) - time.sleep(1.5) - st.rerun() + time.sleep(1) + merge_msg.empty() except Exception as e: st.error(f"Integration failed: {e}") if reset_button: + st.success( + "Integration complete! Please validate your semantic model before uploading." 
+ ) + time.sleep(1.5) st.rerun() # Lazy alternative to resetting all configurations From 4cc656447f032a9d2435c317619a62ff156ec384 Mon Sep 17 00:00:00 2001 From: sfc-gh-jsummer Date: Mon, 26 Aug 2024 11:23:27 -0500 Subject: [PATCH 16/16] Run formatter --- admin_apps/shared_utils.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/admin_apps/shared_utils.py b/admin_apps/shared_utils.py index 8d680dce..5fcb64ee 100644 --- a/admin_apps/shared_utils.py +++ b/admin_apps/shared_utils.py @@ -1225,9 +1225,7 @@ def integrate_partner_semantics() -> None: integrate_col, commit_col, _ = st.columns((1, 1, 5), gap="small") with integrate_col: merge_button = st.button( - "Merge", - help=INTEGRATE_HELP, - use_container_width=True + "Merge", help=INTEGRATE_HELP, use_container_width=True ) with commit_col: reset_button = st.button( @@ -1249,9 +1247,7 @@ def integrate_partner_semantics() -> None: st.session_state["semantic_model"] = yaml_to_semantic_model( st.session_state["yaml"] ) - merge_msg = st.success( - "Merging..." - ) + merge_msg = st.success("Merging...") time.sleep(1) merge_msg.empty() except Exception as e: @@ -1259,8 +1255,8 @@ def integrate_partner_semantics() -> None: if reset_button: st.success( - "Integration complete! Please validate your semantic model before uploading." - ) + "Integration complete! Please validate your semantic model before uploading." + ) time.sleep(1.5) st.rerun() # Lazy alternative to resetting all configurations
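
A minimal usage sketch of the section-mapping helper above (illustrative only;
it assumes determine_field_section is importable from admin_apps.shared_utils,
where the later patches in this series place it):

    from admin_apps.shared_utils import determine_field_section

    # A field already present in the Cortex model keeps its section and data type.
    determine_field_section("dimensions", "dimensions", {"data_type": "TEXT"}, {"name": "region"})
    # -> ("dimensions", "TEXT")

    # A partner-only dbt entity maps to a TEXT dimension.
    determine_field_section(None, "entities", None, {"name": "order_id"})
    # -> ("dimensions", "TEXT")

    # A partner-only measure maps to a NUMBER measure.
    determine_field_section(None, "measures", None, {"name": "revenue"})
    # -> ("measures", "NUMBER")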