From 1257d345bad03d53bffc251df17265d0f51ea748 Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Thu, 17 Oct 2024 10:21:57 -0700 Subject: [PATCH 1/4] autopopulate --- .../data_processing/data_types.py | 1 + semantic_model_generator/generate_model.py | 18 +++++++++++++++++- .../snowflake_utils/snowflake_connector.py | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/semantic_model_generator/data_processing/data_types.py b/semantic_model_generator/data_processing/data_types.py index e4529332..b7e4ec0b 100644 --- a/semantic_model_generator/data_processing/data_types.py +++ b/semantic_model_generator/data_processing/data_types.py @@ -37,6 +37,7 @@ class Table: id_: int name: str columns: List[Column] + primary_key: Optional[list[str]] = None comment: Optional[str] = ( None # comment field's to save the table comment user specified on the table ) diff --git a/semantic_model_generator/generate_model.py b/semantic_model_generator/generate_model.py index 766bc0c6..dc3afba2 100644 --- a/semantic_model_generator/generate_model.py +++ b/semantic_model_generator/generate_model.py @@ -59,7 +59,7 @@ def _get_placeholder_joins() -> List[semantic_model_pb2.Relationship]: def _raw_table_to_semantic_context_table( - database: str, schema: str, raw_table: data_types.Table + database: str, schema: str, raw_table: data_types.Table, allow_joins: bool = False ) -> semantic_model_pb2.Table: """ Converts a raw table representation to a semantic model table in protobuf format. @@ -68,6 +68,7 @@ def _raw_table_to_semantic_context_table( database (str): The name of the database containing the table. schema (str): The name of the schema containing the table. raw_table (data_types.Table): The raw table object to be transformed. + allow_joins (bool): Whether joins are enabled in the semantic model. Returns: semantic_model_pb2.Table: A protobuf representation of the semantic table. @@ -146,6 +147,18 @@ def _raw_table_to_semantic_context_table( f"No valid columns found for table {raw_table.name}. Please verify that this table contains column's datatypes not in {OBJECT_DATATYPES}." ) + primary_key = None + if allow_joins: + # Populate the primary key field if we were able to retrieve one during raw table construction. + # If not, leave a placeholder for the user to fill out. + primary_key = semantic_model_pb2.PrimaryKey( + columns=( + raw_table.primary_key + if raw_table.primary_key + else [_PLACEHOLDER_COMMENT] + ) + ) + return semantic_model_pb2.Table( name=raw_table.name, base_table=semantic_model_pb2.FullyQualifiedTable( @@ -157,6 +170,7 @@ def _raw_table_to_semantic_context_table( dimensions=dimensions, time_dimensions=time_dimensions, measures=measures, + primary_key=primary_key, ) @@ -222,11 +236,13 @@ def raw_schema_to_semantic_context( ndv_per_column=n_sample_values, # number of sample values to pull per column. columns_df=valid_columns_df_this_table, max_workers=1, + allow_joins=allow_joins, ) table_object = _raw_table_to_semantic_context_table( database=fqn_table.database, schema=fqn_table.schema_name, raw_table=raw_table, + allow_joins=allow_joins, ) table_objects.append(table_object) # TODO(jhilgart): Call cortex model to generate a semantically friendly name here. diff --git a/semantic_model_generator/snowflake_utils/snowflake_connector.py b/semantic_model_generator/snowflake_utils/snowflake_connector.py index 815d6668..dbe24440 100644 --- a/semantic_model_generator/snowflake_utils/snowflake_connector.py +++ b/semantic_model_generator/snowflake_utils/snowflake_connector.py @@ -126,6 +126,18 @@ def _get_column_comment( return "" +def _get_table_primary_keys( + conn: SnowflakeConnection, schema_name: str, table_name: str +) -> list[str] | None: + query = f"show primary keys in table {schema_name}.{table_name};" + cursor = conn.cursor() + cursor.execute(query) + primary_keys = cursor.fetchall() + if primary_keys: + return [pk[3] for pk in primary_keys] + return None + + def get_table_representation( conn: SnowflakeConnection, schema_name: str, @@ -134,6 +146,7 @@ def get_table_representation( ndv_per_column: int, columns_df: pd.DataFrame, max_workers: int, + allow_joins: bool = False, ) -> Table: table_comment = _get_table_comment(conn, schema_name, table_name, columns_df) @@ -159,11 +172,16 @@ def _get_col(col_index: int, column_row: pd.Series) -> Column: index_and_column.append((col_index, column)) columns = [c for _, c in sorted(index_and_column, key=lambda x: x[0])] + primary_keys = ( + _get_table_primary_keys(conn, schema_name, table_name) if allow_joins else None + ) + return Table( id_=table_index, name=table_name, comment=table_comment, columns=columns, + primary_keys=primary_keys, ) From 219f196a229bfba1804ea7063465601c428d2b32 Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Thu, 17 Oct 2024 10:24:23 -0700 Subject: [PATCH 2/4] rename experimental features --- journeys/builder.py | 4 ++-- journeys/iteration.py | 8 +++++--- partner/looker.py | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/journeys/builder.py b/journeys/builder.py index 857fd33d..f980d233 100644 --- a/journeys/builder.py +++ b/journeys/builder.py @@ -117,8 +117,8 @@ def table_selector_dialog() -> None: st.markdown("
", unsafe_allow_html=True) experimental_features = st.checkbox( - "Enable experimental features (optional)", - help="Checking this box will enable generation of experimental features in the semantic model. If enabling this setting, please ensure that you have the proper parameters set on your Snowflake account. Some features (e.g. joins) are currently in Private Preview and available only to select accounts. Reach out to your account team for access.", + "Enable joins (optional)", + help="Checking this box will enable you to add/edit join paths in your semantic model. If enabling this setting, please ensure that you have the proper parameters set on your Snowflake account. Reach out to your account team for access.", ) st.session_state["experimental_features"] = experimental_features diff --git a/journeys/iteration.py b/journeys/iteration.py index abb698de..ad355320 100644 --- a/journeys/iteration.py +++ b/journeys/iteration.py @@ -629,8 +629,8 @@ def set_up_requirements() -> None: file_name = st.selectbox("File name", options=available_files, index=None) experimental_features = st.checkbox( - "Enable experimental features (optional)", - help="Checking this box will enable generation of experimental features in the semantic model. If enabling this setting, please ensure that you have the proper parameters set on your Snowflake account. Some features (e.g. joins) are currently in Private Preview and available only to select accounts. Reach out to your account team for access.", + "Enable joins (optional)", + help="Checking this box will enable you to add/edit join paths in your semantic model. If enabling this setting, please ensure that you have the proper parameters set on your Snowflake account. Reach out to your account team for access.", ) if st.button( @@ -703,7 +703,9 @@ def show() -> None: return_home_button() if "yaml" not in st.session_state: # Only proceed to download the YAML from stage if we don't have one from the builder flow. - yaml = download_yaml(st.session_state.file_name, st.session_state.snowflake_stage.stage_name) + yaml = download_yaml( + st.session_state.file_name, st.session_state.snowflake_stage.stage_name + ) st.session_state["yaml"] = yaml st.session_state["semantic_model"] = yaml_to_semantic_model(yaml) if "last_saved_yaml" not in st.session_state: diff --git a/partner/looker.py b/partner/looker.py index 903a62e1..fe874670 100644 --- a/partner/looker.py +++ b/partner/looker.py @@ -232,8 +232,8 @@ def set_looker_semantic() -> None: sample_values = input_sample_value_num() experimental_features = st.checkbox( - "Enable experimental features (optional)", - help="Checking this box will enable generation of experimental features in the semantic model. If enabling this setting, please ensure that you have the proper parameters set on your Snowflake account. Some features (e.g. joins) are currently in Private Preview and available only to select accounts. Reach out to your account team for access.", + "Enable joins (optional)", + help="Checking this box will enable you to add/edit join paths in your semantic model. If enabling this setting, please ensure that you have the proper parameters set on your Snowflake account. Reach out to your account team for access.", ) if st.button("Continue", type="primary"): From 938aaf489fd5ba31aa69db2bb4f6e2ce7073828c Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Mon, 21 Oct 2024 13:59:58 -0700 Subject: [PATCH 3/4] move pk population on join path add --- journeys/joins.py | 38 ++++++++++++++++++- .../data_processing/data_types.py | 1 - semantic_model_generator/generate_model.py | 18 +-------- .../snowflake_utils/snowflake_connector.py | 13 ++----- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/journeys/joins.py b/journeys/joins.py index a6e0e700..293dc332 100644 --- a/journeys/joins.py +++ b/journeys/joins.py @@ -3,7 +3,14 @@ import streamlit as st from streamlit_extras.row import row +from app_utils.shared_utils import get_snowflake_connection +from semantic_model_generator.data_processing.cte_utils import ( + fully_qualified_table_name, +) from semantic_model_generator.protos import semantic_model_pb2 +from semantic_model_generator.snowflake_utils.snowflake_connector import ( + get_table_primary_keys, +) SUPPORTED_JOIN_TYPES = [ join_type @@ -167,7 +174,6 @@ def relationship_builder( @st.experimental_dialog("Join Builder", width="large") def joins_dialog() -> None: - if "builder_joins" not in st.session_state: # Making a copy of the original relationships list so we can modify freely without affecting the original. st.session_state.builder_joins = st.session_state.semantic_model.relationships[ @@ -210,6 +216,36 @@ def joins_dialog() -> None: ) return + # Populate primary key information for each table in a join relationship. + left_table_object = next( + ( + table + for table in st.session_state.semantic_model.tables + if table.name == relationship.left_table + ) + ) + right_table_object = next( + ( + table + for table in st.session_state.semantic_model.tables + if table.name == relationship.right_table + ) + ) + + if not left_table_object.primary_key.columns: + primary_keys = get_table_primary_keys( + get_snowflake_connection(), + table_fqn=fully_qualified_table_name(left_table_object.base_table), + ) + left_table_object.primary_key.columns.extend(primary_keys or [""]) + + if not right_table_object.primary_key.columns: + primary_keys = get_table_primary_keys( + get_snowflake_connection(), + table_fqn=fully_qualified_table_name(right_table_object.base_table), + ) + right_table_object.primary_key.columns.extend(primary_keys or [""]) + del st.session_state.semantic_model.relationships[:] st.session_state.semantic_model.relationships.extend( st.session_state.builder_joins diff --git a/semantic_model_generator/data_processing/data_types.py b/semantic_model_generator/data_processing/data_types.py index b7e4ec0b..e4529332 100644 --- a/semantic_model_generator/data_processing/data_types.py +++ b/semantic_model_generator/data_processing/data_types.py @@ -37,7 +37,6 @@ class Table: id_: int name: str columns: List[Column] - primary_key: Optional[list[str]] = None comment: Optional[str] = ( None # comment field's to save the table comment user specified on the table ) diff --git a/semantic_model_generator/generate_model.py b/semantic_model_generator/generate_model.py index dc3afba2..766bc0c6 100644 --- a/semantic_model_generator/generate_model.py +++ b/semantic_model_generator/generate_model.py @@ -59,7 +59,7 @@ def _get_placeholder_joins() -> List[semantic_model_pb2.Relationship]: def _raw_table_to_semantic_context_table( - database: str, schema: str, raw_table: data_types.Table, allow_joins: bool = False + database: str, schema: str, raw_table: data_types.Table ) -> semantic_model_pb2.Table: """ Converts a raw table representation to a semantic model table in protobuf format. @@ -68,7 +68,6 @@ def _raw_table_to_semantic_context_table( database (str): The name of the database containing the table. schema (str): The name of the schema containing the table. raw_table (data_types.Table): The raw table object to be transformed. - allow_joins (bool): Whether joins are enabled in the semantic model. Returns: semantic_model_pb2.Table: A protobuf representation of the semantic table. @@ -147,18 +146,6 @@ def _raw_table_to_semantic_context_table( f"No valid columns found for table {raw_table.name}. Please verify that this table contains column's datatypes not in {OBJECT_DATATYPES}." ) - primary_key = None - if allow_joins: - # Populate the primary key field if we were able to retrieve one during raw table construction. - # If not, leave a placeholder for the user to fill out. - primary_key = semantic_model_pb2.PrimaryKey( - columns=( - raw_table.primary_key - if raw_table.primary_key - else [_PLACEHOLDER_COMMENT] - ) - ) - return semantic_model_pb2.Table( name=raw_table.name, base_table=semantic_model_pb2.FullyQualifiedTable( @@ -170,7 +157,6 @@ def _raw_table_to_semantic_context_table( dimensions=dimensions, time_dimensions=time_dimensions, measures=measures, - primary_key=primary_key, ) @@ -236,13 +222,11 @@ def raw_schema_to_semantic_context( ndv_per_column=n_sample_values, # number of sample values to pull per column. columns_df=valid_columns_df_this_table, max_workers=1, - allow_joins=allow_joins, ) table_object = _raw_table_to_semantic_context_table( database=fqn_table.database, schema=fqn_table.schema_name, raw_table=raw_table, - allow_joins=allow_joins, ) table_objects.append(table_object) # TODO(jhilgart): Call cortex model to generate a semantically friendly name here. diff --git a/semantic_model_generator/snowflake_utils/snowflake_connector.py b/semantic_model_generator/snowflake_utils/snowflake_connector.py index dbe24440..76b695ef 100644 --- a/semantic_model_generator/snowflake_utils/snowflake_connector.py +++ b/semantic_model_generator/snowflake_utils/snowflake_connector.py @@ -126,10 +126,11 @@ def _get_column_comment( return "" -def _get_table_primary_keys( - conn: SnowflakeConnection, schema_name: str, table_name: str +def get_table_primary_keys( + conn: SnowflakeConnection, + table_fqn: str, ) -> list[str] | None: - query = f"show primary keys in table {schema_name}.{table_name};" + query = f"show primary keys in table {table_fqn};" cursor = conn.cursor() cursor.execute(query) primary_keys = cursor.fetchall() @@ -146,7 +147,6 @@ def get_table_representation( ndv_per_column: int, columns_df: pd.DataFrame, max_workers: int, - allow_joins: bool = False, ) -> Table: table_comment = _get_table_comment(conn, schema_name, table_name, columns_df) @@ -172,16 +172,11 @@ def _get_col(col_index: int, column_row: pd.Series) -> Column: index_and_column.append((col_index, column)) columns = [c for _, c in sorted(index_and_column, key=lambda x: x[0])] - primary_keys = ( - _get_table_primary_keys(conn, schema_name, table_name) if allow_joins else None - ) - return Table( id_=table_index, name=table_name, comment=table_comment, columns=columns, - primary_keys=primary_keys, ) From fb47915a9df65536ecf3e47b112c161e5bed9401 Mon Sep 17 00:00:00 2001 From: Chris Nivera Date: Mon, 21 Oct 2024 15:04:09 -0700 Subject: [PATCH 4/4] st.spinner --- journeys/joins.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/journeys/joins.py b/journeys/joins.py index 293dc332..2d09d519 100644 --- a/journeys/joins.py +++ b/journeys/joins.py @@ -232,19 +232,24 @@ def joins_dialog() -> None: ) ) - if not left_table_object.primary_key.columns: - primary_keys = get_table_primary_keys( - get_snowflake_connection(), - table_fqn=fully_qualified_table_name(left_table_object.base_table), - ) - left_table_object.primary_key.columns.extend(primary_keys or [""]) + with st.spinner("Fetching primary keys..."): + if not left_table_object.primary_key.columns: + primary_keys = get_table_primary_keys( + get_snowflake_connection(), + table_fqn=fully_qualified_table_name( + left_table_object.base_table + ), + ) + left_table_object.primary_key.columns.extend(primary_keys or [""]) - if not right_table_object.primary_key.columns: - primary_keys = get_table_primary_keys( - get_snowflake_connection(), - table_fqn=fully_qualified_table_name(right_table_object.base_table), - ) - right_table_object.primary_key.columns.extend(primary_keys or [""]) + if not right_table_object.primary_key.columns: + primary_keys = get_table_primary_keys( + get_snowflake_connection(), + table_fqn=fully_qualified_table_name( + right_table_object.base_table + ), + ) + right_table_object.primary_key.columns.extend(primary_keys or [""]) del st.session_state.semantic_model.relationships[:] st.session_state.semantic_model.relationships.extend(