From 87dca0fbf600af9899a5c9c37471558b445f7054 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 23 Sep 2024 21:19:38 +0200
Subject: [PATCH 1/7] fixed spark and git

---
 src/sempy_labs/_git.py                        |  4 +-
 src/sempy_labs/_spark.py                      | 61 ++++++++-----------
 .../directlake/_generate_shared_expression.py | 16 +----
 3 files changed, 30 insertions(+), 51 deletions(-)

diff --git a/src/sempy_labs/_git.py b/src/sempy_labs/_git.py
index 78729342..c4f0921a 100644
--- a/src/sempy_labs/_git.py
+++ b/src/sempy_labs/_git.py
@@ -132,7 +132,7 @@ def get_git_status(workspace: Optional[str] = None) -> pd.DataFrame:
 
     client = fabric.FabricRestClient()
     response = client.get(f"/v1/workspaces/{workspace_id}/git/status")
-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)
 
     result = lro(client, response).json()
@@ -237,7 +237,7 @@ def initialize_git_connection(workspace: Optional[str] = None):
 
     client = fabric.FabricRestClient()
     response = client.post(f"/v1/workspaces/{workspace_id}/git/initializeConnection")
-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)
 
     lro(client, response)
diff --git a/src/sempy_labs/_spark.py b/src/sempy_labs/_spark.py
index fec4ed50..95b421ef 100644
--- a/src/sempy_labs/_spark.py
+++ b/src/sempy_labs/_spark.py
@@ -298,7 +298,7 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )
 
 
-def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
+def get_spark_settings(workspace: Optional[str] = None, return_dataframe: Optional[bool] = True) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.
 
@@ -308,10 +308,12 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    return_dataframe : bool, default=True
+        If True, returns a pandas dataframe. If False, returns a json dictionary.
 
     Returns
     -------
-    pandas.DataFrame
+    pandas.DataFrame | dict
         A pandas dataframe showing the spark settings for a workspace.
""" @@ -368,7 +370,10 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame: df[bool_cols] = df[bool_cols].astype(bool) # df[int_cols] = df[int_cols].astype(int) - return df + if return_dataframe: + return df + else: + return response.json() def update_spark_settings( @@ -420,38 +425,24 @@ def update_spark_settings( # https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - dfS = get_spark_settings(workspace=workspace) - - if automatic_log_enabled is None: - automatic_log_enabled = bool(dfS["Automatic Log Enabled"].iloc[0]) - if high_concurrency_enabled is None: - high_concurrency_enabled = bool(dfS["High Concurrency Enabled"].iloc[0]) - if customize_compute_enabled is None: - customize_compute_enabled = bool(dfS["Customize Compute Enabled"].iloc[0]) - if default_pool_name is None: - default_pool_name = dfS["Default Pool Name"].iloc[0] - if max_node_count is None: - max_node_count = int(dfS["Max Node Count"].iloc[0]) - if max_executors is None: - max_executors = int(dfS["Max Executors"].iloc[0]) - if environment_name is None: - environment_name = dfS["Environment Name"].iloc[0] - if runtime_version is None: - runtime_version = dfS["Runtime Version"].iloc[0] - - request_body = { - "automaticLog": {"enabled": automatic_log_enabled}, - "highConcurrency": {"notebookInteractiveRunEnabled": high_concurrency_enabled}, - "pool": { - "customizeComputeEnabled": customize_compute_enabled, - "defaultPool": {"name": default_pool_name, "type": "Workspace"}, - "starterPool": { - "maxNodeCount": max_node_count, - "maxExecutors": max_executors, - }, - }, - "environment": {"name": environment_name, "runtimeVersion": runtime_version}, - } + request_body = get_spark_settings(workspace=workspace, return_dataframe=False) + + if automatic_log_enabled is not None: + request_body['automaticLog']['enabled'] = automatic_log_enabled + if high_concurrency_enabled is not None: + request_body['highConcurrency']['notebookInteractiveRunEnabled'] = high_concurrency_enabled + if customize_compute_enabled is not None: + request_body['pool']['customizeComputeEnabled'] = customize_compute_enabled + if default_pool_name is not None: + request_body['pool']['defaultPool']['name'] = default_pool_name + if max_node_count is not None: + request_body['pool']['starterPool']['maxNodeCount'] = max_node_count + if max_executors is not None: + request_body['pool']['starterPool']['maxExecutors'] = max_executors + if environment_name is not None: + request_body['environment']['name'] = environment_name + if runtime_version is not None: + request_body['environment']['runtimeVersion'] = runtime_version client = fabric.FabricRestClient() response = client.patch( diff --git a/src/sempy_labs/directlake/_generate_shared_expression.py b/src/sempy_labs/directlake/_generate_shared_expression.py index a8daa4f4..3afc19d7 100644 --- a/src/sempy_labs/directlake/_generate_shared_expression.py +++ b/src/sempy_labs/directlake/_generate_shared_expression.py @@ -13,7 +13,6 @@ def generate_shared_expression( item_name: Optional[str] = None, item_type: Optional[str] = "Lakehouse", workspace: Optional[str] = None, - direct_lake_over_onelake: Optional[bool] = False, ) -> str: """ Dynamically generates the M expression used by a Direct Lake model for a given lakehouse/warehouse. @@ -29,8 +28,6 @@ def generate_shared_expression( The Fabric workspace used by the item. 
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-    direct_lake_over_onelake : bool, defualt=False
-        Generates an expression required for a Direct Lake over OneLake semantic mode. Only available for lakehouses, not warehouses.
 
     Returns
     -------
@@ -47,9 +44,6 @@ def generate_shared_expression(
             f"{icons.red_dot} Invalid item type. Valid options: {item_types}."
         )
 
-    if item_type == 'Warehouse' and direct_lake_over_onelake:
-        raise ValueError(f"{icons.red_dot} Direct Lake over OneLake is only supported for lakehouses, not warehouses.")
-
     if item_name is None:
         item_id = fabric.get_lakehouse_id()
         item_name = resolve_lakehouse_name(item_id, workspace)
@@ -83,12 +77,6 @@ def generate_shared_expression(
     start_expr = 'let\n\tdatabase = '
     end_expr = '\nin\n\tdatabase'
 
-    if not direct_lake_over_onelake:
-        mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
-    else:
-        url = prop.get('oneLakeTablesPath').rstrip('/Tables')
-        mid_expr = f'AzureStorage.DataLake(\\"{url}\\")"'
-
-    sh = f"{start_expr}{mid_expr}{end_expr}"
+    mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
 
-    return sh
+    return f"{start_expr}{mid_expr}{end_expr}"

From 157ab1ced762a84d3b823ae7993de6c589356f0b Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 23 Sep 2024 21:30:26 +0200
Subject: [PATCH 2/7] more fixes

---
 src/sempy_labs/_git.py | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/sempy_labs/_git.py b/src/sempy_labs/_git.py
index c4f0921a..f20e8822 100644
--- a/src/sempy_labs/_git.py
+++ b/src/sempy_labs/_git.py
@@ -137,14 +137,13 @@ def get_git_status(workspace: Optional[str] = None) -> pd.DataFrame:
 
     result = lro(client, response).json()
 
-    for v in result.get("value", []):
-        changes = v.get("changes", [])
+    for changes in result.get("changes", []):
         item_metadata = changes.get("itemMetadata", {})
         item_identifier = item_metadata.get("itemIdentifier", {})
 
         new_data = {
-            "Workspace Head": v.get("workspaceHead"),
-            "Remote Commit Hash": v.get("remoteCommitHash"),
+            "Workspace Head": result.get("workspaceHead"),
+            "Remote Commit Hash": result.get("remoteCommitHash"),
             "Object ID": item_identifier.get("objectId"),
             "Logical ID": item_identifier.get("logicalId"),
             "Item Type": item_metadata.get("itemType"),
@@ -199,21 +198,21 @@ def get_git_connection(workspace: Optional[str] = None) -> pd.DataFrame:
     if response.status_code != 200:
         raise FabricHTTPException(response)
 
-    for v in response.json().get("value", []):
-        provider_details = v.get("gitProviderDetails", {})
-        sync_details = v.get("gitSyncDetails", {})
-        new_data = {
-            "Organization Name": provider_details.get("organizationName"),
-            "Project Name": provider_details.get("projectName"),
-            "Git Provider Type": provider_details.get("gitProviderType"),
-            "Repository Name": provider_details.get("repositoryName"),
-            "Branch Name": provider_details.get("branchName"),
-            "Directory Name": provider_details.get("directoryName"),
-            "Workspace Head": sync_details.get("head"),
-            "Last Sync Time": sync_details.get("lastSyncTime"),
-            "Git Conneciton State": v.get("gitConnectionState"),
-        }
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+    r = response.json()
+    provider_details = r.get("gitProviderDetails", {})
+    sync_details = r.get("gitSyncDetails", {})
+    new_data = {
+        "Organization Name": provider_details.get("organizationName"),
+        "Project Name": provider_details.get("projectName"),
+        "Git Provider Type": provider_details.get("gitProviderType"),
+        "Repository Name": provider_details.get("repositoryName"),
+        "Branch Name": provider_details.get("branchName"),
+        "Directory Name": provider_details.get("directoryName"),
+        "Workspace Head": sync_details.get("head"),
+        "Last Sync Time": sync_details.get("lastSyncTime"),
+        "Git Connection State": r.get("gitConnectionState"),
+    }
+    df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
 
     return df

From b4b861d2661401dba7d85044e612cc2030119a91 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 23 Sep 2024 22:35:58 +0200
Subject: [PATCH 3/7] black

---
 src/sempy_labs/_spark.py                      | 22 +++++++++++--------
 .../directlake/_generate_shared_expression.py |  4 ++--
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/sempy_labs/_spark.py b/src/sempy_labs/_spark.py
index 95b421ef..3b8efcbe 100644
--- a/src/sempy_labs/_spark.py
+++ b/src/sempy_labs/_spark.py
@@ -298,7 +298,9 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )
 
 
-def get_spark_settings(workspace: Optional[str] = None, return_dataframe: Optional[bool] = True) -> pd.DataFrame | dict:
+def get_spark_settings(
+    workspace: Optional[str] = None, return_dataframe: Optional[bool] = True
+) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.
 
@@ -428,21 +430,23 @@ def update_spark_settings(
     request_body = get_spark_settings(workspace=workspace, return_dataframe=False)
 
     if automatic_log_enabled is not None:
-        request_body['automaticLog']['enabled'] = automatic_log_enabled
+        request_body["automaticLog"]["enabled"] = automatic_log_enabled
     if high_concurrency_enabled is not None:
-        request_body['highConcurrency']['notebookInteractiveRunEnabled'] = high_concurrency_enabled
+        request_body["highConcurrency"][
+            "notebookInteractiveRunEnabled"
+        ] = high_concurrency_enabled
     if customize_compute_enabled is not None:
-        request_body['pool']['customizeComputeEnabled'] = customize_compute_enabled
+        request_body["pool"]["customizeComputeEnabled"] = customize_compute_enabled
     if default_pool_name is not None:
-        request_body['pool']['defaultPool']['name'] = default_pool_name
+        request_body["pool"]["defaultPool"]["name"] = default_pool_name
     if max_node_count is not None:
-        request_body['pool']['starterPool']['maxNodeCount'] = max_node_count
+        request_body["pool"]["starterPool"]["maxNodeCount"] = max_node_count
     if max_executors is not None:
-        request_body['pool']['starterPool']['maxExecutors'] = max_executors
+        request_body["pool"]["starterPool"]["maxExecutors"] = max_executors
     if environment_name is not None:
-        request_body['environment']['name'] = environment_name
+        request_body["environment"]["name"] = environment_name
     if runtime_version is not None:
-        request_body['environment']['runtimeVersion'] = runtime_version
+        request_body["environment"]["runtimeVersion"] = runtime_version
 
     client = fabric.FabricRestClient()
     response = client.patch(
diff --git a/src/sempy_labs/directlake/_generate_shared_expression.py b/src/sempy_labs/directlake/_generate_shared_expression.py
index 3afc19d7..eaf97583 100644
--- a/src/sempy_labs/directlake/_generate_shared_expression.py
+++ b/src/sempy_labs/directlake/_generate_shared_expression.py
@@ -75,8 +75,8 @@ def generate_shared_expression(
             f"{icons.red_dot} The SQL Endpoint for the '{item_name}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned."
         )
 
-    start_expr = 'let\n\tdatabase = '
-    end_expr = '\nin\n\tdatabase'
+    start_expr = "let\n\tdatabase = "
+    end_expr = "\nin\n\tdatabase"
 
     mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
 
     return f"{start_expr}{mid_expr}{end_expr}"

From 8d7e372127a149b481779645f7542d38ee8eef8e Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 23 Sep 2024 22:37:38 +0200
Subject: [PATCH 4/7] fixed columns

---
 src/sempy_labs/tom/_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py
index 3a6a6e30..368024c8 100644
--- a/src/sempy_labs/tom/_model.py
+++ b/src/sempy_labs/tom/_model.py
@@ -4003,7 +4003,7 @@ def update_column(
         import Microsoft.AnalysisServices.Tabular as TOM
         import System
 
-        c = self.model.Tables[table_name].Measures[column_name]
+        c = self.model.Tables[table_name].Columns[column_name]
         if c.Type == TOM.ColumnType.Data:
             if source_column is not None:
                 c.SourceColumn = source_column

From d78e73cd4ea014273c1c278852e43a468ca58700 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 24 Sep 2024 10:33:42 +0200
Subject: [PATCH 5/7] fixed remove_object

---
 src/sempy_labs/tom/_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py
index 368024c8..28424f71 100644
--- a/src/sempy_labs/tom/_model.py
+++ b/src/sempy_labs/tom/_model.py
@@ -1652,13 +1652,13 @@ def remove_object(self, object):
         objType = object.ObjectType
 
         # Have to remove translations and perspectives on the object before removing it.
-        if objType in ["Table", "Column", "Measure", "Hierarchy", "Level"]:
+        if objType in [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy, TOM.ObjectType.Level]:
             for lang in object.Model.Cultures:
                 try:
                     self.remove_translation(object=object, language=lang.Name)
                 except Exception:
                     pass
-        if objType in ["Table", "Column", "Measure", "Hierarchy"]:
+        if objType in [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy, TOM.ObjectType.Level]:
             for persp in object.Model.Perspectives:
                 try:
                     self.remove_from_perspective(

From e546eaa3b5f4b44be1163e4039ffb94085d947a4 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 24 Sep 2024 12:04:04 +0200
Subject: [PATCH 6/7] fixed bug

---
 src/sempy_labs/_query_scale_out.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/sempy_labs/_query_scale_out.py b/src/sempy_labs/_query_scale_out.py
index 80e7fc4c..e0f1a319 100644
--- a/src/sempy_labs/_query_scale_out.py
+++ b/src/sempy_labs/_query_scale_out.py
@@ -339,7 +339,6 @@ def list_qso_settings(
     if dataset is not None:
         dataset_id = resolve_dataset_id(dataset, workspace)
 
-    workspace_id = fabric.get_workspace_id()
     df = pd.DataFrame(
         columns=[
             "Dataset Id",

From 19544955afae59ff7ca1a51dc17384cebfee8be9 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 24 Sep 2024 14:28:22 +0200
Subject: [PATCH 7/7] change

---
 src/sempy_labs/tom/_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py
index 28424f71..e8b34276 100644
--- a/src/sempy_labs/tom/_model.py
+++ b/src/sempy_labs/tom/_model.py
@@ -1640,7 +1640,7 @@ def remove_translation(
 
     def remove_object(self, object):
         """
-        Removes an object from a semantic model. 
+        Removes an object from a semantic model.
 
         Parameters
         -----------
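
Usage note on PATCH 1: update_spark_settings is now a read-modify-write operation. It fetches the current workspace settings as the raw JSON dict via get_spark_settings(..., return_dataframe=False) and overwrites only the keys whose parameters were passed as non-None, instead of rebuilding the request body from dataframe columns. A minimal sketch of the resulting call pattern (illustrative only; assumes a Fabric notebook environment with this branch installed, and "My Workspace" is a placeholder name):

    from sempy_labs._spark import get_spark_settings, update_spark_settings

    # Raw JSON dict shaped like the REST API payload
    # (the return_dataframe=False path added in PATCH 1).
    settings = get_spark_settings(workspace="My Workspace", return_dataframe=False)
    print(settings["pool"]["starterPool"]["maxNodeCount"])

    # Only maxNodeCount is given a new value; every other setting is carried
    # over unchanged from the fetched dict into the PATCH request body.
    update_spark_settings(workspace="My Workspace", max_node_count=4)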
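
Note on PATCH 5: object.ObjectType returns a Microsoft.AnalysisServices.Tabular.ObjectType enum member, not a Python string, so the old membership tests against lists of strings never matched and the translation/perspective cleanup was silently skipped. A small illustration of the difference (assumes an environment where the Tabular client library is loadable, as in a sempy session):

    import Microsoft.AnalysisServices.Tabular as TOM

    obj_type = TOM.ObjectType.Measure
    # Old check: a .NET enum member never equals a Python string -> always False.
    print(obj_type in ["Table", "Column", "Measure", "Hierarchy"])  # False
    # New check: compare enum members directly -> matches as intended.
    print(obj_type in [TOM.ObjectType.Measure, TOM.ObjectType.Table])  # True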