Commit b0aeac5
Merge branch 'm-kovalsky/fixedsparkandgit'
m-kovalsky committed Sep 24, 2024
2 parents 3f03434 + 1954495 commit b0aeac5
Showing 5 changed files with 57 additions and 76 deletions.
41 changes: 20 additions & 21 deletions src/sempy_labs/_git.py
@@ -132,19 +132,18 @@ def get_git_status(workspace: Optional[str] = None) -> pd.DataFrame:
     client = fabric.FabricRestClient()
     response = client.get(f"/v1/workspaces/{workspace_id}/git/status")
 
-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)
 
     result = lro(client, response).json()
 
-    for v in result.get("value", []):
-        changes = v.get("changes", [])
+    for changes in result.get("changes", []):
         item_metadata = changes.get("itemMetadata", {})
         item_identifier = item_metadata.get("itemIdentifier", {})
 
         new_data = {
-            "Workspace Head": v.get("workspaceHead"),
-            "Remote Commit Hash": v.get("remoteCommitHash"),
+            "Workspace Head": result.get("workspaceHead"),
+            "Remote Commit Hash": result.get("remoteCommitHash"),
             "Object ID": item_identifier.get("objectId"),
             "Logical ID": item_identifier.get("logicalId"),
             "Item Type": item_metadata.get("itemType"),
@@ -199,21 +198,21 @@ def get_git_connection(workspace: Optional[str] = None) -> pd.DataFrame:
     if response.status_code != 200:
         raise FabricHTTPException(response)
 
-    for v in response.json().get("value", []):
-        provider_details = v.get("gitProviderDetails", {})
-        sync_details = v.get("gitSyncDetails", {})
-        new_data = {
-            "Organization Name": provider_details.get("organizationName"),
-            "Project Name": provider_details.get("projectName"),
-            "Git Provider Type": provider_details.get("gitProviderType"),
-            "Repository Name": provider_details.get("repositoryName"),
-            "Branch Name": provider_details.get("branchName"),
-            "Directory Name": provider_details.get("directoryName"),
-            "Workspace Head": sync_details.get("head"),
-            "Last Sync Time": sync_details.get("lastSyncTime"),
-            "Git Conneciton State": v.get("gitConnectionState"),
-        }
-        df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+    r = response.json()
+    provider_details = r.get("gitProviderDetails", {})
+    sync_details = r.get("gitSyncDetails", {})
+    new_data = {
+        "Organization Name": provider_details.get("organizationName"),
+        "Project Name": provider_details.get("projectName"),
+        "Git Provider Type": provider_details.get("gitProviderType"),
+        "Repository Name": provider_details.get("repositoryName"),
+        "Branch Name": provider_details.get("branchName"),
+        "Directory Name": provider_details.get("directoryName"),
+        "Workspace Head": sync_details.get("head"),
+        "Last Sync Time": sync_details.get("lastSyncTime"),
+        "Git Connection State": r.get("gitConnectionState"),
+    }
+    df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
 
     return df

@@ -237,7 +236,7 @@ def initialize_git_connection(workspace: Optional[str] = None):
     client = fabric.FabricRestClient()
     response = client.post(f"/v1/workspaces/{workspace_id}/git/initializeConnection")
 
-    if response not in [200, 202]:
+    if response.status_code not in [200, 202]:
         raise FabricHTTPException(response)
 
     lro(client, response)
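Note on the _git.py fixes above: a FabricRestClient response is a requests-style Response object, so the old `if response not in [200, 202]` compared an object against integers; the membership test never matched, and the guard raised on every call, including successful ones. The loop changes match the actual payloads seen in the new code: the Git status response carries `changes`, `workspaceHead`, and `remoteCommitHash` at the top level rather than under a `value` list, and `/git/connection` returns a single object, not a list. A minimal runnable sketch of the status-code pitfall, using a plain requests Response as a stand-in:

    import requests

    # Stand-in for the FabricRestClient response in the diff above.
    response = requests.models.Response()
    response.status_code = 200

    print(response in [200, 202])              # False: a Response never equals an int,
                                               # so the old guard always raised
    print(response.status_code in [200, 202])  # True: compare the status code itself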
1 change: 0 additions & 1 deletion src/sempy_labs/_query_scale_out.py
@@ -339,7 +339,6 @@ def list_qso_settings(
     if dataset is not None:
         dataset_id = resolve_dataset_id(dataset, workspace)
 
-    workspace_id = fabric.get_workspace_id()
     df = pd.DataFrame(
         columns=[
             "Dataset Id",
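The one-line deletion in list_qso_settings is also behavioral: `fabric.get_workspace_id()` returns the id of the notebook's own workspace, so reassigning `workspace_id` here discarded the id presumably resolved earlier from the caller's `workspace` argument (as the sibling functions in this diff resolve it) and queried the wrong workspace whenever the two differed. A hypothetical sketch of the overwrite, assuming the usual `import sempy.fabric as fabric`:

    import sempy.fabric as fabric

    # workspace_id was already resolved from the caller's `workspace` argument...
    workspace_id = "11111111-1111-1111-1111-111111111111"  # hypothetical caller workspace
    # ...and the deleted line then clobbered it with the notebook's workspace id:
    workspace_id = fabric.get_workspace_id()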
63 changes: 29 additions & 34 deletions src/sempy_labs/_spark.py
@@ -298,7 +298,9 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None):
     )
 
 
-def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
+def get_spark_settings(
+    workspace: Optional[str] = None, return_dataframe: Optional[bool] = True
+) -> pd.DataFrame | dict:
     """
     Shows the spark settings for a workspace.
@@ -308,10 +310,12 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
         The name of the Fabric workspace.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    return_dataframe : bool, default=True
+        If True, returns a pandas dataframe. If False, returns a json dictionary.
 
     Returns
     -------
-    pandas.DataFrame
+    pandas.DataFrame | dict
         A pandas dataframe showing the spark settings for a workspace.
     """

@@ -368,7 +372,10 @@ def get_spark_settings(workspace: Optional[str] = None) -> pd.DataFrame:
     df[bool_cols] = df[bool_cols].astype(bool)
     # df[int_cols] = df[int_cols].astype(int)
 
-    return df
+    if return_dataframe:
+        return df
+    else:
+        return response.json()
 
 
 def update_spark_settings(
@@ -420,38 +427,26 @@ def update_spark_settings(
     # https://learn.microsoft.com/en-us/rest/api/fabric/spark/workspace-settings/update-spark-settings?tabs=HTTP
     (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)
 
-    dfS = get_spark_settings(workspace=workspace)
-
-    if automatic_log_enabled is None:
-        automatic_log_enabled = bool(dfS["Automatic Log Enabled"].iloc[0])
-    if high_concurrency_enabled is None:
-        high_concurrency_enabled = bool(dfS["High Concurrency Enabled"].iloc[0])
-    if customize_compute_enabled is None:
-        customize_compute_enabled = bool(dfS["Customize Compute Enabled"].iloc[0])
-    if default_pool_name is None:
-        default_pool_name = dfS["Default Pool Name"].iloc[0]
-    if max_node_count is None:
-        max_node_count = int(dfS["Max Node Count"].iloc[0])
-    if max_executors is None:
-        max_executors = int(dfS["Max Executors"].iloc[0])
-    if environment_name is None:
-        environment_name = dfS["Environment Name"].iloc[0]
-    if runtime_version is None:
-        runtime_version = dfS["Runtime Version"].iloc[0]
-
-    request_body = {
-        "automaticLog": {"enabled": automatic_log_enabled},
-        "highConcurrency": {"notebookInteractiveRunEnabled": high_concurrency_enabled},
-        "pool": {
-            "customizeComputeEnabled": customize_compute_enabled,
-            "defaultPool": {"name": default_pool_name, "type": "Workspace"},
-            "starterPool": {
-                "maxNodeCount": max_node_count,
-                "maxExecutors": max_executors,
-            },
-        },
-        "environment": {"name": environment_name, "runtimeVersion": runtime_version},
-    }
+    request_body = get_spark_settings(workspace=workspace, return_dataframe=False)
+
+    if automatic_log_enabled is not None:
+        request_body["automaticLog"]["enabled"] = automatic_log_enabled
+    if high_concurrency_enabled is not None:
+        request_body["highConcurrency"][
+            "notebookInteractiveRunEnabled"
+        ] = high_concurrency_enabled
+    if customize_compute_enabled is not None:
+        request_body["pool"]["customizeComputeEnabled"] = customize_compute_enabled
+    if default_pool_name is not None:
+        request_body["pool"]["defaultPool"]["name"] = default_pool_name
+    if max_node_count is not None:
+        request_body["pool"]["starterPool"]["maxNodeCount"] = max_node_count
+    if max_executors is not None:
+        request_body["pool"]["starterPool"]["maxExecutors"] = max_executors
+    if environment_name is not None:
+        request_body["environment"]["name"] = environment_name
+    if runtime_version is not None:
+        request_body["environment"]["runtimeVersion"] = runtime_version
 
     client = fabric.FabricRestClient()
     response = client.patch(
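The _spark.py changes are two halves of one refactor: get_spark_settings can now return the raw settings JSON (`return_dataframe=False`), and update_spark_settings uses that JSON, which the new code assumes matches the shape the PATCH endpoint expects, as its request body, overwriting only the fields the caller actually passed. Unspecified settings are echoed back exactly as the service reported them instead of being reconstructed from DataFrame columns. A usage sketch, assuming both functions are exported at the package level as elsewhere in this repo (workspace name hypothetical):

    from sempy_labs import get_spark_settings, update_spark_settings

    # Current settings as a dict mirroring the REST payload.
    settings = get_spark_settings(workspace="MyWorkspace", return_dataframe=False)
    print(settings["pool"]["starterPool"]["maxNodeCount"])

    # Bumps only maxNodeCount; every other setting is sent back unchanged.
    update_spark_settings(workspace="MyWorkspace", max_node_count=10)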
20 changes: 4 additions & 16 deletions src/sempy_labs/directlake/_generate_shared_expression.py
@@ -13,7 +13,6 @@ def generate_shared_expression(
     item_name: Optional[str] = None,
     item_type: Optional[str] = "Lakehouse",
     workspace: Optional[str] = None,
-    direct_lake_over_onelake: Optional[bool] = False,
 ) -> str:
     """
     Dynamically generates the M expression used by a Direct Lake model for a given lakehouse/warehouse.
@@ -29,8 +28,6 @@ def generate_shared_expression(
         The Fabric workspace used by the item.
         Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
-    direct_lake_over_onelake : bool, defualt=False
-        Generates an expression required for a Direct Lake over OneLake semantic mode. Only available for lakehouses, not warehouses.
 
     Returns
     -------
@@ -47,9 +44,6 @@
             f"{icons.red_dot} Invalid item type. Valid options: {item_types}."
         )
 
-    if item_type == 'Warehouse' and direct_lake_over_onelake:
-        raise ValueError(f"{icons.red_dot} Direct Lake over OneLake is only supported for lakehouses, not warehouses.")
-
     if item_name is None:
         item_id = fabric.get_lakehouse_id()
         item_name = resolve_lakehouse_name(item_id, workspace)
@@ -81,14 +75,8 @@
             f"{icons.red_dot} The SQL Endpoint for the '{item_name}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned."
         )
 
-    start_expr = 'let\n\tdatabase = '
-    end_expr = '\nin\n\tdatabase'
-    if not direct_lake_over_onelake:
-        mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
-    else:
-        url = prop.get('oneLakeTablesPath').rstrip('/Tables')
-        mid_expr = f'AzureStorage.DataLake(\\"{url}\\")"'
-
-    sh = f"{start_expr}{mid_expr}{end_expr}"
-
-    return sh
+    start_expr = "let\n\tdatabase = "
+    end_expr = "\nin\n\tdatabase"
+    mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")'
+
+    return f"{start_expr}{mid_expr}{end_expr}"
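Reverting the direct_lake_over_onelake option also drops two latent bugs in the deleted branch: the f-string `f'AzureStorage.DataLake(\\"{url}\\")"'` emitted literal backslash-quotes plus a stray trailing quote, and `str.rstrip('/Tables')` strips a trailing character set, not the literal suffix, so it can eat into the path itself. With the revert, the function always emits the SQL-endpoint expression. A small sketch of both points (endpoint values hypothetical):

    # str.rstrip takes a set of characters, not a suffix: it over-strips here.
    print("lakehouse/Tables".rstrip("/Tables"))        # -> 'lakehou'
    print("lakehouse/Tables".removesuffix("/Tables"))  # -> 'lakehouse' (Python 3.9+)

    # The expression generate_shared_expression now always returns:
    sqlEPCS = "abc123.datawarehouse.fabric.microsoft.com"  # hypothetical endpoint
    sqlepid = "00000000-0000-0000-0000-000000000000"       # hypothetical endpoint id
    print(f'let\n\tdatabase = Sql.Database("{sqlEPCS}", "{sqlepid}")\nin\n\tdatabase')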
8 changes: 4 additions & 4 deletions src/sempy_labs/tom/_model.py
@@ -1640,7 +1640,7 @@ def remove_translation(
 
     def remove_object(self, object):
         """
-        Removes an object from a semantic model.
+        Removes an object from a semantic model.
 
         Parameters
         ----------
@@ -1652,13 +1652,13 @@ def remove_object(self, object):
         objType = object.ObjectType
 
         # Have to remove translations and perspectives on the object before removing it.
-        if objType in ["Table", "Column", "Measure", "Hierarchy", "Level"]:
+        if objType in [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy, TOM.ObjectType.Level]:
             for lang in object.Model.Cultures:
                 try:
                     self.remove_translation(object=object, language=lang.Name)
                 except Exception:
                     pass
-        if objType in ["Table", "Column", "Measure", "Hierarchy"]:
+        if objType in [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy, TOM.ObjectType.Level]:
             for persp in object.Model.Perspectives:
                 try:
                     self.remove_from_perspective(
@@ -4003,7 +4003,7 @@ def update_column(
         import Microsoft.AnalysisServices.Tabular as TOM
         import System
 
-        c = self.model.Tables[table_name].Measures[column_name]
+        c = self.model.Tables[table_name].Columns[column_name]
         if c.Type == TOM.ColumnType.Data:
             if source_column is not None:
                 c.SourceColumn = source_column
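Both _model.py fixes above turn silent no-ops into working code. `object.ObjectType` is a TOM.ObjectType enum value and never compares equal to a Python string, so with the old string lists the translation and perspective cleanup in remove_object never ran; comparing against the enum members makes the guards actually fire. Similarly, update_column was indexing the Measures collection with a column name, which cannot return a column; `Columns[column_name]` is what the rest of the function assumes. A sketch of the enum-vs-string distinction, assuming the pythonnet/TOM environment sempy provides:

    # Requires the .NET TOM assembly, e.g. inside a Fabric notebook via sempy.
    import Microsoft.AnalysisServices.Tabular as TOM

    obj_type = TOM.ObjectType.Column
    print(obj_type in ["Column"])               # False: enum value vs. str
    print(obj_type in [TOM.ObjectType.Column])  # True: enum vs. enum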

