Merge branch 'm-kovalsky/bpausedatasetid'

microsoft · Dec 12, 2024 · b985f9f · b985f9f
2 parents cd700ab + 6c38897
commit b985f9f
Show file tree

Hide file tree

Showing 9 changed files with 193 additions and 133 deletions.
diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py
@@ -160,14 +160,34 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str] = None) -> str
     return obj
 
 
-def resolve_dataset_id(dataset: str, workspace: Optional[str] = None) -> UUID:
+def resolve_dataset_name_and_id(
+    dataset: str | UUID, workspace: Optional[str] = None
+) -> Tuple[str, UUID]:
+    """
+    Obtains the name and ID of the semantic model.
+
+    Parameters
+    ----------
+    dataset : str | UUID
+        The name or ID of the semantic model.
+    workspace : str, default=None
+        The Fabric workspace. The value is passed to ``resolve_workspace_name_and_id``
+        and the resulting workspace ID is used for the item lookup.
+
+    Returns
+    -------
+    Tuple[str, UUID]
+        The name and ID of the semantic model.
+    """
+
+    # Only workspace_id is used below; workspace_name is unpacked but not read.
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    # A value that passes the UUID check is treated as the ID and its name is looked up;
+    # any other value is treated as the name and its ID is looked up.
+    if _is_valid_uuid(dataset):
+        dataset_id = dataset
+        dataset_name = fabric.resolve_item_name(
+            item_id=dataset_id, type="SemanticModel", workspace=workspace_id
+        )
+    else:
+        dataset_name = dataset
+        dataset_id = fabric.resolve_item_id(
+            item_name=dataset, type="SemanticModel", workspace=workspace_id
+        )
+
+    return dataset_name, dataset_id
+
+
+def resolve_dataset_id(dataset: str | UUID, workspace: Optional[str] = None) -> UUID:
     """
     Obtains the ID of the semantic model.
 
     Parameters
     ----------
-    dataset : str
-        The name of the semantic model.
+    dataset : str | UUID
+        The name or ID of the semantic model.
     workspace : str, default=None
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -179,15 +199,14 @@ def resolve_dataset_id(dataset: str, workspace: Optional[str] = None) -> UUID:
         The ID of the semantic model.
     """
 
-    if workspace is None:
-        workspace_id = fabric.get_workspace_id()
-        workspace = fabric.resolve_workspace_name(workspace_id)
-
-    obj = fabric.resolve_item_id(
-        item_name=dataset, type="SemanticModel", workspace=workspace
-    )
+    if _is_valid_uuid(dataset):
+        dataset_id = dataset
+    else:
+        dataset_id = fabric.resolve_item_id(
+            item_name=dataset, type="SemanticModel", workspace=workspace
+        )
 
-    return obj
+    return dataset_id
 
 
 def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None) -> str:
@@ -1167,20 +1186,20 @@ def _make_list_unique(my_list):
 
 def _get_partition_map(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:
 
-    if workspace is None:
-        workspace = fabric.resolve_workspace_name()
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
     partitions = fabric.evaluate_dax(
-        dataset=dataset,
-        workspace=workspace,
+        dataset=dataset_id,
+        workspace=workspace_id,
         dax_string="""
     select [ID] AS [PartitionID], [TableID], [Name] AS [PartitionName] from $system.tmschema_partitions
     """,
     )
 
     tables = fabric.evaluate_dax(
-        dataset=dataset,
-        workspace=workspace,
+        dataset=dataset_id,
+        workspace=workspace_id,
         dax_string="""
     select [ID] AS [TableID], [Name] AS [TableName] from $system.tmschema_tables
     """,

diff --git a/src/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py
@@ -7,23 +7,25 @@
     pagination,
     resolve_item_type,
     format_dax_object_name,
+    resolve_dataset_name_and_id,
 )
 import pandas as pd
 from typing import Optional
 import sempy_labs._icons as icons
 from sempy.fabric.exceptions import FabricHTTPException
+from uuid import UUID
 
 
 def get_object_level_security(
-    dataset: str, workspace: Optional[str] = None
+    dataset: str | UUID, workspace: Optional[str] = None
 ) -> pd.DataFrame:
     """
     Shows the object level security for the semantic model.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | UUID
+        Name or ID of the semantic model.
     workspace : str, default=None
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -37,12 +39,13 @@ def get_object_level_security(
 
     from sempy_labs.tom import connect_semantic_model
 
-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
     df = pd.DataFrame(columns=["Role Name", "Object Type", "Table Name", "Object Name"])
 
     with connect_semantic_model(
-        dataset=dataset, readonly=True, workspace=workspace
+        dataset=dataset_id, readonly=True, workspace=workspace_id
     ) as tom:
 
         for r in tom.model.Roles:
@@ -82,15 +85,15 @@ def get_object_level_security(
 
 
 def list_tables(
-    dataset: str, workspace: Optional[str] = None, extended: bool = False
+    dataset: str | UUID, workspace: Optional[str] = None, extended: bool = False
 ) -> pd.DataFrame:
     """
     Shows a semantic model's tables and their properties.
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | UUID
+        Name or ID of the semantic model.
     workspace : str, default=None
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -106,7 +109,8 @@ def list_tables(
 
     from sempy_labs.tom import connect_semantic_model
 
-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
     df = pd.DataFrame(
         columns=[
@@ -121,20 +125,20 @@ def list_tables(
     )
 
     with connect_semantic_model(
-        dataset=dataset, workspace=workspace, readonly=True
+        dataset=dataset_id, workspace=workspace_id, readonly=True
     ) as tom:
         if extended:
             dict_df = fabric.evaluate_dax(
-                dataset=dataset,
-                workspace=workspace,
+                dataset=dataset_id,
+                workspace=workspace_id,
                 dax_string="""
                 EVALUATE SELECTCOLUMNS(FILTER(INFO.STORAGETABLECOLUMNS(), [COLUMN_TYPE] = "BASIC_DATA"),[DIMENSION_NAME],[DICTIONARY_SIZE])
                 """,
             )
             dict_sum = dict_df.groupby("[DIMENSION_NAME]")["[DICTIONARY_SIZE]"].sum()
             data = fabric.evaluate_dax(
-                dataset=dataset,
-                workspace=workspace,
+                dataset=dataset_id,
+                workspace=workspace_id,
                 dax_string="""EVALUATE SELECTCOLUMNS(INFO.STORAGETABLECOLUMNSEGMENTS(),[TABLE_ID],[DIMENSION_NAME],[USED_SIZE])""",
             )
             data_sum = (
@@ -162,8 +166,8 @@ def list_tables(
                 .sum()
             )
             rc = fabric.evaluate_dax(
-                dataset=dataset,
-                workspace=workspace,
+                dataset=dataset_id,
+                workspace=workspace_id,
                 dax_string="""
                 SELECT [DIMENSION_NAME],[ROWS_COUNT] FROM $SYSTEM.DISCOVER_STORAGE_TABLES
                 WHERE RIGHT ( LEFT ( TABLE_ID, 2 ), 1 ) <> '$'
@@ -850,15 +854,15 @@ def update_item(
 
 
 def list_relationships(
-    dataset: str, workspace: Optional[str] = None, extended: bool = False
+    dataset: str | UUID, workspace: Optional[str] = None, extended: bool = False
 ) -> pd.DataFrame:
     """
     Shows a semantic model's relationships and their properties.
 
     Parameters
     ----------
-    dataset: str
-        Name of the semantic model.
+    dataset : str | UUID
+        Name or ID of the semantic model.
     workspace : str, default=None
         The Fabric workspace name.
         Defaults to None which resolves to the workspace of the attached lakehouse
@@ -872,17 +876,18 @@ def list_relationships(
         A pandas dataframe showing the relationships for the semantic model and their properties.
     """
 
-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)
 
-    dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
+    dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
     dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
     dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])
 
     if extended:
         # Used to map the Relationship IDs
         rel = fabric.evaluate_dax(
-            dataset=dataset,
-            workspace=workspace,
+            dataset=dataset_id,
+            workspace=workspace_id,
             dax_string="""
                 SELECT
                 [ID] AS [RelationshipID]
@@ -893,8 +898,8 @@ def list_relationships(
 
         # USED_SIZE shows the Relationship Size where TABLE_ID starts with R$
         cs = fabric.evaluate_dax(
-            dataset=dataset,
-            workspace=workspace,
+            dataset=dataset_id,
+            workspace=workspace_id,
             dax_string="""
                 SELECT
                 [TABLE_ID]

diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py
@@ -10,9 +10,10 @@
     create_relationship_name,
     save_as_delta_table,
     resolve_workspace_capacity,
-    resolve_dataset_id,
+    resolve_dataset_name_and_id,
     get_language_codes,
     _get_max_run_id,
+    resolve_workspace_name_and_id,
 )
 from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
 from sempy_labs.tom import connect_semantic_model
@@ -23,11 +24,12 @@
 from pyspark.sql.functions import col, flatten
 from pyspark.sql.types import StructType, StructField, StringType
 import os
+from uuid import UUID
 
 
 @log
 def run_model_bpa(
-    dataset: str,
+    dataset: str | UUID,
     rules: Optional[pd.DataFrame] = None,
     workspace: Optional[str] = None,
     export: bool = False,
@@ -41,8 +43,8 @@ def run_model_bpa(
 
     Parameters
     ----------
-    dataset : str
-        Name of the semantic model.
+    dataset : str | UUID
+        Name or ID of the semantic model.
     rules : pandas.DataFrame, default=None
         A pandas dataframe containing rules to be evaluated.
     workspace : str, default=None
@@ -105,15 +107,18 @@ def map_language(language, language_list):
         if language is not None:
             language = map_language(language, language_list)
 
-    workspace = fabric.resolve_workspace_name(workspace)
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (dataset_name, dataset_id) = resolve_dataset_name_and_id(
+        dataset, workspace=workspace_id
+    )
 
     if language is not None and language not in language_list:
         print(
             f"{icons.yellow_dot} The '{language}' language code is not in our predefined language list. Please file an issue and let us know which language code you are using: https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=."
         )
 
     with connect_semantic_model(
-        dataset=dataset, workspace=workspace, readonly=True
+        dataset=dataset_id, workspace=workspace_id, readonly=True
     ) as tom:
 
         if extended:
@@ -122,7 +127,7 @@ def map_language(language, language_list):
         # Do not run BPA for models with no tables
         if tom.model.Tables.Count == 0:
             print(
-                f"{icons.warning} The '{dataset}' semantic model within the '{workspace}' workspace has no tables and therefore there are no valid BPA results."
+                f"{icons.warning} The '{dataset_name}' semantic model within the '{workspace_name}' workspace has no tables and therefore there are no valid BPA results."
             )
             finalDF = pd.DataFrame(
                 columns=[
@@ -136,7 +141,9 @@ def map_language(language, language_list):
                 ]
             )
         else:
-            dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
+            dep = get_model_calc_dependencies(
+                dataset=dataset_id, workspace=workspace_id
+            )
 
             def translate_using_po(rule_file):
                 current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -382,20 +389,19 @@ def translate_using_spark(rule_file):
             runId = max_run_id + 1
 
         now = datetime.datetime.now()
-        dfD = fabric.list_datasets(workspace=workspace, mode="rest")
-        dfD_filt = dfD[dfD["Dataset Name"] == dataset]
+        dfD = fabric.list_datasets(workspace=workspace_id, mode="rest")
+        dfD_filt = dfD[dfD["Dataset Id"] == dataset_id]
         configured_by = dfD_filt["Configured By"].iloc[0]
-        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
+        capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace_id)
         dfExport["Capacity Name"] = capacity_name
         dfExport["Capacity Id"] = capacity_id
-        dfExport["Workspace Name"] = workspace
-        dfExport["Workspace Id"] = fabric.resolve_workspace_id(workspace)
-        dfExport["Dataset Name"] = dataset
-        dfExport["Dataset Id"] = resolve_dataset_id(dataset, workspace)
+        dfExport["Workspace Name"] = workspace_name
+        dfExport["Workspace Id"] = workspace_id
+        dfExport["Dataset Name"] = dataset_name
+        dfExport["Dataset Id"] = dataset_id
         dfExport["Configured By"] = configured_by
         dfExport["Timestamp"] = now
         dfExport["RunId"] = runId
-        dfExport["Configured By"] = configured_by
         dfExport["RunId"] = dfExport["RunId"].astype("int")
 
         dfExport = dfExport[list(icons.bpa_schema.keys())]

diff --git a/src/sempy_labs/_model_bpa_bulk.py b/src/sempy_labs/_model_bpa_bulk.py
@@ -119,16 +119,16 @@ def run_model_bpa_bulk(
             dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)]
 
             if len(dfD_filt) > 0:
-                for i2, r2 in dfD_filt.iterrows():
+                for _, r2 in dfD_filt.iterrows():
+                    dataset_id = r2["Dataset Id"]
                     dataset_name = r2["Dataset Name"]
                     config_by = r2["Configured By"]
-                    dataset_id = r2["Dataset Id"]
                     print(
                         f"{icons.in_progress} Collecting Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
                     )
                     try:
                         bpa_df = run_model_bpa(
-                            dataset=dataset_name,
+                            dataset=dataset_id,
                             workspace=wksp,
                             language=language,
                             return_dataframe=True,