Merge pull request #5 from microsoft/m-kovalsky/functionparametersdescriptions

eisber authored Jun 6, 2024
2 parents e4c60dd + 41941ed commit f7764ad
Showing 48 changed files with 3,423 additions and 785 deletions.
7 changes: 7 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
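
This new settings file points VS Code's test explorer at pytest for the tests folder and disables unittest discovery. A programmatic equivalent, as a sketch assuming pytest is installed and a tests/ folder exists at the repository root:

import pytest  # assumes pytest is installed in the active environment

# run the same suite that VS Code's test explorer targets
pytest.main(["tests"])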
17 changes: 8 additions & 9 deletions sempy_labs/AI.py
@@ -4,8 +4,9 @@
from synapse.ml.services.openai import OpenAICompletion
from pyspark.sql.functions import col
from pyspark.sql import SparkSession
+from typing import List, Optional, Union

-def optimize_semantic_model(dataset: str, workspace: str | None = None):
+def optimize_semantic_model(dataset: str, workspace: Optional[str] = None):

from .ModelBPA import run_model_bpa
from .Fallback import check_fallback_reason
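
Editor's note on the typing change that recurs throughout this commit: the 'str | None' union syntax (PEP 604) is only valid at runtime on Python 3.10+, while 'Optional[str]' and 'Union[str, List[str]]' from typing also work on older interpreters. A minimal illustration (the function below is hypothetical, not from this repo):

from typing import List, Optional, Union

def describe(measures: Union[str, List[str]], workspace: Optional[str] = None):
    # Optional[str] is equivalent to Union[str, None]; on Python < 3.10,
    # evaluating the annotation str | None raises a TypeError at definition time
    # (unless annotation evaluation is deferred).
    ...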
@@ -43,7 +44,7 @@ def optimize_semantic_model(dataset: str, workspace: str | None = None):
print(f"The '{rule}' rule has been followed.")


-def generate_measure_descriptions(dataset: str, measures: str | list, gpt_model: str = 'gpt-35-turbo', workspace: str | None = None):
+def generate_measure_descriptions(dataset: str, measures: Union[str,List[str]], gpt_model: Optional[str] = 'gpt-35-turbo', workspace: Optional[str] = None):

service_name = 'synapseml-openai'

@@ -96,12 +97,10 @@ def generate_measure_descriptions(dataset: str, measures: str | list, gpt_model:

#m.SaveChanges()

-def generate_aggs(dataset: str, table_name: str, columns: str | list, workspace: str | None = None, lakehouse_workspace: str | None = None):
+def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], workspace: Optional[str] = None, lakehouse_workspace: Optional[str] = None):

-from .HelperFunctions import get_direct_lake_sql_endpoint
-from .HelperFunctions import resolve_lakehouse_id
-from .HelperFunctions import create_abfss_path
-from .HelperFunctions import format_dax_object_name
+from .HelperFunctions import get_direct_lake_sql_endpoint, create_abfss_path, format_dax_object_name, resolve_lakehouse_id

sempy.fabric._client._utils._init_analysis_services()
import Microsoft.AnalysisServices.Tabular as TOM
import System
@@ -173,8 +172,8 @@ def generate_aggs(dataset: str, table_name: str, columns: str | list, workspace:

sqlEndpointId = get_direct_lake_sql_endpoint(dataset = dataset, workspace = workspace)

-dfI = fabric.list_items(workspace = lakehouse_workspace)
-dfI_filt = dfI[(dfI['Id'] == sqlEndpointId) & (dfI['Type'] == 'SQLEndpoint')]
+dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint')
+dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)]

if len(dfI_filt) == 0:
print(f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter.")
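
A usage sketch of the updated AI.py signatures, to be run in a Fabric notebook. The dataset, table and column names below are hypothetical, the import path is assumed from the file layout, and generate_measure_descriptions additionally assumes the SynapseML OpenAI service named in the function body is available:

from sempy_labs.AI import generate_measure_descriptions, generate_aggs

# measures now accepts either a single name or a list (Union[str, List[str]])
generate_measure_descriptions(dataset='AdventureWorks', measures=['Total Sales', 'Margin'])

# columns likewise accepts str or List[str]; omitted workspace parameters
# resolve to the attached lakehouse's workspace (or the notebook's workspace)
generate_aggs(dataset='AdventureWorks', table_name='FactInternetSales', columns=['OrderDateKey', 'SalesAmount'])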
18 changes: 15 additions & 3 deletions sempy_labs/ClearCache.py
@@ -1,18 +1,30 @@
import sempy
import sempy.fabric as fabric
from .HelperFunctions import resolve_dataset_id
+from typing import List, Optional, Union

green_dot = '\U0001F7E2'
yellow_dot = '\U0001F7E1'
red_dot = '\U0001F534'
in_progress = '⌛'

-def clear_cache(dataset: str, workspace: str | None = None):
+def clear_cache(dataset: str, workspace: Optional[str] = None):

"""
+Clears the cache of a semantic model.
+Parameters
+----------
+dataset : str
+Name of the semantic model.
+workspace : str, default=None
+The Fabric workspace name.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+Returns
+-------
Documentation is available here: https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#clear_cache
"""

if workspace == None:
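
A minimal usage sketch based on the new docstring (the model name is hypothetical; the import path is assumed from the file layout):

from sempy_labs.ClearCache import clear_cache

# workspace=None resolves to the attached lakehouse's workspace,
# or to the notebook's workspace if no lakehouse is attached
clear_cache(dataset='AdventureWorks')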
1 change: 1 addition & 0 deletions sempy_labs/Connections.py
@@ -1,6 +1,7 @@
import sempy
import sempy.fabric as fabric
import pandas as pd
+from typing import List, Optional, Union

def create_connection_cloud(name: str, server_name: str, database_name: str, user_name: str, password: str, privacy_level: str):

23 changes: 19 additions & 4 deletions sempy_labs/CreateBlankSemanticModel.py
@@ -1,17 +1,32 @@
import sempy
import sempy.fabric as fabric
+from typing import List, Optional, Union

green_dot = '\U0001F7E2'
yellow_dot = '\U0001F7E1'
red_dot = '\U0001F534'
in_progress = '⌛'

-def create_blank_semantic_model(dataset: str, compatibility_level: int = 1605, workspace: str | None = None):
+def create_blank_semantic_model(dataset: str, compatibility_level: Optional[int] = 1605, workspace: Optional[str] = None):
"""
+Creates a new blank semantic model (no tables/columns etc.).
+Parameters
+----------
+dataset : str
+Name of the semantic model.
+compatibility_level : int
+The compatibility level of the semantic model.
+Defaults to 1605.
+workspace : str, default=None
+The Fabric workspace name.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+Returns
+-------
Documentation is available here: https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#create_blank_semantic_model
"""

if workspace == None:
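
A usage sketch (hypothetical model name; import path assumed from the file layout):

from sempy_labs.CreateBlankSemanticModel import create_blank_semantic_model

# creates an empty model at the default compatibility level (1605)
create_blank_semantic_model(dataset='BlankModel')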
21 changes: 18 additions & 3 deletions sempy_labs/CreatePQTFile.py
@@ -6,19 +6,34 @@
from .ListFunctions import list_tables
from .Lakehouse import lakehouse_attached
from sempy._utils._log import log
+from typing import List, Optional, Union

green_dot = '\U0001F7E2'
yellow_dot = '\U0001F7E1'
red_dot = '\U0001F534'
in_progress = '⌛'

@log
-def create_pqt_file(dataset: str, workspace: str | None = None, file_name: str | None = None):
+def create_pqt_file(dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None):

"""
+Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse.
+Parameters
+----------
+dataset : str
+Name of the semantic model.
+workspace : str, default=None
+The Fabric workspace name.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+file_name : str, default=None
+The name of the Power Query Template file to be generated.
+Defaults to None which resolves to 'PowerQueryTemplate'.
+Returns
+-------
Documentation is available here: https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#create_pqt_file
"""

if file_name is None:
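
A usage sketch, assuming a lakehouse is attached to the notebook (hypothetical model name; import path assumed from the file layout):

from sempy_labs.CreatePQTFile import create_pqt_file

# file_name=None resolves to 'PowerQueryTemplate'; the .pqt file is written
# to the Files section of the attached lakehouse
create_pqt_file(dataset='AdventureWorks')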
96 changes: 0 additions & 96 deletions sempy_labs/DataCoverageDef.py

This file was deleted.

29 changes: 24 additions & 5 deletions sempy_labs/DirectLakeSchemaCompare.py
@@ -4,13 +4,32 @@
from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name, get_direct_lake_sql_endpoint
from .GetLakehouseColumns import get_lakehouse_columns
from .ListFunctions import list_tables
+from typing import List, Optional, Union

-def direct_lake_schema_compare(dataset: str, workspace: str | None = None, lakehouse: str | None = None, lakehouse_workspace: str | None = None):
+def direct_lake_schema_compare(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None):

"""
-Documentation is available here: https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct_lake_schema_compare
+Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist.
+Parameters
+----------
+dataset : str
+Name of the semantic model.
+workspace : str, default=None
+The Fabric workspace name.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+lakehouse : str, default=None
+The Fabric lakehouse used by the Direct Lake semantic model.
+Defaults to None which resolves to the lakehouse attached to the notebook.
+lakehouse_workspace : str, default=None
+The Fabric workspace used by the lakehouse.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+Returns
+-------
"""

if workspace == None:
@@ -26,8 +45,8 @@ def direct_lake_schema_compare(dataset: str, workspace: str | None = None, lakeh

dfP = fabric.list_partitions(dataset = dataset, workspace = workspace)
sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
-dfI = fabric.list_items(workspace = lakehouse_workspace)
-dfI_filt = dfI[(dfI['Type'] == 'SQLEndpoint') & (dfI['Id'] == sqlEndpointId)]
+dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint')
+dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)]

if len(dfI_filt) == 0:
print(f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.")
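
A usage sketch for a Direct Lake model (hypothetical model name; import path assumed from the file layout):

from sempy_labs.DirectLakeSchemaCompare import direct_lake_schema_compare

# lakehouse and lakehouse_workspace default to the attached lakehouse and its workspace
direct_lake_schema_compare(dataset='AdventureWorks')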
33 changes: 28 additions & 5 deletions sempy_labs/DirectLakeSchemaSync.py
@@ -3,13 +3,36 @@
import pandas as pd
from .GetLakehouseColumns import get_lakehouse_columns
from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name, get_direct_lake_sql_endpoint
+from typing import List, Optional, Union
+from sempy._utils._log import log

-def direct_lake_schema_sync(dataset: str, workspace: str | None = None, add_to_model: bool = False, lakehouse: str | None = None, lakehouse_workspace: str | None = None):
+@log
+def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_to_model: Optional[bool] = False, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None):

"""
+Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model).
+Parameters
+----------
+dataset : str
+Name of the semantic model.
+workspace : str, default=None
+The Fabric workspace name.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+add_to_model : bool, default=False
+If set to True, columns which exist in the lakehouse but do not exist in the semantic model are added to the semantic model. No new tables are added.
+lakehouse : str, default=None
+The Fabric lakehouse used by the Direct Lake semantic model.
+Defaults to None which resolves to the lakehouse attached to the notebook.
+lakehouse_workspace : str, default=None
+The Fabric workspace used by the lakehouse.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+Returns
+-------
Documentation is available here: https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct_lake_schema_sync
"""

sempy.fabric._client._utils._init_analysis_services()
@@ -30,8 +53,8 @@ def direct_lake_schema_sync(dataset: str, workspace: str | None = None, add_to_m

sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)

-dfI = fabric.list_items(workspace = lakehouse_workspace)
-dfI_filt = dfI[(dfI['Type'] == 'SQLEndpoint') & (dfI['Id'] == sqlEndpointId)]
+dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint')
+dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)]

if len(dfI_filt) == 0:
print(f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.")
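
A usage sketch of the newly documented behavior (hypothetical model name; import path assumed from the file layout):

from sempy_labs.DirectLakeSchemaSync import direct_lake_schema_sync

# with add_to_model=False the function only reports lakehouse columns missing
# from the model; set it to True to add them (no new tables are created)
direct_lake_schema_sync(dataset='AdventureWorks', add_to_model=False)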
19 changes: 16 additions & 3 deletions sempy_labs/Fallback.py
@@ -1,13 +1,26 @@
import sempy
import sempy.fabric as fabric
import numpy as np
+from typing import List, Optional, Union

-def check_fallback_reason(dataset: str, workspace: str | None = None):
+def check_fallback_reason(dataset: str, workspace: Optional[str] = None):

"""
-Documentation is available here: https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#check_fallback_reason
+Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery.
+Parameters
+----------
+dataset : str
+Name of the semantic model.
+workspace : str, default=None
+The Fabric workspace name.
+Defaults to None which resolves to the workspace of the attached lakehouse
+or if no lakehouse attached, resolves to the workspace of the notebook.
+Returns
+-------
+pandas.DataFrame
+The tables in the semantic model and their fallback reason.
"""

if workspace == None:
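
A usage sketch based on the documented return type (hypothetical model name; import path assumed from the file layout):

from sempy_labs.Fallback import check_fallback_reason

# returns a pandas DataFrame with one row per table and its DirectQuery fallback reason
df = check_fallback_reason(dataset='AdventureWorks')
print(df)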