Merge branch 'microsoft:main' into cross_region
MitchSS authored Dec 12, 2024
2 parents 05f3519 + b985f9f commit dd8c55a
Showing 11 changed files with 206 additions and 136 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -38,6 +38,7 @@ Check out the video below for an introduction to Semantic Link, Semantic Link La
* [Dynamically generate a Direct Lake semantic model](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.generate_direct_lake_semantic_model)
* [Check why a Direct Lake semantic model would fallback to DirectQuery](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.check_fallback_reason)
* [View a measure dependency tree](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.measure_dependency_tree)
* [View the unique columns touched by one or more DAX queries](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.get_dax_query_dependencies) (usage sketch below)
* Reports
* [Report Best Practice Analyzer (BPA)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.run_report_bpa)
* [View report metadata](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Report%20Analysis.ipynb)
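The new README entry above links to `get_dax_query_dependencies`. A minimal usage sketch; the parameter names (`dataset`, `dax_string`, `workspace`) are taken from the linked docs page rather than this commit, and the model, workspace, and query are placeholders:

```python
import sempy_labs as labs

# Placeholder model, workspace, and query; parameter names assumed from the docs.
deps = labs.get_dax_query_dependencies(
    dataset="Sales Model",
    workspace="My Workspace",
    dax_string="EVALUATE SUMMARIZECOLUMNS('Date'[Year])",
)
print(deps)  # one row per unique model column the query touches
```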
53 changes: 36 additions & 17 deletions src/sempy_labs/_helper_functions.py
@@ -160,14 +160,34 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str] = None) -> str
return obj


def resolve_dataset_id(dataset: str, workspace: Optional[str] = None) -> UUID:
def resolve_dataset_name_and_id(
dataset: str | UUID, workspace: Optional[str] = None
) -> Tuple[str, UUID]:

(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

if _is_valid_uuid(dataset):
dataset_id = dataset
dataset_name = fabric.resolve_item_name(
item_id=dataset_id, type="SemanticModel", workspace=workspace_id
)
else:
dataset_name = dataset
dataset_id = fabric.resolve_item_id(
item_name=dataset, type="SemanticModel", workspace=workspace_id
)

return dataset_name, dataset_id


def resolve_dataset_id(dataset: str | UUID, workspace: Optional[str] = None) -> UUID:
"""
Obtains the ID of the semantic model.
Parameters
----------
dataset : str
The name of the semantic model.
dataset : str | UUID
The name or ID of the semantic model.
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
@@ -179,15 +199,14 @@ def resolve_dataset_id(dataset: str, workspace: Optional[str] = None) -> UUID:
The ID of the semantic model.
"""

if workspace is None:
workspace_id = fabric.get_workspace_id()
workspace = fabric.resolve_workspace_name(workspace_id)

obj = fabric.resolve_item_id(
item_name=dataset, type="SemanticModel", workspace=workspace
)
if _is_valid_uuid(dataset):
dataset_id = dataset
else:
dataset_id = fabric.resolve_item_id(
item_name=dataset, type="SemanticModel", workspace=workspace
)

return obj
return dataset_id


def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None) -> str:
@@ -1167,20 +1186,20 @@ def _make_list_unique(my_list):

def _get_partition_map(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame:

if workspace is None:
workspace = fabric.resolve_workspace_name()
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
(dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

partitions = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""
select [ID] AS [PartitionID], [TableID], [Name] AS [PartitionName] from $system.tmschema_partitions
""",
)

tables = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""
select [ID] AS [TableID], [Name] AS [TableName] from $system.tmschema_tables
""",
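Taken together, the hunks above let callers pass either a model name or its UUID anywhere a `dataset` argument is accepted. A minimal sketch of the call pattern; the model and workspace names are placeholders:

```python
from sempy_labs._helper_functions import (
    resolve_dataset_id,
    resolve_dataset_name_and_id,
)

# Name in: _is_valid_uuid() is False, so the name branch looks up the ID.
name, dataset_id = resolve_dataset_name_and_id("Sales Model", workspace="My Workspace")

# UUID in: the ID branch resolves the display name instead.
name_again, same_id = resolve_dataset_name_and_id(dataset_id, workspace="My Workspace")

# resolve_dataset_id() now short-circuits when handed a UUID.
assert resolve_dataset_id(dataset_id, workspace="My Workspace") == dataset_id
```

Resolving both forms once up front is also what lets `_get_partition_map` pass `dataset_id` and `workspace_id` straight into `fabric.evaluate_dax` for its DMV queries.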
55 changes: 30 additions & 25 deletions src/sempy_labs/_list_functions.py
@@ -7,23 +7,25 @@
pagination,
resolve_item_type,
format_dax_object_name,
resolve_dataset_name_and_id,
)
import pandas as pd
from typing import Optional
import sempy_labs._icons as icons
from sempy.fabric.exceptions import FabricHTTPException
from uuid import UUID


def get_object_level_security(
dataset: str, workspace: Optional[str] = None
dataset: str | UUID, workspace: Optional[str] = None
) -> pd.DataFrame:
"""
Shows the object level security for the semantic model.
Parameters
----------
dataset : str
Name of the semantic model.
dataset : str | UUID
Name or ID of the semantic model.
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
@@ -37,12 +39,13 @@ def get_object_level_security(

from sempy_labs.tom import connect_semantic_model

workspace = fabric.resolve_workspace_name(workspace)
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
(dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

df = pd.DataFrame(columns=["Role Name", "Object Type", "Table Name", "Object Name"])

with connect_semantic_model(
dataset=dataset, readonly=True, workspace=workspace
dataset=dataset_id, readonly=True, workspace=workspace_id
) as tom:

for r in tom.model.Roles:
@@ -82,15 +85,15 @@


def list_tables(
dataset: str, workspace: Optional[str] = None, extended: bool = False
dataset: str | UUID, workspace: Optional[str] = None, extended: bool = False
) -> pd.DataFrame:
"""
Shows a semantic model's tables and their properties.
Parameters
----------
dataset : str
Name of the semantic model.
dataset : str | UUID
Name or ID of the semantic model.
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
@@ -106,7 +109,8 @@

from sempy_labs.tom import connect_semantic_model

workspace = fabric.resolve_workspace_name(workspace)
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
(dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

df = pd.DataFrame(
columns=[
@@ -121,20 +125,20 @@
)

with connect_semantic_model(
dataset=dataset, workspace=workspace, readonly=True
dataset=dataset_id, workspace=workspace_id, readonly=True
) as tom:
if extended:
dict_df = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""
EVALUATE SELECTCOLUMNS(FILTER(INFO.STORAGETABLECOLUMNS(), [COLUMN_TYPE] = "BASIC_DATA"),[DIMENSION_NAME],[DICTIONARY_SIZE])
""",
)
dict_sum = dict_df.groupby("[DIMENSION_NAME]")["[DICTIONARY_SIZE]"].sum()
data = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""EVALUATE SELECTCOLUMNS(INFO.STORAGETABLECOLUMNSEGMENTS(),[TABLE_ID],[DIMENSION_NAME],[USED_SIZE])""",
)
data_sum = (
@@ -162,8 +166,8 @@
.sum()
)
rc = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""
SELECT [DIMENSION_NAME],[ROWS_COUNT] FROM $SYSTEM.DISCOVER_STORAGE_TABLES
WHERE RIGHT ( LEFT ( TABLE_ID, 2 ), 1 ) <> '$'
@@ -850,15 +854,15 @@ def update_item(


def list_relationships(
dataset: str, workspace: Optional[str] = None, extended: bool = False
dataset: str | UUID, workspace: Optional[str] = None, extended: bool = False
) -> pd.DataFrame:
"""
Shows a semantic model's relationships and their properties.
Parameters
----------
dataset: str
Name of the semantic model.
dataset: str | UUID
Name or UUID of the semantic model.
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
@@ -872,17 +876,18 @@
A pandas dataframe showing the semantic model's relationships and their properties.
"""

workspace = fabric.resolve_workspace_name(workspace)
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
(dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id)

dfR = fabric.list_relationships(dataset=dataset, workspace=workspace)
dfR = fabric.list_relationships(dataset=dataset_id, workspace=workspace_id)
dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"])
dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"])

if extended:
# Used to map the Relationship IDs
rel = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""
SELECT
[ID] AS [RelationshipID]
@@ -893,8 +898,8 @@

# USED_SIZE shows the Relationship Size where TABLE_ID starts with R$
cs = fabric.evaluate_dax(
dataset=dataset,
workspace=workspace,
dataset=dataset_id,
workspace=workspace_id,
dax_string="""
SELECT
[TABLE_ID]
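The same widening applies across this file. A sketch of the new call shapes, assuming the functions' top-level re-exports and using a made-up UUID:

```python
import sempy_labs as labs

model_id = "11111111-2222-3333-4444-555555555555"  # hypothetical model UUID

# Each function now accepts the ID directly, skipping the name lookup and
# avoiding ambiguity when two models in a workspace share a display name.
ols = labs.get_object_level_security(dataset=model_id, workspace="My Workspace")
tables = labs.list_tables(dataset=model_id, workspace="My Workspace", extended=True)
rels = labs.list_relationships(dataset=model_id, workspace="My Workspace", extended=True)
```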
38 changes: 22 additions & 16 deletions src/sempy_labs/_model_bpa.py
@@ -10,9 +10,10 @@
create_relationship_name,
save_as_delta_table,
resolve_workspace_capacity,
resolve_dataset_id,
resolve_dataset_name_and_id,
get_language_codes,
_get_max_run_id,
resolve_workspace_name_and_id,
)
from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached
from sempy_labs.tom import connect_semantic_model
@@ -23,11 +24,12 @@
from pyspark.sql.functions import col, flatten
from pyspark.sql.types import StructType, StructField, StringType
import os
from uuid import UUID


@log
def run_model_bpa(
dataset: str,
dataset: str | UUID,
rules: Optional[pd.DataFrame] = None,
workspace: Optional[str] = None,
export: bool = False,
@@ -41,8 +43,8 @@ def run_model_bpa(
Parameters
----------
dataset : str
Name of the semantic model.
dataset : str | UUID
Name or ID of the semantic model.
rules : pandas.DataFrame, default=None
A pandas dataframe containing rules to be evaluated.
workspace : str, default=None
@@ -105,15 +107,18 @@ def map_language(language, language_list):
if language is not None:
language = map_language(language, language_list)

workspace = fabric.resolve_workspace_name(workspace)
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
(dataset_name, dataset_id) = resolve_dataset_name_and_id(
dataset, workspace=workspace_id
)

if language is not None and language not in language_list:
print(
f"{icons.yellow_dot} The '{language}' language code is not in our predefined language list. Please file an issue and let us know which language code you are using: https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=."
)

with connect_semantic_model(
dataset=dataset, workspace=workspace, readonly=True
dataset=dataset_id, workspace=workspace_id, readonly=True
) as tom:

if extended:
@@ -122,7 +127,7 @@ def map_language(language, language_list):
# Do not run BPA for models with no tables
if tom.model.Tables.Count == 0:
print(
f"{icons.warning} The '{dataset}' semantic model within the '{workspace}' workspace has no tables and therefore there are no valid BPA results."
f"{icons.warning} The '{dataset_name}' semantic model within the '{workspace_name}' workspace has no tables and therefore there are no valid BPA results."
)
finalDF = pd.DataFrame(
columns=[
@@ -136,7 +141,9 @@
]
)
else:
dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace)
dep = get_model_calc_dependencies(
dataset=dataset_id, workspace=workspace_id
)

def translate_using_po(rule_file):
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -382,20 +389,19 @@ def translate_using_spark(rule_file):
runId = max_run_id + 1

now = datetime.datetime.now()
dfD = fabric.list_datasets(workspace=workspace, mode="rest")
dfD_filt = dfD[dfD["Dataset Name"] == dataset]
dfD = fabric.list_datasets(workspace=workspace_id, mode="rest")
dfD_filt = dfD[dfD["Dataset Id"] == dataset_id]
configured_by = dfD_filt["Configured By"].iloc[0]
capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace)
capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace_id)
dfExport["Capacity Name"] = capacity_name
dfExport["Capacity Id"] = capacity_id
dfExport["Workspace Name"] = workspace
dfExport["Workspace Id"] = fabric.resolve_workspace_id(workspace)
dfExport["Dataset Name"] = dataset
dfExport["Dataset Id"] = resolve_dataset_id(dataset, workspace)
dfExport["Workspace Name"] = workspace_name
dfExport["Workspace Id"] = workspace_id
dfExport["Dataset Name"] = dataset_name
dfExport["Dataset Id"] = dataset_id
dfExport["Configured By"] = configured_by
dfExport["Timestamp"] = now
dfExport["RunId"] = runId
dfExport["Configured By"] = configured_by
dfExport["RunId"] = dfExport["RunId"].astype("int")

dfExport = dfExport[list(icons.bpa_schema.keys())]
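Because `run_model_bpa` now resolves `dataset_name` and `dataset_id` up front, the export block can stamp the identifying columns without extra lookups, and the dataset row is matched on "Dataset Id" instead of the non-unique name. An illustrative call with a placeholder UUID:

```python
import sempy_labs as labs

bpa_df = labs.run_model_bpa(
    dataset="11111111-2222-3333-4444-555555555555",  # hypothetical model UUID
    workspace="My Workspace",                        # placeholder workspace name
    return_dataframe=True,
)
```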
6 changes: 3 additions & 3 deletions src/sempy_labs/_model_bpa_bulk.py
@@ -119,16 +119,16 @@ def run_model_bpa_bulk(
dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)]

if len(dfD_filt) > 0:
for i2, r2 in dfD_filt.iterrows():
for _, r2 in dfD_filt.iterrows():
dataset_id = r2["Dataset Id"]
dataset_name = r2["Dataset Name"]
config_by = r2["Configured By"]
dataset_id = r2["Dataset Id"]
print(
f"{icons.in_progress} Collecting Model BPA stats for the '{dataset_name}' semantic model within the '{wksp}' workspace."
)
try:
bpa_df = run_model_bpa(
dataset=dataset_name,
dataset=dataset_id,
workspace=wksp,
language=language,
return_dataframe=True,
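The bulk loop now hands `run_model_bpa` the `dataset_id` it already has from `fabric.list_datasets`, so each model is scanned by ID even if display names collide. A sketch of a bulk run; the `workspace` and `skip_models` parameters are assumed from the hunk above, and the values are placeholders:

```python
import sempy_labs as labs

labs.run_model_bpa_bulk(
    workspace="My Workspace",         # placeholder; assumed parameter
    skip_models=["Scratch Model"],    # matched against "Dataset Name" in the hunk above
)
```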
