Skip to content

Commit

Permalink
Merge branch 'm-kovalsky/reportwrapper'
Browse files Browse the repository at this point in the history
  • Loading branch information
m-kovalsky committed Sep 24, 2024
2 parents 3e893dd + d4b5b5e commit 3f03434
Show file tree
Hide file tree
Showing 14 changed files with 3,093 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Semantic Link Labs

[![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs)
[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.7.4&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.8.0&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs)

Expand Down
3 changes: 2 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ azure-storage-blob>=12.9.0
anytree
IPython
polib
azure.mgmt.resource
azure.mgmt.resource
jsonpath_ng
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
project = 'semantic-link-labs'
copyright = '2024, Microsoft and community'
author = 'Microsoft and community'
release = '0.7.4'
release = '0.8.0'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
1 change: 1 addition & 0 deletions notebooks/Report Analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://pypi.org/project/semantic-link-labs/) to see the latest version."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install semantic-link-labs"]},{"cell_type":"markdown","id":"b195eae8","metadata":{},"source":["### Import the library and set the initial parameters"]},{"cell_type":"code","execution_count":null,"id":"1344e286","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","import sempy_labs.report as rep\n","from sempy_labs.report import ReportWrapper\n","\n","# For checking reports\n","report_name = '' # Enter the report name\n","report_workspace = None # Enter the workspace in which the report exists\n","rpt = ReportWrapper(report=report_name, workspace=report_workspace)"]},{"cell_type":"markdown","id":"4b68c027","metadata":{},"source":["### Report Best Practice Analyzer (BPA)"]},{"cell_type":"code","execution_count":null,"id":"3a596d68","metadata":{},"outputs":[],"source":["rep.run_report_bpa(report=report_name, workspace=report_workspace)"]},{"cell_type":"markdown","id":"2ac3b59a","metadata":{},"source":["#### Report BPA using custom rules"]},{"cell_type":"code","execution_count":null,"id":"bf0fe645","metadata":{},"outputs":[],"source":["import pandas as pd\n","\n","rules = pd.DataFrame(\n"," [\n"," (\n"," \"Error Prevention\",\n"," \"Semantic Model\",\n"," \"Error\",\n"," \"Fix report objects which reference invalid semantic model objects\",\n"," lambda df: df[\"Valid Semantic Model Object\"] == False,\n"," \"This rule highlights visuals, report filters, page filters or visual filters which reference an invalid semantic model object (i.e 
Measure/Column/Hierarchy).\",\n"," \"\",\n"," ),\n"," (\n"," \"Performance\",\n"," \"Page\",\n"," \"Warning\",\n"," \"Reduce the number of visible visuals on the page\",\n"," lambda df: df[\"Visible Visual Count\"] > 15,\n"," 'Reducing the number of visible visuals on a page will lead to faster report performance. This rule flags pages with over 15 visible visuals.',\n"," ),\n"," (\n"," \"Performance\",\n"," \"Visual\",\n"," \"Warning\",\n"," \"Reduce the number of objects within visuals\",\n"," lambda df: df[\"Visual Object Count\"] > 5,\n"," \"Reducing the number of objects (i.e. measures, columns) which are used in a visual will lead to faster report performance.\",\n"," )\n"," ],\n"," columns=[\n"," \"Category\",\n"," \"Scope\",\n"," \"Severity\",\n"," \"Rule Name\",\n"," \"Expression\",\n"," \"Description\",\n"," \"URL\",\n"," ],\n",")\n","\n","rep.run_report_bpa(report=report_name, workspace=report_workspace, rules=rules)"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### View all semantic model objects within a report"]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["rpt.list_semantic_model_objects()"]},{"cell_type":"markdown","id":"9efe783e","metadata":{},"source":["#### View broken report objects (if any semantic model objects used in the report do not exist within the semantic model)\n","The 'Valid Semantic Model Object' column indicates if the object (measure/column/hierarchy) exists within the semantic model used by the report"]},{"cell_type":"code","execution_count":null,"id":"a7480602","metadata":{},"outputs":[],"source":["rpt.list_semantic_model_objects(extended=True)"]},{"cell_type":"markdown","id":"5fab7ef7","metadata":{},"source":["#### View broken report objects across all reports tied to a semantic 
model"]},{"cell_type":"code","execution_count":null,"id":"34f1d9a6","metadata":{},"outputs":[],"source":["dataset_name = '' # Enter the semantic model name\n","dataset_workspace = None # Enter the workspace name in which the semantic model exists (if set to None it will use the workspace in which the notebook is running)\n","labs.list_report_semantic_model_objects(dataset=dataset_name, workspace=dataset_workspace, extended=True)"]},{"cell_type":"markdown","id":"26acd665","metadata":{},"source":["### List functions within the ReportWrapper"]},{"cell_type":"code","execution_count":null,"id":"9c2c206a","metadata":{},"outputs":[],"source":["rpt.list_custom_visuals()"]},{"cell_type":"code","execution_count":null,"id":"22647267","metadata":{},"outputs":[],"source":["rpt.list_pages()"]},{"cell_type":"code","execution_count":null,"id":"493fb104","metadata":{},"outputs":[],"source":["rpt.list_visuals()"]},{"cell_type":"code","execution_count":null,"id":"1ef9a9ac","metadata":{},"outputs":[],"source":["rpt.list_visual_objects()"]},{"cell_type":"code","execution_count":null,"id":"ad00499c","metadata":{},"outputs":[],"source":["rpt.list_report_filters()"]},{"cell_type":"code","execution_count":null,"id":"c0f02a49","metadata":{},"outputs":[],"source":["rpt.list_page_filters()"]},{"cell_type":"code","execution_count":null,"id":"4cf8d12f","metadata":{},"outputs":[],"source":["rpt.list_visual_filters()"]},{"cell_type":"code","execution_count":null,"id":"3b5e9c16","metadata":{},"outputs":[],"source":["rpt.list_report_level_measures()"]},{"cell_type":"code","execution_count":null,"id":"9f420f57","metadata":{},"outputs":[],"source":["rpt.list_visual_interactions()"]},{"cell_type":"code","execution_count":null,"id":"198afff0","metadata":{},"outputs":[],"source":["rpt.list_bookmarks()"]},{"cell_type":"markdown","id":"f3ee5962","metadata":{},"source":["### Perform actions on a report\n","Set a custom theme for a report based on a Power BI theme 
file"]},{"cell_type":"code","execution_count":null,"id":"ec1a7d1b","metadata":{},"outputs":[],"source":["rpt.set_theme(theme_file_path='/lakehouse/default/Files/CY23SU09.json')"]},{"cell_type":"code","execution_count":null,"id":"b1203067","metadata":{},"outputs":[],"source":["rpt.set_theme(theme_file_path='https://raw.githubusercontent.com/PowerBiDevCamp/FabricUserApiDemo/main/FabricUserApiDemo/DefinitionTemplates/Shared/Reports/StaticResources/SharedResources/BaseThemes/CY23SU08.json')"]},{"cell_type":"markdown","id":"9b230f9f","metadata":{},"source":["Set whether a Power BI report page is hidden or visible"]},{"cell_type":"code","execution_count":null,"id":"8ce57483","metadata":{},"outputs":[],"source":["rpt.set_page_visibility(page_name='Page 1', hidden=True)"]},{"cell_type":"markdown","id":"ef3e4c02","metadata":{},"source":["Set the active page (page shown upon opening the report)"]},{"cell_type":"code","execution_count":null,"id":"024d4111","metadata":{},"outputs":[],"source":["rpt.set_active_page(page_name='Page 2')"]},{"cell_type":"markdown","id":"f96511ac","metadata":{},"source":["Disable the 'show items with no data' for all visuals within a Power BI report"]},{"cell_type":"code","execution_count":null,"id":"65bf38fe","metadata":{},"outputs":[],"source":["rpt.disable_show_items_with_no_data()"]},{"cell_type":"markdown","id":"ddee3565","metadata":{},"source":["Remove all custom visuals in a Power BI report which are not used in any visuals within the report"]},{"cell_type":"code","execution_count":null,"id":"9aba1564","metadata":{},"outputs":[],"source":["rpt.remove_unnecessary_custom_visuals()"]},{"cell_type":"markdown","id":"5342423f","metadata":{},"source":["Hide all pages which are used for a tooltip or drillthrough"]},{"cell_type":"code","execution_count":null,"id":"b9045cbb","metadata":{},"outputs":[],"source":["rpt.hide_tooltip_drillthrough_pages()"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse 
PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"[email protected]"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5}
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name="semantic-link-labs"
authors = [
{ name = "Microsoft Corporation" },
]
version="0.7.4"
version="0.8.0"
description="Semantic Link Labs for Microsoft Fabric"
readme="README.md"
requires-python=">=3.10,<3.12"
Expand All @@ -28,6 +28,7 @@ dependencies = [
"powerbiclient",
"polib",
"azure.mgmt.resource",
"jsonpath_ng",
]

[tool.setuptools.packages.find]
Expand All @@ -45,7 +46,7 @@ test = [
Repository = "https://github.com/microsoft/semantic-link-labs.git"

[[tool.mypy.overrides]]
module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*"
module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*"
ignore_missing_imports = true

[tool.flake8]
Expand Down
6 changes: 6 additions & 0 deletions src/sempy_labs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@
)
from sempy_labs._list_functions import (
list_reports_using_semantic_model,
list_semantic_model_object_report_usage,
list_report_semantic_model_objects,
list_semantic_model_objects,
list_shortcuts,
get_object_level_security,
Expand Down Expand Up @@ -117,6 +119,7 @@
resolve_environment_id,
resolve_capacity_id,
resolve_warehouse_id,
resolve_dataset_from_report,
resolve_workspace_capacity,
create_abfss_path,
format_dax_object_name,
Expand Down Expand Up @@ -188,6 +191,8 @@
"list_storage_account_files",
"backup_semantic_model",
"restore_semantic_model",
"list_semantic_model_object_report_usage",
"list_report_semantic_model_objects",
"delete_custom_pool",
"clear_cache",
# create_connection_cloud,
Expand Down Expand Up @@ -303,4 +308,5 @@
"delete_premium_capacity",
"suspend_fabric_capacity",
"delete_embedded_capacity",
"resolve_dataset_from_report",
]
4 changes: 2 additions & 2 deletions src/sempy_labs/_helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import re
import json
import base64
import time
from sempy.fabric.exceptions import FabricHTTPException
import pandas as pd
from functools import wraps
import datetime
import time
from typing import Optional, Tuple, List
from uuid import UUID
import sempy_labs._icons as icons
from sempy.fabric.exceptions import FabricHTTPException
import urllib.parse
from azure.core.credentials import TokenCredential, AccessToken

Expand Down
209 changes: 209 additions & 0 deletions src/sempy_labs/_list_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1774,3 +1774,212 @@ def list_reports_using_semantic_model(
df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

return df


def list_report_semantic_model_objects(
    dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False
) -> pd.DataFrame:
    """
    Shows a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from
    a given semantic model.

    Requirement: Reports must be in the PBIR format.

    Parameters
    ----------
    dataset : str
        Name of the semantic model.
    workspace : str, default=None
        The Fabric workspace name.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    extended: bool, default=False
        If True, adds an extra column called 'Valid Semantic Model Object' which identifies whether the semantic model object used
        in the report exists in the semantic model which feeds data to the report. Rows whose 'Object Type' is not
        Measure, Column or Hierarchy are left without a value in that column.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe showing a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from
        a given semantic model.
    """

    from sempy_labs.report import ReportWrapper
    from sempy_labs.tom import connect_semantic_model
    from sempy_labs._helper_functions import format_dax_object_name

    valid_column = "Valid Semantic Model Object"

    dfRO = pd.DataFrame(
        columns=[
            "Report Name",
            "Report Workspace Name",
            "Table Name",
            "Object Name",
            "Object Type",
            "Report Source",
            "Report Source Object",
        ]
    )

    # Collect all reports which use the semantic model
    dfR = list_reports_using_semantic_model(dataset=dataset, workspace=workspace)

    # Gather one frame per report and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    report_frames = []
    for _, r in dfR.iterrows():
        report_name = r["Report Name"]
        report_workspace = r["Report Workspace Name"]

        rpt = ReportWrapper(report=report_name, workspace=report_workspace)
        # Collect all semantic model objects used in the report
        dfRSO = rpt.list_semantic_model_objects()
        dfRSO["Report Name"] = report_name
        dfRSO["Report Workspace Name"] = report_workspace
        # Move the two identifying columns to the front of the frame.
        for position, col_name in enumerate(["Report Name", "Report Workspace Name"]):
            dfRSO.insert(position, col_name, dfRSO.pop(col_name))

        report_frames.append(dfRSO)

    if report_frames:
        dfRO = pd.concat([dfRO, *report_frames], ignore_index=True)

    if extended:
        if not dfRO.empty:
            # Enumerate the model objects once and keep them as lookup sets so
            # each report row is validated with a constant-time membership test.
            with connect_semantic_model(
                dataset=dataset, readonly=True, workspace=workspace
            ) as tom:
                measure_names = {m.Name for m in tom.all_measures()}
                column_names = {
                    format_dax_object_name(c.Parent.Name, c.Name)
                    for c in tom.all_columns()
                }
                hierarchy_names = {
                    format_dax_object_name(h.Parent.Name, h.Name)
                    for h in tom.all_hierarchies()
                }

            for index, row in dfRO.iterrows():
                object_type = row["Object Type"]
                if object_type == "Measure":
                    # Measures are matched on name alone (no table qualifier).
                    dfRO.at[index, valid_column] = (
                        row["Object Name"] in measure_names
                    )
                elif object_type == "Column":
                    dfRO.at[index, valid_column] = (
                        format_dax_object_name(row["Table Name"], row["Object Name"])
                        in column_names
                    )
                elif object_type == "Hierarchy":
                    dfRO.at[index, valid_column] = (
                        format_dax_object_name(row["Table Name"], row["Object Name"])
                        in hierarchy_names
                    )

        # Keep the output schema stable: the column is present whenever
        # extended=True, even if no row was eligible for validation.
        if valid_column not in dfRO.columns:
            dfRO[valid_column] = pd.Series(dtype="object")

    return dfRO


def list_semantic_model_object_report_usage(
    dataset: str,
    workspace: Optional[str] = None,
    include_dependencies: Optional[bool] = False,
    extended: Optional[bool] = False,
) -> pd.DataFrame:
    """
    Shows a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model.

    Requirement: Reports must be in the PBIR format.

    Parameters
    ----------
    dataset : str
        Name of the semantic model.
    workspace : str, default=None
        The Fabric workspace name.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.
    include_dependencies : bool, default=False
        If True, includes measure dependencies.
    extended: bool, default=False
        If True, adds columns 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size' based on Vertipaq statistics.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe showing a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model. By default, the dataframe
        is sorted descending by 'Report Usage Count'. Rows whose 'Object Type' is 'Table' are excluded.
    """

    from sempy_labs._model_dependencies import get_measure_dependencies
    from sempy_labs._helper_functions import format_dax_object_name

    workspace = fabric.resolve_workspace_name(workspace)

    dfR = list_report_semantic_model_objects(dataset=dataset, workspace=workspace)
    usage_column_name = "Report Usage Count"
    key_cols = ["Table Name", "Object Name", "Object Type"]

    # One row per reference found in a report.
    usage = dfR[key_cols]

    if include_dependencies:
        dep = get_measure_dependencies(dataset=dataset, workspace=workspace)
        referenced_to_key = {
            "Referenced Table": "Table Name",
            "Referenced Object": "Object Name",
            "Referenced Object Type": "Object Type",
        }

        # Every measure reference in a report also counts as a reference to each
        # object that measure depends on. Frames are collected and concatenated
        # once; rename() returns a new frame so `dep` itself is never modified.
        frames = [usage]
        measure_refs = dfR.loc[dfR["Object Type"] == "Measure", "Object Name"]
        for measure_name in measure_refs:
            frames.append(
                dep.loc[
                    dep["Object Name"] == measure_name, list(referenced_to_key)
                ].rename(columns=referenced_to_key)
            )

        usage = pd.concat(frames, ignore_index=True)

    final_df = usage.groupby(key_cols).size().reset_index(name=usage_column_name)

    if extended:
        ext_int_cols = ["Total Size", "Data Size", "Dictionary Size", "Hierarchy Size"]

        # Join on a temporary fully qualified object name to pull in the
        # column statistics.
        final_df["Object"] = format_dax_object_name(
            final_df["Table Name"], final_df["Object Name"]
        )
        dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True)
        dfC["Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"])
        final_df = pd.merge(
            final_df,
            dfC[["Object"] + ext_int_cols],
            on="Object",
            how="left",
        )

        # Objects with no match in the column list get 0 for every size column.
        final_df[ext_int_cols] = final_df[ext_int_cols].fillna(0).astype(int)
        final_df = final_df.drop(columns="Object")

    final_df[usage_column_name] = final_df[usage_column_name].astype(int)

    final_df = (
        final_df[final_df["Object Type"] != "Table"]
        .sort_values(by=usage_column_name, ascending=False)
        .reset_index(drop=True)
    )

    return final_df
Loading

0 comments on commit 3f03434

Please sign in to comment.