From 5b948c0eec7e3de753f68ce2e6dcd7547fcef952 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 6 Oct 2024 16:12:09 +0200 Subject: [PATCH 1/7] added func and wrapper for scannerapi --- src/sempy_labs/admin/_basic_functions.py | 43 --- src/sempy_labs/admin/_scanner.py | 368 +++++++++++++++++++++++ 2 files changed, 368 insertions(+), 43 deletions(-) create mode 100644 src/sempy_labs/admin/_scanner.py diff --git a/src/sempy_labs/admin/_basic_functions.py b/src/sempy_labs/admin/_basic_functions.py index 8c5c9a3a..7d75148b 100644 --- a/src/sempy_labs/admin/_basic_functions.py +++ b/src/sempy_labs/admin/_basic_functions.py @@ -10,7 +10,6 @@ import datetime import numpy as np import pandas as pd -import time def list_workspaces( @@ -479,48 +478,6 @@ def list_capacities_delegated_tenant_settings( return combined_response -def scan_workspaces( - data_source_details: bool = False, - dataset_schema: bool = False, - dataset_expressions: bool = False, - lineage: bool = False, - artifact_users: bool = False, - workspace: Optional[str | List[str]] = None, -) -> dict: - - workspace = fabric.resolve_workspace_name(workspace) - - if isinstance(workspace, str): - workspace = [workspace] - - workspace_list = [] - - for w in workspace: - workspace_list.append(fabric.resolve_workspace_id(w)) - - client = fabric.PowerBIRestClient() - request_body = {"workspaces": workspace_list} - - response_clause = f"/v1.0/myorg/admin/workspaces/getInfo?lineage={lineage}&datasourceDetails={data_source_details}&datasetSchema={dataset_schema}&datasetExpressions={dataset_expressions}&getArtifactUsers={artifact_users}" - response = client.post(response_clause, json=request_body) - - if response.status_code != 202: - raise FabricHTTPException(response) - scan_id = response.json()["id"] - scan_status = response.json().get("status") - while scan_status not in ["Succeeded", "Failed"]: - time.sleep(1) - response = client.get(f"/v1.0/myorg/admin/workspaces/scanStatus/{scan_id}") - scan_status = response.json().get("status") - if scan_status == "Failed": - raise FabricHTTPException(response) - response = client.get(f"/v1.0/myorg/admin/workspaces/scanResult/{scan_id}") - if response.status_code != 200: - raise FabricHTTPException(response) - - return response.json() - - def list_datasets() -> pd.DataFrame: """ Shows a list of datasets for the organization. 
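Usage sketch for review context (not part of the patch itself): a minimal example of the API this series builds up, assuming a Fabric notebook session with tenant admin rights; the workspace name "Sales" is hypothetical, and the sempy_labs.admin exports land in patch 2.

from sempy_labs.admin import scan_workspaces, ScannerWrapper

# One-shot scan: POSTs workspaces/getInfo, polls scanStatus until Succeeded/Failed,
# then fetches scanResult and returns the raw JSON payload.
result = scan_workspaces(
    dataset_schema=True,  # requires metadata scanning to be fully enabled on the tenant
    lineage=True,
    workspace=["Sales"],  # the endpoint accepts at most 100 workspaces per scan
)
print([w.get("name") for w in result.get("workspaces", [])])

# Wrapper flavor: runs the same scan once in __init__, keeps the raw payload on
# .output, and the list_* methods slice it into DataFrames.
scanner = ScannerWrapper(dataset_schema=True, lineage=True, workspace=["Sales"])
kql_dbs = scanner.list_kql_databases()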
diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py new file mode 100644 index 00000000..b1d0960f --- /dev/null +++ b/src/sempy_labs/admin/_scanner.py @@ -0,0 +1,368 @@ +import sempy.fabric as fabric +from typing import Optional, List +from sempy.fabric.exceptions import FabricHTTPException +import pandas as pd +import time +import sempy_labs._icons as icons + + +def scan_workspaces( + data_source_details: bool = False, + dataset_schema: bool = False, + dataset_expressions: bool = False, + lineage: bool = False, + artifact_users: bool = False, + workspace: Optional[str | List[str]] = None, +) -> dict: + + # https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-post-workspace-info + + if workspace is None: + workspace = fabric.resolve_workspace_name(workspace) + + if isinstance(workspace, str): + workspace = [workspace] + + workspace_list = [] + + for w in workspace: + workspace_list.append(fabric.resolve_workspace_id(w)) + + if len(workspace_list) > 100: + raise ValueError(f"{icons.red_dot} There is a limit of 100 workspaces.") + + client = fabric.PowerBIRestClient() + request_body = {"workspaces": workspace_list} + + response_clause = f"/v1.0/myorg/admin/workspaces/getInfo?lineage={lineage}&datasourceDetails={data_source_details}&datasetSchema={dataset_schema}&datasetExpressions={dataset_expressions}&getArtifactUsers={artifact_users}" + response = client.post(response_clause, json=request_body) + + if response.status_code != 202: + raise FabricHTTPException(response) + scan_id = response.json().get("id") + scan_status = response.json().get("status") + while scan_status not in ["Succeeded", "Failed"]: + time.sleep(1) + response = client.get(f"/v1.0/myorg/admin/workspaces/scanStatus/{scan_id}") + scan_status = response.json().get("status") + if scan_status == "Failed": + raise FabricHTTPException(response) + response = client.get(f"/v1.0/myorg/admin/workspaces/scanResult/{scan_id}") + if response.status_code != 200: + raise FabricHTTPException(response) + + return response.json() + + +class ScannerWrapper: + + def __init__( + self, + data_source_details: Optional[bool] = False, + dataset_schema: Optional[bool] = False, + dataset_expressions: Optional[bool] = False, + lineage: Optional[bool] = False, + artifact_users: Optional[bool] = False, + workspace: Optional[str | List[str]] = None, + ): + + self._data_source_details = data_source_details + self._dataset_schema = dataset_schema + self._dataset_expressions = dataset_expressions + self._lineage = lineage + self._artifact_users = artifact_users + self._workspace = workspace + + self.output = scan_workspaces( + data_source_details=self._data_source_details, + dataset_schema=self._dataset_schema, + dataset_expressions=self._dataset_expressions, + lineage=self._lineage, + artifact_users=self._artifact_users, + workspace=self._workspace, + ) + + def list_kql_databases(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("KQLDatabase", []): + ep = obj.get("extendedProperties", {}) + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "KQL Database Name": obj.get("name"), + "KQL Database Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Modified By": obj.get("modfiedBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": 
obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + "Query Service URI": ep.get("QueryServiceUri"), + "Ingestion Service URI": ep.get("IngestionServiceUri"), + "Region": ep.get("Region"), + "Kusto Database Type": ep.get("KustoDatabaseType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_eventhouses(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("Eventhouse", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Eventhouse Name": obj.get("name"), + "Eventhouse Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Modified By": obj.get("modfiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_kql_querysets(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("KQLQueryset", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "KQL Queryset Name": obj.get("name"), + "KQL Queryset Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Modified By": obj.get("modfiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_lakehouses(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("Lakehouse", []): + ep = obj.get("extendedProperties", {}) + + ds_list = [] + if "datasourceUsages" in obj: + ds_list = [ + item["datasourceInstanceId"] + for item in obj.get("datasourceUsages") + ] + + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Lakehouse Name": obj.get("name"), + "Lakehouse Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": obj.get("createdById"), + "OneLake Tables Path": ep.get("OneLakeTablesPath"), + "OneLake Files Path": ep.get("OneLakeFilesPath"), + "DW Properties": ep.get("DwProperties"), + "Datasource Usages": ds_list, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_notebooks(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("Notebook", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Notebook Name": obj.get("name"), + "Notebook Id": obj.get("id"), + "Description": obj.get("description"), 
+ "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified By": obj.get("modifiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modifiedById"), + "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_reports(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("reports", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Report Name": obj.get("name"), + "Report Id": obj.get("id"), + "Report Type": obj.get("reportType"), + "Description": obj.get("id"), + "Created Date": obj.get("createdDateTime"), + "Modified Date": obj.get("modifiedDateTime"), + "Modified By": obj.get("modifiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modifiedById"), + "Created By Id": obj.get("createdById"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_datasets(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Configured By": obj.get("configuredBy"), + "Configured By Id": obj.get("configuredById"), + "Effective Identity Required": obj.get( + "isEffectiveIdentityRequired" + ), + "Effective Identity Roles Required": obj.get( + "isEffectiveIdentityRolesRequired" + ), + "Target Storage Mode": obj.get("targetStorageMode"), + "Created Date": obj.get("createdDate"), + "Content Provider Type": obj.get("contentProviderType"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_warehouses(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("warehouses", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Warehouse Name": obj.get("name"), + "Warehouse Id": obj.get("id"), + "Configured By": obj.get("configuredBy"), + "Configured By Id": obj.get("configuredById"), + "Modified By": obj.get("modifiedBy"), + "Modified By Id": obj.get("modifiedById"), + "Modified Date": obj.get("modifiedDateTime"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_sql_endpoints(self): + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("SQLAnalyticsEndpoint", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "SQL Endpoint Name": obj.get("name"), + "SQL Endpoint Id": obj.get("id"), + "Configured By": obj.get("configuredBy"), + "Configured By Id": obj.get("configuredById"), + "Modified By": obj.get("modifiedBy"), + "Modified By Id": obj.get("modifiedById"), + "Modified Date": obj.get("modifiedDateTime"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_data_source_instances(self): + + df = pd.DataFrame(columns=[]) + + for obj in 
self.output.get("datasourceInstances", []): + new_data = { + "Data Source Type": obj.get("datasourceType"), + "Server": obj.get("connectionDetails", {}).get("datasourceType"), + "Path": obj.get("connectionDetails", {}).get("path"), + "datasourceId": obj.get("datasourceId"), + "gatewayId": obj.get("gatewayId"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df From ed9526dc20d038f1786bb74960af72eea08a34b4 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 6 Oct 2024 16:51:48 +0200 Subject: [PATCH 2/7] added rest of functions --- src/sempy_labs/admin/__init__.py | 6 + src/sempy_labs/admin/_scanner.py | 438 ++++++++++++++++++++++++++++--- 2 files changed, 413 insertions(+), 31 deletions(-) diff --git a/src/sempy_labs/admin/__init__.py b/src/sempy_labs/admin/__init__.py index d29cf2cf..48b15c6d 100644 --- a/src/sempy_labs/admin/__init__.py +++ b/src/sempy_labs/admin/__init__.py @@ -1,3 +1,7 @@ +from sempy_labs.admin._scanner import ( + scan_workspaces, + ScannerWrapper, +) from sempy_labs.admin._basic_functions import ( assign_workspaces_to_capacity, list_capacities, @@ -50,4 +54,6 @@ "unassign_workspaces_from_capacity", "list_external_data_shares", "revoke_external_data_share", + "scan_workspaces", + "ScannerWrapper", ] diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py index b1d0960f..51fc6cce 100644 --- a/src/sempy_labs/admin/_scanner.py +++ b/src/sempy_labs/admin/_scanner.py @@ -82,9 +82,59 @@ def __init__( workspace=self._workspace, ) - def list_kql_databases(self): + def list_workspaces(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "State", + "Is On Dedicated Capacity", + "Capacity Id", + "Default Dataset Storage Format", + ] + ) + + for w in self.output.get("workspaces", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "State": w.get("state"), + "Is On Dedicated Capacity": w.get("isOnDedicatedCapacity"), + "Capacity Id": w.get("capacityId"), + "Default Dataset Storage Format": w.get("defaultDatasetStorageFormat"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + bool_cols = ["Is On Dedicated Capacity"] + df[bool_cols] = df[bool_cols].astype(bool) + + return df - df = pd.DataFrame(columns=[]) + def list_kql_databases(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "KQL Database Name", + "KQL Database Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + "Query Service URI", + "Ingestion Service URI", + "Region", + "Kusto Database Type", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("KQLDatabase", []): @@ -116,9 +166,26 @@ def list_kql_databases(self): return df - def list_eventhouses(self): - - df = pd.DataFrame(columns=[]) + def list_eventhouses(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Eventhouse Name", + "Eventhouse Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("Eventhouse", []): @@ -146,9 +213,26 @@ def list_eventhouses(self): return df - def list_kql_querysets(self): - - df = pd.DataFrame(columns=[]) + def 
list_kql_querysets(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "KQL Queryset Name", + "KQL Queryset Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("KQLQueryset", []): @@ -176,9 +260,30 @@ def list_kql_querysets(self): return df - def list_lakehouses(self): - - df = pd.DataFrame(columns=[]) + def list_lakehouses(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Lakehouse Name", + "Lakehouse Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + "OneLake Tables Path", + "OneLake Files Path", + "DW Properties", + "Datasource Usages", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("Lakehouse", []): @@ -202,12 +307,16 @@ def list_lakehouses(self): "Created Date": obj.get("createdDate"), "Modified Date": obj.get("modifiedDate"), "Created By": obj.get("createdBy"), + "Modified By": obj.get("modifiedBy"), "Modified By Id": obj.get("modfiedById"), "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), "OneLake Tables Path": ep.get("OneLakeTablesPath"), "OneLake Files Path": ep.get("OneLakeFilesPath"), "DW Properties": ep.get("DwProperties"), - "Datasource Usages": ds_list, + "Datasource Usages": [ds_list], } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -215,9 +324,25 @@ def list_lakehouses(self): return df - def list_notebooks(self): - - df = pd.DataFrame(columns=[]) + def list_notebooks(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Notebook Name", + "Notebook Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified By", + "Created By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("Notebook", []): @@ -244,9 +369,25 @@ def list_notebooks(self): return df - def list_reports(self): - - df = pd.DataFrame(columns=[]) + def list_reports(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Report Name", + "Report Id", + "Report Type", + "Description", + "Created Date", + "Modified Date", + "Modified By", + "Created By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("reports", []): @@ -263,6 +404,9 @@ def list_reports(self): "Created By": obj.get("createdBy"), "Modified By Id": obj.get("modifiedById"), "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -270,9 +414,24 @@ def list_reports(self): return df - def list_datasets(self): - - df = pd.DataFrame(columns=[]) + def list_datasets(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Configured By", + "Configured By Id", + "Effective Identity Required", + "Effective Identity Roles Required", + "Target Storage Mode", + "Created Date", + "Content Provider Type", + 
"Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("datasets", []): @@ -300,11 +459,27 @@ def list_datasets(self): [df, pd.DataFrame(new_data, index=[0])], ignore_index=True ) - return df + bool_cols = ["Effective Identity Required", "Effective Identity Roles Required"] + df[bool_cols] = df[bool_cols].astype(bool) - def list_warehouses(self): + return df - df = pd.DataFrame(columns=[]) + def list_warehouses(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Warehouse Name", + "Warehouse Id", + "Configured By", + "Configured By Id", + "Modified By", + "Modified By Id", + "Modified Date", + "Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("warehouses", []): @@ -318,6 +493,9 @@ def list_warehouses(self): "Modified By": obj.get("modifiedBy"), "Modified By Id": obj.get("modifiedById"), "Modified Date": obj.get("modifiedDateTime"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -325,9 +503,22 @@ def list_warehouses(self): return df - def list_sql_endpoints(self): - - df = pd.DataFrame(columns=[]) + def list_sql_endpoints(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "SQL Endpoint Name", + "SQL Endpoint Id", + "Configured By", + "Configured By Id", + "Modified By", + "Modified By Id", + "Modified Date", + "Sensitivity Label Id", + ] + ) for w in self.output.get("workspaces", []): for obj in w.get("SQLAnalyticsEndpoint", []): @@ -351,18 +542,203 @@ def list_sql_endpoints(self): return df - def list_data_source_instances(self): + def list_data_source_instances(self) -> pd.DataFrame: - df = pd.DataFrame(columns=[]) + df = pd.DataFrame( + columns=[ + "Data Source Type", + "Server", + "Path", + "Data Source Id", + "Gateway Id", + ] + ) for obj in self.output.get("datasourceInstances", []): new_data = { "Data Source Type": obj.get("datasourceType"), "Server": obj.get("connectionDetails", {}).get("datasourceType"), "Path": obj.get("connectionDetails", {}).get("path"), - "datasourceId": obj.get("datasourceId"), - "gatewayId": obj.get("gatewayId"), + "Datasource Id": obj.get("datasourceId"), + "Gateway Id": obj.get("gatewayId"), } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df + + def list_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Group User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Group User Access Right": obj.get("groupUserAccessRight"), + "Email Address": obj.get("emailAddress"), + "Display Name": obj.get("displayName"), + "Identifier": obj.get("identifier"), + "Graph Id": obj.get("graphId"), + "Principal Type": obj.get("principalType"), + "User Type": obj.get("userType"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_datamarts(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Datamart Name", + "Datamart Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + 
"Created By Id", + "Sensitivity Label Id", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datamarts", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Datamart Name": obj.get("name"), + "Datamart Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Modified By": obj.get("modfiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_dashboards(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dashboard Name", + "Dashboard Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("dashboards", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dashboard Name": obj.get("name"), + "Dashboard Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Modified By": obj.get("modfiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_dataflows(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataflow Name", + "Dataflow Id", + "Description", + "State", + "Last Updated Date", + "Created Date", + "Modified Date", + "Created By", + "Modified By", + "Modified By Id", + "Created By Id", + "Sensitivity Label Id", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("dataflows", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataflow Name": obj.get("name"), + "Dataflow Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Last Updated Date": obj.get("lastUpdatedDate"), + "Created Date": obj.get("createdDate"), + "Modified Date": obj.get("modifiedDate"), + "Modified By": obj.get("modfiedBy"), + "Created By": obj.get("createdBy"), + "Modified By Id": obj.get("modfiedById"), + "Created By Id": obj.get("createdById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df From 682791a6f7c75353e9d47399176e140f0d2d22d0 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 6 Oct 2024 18:10:23 +0200 Subject: [PATCH 3/7] added dataset subfunctions --- src/sempy_labs/admin/_scanner.py | 197 ++++++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 5 deletions(-) diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py index 51fc6cce..4f03ff6f 100644 --- 
a/src/sempy_labs/admin/_scanner.py +++ b/src/sempy_labs/admin/_scanner.py @@ -7,13 +7,38 @@ def scan_workspaces( - data_source_details: bool = False, - dataset_schema: bool = False, dataset_expressions: bool = False, - lineage: bool = False, + dataset_schema: bool = False, + data_source_details: bool = False, artifact_users: bool = False, + lineage: bool = False, workspace: Optional[str | List[str]] = None, ) -> dict: + """ + Scans a workspace or set of workspaces for detailed metadata. + + Parameters + ---------- + dataset_expressions : bool, default=False + Whether to return dataset expressions (DAX and Mashup queries). If you set this parameter to true, you must fully enable metadata scanning in order for data to be returned. + dataset_schema : bool, default=False + Whether to return dataset schema (tables, columns and measures). If you set this parameter to true, you must fully enable metadata scanning in order for data to be returned. + data_source_details : bool, default=False + Whether to return data source details. + artifact_users : bool, default=False + Whether to return user details for a Power BI item (such as a report or a dashboard). + lineage : bool, default=False + Whether to return lineage info (upstream dataflows, tiles, data source IDs). + workspace : str | List[str], default=None + The Fabric workspace name or list of workspace names. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + dict + The JSON output showing the metadata for the workspace(s) and their items. + """ # https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-post-workspace-info @@ -34,8 +59,8 @@ def scan_workspaces( client = fabric.PowerBIRestClient() request_body = {"workspaces": workspace_list} - response_clause = f"/v1.0/myorg/admin/workspaces/getInfo?lineage={lineage}&datasourceDetails={data_source_details}&datasetSchema={dataset_schema}&datasetExpressions={dataset_expressions}&getArtifactUsers={artifact_users}" - response = client.post(response_clause, json=request_body) + url = f"/v1.0/myorg/admin/workspaces/getInfo?lineage={lineage}&datasourceDetails={data_source_details}&datasetSchema={dataset_schema}&datasetExpressions={dataset_expressions}&getArtifactUsers={artifact_users}" + response = client.post(url, json=request_body) if response.status_code != 202: raise FabricHTTPException(response) @@ -430,11 +455,19 @@ def list_datasets(self) -> pd.DataFrame: "Created Date", "Content Provider Type", "Sensitivity Label Id", + "Datasource Usages", ] ) for w in self.output.get("workspaces", []): for obj in w.get("datasets", []): + + ds_list = [] + if "datasourceUsages" in obj: + ds_list = [ + item["datasourceInstanceId"] + for item in obj.get("datasourceUsages") + ] new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), @@ -454,6 +487,7 @@ def list_datasets(self) -> pd.DataFrame: "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), + "Datasource Usages": [ds_list], } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -742,3 +776,156 @@ return df + + def list_dataset_tables(self): + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Table Name", + "Hidden", + "Storage Mode", + "Source Expression", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + for t in obj.get("tables", []): + 
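# "source" is a list of partition source objects; the code below reads the expression from the first entry when present +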
source = t.get("source", {}) + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Table Name": t.get("name"), + "Hidden": t.get("isHidden"), + "Storage Mode": t.get("storageMode"), + "Source Expression": source[0].get("expression") if source and isinstance(source[0], dict) else None + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + bool_cols = ["Hidden"] + df[bool_cols] = df[bool_cols].astype(bool) + + return df + + def list_dataset_columns(self): + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Table Name", + "Column Name", + "Data Type", + "Hidden", + "Column Type", + "Expression", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + for t in obj.get("tables", []): + for c in t.get("columns", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Table Name": t.get("name"), + "Column Name": c.get("name"), + "Data Type": c.get("dataType"), + "Hidden": c.get("isHidden"), + "Column Type": c.get("columnType"), + "Expression": c.get("expression"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + bool_cols = ["Hidden"] + df[bool_cols] = df[bool_cols].astype(bool) + + return df + + def list_dataset_measures(self): + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Table Name", + "Measure Name", + "Expression", + "Hidden", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + for t in obj.get("tables", []): + for m in t.get("measures", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Table Name": t.get("name"), + "Measure Name": m.get("name"), + "Expression": m.get("expression"), + "Hidden": m.get("isHidden"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + bool_cols = ["Hidden"] + df[bool_cols] = df[bool_cols].astype(bool) + + return df + + def list_dataset_expressions(self): + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Expression Name", + "Expression", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + for e in obj.get("expressions", []): + + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Expression Name": e.get("name"), + "Expression": e.get("expression"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df From a846548fd31393fbd3dcc9162a34deec9afc8fa0 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 6 Oct 2024 21:59:35 +0200 Subject: [PATCH 4/7] updates per documentation --- src/sempy_labs/admin/_scanner.py | 123 ++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 45 deletions(-) diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py index 4f03ff6f..6045c8d7 100644 --- a/src/sempy_labs/admin/_scanner.py +++ b/src/sempy_labs/admin/_scanner.py @@ -452,6 +452,8 @@ def list_datasets(self) -> pd.DataFrame: "Effective Identity Required", "Effective Identity Roles Required", 
"Target Storage Mode", + "Endorsement", + "Certified By", "Created Date", "Content Provider Type", "Sensitivity Label Id", @@ -468,6 +470,7 @@ def list_datasets(self) -> pd.DataFrame: item["datasourceInstanceId"] for item in obj.get("datasourceUsages") ] + end = obj.get("endorsementDetails", {}) new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), @@ -488,6 +491,8 @@ def list_datasets(self) -> pd.DataFrame: "labelId" ), "Datasource Usages": [ds_list], + "Endorsement": end.get("endorsement") if end else None, + "Certified By": end.get("certifiedBy") if end else None, } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -646,19 +651,21 @@ def list_datamarts(self) -> pd.DataFrame: "Datamart Id", "Description", "State", - "Last Updated Date", - "Created Date", "Modified Date", - "Created By", "Modified By", "Modified By Id", - "Created By Id", + "Configured By", + "Configured By Id", + "Suspended Batch Id" "Sensitivity Label Id", + "Endorsement", + "Certified By", ] ) for w in self.output.get("workspaces", []): for obj in w.get("datamarts", []): + end = obj.get("endorsementDetails", {}) new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), @@ -666,16 +673,17 @@ def list_datamarts(self) -> pd.DataFrame: "Datamart Id": obj.get("id"), "Description": obj.get("description"), "State": obj.get("state"), - "Last Updated Date": obj.get("lastUpdatedDate"), - "Created Date": obj.get("createdDate"), - "Modified Date": obj.get("modifiedDate"), + "Modified Date": obj.get("modifiedDateTime"), "Modified By": obj.get("modfiedBy"), - "Created By": obj.get("createdBy"), "Modified By Id": obj.get("modfiedById"), - "Created By Id": obj.get("createdById"), "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), + "Endorsement": end.get("endorsement") if end else None, + "Certified By": end.get("certifiedBy") if end else None, + "Suspended Batch Id": obj.get('suspendedBatchId'), + "Configured By": obj.get('configuredBy'), + "Configured By Id": obj.get('configuredById'), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -692,14 +700,9 @@ def list_dashboards(self) -> pd.DataFrame: "Dashboard Name", "Dashboard Id", "Description", - "State", - "Last Updated Date", - "Created Date", - "Modified Date", - "Created By", - "Modified By", - "Modified By Id", - "Created By Id", + "Is Read Only", + "Data Classification", + "App Id", "Sensitivity Label Id", ] ) @@ -709,17 +712,12 @@ def list_dashboards(self) -> pd.DataFrame: new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), - "Dashboard Name": obj.get("name"), + "Dashboard Name": obj.get("displayName"), "Dashboard Id": obj.get("id"), "Description": obj.get("description"), - "State": obj.get("state"), - "Last Updated Date": obj.get("lastUpdatedDate"), - "Created Date": obj.get("createdDate"), - "Modified Date": obj.get("modifiedDate"), - "Modified By": obj.get("modfiedBy"), - "Created By": obj.get("createdBy"), - "Modified By Id": obj.get("modfiedById"), - "Created By Id": obj.get("createdById"), + "Is Read Only": obj.get("isReadOnly"), + "Data Classification": obj.get('dataClassification'), + "App Id": obj.get('appId'), "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), @@ -728,6 +726,9 @@ def list_dashboards(self) -> pd.DataFrame: [df, pd.DataFrame(new_data, index=[0])], ignore_index=True ) + bool_cols = ["Is Read Only"] + df[bool_cols] = df[bool_cols].astype(bool) + return df def list_dataflows(self) -> 
pd.DataFrame: @@ -739,37 +740,34 @@ def list_dataflows(self) -> pd.DataFrame: "Dataflow Name", "Dataflow Id", "Description", - "State", - "Last Updated Date", - "Created Date", + "Configured By", + "Model URL", "Modified Date", - "Created By", "Modified By", - "Modified By Id", - "Created By Id", "Sensitivity Label Id", + "Endorsement", + "Certified By", ] ) for w in self.output.get("workspaces", []): for obj in w.get("dataflows", []): + end = obj.get('endorsementDetails', {}) new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), "Dataflow Name": obj.get("name"), - "Dataflow Id": obj.get("id"), + "Dataflow Id": obj.get("objectId"), "Description": obj.get("description"), - "State": obj.get("state"), - "Last Updated Date": obj.get("lastUpdatedDate"), - "Created Date": obj.get("createdDate"), - "Modified Date": obj.get("modifiedDate"), + "Modified Date": obj.get("modifiedDateTime"), "Modified By": obj.get("modfiedBy"), - "Created By": obj.get("createdBy"), - "Modified By Id": obj.get("modfiedById"), - "Created By Id": obj.get("createdById"), + "Configured By": obj.get("configuredBy"), + "Model URL": obj.get('modelUrl'), "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), + "Endorsement": end.get("endorsement") if end else None, + "Certified By": end.get("certifiedBy") if end else None, } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -777,7 +775,7 @@ def list_dataflows(self) -> pd.DataFrame: return df - def list_dataset_tables(self): + def list_dataset_tables(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ @@ -789,6 +787,7 @@ def list_dataset_tables(self): "Hidden", "Storage Mode", "Source Expression", + "Description", ] ) @@ -804,7 +803,12 @@ def list_dataset_tables(self): "Table Name": t.get("name"), "Hidden": t.get("isHidden"), "Storage Mode": t.get("storageMode"), - "Source Expression": source[0].get("expression") if source and isinstance(source[0], dict) else None + "Source Expression": ( + source[0].get("expression") + if source and isinstance(source[0], dict) + else None + ), + "Description": t.get("description"), } df = pd.concat( @@ -816,7 +820,7 @@ def list_dataset_tables(self): return df - def list_dataset_columns(self): + def list_dataset_columns(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ @@ -830,6 +834,7 @@ def list_dataset_columns(self): "Hidden", "Column Type", "Expression", + "Description", ] ) @@ -848,6 +853,7 @@ def list_dataset_columns(self): "Hidden": c.get("isHidden"), "Column Type": c.get("columnType"), "Expression": c.get("expression"), + "Description": c.get("description"), } df = pd.concat( @@ -859,7 +865,7 @@ def list_dataset_columns(self): return df - def list_dataset_measures(self): + def list_dataset_measures(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ @@ -871,6 +877,7 @@ def list_dataset_measures(self): "Measure Name", "Expression", "Hidden", + "Description", ] ) @@ -887,6 +894,7 @@ def list_dataset_measures(self): "Measure Name": m.get("name"), "Expression": m.get("expression"), "Hidden": m.get("isHidden"), + "Description": m.get("description"), } df = pd.concat( @@ -898,7 +906,7 @@ def list_dataset_measures(self): return df - def list_dataset_expressions(self): + def list_dataset_expressions(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ @@ -908,6 +916,7 @@ def list_dataset_expressions(self): "Dataset Id", "Expression Name", "Expression", + "Description", ] ) @@ -922,6 +931,7 @@ def list_dataset_expressions(self): "Dataset Id": obj.get("id"), "Expression Name": 
e.get("name"), "Expression": e.get("expression"), + "Description": e.get("description"), } df = pd.concat( @@ -929,3 +939,26 @@ def list_dataset_expressions(self): ) return df + + def list_dashboard_tiles(self) -> pd.DataFrame: + + df = pd.DataFrame(columns=[]) + + for w in self.output.get("workspaces", []): + for obj in w.get("dashboards", []): + for t in obj.get("tiles", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dashboard Name": obj.get("displayName"), + "Dashboard Id": obj.get("id"), + "Tile Id": t.get("id"), + "Title": t.get("title"), + "Report Id": t.get("reportId"), + "Dataset Id": t.get("datasetId"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df From 1921119a0b97f8b7bbc5d70737d2e949fcf54c0f Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 7 Oct 2024 09:14:30 +0200 Subject: [PATCH 5/7] updates --- src/sempy_labs/admin/_scanner.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py index 6045c8d7..d2782b39 100644 --- a/src/sempy_labs/admin/_scanner.py +++ b/src/sempy_labs/admin/_scanner.py @@ -448,10 +448,11 @@ def list_datasets(self) -> pd.DataFrame: "Dataset Name", "Dataset Id", "Configured By", - "Configured By Id", + "Description", "Effective Identity Required", "Effective Identity Roles Required", "Target Storage Mode", + "Schema May Not Be Up To Date", "Endorsement", "Certified By", "Created Date", @@ -476,8 +477,8 @@ def list_datasets(self) -> pd.DataFrame: "Workspace Id": w.get("id"), "Dataset Name": obj.get("name"), "Dataset Id": obj.get("id"), + "Description": obj.get('description'), "Configured By": obj.get("configuredBy"), - "Configured By Id": obj.get("configuredById"), "Effective Identity Required": obj.get( "isEffectiveIdentityRequired" ), @@ -493,12 +494,13 @@ def list_datasets(self) -> pd.DataFrame: "Datasource Usages": [ds_list], "Endorsement": end.get("endorsement") if end else None, "Certified By": end.get("certifiedBy") if end else None, + "Schema May Not Be Up To Date": obj.get('schemaMayNotBeUpToDate'), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True ) - bool_cols = ["Effective Identity Required", "Effective Identity Roles Required"] + bool_cols = ["Effective Identity Required", "Effective Identity Roles Required", "Schema May Not Be Up To Date"] df[bool_cols] = df[bool_cols].astype(bool) return df From ad51a901b92bb50efde287b1b578068a11094084 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 7 Oct 2024 11:19:43 +0200 Subject: [PATCH 6/7] added user functions --- src/sempy_labs/admin/_scanner.py | 840 ++++++++++++++++++++++--------- 1 file changed, 604 insertions(+), 236 deletions(-) diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py index d2782b39..be8480ce 100644 --- a/src/sempy_labs/admin/_scanner.py +++ b/src/sempy_labs/admin/_scanner.py @@ -136,14 +136,217 @@ def list_workspaces(self) -> pd.DataFrame: return df - def list_kql_databases(self) -> pd.DataFrame: + def list_dashboards(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ "Workspace Name", "Workspace Id", - "KQL Database Name", - "KQL Database Id", + "Dashboard Name", + "Dashboard Id", + "Description", + "Is Read Only", + "Data Classification", + "App Id", + "Sensitivity Label Id", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("dashboards", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": 
w.get("id"), + "Dashboard Name": obj.get("displayName"), + "Dashboard Id": obj.get("id"), + "Description": obj.get("description"), + "Is Read Only": obj.get("isReadOnly"), + "Data Classification": obj.get("dataClassification"), + "App Id": obj.get("appId"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + bool_cols = ["Is Read Only"] + df[bool_cols] = df[bool_cols].astype(bool) + + return df + + def list_dataflows(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataflow Name", + "Dataflow Id", + "Description", + "Configured By", + "Model URL", + "Modified Date", + "Modified By", + "Sensitivity Label Id", + "Endorsement", + "Certified By", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("dataflows", []): + end = obj.get("endorsementDetails", {}) + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataflow Name": obj.get("name"), + "Dataflow Id": obj.get("objectId"), + "Description": obj.get("description"), + "Modified Date": obj.get("modifiedDateTime"), + "Modified By": obj.get("modfiedBy"), + "Configured By": obj.get("configuredBy"), + "Model URL": obj.get("modelUrl"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + "Endorsement": end.get("endorsement") if end else None, + "Certified By": end.get("certifiedBy") if end else None, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_datamarts(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Datamart Name", + "Datamart Id", + "Description", + "State", + "Modified Date", + "Modified By", + "Modified By Id", + "Configured By", + "Configured By Id", + "Suspended Batch Id" "Sensitivity Label Id", + "Endorsement", + "Certified By", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datamarts", []): + end = obj.get("endorsementDetails", {}) + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Datamart Name": obj.get("name"), + "Datamart Id": obj.get("id"), + "Description": obj.get("description"), + "State": obj.get("state"), + "Modified Date": obj.get("modifiedDateTime"), + "Modified By": obj.get("modfiedBy"), + "Modified By Id": obj.get("modfiedById"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + "Endorsement": end.get("endorsement") if end else None, + "Certified By": end.get("certifiedBy") if end else None, + "Suspended Batch Id": obj.get("suspendedBatchId"), + "Configured By": obj.get("configuredBy"), + "Configured By Id": obj.get("configuredById"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_datasets(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Configured By", + "Description", + "Effective Identity Required", + "Effective Identity Roles Required", + "Target Storage Mode", + "Schema May Not Be Up To Date", + "Endorsement", + "Certified By", + "Created Date", + "Content Provider Type", + "Sensitivity Label Id", + "Datasource Usages", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + + ds_list = [] + if "datasourceUsages" in obj: + ds_list = [ + item["datasourceInstanceId"] + for item in 
obj.get("datasourceUsages") + ] + end = obj.get("endorsementDetails", {}) + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Description": obj.get("description"), + "Configured By": obj.get("configuredBy"), + "Effective Identity Required": obj.get( + "isEffectiveIdentityRequired" + ), + "Effective Identity Roles Required": obj.get( + "isEffectiveIdentityRolesRequired" + ), + "Target Storage Mode": obj.get("targetStorageMode"), + "Created Date": obj.get("createdDate"), + "Content Provider Type": obj.get("contentProviderType"), + "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( + "labelId" + ), + "Datasource Usages": [ds_list], + "Endorsement": end.get("endorsement") if end else None, + "Certified By": end.get("certifiedBy") if end else None, + "Schema May Not Be Up To Date": obj.get("schemaMayNotBeUpToDate"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + bool_cols = [ + "Effective Identity Required", + "Effective Identity Roles Required", + "Schema May Not Be Up To Date", + ] + df[bool_cols] = df[bool_cols].astype(bool) + + return df + + def list_eventhouses(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Eventhouse Name", + "Eventhouse Id", "Description", "State", "Last Updated Date", @@ -154,36 +357,28 @@ def list_kql_databases(self) -> pd.DataFrame: "Modified By Id", "Created By Id", "Sensitivity Label Id", - "Query Service URI", - "Ingestion Service URI", - "Region", - "Kusto Database Type", ] ) for w in self.output.get("workspaces", []): - for obj in w.get("KQLDatabase", []): - ep = obj.get("extendedProperties", {}) + for obj in w.get("Eventhouse", []): new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), - "KQL Database Name": obj.get("name"), - "KQL Database Id": obj.get("id"), + "Eventhouse Name": obj.get("name"), + "Eventhouse Id": obj.get("id"), "Description": obj.get("description"), "State": obj.get("state"), "Last Updated Date": obj.get("lastUpdatedDate"), "Created Date": obj.get("createdDate"), "Modified Date": obj.get("modifiedDate"), "Modified By": obj.get("modfiedBy"), + "Created By": obj.get("createdBy"), "Modified By Id": obj.get("modfiedById"), "Created By Id": obj.get("createdById"), "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), - "Query Service URI": ep.get("QueryServiceUri"), - "Ingestion Service URI": ep.get("IngestionServiceUri"), - "Region": ep.get("Region"), - "Kusto Database Type": ep.get("KustoDatabaseType"), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -191,14 +386,14 @@ def list_kql_databases(self) -> pd.DataFrame: return df - def list_eventhouses(self) -> pd.DataFrame: + def list_kql_databases(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ "Workspace Name", "Workspace Id", - "Eventhouse Name", - "Eventhouse Id", + "KQL Database Name", + "KQL Database Id", "Description", "State", "Last Updated Date", @@ -209,28 +404,36 @@ def list_eventhouses(self) -> pd.DataFrame: "Modified By Id", "Created By Id", "Sensitivity Label Id", + "Query Service URI", + "Ingestion Service URI", + "Region", + "Kusto Database Type", ] ) for w in self.output.get("workspaces", []): - for obj in w.get("Eventhouse", []): + for obj in w.get("KQLDatabase", []): + ep = obj.get("extendedProperties", {}) new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), - "Eventhouse Name": 
obj.get("name"), - "Eventhouse Id": obj.get("id"), + "KQL Database Name": obj.get("name"), + "KQL Database Id": obj.get("id"), "Description": obj.get("description"), "State": obj.get("state"), "Last Updated Date": obj.get("lastUpdatedDate"), "Created Date": obj.get("createdDate"), "Modified Date": obj.get("modifiedDate"), "Modified By": obj.get("modfiedBy"), - "Created By": obj.get("createdBy"), "Modified By Id": obj.get("modfiedById"), "Created By Id": obj.get("createdById"), "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), + "Query Service URI": ep.get("QueryServiceUri"), + "Ingestion Service URI": ep.get("IngestionServiceUri"), + "Region": ep.get("Region"), + "Kusto Database Type": ep.get("KustoDatabaseType"), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -439,70 +642,43 @@ def list_reports(self) -> pd.DataFrame: return df - def list_datasets(self) -> pd.DataFrame: + def list_sql_endpoints(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ "Workspace Name", "Workspace Id", - "Dataset Name", - "Dataset Id", + "SQL Endpoint Name", + "SQL Endpoint Id", "Configured By", - "Description", - "Effective Identity Required", - "Effective Identity Roles Required", - "Target Storage Mode", - "Schema May Not Be Up To Date", - "Endorsement", - "Certified By", - "Created Date", - "Content Provider Type", + "Configured By Id", + "Modified By", + "Modified By Id", + "Modified Date", "Sensitivity Label Id", - "Datasource Usages", ] ) for w in self.output.get("workspaces", []): - for obj in w.get("datasets", []): - - ds_list = [] - if "datasourceUsages" in obj: - ds_list = [ - item["datasourceInstanceId"] - for item in obj.get("datasourceUsages") - ] - end = obj.get("endorsementDetails", {}) + for obj in w.get("SQLAnalyticsEndpoint", []): new_data = { "Workspace Name": w.get("name"), "Workspace Id": w.get("id"), - "Dataset Name": obj.get("name"), - "Dataset Id": obj.get("id"), - "Description": obj.get('description'), + "SQL Endpoint Name": obj.get("name"), + "SQL Endpoint Id": obj.get("id"), "Configured By": obj.get("configuredBy"), - "Effective Identity Required": obj.get( - "isEffectiveIdentityRequired" - ), - "Effective Identity Roles Required": obj.get( - "isEffectiveIdentityRolesRequired" - ), - "Target Storage Mode": obj.get("targetStorageMode"), - "Created Date": obj.get("createdDate"), - "Content Provider Type": obj.get("contentProviderType"), + "Configured By Id": obj.get("configuredById"), + "Modified By": obj.get("modifiedBy"), + "Modified By Id": obj.get("modifiedById"), + "Modified Date": obj.get("modifiedDateTime"), "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( "labelId" ), - "Datasource Usages": [ds_list], - "Endorsement": end.get("endorsement") if end else None, - "Certified By": end.get("certifiedBy") if end else None, - "Schema May Not Be Up To Date": obj.get('schemaMayNotBeUpToDate'), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True ) - bool_cols = ["Effective Identity Required", "Effective Identity Roles Required", "Schema May Not Be Up To Date"] - df[bool_cols] = df[bool_cols].astype(bool) - return df def list_warehouses(self) -> pd.DataFrame: @@ -544,45 +720,6 @@ def list_warehouses(self) -> pd.DataFrame: return df - def list_sql_endpoints(self) -> pd.DataFrame: - - df = pd.DataFrame( - columns=[ - "Workspace Name", - "Workspace Id", - "SQL Endpoint Name", - "SQL Endpoint Id", - "Configured By", - "Configured By Id", - "Modified By", - "Modified By Id", - "Modified Date", - 
"Sensitivity Label Id", - ] - ) - - for w in self.output.get("workspaces", []): - for obj in w.get("SQLAnalyticsEndpoint", []): - new_data = { - "Workspace Name": w.get("name"), - "Workspace Id": w.get("id"), - "SQL Endpoint Name": obj.get("name"), - "SQL Endpoint Id": obj.get("id"), - "Configured By": obj.get("configuredBy"), - "Configured By Id": obj.get("configuredById"), - "Modified By": obj.get("modifiedBy"), - "Modified By Id": obj.get("modifiedById"), - "Modified Date": obj.get("modifiedDateTime"), - "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( - "labelId" - ), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - - return df - def list_data_source_instances(self) -> pd.DataFrame: df = pd.DataFrame( @@ -643,140 +780,7 @@ def list_users(self) -> pd.DataFrame: return df - def list_datamarts(self) -> pd.DataFrame: - - df = pd.DataFrame( - columns=[ - "Workspace Name", - "Workspace Id", - "Datamart Name", - "Datamart Id", - "Description", - "State", - "Modified Date", - "Modified By", - "Modified By Id", - "Configured By", - "Configured By Id", - "Suspended Batch Id" - "Sensitivity Label Id", - "Endorsement", - "Certified By", - ] - ) - - for w in self.output.get("workspaces", []): - for obj in w.get("datamarts", []): - end = obj.get("endorsementDetails", {}) - new_data = { - "Workspace Name": w.get("name"), - "Workspace Id": w.get("id"), - "Datamart Name": obj.get("name"), - "Datamart Id": obj.get("id"), - "Description": obj.get("description"), - "State": obj.get("state"), - "Modified Date": obj.get("modifiedDateTime"), - "Modified By": obj.get("modfiedBy"), - "Modified By Id": obj.get("modfiedById"), - "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( - "labelId" - ), - "Endorsement": end.get("endorsement") if end else None, - "Certified By": end.get("certifiedBy") if end else None, - "Suspended Batch Id": obj.get('suspendedBatchId'), - "Configured By": obj.get('configuredBy'), - "Configured By Id": obj.get('configuredById'), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - - return df - - def list_dashboards(self) -> pd.DataFrame: - - df = pd.DataFrame( - columns=[ - "Workspace Name", - "Workspace Id", - "Dashboard Name", - "Dashboard Id", - "Description", - "Is Read Only", - "Data Classification", - "App Id", - "Sensitivity Label Id", - ] - ) - - for w in self.output.get("workspaces", []): - for obj in w.get("dashboards", []): - new_data = { - "Workspace Name": w.get("name"), - "Workspace Id": w.get("id"), - "Dashboard Name": obj.get("displayName"), - "Dashboard Id": obj.get("id"), - "Description": obj.get("description"), - "Is Read Only": obj.get("isReadOnly"), - "Data Classification": obj.get('dataClassification'), - "App Id": obj.get('appId'), - "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( - "labelId" - ), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - - bool_cols = ["Is Read Only"] - df[bool_cols] = df[bool_cols].astype(bool) - - return df - - def list_dataflows(self) -> pd.DataFrame: - - df = pd.DataFrame( - columns=[ - "Workspace Name", - "Workspace Id", - "Dataflow Name", - "Dataflow Id", - "Description", - "Configured By", - "Model URL", - "Modified Date", - "Modified By", - "Sensitivity Label Id", - "Endorsement", - "Certified By", - ] - ) - - for w in self.output.get("workspaces", []): - for obj in w.get("dataflows", []): - end = obj.get('endorsementDetails', {}) - new_data = { - "Workspace Name": w.get("name"), - 
"Workspace Id": w.get("id"), - "Dataflow Name": obj.get("name"), - "Dataflow Id": obj.get("objectId"), - "Description": obj.get("description"), - "Modified Date": obj.get("modifiedDateTime"), - "Modified By": obj.get("modfiedBy"), - "Configured By": obj.get("configuredBy"), - "Model URL": obj.get('modelUrl'), - "Sensitivity Label Id": obj.get("sensitivityLabel", {}).get( - "labelId" - ), - "Endorsement": end.get("endorsement") if end else None, - "Certified By": end.get("certifiedBy") if end else None, - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - - return df - + # Dataset functions def list_dataset_tables(self) -> pd.DataFrame: df = pd.DataFrame( @@ -942,6 +946,7 @@ def list_dataset_expressions(self) -> pd.DataFrame: return df + # Dashboard functions def list_dashboard_tiles(self) -> pd.DataFrame: df = pd.DataFrame(columns=[]) @@ -964,3 +969,366 @@ def list_dashboard_tiles(self) -> pd.DataFrame: ) return df + + # User functions + def list_reports_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Report Name", + "Report Id", + "Report Type", + "Report User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("reports", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Report Name": obj.get("name"), + "Report Id": obj.get("id"), + "Report Type": obj.get("reportType"), + "Report User Access Right": u.get("reportUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_dataset_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Dataset User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Dataset User Access Right": u.get("datasetUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_lakehouse_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Lakehouse Name", + "Lakehouse Id", + "Artifact User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("Lakehouse", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Lakehouse Name": obj.get("name"), + "Lakehouse Id": obj.get("id"), + "Artifact User Access Right": 
u.get("artifactUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_notebook_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Notebook Name", + "Notebook Id", + "Artifact User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("Notebook", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Notebook Name": obj.get("name"), + "Notebook Id": obj.get("id"), + "Artifact User Access Right": u.get("artifactUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_notebook_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Notebook Name", + "Notebook Id", + "Artifact User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("Notebook", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Notebook Name": obj.get("name"), + "Notebook Id": obj.get("id"), + "Artifact User Access Right": u.get("artifactUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_warehouse_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Warehouse Name", + "Warehouse Id", + "Datamart User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("warehouses", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Warehouse Name": obj.get("name"), + "Warehouse Id": obj.get("id"), + "Datamart User Access Right": u.get("datamartUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_sql_endpoint_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "SQL Endpoint Name", + "SQL Endpoint Id", + "Datamart User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", 
+ "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("SQLAnalyticsEndpoint", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "SQL Endpoint Name": obj.get("name"), + "SQL Endpoint Id": obj.get("id"), + "Datamart User Access Right": u.get("datamartUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_dashboard_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dashboard Name", + "Dashboard Id", + "App User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + # "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("dashboards", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dashboard Name": obj.get("displayName"), + "Dashboard Id": obj.get("id"), + "App User Access Right": u.get("appUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + # "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + + def list_dataflow_users(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataflow Name", + "Dataflow Id", + "App User Access Right", + "Email Address", + "Display Name", + "Identifier", + "Graph Id", + "Principal Type", + # "User Type", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("dataflows", []): + for u in obj.get("users", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataflow Name": obj.get("name"), + "Dataflow Id": obj.get("id"), + "App User Access Right": u.get("appUserAccessRight"), + "Email Address": u.get("emailAddress"), + "Display Name": u.get("displayName"), + "Identifier": u.get("identifier"), + "Graph Id": u.get("graphId"), + "Principal Type": u.get("principalType"), + # "User Type": u.get("userType"), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df From 6f7e153c2a89a81af86254e50902ffecc7f89770 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 10 Nov 2024 12:38:52 +0200 Subject: [PATCH 7/7] fixes for user functions --- README.md | 2 +- docs/source/conf.py | 2 +- environment.yml | 2 +- pyproject.toml | 4 +- src/sempy_labs/admin/_scanner.py | 154 +++++++++++++++++++++---------- 5 files changed, 109 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 29ed0fde..04e8d196 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Semantic Link Labs [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs) -[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.8.2&style=flat)](https://readthedocs.org/projects/semantic-link-labs/) +[![Read The 
From 6f7e153c2a89a81af86254e50902ffecc7f89770 Mon Sep 17 00:00:00 2001
From: Michael
Date: Sun, 10 Nov 2024 12:38:52 +0200
Subject: [PATCH 7/7] fixes for user functions

---
 README.md                        |   2 +-
 docs/source/conf.py              |   2 +-
 environment.yml                  |   2 +-
 pyproject.toml                   |   4 +-
 src/sempy_labs/admin/_scanner.py | 154 +++++++++++++++++++++----------
 5 files changed, 109 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index 29ed0fde..04e8d196 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Semantic Link Labs

 [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs)
-[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.8.2&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
+[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.8.5&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7be8a136..7b4a9f42 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,7 +13,7 @@
 project = 'semantic-link-labs'
 copyright = '2024, Microsoft and community'
 author = 'Microsoft and community'
-release = '0.8.2'
+release = '0.8.5'

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/environment.yml b/environment.yml
index 01c321d6..5dce7700 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,7 +6,7 @@ dependencies:
   - pytest-cov
   - pytest-mock
   - pip:
-    - semantic-link-sempy>=0.8.0
+    - semantic-link-sempy>=0.8.2
     - azure-identity==1.7.1
     - azure-storage-blob>=12.9.0
     - pandas-stubs
diff --git a/pyproject.toml b/pyproject.toml
index 34abf358..c392428b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name="semantic-link-labs"
 authors = [
     { name = "Microsoft Corporation" },
 ]
-version="0.8.2"
+version="0.8.5"
 description="Semantic Link Labs for Microsoft Fabric"
 readme="README.md"
 requires-python=">=3.10,<3.12"
@@ -23,7 +23,7 @@ classifiers = [
 license= { text = "MIT License" }

 dependencies = [
-    "semantic-link-sempy>=0.8.0",
+    "semantic-link-sempy>=0.8.2",
     "anytree",
     "powerbiclient",
     "polib",
diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py
index be8480ce..b96d9974 100644
--- a/src/sempy_labs/admin/_scanner.py
+++ b/src/sempy_labs/admin/_scanner.py
@@ -744,7 +744,7 @@ def list_data_source_instances(self) -> pd.DataFrame:

         return df

-    def list_users(self) -> pd.DataFrame:
+    def list_workspace_users(self) -> pd.DataFrame:

         df = pd.DataFrame(
             columns=[
@@ -841,6 +841,8 @@ def list_dataset_columns(self) -> pd.DataFrame:
                 "Column Type",
                 "Expression",
                 "Description",
+                "Sort By Column",
+                "Summarize By",
             ]
         )

@@ -860,6 +862,8 @@ def list_dataset_columns(self) -> pd.DataFrame:
                         "Column Type": c.get("columnType"),
                         "Expression": c.get("expression"),
                         "Description": c.get("description"),
+                        "Sort By Column": c.get("sortByColumn"),
+                        "Summarize By": c.get("summarizeBy"),
                     }

                     df = pd.concat(
@@ -883,6 +887,7 @@ def list_dataset_measures(self) -> pd.DataFrame:
                 "Measure Name",
                 "Expression",
                 "Hidden",
+                "Format String",
                 "Description",
             ]
         )
@@ -900,6 +905,7 @@ def list_dataset_measures(self) -> pd.DataFrame:
                         "Measure Name": m.get("name"),
                         "Expression": m.get("expression"),
                         "Hidden": m.get("isHidden"),
+                        "Format String": m.get("formatString"),
                         "Description": m.get("description"),
                     }

@@ -946,6 +952,94 @@ def list_dataset_expressions(self) -> pd.DataFrame:

         return df

+    def list_dataset_roles(self, include_members: bool = False) -> pd.DataFrame:
+
+        df = pd.DataFrame(
+            columns=[
+                "Workspace Name",
+                "Workspace Id",
+                "Dataset Name",
+                "Dataset Id",
+                "Role Name",
+                "Model Permission",
+            ]
+        )
+
+        for w in self.output.get("workspaces", []):
+            for obj in w.get("datasets", []):
+                for e in obj.get("roles", []):
+
+                    if not include_members:
+                        new_data = {
+                            "Workspace Name": w.get("name"),
+                            "Workspace Id": w.get("id"),
+                            "Dataset Name": obj.get("name"),
+                            "Dataset Id": obj.get("id"),
+                            "Role Name": e.get("name"),
Permission": e.get("modelPermission"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + else: + for rm in e.get("members", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Role Name": e.get("name"), + "Model Permission": e.get("modelPermission"), + "Member Name": rm.get("memberName"), + "Member Id": rm.get("memberId"), + "Member Type": rm.get("memberType"), + "Identity Provider": rm.get("identityProvider"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], + ignore_index=True, + ) + + return df + + def list_dataset_row_level_security(self) -> pd.DataFrame: + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Dataset Name", + "Dataset Id", + "Role Name", + "Model Permission", + "Table Name", + "Filter Expression", + ] + ) + + for w in self.output.get("workspaces", []): + for obj in w.get("datasets", []): + for e in obj.get("roles", []): + for tp in e.get("tablePermissions", []): + new_data = { + "Workspace Name": w.get("name"), + "Workspace Id": w.get("id"), + "Dataset Name": obj.get("name"), + "Dataset Id": obj.get("id"), + "Role Name": e.get("name"), + "Model Permission": e.get("modelPermission"), + "Table Name": tp.get("name"), + "Filter Expression": tp.get("filterExpression"), + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + return df + # Dashboard functions def list_dashboard_tiles(self) -> pd.DataFrame: @@ -971,7 +1065,7 @@ def list_dashboard_tiles(self) -> pd.DataFrame: return df # User functions - def list_reports_users(self) -> pd.DataFrame: + def list_report_users(self) -> pd.DataFrame: df = pd.DataFrame( columns=[ @@ -1133,46 +1227,6 @@ def list_notebook_users(self) -> pd.DataFrame: return df - def list_notebook_users(self) -> pd.DataFrame: - - df = pd.DataFrame( - columns=[ - "Workspace Name", - "Workspace Id", - "Notebook Name", - "Notebook Id", - "Artifact User Access Right", - "Email Address", - "Display Name", - "Identifier", - "Graph Id", - "Principal Type", - "User Type", - ] - ) - - for w in self.output.get("workspaces", []): - for obj in w.get("Notebook", []): - for u in obj.get("users", []): - new_data = { - "Workspace Name": w.get("name"), - "Workspace Id": w.get("id"), - "Notebook Name": obj.get("name"), - "Notebook Id": obj.get("id"), - "Artifact User Access Right": u.get("artifactUserAccessRight"), - "Email Address": u.get("emailAddress"), - "Display Name": u.get("displayName"), - "Identifier": u.get("identifier"), - "Graph Id": u.get("graphId"), - "Principal Type": u.get("principalType"), - "User Type": u.get("userType"), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - - return df - def list_warehouse_users(self) -> pd.DataFrame: df = pd.DataFrame( @@ -1261,13 +1315,13 @@ def list_dashboard_users(self) -> pd.DataFrame: "Workspace Id", "Dashboard Name", "Dashboard Id", - "App User Access Right", + "Dashboard User Access Right", "Email Address", "Display Name", "Identifier", "Graph Id", "Principal Type", - # "User Type", + "User Type", ] ) @@ -1279,13 +1333,13 @@ def list_dashboard_users(self) -> pd.DataFrame: "Workspace Id": w.get("id"), "Dashboard Name": obj.get("displayName"), "Dashboard Id": obj.get("id"), - "App User Access Right": u.get("appUserAccessRight"), + "Dashboard User Access Right": u.get("appUserAccessRight"), "Email Address": u.get("emailAddress"), "Display Name": 
u.get("displayName"), "Identifier": u.get("identifier"), "Graph Id": u.get("graphId"), "Principal Type": u.get("principalType"), - # "User Type": u.get("userType"), + "User Type": u.get("userType"), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True @@ -1301,13 +1355,13 @@ def list_dataflow_users(self) -> pd.DataFrame: "Workspace Id", "Dataflow Name", "Dataflow Id", - "App User Access Right", + "Dataflow User Access Right", "Email Address", "Display Name", "Identifier", "Graph Id", "Principal Type", - # "User Type", + "User Type", ] ) @@ -1319,13 +1373,13 @@ def list_dataflow_users(self) -> pd.DataFrame: "Workspace Id": w.get("id"), "Dataflow Name": obj.get("name"), "Dataflow Id": obj.get("id"), - "App User Access Right": u.get("appUserAccessRight"), + "Dataflow User Access Right": u.get("appUserAccessRight"), "Email Address": u.get("emailAddress"), "Display Name": u.get("displayName"), "Identifier": u.get("identifier"), "Graph Id": u.get("graphId"), "Principal Type": u.get("principalType"), - # "User Type": u.get("userType"), + "User Type": u.get("userType"), } df = pd.concat( [df, pd.DataFrame(new_data, index=[0])], ignore_index=True