Skip to content

Commit

Permalink
docs: update isolation forest doc (#2210)
Browse files Browse the repository at this point in the history
* update isolation forest doc

* formatting

* update isolation forest
  • Loading branch information
JessicaXYWang authored Apr 18, 2024
1 parent 4487398 commit 786e59a
Showing 1 changed file with 64 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,42 @@
"metadata": {},
"source": [
"## Prerequisites\n",
" - If you are running it on Synapse, you'll need to [create an AML workspace and set up linked Service](../../../Use%20with%20MLFlow/Overview/).\n"
" - If running on Synapse, you'll need to [create an AML workspace and set up linked Service](../../Use%20with%20MLFlow/Overview.md) and add the following installation cell.\n",
" - If running on Fabric, you need to add the following installation cell and attach the notebook to a lakehouse. On the left side of your notebook, select Add to add an existing lakehouse or create a new one."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0 numpy==1.22.4"
],
"# %%configure -f\n",
"# {\n",
"# \"name\": \"synapseml\",\n",
"# \"conf\": {\n",
"# \"spark.jars.packages\": \"com.microsoft.azure:synapseml_2.12:<THE_SYNAPSEML_VERSION_YOU_WANT>\",\n",
"# \"spark.jars.repositories\": \"https://mmlspark.azureedge.net/maven\",\n",
"# \"spark.jars.excludes\": \"org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind\",\n",
"# \"spark.yarn.user.classpath.first\": \"true\",\n",
"# \"spark.sql.parquet.enableVectorizedReader\": \"false\"\n",
"# }\n",
"# }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
"outputs": [],
"source": [
"%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0 numpy==1.22.4"
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -155,8 +175,14 @@
"\n",
"# MLFlow experiment\n",
"artifact_path = \"isolationforest\"\n",
"experiment_name = f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\"\n",
"model_name = f\"isolation-forest-model\""
"model_name = f\"isolation-forest-model\"\n",
"\n",
"platform = current_platform()\n",
"experiment_name = {\n",
" \"databricks\": f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\",\n",
" \"synapse\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\",\n",
" \"synapse_internal\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\", # Fabric\n",
"}.get(platform, f\"isolation_forest_experiment\")"
]
},
{
Expand Down Expand Up @@ -365,7 +391,11 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": [
"hide-synapse-internal"
]
},
"outputs": [],
"source": [
"if running_on_synapse():\n",
Expand All @@ -374,9 +404,7 @@
" tracking_url = find_secret(\n",
" secret_name=\"aml-mlflow-tracking-url\", keyvault=\"mmlspark-build-keys\"\n",
" ) # check link in prerequisites for more information on mlflow tracking url\n",
" mlflow.set_tracking_uri(tracking_url)\n",
" experiment_name = f\"isolation_forest_experiment\"\n",
" model_name = \"isolation-forest\""
" mlflow.set_tracking_uri(tracking_url)"
]
},
{
Expand All @@ -393,7 +421,7 @@
"outputs": [],
"source": [
"mlflow.set_experiment(experiment_name)\n",
"with mlflow.start_run():\n",
"with mlflow.start_run() as run:\n",
" va = VectorAssembler(inputCols=inputCols, outputCol=\"features\")\n",
" pipeline = Pipeline(stages=[va, isolationForest])\n",
" model = pipeline.fit(df_train)\n",
Expand Down Expand Up @@ -424,7 +452,10 @@
"nuid": "57cda5af-b090-4b6d-ad07-530519e0300e",
"showTitle": false,
"title": ""
}
},
"tags": [
"hide-synapse-internal"
]
},
"source": [
"Load the trained Isolation Forest Model"
Expand All @@ -439,12 +470,20 @@
"nuid": "f44b9a1f-c2fe-4b5b-a318-4d6d73370978",
"showTitle": false,
"title": ""
}
},
"tags": [
"hide-synapse-internal"
]
},
"outputs": [],
"source": [
"# model_version = 1\n",
"# model_uri = f\"models:/{model_name}/{model_version}\"\n",
"# if running_on_databricks():\n",
"# model_version = <your_model_version>\n",
"# model_uri = f\"models:/{model_name}/{model_version}\"\n",
"# elif running_on_synapse_internal():\n",
"# model_uri = \"runs:/{run_id}/{artifact_path}\".format(\n",
"# run_id=run.info.run_id, artifact_path=artifact_path\n",
"# )\n",
"# model = mlflow.spark.load_model(model_uri)"
]
},
Expand Down Expand Up @@ -943,12 +982,16 @@
"source": [
"When you run the cell above, you will see the following global feature importance plot:\n",
"\n",
"![](https://mmlspark.blob.core.windows.net/graphics/notebooks/global_feature_importance.jpg)"
"![](https://mmlspark.blob.core.windows.net/graphics/notebooks/global-feature-importance.jpg)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"tags": [
"hide-synapse-internal"
]
},
"source": [
"Visualize the explanation in the ExplanationDashboard from https://github.com/microsoft/responsible-ai-widgets."
]
Expand All @@ -962,7 +1005,10 @@
"nuid": "140602e6-908e-4b32-ab9c-49dd79705171",
"showTitle": false,
"title": ""
}
},
"tags": [
"hide-synapse-internal"
]
},
"outputs": [],
"source": [
Expand Down

0 comments on commit 786e59a

Please sign in to comment.