diff --git a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb
index a03e9e75de..0a4f0e38ea 100644
--- a/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb
+++ b/docs/Explore Algorithms/Anomaly Detection/Quickstart - Isolation Forests.ipynb
@@ -24,22 +24,42 @@
    "metadata": {},
    "source": [
     "## Prerequisites\n",
-    " - If you are running it on Synapse, you'll need to [create an AML workspace and set up linked Service](../../../Use%20with%20MLFlow/Overview/).\n"
+    " - If running on Synapse, you'll need to [create an AML workspace and set up a linked service](../../Use%20with%20MLFlow/Overview.md) and add the following installation cell.\n",
+    " - If running on Fabric, you'll need to add the following installation cell and attach the notebook to a lakehouse. On the left side of your notebook, select **Add** to add an existing lakehouse or create a new one."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0 numpy==1.22.4"
-   ],
+    "# %%configure -f\n",
+    "# {\n",
+    "#   \"name\": \"synapseml\",\n",
+    "#   \"conf\": {\n",
+    "#       \"spark.jars.packages\": \"com.microsoft.azure:synapseml_2.12:\",\n",
+    "#       \"spark.jars.repositories\": \"https://mmlspark.azureedge.net/maven\",\n",
+    "#       \"spark.jars.excludes\": \"org.scala-lang:scala-reflect,org.apache.spark:spark-tags_2.12,org.scalactic:scalactic_2.12,org.scalatest:scalatest_2.12,com.fasterxml.jackson.core:jackson-databind\",\n",
+    "#       \"spark.yarn.user.classpath.first\": \"true\",\n",
+    "#       \"spark.sql.parquet.enableVectorizedReader\": \"false\"\n",
+    "#   }\n",
+    "# }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {
     "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
+   },
+   "outputs": [],
+   "source": [
+    "%pip install sqlparse raiwidgets interpret-community mlflow==2.6.0 numpy==1.22.4"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -155,8 +175,14 @@
     "\n",
     "# MLFlow experiment\n",
     "artifact_path = \"isolationforest\"\n",
-    "experiment_name = f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\"\n",
-    "model_name = f\"isolation-forest-model\""
+    "model_name = f\"isolation-forest-model\"\n",
+    "\n",
+    "platform = current_platform()\n",
+    "experiment_name = {\n",
+    "    \"databricks\": f\"/Shared/isolation_forest_experiment-{str(uuid.uuid1())}/\",\n",
+    "    \"synapse\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\",\n",
+    "    \"synapse_internal\": f\"isolation_forest_experiment-{str(uuid.uuid1())}\",  # Fabric\n",
+    "}.get(platform, f\"isolation_forest_experiment\")"
    ]
   },
   {
@@ -365,7 +391,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": [
+     "hide-synapse-internal"
+    ]
+   },
    "outputs": [],
    "source": [
     "if running_on_synapse():\n",
@@ -374,9 +404,7 @@
     "    tracking_url = find_secret(\n",
     "        secret_name=\"aml-mlflow-tracking-url\", keyvault=\"mmlspark-build-keys\"\n",
     "    )  # check link in prerequisites for more information on mlflow tracking url\n",
-    "    mlflow.set_tracking_uri(tracking_url)\n",
-    "    experiment_name = f\"isolation_forest_experiment\"\n",
-    "    model_name = \"isolation-forest\""
+    "    mlflow.set_tracking_uri(tracking_url)"
    ]
   },
   {
@@ -393,7 +421,7 @@
    "outputs": [],
    "source": [
     "mlflow.set_experiment(experiment_name)\n",
-    "with mlflow.start_run():\n",
+    "with mlflow.start_run() as run:\n",
     "    va = VectorAssembler(inputCols=inputCols, outputCol=\"features\")\n",
     "    pipeline = Pipeline(stages=[va, isolationForest])\n",
     "    model = pipeline.fit(df_train)\n",
@@ -424,7 +452,10 @@
      "nuid": "57cda5af-b090-4b6d-ad07-530519e0300e",
      "showTitle": false,
      "title": ""
-    }
+    },
+    "tags": [
+     "hide-synapse-internal"
+    ]
    },
    "source": [
     "Load the trained Isolation Forest Model"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "application/vnd.databricks.v1+cell": {
      "inputWidgets": {},
      "nuid": "f44b9a1f-c2fe-4b5b-a318-4d6d73370978",
      "showTitle": false,
      "title": ""
-    }
+    },
+    "tags": [
+     "hide-synapse-internal"
+    ]
    },
    "outputs": [],
    "source": [
-    "# model_version = 1\n",
-    "# model_uri = f\"models:/{model_name}/{model_version}\"\n",
+    "# if running_on_databricks():\n",
+    "#     model_version = \n",
+    "#     model_uri = f\"models:/{model_name}/{model_version}\"\n",
+    "# elif running_on_synapse_internal():\n",
+    "#     model_uri = \"runs:/{run_id}/{artifact_path}\".format(\n",
+    "#         run_id=run.info.run_id, artifact_path=artifact_path\n",
+    "#     )\n",
     "# model = mlflow.spark.load_model(model_uri)"
    ]
   },
@@ -943,12 +982,16 @@
    "source": [
     "When you run the cell above, you will see the following global feature importance plot:\n",
     "\n",
-    "![](https://mmlspark.blob.core.windows.net/graphics/notebooks/global_feature_importance.jpg)"
+    "![](https://mmlspark.blob.core.windows.net/graphics/notebooks/global-feature-importance.jpg)"
    ]
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "tags": [
+     "hide-synapse-internal"
+    ]
+   },
    "source": [
     "Visualize the explanation in the ExplanationDashboard from https://github.com/microsoft/responsible-ai-widgets."
    ]
   },
@@ -962,7 +1005,10 @@
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "application/vnd.databricks.v1+cell": {
      "inputWidgets": {},
      "nuid": "140602e6-908e-4b32-ab9c-49dd79705171",
      "showTitle": false,
      "title": ""
-    }
+    },
+    "tags": [
+     "hide-synapse-internal"
+    ]
    },
    "outputs": [],
    "source": [