From 9e252c5d67f3f69f17ca96bd5cd645990f424afa Mon Sep 17 00:00:00 2001
From: Jianjie Liu
Date: Wed, 22 Sep 2021 16:44:08 +0000
Subject: [PATCH 01/27] Mock Movielens schema v1

---
 recommenders/datasets/mock/__init__.py       |  0
 recommenders/datasets/mock/movielens.py      | 66 +++++++++++++++++
 recommenders/datasets/movielens.py           | 52 +++++++++-----
 setup.py                                     |  7 +-
 .../recommenders/datasets/mock/__init__.py   |  0
 .../datasets/mock/test_movielens.py          | 72 +++++++++++++++++++
 .../recommenders/datasets/test_movielens.py  | 21 ++++++
 7 files changed, 200 insertions(+), 18 deletions(-)
 create mode 100644 recommenders/datasets/mock/__init__.py
 create mode 100644 recommenders/datasets/mock/movielens.py
 create mode 100644 tests/unit/recommenders/datasets/mock/__init__.py
 create mode 100644 tests/unit/recommenders/datasets/mock/test_movielens.py
 create mode 100644 tests/unit/recommenders/datasets/test_movielens.py

diff --git a/recommenders/datasets/mock/__init__.py b/recommenders/datasets/mock/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py
new file mode 100644
index 0000000000..a9e73bf031
--- /dev/null
+++ b/recommenders/datasets/mock/movielens.py
@@ -0,0 +1,66 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""
+Mock dataset schema used to generate fake data for testing. It mimics the MovieLens dataset.
+"""
+try:
+    import pandera as pa
+except ImportError as e:
+    raise ImportError("Pandera not installed. Try `pip install recommenders[dev]`") from e
+
+import random
+from typing import Optional
+
+from pandera.typing import DateTime, Series
+from pandera import Field
+from pyspark.sql import SparkSession
+from pyspark.sql.types import StructType
+
+
+class MockMovielens100kSchema(pa.SchemaModel):
+    """
+    Mock dataset schema to generate fake data for testing purposes.
+    This schema is configured to mimic the MovieLens 100k dataset
+
+    http://files.grouplens.org/datasets/movielens/ml-100k/
+    """
+    # The 100k dataset has 943 total users
+    userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 943})
+    # And 1682 total items
+    itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 1682})
+    # Rating is on the scale from 1 to 5
+    rating: Series[int] = Field(in_range={"min_value": 1, "max_value": 5})
+    timestamp: Series[DateTime]
+    title: Series[str] = Field(eq="foo")
+    genres: Series[str] = Field(eq="genreA|0")
+
+    @classmethod
+    def get_df(cls, size: int = 3, seed: int = 100):
+        """Return fake movielens dataset as a Pandas Dataframe with specified rows.
+
+        Args:
+            size (int): number of rows to generate
+            seed (int, optional): seeding the pseudo-number generation. Defaults to 100.
+
+        Returns:
+            pandas.DataFrame: a mock dataset
+        """
+        random.seed(seed)
+        return cls.example(size=size)
+
+    @classmethod
+    def get_spark_df(cls, spark: SparkSession, size: int = 3, seed: int = 100, schema: Optional[StructType] = None):
+        """Return fake movielens dataset as a Spark Dataframe with specified rows
+
+        Args:
+            spark (SparkSession): spark session to load the dataframe into
+            size (int): number of rows to generate
+            seed (int, optional): seeding the pseudo-number generation. Defaults to 100.
+            schema (pyspark.sql.types.StructType, optional): dataset schema. Defaults to None.
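+
+        Example (illustrative sketch only; assumes an active SparkSession named `spark`):
+            >>> df = MockMovielens100kSchema.get_spark_df(spark, size=5)
+            >>> df.count()  # 5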
+
+        Returns:
+            pyspark.sql.DataFrame: a mock dataset
+        """
+        pandas_df = cls.get_df(size=size, seed=seed)
+        return spark.createDataFrame(pandas_df, schema=schema)
diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py
index 73d7a58f1c..463bc00853 100644
--- a/recommenders/datasets/movielens.py
+++ b/recommenders/datasets/movielens.py
@@ -7,6 +7,7 @@
 import warnings
 import pandas as pd
 from zipfile import ZipFile
+from recommenders.datasets.mock.movielens import MockMovielens100kSchema
 from recommenders.datasets.download_utils import maybe_download, download_path
 from recommenders.utils.notebook_utils import is_databricks
 from recommenders.utils.constants import (
@@ -100,6 +101,11 @@ def item_has_header(self):
     "20m": _DataFormat(",", "ml-20m/ratings.csv", True, ",", "ml-20m/movies.csv", True),
 }
 
+# Fake data for testing only
+MOCK_DATA_FORMAT = {
+    "mock100": {"size": 100, "seed": 101}
+}
+
 # 100K data genres index to string mapper. For 1m, 10m, and 20m, the genres labels are already in the dataset.
 GENRES = (
     "unknown",
@@ -136,7 +142,7 @@
     Will only use the first four column names."""
 WARNING_HAVE_SCHEMA_AND_HEADER = """Both schema and header are provided.
     The header argument will be ignored."""
-ERROR_MOVIE_LENS_SIZE = "Invalid data size. Should be one of {100k, 1m, 10m, or 20m}"
+ERROR_MOVIE_LENS_SIZE = "Invalid data size. Should be one of {100k, 1m, 10m, 20m, or mock100}"
 ERROR_HEADER = "Header error. At least user and movie column names should be provided"
 
 
@@ -154,14 +160,16 @@ def load_pandas_df(
     To load movie information only, you can use load_item_df function.
 
     Args:
-        size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m").
-        header (list or tuple or None): Rating dataset header.
-        local_cache_path (str): Path (directory or a zip file) to cache the downloaded zip file.
+        size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m", "mock100").
+        header* (list or tuple or None): Rating dataset header. 'DEFAULT_HEADER' is used for all mock data sizes ("mock*").
+        local_cache_path* (str): Path (directory or a zip file) to cache the downloaded zip file.
         If None, all the intermediate files will be stored in a temporary directory and removed after use.
-        title_col (str): Movie title column name. If None, the column will not be loaded.
-        genres_col (str): Genres column name. Genres are '|' separated string.
+        title_col* (str): Movie title column name. If None, the column will not be loaded.
+        genres_col* (str): Genres column name. Genres are '|' separated string.
         If None, the column will not be loaded.
-        year_col (str): Movie release year column name. If None, the column will not be loaded.
+        year_col* (str): Movie release year column name. If None, the column will not be loaded.
+
+        Arguments marked with (*) are not applicable when a mock dataset is specified (size = "mock*").
 
     Returns:
         pandas.DataFrame: Movie rating dataset.
@@ -185,9 +193,13 @@
     )
     """
     size = size.lower()
-    if size not in DATA_FORMAT:
+    if size not in DATA_FORMAT and size not in MOCK_DATA_FORMAT:
        raise ValueError(ERROR_MOVIE_LENS_SIZE)
 
+    if size in MOCK_DATA_FORMAT:
+        # generate fake data, passing the stored size/seed options as kwargs
+        return MockMovielens100kSchema.get_df(**MOCK_DATA_FORMAT[size])
+
     if header is None:
         header = DEFAULT_HEADER
     elif len(header) < 2:
@@ -349,17 +361,19 @@ def load_spark_df(
 
     Args:
         spark (pyspark.SparkSession): Spark session.
-        size (str): Size of the data to load. 
One of ("100k", "1m", "10m", "20m").
-        header (list or tuple): Rating dataset header.
+        size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m", "mock100").
+        header* (list or tuple): Rating dataset header. 'DEFAULT_HEADER' is used for all mock data sizes ("mock*").
         If schema is provided, this argument is ignored.
-        schema (pyspark.StructType): Dataset schema.
-        local_cache_path (str): Path (directory or a zip file) to cache the downloaded zip file.
+        schema* (pyspark.StructType): Dataset schema.
+        local_cache_path* (str): Path (directory or a zip file) to cache the downloaded zip file.
         If None, all the intermediate files will be stored in a temporary directory and removed after use.
-        dbutils (Databricks.dbutils): Databricks utility object
-        title_col (str): Title column name. If None, the column will not be loaded.
-        genres_col (str): Genres column name. Genres are '|' separated string.
+        dbutils* (Databricks.dbutils): Databricks utility object
+        title_col* (str): Title column name. If None, the column will not be loaded.
+        genres_col* (str): Genres column name. Genres are '|' separated string.
         If None, the column will not be loaded.
-        year_col (str): Movie release year column name. If None, the column will not be loaded.
+        year_col* (str): Movie release year column name. If None, the column will not be loaded.
+
+        Arguments marked with (*) are not applicable when a mock dataset is specified (size = "mock*").
 
     Returns:
         pyspark.sql.DataFrame: Movie rating dataset.
@@ -394,9 +408,13 @@
         spark_df = load_spark_df(spark, dbutils=dbutils)
     """
     size = size.lower()
-    if size not in DATA_FORMAT:
+    if size not in DATA_FORMAT and size not in MOCK_DATA_FORMAT:
        raise ValueError(ERROR_MOVIE_LENS_SIZE)
 
+    if size in MOCK_DATA_FORMAT:
+        # generate fake data, passing the stored size/seed options as kwargs
+        return MockMovielens100kSchema.get_spark_df(spark, **MOCK_DATA_FORMAT[size])
+
     schema = _get_schema(header, schema)
     if len(schema) < 2:
         raise ValueError(ERROR_HEADER)
diff --git a/setup.py b/setup.py
index 0aba6982be..7aef19fe52 100644
--- a/setup.py
+++ b/setup.py
@@ -73,7 +73,12 @@
         "cmake>=3.18.4.post1",
         "xlearn==0.40a1",
     ],
-    "dev": ["black>=18.6b4,<21", "pytest>=3.6.4", "pytest-cov>=2.12.1"],
+    "dev": [
+        "black>=18.6b4,<21",
+        "pytest>=3.6.4",
+        "pytest-cov>=2.12.1",
+        "pytest-lazy-fixture>=0.6.3",  # Allow using fixtures in pytest.mark.parametrize
+    ],
 }
 # for the brave of heart
 extras_require["all"] = list(set(sum([*extras_require.values()], [])))
diff --git a/tests/unit/recommenders/datasets/mock/__init__.py b/tests/unit/recommenders/datasets/mock/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/unit/recommenders/datasets/mock/test_movielens.py b/tests/unit/recommenders/datasets/mock/test_movielens.py
new file mode 100644
index 0000000000..446f5cd75e
--- /dev/null
+++ b/tests/unit/recommenders/datasets/mock/test_movielens.py
@@ -0,0 +1,72 @@
+from recommenders.datasets.mock.movielens import MockMovielens100kSchema
+from recommenders.datasets.movielens import DEFAULT_HEADER
+from recommenders.utils.constants import (
+    DEFAULT_USER_COL,
+    DEFAULT_ITEM_COL,
+    DEFAULT_RATING_COL,
+    DEFAULT_TIMESTAMP_COL,
+)
+
+import pytest
+import pandas
+import pyspark.sql
+from pyspark.sql import SparkSession
+from pyspark.sql.types import IntegerType, FloatType, LongType, StructField, StructType
+
+
+@pytest.fixture(scope="module")
+def default_schema():
+    return StructType([
+        StructField(DEFAULT_USER_COL, IntegerType()),
+        
StructField(DEFAULT_ITEM_COL, IntegerType()),
+        StructField(DEFAULT_RATING_COL, FloatType()),
+        StructField(DEFAULT_TIMESTAMP_COL, LongType()),
+    ])
+
+
+@pytest.fixture(scope="module")
+def custom_schema():
+    return StructType([
+        StructField("userID", IntegerType()),
+        StructField("itemID", IntegerType()),
+        StructField("rating", FloatType()),
+    ])
+
+
+@pytest.mark.parametrize("size", [10, 100])
+def test_mock_movielens_schema__has_default_col_names(size):
+    df = MockMovielens100kSchema.example(size=size)
+    for col_name in DEFAULT_HEADER:
+        assert col_name in df.columns
+
+
+@pytest.mark.parametrize("seed", [-1])  # seed for pseudo-random number generation
+@pytest.mark.parametrize("size", [0, 3, 10])
+def test_mock_movielens_schema__get_df__return_success(size, seed):
+    df = MockMovielens100kSchema.get_df(size, seed=seed)
+    assert type(df) == pandas.DataFrame
+    assert len(df) == size
+
+
+@pytest.mark.parametrize("seed", [0, 101])  # seed for pseudo-random number generation
+@pytest.mark.parametrize("size", [3, 10])
+def test_mock_movielens_schema__get_spark_df__return_success(spark: SparkSession, size, seed):
+    df = MockMovielens100kSchema.get_spark_df(spark, size, seed=seed)
+    assert type(df) == pyspark.sql.DataFrame
+    assert df.count() == size
+
+
+@pytest.mark.parametrize("schema", [
+    None,
+    pytest.lazy_fixture('default_schema'),
+    pytest.lazy_fixture('custom_schema')
+])
+def test_mock_movielens_schema__get_spark_df__with_custom_schema_return_success(spark: SparkSession, schema):
+    df = MockMovielens100kSchema.get_spark_df(spark, schema=schema)
+    assert type(df) == pyspark.sql.DataFrame
+    assert df.count() >= 0
+
+
+def test_mock_movielens_schema__get_spark_df__fail_on_empty_rows(spark: SparkSession):
+    with pytest.raises(ValueError, match="can not infer schema from empty dataset.*"):
+        MockMovielens100kSchema.get_spark_df(spark, 0)
diff --git a/tests/unit/recommenders/datasets/test_movielens.py b/tests/unit/recommenders/datasets/test_movielens.py
new file mode 100644
index 0000000000..f05cc24882
--- /dev/null
+++ b/tests/unit/recommenders/datasets/test_movielens.py
@@ -0,0 +1,21 @@
+from recommenders.datasets.movielens import DATA_FORMAT, MOCK_DATA_FORMAT
+from recommenders.datasets.movielens import load_pandas_df, load_spark_df
+
+import pyspark.sql
+from pyspark.sql import SparkSession
+
+
+def test_mock_movielens_data__no_name_collision():
+    """
+    Ensure that no names are shared between the mock and real dataset sizes
+    """
+    dataset_name = set(DATA_FORMAT.keys())
+    dataset_name_mock = set(MOCK_DATA_FORMAT.keys())
+    collision = dataset_name.intersection(dataset_name_mock)
+    assert not collision
+
+
+def test_mock_movielens_data_generation_succeed(spark: SparkSession):
+    df = load_spark_df(spark, "mock100")
+    assert type(df) == pyspark.sql.DataFrame
+    assert df.count() == 100

From feef435f6e6948c019b0b4add72c965f9fce4945 Mon Sep 17 00:00:00 2001
From: Jianjie Liu
Date: Wed, 22 Sep 2021 19:58:12 +0000
Subject: [PATCH 02/27] Mock schema experiment

---
 .../als_movielens_diversity_metrics.ipynb     | 622 +++++++++---------
 recommenders/datasets/mock/movielens.py       |  47 +-
 recommenders/evaluation/spark_evaluation.py   |   3 +-
 tests/unit/examples/test_notebooks_pyspark.py |   6 +-
 4 files changed, 362 insertions(+), 316 deletions(-)

diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb
index bf684cf9d5..31e998bbb6 100644
--- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb
+++ 
b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -2,16 +2,15 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, "source": [ "Copyright (c) Microsoft Corporation. All rights reserved.\n", "\n", "Licensed under the MIT License." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "# Apply Diversity Metrics \n", "## -- Compare ALS and Random Recommenders on MovieLens (PySpark)\n", @@ -41,11 +40,11 @@ "The comparision results show that the ALS recommender outperforms the random recommender on ranking metrics (Precision@k, Recall@k, NDCG@k, and\tMean average precision), while the random recommender outperforms ALS recommender on diversity metrics. This is because ALS is optimized for estimating the item rating as accurate as possible, therefore it performs well on accuracy metrics including rating and ranking metrics. As a side effect, the items being recommended tend to be popular items, which are the items mostly sold or viewed. It leaves the [long-tail items](https://github.com/microsoft/recommenders/blob/main/GLOSSARY.md) having less chance to get introduced to the users. This is the reason why ALS is not performing as well as a random recommender on diversity metrics. \n", "\n", "From the algorithmic point of view, items in the tail suffer from the cold-start problem, making them hard for recommendation systems to use. However, from the business point of view, oftentimes the items in the tail can be highly profitable, since, depending on supply, business can apply a higher margin to them. Recommendation systems that optimize metrics like novelty and diversity, can help to find users willing to get these long tail items. Usually there is a trade-off between one type of metric vs. another. One should decide which set of metrics to optimize based on business scenarios." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "**Coverage**\n", "\n", @@ -65,11 +64,11 @@ "p(i|R) = \\frac{|M_r (i)|}{|\\textrm{reco_df}|}\n", "$$\n", "and $M_r (i)$ denotes the users who are recommended item $i$.\n" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "\n", "**Diversity**\n", @@ -89,11 +88,11 @@ "$$\n", "\\textrm{diversity} = 1 - \\textrm{IL}\n", "$$\n" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "\n", "**Novelty**\n", @@ -112,11 +111,11 @@ "$$\n", "\\textrm{novelty} = \\sum_{i \\in N_r} \\frac{|M_r (i)|}{|\\textrm{reco_df}|} \\textrm{novelty}(i)\n", "$$\n" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "**Serendipity**\n", "\n", @@ -131,30 +130,19 @@ "\\textrm{serendipity} = \\frac{1}{|M|} \\sum_{u \\in M_r}\n", "\\frac{1}{|N_r (u)|} \\sum_{i \\in N_r (u)} \\big(1 - \\textrm{expectedness}(i|u) \\big) \\, \\textrm{relevance}(i)\n", "$$\n" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](https://github.com/Microsoft/Recommenders/blob/master/SETUP.md#dependencies-setup) to install the PySpark environment." 
- ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "System version: 3.6.13 |Anaconda, Inc.| (default, Jun 4 2021, 14:25:59) \n", - "[GCC 7.5.0]\n", - "Spark version: 2.4.8\n" - ] - } - ], "source": [ "# set the environment path to find Recommenders\n", "%load_ext autoreload\n", @@ -185,52 +173,62 @@ "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System version: 3.6.9 (default, Jan 26 2021, 15:33:00) \n", + "[GCC 8.4.0]\n", + "Spark version: 2.4.8\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "\n", "Set the default parameters." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 2, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], "source": [ "# top k items to recommend\n", - "TOP_K = 10\n", + "TOP_K = 1\n", "\n", "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", - "MOVIELENS_DATA_SIZE = '100k'\n", + "MOVIELENS_DATA_SIZE = 'mock100'\n", "\n", "# user, item column names\n", - "COL_USER=\"UserId\"\n", - "COL_ITEM=\"MovieId\"\n", - "COL_RATING=\"Rating\"" - ] + "COL_USER=\"userId\"\n", + "COL_ITEM=\"itemID\"\n", + "COL_RATING=\"rating\"" + ], + "outputs": [], + "metadata": { + "tags": [ + "parameters" + ] + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 1. Set up Spark context\n", "\n", "The following settings work well for debugging locally on VM - change when running on a cluster. We set up a giant single executor with many threads and specify memory cap. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 3, - "metadata": {}, - "outputs": [], "source": [ "# the following settings work well for debugging locally on VM - change when running on a cluster\n", "# set up a giant single executor with many threads and specify memory cap\n", @@ -238,66 +236,26 @@ "spark = start_or_get_spark(\"ALS PySpark\", memory=\"16g\")\n", "\n", "spark.conf.set(\"spark.sql.crossJoin.enabled\", \"true\")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 2. 
Download the MovieLens dataset" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4.81k/4.81k [00:00<00:00, 17.1kKB/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-------+------+------+---------+--------------------+------+\n", - "|MovieId|UserId|Rating|Timestamp| title|genres|\n", - "+-------+------+------+---------+--------------------+------+\n", - "| 26| 138| 5.0|879024232|Brothers McMullen...|Comedy|\n", - "| 26| 224| 3.0|888104153|Brothers McMullen...|Comedy|\n", - "| 26| 18| 4.0|880129731|Brothers McMullen...|Comedy|\n", - "| 26| 222| 3.0|878183043|Brothers McMullen...|Comedy|\n", - "| 26| 43| 5.0|883954901|Brothers McMullen...|Comedy|\n", - "| 26| 201| 4.0|884111927|Brothers McMullen...|Comedy|\n", - "| 26| 299| 4.0|878192601|Brothers McMullen...|Comedy|\n", - "| 26| 95| 3.0|880571951|Brothers McMullen...|Comedy|\n", - "| 26| 89| 3.0|879459909|Brothers McMullen...|Comedy|\n", - "| 26| 361| 3.0|879440941|Brothers McMullen...|Comedy|\n", - "| 26| 194| 3.0|879522240|Brothers McMullen...|Comedy|\n", - "| 26| 391| 5.0|877399745|Brothers McMullen...|Comedy|\n", - "| 26| 345| 3.0|884993555|Brothers McMullen...|Comedy|\n", - "| 26| 303| 4.0|879468307|Brothers McMullen...|Comedy|\n", - "| 26| 401| 3.0|891033395|Brothers McMullen...|Comedy|\n", - "| 26| 429| 3.0|882386333|Brothers McMullen...|Comedy|\n", - "| 26| 293| 3.0|888907015|Brothers McMullen...|Comedy|\n", - "| 26| 270| 5.0|876954995|Brothers McMullen...|Comedy|\n", - "| 26| 442| 3.0|883388576|Brothers McMullen...|Comedy|\n", - "| 26| 342| 2.0|875320037|Brothers McMullen...|Comedy|\n", - "+-------+------+------+---------+--------------------+------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], "source": [ "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", "schema = StructType(\n", " (\n", " StructField(COL_USER, IntegerType()),\n", - " StructField(COL_ITEM, IntegerType()),\n", + " StructField(COL_ITEM, LongType()),\n", " StructField(COL_RATING, FloatType()),\n", " StructField(\"Timestamp\", LongType()),\n", " )\n", @@ -305,75 +263,108 @@ "\n", "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema, title_col=\"title\", genres_col=\"genres\")\n", "data.show()" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+------+------+--------------------+-----+--------+\n", + "|userID|itemID|rating| timestamp|title| genres|\n", + "+------+------+------+--------------------+-----+--------+\n", + "| 6| 4| 4|2200-06-19 12:21:...| foo|genreA|0|\n", + "| 8| 4| 1|1970-01-01 00:00:...| foo|genreA|0|\n", + "| 8| 4| 4|2109-02-14 15:31:...| foo|genreA|0|\n", + "| 9| 2| 2|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 9| 4| 3|2210-04-25 01:58:...| foo|genreA|0|\n", + "| 3| 5| 3| 1970-01-01 00:00:00| foo|genreA|0|\n", + "| 1| 2| 1|1970-01-01 00:00:...| foo|genreA|0|\n", + "| 8| 3| 2| 1970-01-01 00:00:00| foo|genreA|0|\n", + "| 3| 10| 4|1970-01-01 00:00:...| foo|genreA|0|\n", + "| 7| 10| 2|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 8| 9| 5|1970-01-01 00:00:...| foo|genreA|0|\n", + "| 4| 2| 3|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 5| 8| 5|1970-01-01 00:00:...| foo|genreA|0|\n", + "| 2| 7| 1|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 4| 6| 2| 1970-01-01 00:00:00| foo|genreA|0|\n", + "| 2| 5| 3|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 7| 2| 
1|1970-01-01 00:00:...| foo|genreA|0|\n", + "| 8| 4| 5|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 7| 8| 1|1969-12-31 23:59:...| foo|genreA|0|\n", + "| 9| 4| 1|1970-01-01 00:00:...| foo|genreA|0|\n", + "+------+------+------+--------------------+-----+--------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### Split the data using the Spark random splitter provided in utilities" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "source": [ + "train_df, test_df = spark_random_split(data.select(COL_USER, COL_ITEM, COL_RATING), ratio=0.75, seed=123)\n", + "print (\"N train_df\", train_df.cache().count())\n", + "print (\"N test_df\", test_df.cache().count())" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "N train_df 75066\n", - "N test_df 24934\n" + "N train_df 73\n", + "N test_df 27\n" ] } ], - "source": [ - "train_df, test_df = spark_random_split(data.select(COL_USER, COL_ITEM, COL_RATING), ratio=0.75, seed=123)\n", - "print (\"N train_df\", train_df.cache().count())\n", - "print (\"N test_df\", test_df.cache().count())" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### Get all possible user-item pairs" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Note: We assume that training data contains all users and all catalog items. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 6, - "metadata": {}, - "outputs": [], "source": [ "users = train_df.select(COL_USER).distinct()\n", "items = train_df.select(COL_ITEM).distinct()\n", "user_item = users.crossJoin(items)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3. Train the ALS model on the training data, and get the top-k recommendations for our testing data\n", "\n", "To predict movie ratings, we use the rating data in the training set as users' explicit feedback. The hyperparameters used in building the model are referenced from [here](http://mymedialite.net/examples/datasets.html). We do not constrain the latent factors (`nonnegative = False`) in order to allow for both positive and negative preferences towards movies.\n", "Timing will vary depending on the machine being used to train." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 7, - "metadata": {}, - "outputs": [], "source": [ "header = {\n", " \"userCol\": COL_USER,\n", @@ -392,51 +383,42 @@ " seed=42,\n", " **header\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "source": [ + "with Timer() as train_time:\n", + " model = als.fit(train_df)\n", + "\n", + "print(\"Took {} seconds for training.\".format(train_time.interval))" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Took 4.012367556002573 seconds for training.\n" + "Took 2.5952707109972835 seconds for training.\n" ] } ], - "source": [ - "with Timer() as train_time:\n", - " model = als.fit(train_df)\n", - "\n", - "print(\"Took {} seconds for training.\".format(train_time.interval))" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "In the movie recommendation use case, recommending movies that have been rated by the users does not make sense. 
Therefore, the rated movies are removed from the recommended items.\n", "\n", "In order to achieve this, we recommend all movies to all users, and then remove the user-movie pairs that exist in the training dataset." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1464853\n", - "9430\n" - ] - } - ], "source": [ "# Score all user-item pairs\n", "dfs_pred = model.transform(user_item)\n", @@ -457,22 +439,31 @@ "top_k_reco = top_all.select(\"*\", F.row_number().over(window).alias(\"rank\")).filter(F.col(\"rank\") <= TOP_K).drop(\"rank\")\n", " \n", "print(top_k_reco.count())" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "48\n", + "10\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 4. Random Recommender\n", "\n", "We define a recommender which randomly recommends unseen items to each user. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 10, - "metadata": {}, - "outputs": [], "source": [ "# random recommender\n", "window = Window.partitionBy(COL_USER).orderBy(F.rand())\n", @@ -493,20 +484,20 @@ " .filter(F.col(\"score\") <= TOP_K)\n", " .drop(COL_RATING)\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 5. ALS vs Random Recommenders Performance Comparison" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 11, - "metadata": {}, - "outputs": [], "source": [ "def get_ranking_results(ranking_eval):\n", " metrics = {\n", @@ -527,13 +518,13 @@ " \"serendipity\": diversity_eval.serendipity()\n", " }\n", " return metrics " - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 12, - "metadata": {}, - "outputs": [], "source": [ "def generate_summary(data, algo, k, ranking_metrics, diversity_metrics):\n", " summary = {\"Data\": data, \"Algo\": algo, \"K\": k}\n", @@ -548,40 +539,40 @@ " summary.update(ranking_metrics)\n", " summary.update(diversity_metrics)\n", " return summary" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### ALS Recommender Performance Results" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 13, - "metadata": {}, - "outputs": [], "source": [ "als_ranking_eval = SparkRankingEvaluation(\n", " test_df, \n", " top_all, \n", " k = TOP_K, \n", - " col_user=\"UserId\", \n", - " col_item=\"MovieId\",\n", - " col_rating=\"Rating\", \n", + " col_user=COL_USER, \n", + " col_item=COL_ITEM,\n", + " col_rating=COL_RATING, \n", " col_prediction=\"prediction\",\n", " relevancy_method=\"top_k\"\n", ")\n", "\n", "als_ranking_metrics = get_ranking_results(als_ranking_eval)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], + "execution_count": 14, "source": [ "als_diversity_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -591,29 +582,29 @@ ")\n", "\n", "als_diversity_metrics = get_diversity_results(als_diversity_eval)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], + "execution_count": 15, "source": [ "als_results = generate_summary(MOVIELENS_DATA_SIZE, \"als\", TOP_K, als_ranking_metrics, als_diversity_metrics)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - 
"metadata": {}, "source": [ "#### Random Recommender Performance Results" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], + "execution_count": 16, "source": [ "random_ranking_eval = SparkRankingEvaluation(\n", " test_df,\n", @@ -626,13 +617,13 @@ ")\n", "\n", "random_ranking_metrics = get_ranking_results(random_ranking_eval)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], + "execution_count": 17, "source": [ "random_diversity_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -642,43 +633,48 @@ ")\n", " \n", "random_diversity_metrics = get_diversity_results(random_diversity_eval)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], + "execution_count": 18, "source": [ "random_results = generate_summary(MOVIELENS_DATA_SIZE, \"random\", TOP_K, random_ranking_metrics, random_diversity_metrics)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### Result Comparison" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], + "execution_count": 19, "source": [ "cols = [\"Data\", \"Algo\", \"K\", \"Precision@k\", \"Recall@k\", \"NDCG@k\", \"Mean average precision\",\"catalog_coverage\", \"distributional_coverage\",\"novelty\", \"diversity\", \"serendipity\" ]\n", "df_results = pd.DataFrame(columns=cols)\n", "\n", "df_results.loc[1] = als_results \n", "df_results.loc[2] = random_results " - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, + "execution_count": 20, + "source": [ + "df_results" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/html": [ "
\n", @@ -716,98 +712,89 @@ " \n", " \n", " 1\n", - " 100k\n", + " mock100\n", " als\n", - " 10\n", - " 0.047296\n", - " 0.016015\n", - " 0.043097\n", - " 0.004579\n", - " 0.385793\n", - " 7.967257\n", - " 11.659776\n", - " 0.892277\n", - " 0.878733\n", + " 1\n", + " 0.4\n", + " 0.150000\n", + " 0.4\n", + " 0.17\n", + " 0.4\n", + " 1.685475\n", + " 3.624421\n", + " None\n", + " 0.405009\n", " \n", " \n", " 2\n", - " 100k\n", + " mock100\n", " random\n", - " 10\n", - " 0.016755\n", - " 0.005883\n", - " 0.017849\n", - " 0.001890\n", - " 0.996326\n", - " 10.540834\n", - " 12.133664\n", - " 0.922288\n", - " 0.893001\n", + " 1\n", + " 0.3\n", + " 0.116667\n", + " 0.3\n", + " 0.12\n", + " 0.6\n", + " 2.446439\n", + " 3.644061\n", + " None\n", + " 0.396229\n", " \n", " \n", "\n", "
" ], "text/plain": [ - " Data Algo K Precision@k Recall@k NDCG@k Mean average precision \\\n", - "1 100k als 10 0.047296 0.016015 0.043097 0.004579 \n", - "2 100k random 10 0.016755 0.005883 0.017849 0.001890 \n", - "\n", - " catalog_coverage distributional_coverage novelty diversity \\\n", - "1 0.385793 7.967257 11.659776 0.892277 \n", - "2 0.996326 10.540834 12.133664 0.922288 \n", + " Data Algo K Precision@k Recall@k NDCG@k Mean average precision \\\n", + "1 mock100 als 1 0.4 0.150000 0.4 0.17 \n", + "2 mock100 random 1 0.3 0.116667 0.3 0.12 \n", "\n", - " serendipity \n", - "1 0.878733 \n", - "2 0.893001 " + " catalog_coverage distributional_coverage novelty diversity serendipity \n", + "1 0.4 1.685475 3.624421 None 0.405009 \n", + "2 0.6 2.446439 3.644061 None 0.396229 " ] }, - "execution_count": 23, "metadata": {}, - "output_type": "execute_result" + "execution_count": 20 } ], - "source": [ - "df_results" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### Conclusion\n", "The comparision results show that the ALS recommender outperforms the random recommender on ranking metrics (Precision@k, Recall@k, NDCG@k, and\tMean average precision), while the random recommender outperforms ALS recommender on diversity metrics. This is because ALS is optimized for estimating the item rating as accurate as possible, therefore it performs well on accuracy metrics including rating and ranking metrics. As a side effect, the items being recommended tend to be popular items, which are the items mostly sold or viewed. It leaves the long-tail less popular items having less chance to get introduced to the users. This is the reason why ALS is not performing as well as a random recommender on diversity metrics. " - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 6. Calculate diversity metrics using item feature vector based item-item similarity\n", "In the above section we calculate diversity metrics using item co-occurrence count based item-item similarity. In the scenarios when item features are available, we may want to calculate item-item similarity based on item feature vectors. In this section, we show how to calculate diversity metrics using item feature vector based item-item similarity." 
- ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], + "execution_count": 21, "source": [ "# Get movie features \"title\" and \"genres\"\n", "movies = (\n", - " data.groupBy(\"MovieId\", \"title\", \"genres\").count()\n", + " data.groupBy(COL_ITEM, \"title\", \"genres\").count()\n", " .na.drop() # remove rows with null values\n", " .withColumn(\"genres\", F.split(F.col(\"genres\"), \"\\|\")) # convert to array of genres\n", " .withColumn(\"title\", F.regexp_replace(F.col(\"title\"), \"[\\(),:^0-9]\", \"\")) # remove year from title\n", " .drop(\"count\") # remove unused columns\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], + "execution_count": 22, "source": [ "# tokenize \"title\" column\n", "title_tokenizer = Tokenizer(inputCol=\"title\", outputCol=\"title_words\")\n", @@ -816,41 +803,18 @@ "# remove stop words\n", "remover = StopWordsRemover(inputCol=\"title_words\", outputCol=\"text\")\n", "clean_data = remover.transform(tokenized_data).drop(\"title\", \"title_words\")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-------+---------------------------------------------+\n", - "|MovieId|features |\n", - "+-------+---------------------------------------------+\n", - "|167 |(1043,[128,544,1025],[1.0,1.0,1.0]) |\n", - "|1343 |(1043,[38,300,1024],[1.0,1.0,1.0]) |\n", - "|1607 |(1043,[592,821,1024],[1.0,1.0,1.0]) |\n", - "|966 |(1043,[389,502,1028],[1.0,1.0,1.0]) |\n", - "|9 |(1043,[11,342,1014,1024],[1.0,1.0,1.0,1.0]) |\n", - "|1230 |(1043,[597,740,902,1025],[1.0,1.0,1.0,1.0]) |\n", - "|1118 |(1043,[702,1025],[1.0,1.0]) |\n", - "|673 |(1043,[169,690,1027,1040],[1.0,1.0,1.0,1.0]) |\n", - "|879 |(1043,[909,1026,1027,1034],[1.0,1.0,1.0,1.0])|\n", - "|66 |(1043,[256,1025,1028],[1.0,1.0,1.0]) |\n", - "+-------+---------------------------------------------+\n", - "only showing top 10 rows\n", - "\n" - ] - } - ], + "execution_count": 23, "source": [ "# convert text input into feature vectors\n", "\n", "# step 1: perform HashingTF on column \"text\"\n", - "text_hasher = HashingTF(inputCol=\"text\", outputCol=\"text_features\", numFeatures=1024)\n", + "text_hasher = HashingTF(inputCol=\"text\", outputCol=\"text_features\", numFeatures=3)\n", "hashed_data = text_hasher.transform(clean_data)\n", "\n", "# step 2: fit a CountVectorizerModel from column \"genres\".\n", @@ -863,32 +827,54 @@ " inputCols=[\"text_features\", \"genres_features\"],\n", " outputCol=\"features\",\n", ")\n", - "feature_data = assembler.transform(vectorized_data).select(\"MovieId\", \"features\")\n", + "feature_data = assembler.transform(vectorized_data).select(COL_ITEM, \"features\")\n", "\n", "feature_data.show(10, False)" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------------------+\n", + "|itemID|features |\n", + "+------+---------------------+\n", + "|6 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|2 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|5 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|7 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|1 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|4 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|3 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|10 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|8 |[0.0,1.0,0.0,1.0,1.0]|\n", + "|9 |[0.0,1.0,0.0,1.0,1.0]|\n", + "+------+---------------------+\n", + "\n" + ] + } + ], + "metadata": {} }, { "cell_type": 
"markdown", - "metadata": {}, "source": [ "The *features* column is represented with a SparseVector object. For example, in the feature vector (1043,[128,544,1025],[1.0,1.0,1.0]), 1043 is the vector length, indicating the vector consisting of 1043 item features. The values at index positions 128,544,1025 are 1.0, and the values at other positions are all 0. " - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8738984131037538\n", - "0.8873467159479473\n" - ] - } + "execution_count": null, + "source": [ + "feature_data.count()" ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, "source": [ "als_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -903,22 +889,13 @@ "als_serendipity=als_eval.serendipity()\n", "print(als_diversity)\n", "print(als_serendipity)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8978120851519519\n", - "0.8937850286817351\n" - ] - } - ], + "execution_count": null, "source": [ "random_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -933,18 +910,48 @@ "random_serendipity=random_eval.serendipity()\n", "print(random_diversity)\n", "print(random_serendipity)" - ] + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 27, + "source": [ + "import cProfile, pstats, io\n", + "\n", + "pr = cProfile.Profile()\n", + "pr.enable()\n", + "# ... do something ...\n", + "als_eval = SparkDiversityEvaluation(\n", + " train_df = train_df, \n", + " reco_df = top_k_reco,\n", + " item_feature_df = feature_data, \n", + " item_sim_measure=\"item_feature_vector\",\n", + " col_user = COL_USER, \n", + " col_item = COL_ITEM\n", + ")\n", + "als_diversity=als_eval.diversity()\n", + "als_serendipity=als_eval.serendipity()\n", + "\n", + "pr.disable()\n", + "s = io.StringIO()\n", + "ps = pstats.Stats(pr, stream=s).sort_stats(\"cumulative\")\n", + "ps.print_stats()\n", + "print(s.getvalue())" + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "It's interesting that the value of diversity and serendipity changes when using different item-item similarity calculation approach, for both ALS algorithm and random recommender. The diversity and serendipity of random recommender are still higher than ALS algorithm. " - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### References\n", "The metric definitions / formulations are based on the following references:\n", @@ -952,24 +959,24 @@ "- G. Shani and A. Gunawardana, Evaluating recommendation systems, Recommender Systems Handbook pp. 257-297, 2010.\n", "- E. Yan, Serendipity: Accuracy’s unpopular best friend in recommender Systems, eugeneyan.com, April 2020\n", "- Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. 
Jambor, Auralist: introducing serendipity into music recommendation, WSDM 2012\n" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "# cleanup spark instance\n", "spark.stop()" - ] + ], + "outputs": [], + "metadata": {} } ], "metadata": { "kernelspec": { - "display_name": "Python (reco_pyspark)", - "language": "python", - "name": "reco_pyspark" + "name": "python3", + "display_name": "Python 3.6.9 64-bit ('.env': venv)" }, "language_info": { "codemirror_mode": { @@ -981,7 +988,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.13" + "version": "3.6.9" + }, + "interpreter": { + "hash": "7ec2189bea0434770dca7423a25e631e1cca9c4e2b4ff137a82f4dff32ac9607" } }, "nbformat": 4, diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index a9e73bf031..1c72e2a97f 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -9,11 +9,19 @@ except ImportError as e: raise ImportError("Pandera not installed. Try `pip install recommender['dev']`") from e +from recommenders.utils.constants import ( + DEFAULT_USER_COL, + DEFAULT_ITEM_COL, + DEFAULT_RATING_COL, + DEFAULT_TIMESTAMP_COL, +) + import random from typing import Optional from pandera.typing import DateTime, Series -from pandera import Field +from pandera import Field, Check +from pandera.schemas import DataFrameSchema from pyspark.sql import SparkSession from pyspark.sql.types import StructType @@ -26,9 +34,9 @@ class MockMovielens100kSchema(pa.SchemaModel): http://files.grouplens.org/datasets/movielens/ml-100k/ """ # The 100k dataset has 943 total users - userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 943}) + userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) # And 1682 total items - itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 1682}) + itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) # Rating is on the scale from 1 to 5 rating: Series[int] = Field(in_range={"min_value": 1, "max_value": 5}) timestamp: Series[DateTime] @@ -36,12 +44,18 @@ class MockMovielens100kSchema(pa.SchemaModel): genres: Series[str] = Field(eq="genreA|0") @classmethod - def get_df(cls, size: int = 3, seed: int = 100): + def get_df( + cls, + size: int = 3, seed: int = 100, + # title_col: Optional[str] = None, genres_col: Optional[str] = None + ): """Return fake movielens dataset as a Pandas Dataframe with specified rows. Args: size (int): number of rows to generate seed (int, optional): seeding the pseudo-number generation. Defaults to 100. + title_col (str, optional): if not None, append a title column. Defaults to None. + genres_col (str, optional): if not None, append a genre column. Defaults to None. 
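+
+        Example (illustrative sketch only; generated values depend on the seed):
+            >>> df = MockMovielens100kSchema.get_df(size=2)
+            >>> len(df)  # 2; columns include userID, itemID, rating, timestamp, title, genres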
Returns: pandas.DataFrame: a mock dataset @@ -50,17 +64,36 @@ def get_df(cls, size: int = 3, seed: int = 100): return cls.example(size=size) @classmethod - def get_spark_df(cls, spark: SparkSession, size: int = 3, seed: int = 100, schema: Optional[StructType] = None): + def get_spark_df( + cls, + spark: SparkSession, + size: int = 3, seed: int = 100, + # title_col: Optional[str] = None, genres_col: Optional[str] = None, + # schema: Optional[StructType] = None + ): """Return fake movielens dataset as a Spark Dataframe with specified rows Args: spark (SparkSession): spark session to load the dataframe into size (int): number of rows to generate seed (int, optional): seeding the pseudo-number generation. Defaults to 100. - schema (pyspark.sql.types.StructType optional): [description]. Defaults to None. + title_col (str, optional): if not None, append a title column. Defaults to None. + genres_col (str, optional): if not None, append a genre column. Defaults to None. + schema (pyspark.sql.types.StructType, optional): dataset schema. Defaults to None. Returns: pyspark.sql.DataFrame: a mock dataset """ pandas_df = cls.get_df(size=size, seed=seed) - return spark.createDataFrame(pandas_df, schema=schema) + return spark.createDataFrame(pandas_df) + + # @classmethod + # def _get_item_df(cls, size, title_col: Optional[str] = None, genres_col: Optional[str] = None): + # schema = DataFrameSchema() # create an empty schema + # if title_col is not None: + # # adds a title column with random alphabets + # schema = schema.add_columns({title_col: pa.Column(str, Check.str_matches(r'^[a-z]+$'))}) + # if genres_col is not None: + # # adds a genre column with '|' separated string + # schema = schema.add_columns({genres_col: pa.Column(str, Check.str_matches(r'^[a-z]+\|[0-9]$'))}) + # schema.example() \ No newline at end of file diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py index 37a73778ea..875e404519 100644 --- a/recommenders/evaluation/spark_evaluation.py +++ b/recommenders/evaluation/spark_evaluation.py @@ -3,6 +3,7 @@ import numpy as np +from pyspark.sql.types import LongType try: from pyspark.mllib.evaluation import RegressionMetrics, RankingMetrics @@ -574,7 +575,7 @@ def __init__( self.col_item_features = DEFAULT_ITEM_FEATURES_COL required_schema = StructType( ( - StructField(self.col_item, IntegerType()), + StructField(self.col_item, LongType()), StructField(self.col_item_features, VectorUDT()), ) ) diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index e4ae1d9464..c8a916c45c 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -48,9 +48,11 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name): +@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["evaluation_diversity"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, + parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=data_size)) @pytest.mark.notebooks From e4f41e7b943cbab0fa7aa0e0e10148d14709c556 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Wed, 22 Sep 2021 23:37:50 +0000 Subject: [PATCH 03/27] use csv and change 
datetime to int --- recommenders/datasets/mock/movielens.py | 15 ++++++++++++--- recommenders/evaluation/spark_evaluation.py | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index 1c72e2a97f..41d2eaec6f 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -23,7 +23,7 @@ from pandera import Field, Check from pandera.schemas import DataFrameSchema from pyspark.sql import SparkSession -from pyspark.sql.types import StructType +from pyspark.sql.types import StructField, StructType, LongType, IntegerType, StringType, FloatType class MockMovielens100kSchema(pa.SchemaModel): @@ -39,7 +39,7 @@ class MockMovielens100kSchema(pa.SchemaModel): itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) # Rating is on the scale from 1 to 5 rating: Series[int] = Field(in_range={"min_value": 1, "max_value": 5}) - timestamp: Series[DateTime] + timestamp: Series[int] title: Series[str] = Field(eq="foo") genres: Series[str] = Field(eq="genreA|0") @@ -85,7 +85,16 @@ def get_spark_df( pyspark.sql.DataFrame: a mock dataset """ pandas_df = cls.get_df(size=size, seed=seed) - return spark.createDataFrame(pandas_df) + pandas_df.to_csv('test.csv', header=False, index=False) + default_schema = StructType([ + StructField(DEFAULT_USER_COL, IntegerType()), + StructField(DEFAULT_ITEM_COL, IntegerType()), + StructField(DEFAULT_RATING_COL, FloatType()), + StructField(DEFAULT_TIMESTAMP_COL, LongType()), + StructField("title", StringType()), + StructField("genres", StringType()), + ]) + return spark.read.csv('test.csv', schema=default_schema) # @classmethod # def _get_item_df(cls, size, title_col: Optional[str] = None, genres_col: Optional[str] = None): diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py index 875e404519..5110d72e82 100644 --- a/recommenders/evaluation/spark_evaluation.py +++ b/recommenders/evaluation/spark_evaluation.py @@ -575,7 +575,7 @@ def __init__( self.col_item_features = DEFAULT_ITEM_FEATURES_COL required_schema = StructType( ( - StructField(self.col_item, LongType()), + StructField(self.col_item, IntegerType()), StructField(self.col_item_features, VectorUDT()), ) ) @@ -618,7 +618,7 @@ def _get_pairwise_items(self, df): .select(self.col_user, "i1", "i2") ) - def _get_cosine_similarity(self, n_partitions=200): + def _get_cosine_similarity(self, n_partitions=10): if self.item_sim_measure == "item_cooccurrence_count": # calculate item-item similarity based on item co-occurrence count From 772bbc6a3b96add2c6a335249213bd61957ea45a Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Thu, 23 Sep 2021 15:29:29 +0000 Subject: [PATCH 04/27] Try more experiment with 10 rows and another NB --- recommenders/datasets/mock/movielens.py | 4 ++-- recommenders/datasets/movielens.py | 3 ++- tests/unit/examples/test_notebooks_pyspark.py | 8 +++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index 41d2eaec6f..4344de7e42 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -39,7 +39,7 @@ class MockMovielens100kSchema(pa.SchemaModel): itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) # Rating is on the scale from 1 to 5 rating: Series[int] = Field(in_range={"min_value": 1, "max_value": 5}) - timestamp: Series[int] + timestamp: Series[str] = 
Field(eq="2022-2-22") title: Series[str] = Field(eq="foo") genres: Series[str] = Field(eq="genreA|0") @@ -90,7 +90,7 @@ def get_spark_df( StructField(DEFAULT_USER_COL, IntegerType()), StructField(DEFAULT_ITEM_COL, IntegerType()), StructField(DEFAULT_RATING_COL, FloatType()), - StructField(DEFAULT_TIMESTAMP_COL, LongType()), + StructField(DEFAULT_TIMESTAMP_COL, StringType()), StructField("title", StringType()), StructField("genres", StringType()), ]) diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index 463bc00853..863578902b 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -103,7 +103,8 @@ def item_has_header(self): # Fake data for testing only MOCK_DATA_FORMAT = { - "mock100": {"size": 100, "seed": 101} + "mock100": {"size": 100, "seed": 0}, + "mock10": {"size": 10, "seed": 6} } # 100K data genres index to string mapper. For 1m, 10m, and 20m, the genres labels are already in the dataset. diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index c8a916c45c..d96e5c2ca9 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -31,9 +31,11 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2764.50s in Windows, while in Linux 124.35s" ) -def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name): +@pytest.mark.parametrize("data_size", ["100k", "mock100", "mock10"]) +def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["als_deep_dive"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, + parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=data_size)) @pytest.mark.notebooks @@ -48,7 +50,7 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +@pytest.mark.parametrize("data_size", ["100k", "mock100", "mock10"]) def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["evaluation_diversity"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, From 49f874d53f3612bba81f0c9819094631ee245b81 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Thu, 23 Sep 2021 16:08:08 +0000 Subject: [PATCH 05/27] Try mock100 dataset on other NBs --- .../als_deep_dive.ipynb | 406 +++--- .../als_movielens_diversity_metrics.ipynb | 241 +--- .../tuning_spark_als.ipynb | 1281 ++++------------- tests/unit/examples/test_notebooks_pyspark.py | 8 +- tests/unit/examples/test_notebooks_python.py | 12 +- 5 files changed, 522 insertions(+), 1426 deletions(-) diff --git a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb index b633257bff..ce825152fc 100644 --- a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb @@ -2,32 +2,31 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, "source": [ "Copyright (c) Microsoft Corporation. All rights reserved.\n", "\n", "Licensed under the MIT License." 
- ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "# Spark Collaborative Filtering (ALS) Deep Dive" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Spark MLlib provides a collaborative filtering algorithm that can be used for training a matrix factorization model, which predicts explicit or implicit ratings of users on items for recommendations.\n", "\n", "This notebook presents a deep dive into the Spark collaborative filtering algorithm." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 1 Matrix factorization algorithm\n", "\n", @@ -54,11 +53,11 @@ "Owing to the term of $q_{i}^{T}p_{u}$ the loss function is non-convex. Gradient descent method can be applied but this will incur expensive computations. An Alternating Least Square (ALS) algorithm was therefore developed to overcome this issue. \n", "\n", "The basic idea of ALS is to learn one of $q$ and $p$ at a time for optimization while keeping the other as constant. This makes the objective at each iteration convex and solvable. The alternating between $q$ and $p$ stops when there is convergence to the optimal. It is worth noting that this iterative computation can be parallelised and/or distributed, which makes the algorithm desirable for use cases where the dataset is large and thus the user-item rating matrix is super sparse (as is typical in recommendation scenarios). A comprehensive discussion of ALS and its distributed computation can be found [here](http://stanford.edu/~rezab/classes/cme323/S15/notes/lec14.pdf)." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 2 Spark Mllib implementation\n", "\n", @@ -67,40 +66,28 @@ "* The uniqueness of ALS implementation is that it distributes the matrix factorization model training by using \"Alternating Least Square\" method. \n", "* In the training method, there are parameters that can be selected to control the model performance.\n", "* Both explicit and implicit ratings are supported by Spark ALS model." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 3 Spark ALS based MovieLens recommender\n", "\n", "In the following code, the MovieLens-100K dataset is used to illustrate the ALS algorithm in Spark." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](https://github.com/Microsoft/Recommenders/blob/master/SETUP.md#dependencies-setup) to install the PySpark environment." 
- ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", - "[GCC 7.2.0]\n", - "Pandas version: 0.23.0\n", - "PySpark version: 2.3.1\n" - ] - } - ], "source": [ "# set the environment path to find Recommenders\n", "import sys\n", @@ -129,21 +116,34 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", "print(\"PySpark version: {}\".format(pyspark.__version__))" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", + "[GCC 7.2.0]\n", + "Pandas version: 0.23.0\n", + "PySpark version: 2.3.1\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Data column names" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 20, - "metadata": {}, - "outputs": [], "source": [ + "MOVIELENS_DATA_SIZE = \"100k\"\n", + "\n", "COL_USER = \"UserId\"\n", "COL_ITEM = \"MovieId\"\n", "COL_RATING = \"Rating\"\n", @@ -158,80 +158,84 @@ " StructField(COL_TIMESTAMP, LongType()),\n", " )\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Model hyper parameters - these parameters are selected with reference to the benchmarking results [here](http://mymedialite.net/examples/datasets.html)." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 21, - "metadata": {}, - "outputs": [], "source": [ "RANK = 10\n", "MAX_ITER = 15\n", "REG_PARAM = 0.05" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Number of recommended items" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 22, - "metadata": {}, - "outputs": [], "source": [ "K = 10" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Initialize a Spark session." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 23, - "metadata": {}, - "outputs": [], "source": [ "spark = start_or_get_spark(\"ALS Deep Dive\", memory=\"16g\")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.1 Load and prepare data" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Data is read from csv into a Spark DataFrame." 
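Because the data size is now a notebook parameter, the same loader call should also accept the fake-data keys introduced in this patch series for fast smoke runs. A sketch, reusing the `spark` session and `schema` defined above and assuming the mock path honors the `schema` argument:

```python
# Hypothetical smoke-test override: identical call, mock size key.
dfs_mock = movielens.load_spark_df(spark=spark, size="mock100", schema=schema)
dfs_mock.show(5)
```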
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 24,
- "metadata": {},
+ "source": [
+ "dfs = movielens.load_spark_df(spark=spark, size=MOVIELENS_DATA_SIZE, schema=schema)"
+ ],
 "outputs": [
 {
- "name": "stderr",
 "output_type": "stream",
+ "name": "stderr",
 "text": [
 "../../recommenders/dataset/movielens.py:471: UserWarning: Both schema and header are provided.\n",
 "    The header argument will be ignored.\n",
@@ -240,20 +244,18 @@
 ]
 }
 ],
- "source": [
- "dfs = movielens.load_spark_df(spark=spark, size=\"100k\", schema=schema)"
- ]
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 25,
- "metadata": {
- "scrolled": true
- },
+ "source": [
+ "dfs.show(5)"
+ ],
 "outputs": [
 {
- "name": "stdout",
 "output_type": "stream",
+ "name": "stdout",
 "text": [
 "+------+-------+------+---------+\n",
 "|UserId|MovieId|Rating|Timestamp|\n",
@@ -269,45 +271,43 @@
 ]
 }
 ],
- "source": [
- "dfs.show(5)"
- ]
+ "metadata": {
+ "scrolled": true
+ }
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Data is then randomly split into training and testing sets with a 75-25 ratio."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 26,
- "metadata": {},
- "outputs": [],
 "source": [
 "dfs_train, dfs_test = spark_random_split(dfs, ratio=0.75, seed=42)"
- ]
+ ],
+ "outputs": [],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "### 3.2 Train a MovieLens model"
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "It is worth noting that the Spark ALS implementation allows dropping cold users, i.e., users in the testing data that were not seen during training, in order to make sure evaluations on the prediction results are sound."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 27,
- "metadata": {},
- "outputs": [],
 "source": [
 "als = ALS(\n",
 "    maxIter=MAX_ITER, \n",
@@ -320,49 +320,38 @@
 ")\n",
 "\n",
 "model = als.fit(dfs_train)"
- ]
+ ],
+ "outputs": [],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "### 3.3 Prediction with the model\n",
 "\n",
 "The trained model can be used to predict ratings for given test data."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 28,
- "metadata": {},
- "outputs": [],
 "source": [
 "dfs_pred = model.transform(dfs_test).drop(COL_RATING)"
- ]
+ ],
+ "outputs": [],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "With the prediction results, the model performance can be evaluated."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "RMSE score = 0.9697095550242029\n",
- "MAE score = 0.7554838330206419\n",
- "R2 score = 0.24874053010909036\n",
- "Explained variance score = 0.2547961843833687\n"
- ]
- }
- ],
 "source": [
 "evaluations = SparkRatingEvaluation(\n",
 "    dfs_test, \n",
@@ -380,23 +369,54 @@
 "    \"Explained variance score = {}\".format(evaluations.exp_var()),\n",
 "    sep=\"\\n\"\n",
 ")"
- ]
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "RMSE score = 0.9697095550242029\n",
+ "MAE score = 0.7554838330206419\n",
+ "R2 score = 0.24874053010909036\n",
+ "Explained variance score = 0.2547961843833687\n"
+ ]
+ }
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Oftentimes ranking metrics are also of interest to data scientists. 
Note usually ranking metrics apply to the scenario of recommending a list of items. In our case, the recommended items should be different from those that have been rated by the users. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 30, - "metadata": {}, + "source": [ + "# Get the cross join of all user-item pairs and score them.\n", + "users = dfs_train.select('UserId').distinct()\n", + "items = dfs_train.select('MovieId').distinct()\n", + "user_item = users.crossJoin(items)\n", + "dfs_pred = model.transform(user_item)\n", + "\n", + "# Remove seen items.\n", + "dfs_pred_exclude_train = dfs_pred.alias(\"pred\").join(\n", + " dfs_train.alias(\"train\"),\n", + " (dfs_pred['UserId'] == dfs_train['UserId']) & (dfs_pred['MovieId'] == dfs_train['MovieId']),\n", + " how='outer'\n", + ")\n", + "\n", + "dfs_pred_final = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[\"train.Rating\"].isNull()) \\\n", + " .select('pred.' + 'UserId', 'pred.' + 'MovieId', 'pred.' + \"prediction\")\n", + "\n", + "dfs_pred_final.show()" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+-------+----------+\n", "|UserId|MovieId|prediction|\n", @@ -427,42 +447,11 @@ ] } ], - "source": [ - "# Get the cross join of all user-item pairs and score them.\n", - "users = dfs_train.select('UserId').distinct()\n", - "items = dfs_train.select('MovieId').distinct()\n", - "user_item = users.crossJoin(items)\n", - "dfs_pred = model.transform(user_item)\n", - "\n", - "# Remove seen items.\n", - "dfs_pred_exclude_train = dfs_pred.alias(\"pred\").join(\n", - " dfs_train.alias(\"train\"),\n", - " (dfs_pred['UserId'] == dfs_train['UserId']) & (dfs_pred['MovieId'] == dfs_train['MovieId']),\n", - " how='outer'\n", - ")\n", - "\n", - "dfs_pred_final = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[\"train.Rating\"].isNull()) \\\n", - " .select('pred.' + 'UserId', 'pred.' + 'MovieId', 'pred.' + \"prediction\")\n", - "\n", - "dfs_pred_final.show()" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Precision@k = 0.04061505832449631\n", - "Recall@k = 0.013571438145917577\n", - "NDCG@k = 0.03699684800440573\n", - "Mean average precision = 0.003702411260039904\n" - ] - } - ], "source": [ "evaluations = SparkRankingEvaluation(\n", " dfs_test, \n", @@ -481,11 +470,23 @@ " \"Mean average precision = {}\".format(evaluations.map_at_k()),\n", " sep=\"\\n\"\n", ")" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Precision@k = 0.04061505832449631\n", + "Recall@k = 0.013571438145917577\n", + "NDCG@k = 0.03699684800440573\n", + "Mean average precision = 0.003702411260039904\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.4 Fine tune the model\n", "\n", @@ -498,48 +499,47 @@ "|`maxIters`|Maximum number of iterations|10|The more iterations the better the model converges to the optimal point.|\n", "\n", "It is always a good practice to start model building with default parameter values and then sweep the parameter in a range to find the optimal combination of parameters. The following parameter set is used for training ALS models for comparison study purposes." 
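For intuition, the parameter grid used below is just the Cartesian product of the value lists. A hypothetical equivalent of the repository's `generate_param_grid` helper could look like this (an illustrative stand-in, not the actual implementation):

```python
from itertools import product

def expand_param_grid(param_dict):
    # Expand a dict of value lists into one kwargs dict per combination.
    keys = list(param_dict)
    return [dict(zip(keys, combo)) for combo in product(*param_dict.values())]

grid = expand_param_grid({"rank": [10, 15, 20], "regParam": [0.001, 0.1, 1.0]})
assert len(grid) == 9  # 3 ranks x 3 regularization values
assert grid[0] == {"rank": 10, "regParam": 0.001}
```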
- ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 32, - "metadata": {}, - "outputs": [], "source": [ "param_dict = {\n", " \"rank\": [10, 15, 20],\n", " \"regParam\": [0.001, 0.1, 1.0]\n", "}" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Generate a dictionary for each parameter combination which can then be fed into model training." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 33, - "metadata": {}, - "outputs": [], "source": [ "param_grid = generate_param_grid(param_dict)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Train models with parameters specified in the parameter grid. Evaluate the model with, for example, the RMSE metric, and then record the metrics for visualization." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 34, - "metadata": {}, - "outputs": [], "source": [ "rmse_score = []\n", "\n", @@ -569,94 +569,98 @@ "\n", "rmse_score = [float('%.4f' % x) for x in rmse_score]\n", "rmse_score_array = np.reshape(rmse_score, (len(param_dict[\"rank\"]), len(param_dict[\"regParam\"]))) " - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 35, - "metadata": {}, - "outputs": [], "source": [ "rmse_df = pd.DataFrame(data=rmse_score_array, index=pd.Index(param_dict[\"rank\"], name=\"rank\"), \n", " columns=pd.Index(param_dict[\"regParam\"], name=\"reg. parameter\"))" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 36, - "metadata": {}, + "source": [ + "fig, ax = plt.subplots()\n", + "sns.heatmap(rmse_df, cbar=False, annot=True, fmt=\".4g\")" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "" ] }, - "execution_count": 36, "metadata": {}, - "output_type": "execute_result" + "execution_count": 36 }, { + "output_type": "display_data", "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAEKCAYAAAD6q1UVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XeYU1X+x/H3yUwywzAFZqjSUVSKCD8BCwsMyoLiosgKLBaKAhYWbCC4soC6riIKu6KsIChFBUR2F8Te2MUyUpQiTao4OCK9OS3J+f2REBhghqySBOZ+Xs+Th+Sec5Pvmct8cnNykjHWWkRExFlcsS5ARESiT+EvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHCg+1gUUZ/e1bfTR47NU5Q82xroE+RX2D8+MdQnyK5Qd8aoJp5/O/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxoPhYF1AalL1nKJ4Wl+Pft5f9A/qc0O6+rCVJt9wO1g8+H4cnPYd3zSoAEq7qQJnuPQHInT2d/I/eA8DTqi1lut8KLheFS7L4+eUXojcgB+jQPpOxYx8lzuXipZdn8tSY54u016xZjcmTxlKhYjp79+yjZ+9BbN+ew8UXN+T58U+QkpqMz+fjiSfHM2fOfADaZrZk9Og/4/G4+eqrVfTr/wA+n49OndrzyKgh+P0Wr9fLAw+M5LPPl8Ri2KWOp1M/4s9vij18gNwXhp3QHnf+JXja3oi1Fvw+Ct6bgf/7b3HVboCn/S2hfq4KVcmf+xy+9ctw1WmIp10PMC4oyCN/3kTs3h3RHFZUGGttrGs4qd3XtjkzCzuJ+IaNsXm5JN//p5OGP4llIC8XgLjadUkZNop9d/bEJKeQ9vdJ7L+nP2BJ+/uL7L+nHxgXac9OZv89/bAH9lP2vofI//g9vCu+iu7AfqHKH2yMdQklcrlcrF29iKs79iA7O4esL97mllvvZu3aDaE+s2ZO5K23P2TGjDm0zWxJr17d6d1nEPXq1cVay8aNW6hatTKLs96hUeNMDhw4yOaNi2l/dXc2bNjMqJGD+e67bF6eOouyZZM4fPhnAC66qD4zX3uBRhe1idXwT2n/8MxYlxA2V80LoSCPhM53njT8cSdAYT4AplINEm8cRO6EIUX7JJYlaeBYfh43ELwFlBnwNHmzx2J3/UB8s3a4zjmXgvkTozCa06PsiFdNOP007XMaeFevxB48WHyHYPADmMQyHHlWc1/SgsKvl2IPHcQeOkTh10txX3Iprirn4Pvhe+yB/QAULl9GQsszNyzONi2aN2XTpq1s2bKNwsJCXn99Htd16lCkT/369fj4408B+GThZ1zXqT0AGzZsZuPGLQDk5Ozgp527qVgxg4yM8uTn57Nhw2YAPvzwv3S5oSNAKPgByiYlcaaecJ2N/NvWYXMPFd8hGPwAxpMAJ/nZxzdogW/jCvAWBDZYi0koE9gnIQl7aO9prflMoWmfKPFc3oqkXv0w5cpzcFTgDMWVUQH/zp9Cffy7duLKqEDhsi+Jq14TV6Uq+HftxHP5bzDx7liVXuqcU60K32f/ELqdvT2HFs2bFumzcuUautzQkfHPTaFz52tITU0hPb08e/YcDYLmzZrg8bjZtGkr1lrcbjeX/F9jln21ki5drqV6jXNCfa+//moe/8tDVKqYwXXX94r8ICUk7oJmeK7qjimbSt7MMSe0xze8nMKsd0K38xdMJrHHEKy3EPJzyZ0yMprlRo3O/KOk4ItF7LuzJwcfe5gyt94W3HqSV2fWYg8d4vDz40geNpLUp8bj3/Ej+HxRrbc0M+bEn/vxZ+MPDn2M1q0vY8ni92jd6jKys3Pwer2h9ipVKjF16rP07Xt/aN+bb7mbZ54exRefLeDQocN4vUeP2bx579Loojb8/sbbeWTUcdMOElG+9UvJnTCEvNnj8GR2LdJmksvhqlQD36aVoW3uS68hb+YYcv82EO/y/+Bpf3O0S46KiJz5G2PSgIeAzkDF4OafgHnAk9bafcXs1x/oD/BMo3r0qlk1EuXFlHf1SuKqVMOkpuHfvRP3RU1Cba4KFSlctRyAwsWfU7j4cwASru4Efn9M6i2NtmfnUKP60bPy6tWqkpNT9A29nJwddO3WD4CyZZPocsO1HDgQmNpLSUlm/rzpjBj5FF8uPvo+TNaXy8i8sgsAv23Xmnr16p7w2Is+/ZK6dWuRkVGe3btL53TCmcq/bR2mfCUokwzBqaK4BpfiXbcU/MEn6qQUXJVr4t++CQDv6iwSbx4aq5IjKlJn/q8De4FMa22GtTYDaBvcNqe4nay1k6y1zay1zUpT8LuqVgtdjzu3HiY+HntgP4XLFuNu2hyTnIxJTsbdtDmFyxYDYNLKBf5NTibx2uvJe29BTGovjZYsXc5559Whdu0auN1uunW7njcXvF+kT0ZG+dArhGFDBzJ12iwA3G43c+dM4ZVX3mDu3KLHpGLFDAA8Hg9DBg9g0qQZAJx7bu1Qn6ZNGuHxuBX8UWLKVw5dd1WpDXHxoeAHiG90Bd7VXxzdIfcwJjEJk14FgLi6jfDv2h6tcqMqUnP+ta21o4/dYK39ERhtjLmtmH3OWskPjsB9URNMahrlps0h99WXA//JgPx35uNp2ZqEKzuAz4vNL+Dg6EcAsIcOkjtrOmnjAisJcmdOwx4KnF2WvWMQcXXODW33/5Adg5GVTj6fj3vuHc7bb71GnMvF1GmzWbPmW0aNHMzSZStYsOAD2rS5gscfewiLZdGiLAYOehiArl070arVpaRnlKdnz24A3N73PlasWM3g+++i47XtcLlcTJw4nU8WfgZAlxs6csstN1JY6CUvN4+bbr4rZmMvbRK6DMBVqz4mKYUy946ncOEbod8977KPiK/fnPjGrbB+H3gLyJ87PrSvSauASU3Hv3Xt0Tu0fvLfnExi13ux1g95h8mfPynaw4qKiCz1NMa8D3wITLPW7ghuqwz0Bn5rrW13qvs4m5Z6SlFn+lJPKdnZtNRTThTrpZ7dgQzgP8aYPcaYPcBCIB3oWtKOIiISeRGZ9rHW7gWGBi9FGGP6AC9H4nFFRCQ8sVjq+UgMHlNERI4RqaWeK4trAioX0yYiIlESqdU+lYEOBJZ2HssAn0foMUVEJEyRCv8FQLK1dvnxDcaYhRF6TBERCVOk3vC9vYS2myLxmCIiEj59t4+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQ
dS+IuIOJDCX0TEgRT+IiIOpPAXEXGg+FgXUJzUaS/HugT5pc5pFesKROQUdOYvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOFBY4W+MSTjJtvTTX46IiERDuGf+/zTGuI/cMMZUBT6ITEkiIhJp4Yb/v4E5xpg4Y0xt4D3goUgVJSIikRUfTidr7YvGGA+BJ4HawB3W2s8jWZiIiEROieFvjLn/2JtADWA5cJkx5jJr7dhIFne2GP7Xsfz3s8Wkly/Hv1954YT2xV+tZNCwR6hWtQoA7dpcwV233Ux+fgG9BgyhoLAQn9fHb9v+hj/2vRWArKVf88zzU/D7LUlJiTz+8APUrH5OVMdVmnVon8nYsY8S53Lx0sszeWrM80Xaa9asxuRJY6lQMZ29e/bRs/cgtm/P4eKLG/L8+CdISU3G5/PxxJPjmTNnPgBtM1syevSf8XjcfPXVKvr1fwCfz0e5cmlMfvEZ6tatRX5ePn37P8Dq1etjMexSx9OpH/HnN8UePkDuC8NOaI87/xI8bW/EWgt+HwXvzcD//be4ajfA0/6WUD9Xharkz30O3/pluOo0xNOuBxgXFOSRP28idu+OaA4rKoy1tvhGY0aWtLO19pHTXlFQ4a7NxRd2hlm6fBVJZcrwp8eeLjb8p86cy4QxRX9c1lpyc/NISipDoddLz7sGM+yeO7i4UX2u/UNfnn1yBOfWrsmsfy5g1Zr1PD78gWgN6Vcpc06rWJdQIpfLxdrVi7i6Yw+ys3PI+uJtbrn1btau3RDqM2vmRN56+0NmzJhD28yW9OrVnd59BlGvXl2stWzcuIWqVSuzOOsdGjXO5MCBg2zeuJj2V3dnw4bNjBo5mO++y+blqbMY/cRwDh0+zGN/GccFF5zL+L//lfZXd4/hT6Bk+4dnxrqEsLlqXggFeSR0vvOk4Y87AQrzATCVapB44yByJwwp2iexLEkDx/LzuIHgLaDMgKfJmz0Wu+sH4pu1w3XOuRTMnxiF0ZweZUe8asLpV+KZfyTDvTRp1uQituf872cGxhiSksoA4PV68Xq9GBM4bgY4fPhnAA4eOkzFChmnrV6na9G8KZs2bWXLlm0AvP76PK7r1KFI+NevX48HBo8C4JOFnzH3jSkAbNiwOdQnJ2cHP+3cTcWKGbjd8eTn54faP/zwvwx98I+8PHUW9eufz+inxgOwfv0matWqTqVKFfjpp13RGG6p5t+2DpNWofgOweAHMJ4EOMnJbnyDFvg2rgBvQWCDtZiEMljAJCRhD+09zVWfGcKa8zfGnA8MJjDfH9rHWntlZMoqfVZ8s5Yuve6mUoUMBg/oy3l1awHg8/nodtsgtm3/gR5dfkfjhhcC8Miwe7lr8AgSEzyULZvEa5PGxbL8UuWcalX4PvuH0O3s7Tm0aN60SJ+VK9fQ5YaOjH9uCp07X0Nqagrp6eXZs+doEDRv1gSPx82mTVux1uJ2u7nk/xqz7KuVdOlyLdVrBKbpVq5aww2dO/LZ50to3qwJtWpVp3q1qgr/KIm7oBmeq7pjyqaSN3PMCe3xDS+nMOud0O38BZNJ7DEE6y2E/Fxyp5Q4AXLWCne1zxzga2A4MOSYi4ShwQXn8sHcafxz2gRu+n0nBj30aKgtLi6OudOe56N/zWDVmm/ZsHkrANNn/4t/PP0oH/37FTp3bM9Tz74Yo+pLnyOvro51/PTng0Mfo3Xry1iy+D1at7qM7OwcvF5vqL1KlUpMnfosffveH9r35lvu5pmnR/HFZws4dOgwXq8PgNFPPUe58mksXfI+AwbcxtfLv8Hr80VwhHIs3/ql5E4YQt7scXgyuxZpM8nlcFWqgW/TytA296XXkDdzDLl/G4h3+X/wtL852iVHRbjh77XW/sNau9hau+zIpbjOxpirj7meZoyZYoxZaYx5zRhTuYT9+htjlhpjlk6ePvN/GMaZLbls2dD0TusrWuD1etm7b3+RPqkpyTT/v8Z8mrWUPXv3sX7j5tCrgGuuas3yb9ZEve7Sant2DjWOefO8erWq5Bw3bZeTs4Ou3frRvEUH/jxiNAAHDhwEICUlmfnzpjNi5FN8ufir0D5ZXy4j88ouXN7ydyxalMXGjVsAOHjwEH373U+z5u3p3WcQFStkhKacJHr829ZhyleCMsmhbXENLsW7bin4g0/GSSm4KtfEv30TAN7VWcTVOD8W5UZcuOH/pjHmbmNMVWNM+pFLCf3/esz1Z4AcoBOwBCj2nRNr7SRrbTNrbbO+PXuEWdqZb9fuPaGzw1Vr1uO3lnJpqezZu48DBw8BkJefT9aSr6lTqwapKSkcOvwzW7dlA/D5kq+pW6tmzOovbZYsXc5559Whdu0auN1uunW7njcXvF+kT0ZG+dArhGFDBzJ12iwA3G43c+dM4ZVX3mDu3AVF9qlYMfC+jMfjYcjgAUyaNAOAtLRU3O7AZyRvv+0mFn36JQeDx10iy5Q/eq7pqlIb4uIh9+jPPr7RFXhXf3F0h9zDmMQkTHpgZV5c3Ub4d22PVrlRFdacP9Ar+O+xUz0WqBvGvs2stU2C18cZY3qV2PssNGTkkyz5eiX79h3gqs63cPftt4amCLrfcC3vf/Ips//1FnHxcSR6PIx5ZBjGGHbu3svDf3kan9+P9Vs6XNmKzJaXAjBq6CDue/hxjMuQmpLMYw/dF8shlio+n4977h3O22+9RpzLxdRps1mz5ltGjRzM0mUrWLDgA9q0uYLHH3sIi2XRoiwGDnoYgK5dO9Gq1aWkZ5SnZ89uANze9z5WrFjN4PvvouO17XC5XEycOJ1PFn4GQP0L6/HyS3/H5/exdu239Os/OGZjL20SugzAVas+JimFMveOp3DhG4GAB7zLPiK+fnPiG7fC+n3gLSB/7vjQviatAiY1Hf/WtUfv0PrJf3MyiV3vxVo/5B0mf/6kaA8rKkpc6vmL79SYbGAsgUUrA4BzbfCBjDErrbWNT3UfZ9NSTynqTF/qKSU7m5Z6yolOy1LPYxljGgENgMQj26y104vp/iKQErw+DagA7DTGVCHwITEREYmhcJd6jgQyCYT/28A1wKfAScO/uM8HWGt/NMZ88osqFRGR0ybcN3xvBK4CfrTW9gEuBk74mucw6YNjIiIxFu60T5611m+M8RpjUoGfKOHNXmPMyuKagGKXeoqISHScMvxNYL3bSmNMOQJz+cuAQ8DiEnarDHQAjv9ctAH0baAiIjF2yvC31lpjTBNr7T7gBWPMu0Cqtba4s3uABUCytfaEN3eNMQt/cbUiInJahDvtk2WMaW6tXWKt3Xqqztba20touync4kREJDLCDf+2wB3GmO+AwwSmb2w46/VFROTME274XxPRKkREJKrC/TOO30W6EBERiZ5w1/mLiEgpovAXEXEghb+IiAMp/EVEH
EjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQeKj3UBxfL7Yl2BiEippTN/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBwoPtYFlAbDn/gb//18Cenl0/j39AkntC/+eiWDHvoL1apWBqBd6yu4q08P8vML6DVwKAUFhfh8fn6b2ZI/3n4zAA8/Po6lK74huWwSAI//6T4urFc3eoMq5Tq0z2Ts2EeJc7l46eWZPDXm+SLtNWtWY/KksVSomM7ePfvo2XsQ27fncPHFDXl+/BOkpCbj8/l44snxzJkzH4Ar2/6GJ58cjsvl4vChw9zW9z42bdrKvff057bbeuD1etm1cw99+9/Ptm3bYzHsUsfTqR/x5zfFHj5A7gvDTmiPO/8SPG1vxFoLfh8F783A//23uGo3wNP+llA/V4Wq5M99Dt/6ZbjqNMTTrgcYFxTkkT9vInbvjmgOKyqMtTbWNZxU4U8bzszCTmLp8m9IKpPInx4fW2z4T535LyY8NbLIdmstubl5JCWVodDrpefdDzLsnv5c3PBCHn58HG2uaE77tr+J1jBOmzLVM2NdQolcLhdrVy/i6o49yM7OIeuLt7nl1rtZu3ZDqM+smRN56+0PmTFjDm0zW9KrV3d69xlEvXp1sdayceMWqlatzOKsd2jUOJP9+w+wZvUiuvy+D+vWbeTOO3rRvHkTbu97H5ltruDLxV+Rm5vHHf170qbN5dx0810x/AmUbP/wzFiXEDZXzQuhII+EzneeNPxxJ0BhPgCmUg0SbxxE7oQhRfskliVp4Fh+HjcQvAWUGfA0ebPHYnf9QHyzdrjOOZeC+ROjMJrTo+yIV004/TTtcxo0a9KItNSU/3k/YwxJSWUA8Hq9eL0+DGEdN/kVWjRvyqZNW9myZRuFhYW8/vo8ruvUoUif+vXr8fHHnwLwycLPuK5TewA2bNjMxo1bAMjJ2cFPO3dTsWIGEHgyT00J/D9IS0shJydwtrjwP5+Tm5sHwJeLl1G9WtXID9Ih/NvWYXMPFd8hGPwAxpMAJznZjW/QAt/GFeAtCGywFpMQ+L00CUnYQ3tPa81nCk37RMmK1evo0vuPVKqQweABt3FenVoA+Hw+uvW9l23bc+hxw7U0bnhBaJ9nX5zBP6bO4rJLLua+O3vj8bhjVX6pck61Knyf/UPodvb2HFo0b1qkz8qVa+hyQ0fGPzeFzp2vITU1hfT08uzZczQImjdrgsfjZtOmrQDcccdg3pw/g9zcPA4cPEjL33Q64bH79O7Bu+99EpmByUnFXdAMz1XdMWVTyZs55oT2+IaXU5j1Tuh2/oLJJPYYgvUWQn4uuVNGnrBPaRCRM39jTJox5kljzDpjzO7gZW1wW7lIPOaZrMH55/HBnJf459TnuOn3v2PQn/4SaouLi2Puy+P5aO5UVq39lg2btwJw7x29ePPVF5j94jj2HzzIlFffiFH1pY8xJ766On7688Ghj9G69WUsWfwerVtdRnZ2Dl6vN9RepUolpk59lr597w/te889/eh03a3UrtuMadNm8/SYoqFx001daHbJxTz9zD8iMCopjm/9UnInDCFv9jg8mV2LtJnkcrgq1cC3aWVom/vSa8ibOYbcvw3Eu/w/eNrfHO2SoyJS0z6vA3uBTGtthrU2A2gb3DanuJ2MMf2NMUuNMUsnT58VodKiL7lsUmh6p/XlzfF6fezdt79In9SUZJo3vYhPv/wKgIoV0jHG4PG46dyxHavWfhv1ukur7dk51Kh+Tuh29WpVQ1M0R+Tk7KBrt340b9GBP48YDcCBAwcBSElJZv686YwY+RRfLg4crwoV0ml8UQMWL/kagNfnzOfyy5uF7u+qK1vx0LBBdO7Sm4KCgoiOT07Ov20dpnwlKJMc2hbX4FK865aC3xfYkJSCq3JN/Ns3AeBdnUVcjfNjUW7ERSr8a1trR1trfzyywVr7o7V2NFCzuJ2stZOstc2stc369vxDhEqLvl2794bODletWY/fbymXlsqevfs5cDAwX5mXn0/W0uXUqVkdgJ279gCBM9KPF2VRr26t2BRfCi1ZupzzzqtD7do1cLvddOt2PW8ueL9In4yM8qFXCMOGDmTqtMDJiNvtZu6cKbzyyhvMnbsg1H/v3v2kpaVSL7giq91VrVm3LvAGcpMmDZnw/JPc0KUPO3fujsYQJciUrxy67qpSG+Li4Zj3COIbXYF39RdHd8g9jElMwqRXASCubiP8u0rnyqxIzfl/Z4x5EJhmrd0BYIypDPQGvo/QY8bMkFFPseTrVezbf4CruvTi7ttuDk0RdO/ckfcXfsrsf79DXJyLxIQExox6EGMMO3fv4eG/jsPn82Otnw5tW5HZsgUAQx97mr379mOt5YLz6jJy8IBYDrFU8fl83HPvcN5+6zXiXC6mTpvNmjXfMmrkYJYuW8GCBR/Qps0VPP7YQ1gsixZlMXDQwwB07dqJVq0uJT2jPD17dgPg9r73sWLFau64awivz56E32/Zt3cfffs/AMDoJ/5McnJZZs0MrBj5/vvt3NClT2wGX8okdBmAq1Z9TFIKZe4dT+HCNwIBD3iXfUR8/ebEN26F9fvAW0D+3PGhfU1aBUxqOv6ta4/eofWT/+ZkErvei7V+yDtM/vxJ0R5WVERkqacxpjwwDLgeqAxYYAcwHxhtrd1zqvs4m5Z6SlFn+lJPKdnZtNRTThTuUs+InPlba/caY14GPgCyrLWh11nGmKuBdyPxuCIiEp5IrfYZBMwD/gh8Y4y5/pjmv0biMUVEJHyRmvPvB1xirT1kjKkNvGGMqW2t/TvoU0wiIrEWqfCPOzLVY63daozJJPAEUAuFv4hIzEVqqeePxpgmR24Enwh+B1QALorQY4qISJgiFf49gR+P3WCt9VprewKtI/SYIiISpkit9skuoe2zSDymiIiET9/qKSLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQMZaG+saHMkY099aOynWdcgvo+N39tKxC9CZf+z0j3UB8qvo+J29dOxQ+IuIOJLCX0TEgRT+seP4OceznI7f2UvHDr3hKyLiSDrzFxFxIIX/aWCMudoYs94Ys9EYM+wk7QnGmNnB9i+NMbWPaXsouH29MabDMdtfMsb8ZIz5JjqjkOOF
cVxbG2O+MsZ4jTE3xqJGOblT/f6YgGeDx3alMeb/ol1jrCn8fyVjTBzwPHAN0ADoYYxpcFy324G91trzgHHA6OC+DYA/AA2Bq4EJwfsDmBrcJjEQ5nHdBvQGXotudRKGqZT8+3MNUC946Q/8Iwo1nVEU/r9eC2CjtXaztbYAmAVcf1yf64FpwetvAFcZY0xw+yxrbb61dguwMXh/WGv/C+yJxgDkpE55XK21W621KwF/LAqU4oXx+3M9MN0GZAHljDFVo1PdmUHh/+tVA74/5nZ2cNtJ+1hrvcB+ICPMfSU2dGxKN8cfX4X/r2dOsu34JVTF9QlnX4kNHZvSzfHHV+H/62UDNY65XR34obg+xph4II3AS9Jw9pXY0LEp3Rx/fBX+v94SoJ4xpo4xxkPgDdz5x/WZD/QKXr8R+NgGPmAxH/hDcDVQHQJvPi2OUt1SsnCOq5xhr+LzAAADzklEQVS95gM9g6t+LgP2W2tzYl1UNMXHuoCznbXWa4z5I/AeEAe8ZK1dbYx5FFhqrZ0PTAFmGGM2Ejjj/0Nw39XGmNeBNYAXGGCt9QEYY2YCmUAFY0w2MNJaOyXKw3OscI6rMaY58C+gPNDJGPOItbZhDMuWoJP9/gBuAGvtC8DbQEcCiyx+BvrEptLY0Sd8RUQcSNM+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/kTOQMaa3MeacWNchpZfCX84awQ/knDH/Z4Of1o6U3sD/FP4RrkdKGa3zlzNa8G8fvAN8AlwOdAYuAB4BEoBNQB9r7SFjTEdgLLAL+Aqoa6393Snu+13gS6Ap8C3Q01r7szFmBNAJKAN8DtxhrbXGmIXB2y0JfEr0W2A44AF2Azdba3cYY0YBdYCqwPnA/cBlBL5KeDvQyVpbaIy5JFhzcrDu3sH7nhrslxscd4Pj+1lrc46vx1r7zP/y8xUHs9bqossZewFqE/jK5MuCtysA/wXKBm8PBUYAiQS+pbFOcPtMYEEY922BlsHbLwGDg9fTj+k3g0BYAywEJhzTVp6jJ1F9gWeC10cBnxL4VOnFBD5Fek2w7V8EnsTcBIK7YnB7dwKfJD7yOM2C10/Vb0JJ49RFl5Nd9DJRzgbf2cB3rkPg7LkB8FngTyLgAb4ALgQ228DfRYBA+PcP476/t9Z+Frz+CjAIeBpoa4x5EEgC0oHVwJvBfrOP2b86MDv4XfAeYMsxbe/YwNn9KgJfEfFucPsqAk88FwCNgA+CY4kDTvb9MqfqN/sk+4iUSOEvZ4PDx1w3wAfW2h7HdjDGNP2F9338vKc1xiQCEwiceX8fnMJJLKae8cBYG/iun0wCZ/xH5ANYa/3GmEJr7ZHH8hP43TPAamvt5aeo8VT9DhezXaRYZ8ybZyJhygJaGmPOAzDGJBljzgfWAXWP+fvI3cO8v5rGmCOh2oPAVM2RoN9ljEkm8E2sxUkjMDcPR7+5NVzrgYpHHt8Y4zbGHPliuINAShj9RH4Rhb+cVay1Owm8KTrTGLOSwJPBhdbaXOBu4F1jzKfADgJ/MQ1jTDNjzORi7nIt0Ct4X+nAP6y1+4AXCUzP/JvA1zsXZxQwxxiziMAbsf/LWAoIPLGMNsasAJYDVwSbpwIvGGOWE5jmKa6fyC+i1T5Sahhjkm1g1Y8h8MfXN1hrx5XQvzaBN4UbRalEkTOGzvylNOkXPFNeTWA6ZmKM6xE5Y+nMX0TEgXTmLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxoP8HkqdScClQ374AAAAASUVORK5CYII=\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX8AAAEKCAYAAAD6q1UVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XeYU1X+x/H3yUwywzAFZqjSUVSKCD8BCwsMyoLiosgKLBaKAhYWbCC4soC6riIKu6KsIChFBUR2F8Te2MUyUpQiTao4OCK9OS3J+f2REBhghqySBOZ+Xs+Th+Sec5Pvmct8cnNykjHWWkRExFlcsS5ARESiT+EvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHCg+1gUUZ/e1bfTR47NU5Q82xroE+RX2D8+MdQnyK5Qd8aoJp5/O/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxoPhYF1AalL1nKJ4Wl+Pft5f9A/qc0O6+rCVJt9wO1g8+H4cnPYd3zSoAEq7qQJnuPQHInT2d/I/eA8DTqi1lut8KLheFS7L4+eUXojcgB+jQPpOxYx8lzuXipZdn8tSY54u016xZjcmTxlKhYjp79+yjZ+9BbN+ew8UXN+T58U+QkpqMz+fjiSfHM2fOfADaZrZk9Og/4/G4+eqrVfTr/wA+n49OndrzyKgh+P0Wr9fLAw+M5LPPl8Ri2KWOp1M/4s9vij18gNwXhp3QHnf+JXja3oi1Fvw+Ct6bgf/7b3HVboCn/S2hfq4KVcmf+xy+9ctw1WmIp10PMC4oyCN/3kTs3h3RHFZUGGttrGs4qd3XtjkzCzuJ+IaNsXm5JN//p5OGP4llIC8XgLjadUkZNop9d/bEJKeQ9vdJ7L+nP2BJ+/uL7L+nHxgXac9OZv89/bAH9lP2vofI//g9vCu+iu7AfqHKH2yMdQklcrlcrF29iKs79iA7O4esL97mllvvZu3aDaE+s2ZO5K23P2TGjDm0zWxJr17d6d1nEPXq1cVay8aNW6hatTKLs96hUeNMDhw4yOaNi2l/dXc2bNjMqJGD+e67bF6eOouyZZM4fPhnAC66qD4zX3uBRhe1idXwT2n/8MxYlxA2V80LoSCPhM53njT8cSdAYT4AplINEm8cRO6EIUX7JJYlaeBYfh43ELwFlBnwNHmzx2J3/UB8s3a4zjmXgvkTozCa06PsiFdNOP007XMaeFevxB48WHyHYPADmMQyHHlWc1/SgsKvl2IPHcQeOkTh10txX3Iprirn4Pvhe+yB/QAULl9GQsszNyzONi2aN2XTpq1s2bKNwsJCXn99Htd16lCkT/369fj4408B+GThZ1zXqT0AGzZsZuPGLQDk5Ozgp527qVgxg4yM8uTn57Nhw2YAPvzwv3S5oSNAKPgByiYlcaaecJ2N/NvWYXMPFd8hGPwAxpMAJ/nZxzdogW/jCvAWBDZYi0koE9gnIQl7aO9prflMoWmfKPFc3oqkXv0w5cpzcFTgDMWVUQH/zp9Cffy7duLKqEDhsi+Jq14TV6Uq+HftxHP5bzDx7liVXuqcU60K32f/ELqdvT2HFs2bFumzcuUautzQkfHPTaFz52tITU0hPb08e/YcDYLmzZrg8bjZtGkr1lrcbjeX/F9jln21ki5drqV6jXNCfa+//moe/8tDVKqYwXXX94r8ICUk7oJmeK7qjimbSt7MMSe0xze8nMKsd0K38xdMJrHHEKy3EPJzyZ0yMprlRo3O/KOk4ItF7LuzJwcfe5gyt94W3HqSV2fWYg8d4vDz40geNpLUp8bj3/Ej+HxRrbc0M+bEn/vxZ+MPDn2M1q0vY8ni92jd6jKys3Pwer2h9ipVKjF16rP07Xt/aN+bb7mbZ54exRefLeDQocN4vUeP2bx579Loojb8/sbbeWTUcdMOElG+9UvJnTCEvNnj8GR2LdJmksvhqlQD36aVoW3uS68hb+YYcv82EO/y/+Bpf3O0S46KiJz5G2PSgIeAzkDF4OafgHnAk9bafcXs1x/oD/BMo3r0qlk1EuXFlHf1SuKqVMOkpuHfvRP3RU1Cba4KFSlctRyAwsWfU7j4cwASru4Efn9M6i2NtmfnUKP60bPy6tWqkpNT9A29nJwddO3WD4CyZZPocsO1HDgQmNpLSUlm/rzpjBj5FF8uPvo+TNaXy8i8sgsAv23Xmnr16p7w2Is+/ZK6dWuRkVGe3btL53TCmcq/bR2mfCUokwzBqaK4BpfiXbcU/MEn6qQUXJVr4t++CQDv6iwSbx4aq5IjKlJn/q8De4FMa22GtTYDaBvcNqe4nay1k6y1zay1zUpT8LuqVgtdjzu3HiY+HntgP4XLFuNu2hyTnIxJTsbdtDmFyxYDYNLKBf5NTibx2uvJe29BTGovjZYsXc5559Whdu0auN1uunW7njcXvF+kT0ZG+dArhGFDBzJ12iwA3G43c+dM4ZVX3mDu3KLHpGLFDAA8Hg9DBg9g0qQZAJx7bu1Qn6ZNGuHxuBX8UWLKVw5dd1WpDXHxoeAHiG90Bd7VXxzdIfcwJjEJk14FgLi6jfDv2h6tcqMqUnP+ta21o4/dYK39ERhtjLmtmH3OWskPjsB9URNMahrlps0h99WXA//JgPx35uNp2ZqEKzuAz4vNL+Dg6EcAsIcOkjtrOmnjAisJcmdOwx4KnF2WvWMQcXXODW33/5Adg5GVTj6fj3vuHc7bb71GnMvF1GmzWbPmW0aNHMzSZStYsOAD2rS5gscfewiLZdGiLAYOehiArl070arVpaRnlKdnz24A3N73PlasWM3g+++i47XtcLlcTJw4nU8WfgZAlxs6csstN1JY6CUvN4+bbr4rZmMvbRK6DMBVqz4mKYUy946ncOEbod8977KPiK/fnPjGrbB+H3gLyJ87PrSvSauASU3Hv3Xt0Tu0fvLfnExi13ux1g95h8mfPynaw4qKiCz1NMa8D3wITLPW7ghuqwz0Bn5rrW13qvs4m5Z6SlFn+lJPKdnZtNRTThTrpZ7dgQzgP8aYPcaYPcBCIB3oWtKOIiISeRGZ9rHW7gWGBi9FGGP6AC9H4nFFRCQ8sVjq+UgMHlNERI4RqaWeK4trAioX0yYiIlESqdU+lYEOBJZ2HssAn0foMUVEJEyRCv8FQLK1dvnxDcaYhRF6TBERCVOk3vC9vYS2myLxmCIiEj59t4+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQ
dS+IuIOJDCX0TEgRT+IiIOpPAXEXGg+FgXUJzUaS/HugT5pc5pFesKROQUdOYvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOFBY4W+MSTjJtvTTX46IiERDuGf+/zTGuI/cMMZUBT6ITEkiIhJp4Yb/v4E5xpg4Y0xt4D3goUgVJSIikRUfTidr7YvGGA+BJ4HawB3W2s8jWZiIiEROieFvjLn/2JtADWA5cJkx5jJr7dhIFne2GP7Xsfz3s8Wkly/Hv1954YT2xV+tZNCwR6hWtQoA7dpcwV233Ux+fgG9BgyhoLAQn9fHb9v+hj/2vRWArKVf88zzU/D7LUlJiTz+8APUrH5OVMdVmnVon8nYsY8S53Lx0sszeWrM80Xaa9asxuRJY6lQMZ29e/bRs/cgtm/P4eKLG/L8+CdISU3G5/PxxJPjmTNnPgBtM1syevSf8XjcfPXVKvr1fwCfz0e5cmlMfvEZ6tatRX5ePn37P8Dq1etjMexSx9OpH/HnN8UePkDuC8NOaI87/xI8bW/EWgt+HwXvzcD//be4ajfA0/6WUD9Xharkz30O3/pluOo0xNOuBxgXFOSRP28idu+OaA4rKoy1tvhGY0aWtLO19pHTXlFQ4a7NxRd2hlm6fBVJZcrwp8eeLjb8p86cy4QxRX9c1lpyc/NISipDoddLz7sGM+yeO7i4UX2u/UNfnn1yBOfWrsmsfy5g1Zr1PD78gWgN6Vcpc06rWJdQIpfLxdrVi7i6Yw+ys3PI+uJtbrn1btau3RDqM2vmRN56+0NmzJhD28yW9OrVnd59BlGvXl2stWzcuIWqVSuzOOsdGjXO5MCBg2zeuJj2V3dnw4bNjBo5mO++y+blqbMY/cRwDh0+zGN/GccFF5zL+L//lfZXd4/hT6Bk+4dnxrqEsLlqXggFeSR0vvOk4Y87AQrzATCVapB44yByJwwp2iexLEkDx/LzuIHgLaDMgKfJmz0Wu+sH4pu1w3XOuRTMnxiF0ZweZUe8asLpV+KZfyTDvTRp1uQituf872cGxhiSksoA4PV68Xq9GBM4bgY4fPhnAA4eOkzFChmnrV6na9G8KZs2bWXLlm0AvP76PK7r1KFI+NevX48HBo8C4JOFnzH3jSkAbNiwOdQnJ2cHP+3cTcWKGbjd8eTn54faP/zwvwx98I+8PHUW9eufz+inxgOwfv0matWqTqVKFfjpp13RGG6p5t+2DpNWofgOweAHMJ4EOMnJbnyDFvg2rgBvQWCDtZiEMljAJCRhD+09zVWfGcKa8zfGnA8MJjDfH9rHWntlZMoqfVZ8s5Yuve6mUoUMBg/oy3l1awHg8/nodtsgtm3/gR5dfkfjhhcC8Miwe7lr8AgSEzyULZvEa5PGxbL8UuWcalX4PvuH0O3s7Tm0aN60SJ+VK9fQ5YaOjH9uCp07X0Nqagrp6eXZs+doEDRv1gSPx82mTVux1uJ2u7nk/xqz7KuVdOlyLdVrBKbpVq5aww2dO/LZ50to3qwJtWpVp3q1qgr/KIm7oBmeq7pjyqaSN3PMCe3xDS+nMOud0O38BZNJ7DEE6y2E/Fxyp5Q4AXLWCne1zxzga2A4MOSYi4ShwQXn8sHcafxz2gRu+n0nBj30aKgtLi6OudOe56N/zWDVmm/ZsHkrANNn/4t/PP0oH/37FTp3bM9Tz74Yo+pLnyOvro51/PTng0Mfo3Xry1iy+D1at7qM7OwcvF5vqL1KlUpMnfosffveH9r35lvu5pmnR/HFZws4dOgwXq8PgNFPPUe58mksXfI+AwbcxtfLv8Hr80VwhHIs3/ql5E4YQt7scXgyuxZpM8nlcFWqgW/TytA296XXkDdzDLl/G4h3+X/wtL852iVHRbjh77XW/sNau9hau+zIpbjOxpirj7meZoyZYoxZaYx5zRhTuYT9+htjlhpjlk6ePvN/GMaZLbls2dD0TusrWuD1etm7b3+RPqkpyTT/v8Z8mrWUPXv3sX7j5tCrgGuuas3yb9ZEve7Sant2DjWOefO8erWq5Bw3bZeTs4Ou3frRvEUH/jxiNAAHDhwEICUlmfnzpjNi5FN8ufir0D5ZXy4j88ouXN7ydyxalMXGjVsAOHjwEH373U+z5u3p3WcQFStkhKacJHr829ZhyleCMsmhbXENLsW7bin4g0/GSSm4KtfEv30TAN7VWcTVOD8W5UZcuOH/pjHmbmNMVWNM+pFLCf3/esz1Z4AcoBOwBCj2nRNr7SRrbTNrbbO+PXuEWdqZb9fuPaGzw1Vr1uO3lnJpqezZu48DBw8BkJefT9aSr6lTqwapKSkcOvwzW7dlA/D5kq+pW6tmzOovbZYsXc5559Whdu0auN1uunW7njcXvF+kT0ZG+dArhGFDBzJ12iwA3G43c+dM4ZVX3mDu3AVF9qlYMfC+jMfjYcjgAUyaNAOAtLRU3O7AZyRvv+0mFn36JQeDx10iy5Q/eq7pqlIb4uIh9+jPPr7RFXhXf3F0h9zDmMQkTHpgZV5c3Ub4d22PVrlRFdacP9Ar+O+xUz0WqBvGvs2stU2C18cZY3qV2PssNGTkkyz5eiX79h3gqs63cPftt4amCLrfcC3vf/Ips//1FnHxcSR6PIx5ZBjGGHbu3svDf3kan9+P9Vs6XNmKzJaXAjBq6CDue/hxjMuQmpLMYw/dF8shlio+n4977h3O22+9RpzLxdRps1mz5ltGjRzM0mUrWLDgA9q0uYLHH3sIi2XRoiwGDnoYgK5dO9Gq1aWkZ5SnZ89uANze9z5WrFjN4PvvouO17XC5XEycOJ1PFn4GQP0L6/HyS3/H5/exdu239Os/OGZjL20SugzAVas+JimFMveOp3DhG4GAB7zLPiK+fnPiG7fC+n3gLSB/7vjQviatAiY1Hf/WtUfv0PrJf3MyiV3vxVo/5B0mf/6kaA8rKkpc6vmL79SYbGAsgUUrA4BzbfCBjDErrbWNT3UfZ9NSTynqTF/qKSU7m5Z6yolOy1LPYxljGgENgMQj26y104vp/iKQErw+DagA7DTGVCHwITEREYmhcJd6jgQyCYT/28A1wKfAScO/uM8HWGt/NMZ88osqFRGR0ybcN3xvBK4CfrTW9gEuBk74mucw6YNjIiIxFu60T5611m+M8RpjUoGfKOHNXmPMyuKagGKXeoqISHScMvxNYL3bSmNMOQJz+cuAQ8DiEnarDHQAjv9ctAH0baAiIjF2yvC31lpjTBNr7T7gBWPMu0Cqtba4s3uABUCytfaEN3eNMQt/cbUiInJahDvtk2WMaW6tXWKt3Xqqztba20touync4kREJDLCDf+2wB3GmO+AwwSmb2w46/VFROTME274XxPRKkREJKrC/TOO30W6EBERiZ5w1/mLiEgpovAXEXEghb+IiAMp/EVEH
EjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQeKj3UBxfL7Yl2BiEippTN/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBwoPtYFlAbDn/gb//18Cenl0/j39AkntC/+eiWDHvoL1apWBqBd6yu4q08P8vML6DVwKAUFhfh8fn6b2ZI/3n4zAA8/Po6lK74huWwSAI//6T4urFc3eoMq5Tq0z2Ts2EeJc7l46eWZPDXm+SLtNWtWY/KksVSomM7ePfvo2XsQ27fncPHFDXl+/BOkpCbj8/l44snxzJkzH4Ar2/6GJ58cjsvl4vChw9zW9z42bdrKvff057bbeuD1etm1cw99+9/Ptm3bYzHsUsfTqR/x5zfFHj5A7gvDTmiPO/8SPG1vxFoLfh8F783A//23uGo3wNP+llA/V4Wq5M99Dt/6ZbjqNMTTrgcYFxTkkT9vInbvjmgOKyqMtTbWNZxU4U8bzszCTmLp8m9IKpPInx4fW2z4T535LyY8NbLIdmstubl5JCWVodDrpefdDzLsnv5c3PBCHn58HG2uaE77tr+J1jBOmzLVM2NdQolcLhdrVy/i6o49yM7OIeuLt7nl1rtZu3ZDqM+smRN56+0PmTFjDm0zW9KrV3d69xlEvXp1sdayceMWqlatzOKsd2jUOJP9+w+wZvUiuvy+D+vWbeTOO3rRvHkTbu97H5ltruDLxV+Rm5vHHf170qbN5dx0810x/AmUbP/wzFiXEDZXzQuhII+EzneeNPxxJ0BhPgCmUg0SbxxE7oQhRfskliVp4Fh+HjcQvAWUGfA0ebPHYnf9QHyzdrjOOZeC+ROjMJrTo+yIV004/TTtcxo0a9KItNSU/3k/YwxJSWUA8Hq9eL0+DGEdN/kVWjRvyqZNW9myZRuFhYW8/vo8ruvUoUif+vXr8fHHnwLwycLPuK5TewA2bNjMxo1bAMjJ2cFPO3dTsWIGEHgyT00J/D9IS0shJydwtrjwP5+Tm5sHwJeLl1G9WtXID9Ih/NvWYXMPFd8hGPwAxpMAJznZjW/QAt/GFeAtCGywFpMQ+L00CUnYQ3tPa81nCk37RMmK1evo0vuPVKqQweABt3FenVoA+Hw+uvW9l23bc+hxw7U0bnhBaJ9nX5zBP6bO4rJLLua+O3vj8bhjVX6pck61Knyf/UPodvb2HFo0b1qkz8qVa+hyQ0fGPzeFzp2vITU1hfT08uzZczQImjdrgsfjZtOmrQDcccdg3pw/g9zcPA4cPEjL33Q64bH79O7Bu+99EpmByUnFXdAMz1XdMWVTyZs55oT2+IaXU5j1Tuh2/oLJJPYYgvUWQn4uuVNGnrBPaRCRM39jTJox5kljzDpjzO7gZW1wW7lIPOaZrMH55/HBnJf459TnuOn3v2PQn/4SaouLi2Puy+P5aO5UVq39lg2btwJw7x29ePPVF5j94jj2HzzIlFffiFH1pY8xJ766On7688Ghj9G69WUsWfwerVtdRnZ2Dl6vN9RepUolpk59lr597w/te889/eh03a3UrtuMadNm8/SYoqFx001daHbJxTz9zD8iMCopjm/9UnInDCFv9jg8mV2LtJnkcrgq1cC3aWVom/vSa8ibOYbcvw3Eu/w/eNrfHO2SoyJS0z6vA3uBTGtthrU2A2gb3DanuJ2MMf2NMUuNMUsnT58VodKiL7lsUmh6p/XlzfF6fezdt79In9SUZJo3vYhPv/wKgIoV0jHG4PG46dyxHavWfhv1ukur7dk51Kh+Tuh29WpVQ1M0R+Tk7KBrt340b9GBP48YDcCBAwcBSElJZv686YwY+RRfLg4crwoV0ml8UQMWL/kagNfnzOfyy5uF7u+qK1vx0LBBdO7Sm4KCgoiOT07Ov20dpnwlKJMc2hbX4FK865aC3xfYkJSCq3JN/Ns3AeBdnUVcjfNjUW7ERSr8a1trR1trfzyywVr7o7V2NFCzuJ2stZOstc2stc369vxDhEqLvl2794bODletWY/fbymXlsqevfs5cDAwX5mXn0/W0uXUqVkdgJ279gCBM9KPF2VRr26t2BRfCi1ZupzzzqtD7do1cLvddOt2PW8ueL9In4yM8qFXCMOGDmTqtMDJiNvtZu6cKbzyyhvMnbsg1H/v3v2kpaVSL7giq91VrVm3LvAGcpMmDZnw/JPc0KUPO3fujsYQJciUrxy67qpSG+Li4Zj3COIbXYF39RdHd8g9jElMwqRXASCubiP8u0rnyqxIzfl/Z4x5EJhmrd0BYIypDPQGvo/QY8bMkFFPseTrVezbf4CruvTi7ttuDk0RdO/ckfcXfsrsf79DXJyLxIQExox6EGMMO3fv4eG/jsPn82Otnw5tW5HZsgUAQx97mr379mOt5YLz6jJy8IBYDrFU8fl83HPvcN5+6zXiXC6mTpvNmjXfMmrkYJYuW8GCBR/Qps0VPP7YQ1gsixZlMXDQwwB07dqJVq0uJT2jPD17dgPg9r73sWLFau64awivz56E32/Zt3cfffs/AMDoJ/5McnJZZs0MrBj5/vvt3NClT2wGX8okdBmAq1Z9TFIKZe4dT+HCNwIBD3iXfUR8/ebEN26F9fvAW0D+3PGhfU1aBUxqOv6ta4/eofWT/+ZkErvei7V+yDtM/vxJ0R5WVERkqacxpjwwDLgeqAxYYAcwHxhtrd1zqvs4m5Z6SlFn+lJPKdnZtNRTThTuUs+InPlba/caY14GPgCyrLWh11nGmKuBdyPxuCIiEp5IrfYZBMwD/gh8Y4y5/pjmv0biMUVEJHyRmvPvB1xirT1kjKkNvGGMqW2t/TvoU0wiIrEWqfCPOzLVY63daozJJPAEUAuFv4hIzEVqqeePxpgmR24Enwh+B1QALorQY4qISJgiFf49gR+P3WCt9VprewKtI/SYIiISpkit9skuoe2zSDymiIiET9/qKSLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQMZaG+saHMkY099aOynWdcgvo+N39tKxC9CZf+z0j3UB8qvo+J29dOxQ+IuIOJLCX0TEgRT+seP4OceznI7f2UvHDr3hKyLiSDrzFxFxIIX/aWCMudoYs94Ys9EYM+wk7QnGmNnB9i+NMbWPaXsouH29MabDMdtfMsb8ZIz5JjqjkOOF
cVxbG2O+MsZ4jTE3xqJGOblT/f6YgGeDx3alMeb/ol1jrCn8fyVjTBzwPHAN0ADoYYxpcFy324G91trzgHHA6OC+DYA/AA2Bq4EJwfsDmBrcJjEQ5nHdBvQGXotudRKGqZT8+3MNUC946Q/8Iwo1nVEU/r9eC2CjtXaztbYAmAVcf1yf64FpwetvAFcZY0xw+yxrbb61dguwMXh/WGv/C+yJxgDkpE55XK21W621KwF/LAqU4oXx+3M9MN0GZAHljDFVo1PdmUHh/+tVA74/5nZ2cNtJ+1hrvcB+ICPMfSU2dGxKN8cfX4X/r2dOsu34JVTF9QlnX4kNHZvSzfHHV+H/62UDNY65XR34obg+xph4II3AS9Jw9pXY0LEp3Rx/fBX+v94SoJ4xpo4xxkPgDdz5x/WZD/QKXr8R+NgGPmAxH/hDcDVQHQJvPi2OUt1SsnCOq5xhr+LzAAADzklEQVS95gM9g6t+LgP2W2tzYl1UNMXHuoCznbXWa4z5I/AeEAe8ZK1dbYx5FFhqrZ0PTAFmGGM2Ejjj/0Nw39XGmNeBNYAXGGCt9QEYY2YCmUAFY0w2MNJaOyXKw3OscI6rMaY58C+gPNDJGPOItbZhDMuWoJP9/gBuAGvtC8DbQEcCiyx+BvrEptLY0Sd8RUQcSNM+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/kTOQMaa3MeacWNchpZfCX84awQ/knDH/Z4Of1o6U3sD/FP4RrkdKGa3zlzNa8G8fvAN8AlwOdAYuAB4BEoBNQB9r7SFjTEdgLLAL+Aqoa6393Snu+13gS6Ap8C3Q01r7szFmBNAJKAN8DtxhrbXGmIXB2y0JfEr0W2A44AF2Azdba3cYY0YBdYCqwPnA/cBlBL5KeDvQyVpbaIy5JFhzcrDu3sH7nhrslxscd4Pj+1lrc46vx1r7zP/y8xUHs9bqossZewFqE/jK5MuCtysA/wXKBm8PBUYAiQS+pbFOcPtMYEEY922BlsHbLwGDg9fTj+k3g0BYAywEJhzTVp6jJ1F9gWeC10cBnxL4VOnFBD5Fek2w7V8EnsTcBIK7YnB7dwKfJD7yOM2C10/Vb0JJ49RFl5Nd9DJRzgbf2cB3rkPg7LkB8FngTyLgAb4ALgQ228DfRYBA+PcP476/t9Z+Frz+CjAIeBpoa4x5EEgC0oHVwJvBfrOP2b86MDv4XfAeYMsxbe/YwNn9KgJfEfFucPsqAk88FwCNgA+CY4kDTvb9MqfqN/sk+4iUSOEvZ4PDx1w3wAfW2h7HdjDGNP2F9338vKc1xiQCEwiceX8fnMJJLKae8cBYG/iun0wCZ/xH5ANYa/3GmEJr7ZHH8hP43TPAamvt5aeo8VT9DhezXaRYZ8ybZyJhygJaGmPOAzDGJBljzgfWAXWP+fvI3cO8v5rGmCOh2oPAVM2RoN9ljEkm8E2sxUkjMDcPR7+5NVzrgYpHHt8Y4zbGHPliuINAShj9RH4Rhb+cVay1Owm8KTrTGLOSwJPBhdbaXOBu4F1jzKfADgJ/MQ1jTDNjzORi7nIt0Ct4X+nAP6y1+4AXCUzP/JvA1zsXZxQwxxiziMAbsf/LWAoIPLGMNsasAJYDVwSbpwIvGGOWE5jmKa6fyC+i1T5Sahhjkm1g1Y8h8MfXN1hrx5XQvzaBN4UbRalEkTOGzvylNOkXPFNeTWA6ZmKM6xE5Y+nMX0TEgXTmLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxoP8HkqdScClQ374AAAAASUVORK5CYII=", "text/plain": [ "
" ] }, - "metadata": {}, - "output_type": "display_data" + "metadata": {} } ], - "source": [ - "fig, ax = plt.subplots()\n", - "sns.heatmap(rmse_df, cbar=False, annot=True, fmt=\".4g\")" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "The calculated RMSE scores can be visualized to comparatively study how model performance is affected by different parameters." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "It can be seen from this visualization that RMSE first decreases and then increases as rank increases, due to overfitting. When the rank equals 20 and the regularization parameter equals 0.1, the model achieves the lowest RMSE score." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.5 Top K recommendation" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### 3.5.1 Top k for all users (items)" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 37, - "metadata": {}, - "outputs": [], "source": [ "dfs_rec = model.recommendForAllUsers(10)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 38, - "metadata": {}, + "source": [ + "dfs_rec.show(10)" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+--------------------+\n", "|UserId| recommendations|\n", @@ -677,36 +681,36 @@ ] } ], - "source": [ - "dfs_rec.show(10)" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### 3.5.2 Top k for a selected set of users (items)" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 39, - "metadata": {}, - "outputs": [], "source": [ "users = dfs_train.select(als.getUserCol()).distinct().limit(3)\n", "\n", "dfs_rec_subset = model.recommendForUserSubset(users, 10)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 40, - "metadata": {}, + "source": [ + "dfs_rec_subset.show(10)" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+--------------------+\n", "|UserId| recommendations|\n", @@ -719,13 +723,10 @@ ] } ], - "source": [ - "dfs_rec_subset.show(10)" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### 3.5.3 Run-time considerations for top-k recommendations\n", "\n", @@ -734,28 +735,28 @@ "* Inner products of user-item pairs are calculated individually instead of leveraging matrix block multiplication features which are available in certain contemporary computing acceleration libraries (e.g., BLAS).\n", "\n", "More details about possible optimizations of the top k recommendations in Spark can be found [here](https://engineeringblog.yelp.com/2018/05/scaling-collaborative-filtering-with-pyspark.html)." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 41, - "metadata": {}, - "outputs": [], "source": [ "# cleanup spark instance\n", "spark.stop()" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## References" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "1. Yehuda Koren, Robert Bell, and Chris Volinsky, \"Matrix Factorization Techniques for Recommender Systems\n", "\", ACM Computer, Vol. 42, Issue 8, pp 30-37, Aug., 2009.\n", @@ -765,14 +766,14 @@ "4. Seaborn. url: https://seaborn.pydata.org/\n", "5. Scaling collaborative filtering with PySpark. 
url: https://engineeringblog.yelp.com/2018/05/scaling-collaborative-filtering-with-pyspark.html\n", "6. Matrix Completion via Alternating Least Square (ALS). url: http://stanford.edu/~rezab/classes/cme323/S15/notes/lec14.pdf" - ] + ], + "metadata": {} } ], "metadata": { "kernelspec": { - "display_name": "Python 3 Spark - local", - "language": "python", - "name": "spark-3-python" + "name": "python3", + "display_name": "Python 3.6.9 64-bit ('.env': venv)" }, "language_info": { "codemirror_mode": { @@ -784,7 +785,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.5" + "version": "3.6.9" + }, + "interpreter": { + "hash": "7ec2189bea0434770dca7423a25e631e1cca9c4e2b4ff137a82f4dff32ac9607" } }, "nbformat": 4, diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index 31e998bbb6..aaba0a35d1 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -197,18 +197,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "source": [ "# top k items to recommend\n", - "TOP_K = 1\n", + "TOP_K = 10\n", "\n", "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", - "MOVIELENS_DATA_SIZE = 'mock100'\n", + "MOVIELENS_DATA_SIZE = 'mock10'\n", "\n", "# user, item column names\n", "COL_USER=\"userId\"\n", "COL_ITEM=\"itemID\"\n", - "COL_RATING=\"rating\"" + "COL_RATING=\"rating\"\n", + "COL_TITLE=\"title\"\n", + "COL_GENRE=\"genres\"" ], "outputs": [], "metadata": { @@ -255,13 +257,13 @@ "schema = StructType(\n", " (\n", " StructField(COL_USER, IntegerType()),\n", - " StructField(COL_ITEM, LongType()),\n", + " StructField(COL_ITEM, IntegerType()),\n", " StructField(COL_RATING, FloatType()),\n", - " StructField(\"Timestamp\", LongType()),\n", + " StructField(\"Timestamp\", StringType()),\n", " )\n", ")\n", "\n", - "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema, title_col=\"title\", genres_col=\"genres\")\n", + "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema, title_col=COL_TITLE, genres_col=\"genres\")\n", "data.show()" ], "outputs": [ @@ -269,31 +271,20 @@ "output_type": "stream", "name": "stdout", "text": [ - "+------+------+------+--------------------+-----+--------+\n", - "|userID|itemID|rating| timestamp|title| genres|\n", - "+------+------+------+--------------------+-----+--------+\n", - "| 6| 4| 4|2200-06-19 12:21:...| foo|genreA|0|\n", - "| 8| 4| 1|1970-01-01 00:00:...| foo|genreA|0|\n", - "| 8| 4| 4|2109-02-14 15:31:...| foo|genreA|0|\n", - "| 9| 2| 2|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 9| 4| 3|2210-04-25 01:58:...| foo|genreA|0|\n", - "| 3| 5| 3| 1970-01-01 00:00:00| foo|genreA|0|\n", - "| 1| 2| 1|1970-01-01 00:00:...| foo|genreA|0|\n", - "| 8| 3| 2| 1970-01-01 00:00:00| foo|genreA|0|\n", - "| 3| 10| 4|1970-01-01 00:00:...| foo|genreA|0|\n", - "| 7| 10| 2|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 8| 9| 5|1970-01-01 00:00:...| foo|genreA|0|\n", - "| 4| 2| 3|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 5| 8| 5|1970-01-01 00:00:...| foo|genreA|0|\n", - "| 2| 7| 1|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 4| 6| 2| 1970-01-01 00:00:00| foo|genreA|0|\n", - "| 2| 5| 3|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 7| 2| 1|1970-01-01 00:00:...| foo|genreA|0|\n", - "| 8| 4| 5|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 7| 8| 1|1969-12-31 23:59:...| foo|genreA|0|\n", - "| 9| 4| 1|1970-01-01 00:00:...| 
foo|genreA|0|\n", - "+------+------+------+--------------------+-----+--------+\n", - "only showing top 20 rows\n", + "+------+------+------+---------+-----+--------+\n", + "|userID|itemID|rating|timestamp|title| genres|\n", + "+------+------+------+---------+-----+--------+\n", + "| 8| 3| 4.0|2022-2-22| foo|genreA|0|\n", + "| 8| 9| 5.0|2022-2-22| foo|genreA|0|\n", + "| 5| 1| 5.0|2022-2-22| foo|genreA|0|\n", + "| 9| 1| 1.0|2022-2-22| foo|genreA|0|\n", + "| 7| 5| 5.0|2022-2-22| foo|genreA|0|\n", + "| 3| 6| 5.0|2022-2-22| foo|genreA|0|\n", + "| 2| 6| 2.0|2022-2-22| foo|genreA|0|\n", + "| 5| 7| 4.0|2022-2-22| foo|genreA|0|\n", + "| 6| 9| 2.0|2022-2-22| foo|genreA|0|\n", + "| 5| 6| 3.0|2022-2-22| foo|genreA|0|\n", + "+------+------+------+---------+-----+--------+\n", "\n" ] } @@ -320,8 +311,8 @@ "output_type": "stream", "name": "stdout", "text": [ - "N train_df 73\n", - "N test_df 27\n" + "N train_df 6\n", + "N test_df 4\n" ] } ], @@ -401,7 +392,7 @@ "output_type": "stream", "name": "stdout", "text": [ - "Took 2.5952707109972835 seconds for training.\n" + "Took 2.296935658028815 seconds for training.\n" ] } ], @@ -445,8 +436,8 @@ "output_type": "stream", "name": "stdout", "text": [ - "48\n", - "10\n" + "30\n", + "30\n" ] } ], @@ -463,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "source": [ "# random recommender\n", "window = Window.partitionBy(COL_USER).orderBy(F.rand())\n", @@ -497,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "source": [ "def get_ranking_results(ranking_eval):\n", " metrics = {\n", @@ -524,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "source": [ "def generate_summary(data, algo, k, ranking_metrics, diversity_metrics):\n", " summary = {\"Data\": data, \"Algo\": algo, \"K\": k}\n", @@ -552,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "source": [ "als_ranking_eval = SparkRankingEvaluation(\n", " test_df, \n", @@ -572,7 +563,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "source": [ "als_diversity_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -588,7 +579,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "source": [ "als_results = generate_summary(MOVIELENS_DATA_SIZE, \"als\", TOP_K, als_ranking_metrics, als_diversity_metrics)" ], @@ -604,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "source": [ "random_ranking_eval = SparkRankingEvaluation(\n", " test_df,\n", @@ -623,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "source": [ "random_diversity_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -639,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "source": [ "random_results = generate_summary(MOVIELENS_DATA_SIZE, \"random\", TOP_K, random_ranking_metrics, random_diversity_metrics)" ], @@ -655,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "source": [ "cols = [\"Data\", \"Algo\", \"K\", \"Precision@k\", \"Recall@k\", \"NDCG@k\", \"Mean average precision\",\"catalog_coverage\", \"distributional_coverage\",\"novelty\", \"diversity\", \"serendipity\" ]\n", "df_results = pd.DataFrame(columns=cols)\n", @@ -668,96 +659,11 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "source": [ "df_results" ], - 
"outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DataAlgoKPrecision@kRecall@kNDCG@kMean average precisioncatalog_coveragedistributional_coveragenoveltydiversityserendipity
1mock100als10.40.1500000.40.170.41.6854753.624421None0.405009
2mock100random10.30.1166670.30.120.62.4464393.644061None0.396229
\n", - "
" - ], - "text/plain": [ - " Data Algo K Precision@k Recall@k NDCG@k Mean average precision \\\n", - "1 mock100 als 1 0.4 0.150000 0.4 0.17 \n", - "2 mock100 random 1 0.3 0.116667 0.3 0.12 \n", - "\n", - " catalog_coverage distributional_coverage novelty diversity serendipity \n", - "1 0.4 1.685475 3.624421 None 0.405009 \n", - "2 0.6 2.446439 3.644061 None 0.396229 " - ] - }, - "metadata": {}, - "execution_count": 20 - } - ], + "outputs": [], "metadata": {} }, { @@ -778,14 +684,14 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "source": [ "# Get movie features \"title\" and \"genres\"\n", "movies = (\n", - " data.groupBy(COL_ITEM, \"title\", \"genres\").count()\n", + " data.groupBy(COL_ITEM, COL_TITLE, \"genres\").count()\n", " .na.drop() # remove rows with null values\n", " .withColumn(\"genres\", F.split(F.col(\"genres\"), \"\\|\")) # convert to array of genres\n", - " .withColumn(\"title\", F.regexp_replace(F.col(\"title\"), \"[\\(),:^0-9]\", \"\")) # remove year from title\n", + " .withColumn(COL_TITLE, F.regexp_replace(F.col(COL_TITLE), \"[\\(),:^0-9]\", \"\")) # remove year from title\n", " .drop(\"count\") # remove unused columns\n", ")" ], @@ -794,22 +700,22 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "source": [ "# tokenize \"title\" column\n", - "title_tokenizer = Tokenizer(inputCol=\"title\", outputCol=\"title_words\")\n", + "title_tokenizer = Tokenizer(inputCol=COL_TITLE, outputCol=\"title_words\")\n", "tokenized_data = title_tokenizer.transform(movies)\n", "\n", "# remove stop words\n", "remover = StopWordsRemover(inputCol=\"title_words\", outputCol=\"text\")\n", - "clean_data = remover.transform(tokenized_data).drop(\"title\", \"title_words\")" + "clean_data = remover.transform(tokenized_data).drop(COL_TITLE, \"title_words\")" ], "outputs": [], "metadata": {} }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "source": [ "# convert text input into feature vectors\n", "\n", @@ -831,29 +737,7 @@ "\n", "feature_data.show(10, False)" ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+------+---------------------+\n", - "|itemID|features |\n", - "+------+---------------------+\n", - "|6 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|2 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|5 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|7 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|1 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|4 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|3 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|10 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|8 |[0.0,1.0,0.0,1.0,1.0]|\n", - "|9 |[0.0,1.0,0.0,1.0,1.0]|\n", - "+------+---------------------+\n", - "\n" - ] - } - ], + "outputs": [], "metadata": {} }, { @@ -914,35 +798,6 @@ "outputs": [], "metadata": {} }, - { - "cell_type": "code", - "execution_count": 27, - "source": [ - "import cProfile, pstats, io\n", - "\n", - "pr = cProfile.Profile()\n", - "pr.enable()\n", - "# ... 
do something ...\n",
- "als_eval = SparkDiversityEvaluation(\n",
- "    train_df = train_df, \n",
- "    reco_df = top_k_reco,\n",
- "    item_feature_df = feature_data, \n",
- "    item_sim_measure=\"item_feature_vector\",\n",
- "    col_user = COL_USER, \n",
- "    col_item = COL_ITEM\n",
- ")\n",
- "als_diversity=als_eval.diversity()\n",
- "als_serendipity=als_eval.serendipity()\n",
- "\n",
- "pr.disable()\n",
- "s = io.StringIO()\n",
- "ps = pstats.Stats(pr, stream=s).sort_stats(\"cumulative\")\n",
- "ps.print_stats()\n",
- "print(s.getvalue())"
- ],
- "outputs": [],
- "metadata": {}
- },
 {
 "cell_type": "markdown",
 "source": [
diff --git a/examples/04_model_select_and_optimize/tuning_spark_als.ipynb b/examples/04_model_select_and_optimize/tuning_spark_als.ipynb
index 0d8cf261ea..e0d839412c 100644
--- a/examples/04_model_select_and_optimize/tuning_spark_als.ipynb
+++ b/examples/04_model_select_and_optimize/tuning_spark_als.ipynb
@@ -2,23 +2,22 @@
 "cells": [
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Copyright (c) Microsoft Corporation. All rights reserved.\n",
 "\n",
 "Licensed under the MIT License."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "# Hyperparameter tuning (Spark-based recommender)"
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Hyperparameter tuning for a Spark-based recommender algorithm is important for selecting a model with optimal performance. This notebook introduces good practices in performing hyperparameter tuning for building recommender models with the utility functions provided in the [Microsoft/Recommenders](https://github.com/Microsoft/Recommenders.git) repository.\n",
 "\n",
@@ -26,31 +25,19 @@
 "* Spark native/custom constructs (`ParamGridBuilder`, `TrainValidationSplit`).\n",
 "* The `hyperopt` package with the Tree of Parzen Estimators (TPE) algorithm (a minimal sketch follows this list). \n",
 "* Brute-force random search of parameter values sampled from a pre-defined space. 
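As a reference for the TPE option named in the list above, here is a minimal `hyperopt` sketch. It is an illustration under assumptions: `train_and_score_als` is a hypothetical helper that would fit Spark ALS with the sampled parameters and return a validation RMSE.

```python
from hyperopt import STATUS_OK, fmin, hp, tpe

# Search space mirroring the RANK/REG grids used later in this notebook.
space = {
    "rank": hp.choice("rank", [10, 15, 20, 30, 40]),
    "regParam": hp.loguniform("regParam", -11.5, -2.3),  # roughly 1e-5 to 1e-1
}

def objective(params):
    # train_and_score_als is a hypothetical helper: fit ALS on the training
    # split with `params` and return RMSE on the validation split.
    rmse = train_and_score_als(**params)
    return {"loss": rmse, "status": STATUS_OK}

best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=25)
print(best)
```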
" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 0 Global settings and import" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", - "[GCC 7.2.0]\n", - "Pandas version: 0.23.0\n", - "PySpark version: 2.3.1\n" - ] - } - ], "source": [ "# set the environment path to find Recommenders\n", "%matplotlib notebook\n", @@ -58,7 +45,6 @@ "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import sys\n", - "import pandas as pd\n", "import numpy as np\n", "\n", @@ -90,18 +76,27 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", "print(\"PySpark version: {}\".format(pyspark.__version__))" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", + "[GCC 7.2.0]\n", + "Pandas version: 0.23.0\n", + "PySpark version: 2.3.1\n" + ] + } + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 2, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], "source": [ + "MOVIELENS_DATA_SIZE = \"100k\"\n", + "\n", "NUMBER_CORES = 1\n", "NUMBER_ITERATIONS = 25\n", "\n", @@ -128,138 +123,142 @@ "\n", "RANK = [10, 15, 20, 30, 40]\n", "REG = [ 0.1, 0.01, 0.001, 0.0001, 0.00001]" - ] + ], + "outputs": [], + "metadata": { + "tags": [ + "parameters" + ] + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 1 Data preparation" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "A Spark session is created. Note in this case, to study the running time for different approaches, the Spark session in local mode uses only one core for running. This eliminates the impact of parallelization of parameter tuning. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 3, - "metadata": {}, - "outputs": [], "source": [ "spark = start_or_get_spark(url=\"local[{}]\".format(NUMBER_CORES))" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "MovieLens 100k dataset is used for running the demonstration." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "source": [ + "data = load_spark_df(spark, size=MOVIELENS_DATA_SIZE, header=(COL_USER, COL_ITEM, COL_RATING))" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "100%|██████████| 4.81k/4.81k [00:01<00:00, 2.47kKB/s]\n" ] } ], - "source": [ - "data = load_spark_df(spark, size='100k', header=(COL_USER, COL_ITEM, COL_RATING))" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "To reduce time spent on the comparitive study, 50% of the data is used for the experimentation below." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 5, - "metadata": {}, - "outputs": [], "source": [ "data, _ = spark_random_split(data, ratio=SUBSET_RATIO)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "The dataset is split into 3 subsets randomly with a given split ratio. The hyperparameter tuning is performed on the training and the validating data, and then the optimal recommender selected is evaluated on the testing dataset." 
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 6,
- "metadata": {},
- "outputs": [],
 "source": [
 "train, valid, test = spark_random_split(data, ratio=[3, 1, 1])"
- ]
+ ],
+ "outputs": [],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "## 2 Hyperparameter tuning with Azure Machine Learning Services"
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "The `hyperdrive` module in the [Azure Machine Learning Services](https://azure.microsoft.com/en-us/services/machine-learning-service/) runs [hyperparameter tuning and optimizing for machine learning model selection](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters). At the moment, the service supports running hyperparameter tuning on heterogeneous computing targets such as clusters of commodity compute nodes with or without GPU devices (see the detailed documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets)). It is feasible to run parameter tuning on a cluster of VM nodes. In this case, the service containerizes an individual, independent Spark session on each node of the cluster to run the parameter tuning job in parallel, instead of inside a single Spark session where the training is executed in a distributed manner. \n",
 "\n",
 "Detailed instructions for tuning hyperparameters of non-Spark workloads with Azure Machine Learning Services can be found in [this](./hypertune_aml_wide_and_deep_quickstart.ipynb) notebook. "
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "## 3 Hyperparameter tuning with Spark ML constructs"
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "### 3.1 Spark native construct"
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Spark MLlib implements modules such as `CrossValidator` and `TrainValidationSplit` for tuning hyperparameters (see [here](https://spark.apache.org/docs/2.2.0/ml-tuning.html)). However, by default, it does not support custom machine learning algorithms, data splitting methods, and evaluation metrics such as those offered as utility functions in the Recommenders repository. \n",
 "\n",
 "For example, the Spark native construct can be used for tuning a recommender against the `rmse` metric, which is one of the available regression metrics in Spark."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Firstly, a Spark ALS object needs to be created. In this case, for illustration purposes, it is an ALS model object."
- ]
+ ],
+ "metadata": {}
 },
 {
 "cell_type": "code",
 "execution_count": 7,
- "metadata": {},
- "outputs": [],
 "source": [
 "# NOTE the parameters of interest, rank and regParam, are left unset, \n",
 "# because their values will be assigned in the parameter grid builder.\n",
 "als = ALS(\n",
 "    maxIter=MAX_ITER,\n",
 "    seed=SEED,\n",
 "    coldStartStrategy=\"drop\",\n",
 "    nonnegative=False,\n",
 "    **HEADER_ALS\n",
 ")"
- ]
+ ],
+ "outputs": [],
+ "metadata": {}
 },
 {
 "cell_type": "markdown",
- "metadata": {},
 "source": [
 "Then, a parameter grid can be defined as follows. Without loss of generality, only `rank` and `regParam` are considered."
- ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 8, - "metadata": {}, - "outputs": [], "source": [ "paramGrid = ParamGridBuilder() \\\n", " .addGrid(als.rank, RANK) \\\n", " .addGrid(als.regParam, REG) \\\n", " .build()" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Given the settings above, a `TrainValidationSplit` constructor can be created for fitting the best model in the given parameter range. In this case, the `RegressionEvaluator` is using `RMSE`, by default, as an evaluation metric. \n", "\n", "Since the data splitter is embedded in the `TrainValidationSplit` object, to make sure the splitting ratio is consistent across different approaches, the split ratio is set to be 0.75 and in the model training the training dataset and validating dataset are combined. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 9, - "metadata": {}, - "outputs": [], "source": [ "tvs = TrainValidationSplit(\n", " estimator=als,\n", @@ -317,36 +316,44 @@ " # are therefore not available here. \n", " trainRatio=0.75\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 10, - "metadata": {}, - "outputs": [], "source": [ "with Timer() as time_spark:\n", " # Run TrainValidationSplit, and choose the best set of parameters.\n", " # NOTE train and valid is union because in Spark TrainValidationSplit does splitting by itself.\n", " model = tvs.fit(train.union(valid))\n", "\n" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "The model parameters in the grid and the best metrics can be then returned. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 11, - "metadata": {}, + "source": [ + "for idx, item in enumerate(model.getEstimatorParamMaps()):\n", + " print('Run {}:'.format(idx))\n", + " print('\\tValidation Metric: {}'.format(model.validationMetrics[idx]))\n", + " for key, value in item.items():\n", + " print('\\t{0}: {1}'.format(repr(key), value))" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Run 0:\n", "\tValidation Metric: 1.0505385750367227\n", @@ -451,20 +458,17 @@ ] } ], - "source": [ - "for idx, item in enumerate(model.getEstimatorParamMaps()):\n", - " print('Run {}:'.format(idx))\n", - " print('\\tValidation Metric: {}'.format(model.validationMetrics[idx]))\n", - " for key, value in item.items():\n", - " print('\\t{0}: {1}'.format(repr(key), value))" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "source": [ + "model.validationMetrics" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "[1.0505385750367227,\n", @@ -494,54 +498,49 @@ " 4.426604995574413]" ] }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "execution_count": 12 } ], - "source": [ - "model.validationMetrics" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "To get the best model, just do" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 13, - "metadata": {}, - "outputs": [], "source": [ "model_best_spark = model.bestModel" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.2 Custom `Estimator`, `Transformer`, and `Evaluator` for Spark ALS\n", "\n", "One can also customize Spark modules to allow tuning hyperparameters for a desired model and evaluation metric, 
given that the native Spark ALS does not allow tuning hyperparameters for ranking metrics such as precision@k, recall@k, etc. This can be done by creating custom `Estimator`, `Transformer` and `Evaluator`. The benefit is that, after the customization, the tuning process can make use of `trainValidSplit` directly, which distributes the tuning in a Spark session." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### Customized `Estimator` and `Transformer` for top k recommender based on Spark ALS\n", "\n", "The following shows how to implement a PySpark `Estimator` and `Transfomer` for recommending top k items from ALS model. The latter generates top k recommendations from the model object. Both of the two are designed by following the protocol of Spark APIs, to make sure that they can be run with the hyperparameter tuning constructs in Spark." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 14, - "metadata": {}, - "outputs": [], "source": [ "class ALSTopK(\n", " ALS,\n", @@ -696,22 +695,22 @@ " )\n", " \n", " return topk_recommendation_all.select(self.userCol, labelCol, predictionCol)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### Customized precision@k evaluation metric\n", "\n", "In addition to the custom `Estimator` and `Transformer`, it may also be desired to customize an `Evaluator` to allow \"beyond-rating\" metrics. The codes as following illustrates a precision@k evaluator. Other types of evaluators can be developed in a similar way." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 15, - "metadata": {}, - "outputs": [], "source": [ "# Define a custom Evaulator. Here precision@k is used.\n", "class PrecisionAtKEvaluator(Evaluator):\n", @@ -733,20 +732,20 @@ "\n", " def isLargerBetter(self):\n", " return True" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Then a new ALS top-k recommender can be created, and the Spark native construct, `TrainValidationSplit` module, can be used to find the optimal model w.r.t the precision@k metric." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 16, - "metadata": {}, - "outputs": [], "source": [ "alstopk = ALSTopK(\n", " userCol=COL_USER,\n", @@ -771,14 +770,23 @@ " # are therefore not available here. 
\n", " trainRatio=0.75\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "source": [ + "# Run TrainValidationSplit, and choose the best set of parameters.\n", + "# NOTE train and valid is union because in Spark TrainValidationSplit does splitting by itself.\n", + "model_precision = tvs.fit(train.union(valid))\n", + "\n", + "model_precision.getEstimatorParamMaps()" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "[{Param(parent='ALSTopK_4f48b7cc6cf2badfcea7', name='rank', doc='rank of the factorization'): 10,\n", @@ -791,24 +799,15 @@ " Param(parent='ALSTopK_4f48b7cc6cf2badfcea7', name='regParam', doc='regularization parameter (>= 0).'): 0.01}]" ] }, - "execution_count": 17, "metadata": {}, - "output_type": "execute_result" + "execution_count": 17 } ], - "source": [ - "# Run TrainValidationSplit, and choose the best set of parameters.\n", - "# NOTE train and valid is union because in Spark TrainValidationSplit does splitting by itself.\n", - "model_precision = tvs.fit(train.union(valid))\n", - "\n", - "model_precision.getEstimatorParamMaps()" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 18, - "metadata": {}, - "outputs": [], "source": [ "def best_param(model, is_larger_better=True):\n", " if is_larger_better:\n", @@ -819,25 +818,35 @@ " parameters = model.getEstimatorParamMaps()[model.validationMetrics.index(best_metric)]\n", " \n", " return list(parameters.values())" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 19, - "metadata": {}, - "outputs": [], "source": [ "params = best_param(model_precision)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 20, - "metadata": {}, + "source": [ + "model_precision.bestModel.transform(valid).limit(5).show()\n", + "\n", + "for idx, item in enumerate(model_precision.getEstimatorParamMaps()):\n", + " print('Run {}:'.format(idx))\n", + " print('\\tValidation Metric: {}'.format(model_precision.validationMetrics[idx]))\n", + " for key, value in item.items():\n", + " print('\\t{0}: {1}'.format(repr(key), value))" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+--------------------+--------------------+\n", "|userID| label| prediction|\n", @@ -868,39 +877,29 @@ ] } ], - "source": [ - "model_precision.bestModel.transform(valid).limit(5).show()\n", - "\n", - "for idx, item in enumerate(model_precision.getEstimatorParamMaps()):\n", - " print('Run {}:'.format(idx))\n", - " print('\\tValidation Metric: {}'.format(model_precision.validationMetrics[idx]))\n", - " for key, value in item.items():\n", - " print('\\t{0}: {1}'.format(repr(key), value))" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 4 Hyperparameter tuning with `hyperopt`" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "`hyperopt` is an open source Python package that is designed for tuning parameters for generic function with any pre-defined loss. More information about `hyperopt` can be found [here](https://github.com/hyperopt/hyperopt). `hyperopt` supports parallelization on MongoDB but not Spark. In our case, the tuning is performed in a sequential mode on a local computer.\n", "\n", "In `hyperopt`, an *objective* function is defined for optimizing the hyper parameters. 
In this case, the objective is similar to that in the Spark native construct situation, which is *to the RMSE metric for an ALS recommender*. Parameters of `rank` and `regParam` are used as hyperparameters. \n", "\n", "The objective function shown below demonstrates a RMSE loss for an ALS recommender. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 21, - "metadata": {}, - "outputs": [], "source": [ "# Customize an objective function\n", "def objective(params):\n", @@ -946,11 +945,12 @@ " 'status': STATUS_OK,\n", " 'eval_time': time_run_start.interval\n", " }" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "A search space is usually defined for hyperparameter exploration. Design of search space is empirical, and depends on the understanding of how distribution of parameter of interest affects the model performance measured by the loss function. \n", "\n", @@ -959,13 +959,12 @@ "* The reg parameter prevents overfitting in certain way. \n", "\n", "Therefore, in this case, a uniform distribution and a lognormal distribution sampling spaces are used for rank and reg, respectively. A narrow search space is used for illustration purpose, that is, the range of rank is from 10 to 20, while that of reg is from $e^{-5}$ to $e^{-1}$. Together with the randomly sampled hyper parameters, other parameters use for building / evaluating the recommender, like `k`, column names, data, etc., are kept as constants." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 22, - "metadata": {}, - "outputs": [], "source": [ "# define a search space\n", "space = {\n", @@ -980,31 +979,31 @@ " 'k': 10,\n", " 'relevancy_method': \"top_k\"\n", "}" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 4.1 Hyperparameter tuning with TPE" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "`fmin` of `hyperopt` is used for running the trials for searching optimal hyper parameters. In `hyperopt`, there are different strategies for intelligently optimize hyper parameters. For example, `hyperopt` avails [Tree of Parzen Estimators (TPE) method](https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf) for searching optimal parameters. \n", "\n", "The TPE method models a surface response of $p(x|y)$ by transforming a generative process, replacing the distributions of the configuration prior with non-parametric densities, where $p$ is the probability of configuration space $x$ given the loss $y$. For different configuration space, the TPE method does different replacements. That is, uniform $\\to$ truncated Gaussian mixture, log-uniform $\\to$ exponentiated truncated Gaussian mixture, categorical $\\to$ re-weighted categorical, etc. Using different observations ${x(1), ..., x(k)}$ in the non-parametric densities, these substitutions represent a learning algorithm that can produce a variety of densities over the configuration space $X$. By maintaining sorted lists of observed variables in $H$, the runtime of each iteration of the TPE algorithm can scale linearly in $|H|$ and linearly in the number of variables (dimensions) being optimized. In a nutshell, the algorithm recognizes the irrelevant variables in the configuration space, and thus reduces iterations in searching for the optimal ones. 
Details of the TPE algorithm can be found in the reference paper.\n", "\n", "The following runs the trials with the pre-defined objective function and search space. TPE is used as the optimization method. Totally there will be 10 evaluations run for searching the best parameters." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 23, - "metadata": {}, - "outputs": [], "source": [ "with Timer() as time_hyperopt:\n", " # Trials for recording each iteration of the hyperparameter searching.\n", @@ -1018,14 +1017,19 @@ " max_evals=NUMBER_ITERATIONS\n", " )\n", " \n" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 24, - "metadata": {}, + "source": [ + "trials.best_trial" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "{'book_time': datetime.datetime(2019, 7, 17, 12, 28, 19, 108000),\n", @@ -1046,801 +1050,41 @@ " 'version': 0}" ] }, - "execution_count": 24, "metadata": {}, - "output_type": "execute_result" + "execution_count": 24 } ], - "source": [ - "trials.best_trial" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 25, - "metadata": {}, + "source": [ + "parameters = ['rank', 'reg']\n", + "cols = len(parameters)\n", + "f, axes = plt.subplots(nrows=1, ncols=cols, figsize=(15,5))\n", + "cmap = plt.cm.jet\n", + "for i, val in enumerate(parameters):\n", + " xs = np.array([t['misc']['vals'][val] for t in trials.trials]).ravel()\n", + " ys = [t['result']['loss'] for t in trials.trials]\n", + " xs, ys = zip(*sorted(zip(xs, ys)))\n", + " ys = np.array(ys)\n", + " axes[i].scatter(xs, ys, s=20, linewidth=0.01, alpha=0.75, c=cmap(float(i)/len(parameters)))\n", + " axes[i].set_title(val)" + ], "outputs": [ { + "output_type": "display_data", "data": { - "application/javascript": [ - "/* Put everything inside the global mpl namespace */\n", - "window.mpl = {};\n", - "\n", - "\n", - "mpl.get_websocket_type = function() {\n", - " if (typeof(WebSocket) !== 'undefined') {\n", - " return WebSocket;\n", - " } else if (typeof(MozWebSocket) !== 'undefined') {\n", - " return MozWebSocket;\n", - " } else {\n", - " alert('Your browser does not have WebSocket support.' +\n", - " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", - " 'Firefox 4 and 5 are also supported but you ' +\n", - " 'have to enable WebSockets in about:config.');\n", - " };\n", - "}\n", - "\n", - "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", - " this.id = figure_id;\n", - "\n", - " this.ws = websocket;\n", - "\n", - " this.supports_binary = (this.ws.binaryType != undefined);\n", - "\n", - " if (!this.supports_binary) {\n", - " var warnings = document.getElementById(\"mpl-warnings\");\n", - " if (warnings) {\n", - " warnings.style.display = 'block';\n", - " warnings.textContent = (\n", - " \"This browser does not support binary websocket messages. \" +\n", - " \"Performance may be slow.\");\n", - " }\n", - " }\n", - "\n", - " this.imageObj = new Image();\n", - "\n", - " this.context = undefined;\n", - " this.message = undefined;\n", - " this.canvas = undefined;\n", - " this.rubberband_canvas = undefined;\n", - " this.rubberband_context = undefined;\n", - " this.format_dropdown = undefined;\n", - "\n", - " this.image_mode = 'full';\n", - "\n", - " this.root = $('
');\n",
    "    /* ... several hundred lines of matplotlib nbagg (interactive figure) widget JavaScript omitted ... */\n"
    ],
    "text/plain": [
     ""
    ]
   },
-   "metadata": {},
-   "output_type": "display_data"
+   "metadata": {}
  },
  {
+   "output_type": "display_data",
   "data": {
    "text/html": [
     ""
    ],
    "text/plain": [
     ""
    ]
   },
-   "metadata": {},
-   "output_type": "display_data"
+   "metadata": {}
  }
 ],
-   "source": [
-    "parameters = ['rank', 'reg']\n",
-    "cols = len(parameters)\n",
-    "f, axes = plt.subplots(nrows=1, ncols=cols, figsize=(15,5))\n",
-    "cmap = plt.cm.jet\n",
-    "for i, val in enumerate(parameters):\n",
-    "    xs = np.array([t['misc']['vals'][val] for t in trials.trials]).ravel()\n",
-    "    ys = [t['result']['loss'] for t in trials.trials]\n",
-    "    xs, ys = zip(*sorted(zip(xs, ys)))\n",
-    "    ys = np.array(ys)\n",
-    "    axes[i].scatter(xs, ys, s=20, linewidth=0.01, alpha=0.75, c=cmap(float(i)/len(parameters)))\n",
-    "    axes[i].set_title(val)"
-   ]
+   "metadata": {}
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
   "source": [
    "It can be seen from the above plot that\n",
    "* The actual impact of rank is in line with the intuition - the smaller the value the better the result.\n",
    "* It is interesting to see that the optimal value of reg is around 0.1 to 0.15. 
" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Get the best model." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 26, - "metadata": {}, - "outputs": [], "source": [ "als = ALS(\n", " rank=best[\"rank\"],\n", @@ -1902,20 +1131,20 @@ ")\n", " \n", "model_best_hyperopt = als.fit(train)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Tuning prameters against other metrics can be simply done by modifying the `objective` function. The following shows an objective function of how to tune \"precision@k\". Since `fmin` in `hyperopt` only supports minimization while the actual objective of the loss is to maximize \"precision@k\", `-precision` instead of `precision` is used in the returned value of the `objective` function." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 27, - "metadata": {}, - "outputs": [], "source": [ "# Customize an objective function\n", "def objective_precision(params):\n", @@ -1988,29 +1217,29 @@ " 'status': STATUS_OK,\n", " 'eval_time': time_run_start.interval\n", " }" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 4.2 Hyperparameter tuning with `hyperopt` sampling methods" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Though `hyperopt` works well in a single node machine, its features (e.g., `Trials` module) do not support Spark environment, which makes it hard to perform the tuning tasks in a distributed/parallel manner. It is useful to use `hyperopt` for sampling parameter values from the pre-defined sampling space, and then parallelize the model training onto Spark cluster with the sampled parameter combinations.\n", "\n", "The downside of this method is that the intelligent searching algorithm (i.e., TPE) of `hyperopt` cannot be used. The approach introduced here is therefore equivalent to random search." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 28, - "metadata": {}, - "outputs": [], "source": [ "with Timer() as time_sample:\n", " # Sample the parameters used for model building from the pre-defined space. \n", @@ -2018,14 +1247,19 @@ " \n", " # The following runs model building on the sampled parameter values with the pre-defined objective function.\n", " results_map = list(map(lambda x: objective(x), sample_params))\n" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 30, - "metadata": {}, + "source": [ + "results_map" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ "[{'eval_time': 9.468051671981812, 'loss': 1.027085217204854, 'status': 'ok'},\n", @@ -2055,46 +1289,41 @@ " {'eval_time': 9.08506464958191, 'loss': 1.254533287299843, 'status': 'ok'}]" ] }, - "execution_count": 30, "metadata": {}, - "output_type": "execute_result" + "execution_count": 30 } ], - "source": [ - "results_map" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Get the best model." 
- ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 31, - "metadata": {}, - "outputs": [], "source": [ "loss_metrics = np.array([x['loss'] for x in results_map])\n", "best_loss = np.where(loss_metrics == min(loss_metrics))" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 32, - "metadata": {}, - "outputs": [], "source": [ "best_param = sample_params[best_loss[0].item()]" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 33, - "metadata": {}, - "outputs": [], "source": [ "als = ALS(\n", " rank=best_param[\"rank\"],\n", @@ -2109,29 +1338,29 @@ ")\n", " \n", "model_best_sample = als.fit(train)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 5 Evaluation on testing data" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "The optimal parameters can then be used for building a recommender, which is then evaluated on the testing data.\n", "\n", "The following codes generate the evaluation results by using the testing dataset with the optimal model selected against the pre-defined loss. Without loss of generity, in this case, the optimal model that performs the best w.r.t regression loss (i.e., the RMSE metric) is used. One can simply use other metrics like precision@k, as illustrated in the above sections, to evaluate the optimal model on the testing dataset." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 34, - "metadata": {}, - "outputs": [], "source": [ "# Get prediction results with the optimal modesl from different approaches.\n", "prediction_spark = model_best_spark.transform(test)\n", @@ -2160,14 +1389,19 @@ " }, index=[0])\n", " \n", " test_evaluations = test_evaluations.append(result)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 35, - "metadata": {}, + "source": [ + "test_evaluations" + ], "outputs": [ { + "output_type": "execute_result", "data": { "text/html": [ "
\n", @@ -2235,62 +1469,58 @@ "0 sample 230.902271 0.287638 0.791199 0.232688 0.988922" ] }, - "execution_count": 35, "metadata": {}, - "output_type": "execute_result" + "execution_count": 35 } ], - "source": [ - "test_evaluations" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "From the results, it can be seen that, *with the same number of iterations*, Spark native construct based approach takes the least amount of time, even if there is no parallel computing. This is simply because Spark native constructs leverage the underlying Java codes for running the actual analytics with high performance efficiency. Interestingly, the run time for `hyperopt` with TPE algorithm and random search methods are almost the same. Possible reasons for this are that, the TPE algorithm searches optimal parameters intelligently but runs the tuning iterations sequentially. Also, the advantage of TPE may become obvious when there is a higher dimensionality of hyperparameters. \n", "\n", "The three approaches use the same RMSE loss. In this measure, the native Spark construct performs the best. The `hyperopt` based approach performs the second best, but the advantage is very subtle. It should be noted that these differences may be owing to many factors like characteristics of datasets, dimensionality of hyperparameter space, sampling size in the searching, etc. Note the differences in the RMSE metrics may also come from the randomness of the intermediate steps in parameter tuning process. In practice, multiple runs are required for generating statistically robust comparison results. We have tried 5 times for running the same comparison codes above. The results aligned well with each other in terms of objective metric values and elapsed time. " - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "# Conclusions" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "In summary, there are mainly three different approaches for running hyperparameter tuning for Spark based recommendation algorithm. The three different approaches are compared as follows." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "|Approach|Distributed (on Spark)|Param sampling|Advanced hyperparam searching algo|Custom evaluation metrics|Custom data split|\n", "|---------|-------------|--------------|--------------------------|--------------|------------|\n", "|AzureML Services|Parallelizing Spark sessions on multi-node cluster or single Spark session on one VM node.)|Random, Grid, Bayesian sampling for discrete and continuous variables.|Bandit policy, Median stopping policy, and truncation selection policy.|Yes|Yes|\n", "|Spark native construct|Distributed in single-node standalone Spark environment or multi-node Spark cluster.|No|No|Need to re-engineer Spark modules|Need to re-engineer Spark modules.|\n", "|`hyperopt`|No (only support parallelization on MongoDB)|Random sampling for discrete and continuous variables.|Tree Parzen Estimator|Yes|Yes|" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 36, - "metadata": {}, - "outputs": [], "source": [ "# cleanup spark instance\n", "spark.stop()" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "# References\n", "\n", @@ -2300,7 +1530,8 @@ "* `hyperopt`, url: http://hyperopt.github.io/hyperopt/.\n", "* Bergstra, J., Yamins, D., Cox, D. D. 
(2013) Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures. Proc. of the 30th International Conference on Machine Learning (ICML 2013).\n", "* Kris Wright, \"Hyper parameter tuning with hyperopt\", url:https://districtdatalabs.silvrback.com/parameter-tuning-with-hyperopt" - ] + ], + "metadata": {} } ], "metadata": { @@ -2325,4 +1556,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index d96e5c2ca9..6cb300c759 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -31,7 +31,7 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2764.50s in Windows, while in Linux 124.35s" ) -@pytest.mark.parametrize("data_size", ["100k", "mock100", "mock10"]) +@pytest.mark.parametrize("data_size", ["100k", "mock100"]) def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["als_deep_dive"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, @@ -50,7 +50,7 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.parametrize("data_size", ["100k", "mock100", "mock10"]) +@pytest.mark.parametrize("data_size", ["100k", "mock100"]) def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["evaluation_diversity"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, @@ -62,13 +62,15 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2409.69s in Windows, while in Linux 138.30s" ) -def test_spark_tuning(notebooks, output_notebook, kernel_name): +@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +def test_spark_tuning(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["spark_tuning"] pm.execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict( + MOVIELENS_DATA_SIZE=data_size, NUMBER_CORES="*", NUMBER_ITERATIONS=3, SUBSET_RATIO=0.5, diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 76cd854d28..3f06cd9202 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -50,9 +50,11 @@ def test_baseline_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name): +@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["surprise_svd_deep_dive"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, + parameters=dict(MOVIELENS_DATA_SIZE=data_size)) @pytest.mark.notebooks @@ -98,9 +100,11 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks -def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name): +@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name, data_size): 
notebook_path = notebooks["rlrmc_quickstart"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, + parameters=dict(rank_parameter=2, MOVIELENS_DATA_SIZE=data_size)) @pytest.mark.notebooks From 0c7adbab22efe8b6c39c8aeb34d3f6ec0f425fe5 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Thu, 23 Sep 2021 16:21:08 +0000 Subject: [PATCH 06/27] Add mock_movielens test marker --- tests/unit/examples/test_notebooks_pyspark.py | 3 +++ tests/unit/examples/test_notebooks_python.py | 2 ++ tox.ini | 1 + 3 files changed, 6 insertions(+) diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 6cb300c759..48ddeb1162 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -28,6 +28,7 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark +@pytest.mark.mock_movielens @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2764.50s in Windows, while in Linux 124.35s" ) @@ -50,6 +51,7 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark +@pytest.mark.mock_movielens @pytest.mark.parametrize("data_size", ["100k", "mock100"]) def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["evaluation_diversity"] @@ -59,6 +61,7 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data @pytest.mark.notebooks @pytest.mark.spark +@pytest.mark.mock_movielens @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2409.69s in Windows, while in Linux 138.30s" ) diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 3f06cd9202..da9e65d214 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -50,6 +50,7 @@ def test_baseline_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks +@pytest.mark.mock_movielens @pytest.mark.parametrize("data_size", ["100k", "mock100"]) def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["surprise_svd_deep_dive"] @@ -100,6 +101,7 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks +@pytest.mark.mock_movielens @pytest.mark.parametrize("data_size", ["100k", "mock100"]) def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["rlrmc_quickstart"] diff --git a/tox.ini b/tox.ini index 815e06dc14..bfb0b68833 100644 --- a/tox.ini +++ b/tox.ini @@ -66,6 +66,7 @@ markers = gpu: mark a test as gpu test spark: mark a test as spark test vw: mark a test as vowpal wabbit test + mock_movielens: mark a test that uses the mock dataset instead of real dataset testpaths = tests addopts = From 83f26e800db6bfd164196684e597340c85301fea Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Thu, 23 Sep 2021 17:33:58 +0000 Subject: [PATCH 07/27] Parametrize als_deep_dive NB --- .../als_deep_dive.ipynb | 499 +++++++----------- tests/unit/examples/test_notebooks_pyspark.py | 9 +- 2 files changed, 187 insertions(+), 321 deletions(-) diff --git a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb index ce825152fc..a8b19a4d65 100644 --- 
a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb @@ -2,31 +2,32 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "Copyright (c) Microsoft Corporation. All rights reserved.\n", "\n", "Licensed under the MIT License." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# Spark Collaborative Filtering (ALS) Deep Dive" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Spark MLlib provides a collaborative filtering algorithm that can be used for training a matrix factorization model, which predicts explicit or implicit ratings of users on items for recommendations.\n", "\n", "This notebook presents a deep dive into the Spark collaborative filtering algorithm." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 1 Matrix factorization algorithm\n", "\n", @@ -53,11 +54,11 @@ "Owing to the term of $q_{i}^{T}p_{u}$ the loss function is non-convex. Gradient descent method can be applied but this will incur expensive computations. An Alternating Least Square (ALS) algorithm was therefore developed to overcome this issue. \n", "\n", "The basic idea of ALS is to learn one of $q$ and $p$ at a time for optimization while keeping the other as constant. This makes the objective at each iteration convex and solvable. The alternating between $q$ and $p$ stops when there is convergence to the optimal. It is worth noting that this iterative computation can be parallelised and/or distributed, which makes the algorithm desirable for use cases where the dataset is large and thus the user-item rating matrix is super sparse (as is typical in recommendation scenarios). A comprehensive discussion of ALS and its distributed computation can be found [here](http://stanford.edu/~rezab/classes/cme323/S15/notes/lec14.pdf)." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 2 Spark Mllib implementation\n", "\n", @@ -66,28 +67,29 @@ "* The uniqueness of ALS implementation is that it distributes the matrix factorization model training by using \"Alternating Least Square\" method. \n", "* In the training method, there are parameters that can be selected to control the model performance.\n", "* Both explicit and implicit ratings are supported by Spark ALS model." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 3 Spark ALS based MovieLens recommender\n", "\n", "In the following code, the MovieLens-100K dataset is used to illustrate the ALS algorithm in Spark." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](https://github.com/Microsoft/Recommenders/blob/master/SETUP.md#dependencies-setup) to install the PySpark environment." 
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# set the environment path to find Recommenders\n", "import sys\n", @@ -116,31 +118,24 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", "print(\"PySpark version: {}\".format(pyspark.__version__))" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", - "[GCC 7.2.0]\n", - "Pandas version: 0.23.0\n", - "PySpark version: 2.3.1\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Data column names" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], "source": [ "MOVIELENS_DATA_SIZE = \"100k\"\n", "\n", @@ -148,8 +143,15 @@ "COL_ITEM = \"MovieId\"\n", "COL_RATING = \"Rating\"\n", "COL_PREDICTION = \"prediction\"\n", - "COL_TIMESTAMP = \"Timestamp\"\n", - "\n", + "COL_TIMESTAMP = \"Timestamp\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "schema = StructType(\n", " (\n", " StructField(COL_USER, IntegerType()),\n", @@ -158,156 +160,127 @@ " StructField(COL_TIMESTAMP, LongType()),\n", " )\n", ")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Model hyper parameters - these parameters are selected with reference to the benchmarking results [here](http://mymedialite.net/examples/datasets.html)." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "RANK = 10\n", "MAX_ITER = 15\n", "REG_PARAM = 0.05" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Number of recommended items" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "K = 10" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Initialize a Spark session." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "spark = start_or_get_spark(\"ALS Deep Dive\", memory=\"16g\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.1 Load and prepare data" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Data is read from csv into a Spark DataFrame." 
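+    "\n",
+    "Note: when `MOVIELENS_DATA_SIZE` is set to `\"mock100\"` (the value used by the parametrized unit tests in this patch series), `movielens.load_spark_df` is expected to return a small generated dataset instead of downloading the real MovieLens data.\n"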
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "dfs = movielens.load_spark_df(spark=spark, size=MOVIELENS_DATA_SIZE, schema=schema)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "../../recommenders/dataset/movielens.py:471: UserWarning: Both schema and header are provided.\n", - " The header argument will be ignored.\n", - " warnings.warn(WARNING_HAVE_SCHEMA_AND_HEADER)\n", - "100%|██████████| 4.81k/4.81k [00:01<00:00, 2.50kKB/s]\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 25, - "source": [ - "dfs.show(5)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+------+-------+------+---------+\n", - "|UserId|MovieId|Rating|Timestamp|\n", - "+------+-------+------+---------+\n", - "| 196| 242| 3.0|881250949|\n", - "| 186| 302| 3.0|891717742|\n", - "| 22| 377| 1.0|878887116|\n", - "| 244| 51| 2.0|880606923|\n", - "| 166| 346| 1.0|886397596|\n", - "+------+-------+------+---------+\n", - "only showing top 5 rows\n", - "\n" - ] - } - ], + "execution_count": null, "metadata": { "scrolled": true - } + }, + "outputs": [], + "source": [ + "dfs.show(5)" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Data is then randomly split by 80-20 ratio for training and testing." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "dfs_train, dfs_test = spark_random_split(dfs, ratio=0.75, seed=42)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.2 Train a movielens model " - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "It is worth noting that Spark ALS model allows dropping cold users to favor a robust evaluation with the testing data. In case there are cold users, Spark ALS implementation allows users to drop cold users in order to make sure evaluations on the prediction results are sound." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "als = ALS(\n", " maxIter=MAX_ITER, \n", @@ -320,38 +293,38 @@ ")\n", "\n", "model = als.fit(dfs_train)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.3 Prediction with the model\n", "\n", "The trained model can be used to predict ratings with a given test data." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "dfs_pred = model.transform(dfs_test).drop(COL_RATING)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "With the prediction results, the model performance can be evaluated." 
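+    "\n",
+    "For reference, the first metric reported below is the root mean squared error over the $N$ rated user-item pairs of the test set: $\\text{RMSE} = \\sqrt{\\frac{1}{N}\\sum_{(u,i)}(r_{u,i} - \\hat{r}_{u,i})^{2}}$.\n"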
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "evaluations = SparkRatingEvaluation(\n", " dfs_test, \n", @@ -369,89 +342,45 @@ " \"Explained variance score = {}\".format(evaluations.exp_var()),\n", " sep=\"\\n\"\n", ")" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "RMSE score = 0.9697095550242029\n", - "MAE score = 0.7554838330206419\n", - "R2 score = 0.24874053010909036\n", - "Explained variance score = 0.2547961843833687\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Oftentimes ranking metrics are also of interest to data scientists. Note usually ranking metrics apply to the scenario of recommending a list of items. In our case, the recommended items should be different from those that have been rated by the users. " - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Get the cross join of all user-item pairs and score them.\n", - "users = dfs_train.select('UserId').distinct()\n", - "items = dfs_train.select('MovieId').distinct()\n", + "users = dfs_train.select(COL_USER).distinct()\n", + "items = dfs_train.select(COL_ITEM).distinct()\n", "user_item = users.crossJoin(items)\n", "dfs_pred = model.transform(user_item)\n", "\n", "# Remove seen items.\n", "dfs_pred_exclude_train = dfs_pred.alias(\"pred\").join(\n", " dfs_train.alias(\"train\"),\n", - " (dfs_pred['UserId'] == dfs_train['UserId']) & (dfs_pred['MovieId'] == dfs_train['MovieId']),\n", + " (dfs_pred[COL_USER] == dfs_train[COL_USER]) & (dfs_pred[COL_ITEM] == dfs_train[COL_ITEM]),\n", " how='outer'\n", ")\n", "\n", "dfs_pred_final = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[\"train.Rating\"].isNull()) \\\n", - " .select('pred.' + 'UserId', 'pred.' + 'MovieId', 'pred.' + \"prediction\")\n", + " .select('pred.' + COL_USER, 'pred.' + COL_ITEM, 'pred.' 
+ \"prediction\")\n", "\n", "dfs_pred_final.show()" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+------+-------+----------+\n", - "|UserId|MovieId|prediction|\n", - "+------+-------+----------+\n", - "| 1| 587| 2.9286714|\n", - "| 1| 869| 2.0478792|\n", - "| 1| 1208| 2.349619|\n", - "| 1| 1677| 3.1982298|\n", - "| 2| 80| 2.2628117|\n", - "| 2| 303| 2.9711432|\n", - "| 2| 472| 3.0840402|\n", - "| 2| 582| 4.65145|\n", - "| 2| 838| 1.8449162|\n", - "| 2| 975| 3.177288|\n", - "| 2| 1260| 3.466885|\n", - "| 2| 1325| 1.1348095|\n", - "| 2| 1381| 4.0551796|\n", - "| 2| 1530| 2.1732688|\n", - "| 3| 22| 3.0636034|\n", - "| 3| 57| 2.8428345|\n", - "| 3| 89| 3.459687|\n", - "| 3| 367| 2.3071244|\n", - "| 3| 1091| 1.9453487|\n", - "| 3| 1167| 2.0511415|\n", - "+------+-------+----------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "evaluations = SparkRankingEvaluation(\n", " dfs_test, \n", @@ -470,23 +399,11 @@ " \"Mean average precision = {}\".format(evaluations.map_at_k()),\n", " sep=\"\\n\"\n", ")" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Precision@k = 0.04061505832449631\n", - "Recall@k = 0.013571438145917577\n", - "NDCG@k = 0.03699684800440573\n", - "Mean average precision = 0.003702411260039904\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.4 Fine tune the model\n", "\n", @@ -499,47 +416,48 @@ "|`maxIters`|Maximum number of iterations|10|The more iterations the better the model converges to the optimal point.|\n", "\n", "It is always a good practice to start model building with default parameter values and then sweep the parameter in a range to find the optimal combination of parameters. The following parameter set is used for training ALS models for comparison study purposes." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "param_dict = {\n", " \"rank\": [10, 15, 20],\n", " \"regParam\": [0.001, 0.1, 1.0]\n", "}" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Generate a dictionary for each parameter combination which can then be fed into model training." - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "param_grid = generate_param_grid(param_dict)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Train models with parameters specified in the parameter grid. Evaluate the model with, for example, the RMSE metric, and then record the metrics for visualization." 
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "rmse_score = []\n", "\n", @@ -569,164 +487,104 @@ "\n", "rmse_score = [float('%.4f' % x) for x in rmse_score]\n", "rmse_score_array = np.reshape(rmse_score, (len(param_dict[\"rank\"]), len(param_dict[\"regParam\"]))) " - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "rmse_df = pd.DataFrame(data=rmse_score_array, index=pd.Index(param_dict[\"rank\"], name=\"rank\"), \n", " columns=pd.Index(param_dict[\"regParam\"], name=\"reg. parameter\"))" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "sns.heatmap(rmse_df, cbar=False, annot=True, fmt=\".4g\")" - ], - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 36 - }, - { - "output_type": "display_data", - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEKCAYAAAD6q1UVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XeYU1X+x/H3yUwywzAFZqjSUVSKCD8BCwsMyoLiosgKLBaKAhYWbCC4soC6riIKu6KsIChFBUR2F8Te2MUyUpQiTao4OCK9OS3J+f2REBhghqySBOZ+Xs+Th+Sec5Pvmct8cnNykjHWWkRExFlcsS5ARESiT+EvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHCg+1gUUZ/e1bfTR47NU5Q82xroE+RX2D8+MdQnyK5Qd8aoJp5/O/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxoPhYF1AalL1nKJ4Wl+Pft5f9A/qc0O6+rCVJt9wO1g8+H4cnPYd3zSoAEq7qQJnuPQHInT2d/I/eA8DTqi1lut8KLheFS7L4+eUXojcgB+jQPpOxYx8lzuXipZdn8tSY54u016xZjcmTxlKhYjp79+yjZ+9BbN+ew8UXN+T58U+QkpqMz+fjiSfHM2fOfADaZrZk9Og/4/G4+eqrVfTr/wA+n49OndrzyKgh+P0Wr9fLAw+M5LPPl8Ri2KWOp1M/4s9vij18gNwXhp3QHnf+JXja3oi1Fvw+Ct6bgf/7b3HVboCn/S2hfq4KVcmf+xy+9ctw1WmIp10PMC4oyCN/3kTs3h3RHFZUGGttrGs4qd3XtjkzCzuJ+IaNsXm5JN//p5OGP4llIC8XgLjadUkZNop9d/bEJKeQ9vdJ7L+nP2BJ+/uL7L+nHxgXac9OZv89/bAH9lP2vofI//g9vCu+iu7AfqHKH2yMdQklcrlcrF29iKs79iA7O4esL97mllvvZu3aDaE+s2ZO5K23P2TGjDm0zWxJr17d6d1nEPXq1cVay8aNW6hatTKLs96hUeNMDhw4yOaNi2l/dXc2bNjMqJGD+e67bF6eOouyZZM4fPhnAC66qD4zX3uBRhe1idXwT2n/8MxYlxA2V80LoSCPhM53njT8cSdAYT4AplINEm8cRO6EIUX7JJYlaeBYfh43ELwFlBnwNHmzx2J3/UB8s3a4zjmXgvkTozCa06PsiFdNOP007XMaeFevxB48WHyHYPADmMQyHHlWc1/SgsKvl2IPHcQeOkTh10txX3Iprirn4Pvhe+yB/QAULl9GQsszNyzONi2aN2XTpq1s2bKNwsJCXn99Htd16lCkT/369fj4408B+GThZ1zXqT0AGzZsZuPGLQDk5Ozgp527qVgxg4yM8uTn57Nhw2YAPvzwv3S5oSNAKPgByiYlcaaecJ2N/NvWYXMPFd8hGPwAxpMAJ/nZxzdogW/jCvAWBDZYi0koE9gnIQl7aO9prflMoWmfKPFc3oqkXv0w5cpzcFTgDMWVUQH/zp9Cffy7duLKqEDhsi+Jq14TV6Uq+HftxHP5bzDx7liVXuqcU60K32f/ELqdvT2HFs2bFumzcuUautzQkfHPTaFz52tITU0hPb08e/YcDYLmzZrg8bjZtGkr1lrcbjeX/F9jln21ki5drqV6jXNCfa+//moe/8tDVKqYwXXX94r8ICUk7oJmeK7qjimbSt7MMSe0xze8nMKsd0K38xdMJrHHEKy3EPJzyZ0yMprlRo3O/KOk4ItF7LuzJwcfe5gyt94W3HqSV2fWYg8d4vDz40geNpLUp8bj3/Ej+HxRrbc0M+bEn/vxZ+MPDn2M1q0vY8ni92jd6jKys3Pwer2h9ipVKjF16rP07Xt/aN+bb7mbZ54exRefLeDQocN4vUeP2bx579Loojb8/sbbeWTUcdMOElG+9UvJnTCEvNnj8GR2LdJmksvhqlQD36aVoW3uS68hb+YYcv82EO/y/+Bpf3O0S46KiJz5G2PSgIeAzkDF4OafgHnAk9bafcXs1x/oD/BMo3r0qlk1EuXFlHf1SuKqVMOkpuHfvRP3RU1Cba4KFSlctRyAwsWfU7j4cwASru4Efn9M6i2NtmfnUKP60bPy6tWqkpNT9A29nJwddO3WD4CyZZPocs
O1HDgQmNpLSUlm/rzpjBj5FF8uPvo+TNaXy8i8sgsAv23Xmnr16p7w2Is+/ZK6dWuRkVGe3btL53TCmcq/bR2mfCUokwzBqaK4BpfiXbcU/MEn6qQUXJVr4t++CQDv6iwSbx4aq5IjKlJn/q8De4FMa22GtTYDaBvcNqe4nay1k6y1zay1zUpT8LuqVgtdjzu3HiY+HntgP4XLFuNu2hyTnIxJTsbdtDmFyxYDYNLKBf5NTibx2uvJe29BTGovjZYsXc5559Whdu0auN1uunW7njcXvF+kT0ZG+dArhGFDBzJ12iwA3G43c+dM4ZVX3mDu3KLHpGLFDAA8Hg9DBg9g0qQZAJx7bu1Qn6ZNGuHxuBX8UWLKVw5dd1WpDXHxoeAHiG90Bd7VXxzdIfcwJjEJk14FgLi6jfDv2h6tcqMqUnP+ta21o4/dYK39ERhtjLmtmH3OWskPjsB9URNMahrlps0h99WXA//JgPx35uNp2ZqEKzuAz4vNL+Dg6EcAsIcOkjtrOmnjAisJcmdOwx4KnF2WvWMQcXXODW33/5Adg5GVTj6fj3vuHc7bb71GnMvF1GmzWbPmW0aNHMzSZStYsOAD2rS5gscfewiLZdGiLAYOehiArl070arVpaRnlKdnz24A3N73PlasWM3g+++i47XtcLlcTJw4nU8WfgZAlxs6csstN1JY6CUvN4+bbr4rZmMvbRK6DMBVqz4mKYUy946ncOEbod8977KPiK/fnPjGrbB+H3gLyJ87PrSvSauASU3Hv3Xt0Tu0fvLfnExi13ux1g95h8mfPynaw4qKiCz1NMa8D3wITLPW7ghuqwz0Bn5rrW13qvs4m5Z6SlFn+lJPKdnZtNRTThTrpZ7dgQzgP8aYPcaYPcBCIB3oWtKOIiISeRGZ9rHW7gWGBi9FGGP6AC9H4nFFRCQ8sVjq+UgMHlNERI4RqaWeK4trAioX0yYiIlESqdU+lYEOBJZ2HssAn0foMUVEJEyRCv8FQLK1dvnxDcaYhRF6TBERCVOk3vC9vYS2myLxmCIiEj59t4+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXGg+FgXUJzUaS/HugT5pc5pFesKROQUdOYvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOFBY4W+MSTjJtvTTX46IiERDuGf+/zTGuI/cMMZUBT6ITEkiIhJp4Yb/v4E5xpg4Y0xt4D3goUgVJSIikRUfTidr7YvGGA+BJ4HawB3W2s8jWZiIiEROieFvjLn/2JtADWA5cJkx5jJr7dhIFne2GP7Xsfz3s8Wkly/Hv1954YT2xV+tZNCwR6hWtQoA7dpcwV233Ux+fgG9BgyhoLAQn9fHb9v+hj/2vRWArKVf88zzU/D7LUlJiTz+8APUrH5OVMdVmnVon8nYsY8S53Lx0sszeWrM80Xaa9asxuRJY6lQMZ29e/bRs/cgtm/P4eKLG/L8+CdISU3G5/PxxJPjmTNnPgBtM1syevSf8XjcfPXVKvr1fwCfz0e5cmlMfvEZ6tatRX5ePn37P8Dq1etjMexSx9OpH/HnN8UePkDuC8NOaI87/xI8bW/EWgt+HwXvzcD//be4ajfA0/6WUD9Xharkz30O3/pluOo0xNOuBxgXFOSRP28idu+OaA4rKoy1tvhGY0aWtLO19pHTXlFQ4a7NxRd2hlm6fBVJZcrwp8eeLjb8p86cy4QxRX9c1lpyc/NISipDoddLz7sGM+yeO7i4UX2u/UNfnn1yBOfWrsmsfy5g1Zr1PD78gWgN6Vcpc06rWJdQIpfLxdrVi7i6Yw+ys3PI+uJtbrn1btau3RDqM2vmRN56+0NmzJhD28yW9OrVnd59BlGvXl2stWzcuIWqVSuzOOsdGjXO5MCBg2zeuJj2V3dnw4bNjBo5mO++y+blqbMY/cRwDh0+zGN/GccFF5zL+L//lfZXd4/hT6Bk+4dnxrqEsLlqXggFeSR0vvOk4Y87AQrzATCVapB44yByJwwp2iexLEkDx/LzuIHgLaDMgKfJmz0Wu+sH4pu1w3XOuRTMnxiF0ZweZUe8asLpV+KZfyTDvTRp1uQituf872cGxhiSksoA4PV68Xq9GBM4bgY4fPhnAA4eOkzFChmnrV6na9G8KZs2bWXLlm0AvP76PK7r1KFI+NevX48HBo8C4JOFnzH3jSkAbNiwOdQnJ2cHP+3cTcWKGbjd8eTn54faP/zwvwx98I+8PHUW9eufz+inxgOwfv0matWqTqVKFfjpp13RGG6p5t+2DpNWofgOweAHMJ4EOMnJbnyDFvg2rgBvQWCDtZiEMljAJCRhD+09zVWfGcKa8zfGnA8MJjDfH9rHWntlZMoqfVZ8s5Yuve6mUoUMBg/oy3l1awHg8/nodtsgtm3/gR5dfkfjhhcC8Miwe7lr8AgSEzyULZvEa5PGxbL8UuWcalX4PvuH0O3s7Tm0aN60SJ+VK9fQ5YaOjH9uCp07X0Nqagrp6eXZs+doEDRv1gSPx82mTVux1uJ2u7nk/xqz7KuVdOlyLdVrBKbpVq5aww2dO/LZ50to3qwJtWpVp3q1qgr/KIm7oBmeq7pjyqaSN3PMCe3xDS+nMOud0O38BZNJ7DEE6y2E/Fxyp5Q4AXLWCne1zxzga2A4MOSYi4ShwQXn8sHcafxz2gRu+n0nBj30aKgtLi6OudOe56N/zWDVmm/ZsHkrANNn/4t/PP0oH/37FTp3bM9Tz74Yo+pLnyOvro51/PTng0Mfo3Xry1iy+D1at7qM7OwcvF5vqL1KlUpMnfosffveH9r35lvu5pmnR/HFZws4dOgwXq8PgNFPPUe58mksXfI+AwbcxtfLv8Hr80VwhHIs3/ql5E4YQt7scXgyuxZpM8nlcFWqgW/TytA296XXkDdzDLl/G4h3+X/wtL852iVHRbjh77XW/sNau9hau+zIpbjOxpirj7meZoyZYoxZaYx5zRhTuYT9+htjlhpjlk6ePvN/GMaZLbls2dD0TusrWuD1etm7b3+RPqkpyTT/v8Z8mrWUPXv3sX7j5tCrgGuuas3yb9ZEve7Sant2DjWOefO8erWq5Bw3bZeTs4Ou3frRvEUH/jxiNAAHDhwEICUlmfnzpjNi5FN8ufir0D5ZXy4j88ouXN7ydyxalMXGjVsAOHjwEH373U+z5u3p3WcQFStkhKacJHr829ZhyleCMsmhbXENLsW7bin4g0/GSSm4KtfEv30TAN7VWcTVOD8W5UZcuOH/pjHmbmNMVWNM+
pFLCf3/esz1Z4AcoBOwBCj2nRNr7SRrbTNrbbO+PXuEWdqZb9fuPaGzw1Vr1uO3lnJpqezZu48DBw8BkJefT9aSr6lTqwapKSkcOvwzW7dlA/D5kq+pW6tmzOovbZYsXc5559Whdu0auN1uunW7njcXvF+kT0ZG+dArhGFDBzJ12iwA3G43c+dM4ZVX3mDu3AVF9qlYMfC+jMfjYcjgAUyaNAOAtLRU3O7AZyRvv+0mFn36JQeDx10iy5Q/eq7pqlIb4uIh9+jPPr7RFXhXf3F0h9zDmMQkTHpgZV5c3Ub4d22PVrlRFdacP9Ar+O+xUz0WqBvGvs2stU2C18cZY3qV2PssNGTkkyz5eiX79h3gqs63cPftt4amCLrfcC3vf/Ips//1FnHxcSR6PIx5ZBjGGHbu3svDf3kan9+P9Vs6XNmKzJaXAjBq6CDue/hxjMuQmpLMYw/dF8shlio+n4977h3O22+9RpzLxdRps1mz5ltGjRzM0mUrWLDgA9q0uYLHH3sIi2XRoiwGDnoYgK5dO9Gq1aWkZ5SnZ89uANze9z5WrFjN4PvvouO17XC5XEycOJ1PFn4GQP0L6/HyS3/H5/exdu239Os/OGZjL20SugzAVas+JimFMveOp3DhG4GAB7zLPiK+fnPiG7fC+n3gLSB/7vjQviatAiY1Hf/WtUfv0PrJf3MyiV3vxVo/5B0mf/6kaA8rKkpc6vmL79SYbGAsgUUrA4BzbfCBjDErrbWNT3UfZ9NSTynqTF/qKSU7m5Z6yolOy1LPYxljGgENgMQj26y104vp/iKQErw+DagA7DTGVCHwITEREYmhcJd6jgQyCYT/28A1wKfAScO/uM8HWGt/NMZ88osqFRGR0ybcN3xvBK4CfrTW9gEuBk74mucw6YNjIiIxFu60T5611m+M8RpjUoGfKOHNXmPMyuKagGKXeoqISHScMvxNYL3bSmNMOQJz+cuAQ8DiEnarDHQAjv9ctAH0baAiIjF2yvC31lpjTBNr7T7gBWPMu0Cqtba4s3uABUCytfaEN3eNMQt/cbUiInJahDvtk2WMaW6tXWKt3Xqqztba20touync4kREJDLCDf+2wB3GmO+AwwSmb2w46/VFROTME274XxPRKkREJKrC/TOO30W6EBERiZ5w1/mLiEgpovAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQeKj3UBxfL7Yl2BiEippTN/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBwoPtYFlAbDn/gb//18Cenl0/j39AkntC/+eiWDHvoL1apWBqBd6yu4q08P8vML6DVwKAUFhfh8fn6b2ZI/3n4zAA8/Po6lK74huWwSAI//6T4urFc3eoMq5Tq0z2Ts2EeJc7l46eWZPDXm+SLtNWtWY/KksVSomM7ePfvo2XsQ27fncPHFDXl+/BOkpCbj8/l44snxzJkzH4Ar2/6GJ58cjsvl4vChw9zW9z42bdrKvff057bbeuD1etm1cw99+9/Ptm3bYzHsUsfTqR/x5zfFHj5A7gvDTmiPO/8SPG1vxFoLfh8F783A//23uGo3wNP+llA/V4Wq5M99Dt/6ZbjqNMTTrgcYFxTkkT9vInbvjmgOKyqMtTbWNZxU4U8bzszCTmLp8m9IKpPInx4fW2z4T535LyY8NbLIdmstubl5JCWVodDrpefdDzLsnv5c3PBCHn58HG2uaE77tr+J1jBOmzLVM2NdQolcLhdrVy/i6o49yM7OIeuLt7nl1rtZu3ZDqM+smRN56+0PmTFjDm0zW9KrV3d69xlEvXp1sdayceMWqlatzOKsd2jUOJP9+w+wZvUiuvy+D+vWbeTOO3rRvHkTbu97H5ltruDLxV+Rm5vHHf170qbN5dx0810x/AmUbP/wzFiXEDZXzQuhII+EzneeNPxxJ0BhPgCmUg0SbxxE7oQhRfskliVp4Fh+HjcQvAWUGfA0ebPHYnf9QHyzdrjOOZeC+ROjMJrTo+yIV004/TTtcxo0a9KItNSU/3k/YwxJSWUA8Hq9eL0+DGEdN/kVWjRvyqZNW9myZRuFhYW8/vo8ruvUoUif+vXr8fHHnwLwycLPuK5TewA2bNjMxo1bAMjJ2cFPO3dTsWIGEHgyT00J/D9IS0shJydwtrjwP5+Tm5sHwJeLl1G9WtXID9Ih/NvWYXMPFd8hGPwAxpMAJznZjW/QAt/GFeAtCGywFpMQ+L00CUnYQ3tPa81nCk37RMmK1evo0vuPVKqQweABt3FenVoA+Hw+uvW9l23bc+hxw7U0bnhBaJ9nX5zBP6bO4rJLLua+O3vj8bhjVX6pck61Knyf/UPodvb2HFo0b1qkz8qVa+hyQ0fGPzeFzp2vITU1hfT08uzZczQImjdrgsfjZtOmrQDcccdg3pw/g9zcPA4cPEjL33Q64bH79O7Bu+99EpmByUnFXdAMz1XdMWVTyZs55oT2+IaXU5j1Tuh2/oLJJPYYgvUWQn4uuVNGnrBPaRCRM39jTJox5kljzDpjzO7gZW1wW7lIPOaZrMH55/HBnJf459TnuOn3v2PQn/4SaouLi2Puy+P5aO5UVq39lg2btwJw7x29ePPVF5j94jj2HzzIlFffiFH1pY8xJ766On7688Ghj9G69WUsWfwerVtdRnZ2Dl6vN9RepUolpk59lr597w/te889/eh03a3UrtuMadNm8/SYoqFx001daHbJxTz9zD8iMCopjm/9UnInDCFv9jg8mV2LtJnkcrgq1cC3aWVom/vSa8ibOYbcvw3Eu/w/eNrfHO2SoyJS0z6vA3uBTGtthrU2A2gb3DanuJ2MMf2NMUuNMUsnT58VodKiL7lsUmh6p/XlzfF6fezdt79In9SUZJo3vYhPv/wKgIoV0jHG4PG46dyxHavWfhv1ukur7dk51Kh+Tuh29WpVQ1M0R+Tk7KBrt340b9GBP48YDcCBAwcBSElJZv686YwY+RRfLg4crwoV0ml8UQMWL/kagNfnzOfyy5uF7u+qK1vx0LBBdO7Sm4KCgoiOT07Ov20dpnwlKJMc2hbX4FK865aC3xfYkJSCq3JN/Ns3AeBdnUVcjfNjUW7ERSr8a1trR1trfzyywVr7o7V2NFCzuJ2stZOstc2stc369vxDhEqLvl2794bODletWY/fbymXlsqevfs5cDAwX5mXn0/W0uXUqVkdgJ279gCBM9KPF2VRr26t2BRfCi1ZupzzzqtD7do1cLvddOt2PW8ueL9In4yM
8qFXCMOGDmTqtMDJiNvtZu6cKbzyyhvMnbsg1H/v3v2kpaVSL7giq91VrVm3LvAGcpMmDZnw/JPc0KUPO3fujsYQJciUrxy67qpSG+Li4Zj3COIbXYF39RdHd8g9jElMwqRXASCubiP8u0rnyqxIzfl/Z4x5EJhmrd0BYIypDPQGvo/QY8bMkFFPseTrVezbf4CruvTi7ttuDk0RdO/ckfcXfsrsf79DXJyLxIQExox6EGMMO3fv4eG/jsPn82Otnw5tW5HZsgUAQx97mr379mOt5YLz6jJy8IBYDrFU8fl83HPvcN5+6zXiXC6mTpvNmjXfMmrkYJYuW8GCBR/Qps0VPP7YQ1gsixZlMXDQwwB07dqJVq0uJT2jPD17dgPg9r73sWLFau64awivz56E32/Zt3cfffs/AMDoJ/5McnJZZs0MrBj5/vvt3NClT2wGX8okdBmAq1Z9TFIKZe4dT+HCNwIBD3iXfUR8/ebEN26F9fvAW0D+3PGhfU1aBUxqOv6ta4/eofWT/+ZkErvei7V+yDtM/vxJ0R5WVERkqacxpjwwDLgeqAxYYAcwHxhtrd1zqvs4m5Z6SlFn+lJPKdnZtNRTThTuUs+InPlba/caY14GPgCyrLWh11nGmKuBdyPxuCIiEp5IrfYZBMwD/gh8Y4y5/pjmv0biMUVEJHyRmvPvB1xirT1kjKkNvGGMqW2t/TvoU0wiIrEWqfCPOzLVY63daozJJPAEUAuFv4hIzEVqqeePxpgmR24Enwh+B1QALorQY4qISJgiFf49gR+P3WCt9VprewKtI/SYIiISpkit9skuoe2zSDymiIiET9/qKSLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxIIW/iIgDKfxFRBxI4S8i4kAKfxERB1L4i4g4kMJfRMSBFP4iIg6k8BcRcSCFv4iIAyn8RUQcSOEvIuJACn8REQdS+IuIOJDCX0TEgRT+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQMZaG+saHMkY099aOynWdcgvo+N39tKxC9CZf+z0j3UB8qvo+J29dOxQ+IuIOJLCX0TEgRT+seP4OceznI7f2UvHDr3hKyLiSDrzFxFxIIX/aWCMudoYs94Ys9EYM+wk7QnGmNnB9i+NMbWPaXsouH29MabDMdtfMsb8ZIz5JjqjkOOFcVxbG2O+MsZ4jTE3xqJGOblT/f6YgGeDx3alMeb/ol1jrCn8fyVjTBzwPHAN0ADoYYxpcFy324G91trzgHHA6OC+DYA/AA2Bq4EJwfsDmBrcJjEQ5nHdBvQGXotudRKGqZT8+3MNUC946Q/8Iwo1nVEU/r9eC2CjtXaztbYAmAVcf1yf64FpwetvAFcZY0xw+yxrbb61dguwMXh/WGv/C+yJxgDkpE55XK21W621KwF/LAqU4oXx+3M9MN0GZAHljDFVo1PdmUHh/+tVA74/5nZ2cNtJ+1hrvcB+ICPMfSU2dGxKN8cfX4X/r2dOsu34JVTF9QlnX4kNHZvSzfHHV+H/62UDNY65XR34obg+xph4II3AS9Jw9pXY0LEp3Rx/fBX+v94SoJ4xpo4xxkPgDdz5x/WZD/QKXr8R+NgGPmAxH/hDcDVQHQJvPi2OUt1SsnCOq5xhr+LzAAADzklEQVS95gM9g6t+LgP2W2tzYl1UNMXHuoCznbXWa4z5I/AeEAe8ZK1dbYx5FFhqrZ0PTAFmGGM2Ejjj/0Nw39XGmNeBNYAXGGCt9QEYY2YCmUAFY0w2MNJaOyXKw3OscI6rMaY58C+gPNDJGPOItbZhDMuWoJP9/gBuAGvtC8DbQEcCiyx+BvrEptLY0Sd8RUQcSNM+IiIOpPAXEXEghb+IiAMp/EVEHEjhLyLiQAp/kTOQMaa3MeacWNchpZfCX84awQ/knDH/Z4Of1o6U3sD/FP4RrkdKGa3zlzNa8G8fvAN8AlwOdAYuAB4BEoBNQB9r7SFjTEdgLLAL+Aqoa6393Snu+13gS6Ap8C3Q01r7szFmBNAJKAN8DtxhrbXGmIXB2y0JfEr0W2A44AF2Azdba3cYY0YBdYCqwPnA/cBlBL5KeDvQyVpbaIy5JFhzcrDu3sH7nhrslxscd4Pj+1lrc46vx1r7zP/y8xUHs9bqossZewFqE/jK5MuCtysA/wXKBm8PBUYAiQS+pbFOcPtMYEEY922BlsHbLwGDg9fTj+k3g0BYAywEJhzTVp6jJ1F9gWeC10cBnxL4VOnFBD5Fek2w7V8EnsTcBIK7YnB7dwKfJD7yOM2C10/Vb0JJ49RFl5Nd9DJRzgbf2cB3rkPg7LkB8FngTyLgAb4ALgQ228DfRYBA+PcP476/t9Z+Frz+CjAIeBpoa4x5EEgC0oHVwJvBfrOP2b86MDv4XfAeYMsxbe/YwNn9KgJfEfFucPsqAk88FwCNgA+CY4kDTvb9MqfqN/sk+4iUSOEvZ4PDx1w3wAfW2h7HdjDGNP2F9338vKc1xiQCEwiceX8fnMJJLKae8cBYG/iun0wCZ/xH5ANYa/3GmEJr7ZHH8hP43TPAamvt5aeo8VT9DhezXaRYZ8ybZyJhygJaGmPOAzDGJBljzgfWAXWP+fvI3cO8v5rGmCOh2oPAVM2RoN9ljEkm8E2sxUkjMDcPR7+5NVzrgYpHHt8Y4zbGHPliuINAShj9RH4Rhb+cVay1Owm8KTrTGLOSwJPBhdbaXOBu4F1jzKfADgJ/MQ1jTDNjzORi7nIt0Ct4X+nAP6y1+4AXCUzP/JvA1zsXZxQwxxiziMAbsf/LWAoIPLGMNsasAJYDVwSbpwIvGGOWE5jmKa6fyC+i1T5Sahhjkm1g1Y8h8MfXN1hrx5XQvzaBN4UbRalEkTOGzvylNOkXPFNeTWA6ZmKM6xE5Y+nMX0TEgXTmLyLiQAp/EREHUviLiDiQwl9ExIEU/iIiDqTwFxFxoP8HkqdScClQ374AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {} - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "The calculated RMSE scores can be visualized to comparatively study how model performance is affected by different parameters." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "It can be seen from this visualization that RMSE first decreases and then increases as rank increases, due to overfitting. When the rank equals 20 and the regularization parameter equals 0.1, the model achieves the lowest RMSE score." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.5 Top K recommendation" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### 3.5.1 Top k for all users (items)" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "dfs_rec = model.recommendForAllUsers(10)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "dfs_rec.show(10)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+------+--------------------+\n", - "|UserId| recommendations|\n", - "+------+--------------------+\n", - "| 471|[[814, 3.7504902]...|\n", - "| 463|[[814, 3.1264882]...|\n", - "| 833|[[814, 3.3154674]...|\n", - "| 496|[[814, 3.0553887]...|\n", - "| 148|[[814, 4.030121],...|\n", - "| 540|[[814, 3.866104],...|\n", - "| 392|[[814, 4.1199512]...|\n", - "| 243|[[814, 3.7487845]...|\n", - "| 623|[[814, 3.9018161]...|\n", - "| 737|[[814, 3.85075], ...|\n", - "+------+--------------------+\n", - "only showing top 10 rows\n", - "\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### 3.5.2 Top k for a selected set of users (items)" - ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "users = dfs_train.select(als.getUserCol()).distinct().limit(3)\n", "\n", "dfs_rec_subset = model.recommendForUserSubset(users, 10)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "dfs_rec_subset.show(10)" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+------+--------------------+\n", - "|UserId| recommendations|\n", - "+------+--------------------+\n", - "| 471|[[814, 3.7504902]...|\n", - "| 463|[[814, 3.1264882]...|\n", - "| 148|[[814, 4.030121],...|\n", - "+------+--------------------+\n", - "\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### 3.5.3 Run-time considerations for top-k recommendations\n", "\n", @@ -735,28 +593,28 @@ "* Inner products of user-item pairs are calculated individually instead of leveraging matrix block multiplication features which are available in certain contemporary computing acceleration libraries (e.g., BLAS).\n", "\n", "More details about possible optimizations of the top k recommendations in Spark can be found [here](https://engineeringblog.yelp.com/2018/05/scaling-collaborative-filtering-with-pyspark.html)." 
- ], - "metadata": {} + ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# cleanup spark instance\n", "spark.stop()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## References" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "1. Yehuda Koren, Robert Bell, and Chris Volinsky, \"Matrix Factorization Techniques for Recommender Systems\n", "\", ACM Computer, Vol. 42, Issue 8, pp 30-37, Aug., 2009.\n", @@ -766,14 +624,18 @@ "4. Seaborn. url: https://seaborn.pydata.org/\n", "5. Scaling collaborative filtering with PySpark. url: https://engineeringblog.yelp.com/2018/05/scaling-collaborative-filtering-with-pyspark.html\n", "6. Matrix Completion via Alternating Least Square (ALS). url: http://stanford.edu/~rezab/classes/cme323/S15/notes/lec14.pdf" - ], - "metadata": {} + ] } ], "metadata": { + "celltoolbar": "Tags", + "interpreter": { + "hash": "7ec2189bea0434770dca7423a25e631e1cca9c4e2b4ff137a82f4dff32ac9607" + }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.6.9 64-bit ('.env': venv)" + "display_name": "Python 3", + "language": "python", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -786,11 +648,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" - }, - "interpreter": { - "hash": "7ec2189bea0434770dca7423a25e631e1cca9c4e2b4ff137a82f4dff32ac9607" } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 48ddeb1162..0e3fb41e09 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -8,6 +8,8 @@ except ImportError: pass # disable error while collecting tests for non-notebook environments +from recommenders.utils.constants import DEFAULT_RATING_COL, DEFAULT_USER_COL, DEFAULT_ITEM_COL + @pytest.mark.notebooks @pytest.mark.spark @@ -36,7 +38,12 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["als_deep_dive"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=data_size)) + parameters=dict( + MOVIELENS_DATA_SIZE=data_size, + COL_USER=DEFAULT_USER_COL, + COL_ITEM=DEFAULT_ITEM_COL, + COL_RATING=DEFAULT_RATING_COL, + )) @pytest.mark.notebooks From 5e679bef5ee5a919aca0f8b8e312576f40bca13c Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Thu, 23 Sep 2021 20:34:21 +0000 Subject: [PATCH 08/27] Mock movielens schema v2 --- .../als_movielens_diversity_metrics.ipynb | 104 ++++-------------- recommenders/datasets/mock/movielens.py | 81 ++++++++------ recommenders/datasets/movielens.py | 40 +++---- recommenders/utils/constants.py | 9 ++ setup.py | 2 +- tests/unit/examples/test_notebooks_python.py | 2 +- .../datasets/mock/test_movielens.py | 88 +++++++-------- .../recommenders/datasets/test_movielens.py | 29 ++++- 8 files changed, 170 insertions(+), 185 deletions(-) diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index aaba0a35d1..38de757530 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -142,7 +142,7 @@ }, { 
"cell_type": "code", - "execution_count": 1, + "execution_count": null, "source": [ "# set the environment path to find Recommenders\n", "%load_ext autoreload\n", @@ -174,17 +174,7 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "System version: 3.6.9 (default, Jan 26 2021, 15:33:00) \n", - "[GCC 8.4.0]\n", - "Spark version: 2.4.8\n" - ] - } - ], + "outputs": [], "metadata": {} }, { @@ -197,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "source": [ "# top k items to recommend\n", "TOP_K = 10\n", @@ -206,11 +196,11 @@ "MOVIELENS_DATA_SIZE = 'mock10'\n", "\n", "# user, item column names\n", - "COL_USER=\"userId\"\n", - "COL_ITEM=\"itemID\"\n", - "COL_RATING=\"rating\"\n", - "COL_TITLE=\"title\"\n", - "COL_GENRE=\"genres\"" + "COL_USER=\"UserId\"\n", + "COL_ITEM=\"ItemId\"\n", + "COL_RATING=\"Rating\"\n", + "COL_TITLE=\"Title\"\n", + "COL_GENRE=\"Genre\"" ], "outputs": [], "metadata": { @@ -230,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "source": [ "# the following settings work well for debugging locally on VM - change when running on a cluster\n", "# set up a giant single executor with many threads and specify memory cap\n", @@ -251,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "source": [ "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", "schema = StructType(\n", @@ -263,32 +253,10 @@ " )\n", ")\n", "\n", - "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema, title_col=COL_TITLE, genres_col=\"genres\")\n", + "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema, title_col=COL_TITLE, genres_col=COL_GENRE)\n", "data.show()" ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "+------+------+------+---------+-----+--------+\n", - "|userID|itemID|rating|timestamp|title| genres|\n", - "+------+------+------+---------+-----+--------+\n", - "| 8| 3| 4.0|2022-2-22| foo|genreA|0|\n", - "| 8| 9| 5.0|2022-2-22| foo|genreA|0|\n", - "| 5| 1| 5.0|2022-2-22| foo|genreA|0|\n", - "| 9| 1| 1.0|2022-2-22| foo|genreA|0|\n", - "| 7| 5| 5.0|2022-2-22| foo|genreA|0|\n", - "| 3| 6| 5.0|2022-2-22| foo|genreA|0|\n", - "| 2| 6| 2.0|2022-2-22| foo|genreA|0|\n", - "| 5| 7| 4.0|2022-2-22| foo|genreA|0|\n", - "| 6| 9| 2.0|2022-2-22| foo|genreA|0|\n", - "| 5| 6| 3.0|2022-2-22| foo|genreA|0|\n", - "+------+------+------+---------+-----+--------+\n", - "\n" - ] - } - ], + "outputs": [], "metadata": {} }, { @@ -300,22 +268,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "source": [ "train_df, test_df = spark_random_split(data.select(COL_USER, COL_ITEM, COL_RATING), ratio=0.75, seed=123)\n", "print (\"N train_df\", train_df.cache().count())\n", "print (\"N test_df\", test_df.cache().count())" ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "N train_df 6\n", - "N test_df 4\n" - ] - } - ], + "outputs": [], "metadata": {} }, { @@ -334,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "source": [ "users = train_df.select(COL_USER).distinct()\n", "items = train_df.select(COL_ITEM).distinct()\n", @@ -355,7 +314,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "source": [ "header 
= {\n", " \"userCol\": COL_USER,\n", @@ -380,22 +339,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "source": [ "with Timer() as train_time:\n", " model = als.fit(train_df)\n", "\n", "print(\"Took {} seconds for training.\".format(train_time.interval))" ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Took 2.296935658028815 seconds for training.\n" - ] - } - ], + "outputs": [], "metadata": {} }, { @@ -409,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "source": [ "# Score all user-item pairs\n", "dfs_pred = model.transform(user_item)\n", @@ -431,16 +382,7 @@ " \n", "print(top_k_reco.count())" ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "30\n", - "30\n" - ] - } - ], + "outputs": [], "metadata": {} }, { @@ -688,9 +630,9 @@ "source": [ "# Get movie features \"title\" and \"genres\"\n", "movies = (\n", - " data.groupBy(COL_ITEM, COL_TITLE, \"genres\").count()\n", + " data.groupBy(COL_ITEM, COL_TITLE, COL_GENRE).count()\n", " .na.drop() # remove rows with null values\n", - " .withColumn(\"genres\", F.split(F.col(\"genres\"), \"\\|\")) # convert to array of genres\n", + " .withColumn(COL_GENRE, F.split(F.col(COL_GENRE), \"\\|\")) # convert to array of genres\n", " .withColumn(COL_TITLE, F.regexp_replace(F.col(COL_TITLE), \"[\\(),:^0-9]\", \"\")) # remove year from title\n", " .drop(\"count\") # remove unused columns\n", ")" @@ -724,7 +666,7 @@ "hashed_data = text_hasher.transform(clean_data)\n", "\n", "# step 2: fit a CountVectorizerModel from column \"genres\".\n", - "count_vectorizer = CountVectorizer(inputCol=\"genres\", outputCol=\"genres_features\")\n", + "count_vectorizer = CountVectorizer(inputCol=COL_GENRE, outputCol=\"genres_features\")\n", "count_vectorizer_model = count_vectorizer.fit(hashed_data)\n", "vectorized_data = count_vectorizer_model.transform(hashed_data)\n", "\n", diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index 4344de7e42..44c5acc221 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -14,22 +14,26 @@ DEFAULT_ITEM_COL, DEFAULT_RATING_COL, DEFAULT_TIMESTAMP_COL, + DEFAULT_TITLE_COL, + DEFAULT_GENRE_COL, + DEFAULT_HEADER ) import random from typing import Optional -from pandera.typing import DateTime, Series -from pandera import Field, Check -from pandera.schemas import DataFrameSchema +import pandas +import pyspark.sql +from pandera.typing import Series +from pandera import Field from pyspark.sql import SparkSession -from pyspark.sql.types import StructField, StructType, LongType, IntegerType, StringType, FloatType +from pyspark.sql.types import StructField, StructType, IntegerType, StringType, FloatType -class MockMovielens100kSchema(pa.SchemaModel): +class MockMovielensSchema(pa.SchemaModel): """ Mock dataset schema to generate fake data for testing purpose. 
- This schema is configured to mimic the Movielens 100k dataset + This schema is configured to mimic the Movielens dataset http://files.grouplens.org/datasets/movielens/ml-100k/ """ @@ -38,71 +42,78 @@ class MockMovielens100kSchema(pa.SchemaModel): # And 1682 total items itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) # Rating is on the scale from 1 to 5 - rating: Series[int] = Field(in_range={"min_value": 1, "max_value": 5}) + rating: Series[float] = Field(in_range={"min_value": 1, "max_value": 5}) timestamp: Series[str] = Field(eq="2022-2-22") title: Series[str] = Field(eq="foo") - genres: Series[str] = Field(eq="genreA|0") + genre: Series[str] = Field(eq="genreA|0") @classmethod def get_df( cls, size: int = 3, seed: int = 100, - # title_col: Optional[str] = None, genres_col: Optional[str] = None - ): + keep_first_n_cols: Optional[int] = None, + keep_title_col: bool = False, keep_genre_col: bool = False, + ) -> pandas.DataFrame: """Return fake movielens dataset as a Pandas Dataframe with specified rows. Args: size (int): number of rows to generate seed (int, optional): seeding the pseudo-number generation. Defaults to 100. - title_col (str, optional): if not None, append a title column. Defaults to None. - genres_col (str, optional): if not None, append a genre column. Defaults to None. + keep_first_n_cols (int, optional): keep the first n default movielens columns. + keep_title_col (bool): remove the title column if False. Defaults to True. + keep_genre_col (bool): remove the genre column if False. Defaults to True. Returns: pandas.DataFrame: a mock dataset """ + schema = cls.to_schema() + if keep_first_n_cols is not None: + if keep_first_n_cols < 1 or keep_first_n_cols > len(DEFAULT_HEADER): + raise ValueError(f"Invalid value for 'keep_first_n_cols': {keep_first_n_cols}. Valid range: [1-{len(DEFAULT_HEADER)}]") + schema = schema.remove_columns(DEFAULT_HEADER[keep_first_n_cols:]) + if not keep_title_col: + schema = schema.remove_columns([DEFAULT_TITLE_COL]) + if not keep_genre_col: + schema = schema.remove_columns([DEFAULT_GENRE_COL]) + random.seed(seed) - return cls.example(size=size) + return schema.example(size=size) @classmethod def get_spark_df( cls, spark: SparkSession, size: int = 3, seed: int = 100, - # title_col: Optional[str] = None, genres_col: Optional[str] = None, - # schema: Optional[StructType] = None - ): + keep_title_col: bool = False, keep_genre_col: bool = False, + ) -> pyspark.sql.DataFrame: """Return fake movielens dataset as a Spark Dataframe with specified rows Args: spark (SparkSession): spark session to load the dataframe into size (int): number of rows to generate seed (int, optional): seeding the pseudo-number generation. Defaults to 100. - title_col (str, optional): if not None, append a title column. Defaults to None. - genres_col (str, optional): if not None, append a genre column. Defaults to None. - schema (pyspark.sql.types.StructType, optional): dataset schema. Defaults to None. + keep_title_col (bool): remove the title column if False. Defaults to False. + keep_genre_col (bool): remove the genre column if False. Defaults to False. 
Returns: pyspark.sql.DataFrame: a mock dataset """ - pandas_df = cls.get_df(size=size, seed=seed) + pandas_df = cls.get_df(size=size, seed=seed, keep_title_col=True, keep_genre_col=True) + # serialize the pandas.df to avoid the expensive java <-> python communication pandas_df.to_csv('test.csv', header=False, index=False) - default_schema = StructType([ + + deserialization_schema = StructType([ StructField(DEFAULT_USER_COL, IntegerType()), StructField(DEFAULT_ITEM_COL, IntegerType()), StructField(DEFAULT_RATING_COL, FloatType()), StructField(DEFAULT_TIMESTAMP_COL, StringType()), - StructField("title", StringType()), - StructField("genres", StringType()), + StructField(DEFAULT_TITLE_COL, StringType()), + StructField(DEFAULT_GENRE_COL, StringType()), ]) - return spark.read.csv('test.csv', schema=default_schema) - - # @classmethod - # def _get_item_df(cls, size, title_col: Optional[str] = None, genres_col: Optional[str] = None): - # schema = DataFrameSchema() # create an empty schema - # if title_col is not None: - # # adds a title column with random alphabets - # schema = schema.add_columns({title_col: pa.Column(str, Check.str_matches(r'^[a-z]+$'))}) - # if genres_col is not None: - # # adds a genre column with '|' separated string - # schema = schema.add_columns({genres_col: pa.Column(str, Check.str_matches(r'^[a-z]+\|[0-9]$'))}) - # schema.example() \ No newline at end of file + spark_df = spark.read.csv('test.csv', schema=deserialization_schema) + + if not keep_title_col: + spark_df = spark_df.drop(DEFAULT_TITLE_COL) + if not keep_genre_col: + spark_df = spark_df.drop(DEFAULT_GENRE_COL) + return spark_df diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index 863578902b..c47865afcb 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -7,14 +7,12 @@ import warnings import pandas as pd from zipfile import ZipFile -from recommenders.datasets.mock.movielens import MockMovielens100kSchema +from recommenders.datasets.mock.movielens import MockMovielensSchema from recommenders.datasets.download_utils import maybe_download, download_path from recommenders.utils.notebook_utils import is_databricks from recommenders.utils.constants import ( - DEFAULT_USER_COL, + DEFAULT_HEADER, DEFAULT_ITEM_COL, - DEFAULT_RATING_COL, - DEFAULT_TIMESTAMP_COL, ) try: @@ -130,12 +128,6 @@ def item_has_header(self): "Western", ) -DEFAULT_HEADER = ( - DEFAULT_USER_COL, - DEFAULT_ITEM_COL, - DEFAULT_RATING_COL, - DEFAULT_TIMESTAMP_COL, -) # Warning and error messages WARNING_MOVIE_LENS_HEADER = """MovieLens rating dataset has four columns @@ -197,10 +189,6 @@ def load_pandas_df( if size not in DATA_FORMAT and size not in MOCK_DATA_FORMAT: raise ValueError(ERROR_MOVIE_LENS_SIZE) - if size in MOCK_DATA_FORMAT: - # generate fake data using the dictionary as a kwarg to the generation function - return MockMovielens100kSchema.get_df(**MOCK_DATA_FORMAT[size]) - if header is None: header = DEFAULT_HEADER elif len(header) < 2: @@ -209,6 +197,15 @@ def load_pandas_df( warnings.warn(WARNING_MOVIE_LENS_HEADER) header = header[:4] + if size in MOCK_DATA_FORMAT: + # generate fake data + return MockMovielensSchema.get_df( + keep_first_n_cols=len(header), + keep_title_col=(title_col is not None), + keep_genre_col=(genres_col is not None), + **MOCK_DATA_FORMAT[size] # supply the rest of the kwarg with the dictionary + ) + movie_col = header[1] with download_path(local_cache_path) as path: @@ -368,9 +365,9 @@ def load_spark_df( schema* (pyspark.StructType): Dataset 
schema. local_cache_path* (str): Path (directory or a zip file) to cache the downloaded zip file. If None, all the intermediate files will be stored in a temporary directory and removed after use. - dbutils* (Databricks.dbutils): Databricks utility object - title_col* (str): Title column name. If None, the column will not be loaded. - genres_col* (str): Genres column name. Genres are '|' separated string. + dbutils (Databricks.dbutils): Databricks utility object + title_col (str): Title column name. If None, the column will not be loaded. + genres_col (str): Genres column name. Genres are '|' separated string. If None, the column will not be loaded. year_col* (str): Movie release year column name. If None, the column will not be loaded. @@ -413,8 +410,13 @@ def load_spark_df( raise ValueError(ERROR_MOVIE_LENS_SIZE) if size in MOCK_DATA_FORMAT: - # generate fake data using the dictionary as a kwarg to the generation function - return MockMovielens100kSchema.get_spark_df(spark, **MOCK_DATA_FORMAT[size]) + # generate fake data + return MockMovielensSchema.get_spark_df( + spark, + keep_title_col=(title_col is not None), + keep_genre_col=(genres_col is not None), + **MOCK_DATA_FORMAT[size] # supply the rest of the kwarg with the dictionary + ) schema = _get_schema(header, schema) if len(schema) < 2: diff --git a/recommenders/utils/constants.py b/recommenders/utils/constants.py index 0e7ed34a9e..e24a58d725 100644 --- a/recommenders/utils/constants.py +++ b/recommenders/utils/constants.py @@ -6,6 +6,8 @@ DEFAULT_ITEM_COL = "itemID" DEFAULT_RATING_COL = "rating" DEFAULT_LABEL_COL = "label" +DEFAULT_TITLE_COL = "title" +DEFAULT_GENRE_COL = "genre" DEFAULT_RELEVANCE_COL = "relevance" DEFAULT_TIMESTAMP_COL = "timestamp" DEFAULT_PREDICTION_COL = "prediction" @@ -13,6 +15,13 @@ DEFAULT_ITEM_FEATURES_COL = "features" DEFAULT_ITEM_SIM_MEASURE = "item_cooccurrence_count" +DEFAULT_HEADER = ( + DEFAULT_USER_COL, + DEFAULT_ITEM_COL, + DEFAULT_RATING_COL, + DEFAULT_TIMESTAMP_COL, +) + COL_DICT = { "col_user": DEFAULT_USER_COL, "col_item": DEFAULT_ITEM_COL, diff --git a/setup.py b/setup.py index 7aef19fe52..3bcdb23f05 100644 --- a/setup.py +++ b/setup.py @@ -75,9 +75,9 @@ ], "dev": [ "black>=18.6b4,<21", + "pandera[strategies]>=0.6.5", # For generating fake datasets "pytest>=3.6.4", "pytest-cov>=2.12.1", - "pytest-lazy-fixture>=0.6.3", # Allow using fixtures in pytest.mark.parametrize ], } # for the brave of heart diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index da9e65d214..7b2de60ef5 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -102,7 +102,7 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks @pytest.mark.mock_movielens -@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +@pytest.mark.parametrize("data_size", ["10m", "mock100"]) def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["rlrmc_quickstart"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, diff --git a/tests/unit/recommenders/datasets/mock/test_movielens.py b/tests/unit/recommenders/datasets/mock/test_movielens.py index 446f5cd75e..ae5ea765de 100644 --- a/tests/unit/recommenders/datasets/mock/test_movielens.py +++ b/tests/unit/recommenders/datasets/mock/test_movielens.py @@ -1,72 +1,66 @@ -from recommenders.datasets.mock.movielens import MockMovielens100kSchema +from 
recommenders.datasets.mock.movielens import MockMovielensSchema from recommenders.datasets.movielens import DEFAULT_HEADER from recommenders.utils.constants import ( - DEFAULT_USER_COL, - DEFAULT_ITEM_COL, - DEFAULT_RATING_COL, - DEFAULT_TIMESTAMP_COL, + DEFAULT_GENRE_COL, + DEFAULT_TITLE_COL, ) import pytest import pandas import pyspark.sql from pyspark.sql import SparkSession -from pyspark.sql.types import IntegerType, FloatType, LongType, StructField, StructType - - -@pytest.fixture(scope="module") -def default_schema(): - return StructType([ - StructField(DEFAULT_USER_COL, IntegerType()), - StructField(DEFAULT_ITEM_COL, IntegerType()), - StructField(DEFAULT_RATING_COL, FloatType()), - StructField(DEFAULT_TIMESTAMP_COL, LongType()), - ]) - - -@pytest.fixture(scope="module") -def custom_schema(): - return StructType([ - StructField("userID", IntegerType()), - StructField("itemID", IntegerType()), - StructField("rating", FloatType()), - ]) @pytest.mark.parametrize("size", [10, 100]) def test_mock_movielens_schema__has_default_col_names(size): - df = MockMovielens100kSchema.example(size=size) + df = MockMovielensSchema.example(size=size) for col_name in DEFAULT_HEADER: assert col_name in df.columns +@pytest.mark.parametrize("keep_first_n_cols", [1, 2, 3, 4]) +def test_mock_movielens_schema__get_df_remove_default_col__return_success(keep_first_n_cols): + df = MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols) + assert len(df) > 0 + assert len(df.columns) == keep_first_n_cols + + +@pytest.mark.parametrize("keep_first_n_cols", [-1, 0, 100]) +def test_mock_movielens_schema__get_df_invalid_param__return_failure(keep_first_n_cols): + with pytest.raises(ValueError, match=r"Invalid value.*"): + MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols) + + +@pytest.mark.parametrize("keep_genre_col", [True, False]) +@pytest.mark.parametrize("keep_title_col", [True, False]) +@pytest.mark.parametrize("keep_first_n_cols", [None, 2]) @pytest.mark.parametrize("seed", [-1]) # seed for pseudo-random # generation @pytest.mark.parametrize("size", [0, 3, 10]) -def test_mock_movielens_schema__get_df__return_success(size, seed): - df = MockMovielens100kSchema.get_df(size, seed=seed) +def test_mock_movielens_schema__get_df__return_success(size, seed, keep_first_n_cols, keep_title_col, keep_genre_col): + df = MockMovielensSchema.get_df( + size=size, seed=seed, + keep_first_n_cols=keep_first_n_cols, + keep_title_col=keep_title_col, keep_genre_col=keep_genre_col + ) assert type(df) == pandas.DataFrame assert len(df) == size - -@pytest.mark.parametrize("seed", [0, 101]) # seed for pseudo-random # generation -@pytest.mark.parametrize("size", [3, 10]) -def test_mock_movielens_schema__get_spark_df__return_success(spark: SparkSession, size, seed): - df = MockMovielens100kSchema.get_spark_df(spark, size, seed=seed) - assert type(df) == pyspark.sql.DataFrame - assert df.count() == size + if keep_title_col: + assert len(df[DEFAULT_TITLE_COL]) == size + if keep_genre_col: + assert len(df[DEFAULT_GENRE_COL]) == size -@pytest.mark.parametrize("schema", [ - None, - pytest.lazy_fixture('default_schema'), - pytest.lazy_fixture('custom_schema') -]) -def test_mock_movielens_schema__get_spark_df__with_custom_schema_return_success(spark: SparkSession, schema): - df = MockMovielens100kSchema.get_spark_df(spark, schema=schema) +@pytest.mark.parametrize("keep_genre_col", [True, False]) +@pytest.mark.parametrize("keep_title_col", [True, False]) +@pytest.mark.parametrize("seed", [101]) # seed for 
pseudo-random # generation +@pytest.mark.parametrize("size", [0, 3, 10]) +def test_mock_movielens_schema__get_spark_df__return_success(spark: SparkSession, size, seed, keep_title_col, keep_genre_col): + df = MockMovielensSchema.get_spark_df(spark, size=size, seed=seed, keep_title_col=keep_title_col, keep_genre_col=keep_genre_col) assert type(df) == pyspark.sql.DataFrame - assert df.count() >= 0 - + assert df.count() == size -def test_mock_movielens_schema__get_spark_df__fail_on_empty_rows(spark: SparkSession): - with pytest.raises(ValueError, match="can not infer schema from empty dataset.*"): - MockMovielens100kSchema.get_spark_df(spark, 0) + if keep_title_col: + assert df.schema[DEFAULT_TITLE_COL] + if keep_genre_col: + assert df.schema[DEFAULT_GENRE_COL] diff --git a/tests/unit/recommenders/datasets/test_movielens.py b/tests/unit/recommenders/datasets/test_movielens.py index f05cc24882..d53f5d594c 100644 --- a/tests/unit/recommenders/datasets/test_movielens.py +++ b/tests/unit/recommenders/datasets/test_movielens.py @@ -1,7 +1,10 @@ from recommenders.datasets.movielens import DATA_FORMAT, MOCK_DATA_FORMAT from recommenders.datasets.movielens import load_pandas_df, load_spark_df +from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL import pyspark.sql +import pandas +from pandas.core.series import Series from pyspark.sql import SparkSession @@ -15,7 +18,31 @@ def test_mock_movielens_data__no_name_collision(): assert not collision -def test_mock_movielens_data_generation_succeed(spark: SparkSession): +def test_load_spark_df_mock_100__with_default_param__succeed(spark: SparkSession): df = load_spark_df(spark, "mock100") assert type(df) == pyspark.sql.DataFrame assert df.count() == 100 + + +def test_load_pandas_df_mock_100__with_default_param__succeed(): + df = load_pandas_df("mock100") + assert type(df) == pandas.DataFrame + assert len(df) == 100 + + +def test_load_spark_df_mock_100__with_custom_param__succeed(spark: SparkSession): + df = load_spark_df(spark, "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL) + assert df.schema[DEFAULT_TITLE_COL] + assert df.schema[DEFAULT_GENRE_COL] + assert df.count() == 100 + assert '|' in df.take(1)[0][DEFAULT_GENRE_COL] + assert df.take(1)[0][DEFAULT_TITLE_COL] == 'foo' + + +def test_load_pandas_df_mock_100__with_custom_param__succeed(spark: SparkSession): + df = load_pandas_df("mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL) + assert type(df[DEFAULT_TITLE_COL]) == Series + assert type(df[DEFAULT_GENRE_COL]) == Series + assert len(df) == 100 + assert '|' in df.loc[0, DEFAULT_GENRE_COL] + assert df.loc[0, DEFAULT_TITLE_COL] == 'foo' From eb939b8f0b09431327d0eee31ebf259f471809fb Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 00:28:54 +0000 Subject: [PATCH 09/27] Don't use 100k dataset --- tests/unit/examples/test_notebooks_pyspark.py | 6 +++--- tests/unit/examples/test_notebooks_python.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 0e3fb41e09..f25dbe388c 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -34,7 +34,7 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2764.50s in Windows, while in Linux 124.35s" ) -@pytest.mark.parametrize("data_size", ["100k", "mock100"]) 
+@pytest.mark.parametrize("data_size", ["mock100"]) def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["als_deep_dive"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, @@ -59,7 +59,7 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark @pytest.mark.mock_movielens -@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +@pytest.mark.parametrize("data_size", ["mock100"]) def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["evaluation_diversity"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, @@ -72,7 +72,7 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2409.69s in Windows, while in Linux 138.30s" ) -@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +@pytest.mark.parametrize("data_size", ["mock100"]) def test_spark_tuning(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["spark_tuning"] pm.execute_notebook( diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 7b2de60ef5..0d809ee51e 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -51,7 +51,7 @@ def test_baseline_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.mock_movielens -@pytest.mark.parametrize("data_size", ["100k", "mock100"]) +@pytest.mark.parametrize("data_size", ["mock100"]) def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["surprise_svd_deep_dive"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, @@ -102,7 +102,7 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks @pytest.mark.mock_movielens -@pytest.mark.parametrize("data_size", ["10m", "mock100"]) +@pytest.mark.parametrize("data_size", ["mock100"]) def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["rlrmc_quickstart"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, From 581c1ed00ec3c07517d35afc7ac4df81066a2886 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 13:27:41 +0000 Subject: [PATCH 10/27] Re-wire local import to minimize module-wide dependency --- recommenders/datasets/mock/movielens.py | 2 +- recommenders/datasets/movielens.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index 44c5acc221..c8ddf6a80c 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -7,7 +7,7 @@ try: import pandera as pa except ImportError as e: - raise ImportError("Pandera not installed. Try `pip install recommender['dev']`") from e + raise ImportError("pandera is not installed. 
Try `pip install recommenders['dev']`") from e from recommenders.utils.constants import ( DEFAULT_USER_COL, diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index c47865afcb..60f33a5e92 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -7,7 +7,6 @@ import warnings import pandas as pd from zipfile import ZipFile -from recommenders.datasets.mock.movielens import MockMovielensSchema from recommenders.datasets.download_utils import maybe_download, download_path from recommenders.utils.notebook_utils import is_databricks from recommenders.utils.constants import ( @@ -198,6 +197,8 @@ def load_pandas_df( header = header[:4] if size in MOCK_DATA_FORMAT: + # function-wide import to isolate extra dependencies from the mock schema will use + from recommenders.datasets.mock.movielens import MockMovielensSchema # generate fake data return MockMovielensSchema.get_df( keep_first_n_cols=len(header), @@ -410,6 +411,8 @@ def load_spark_df( raise ValueError(ERROR_MOVIE_LENS_SIZE) if size in MOCK_DATA_FORMAT: + # function-wide import to isolate extra dependencies from the mock schema will use + from recommenders.datasets.mock.movielens import MockMovielensSchema # generate fake data return MockMovielensSchema.get_spark_df( spark, From 5862f257a6f172544099aba0f418a9f8da7f201d Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 14:46:10 +0000 Subject: [PATCH 11/27] Runnable in non-spark env --- recommenders/datasets/mock/movielens.py | 19 ++++++++++++------- .../datasets/mock/test_movielens.py | 6 ++---- .../recommenders/datasets/test_movielens.py | 12 ++++++------ 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index c8ddf6a80c..e71da13f41 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -9,6 +9,11 @@ except ImportError as e: raise ImportError("pandera is not installed. Try `pip install recommenders['dev']`") from e +try: + from pyspark.sql.types import StructField, StructType, IntegerType, StringType, FloatType +except ImportError: + pass # so the environment without spark doesn't break + from recommenders.utils.constants import ( DEFAULT_USER_COL, DEFAULT_ITEM_COL, @@ -23,11 +28,8 @@ from typing import Optional import pandas -import pyspark.sql from pandera.typing import Series from pandera import Field -from pyspark.sql import SparkSession -from pyspark.sql.types import StructField, StructType, IntegerType, StringType, FloatType class MockMovielensSchema(pa.SchemaModel): @@ -36,10 +38,12 @@ class MockMovielensSchema(pa.SchemaModel): This schema is configured to mimic the Movielens dataset http://files.grouplens.org/datasets/movielens/ml-100k/ + + Dataset schema and generation is configured using pandera. + Please see https://pandera.readthedocs.io/en/latest/schema_models.html + for more information. 
""" - # The 100k dataset has 943 total users userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) - # And 1682 total items itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) # Rating is on the scale from 1 to 5 rating: Series[float] = Field(in_range={"min_value": 1, "max_value": 5}) @@ -77,15 +81,16 @@ def get_df( schema = schema.remove_columns([DEFAULT_GENRE_COL]) random.seed(seed) + # For more information on data synthesis, see https://pandera.readthedocs.io/en/latest/data_synthesis_strategies.html return schema.example(size=size) @classmethod def get_spark_df( cls, - spark: SparkSession, + spark, size: int = 3, seed: int = 100, keep_title_col: bool = False, keep_genre_col: bool = False, - ) -> pyspark.sql.DataFrame: + ): """Return fake movielens dataset as a Spark Dataframe with specified rows Args: diff --git a/tests/unit/recommenders/datasets/mock/test_movielens.py b/tests/unit/recommenders/datasets/mock/test_movielens.py index ae5ea765de..e8a6e5f8be 100644 --- a/tests/unit/recommenders/datasets/mock/test_movielens.py +++ b/tests/unit/recommenders/datasets/mock/test_movielens.py @@ -7,8 +7,6 @@ import pytest import pandas -import pyspark.sql -from pyspark.sql import SparkSession @pytest.mark.parametrize("size", [10, 100]) @@ -51,13 +49,13 @@ def test_mock_movielens_schema__get_df__return_success(size, seed, keep_first_n_ assert len(df[DEFAULT_GENRE_COL]) == size +@pytest.mark.spark @pytest.mark.parametrize("keep_genre_col", [True, False]) @pytest.mark.parametrize("keep_title_col", [True, False]) @pytest.mark.parametrize("seed", [101]) # seed for pseudo-random # generation @pytest.mark.parametrize("size", [0, 3, 10]) -def test_mock_movielens_schema__get_spark_df__return_success(spark: SparkSession, size, seed, keep_title_col, keep_genre_col): +def test_mock_movielens_schema__get_spark_df__return_success(spark, size, seed, keep_title_col, keep_genre_col): df = MockMovielensSchema.get_spark_df(spark, size=size, seed=seed, keep_title_col=keep_title_col, keep_genre_col=keep_genre_col) - assert type(df) == pyspark.sql.DataFrame assert df.count() == size if keep_title_col: diff --git a/tests/unit/recommenders/datasets/test_movielens.py b/tests/unit/recommenders/datasets/test_movielens.py index d53f5d594c..ddba43a580 100644 --- a/tests/unit/recommenders/datasets/test_movielens.py +++ b/tests/unit/recommenders/datasets/test_movielens.py @@ -2,10 +2,9 @@ from recommenders.datasets.movielens import load_pandas_df, load_spark_df from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL -import pyspark.sql import pandas +import pytest from pandas.core.series import Series -from pyspark.sql import SparkSession def test_mock_movielens_data__no_name_collision(): @@ -18,9 +17,9 @@ def test_mock_movielens_data__no_name_collision(): assert not collision -def test_load_spark_df_mock_100__with_default_param__succeed(spark: SparkSession): +@pytest.mark.spark +def test_load_spark_df_mock_100__with_default_param__succeed(spark): df = load_spark_df(spark, "mock100") - assert type(df) == pyspark.sql.DataFrame assert df.count() == 100 @@ -30,7 +29,8 @@ def test_load_pandas_df_mock_100__with_default_param__succeed(): assert len(df) == 100 -def test_load_spark_df_mock_100__with_custom_param__succeed(spark: SparkSession): +@pytest.mark.spark +def test_load_spark_df_mock_100__with_custom_param__succeed(spark): df = load_spark_df(spark, "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL) assert df.schema[DEFAULT_TITLE_COL] assert 
df.schema[DEFAULT_GENRE_COL] @@ -39,7 +39,7 @@ def test_load_spark_df_mock_100__with_custom_param__succeed(spark: SparkSession) assert df.take(1)[0][DEFAULT_TITLE_COL] == 'foo' -def test_load_pandas_df_mock_100__with_custom_param__succeed(spark: SparkSession): +def test_load_pandas_df_mock_100__with_custom_param__succeed(): df = load_pandas_df("mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL) assert type(df[DEFAULT_TITLE_COL]) == Series assert type(df[DEFAULT_GENRE_COL]) == Series From eca5abf8161c5d2c8b4355a06f2b0d241fed6ac5 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 14:51:40 +0000 Subject: [PATCH 12/27] Rename test marker to fake_movielens --- tests/unit/examples/test_notebooks_pyspark.py | 6 +++--- tests/unit/examples/test_notebooks_python.py | 4 ++-- tox.ini | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index f25dbe388c..46691f3885 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -30,7 +30,7 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.mock_movielens +@pytest.mark.fake_movielens @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2764.50s in Windows, while in Linux 124.35s" ) @@ -58,7 +58,7 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.mock_movielens +@pytest.mark.fake_movielens @pytest.mark.parametrize("data_size", ["mock100"]) def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["evaluation_diversity"] @@ -68,7 +68,7 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.mock_movielens +@pytest.mark.fake_movielens @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2409.69s in Windows, while in Linux 138.30s" ) diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 0d809ee51e..021d80fdc3 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -50,7 +50,7 @@ def test_baseline_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.mock_movielens +@pytest.mark.fake_movielens @pytest.mark.parametrize("data_size", ["mock100"]) def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["surprise_svd_deep_dive"] @@ -101,7 +101,7 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks -@pytest.mark.mock_movielens +@pytest.mark.fake_movielens @pytest.mark.parametrize("data_size", ["mock100"]) def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name, data_size): notebook_path = notebooks["rlrmc_quickstart"] diff --git a/tox.ini b/tox.ini index bfb0b68833..7ede574a2e 100644 --- a/tox.ini +++ b/tox.ini @@ -66,7 +66,7 @@ markers = gpu: mark a test as gpu test spark: mark a test as spark test vw: mark a test as vowpal wabbit test - mock_movielens: mark a test that uses the mock dataset instead of real dataset + fake_movielens: mark a test that uses the fake dataset instead testpaths = tests addopts = From 760078489d0939cf47faf538e0946e9ff7970dd1 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 
2021 16:52:09 +0000 Subject: [PATCH 13/27] Re-render diversity_metric NB outputs --- .../als_movielens_diversity_metrics.ipynb | 287 +++++++++++++++--- 1 file changed, 242 insertions(+), 45 deletions(-) diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index 38de757530..69db3dc8dd 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -142,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "source": [ "# set the environment path to find Recommenders\n", "%load_ext autoreload\n", @@ -174,7 +174,17 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System version: 3.6.9 (default, Jan 26 2021, 15:33:00) \n", + "[GCC 8.4.0]\n", + "Spark version: 2.4.8\n" + ] + } + ], "metadata": {} }, { @@ -187,13 +197,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "source": [ "# top k items to recommend\n", "TOP_K = 10\n", "\n", "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", - "MOVIELENS_DATA_SIZE = 'mock10'\n", + "MOVIELENS_DATA_SIZE = '100k'\n", "\n", "# user, item column names\n", "COL_USER=\"UserId\"\n", @@ -220,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "source": [ "# the following settings work well for debugging locally on VM - change when running on a cluster\n", "# set up a giant single executor with many threads and specify memory cap\n", @@ -241,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "source": [ "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", "schema = StructType(\n", @@ -256,7 +266,47 @@ "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema, title_col=COL_TITLE, genres_col=COL_GENRE)\n", "data.show()" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 4.81k/4.81k [00:00<00:00, 15.6kKB/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+------+------+---------+--------------------+------+\n", + "|ItemId|UserId|Rating|Timestamp| Title| Genre|\n", + "+------+------+------+---------+--------------------+------+\n", + "| 26| 138| 5.0|879024232|Brothers McMullen...|Comedy|\n", + "| 26| 224| 3.0|888104153|Brothers McMullen...|Comedy|\n", + "| 26| 18| 4.0|880129731|Brothers McMullen...|Comedy|\n", + "| 26| 222| 3.0|878183043|Brothers McMullen...|Comedy|\n", + "| 26| 43| 5.0|883954901|Brothers McMullen...|Comedy|\n", + "| 26| 201| 4.0|884111927|Brothers McMullen...|Comedy|\n", + "| 26| 299| 4.0|878192601|Brothers McMullen...|Comedy|\n", + "| 26| 95| 3.0|880571951|Brothers McMullen...|Comedy|\n", + "| 26| 89| 3.0|879459909|Brothers McMullen...|Comedy|\n", + "| 26| 361| 3.0|879440941|Brothers McMullen...|Comedy|\n", + "| 26| 194| 3.0|879522240|Brothers McMullen...|Comedy|\n", + "| 26| 391| 5.0|877399745|Brothers McMullen...|Comedy|\n", + "| 26| 345| 3.0|884993555|Brothers McMullen...|Comedy|\n", + "| 26| 303| 4.0|879468307|Brothers McMullen...|Comedy|\n", + "| 26| 401| 3.0|891033395|Brothers McMullen...|Comedy|\n", + "| 26| 429| 3.0|882386333|Brothers McMullen...|Comedy|\n", + "| 26| 293| 3.0|888907015|Brothers 
McMullen...|Comedy|\n", + "| 26| 270| 5.0|876954995|Brothers McMullen...|Comedy|\n", + "| 26| 442| 3.0|883388576|Brothers McMullen...|Comedy|\n", + "| 26| 342| 2.0|875320037|Brothers McMullen...|Comedy|\n", + "+------+------+------+---------+--------------------+------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], "metadata": {} }, { @@ -268,13 +318,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "source": [ "train_df, test_df = spark_random_split(data.select(COL_USER, COL_ITEM, COL_RATING), ratio=0.75, seed=123)\n", "print (\"N train_df\", train_df.cache().count())\n", "print (\"N test_df\", test_df.cache().count())" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "N train_df 75066\n", + "N test_df 24934\n" + ] + } + ], "metadata": {} }, { @@ -293,7 +352,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "source": [ "users = train_df.select(COL_USER).distinct()\n", "items = train_df.select(COL_ITEM).distinct()\n", @@ -314,7 +373,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "source": [ "header = {\n", " \"userCol\": COL_USER,\n", @@ -339,14 +398,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "source": [ "with Timer() as train_time:\n", " model = als.fit(train_df)\n", "\n", "print(\"Took {} seconds for training.\".format(train_time.interval))" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Took 4.189040212018881 seconds for training.\n" + ] + } + ], "metadata": {} }, { @@ -360,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "source": [ "# Score all user-item pairs\n", "dfs_pred = model.transform(user_item)\n", @@ -382,7 +449,16 @@ " \n", "print(top_k_reco.count())" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1464853\n", + "9430\n" + ] + } + ], "metadata": {} }, { @@ -396,7 +472,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "source": [ "# random recommender\n", "window = Window.partitionBy(COL_USER).orderBy(F.rand())\n", @@ -430,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "source": [ "def get_ranking_results(ranking_eval):\n", " metrics = {\n", @@ -457,7 +533,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "source": [ "def generate_summary(data, algo, k, ranking_metrics, diversity_metrics):\n", " summary = {\"Data\": data, \"Algo\": algo, \"K\": k}\n", @@ -485,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "source": [ "als_ranking_eval = SparkRankingEvaluation(\n", " test_df, \n", @@ -505,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "source": [ "als_diversity_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -521,7 +597,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "source": [ "als_results = generate_summary(MOVIELENS_DATA_SIZE, \"als\", TOP_K, als_ranking_metrics, als_diversity_metrics)" ], @@ -537,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "source": [ "random_ranking_eval = SparkRankingEvaluation(\n", " test_df,\n", @@ -556,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "source": [ 
"random_diversity_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -572,7 +648,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "source": [ "random_results = generate_summary(MOVIELENS_DATA_SIZE, \"random\", TOP_K, random_ranking_metrics, random_diversity_metrics)" ], @@ -588,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "source": [ "cols = [\"Data\", \"Algo\", \"K\", \"Precision@k\", \"Recall@k\", \"NDCG@k\", \"Mean average precision\",\"catalog_coverage\", \"distributional_coverage\",\"novelty\", \"diversity\", \"serendipity\" ]\n", "df_results = pd.DataFrame(columns=cols)\n", @@ -601,11 +677,100 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "source": [ "df_results" ], - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DataAlgoKPrecision@kRecall@kNDCG@kMean average precisioncatalog_coveragedistributional_coveragenoveltydiversityserendipity
1100kals100.0472960.0160150.0430970.0045790.3857937.96725711.6597760.8922770.878733
2100krandom100.0165430.0055660.0163730.0014410.99448910.54185012.1364390.9226130.892511
\n", + "
" + ], + "text/plain": [ + " Data Algo K Precision@k Recall@k NDCG@k Mean average precision \\\n", + "1 100k als 10 0.047296 0.016015 0.043097 0.004579 \n", + "2 100k random 10 0.016543 0.005566 0.016373 0.001441 \n", + "\n", + " catalog_coverage distributional_coverage novelty diversity \\\n", + "1 0.385793 7.967257 11.659776 0.892277 \n", + "2 0.994489 10.541850 12.136439 0.922613 \n", + "\n", + " serendipity \n", + "1 0.878733 \n", + "2 0.892511 " + ] + }, + "metadata": {}, + "execution_count": 20 + } + ], "metadata": {} }, { @@ -626,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "source": [ "# Get movie features \"title\" and \"genres\"\n", "movies = (\n", @@ -642,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "source": [ "# tokenize \"title\" column\n", "title_tokenizer = Tokenizer(inputCol=COL_TITLE, outputCol=\"title_words\")\n", @@ -657,12 +822,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "source": [ "# convert text input into feature vectors\n", "\n", "# step 1: perform HashingTF on column \"text\"\n", - "text_hasher = HashingTF(inputCol=\"text\", outputCol=\"text_features\", numFeatures=3)\n", + "text_hasher = HashingTF(inputCol=\"text\", outputCol=\"text_features\", numFeatures=1024)\n", "hashed_data = text_hasher.transform(clean_data)\n", "\n", "# step 2: fit a CountVectorizerModel from column \"genres\".\n", @@ -679,7 +844,30 @@ "\n", "feature_data.show(10, False)" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+---------------------------------------------+\n", + "|ItemId|features |\n", + "+------+---------------------------------------------+\n", + "|167 |(1043,[128,544,1025],[1.0,1.0,1.0]) |\n", + "|1343 |(1043,[38,300,1024],[1.0,1.0,1.0]) |\n", + "|1607 |(1043,[592,821,1024],[1.0,1.0,1.0]) |\n", + "|966 |(1043,[389,502,1028],[1.0,1.0,1.0]) |\n", + "|9 |(1043,[11,342,1014,1024],[1.0,1.0,1.0,1.0]) |\n", + "|1230 |(1043,[597,740,902,1025],[1.0,1.0,1.0,1.0]) |\n", + "|1118 |(1043,[702,1025],[1.0,1.0]) |\n", + "|673 |(1043,[169,690,1027,1040],[1.0,1.0,1.0,1.0]) |\n", + "|879 |(1043,[909,1026,1027,1034],[1.0,1.0,1.0,1.0])|\n", + "|66 |(1043,[256,1025,1028],[1.0,1.0,1.0]) |\n", + "+------+---------------------------------------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], "metadata": {} }, { @@ -691,16 +879,7 @@ }, { "cell_type": "code", - "execution_count": null, - "source": [ - "feature_data.count()" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 24, "source": [ "als_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -716,12 +895,21 @@ "print(als_diversity)\n", "print(als_serendipity)" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.8738984131037538\n", + "0.8873467159479473\n" + ] + } + ], "metadata": {} }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "source": [ "random_eval = SparkDiversityEvaluation(\n", " train_df = train_df, \n", @@ -737,7 +925,16 @@ "print(random_diversity)\n", "print(random_serendipity)" ], - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.8982144953920664\n", + "0.8941807579293202\n" + ] + } + ], "metadata": {} }, { From 04f1371705f6839d23fbf477528be14139bf4804 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 
16:56:41 +0000 Subject: [PATCH 14/27] Re-render als_deep_dive NB outputs --- .../als_deep_dive.ipynb | 484 ++++++++++++------ 1 file changed, 318 insertions(+), 166 deletions(-) diff --git a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb index a8b19a4d65..0d90bb65d4 100644 --- a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb @@ -2,32 +2,31 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, "source": [ "Copyright (c) Microsoft Corporation. All rights reserved.\n", "\n", "Licensed under the MIT License." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "# Spark Collaborative Filtering (ALS) Deep Dive" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Spark MLlib provides a collaborative filtering algorithm that can be used for training a matrix factorization model, which predicts explicit or implicit ratings of users on items for recommendations.\n", "\n", "This notebook presents a deep dive into the Spark collaborative filtering algorithm." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 1 Matrix factorization algorithm\n", "\n", @@ -54,11 +53,11 @@ "Owing to the term of $q_{i}^{T}p_{u}$ the loss function is non-convex. Gradient descent method can be applied but this will incur expensive computations. An Alternating Least Square (ALS) algorithm was therefore developed to overcome this issue. \n", "\n", "The basic idea of ALS is to learn one of $q$ and $p$ at a time for optimization while keeping the other as constant. This makes the objective at each iteration convex and solvable. The alternating between $q$ and $p$ stops when there is convergence to the optimal. It is worth noting that this iterative computation can be parallelised and/or distributed, which makes the algorithm desirable for use cases where the dataset is large and thus the user-item rating matrix is super sparse (as is typical in recommendation scenarios). A comprehensive discussion of ALS and its distributed computation can be found [here](http://stanford.edu/~rezab/classes/cme323/S15/notes/lec14.pdf)." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 2 Spark Mllib implementation\n", "\n", @@ -67,29 +66,28 @@ "* The uniqueness of ALS implementation is that it distributes the matrix factorization model training by using \"Alternating Least Square\" method. \n", "* In the training method, there are parameters that can be selected to control the model performance.\n", "* Both explicit and implicit ratings are supported by Spark ALS model." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## 3 Spark ALS based MovieLens recommender\n", "\n", "In the following code, the MovieLens-100K dataset is used to illustrate the ALS algorithm in Spark." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](https://github.com/Microsoft/Recommenders/blob/master/SETUP.md#dependencies-setup) to install the PySpark environment." 
- ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 1, "source": [ "# set the environment path to find Recommenders\n", "import sys\n", @@ -118,24 +116,31 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", "print(\"PySpark version: {}\".format(pyspark.__version__))" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System version: 3.6.9 (default, Jan 26 2021, 15:33:00) \n", + "[GCC 8.4.0]\n", + "Pandas version: 1.1.5\n", + "PySpark version: 2.4.8\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Data column names" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], + "execution_count": 2, "source": [ "MOVIELENS_DATA_SIZE = \"100k\"\n", "\n", @@ -144,13 +149,17 @@ "COL_RATING = \"Rating\"\n", "COL_PREDICTION = \"prediction\"\n", "COL_TIMESTAMP = \"Timestamp\"" - ] + ], + "outputs": [], + "metadata": { + "tags": [ + "parameters" + ] + } }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, "source": [ "schema = StructType(\n", " (\n", @@ -160,127 +169,153 @@ " StructField(COL_TIMESTAMP, LongType()),\n", " )\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Model hyper parameters - these parameters are selected with reference to the benchmarking results [here](http://mymedialite.net/examples/datasets.html)." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, "source": [ "RANK = 10\n", "MAX_ITER = 15\n", "REG_PARAM = 0.05" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Number of recommended items" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, "source": [ "K = 10" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Initialize a Spark session." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, "source": [ "spark = start_or_get_spark(\"ALS Deep Dive\", memory=\"16g\")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.1 Load and prepare data" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Data is read from csv into a Spark DataFrame." 
- ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 7, "source": [ "dfs = movielens.load_spark_df(spark=spark, size=MOVIELENS_DATA_SIZE, schema=schema)" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "100%|██████████| 4.81k/4.81k [00:00<00:00, 20.5kKB/s]\n" + ] + } + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], + "execution_count": 8, "source": [ "dfs.show(5)" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+-------+------+---------+\n", + "|UserId|MovieId|Rating|Timestamp|\n", + "+------+-------+------+---------+\n", + "| 196| 242| 3.0|881250949|\n", + "| 186| 302| 3.0|891717742|\n", + "| 22| 377| 1.0|878887116|\n", + "| 244| 51| 2.0|880606923|\n", + "| 166| 346| 1.0|886397596|\n", + "+------+-------+------+---------+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], + "metadata": { + "scrolled": true + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Data is then randomly split by 80-20 ratio for training and testing." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 9, "source": [ "dfs_train, dfs_test = spark_random_split(dfs, ratio=0.75, seed=42)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.2 Train a movielens model " - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "It is worth noting that Spark ALS model allows dropping cold users to favor a robust evaluation with the testing data. In case there are cold users, Spark ALS implementation allows users to drop cold users in order to make sure evaluations on the prediction results are sound." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, "source": [ "als = ALS(\n", " maxIter=MAX_ITER, \n", @@ -293,38 +328,38 @@ ")\n", "\n", "model = als.fit(dfs_train)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.3 Prediction with the model\n", "\n", "The trained model can be used to predict ratings with a given test data." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, "source": [ "dfs_pred = model.transform(dfs_test).drop(COL_RATING)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "With the prediction results, the model performance can be evaluated." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 12, "source": [ "evaluations = SparkRatingEvaluation(\n", " dfs_test, \n", @@ -342,20 +377,31 @@ " \"Explained variance score = {}\".format(evaluations.exp_var()),\n", " sep=\"\\n\"\n", ")" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE score = 0.9726930349322086\n", + "MAE score = 0.7565710909806911\n", + "R2 score = 0.24411065820407096\n", + "Explained variance score = 0.249700271662727\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Oftentimes ranking metrics are also of interest to data scientists. 
Note usually ranking metrics apply to the scenario of recommending a list of items. In our case, the recommended items should be different from those that have been rated by the users. " - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 13, "source": [ "# Get the cross join of all user-item pairs and score them.\n", "users = dfs_train.select(COL_USER).distinct()\n", @@ -374,13 +420,46 @@ " .select('pred.' + COL_USER, 'pred.' + COL_ITEM, 'pred.' + \"prediction\")\n", "\n", "dfs_pred_final.show()" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+-------+----------+\n", + "|UserId|MovieId|prediction|\n", + "+------+-------+----------+\n", + "| 1| 587| 3.2763875|\n", + "| 1| 869| 1.996331|\n", + "| 1| 1208| 3.0924819|\n", + "| 1| 1677| 3.0549564|\n", + "| 2| 80| 2.2266486|\n", + "| 2| 303| 3.5071766|\n", + "| 2| 472| 2.4076686|\n", + "| 2| 582| 4.137449|\n", + "| 2| 838| 1.6214753|\n", + "| 2| 975| 2.7880914|\n", + "| 2| 1260| 3.155648|\n", + "| 2| 1325| 1.2494813|\n", + "| 2| 1381| 3.712147|\n", + "| 2| 1530| 2.04168|\n", + "| 3| 22| 2.5458775|\n", + "| 3| 57| 1.7472819|\n", + "| 3| 89| 3.85607|\n", + "| 3| 367| 3.2235723|\n", + "| 3| 1091| 1.5452085|\n", + "| 3| 1167| 3.5050836|\n", + "+------+-------+----------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 14, "source": [ "evaluations = SparkRankingEvaluation(\n", " dfs_test, \n", @@ -399,11 +478,23 @@ " \"Mean average precision = {}\".format(evaluations.map_at_k()),\n", " sep=\"\\n\"\n", ")" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Precision@k = 0.03170731707317073\n", + "Recall@k = 0.012679519170565132\n", + "NDCG@k = 0.02914424248125332\n", + "Mean average precision = 0.0033674440032626088\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.4 Fine tune the model\n", "\n", @@ -416,48 +507,47 @@ "|`maxIters`|Maximum number of iterations|10|The more iterations the better the model converges to the optimal point.|\n", "\n", "It is always a good practice to start model building with default parameter values and then sweep the parameter in a range to find the optimal combination of parameters. The following parameter set is used for training ALS models for comparison study purposes." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 15, "source": [ "param_dict = {\n", " \"rank\": [10, 15, 20],\n", " \"regParam\": [0.001, 0.1, 1.0]\n", "}" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Generate a dictionary for each parameter combination which can then be fed into model training." - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 16, "source": [ "param_grid = generate_param_grid(param_dict)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Train models with parameters specified in the parameter grid. Evaluate the model with, for example, the RMSE metric, and then record the metrics for visualization." 
- ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 17, "source": [ "rmse_score = []\n", "\n", @@ -487,104 +577,166 @@ "\n", "rmse_score = [float('%.4f' % x) for x in rmse_score]\n", "rmse_score_array = np.reshape(rmse_score, (len(param_dict[\"rank\"]), len(param_dict[\"regParam\"]))) " - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 18, "source": [ "rmse_df = pd.DataFrame(data=rmse_score_array, index=pd.Index(param_dict[\"rank\"], name=\"rank\"), \n", " columns=pd.Index(param_dict[\"regParam\"], name=\"reg. parameter\"))" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 19, "source": [ "fig, ax = plt.subplots()\n", "sns.heatmap(rmse_df, cbar=False, annot=True, fmt=\".4g\")" - ] + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 19 + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEGCAYAAABmXi5tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAitElEQVR4nO3deXxU1d3H8c9vMlkhLAnIjmjBRxDRyiKIIFoFRFGkAloQcK2i4AIIWhXQtooIVn1wwaosPoJg2cSFpYWKCBIEAVksyKKBgLILJCSZnOePGQKBJKTKZEju9/16zYuZe86993dzX/nOzblnBnPOISIipZ8v0gWIiEjxUOCLiHiEAl9ExCMU+CIiHqHAFxHxCH+kCyjI3i5tNH2ohKo8Y0OkS5BfYf/gVpEuQX6FMk9PsoLadIUvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEf4I11AaZBw36NEN26B27+PA/1vP6k9uklL4m65A5yDQIDDY/+XwPrVRNWpS8LdD2PxCbicHDKmvkvWF/MBKPv0y1h8AgC+chXI3rieQyOeKNbjKs3atW3DqFFPE+Xz8fY7E3l+xOg87bVr1+DvY0ZRqXISe/fso2fvfmzblsZFF13A6FeeJbFcWQKBAM8+9wpTpswE4Mo2LRk+/EliYqJZvnw1d9/Tn0AgAMAVrVswcuQwoqP97N61h6uuvrnYj7k0iun0R/znXYI7dID00QNPao86vzExV3XFOQc5ATI/GU/O99/iO6cBMe175vbzVarOkSkvE1i/DN+5DYlp2x3MIDODI9New+3ZWZyHFTbmnIt0Dfna26XNmVlYPvz1G+Ey0inzwOP5Bj5x8ZCRDkBU7XMp88hQDjzUE1+1muAcOTu2YRWTKTd8DAce6oU7fDDP6mX6DyMrZRGZn80phqP59SrP2BDpEgrl8/lYt2Yh7TvcSmpqGksWf0yP2/qwbt2xuidNfIOPPp7HhAlTuLJNS3r16kbv2/tRr965OOfYuHEz1apVYemST2jYqA0HDvzMpo1Ladu+Gxs2bGLokAFs3ZrKO2MnUb58ORZ+NoPrru/ODz9sp3LlZH76aXcEfwKF2z+4VaRLKDLf2edDZgaxne/PN/CJiYXMIwBYldrEdX2Q9Ff65+0TX4aEB1/i8Mg+kJVJfL8XyXhvBG7XdvxNr8FXsy6Z014rhqM5Pco8PckKatOQzmmQvW4V7uDPBXcIhT0AcXHBK30gJy2VnB3bAHB7d5Ozfy9WrnzedeMT8De8hMyUz0932Z7VrOlv+e67LWze/D1ZWVlMnjyDGzq2y9Onfv16zJ+/CID5CxZxQ8e2AGzYsImNGzcDkJa2kx9/2k3lyskkJ1ckMzOTDRs2ATBv3md0vqkDALfechPTp3/CDz9sBzijw76kydm6Hpd+qOAOobAHsJjYfLv4GzQnsOFryMoMLXFYXPCva4tLwP289zRVG3kK/GIS3exyyv1tPGUfe45Drw0/qT2q7vmYP5qcndvzLI9pejnZ3yyH9MPFVWqpV71GVX5IPfZzTt2WRvXqVfP0WbVqLTd1uhaATp2upVy5RJKSKubp07TJxcTERPPdd1vYtWsPfr+fxpc0AqBz5+uoWas6APXqnUuFCuX559wpfLnkE3r00HBOcYqq35T4viOJ6z6II9NfP6ndf2ELsld/kfv6yIwxxPUYRHz/0fgvakXWwhnFWW5YKfCLSdbSzznwUE8OPf8E8d3uzNNmFZIo0/dxDr06PPfq/6iYy39H5uf/LM5SBXh00DO0bt2clKWzad2qOampabnj8QBVq57F2LEvc9ddj3B0WLR7jz6MfGEoixfN4uDBQwQCOQD4/VE0vqQRHW/sSYfr/sCfHnuIevXOjchxeVFgXQrpr/QnY+ILxFzVNU+bla2Ar0ptAhtX5i6LbtGBjHeHkz7yfrJXLCCm/W3FXXLYhCXwzay8mT1nZuvNbI+Z7TazdaFlFQpZ7x4zW2Zmy8Zu2l5QtxIte90qfFWqYYmhoZv4BMo+9hzpE98isGFtnr6WWJ6ouueTtXxJBCotvbZv20GtmtVzX9esUY3t23fk6ZOWtpMuXe+mabN2PPlU8C+y/fsPAJCYWJaZM8bz5FPD+XLp8tx1lnz5FW2u6kyLltezcOGS3OGdbdvSmDN3AYcPp7N7914Wfr6ERo
0ahPsw5QQ5W9djFc+ChMTcZVENW5C9LgVyQm/mCYn4qp5NTupGALK/WUxUrfMiUW5YhOsKfzKwF2jjnEtyziUDV4aWTS5oJefcGOdcE+dck97nVi+oW4njq1oj93nUOfWw6Gjcz/vB76fswGfI/Pccspb8+6T1optfQdZXi48bW5TTIWXZ19Stew516tQiOjqarl1v5MNZeW+IJydXxCx472vwoL6MHTcJgOjoaP4x5S3effcDpk79KM86lSsnAxATE8PAAfczZswEAGZ+OJuWlzUjKiqK+Pg4mjX7LevXn9k3tksLS6qS+9xXrQ74o+Hwsftt/gsvI3v1omMrZBzCYuOx5GoARP2mETk/bSuucsMuXNMy6zjn8gxUO+d2AMPN7I4w7TNiyjz4JP4LLsYSy1P+9SmkT34HooI/2sy5M4m+tDWxV7TFBQKQeYSDLz4NQEyLK/HXvwhLLE/Mle0BODz6OQJbglcXMS2vImP6e5E5qFIsEAjw4ENP8PFH7xHl8zF23PusXfsfhg4ZwLKvVjJr1lyuuOIy/vLMYzgcCxcuoW+/PwHQpUtHWrW6lKTkivTsGRweuPOuh1m5cg0DHrmPDtddjc/n4403xjN/QTBI1q/fyOw581mxfB45OTm8/fZE1qz5NmLHX5rE3twX3zkNsIRE4vuPJmv+B+CLAiB72Tz8DS7Ff3Gr4O9ediZHJr+Uu65VqIyVTyZny7pjG8zJ4cjMN4m75eHgUF36oXzH/UuqsEzLNLM5wDxgnHNuZ2hZFaA3cI1z7upTbaMkTcuUvM70aZlSuJI0LVNOFolpmd2AZODfoTH8PcACIAnoEqZ9iohIIcIypOOc2wsMCj3yMLPbgXfCsV8RESlYJKZlDovAPkVEPC8sV/hmtqqgJqBKAW0iIhJG4ZqlUwVoR3Aa5vEM+OLk7iIiEm7hCvxZQFnn3NcnNpjZgjDtU0REChGum7Z3FtL2h3DsU0RECqfv0hER8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh7hj3QBBSk7+s1IlyC/1Iw2ka5ARPKhK3wREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEI4oU+GYWm8+ypNNfjoiIhEtRr/Cnmln00RdmVg2YG56SREQkHIoa+NOByWYWZWZ1gNnAY+EqSkRETj9/UTo55940sxiCwV8H+KNz7osw1iUiIqdZoYFvZo8c/xKoDXwNNDez5s65UWGsTURETqNTXeEnnvB6agHLPe2JZ//GZ1+kkFSxPNPHv3pS+9IVq+j32J+pUa0KAFe3voz7br81tz0QCNDt7oc5q1Iyrz4/BIBBT49gzfqN+P1RNKx/HkMGPkC0v0h/kEkRtGvbhlGjnibK5+Ptdyby/IjRedpr167B38eMolLlJPbu2UfP3v3Yti2N2rVr8MGUt/D5fERH+xk9+h3GvDkBgEt+eyFvvfUi8XFxfPLpv3j4kacAGP7sE1x3/TVkZmayadNW7rzrEfbvP1Dsx1waxXT6I/7zLsEdOkD66IEntUed35iYq7rinIOcAJmfjCfn+2/xndOAmPY9c/v5KlXnyJSXCaxfhu/chsS07Q5mkJnBkWmv4fbsLM7DChtzzkW6hnxl/bjhzCwsH8u+/oaE+Dge/8uoAgN/7MRpuWF+onGTprHm240cPHQ4t89ni1No1bwJAI8OG0Hjixpyy00dwncQp1F8zTaRLqFQPp+PdWsW0r7DraSmprFk8cf0uK0P69ZtyO0zaeIbfPTxPCZMmMKVbVrSq1c3et/ej+joaMyMzMxMypRJYOWKf9HqihtJS9vJ4kWzeOjhp/hy6XJmzZzA/45+m09nz+eaq1vzr/mLCAQCPPvXxwF47PG/RurwT2n/4FaRLqHIfGefD5kZxHa+P9/AJyYWMo8AYFVqE9f1QdJf6Z+3T3wZEh58icMj+0BWJvH9XiTjvRG4XdvxN70GX826ZE57rRiO5vQo8/QkK6itqNMyzzOzMWY2x8z+dfRx+kos2Zpc3JDy5X7ZHz07ftzFZ4tT+P31bfMsb92iKWaGmXFh/fPY+dOu01GqAM2a/pbvvtvC5s3fk5WVxeTJM7ihY7s8ferXr8f8+YsAmL9gETd0DJ6frKwsMjMzAYiNjcXnC/4KVa16FonlEvly6XIAJvzfB9xwQ3sA5s77jEAgAMCSL5dTo0a18B+kR+RsXY9LP1Rwh1DYA1jMSbPLAfA3aE5gw9eQlRla4rC4hOA6cQm4n/eepmojr6izdKYAK4AngIHHPaSIVq5ZT+feD3DvgCFs3Lw1d/nwl8fwSJ87MF/+b8pZ2dl8OHs+l196SXGVWupVr1GVH1K3575O3ZZG9epV8/RZtWotN3W6FoBOna6lXLlEkpIqAlCzZnWWfzWXLZtSGPHCaNLSdlKjelW2pablrr8tNY0aJ2wT4Pbet/Dp7PnhOCwpQFT9psT3HUlc90Ecmf76Se3+C1uQvfrYHJQjM8YQ12MQ8f1H47+oFVkLZxRnuWFV1MDPds695pxb6pz76ujjdBdjZveY2TIzW/b38ZNO9+YjpsF5dZk75W2mjv1f/vD76+n3+J8BWLBoKUkVK3DB/9QtcN0/j3yVxhdfQOOLGhZXuQI8OugZWrduTsrS2bRu1ZzU1LTcq/TU1O1c0vga/qd+S3re1oWzzqpUpG0+Nrgf2dnZvPfe1FN3ltMmsC6F9Ff6kzHxBWKu6pqnzcpWwFelNoGNK3OXRbfoQMa7w0kfeT/ZKxYQ0/624i45bIoa+B+aWR8zq2ZmSUcfBXU2s/bHPS9vZm+Z2Soze8/MqhS0nnNujHOuiXOuyV09b/kvDuPMVrZMAgkJ8UBwqCY7O8DefftZsXotCxZ9SdsudzBw6PMsXb6KQU+/kLveq++8x959B3j0gbsiVXqptH3bDmrVrJ77umaNamzfviNPn7S0nXTpejdNm7XjyaeGA5x0ozUtbSffrPmWyy+/lG3bd1Cj5rGhmho1q7HtuG32vK0r13W4mtt6PhCOQ5IiyNm6Hqt4FiQcG36NatiC7HUpkBN8MychEV/Vs8lJ3QhA9jeLiap1XiTKDYuiBn4vgkM4XwBfhR7LCul//B2pkUAa0BFIAd7478ss2Xbt3svRm+Or135LTo6jQvlyPHxvb/45dRxzprzNiKGP0uySRgx/agAAH3w4m0VLl/P80IG548RyeqQs+5q6dc+hTp1aREdH07XrjXw4a06ePsnJFTELDrMNHtSXseOCf3HWqFGNuLg4ACpUKE/Lls34z
3++Y8eOH/n5wM9c2iw49HZb95v58MPZQHBG0IAB99Gpc2/S0zOK6zAFsKRj15e+anXAHw2Hf85d5r/wMrJXLzq2QsYhLDYeSw6+eUf9phE5P20rrnLDrqgfvDrnV+yjiXPu4tDzF82s16/Y1hlp4NDnSVmxmn37D/C7zr3oc0d3srOzAejWqQNzFnzO+9M/ISrKR1xsLCOGPpobJgV5ZuRoqlU5i+73Bt8ATpzKKb9cIBDgwYee4OOP3iPK52PsuPdZu/Y/DB0ygGVfrWTWrLlcccVl/OWZx3A4Fi5cQt9+fwKg/vl1ef75p3AuOGtv1KjX+eab9QA80Pfx3GmZn86ezyefBuc1vPS3PxMbG8unnwTfNL78cjn3PzA4MgdfysTe3BffOQ2whETi+48ma/4H4IsCIHvZPPwNLsV/cStcIADZmRyZ/FLuulahMlY+mZwt645tMCeHIzPfJO6Wh4MXaemH8h33L6mKPC3TzBoCDYC4o8ucc+ML6JsKjCL4Ya37gd+40I7MbJVzrtGp9leSpmVKXmf6tEwpXEmaliknK2xaZpGu8M1sCNCGYOB/DFwLfA7kG/jAmxz7cNY4oBLwk5lVJfhJXRERKWZF/ejmzcBFwArn3O2hG6/vFtTZOTesgOU7zExz0kREIqCodwMznHM5QLaZlQN+BGr9wn3m+2YgIiLhdcorfAveXVxlZhUIDtV8BRwEFheyzqqCmoACp2WKiEj4nDLwnXPOzJo55/YBr5vZp0A551xBoQ7BUG8HnPiZZCM4tVNERIpZUcfwl5tZU+dcinNuSxH6zwLKOue+PrHBzBYUvTwRETldihr4lwLdzWwrcIjglboraHqlc+7OgjbknPvDf12liIj8akUN/Han7iIiImeyon7Sduupe4mIyJlMX9IiIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8Qh/pAsokC8q0hWIiJQqusIXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCP8kS6gNHjir6P4bNFSkipWYPq7r5/UvnT5KvoNHkaNalUBuPqKy7jvju657YFAgG539uOsypV4dcQwAJxzvDxmHHPmf47P56PbTdfRo8uNxXNAHtCubRtGjXqaKJ+Pt9+ZyPMjRudpr127Bn8fM4pKlZPYu2cfPXv3Y9u2NC666AJGv/IsieXKEggEePa5V5gyZSYAV7ZpyfDhTxITE83y5au5+57+BAIB+j9yL7fe2hkAvz+K+ufXo2r1Ruzdu6+4D7vUien0R/znXYI7dID00QNPao86vzExV3XFOQc5ATI/GU/O99/iO6cBMe175vbzVarOkSkvE1i/DN+5DYlp2x3MIDODI9New+3ZWZyHFTbmnIt0DfnK2rXpzCwsH8u+Xk1CfDyPP/NCgYE/duI/csP8ROMmTWXN+g0cPHQ4t8+0j+awdPkq/vKnR/D5fOzeu4/kihXCeRinTXz1VpEuoVA+n491axbSvsOtpKamsWTxx/S4rQ/r1m3I7TNp4ht89PE8JkyYwpVtWtKrVzd6396PevXOxTnHxo2bqVatCkuXfELDRm04cOBnNm1cStv23diwYRNDhwxg69ZU3hk7Kc++r7/uGh7sdzfXtOta3IddZPsHn9nn73i+s8+HzAxiO9+fb+ATEwuZRwCwKrWJ6/og6a/0z9snvgwJD77E4ZF9ICuT+H4vkvHeCNyu7fibXoOvZl0yp71WDEdzepR5epIV1KYhndOgycUXUr5c4i9ad8ePP/HZF0v5fcd2eZa/P+0j7rv9D/h8wVNUUsK+JGjW9Ld8990WNm/+nqysLCZPnsENJ/z869evx/z5iwCYv2ARN3RsC8CGDZvYuHEzAGlpO/nxp91UrpxMcnJFMjMz2bBhEwDz5n1G55s6nLTvbt1uZNL708N4dN6Ss3U9Lv1QwR1CYQ9gMbH5dvE3aE5gw9eQlRla4rC4hOA6cQm4n/eepmojT4FfTFZ+s47Ovfpwb/8n2bhpa+7y4S+9wSN97sQs76n4YVsan/zz33S9ox/39n+SrT9sK+6SS63qNaryQ+r23Nep29KoXr1qnj6rVq3lpk7XAtCp07WUK5dIUlLFPH2aNrmYmJhovvtuC7t27cHv99P4kkYAdO58HTVrVc/TPz4+jnZt2zB12sfhOCwpQFT9psT3HUlc90EcmX7yX+D+C1uQvfqL3NdHZowhrscg4vuPxn9RK7IWzijOcsMqLIFvZuXN7DkzW29me8xst5mtCy2rEI59nska/M9vmPuPcUwd9yp/+H1H+j32NAALFn1JUsUKXHB+vZPWyczKIjYmhslvv8zvO7bnyb++WNxle9qjg56hdevmpCydTetWzUlNTSMQCOS2V616FmPHvsxddz3C0WHR7j36MPKFoSxeNIuDBw8RCOTk2eb117fli8XLNHZfzALrUkh/pT8ZE18g5qq8Q2lWtgK+KrUJbFyZuyy6RQcy3h1O+sj7yV6xgJj2txV3yWETriv8ycBeoI1zLsk5lwxcGVo2uaCVzOweM1tmZsv+Pn5imEorfmXLlCEhIR6A1pc1Izs7m7379rNi1VoWfL6Etr/vxcAhz7H0q5UMGvY8AFUrV+LqK1oCwZu8//luc8TqL222b9tBrZrHrr5r1qjG9u078vRJS9tJl65307RZO558ajgA+/cfACAxsSwzZ4znyaeG8+XS5bnrLPnyK9pc1ZkWLa9n4cIlucM7R3XreoOGcyIoZ+t6rOJZkHBs+DWqYQuy16VATujNPCERX9WzyUndCED2N4uJqnVeJMoNi3AFfh3n3HDnXO5vkXNuh3NuOHB2QSs558Y455o455rc1fPWMJVW/Hbt3pN7Fbh67bfkOEeF8uV4+L7b+ef0d5nzj3GMGDaYZo0vYviQRwG4qnULli4PXnWkrFjN2bVqRKz+0iZl2dfUrXsOderUIjo6mq5db+TDWXPy9ElOrohZ8N7X4EF9GTsuePM1Ojqaf0x5i3ff/YCpUz/Ks07lyskAxMTEMHDA/YwZMyG3rVy5RFq3as7MmbPDeWhyAkuqkvvcV60O+KPh8M+5y/wXXkb26kXHVsg4hMXGY8nVAIj6TSNyfio9w6nhmpa51cweBcY553YCmFkVoDfwQ5j2GTEDhzxHyopV7Nt3gN916kGfO28jOzsbgG43Xcec+Z/z/rSPiPJHERcTw4hhg3PDpCB39ujKoGHPM+H96STExzFs8EPFcCTeEAgEePChJ/j4o/eI8vkYO+591q79
D0OHDGDZVyuZNWsuV1xxGX955jEcjoULl9C3358A6NKlI61aXUpSckV69gwOD9x518OsXLmGAY/cR4frrsbn8/HGG+OZv+BYkHS68VrmzvuMw4fTI3LMpVXszX3xndMAS0gkvv9osuZ/AL4oALKXzcPf4FL8F7fCBQKQncmRyS/lrmsVKmPlk8nZsu7YBnNyODLzTeJueTh4kZZ+KN9x/5IqLNMyzawiMBi4EagCOGAnMBMY7pzbc6ptlKRpmZLXmT4tUwpXkqZlyskKm5YZlit859xeM3sHmAsscc4dPNpmZu2BT8OxXxERKVi4Zun0A2YADwDfmNnxHxH9azj2KSIihQvXGP7dQGPn3EEzqwN8YGZ1nHMvAYUPXouISFiEK/B9R4dxnHNbzKwNwdA/GwW+iEhEhGta5k4zu/joi1D4Xw9UAi4M0z5FRKQQ4Qr8nkCeT7I457Kdcz2B1mHap4iIFCJcs3RSC2lbVFCbiIiEj748TUTEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEI8w5F+kaPMnM7nHOjYl0HfLL6PyVXF4+d7rCj5x7Il2A/Co6fyWXZ8+dAl9ExCMU+CIiHqHAjxxPjiGWIjp/JZdnz51u2oqIeISu8EVEPEKBLyLiEQr808DM2pvZt2a20cwG59Mea2bvh9q/NLM6x7U9Flr+rZm1O27522b2o5l9U0yHIScownltbWbLzSzbzG6ORI2Sv1P9/ljQy6Fzu8rMLinuGiNBgf8rmVkUMBq4FmgA3GpmDU7odiew1zlXF3gRGB5atwFwC3AB0B54NbQ9gLGhZRIBRTyv3wO9gfeKtzopgrEU/vtzLVAv9LgHeK0Yaoo4Bf6v1wzY6Jzb5JzLBCYBN57Q50ZgXOj5B8DvzMxCyyc554445zYDG0Pbwzn3GbCnOA5A8nXK8+qc2+KcWwXkRKJAKVgRfn9uBMa7oCVABTOrVjzVRY4C/9erAfxw3OvU0LJ8+zjnsoH9QHIR15XI0Lkp3Tx5fhX4IiIeocD/9bYBtY57XTO0LN8+ZuYHygO7i7iuRIbOTenmyfOrwP/1UoB6ZnaOmcUQvAk784Q+M4Feoec3A/9ywU+8zQRuCc3iOYfgDaSlxVS3FK4o51VKrplAz9BsnebAfudcWqSLCjd/pAso6Zxz2Wb2ADAbiALeds6tMbOngWXOuZnAW8AEM9tI8EbSLaF115jZZGAtkA3c75wLAJjZRKANUMnMUoEhzrm3ivnwPKso59XMmgLTgIpARzMb5py7IIJlS0h+vz9ANIBz7nXgY6ADwYkSh4HbI1Np8dJXK4iIeISGdEREPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CJnIDN7PNI1SOmjaZlSYoS+cM6cc2fEl5WZWdTRz02EYdsHnXNlz5R6pHTQFb6c0cysTug76ccD3wC1zGygmaWEvsd82HF9nwz1/dzMJprZgFNsu7eZzTCzBWa2wcyGHNc23cy+MrM1ZnbPccsPmtlIM1sJtDCzp0K1fGNmY0JvSoS2+aKZLTOzdWbW1Mymhvbz5+O218PMlprZ12b2hplFmdlzQHxo2f8V1C+/ek7LD11KL+ecHnqcsQ+gDsGvH24eet2W4H9CbQQvWGYBrYGmwNdAHJAIbAAGnGLbvYE0gt9cGk/wDaVJqC0p9O/R5cmh1w7oetw2ko57PgHoGHq+ABgeev4gsB2oBsQS/GbGZKA+8CEQHer3KtAz9PzgcdstrF+eevTQo7CHvlpBSoKtLvid5RAM/LbAitDrsgS/gygRmOGcywAyzOzDIm57rnNuN4CZTQUuB5YB/czsplCfWqF97AYCwD+OW/9KM3sUSACSgDUEwxmOfffOamCNC31Xi5ltCm3zcqAxkBL6wyAe+DGfGn9XSL8T6xEpkAJfSoJDxz034Fnn3BvHdzCzh37htk+8ieXMrA1wNdDCOXfYzBYQ/MsBIMMd+76jOIJX202ccz+Y2dDj+gEcCf2bc9zzo6/9oWMZ55x77BQ1FtYvtx6RU9EYvpQ0s4E7zKwsgJnVMLOzgEUEv8AsLtR2fRG3d42ZJZlZPNAptJ3yBP9LysNmdj7QvIB1j4b7rtA+/9v/1/afwM2h+gnVcXaoLcvMoovQT6TIdIUvJYpzbo6Z1QcWh4Y3DgI9nHMpZjYTWAXsJDiMsh/AzO4Nrft6PptcSnBIpCbwrnNumZmtBu41s3XAt8CSfNbDObfPzN4kOMa/g+BXKv83x7LWzJ4A5piZD8gC7ge2ErxPscrMljvnuhfST6TINC1TSg0zK+ucO2hmCcBnwD3OueWF9O9NcDjmgeKqUSSSdIUvpckYM2tAcKhlXGFhL+JFusIXEfEI3bQVEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGP+H8bcalIQKGLvQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "The calculated RMSE scores can be visualized to comparatively study how model performance is affected by different parameters." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "It can be seen from this visualization that RMSE first decreases and then increases as rank increases, due to overfitting. When the rank equals 20 and the regularization parameter equals 0.1, the model achieves the lowest RMSE score." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3.5 Top K recommendation" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### 3.5.1 Top k for all users (items)" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 20, "source": [ "dfs_rec = model.recommendForAllUsers(10)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 21, "source": [ "dfs_rec.show(10)" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+--------------------+\n", + "|UserId| recommendations|\n", + "+------+--------------------+\n", + "| 471|[[814, 3.7504895]...|\n", + "| 463|[[814, 3.1264873]...|\n", + "| 833|[[814, 3.3154662]...|\n", + "| 496|[[814, 3.055388],...|\n", + "| 148|[[814, 4.03012], ...|\n", + "| 540|[[814, 3.8661027]...|\n", + "| 392|[[814, 4.119951],...|\n", + "| 243|[[814, 3.748784],...|\n", + "| 623|[[814, 3.9018161]...|\n", + "| 737|[[814, 3.8507497]...|\n", + "+------+--------------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### 3.5.2 Top k for a selected set of users (items)" - ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 22, "source": [ "users = dfs_train.select(als.getUserCol()).distinct().limit(3)\n", "\n", "dfs_rec_subset = model.recommendForUserSubset(users, 10)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 23, "source": [ "dfs_rec_subset.show(10)" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+------+--------------------+\n", + "|UserId| recommendations|\n", + "+------+--------------------+\n", + "| 471|[[814, 3.7504895]...|\n", + "| 463|[[814, 3.1264873]...|\n", + "| 148|[[814, 4.03012], ...|\n", + "+------+--------------------+\n", + "\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "#### 3.5.3 Run-time considerations for top-k recommendations\n", "\n", @@ -593,28 +745,28 @@ "* Inner products of user-item pairs are calculated individually instead of leveraging matrix block multiplication features which are available in certain contemporary computing acceleration libraries (e.g., BLAS).\n", "\n", "More details about possible optimizations of the top k recommendations in Spark can be found [here](https://engineeringblog.yelp.com/2018/05/scaling-collaborative-filtering-with-pyspark.html)." 
- ] + ], + "metadata": {} }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 24, "source": [ "# cleanup spark instance\n", "spark.stop()" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "## References" - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "1. Yehuda Koren, Robert Bell, and Chris Volinsky, \"Matrix Factorization Techniques for Recommender Systems\n", "\", ACM Computer, Vol. 42, Issue 8, pp 30-37, Aug., 2009.\n", @@ -624,7 +776,8 @@ "4. Seaborn. url: https://seaborn.pydata.org/\n", "5. Scaling collaborative filtering with PySpark. url: https://engineeringblog.yelp.com/2018/05/scaling-collaborative-filtering-with-pyspark.html\n", "6. Matrix Completion via Alternating Least Square (ALS). url: http://stanford.edu/~rezab/classes/cme323/S15/notes/lec14.pdf" - ] + ], + "metadata": {} } ], "metadata": { @@ -633,9 +786,8 @@ "hash": "7ec2189bea0434770dca7423a25e631e1cca9c4e2b4ff137a82f4dff32ac9607" }, "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.6.9 64-bit ('.env': venv)" }, "language_info": { "codemirror_mode": { @@ -652,4 +804,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From 79eff3bf5c0b53c6414ade74e12ffb1bf543b6af Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 18:02:54 +0000 Subject: [PATCH 15/27] Specify tmp path for data serialization --- recommenders/datasets/mock/movielens.py | 45 +++++++++++++------ recommenders/datasets/movielens.py | 19 ++++---- .../datasets/mock/test_movielens.py | 19 ++++++++ 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index e71da13f41..8e12a7c3b5 100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -6,8 +6,8 @@ """ try: import pandera as pa -except ImportError as e: - raise ImportError("pandera is not installed. Try `pip install recommenders['dev']`") from e +except ImportError: + raise ImportError("pandera is not installed. Try `pip install recommenders['dev']`") try: from pyspark.sql.types import StructField, StructType, IntegerType, StringType, FloatType @@ -23,7 +23,9 @@ DEFAULT_GENRE_COL, DEFAULT_HEADER ) +from recommenders.datasets.download_utils import download_path +import os import random from typing import Optional @@ -43,9 +45,10 @@ class MockMovielensSchema(pa.SchemaModel): Please see https://pandera.readthedocs.io/en/latest/schema_models.html for more information. 
""" + # Some notebooks will do a cross join with userID and itemID, + # a sparse range for these IDs can slow down the notebook tests userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) - # Rating is on the scale from 1 to 5 rating: Series[float] = Field(in_range={"min_value": 1, "max_value": 5}) timestamp: Series[str] = Field(eq="2022-2-22") title: Series[str] = Field(eq="foo") @@ -90,24 +93,45 @@ def get_spark_df( spark, size: int = 3, seed: int = 100, keep_title_col: bool = False, keep_genre_col: bool = False, + tmp_path: Optional[str] = None, ): """Return fake movielens dataset as a Spark Dataframe with specified rows Args: spark (SparkSession): spark session to load the dataframe into size (int): number of rows to generate - seed (int, optional): seeding the pseudo-number generation. Defaults to 100. + seed (int): seeding the pseudo-number generation. Defaults to 100. keep_title_col (bool): remove the title column if False. Defaults to False. keep_genre_col (bool): remove the genre column if False. Defaults to False. + tmp_path (str, optional): path to store files for serialization purpose + when transferring data from python to java. + If None, a temporal path is used instead Returns: pyspark.sql.DataFrame: a mock dataset """ pandas_df = cls.get_df(size=size, seed=seed, keep_title_col=True, keep_genre_col=True) - # serialize the pandas.df to avoid the expensive java <-> python communication - pandas_df.to_csv('test.csv', header=False, index=False) - deserialization_schema = StructType([ + # generate temp folder + with download_path(tmp_path) as tmp_folder: + filepath = os.path.join(tmp_folder, f"mock_movielens_{size}.csv") + # serialize the pandas.df as a csv to avoid the expensive java <-> python communication + pandas_df.to_csv(filepath, header=False, index=False) + print(f"Saving file {filepath}.") + spark_df = spark.read.csv(filepath, schema=cls._get_spark_deserialization_schema()) + # Cache and force trigger action since data-file might be removed. + spark_df.cache() + spark_df.count() + + if not keep_title_col: + spark_df = spark_df.drop(DEFAULT_TITLE_COL) + if not keep_genre_col: + spark_df = spark_df.drop(DEFAULT_GENRE_COL) + return spark_df + + @classmethod + def _get_spark_deserialization_schema(cls): + return StructType([ StructField(DEFAULT_USER_COL, IntegerType()), StructField(DEFAULT_ITEM_COL, IntegerType()), StructField(DEFAULT_RATING_COL, FloatType()), @@ -115,10 +139,3 @@ def get_spark_df( StructField(DEFAULT_TITLE_COL, StringType()), StructField(DEFAULT_GENRE_COL, StringType()), ]) - spark_df = spark.read.csv('test.csv', schema=deserialization_schema) - - if not keep_title_col: - spark_df = spark_df.drop(DEFAULT_TITLE_COL) - if not keep_genre_col: - spark_df = spark_df.drop(DEFAULT_GENRE_COL) - return spark_df diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index 60f33a5e92..fb9ba4aede 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -101,7 +101,6 @@ def item_has_header(self): # Fake data for testing only MOCK_DATA_FORMAT = { "mock100": {"size": 100, "seed": 0}, - "mock10": {"size": 10, "seed": 6} } # 100K data genres index to string mapper. For 1m, 10m, and 20m, the genres labels are already in the dataset. @@ -153,15 +152,16 @@ def load_pandas_df( Args: size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m", "mock100"). - header* (list or tuple or None): Rating dataset header. 
'DEFAULT_HEADER' is set for all mock data sizes ("mock*"). + header* (list or tuple or None): Rating dataset header. + If size is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored and data is rendered using the 'DEFAULT_HEADER' instead. local_cache_path* (str): Path (directory or a zip file) to cache the downloaded zip file. If None, all the intermediate files will be stored in a temporary directory and removed after use. - title_col* (str): Movie title column name. If None, the column will not be loaded. - genres_col* (str): Genres column name. Genres are '|' separated string. + title_col (str): Movie title column name. If None, the column will not be loaded. + genres_col (str): Genres column name. Genres are '|' separated string. If None, the column will not be loaded. year_col* (str): Movie release year column name. If None, the column will not be loaded. - All (*) arguments are not applicable when mock dataset is specified (size = "mock*") + All (*) arguments are not applicable when mock dataset is specified (size = "mock*") Returns: pandas.DataFrame: Movie rating dataset. @@ -361,10 +361,11 @@ def load_spark_df( Args: spark (pyspark.SparkSession): Spark session. size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m", "mock100"). - header* (list or tuple): Rating dataset header. 'DEFAULT_HEADER' is set for all mock data sizes ("mock*"). + header* (list or tuple): Rating dataset header. If schema is provided, this argument is ignored. - schema* (pyspark.StructType): Dataset schema. - local_cache_path* (str): Path (directory or a zip file) to cache the downloaded zip file. + schema* (pyspark.StructType): Dataset schema. + If size is set to any of 'MOCK_DATA_FORMAT', data is rendered in the 'MockMovielensSchema' instead. + local_cache_path (str): Path (directory or a zip file) to cache the downloaded zip file. If None, all the intermediate files will be stored in a temporary directory and removed after use. dbutils (Databricks.dbutils): Databricks utility object title_col (str): Title column name. If None, the column will not be loaded. @@ -372,7 +373,7 @@ def load_spark_df( If None, the column will not be loaded. year_col* (str): Movie release year column name. If None, the column will not be loaded. - All (*) arguments are not applicable when mock dataset is specified (size = "mock*") + All (*) arguments are not applicable if size is set to any of 'MOCK_DATA_FORMAT' Returns: pyspark.sql.DataFrame: Movie rating dataset. 
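A minimal usage sketch of the mock size these docstrings describe, assuming an active
SparkSession bound to `spark` (illustrative only, not part of the patch):

    from recommenders.datasets import movielens

    # Pandas: 100 fake rows generated by MockMovielensSchema;
    # the starred (*) arguments above are ignored for mock sizes.
    df = movielens.load_pandas_df(size="mock100")
    assert len(df) == 100

    # Spark: the fake frame is serialized to a temporary CSV (see tmp_path)
    # and read back, avoiding the slow row-by-row python <-> java conversion.
    spark_df = movielens.load_spark_df(spark, size="mock100")
    assert spark_df.count() == 100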
diff --git a/tests/unit/recommenders/datasets/mock/test_movielens.py b/tests/unit/recommenders/datasets/mock/test_movielens.py index e8a6e5f8be..0db9e26b59 100644 --- a/tests/unit/recommenders/datasets/mock/test_movielens.py +++ b/tests/unit/recommenders/datasets/mock/test_movielens.py @@ -1,3 +1,5 @@ +import os + from recommenders.datasets.mock.movielens import MockMovielensSchema from recommenders.datasets.movielens import DEFAULT_HEADER from recommenders.utils.constants import ( @@ -7,6 +9,7 @@ import pytest import pandas +from pytest_mock import MockerFixture @pytest.mark.parametrize("size", [10, 100]) @@ -62,3 +65,19 @@ def test_mock_movielens_schema__get_spark_df__return_success(spark, size, seed, assert df.schema[DEFAULT_TITLE_COL] if keep_genre_col: assert df.schema[DEFAULT_GENRE_COL] + + +def test_mock_movielens_schema__get_spark_df__store_tmp_file(spark, tmp_path): + data_size = 3 + MockMovielensSchema.get_spark_df(spark, size=data_size, tmp_path=tmp_path) + assert os.path.exists(os.path.join(tmp_path, f"mock_movielens_{data_size}.csv")) + + +def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(spark, mocker: MockerFixture): + data_size = 3 + to_csv_spy = mocker.spy(pandas.DataFrame, "to_csv") + + df = MockMovielensSchema.get_spark_df(spark, size=data_size) + # assertions + to_csv_spy.assert_called_once() + assert df.count() == data_size From 477391f207e1bcd0fe41c1f7d4525f9e0d4762bc Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 18:51:26 +0000 Subject: [PATCH 16/27] Add pytest-mock as 'dev' dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3bcdb23f05..c0d7d67377 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,7 @@ "pandera[strategies]>=0.6.5", # For generating fake datasets "pytest>=3.6.4", "pytest-cov>=2.12.1", + "pytest-mock>=3.6.1", # for access to mock fixtures in pytest ], } # for the brave of heart From 97c5be0ea19d41c268d393b92ab3bc64f4ce9663 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 24 Sep 2021 19:00:25 +0000 Subject: [PATCH 17/27] Add spark test markers to new tests --- tests/unit/recommenders/datasets/mock/test_movielens.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/recommenders/datasets/mock/test_movielens.py b/tests/unit/recommenders/datasets/mock/test_movielens.py index 0db9e26b59..bff8e05f62 100644 --- a/tests/unit/recommenders/datasets/mock/test_movielens.py +++ b/tests/unit/recommenders/datasets/mock/test_movielens.py @@ -67,12 +67,15 @@ def test_mock_movielens_schema__get_spark_df__return_success(spark, size, seed, assert df.schema[DEFAULT_GENRE_COL] +@pytest.mark.spark def test_mock_movielens_schema__get_spark_df__store_tmp_file(spark, tmp_path): data_size = 3 MockMovielensSchema.get_spark_df(spark, size=data_size, tmp_path=tmp_path) assert os.path.exists(os.path.join(tmp_path, f"mock_movielens_{data_size}.csv")) + +@pytest.mark.spark def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(spark, mocker: MockerFixture): data_size = 3 to_csv_spy = mocker.spy(pandas.DataFrame, "to_csv") From cb2d140a15d00d7bccccdafe1f85f4e9a0cbec8e Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Mon, 27 Sep 2021 16:01:16 +0000 Subject: [PATCH 18/27] Small code cleanup --- recommenders/datasets/mock/movielens.py | 1 - recommenders/datasets/movielens.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py index 8e12a7c3b5..d7e1264607 
100644 --- a/recommenders/datasets/mock/movielens.py +++ b/recommenders/datasets/mock/movielens.py @@ -117,7 +117,6 @@ def get_spark_df( filepath = os.path.join(tmp_folder, f"mock_movielens_{size}.csv") # serialize the pandas.df as a csv to avoid the expensive java <-> python communication pandas_df.to_csv(filepath, header=False, index=False) - print(f"Saving file {filepath}.") spark_df = spark.read.csv(filepath, schema=cls._get_spark_deserialization_schema()) # Cache and force trigger action since data-file might be removed. spark_df.cache() diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index fb9ba4aede..00a80f9d1a 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -20,9 +20,7 @@ StructField, IntegerType, FloatType, - DoubleType, - LongType, - StringType, + LongType ) from pyspark.sql.functions import concat_ws, col except ImportError: From 65a5327145d8ac792f42e9f8e3bbc86ecb8d3d28 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Mon, 27 Sep 2021 16:11:32 +0000 Subject: [PATCH 19/27] Install 'dev' dependencies in ADO build --- tests/ci/azure_pipeline_test/dsvm_nightly_linux_cpu.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_nightly_linux_gpu.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_nightly_linux_pyspark.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_notebook_linux_cpu.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_notebook_linux_gpu.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_notebook_linux_pyspark.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_unit_linux_cpu.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_unit_linux_gpu.yml | 2 +- tests/ci/azure_pipeline_test/dsvm_unit_linux_pyspark.yml | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/ci/azure_pipeline_test/dsvm_nightly_linux_cpu.yml b/tests/ci/azure_pipeline_test/dsvm_nightly_linux_cpu.yml index 2c5a698243..15b237650a 100644 --- a/tests/ci/azure_pipeline_test/dsvm_nightly_linux_cpu.yml +++ b/tests/ci/azure_pipeline_test/dsvm_nightly_linux_cpu.yml @@ -33,6 +33,6 @@ extends: timeout: 180 conda_env: "nightly_linux_cpu" conda_opts: "python=3.6" - pip_opts: "[examples]" + pip_opts: "[examples,dev]" pytest_markers: "not spark and not gpu" pytest_params: "-x" diff --git a/tests/ci/azure_pipeline_test/dsvm_nightly_linux_gpu.yml b/tests/ci/azure_pipeline_test/dsvm_nightly_linux_gpu.yml index b1182c34c9..c43e8ec981 100644 --- a/tests/ci/azure_pipeline_test/dsvm_nightly_linux_gpu.yml +++ b/tests/ci/azure_pipeline_test/dsvm_nightly_linux_gpu.yml @@ -32,6 +32,6 @@ extends: timeout: 240 conda_env: "nightly_linux_gpu" conda_opts: "python=3.6 cudatoolkit=10.0 \"cudnn>=7.6\"" - pip_opts: "[gpu,examples] -f https://download.pytorch.org/whl/cu100/torch_stable.html" + pip_opts: "[gpu,examples,dev] -f https://download.pytorch.org/whl/cu100/torch_stable.html" pytest_markers: "not spark and gpu" pytest_params: "-x" diff --git a/tests/ci/azure_pipeline_test/dsvm_nightly_linux_pyspark.yml b/tests/ci/azure_pipeline_test/dsvm_nightly_linux_pyspark.yml index 6fd4e526ea..f542f059ff 100644 --- a/tests/ci/azure_pipeline_test/dsvm_nightly_linux_pyspark.yml +++ b/tests/ci/azure_pipeline_test/dsvm_nightly_linux_pyspark.yml @@ -33,6 +33,6 @@ extends: timeout: 180 conda_env: "nightly_linux_spark" conda_opts: "python=3.6" - pip_opts: "[spark,examples]" + pip_opts: "[spark,examples,dev]" pytest_markers: "spark and not gpu" pytest_params: "-x" diff --git a/tests/ci/azure_pipeline_test/dsvm_notebook_linux_cpu.yml b/tests/ci/azure_pipeline_test/dsvm_notebook_linux_cpu.yml index 
b75cc0c3f5..93eaeacc84 100644 --- a/tests/ci/azure_pipeline_test/dsvm_notebook_linux_cpu.yml +++ b/tests/ci/azure_pipeline_test/dsvm_notebook_linux_cpu.yml @@ -60,5 +60,5 @@ extends: task_name: "Test - Unit Notebook Linux CPU" conda_env: "unit_notebook_linux_cpu" conda_opts: "python=3.6" - pip_opts: "[examples]" + pip_opts: "[examples,dev]" pytest_markers: "notebooks and not spark and not gpu" diff --git a/tests/ci/azure_pipeline_test/dsvm_notebook_linux_gpu.yml b/tests/ci/azure_pipeline_test/dsvm_notebook_linux_gpu.yml index 9cb44639e0..6d7594a143 100644 --- a/tests/ci/azure_pipeline_test/dsvm_notebook_linux_gpu.yml +++ b/tests/ci/azure_pipeline_test/dsvm_notebook_linux_gpu.yml @@ -60,5 +60,5 @@ extends: task_name: "Test - Unit Notebook Linux GPU" conda_env: "unit_notebook_linux_gpu" conda_opts: "python=3.6 cudatoolkit=10.0 \"cudnn>=7.6\"" - pip_opts: "[gpu,examples] -f https://download.pytorch.org/whl/cu100/torch_stable.html" + pip_opts: "[gpu,examples,dev] -f https://download.pytorch.org/whl/cu100/torch_stable.html" pytest_markers: "notebooks and not spark and gpu" diff --git a/tests/ci/azure_pipeline_test/dsvm_notebook_linux_pyspark.yml b/tests/ci/azure_pipeline_test/dsvm_notebook_linux_pyspark.yml index 535f6936a7..31d699588d 100644 --- a/tests/ci/azure_pipeline_test/dsvm_notebook_linux_pyspark.yml +++ b/tests/ci/azure_pipeline_test/dsvm_notebook_linux_pyspark.yml @@ -60,5 +60,5 @@ extends: task_name: "Test - Unit Notebook Linux Spark" conda_env: "unit_notebook_linux_spark" conda_opts: "python=3.6" - pip_opts: "[spark,examples]" + pip_opts: "[spark,examples,dev]" pytest_markers: "notebooks and spark and not gpu" diff --git a/tests/ci/azure_pipeline_test/dsvm_unit_linux_cpu.yml b/tests/ci/azure_pipeline_test/dsvm_unit_linux_cpu.yml index be3b95c587..26ed5bdf2f 100644 --- a/tests/ci/azure_pipeline_test/dsvm_unit_linux_cpu.yml +++ b/tests/ci/azure_pipeline_test/dsvm_unit_linux_cpu.yml @@ -60,5 +60,5 @@ extends: task_name: "Test - Unit Linux CPU" conda_env: "unit_linux_cpu" conda_opts: "python=3.6" - pip_opts: "" + pip_opts: "[dev]" pytest_markers: "not notebooks and not spark and not gpu" diff --git a/tests/ci/azure_pipeline_test/dsvm_unit_linux_gpu.yml b/tests/ci/azure_pipeline_test/dsvm_unit_linux_gpu.yml index b9a76211d9..9aa46047e6 100644 --- a/tests/ci/azure_pipeline_test/dsvm_unit_linux_gpu.yml +++ b/tests/ci/azure_pipeline_test/dsvm_unit_linux_gpu.yml @@ -60,5 +60,5 @@ extends: task_name: "Test - Unit Linux GPU" conda_env: "unit_linux_gpu" conda_opts: "python=3.6 cudatoolkit=10.0 \"cudnn>=7.6\"" - pip_opts: "[gpu] -f https://download.pytorch.org/whl/cu100/torch_stable.html" + pip_opts: "[gpu,dev] -f https://download.pytorch.org/whl/cu100/torch_stable.html" pytest_markers: "not notebooks and not spark and gpu" diff --git a/tests/ci/azure_pipeline_test/dsvm_unit_linux_pyspark.yml b/tests/ci/azure_pipeline_test/dsvm_unit_linux_pyspark.yml index f99b151cad..1f3006a05e 100644 --- a/tests/ci/azure_pipeline_test/dsvm_unit_linux_pyspark.yml +++ b/tests/ci/azure_pipeline_test/dsvm_unit_linux_pyspark.yml @@ -60,5 +60,5 @@ extends: task_name: "Test - Unit Linux Spark" conda_env: "unit_linux_spark" conda_opts: "python=3.6" - pip_opts: "[spark]" + pip_opts: "[spark,dev]" pytest_markers: "not notebooks and spark and not gpu" From c2a44589fd61db9f7ea231d56f8b73a86c8e3942 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Mon, 27 Sep 2021 19:09:53 +0000 Subject: [PATCH 20/27] Undone default partition changes --- recommenders/evaluation/spark_evaluation.py | 6 +----- 1 file changed, 1 insertion(+), 
5 deletions(-) diff --git a/recommenders/evaluation/spark_evaluation.py b/recommenders/evaluation/spark_evaluation.py index 5110d72e82..e5112965b2 100644 --- a/recommenders/evaluation/spark_evaluation.py +++ b/recommenders/evaluation/spark_evaluation.py @@ -1,10 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. - -import numpy as np -from pyspark.sql.types import LongType - try: from pyspark.mllib.evaluation import RegressionMetrics, RankingMetrics from pyspark.sql import Window, DataFrame @@ -618,7 +614,7 @@ def _get_pairwise_items(self, df): .select(self.col_user, "i1", "i2") ) - def _get_cosine_similarity(self, n_partitions=10): + def _get_cosine_similarity(self, n_partitions=200): if self.item_sim_measure == "item_cooccurrence_count": # calculate item-item similarity based on item co-occurrence count From 2306b2b94040696f1ad73f6766c97fdbdab15011 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Wed, 29 Sep 2021 21:11:32 +0000 Subject: [PATCH 21/27] Fix bug after merge --- examples/03_evaluate/als_movielens_diversity_metrics.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index 1f4a4ed081..356224adef 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -153,7 +153,7 @@ "import pyspark\n", "from pyspark.ml.recommendation import ALS\n", "import pyspark.sql.functions as F\n", - "from pyspark.sql.types import FloatType, IntegerType, LongType, StructType, StructField\n", + "from pyspark.sql.types import FloatType, IntegerType, StringType, StructType, StructField\n", "from pyspark.ml.feature import Tokenizer, StopWordsRemover\n", "from pyspark.ml.feature import HashingTF, CountVectorizer, VectorAssembler\n", "\n", From b0bcd75e4149cb323a10e3d6b4c0c3ba111dca73 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Tue, 5 Oct 2021 19:58:24 +0000 Subject: [PATCH 22/27] Undo datatype changes --- .../als_movielens_diversity_metrics.ipynb | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index 356224adef..289d5b93fd 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -153,7 +153,7 @@ "import pyspark\n", "from pyspark.ml.recommendation import ALS\n", "import pyspark.sql.functions as F\n", - "from pyspark.sql.types import FloatType, IntegerType, StringType, StructType, StructField\n", + "from pyspark.sql.types import FloatType, IntegerType, LongType, StructType, StructField\n", "from pyspark.ml.feature import Tokenizer, StopWordsRemover\n", "from pyspark.ml.feature import HashingTF, CountVectorizer, VectorAssembler\n", "\n", @@ -177,8 +177,8 @@ "output_type": "stream", "name": "stdout", "text": [ - "System version: 3.6.13 |Anaconda, Inc.| (default, Jun 4 2021, 14:25:59) \n", - "[GCC 7.5.0]\n", + "System version: 3.6.9 (default, Jan 26 2021, 15:33:00) \n", + "[GCC 8.4.0]\n", "Spark version: 2.4.8\n" ] } @@ -205,7 +205,7 @@ "\n", "# user, item column names\n", "COL_USER=\"UserId\"\n", - "COL_ITEM=\"ItemId\"\n", + "COL_ITEM=\"MovieId\"\n", "COL_RATING=\"Rating\"\n", "COL_TITLE=\"Title\"\n", "COL_GENRE=\"Genre\"" @@ -257,7 +257,7 @@ " StructField(COL_USER, IntegerType()),\n", " 
StructField(COL_ITEM, IntegerType()),\n", " StructField(COL_RATING, FloatType()),\n", - " StructField(\"Timestamp\", StringType()),\n", + " StructField(\"Timestamp\", LongType()),\n", " )\n", ")\n", "\n", @@ -269,37 +269,37 @@ "output_type": "stream", "name": "stderr", "text": [ - "100%|██████████| 4.81k/4.81k [00:00<00:00, 15.6kKB/s]\n" + "100%|██████████| 4.81k/4.81k [00:00<00:00, 20.1kKB/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ - "+------+------+------+---------+--------------------+------+\n", - "|ItemId|UserId|Rating|Timestamp| Title| Genre|\n", - "+------+------+------+---------+--------------------+------+\n", - "| 26| 138| 5.0|879024232|Brothers McMullen...|Comedy|\n", - "| 26| 224| 3.0|888104153|Brothers McMullen...|Comedy|\n", - "| 26| 18| 4.0|880129731|Brothers McMullen...|Comedy|\n", - "| 26| 222| 3.0|878183043|Brothers McMullen...|Comedy|\n", - "| 26| 43| 5.0|883954901|Brothers McMullen...|Comedy|\n", - "| 26| 201| 4.0|884111927|Brothers McMullen...|Comedy|\n", - "| 26| 299| 4.0|878192601|Brothers McMullen...|Comedy|\n", - "| 26| 95| 3.0|880571951|Brothers McMullen...|Comedy|\n", - "| 26| 89| 3.0|879459909|Brothers McMullen...|Comedy|\n", - "| 26| 361| 3.0|879440941|Brothers McMullen...|Comedy|\n", - "| 26| 194| 3.0|879522240|Brothers McMullen...|Comedy|\n", - "| 26| 391| 5.0|877399745|Brothers McMullen...|Comedy|\n", - "| 26| 345| 3.0|884993555|Brothers McMullen...|Comedy|\n", - "| 26| 303| 4.0|879468307|Brothers McMullen...|Comedy|\n", - "| 26| 401| 3.0|891033395|Brothers McMullen...|Comedy|\n", - "| 26| 429| 3.0|882386333|Brothers McMullen...|Comedy|\n", - "| 26| 293| 3.0|888907015|Brothers McMullen...|Comedy|\n", - "| 26| 270| 5.0|876954995|Brothers McMullen...|Comedy|\n", - "| 26| 442| 3.0|883388576|Brothers McMullen...|Comedy|\n", - "| 26| 342| 2.0|875320037|Brothers McMullen...|Comedy|\n", - "+------+------+------+---------+--------------------+------+\n", + "+-------+------+------+---------+--------------------+------+\n", + "|MovieId|UserId|Rating|Timestamp| Title| Genre|\n", + "+-------+------+------+---------+--------------------+------+\n", + "| 26| 138| 5.0|879024232|Brothers McMullen...|Comedy|\n", + "| 26| 224| 3.0|888104153|Brothers McMullen...|Comedy|\n", + "| 26| 18| 4.0|880129731|Brothers McMullen...|Comedy|\n", + "| 26| 222| 3.0|878183043|Brothers McMullen...|Comedy|\n", + "| 26| 43| 5.0|883954901|Brothers McMullen...|Comedy|\n", + "| 26| 201| 4.0|884111927|Brothers McMullen...|Comedy|\n", + "| 26| 299| 4.0|878192601|Brothers McMullen...|Comedy|\n", + "| 26| 95| 3.0|880571951|Brothers McMullen...|Comedy|\n", + "| 26| 89| 3.0|879459909|Brothers McMullen...|Comedy|\n", + "| 26| 361| 3.0|879440941|Brothers McMullen...|Comedy|\n", + "| 26| 194| 3.0|879522240|Brothers McMullen...|Comedy|\n", + "| 26| 391| 5.0|877399745|Brothers McMullen...|Comedy|\n", + "| 26| 345| 3.0|884993555|Brothers McMullen...|Comedy|\n", + "| 26| 303| 4.0|879468307|Brothers McMullen...|Comedy|\n", + "| 26| 401| 3.0|891033395|Brothers McMullen...|Comedy|\n", + "| 26| 429| 3.0|882386333|Brothers McMullen...|Comedy|\n", + "| 26| 293| 3.0|888907015|Brothers McMullen...|Comedy|\n", + "| 26| 270| 5.0|876954995|Brothers McMullen...|Comedy|\n", + "| 26| 442| 3.0|883388576|Brothers McMullen...|Comedy|\n", + "| 26| 342| 2.0|875320037|Brothers McMullen...|Comedy|\n", + "+-------+------+------+---------+--------------------+------+\n", "only showing top 20 rows\n", "\n" ] From fd33efe2d6fc1f736809665c8d5657476374de32 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Tue, 5 Oct 
2021 20:20:54 +0000 Subject: [PATCH 23/27] Merge mock schema into movielens.py --- recommenders/datasets/mock/__init__.py | 0 recommenders/datasets/mock/movielens.py | 140 ---------------- recommenders/datasets/movielens.py | 151 ++++++++++++++++-- .../recommenders/datasets/mock/__init__.py | 0 .../datasets/mock/test_movielens.py | 86 ---------- .../recommenders/datasets/test_movielens.py | 83 +++++++++- 6 files changed, 214 insertions(+), 246 deletions(-) delete mode 100644 recommenders/datasets/mock/__init__.py delete mode 100644 recommenders/datasets/mock/movielens.py delete mode 100644 tests/unit/recommenders/datasets/mock/__init__.py delete mode 100644 tests/unit/recommenders/datasets/mock/test_movielens.py diff --git a/recommenders/datasets/mock/__init__.py b/recommenders/datasets/mock/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/recommenders/datasets/mock/movielens.py b/recommenders/datasets/mock/movielens.py deleted file mode 100644 index d7e1264607..0000000000 --- a/recommenders/datasets/mock/movielens.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -""" -Mock dataset schema to generate fake data for testing use. This will mimic the Movielens Dataset -""" -try: - import pandera as pa -except ImportError: - raise ImportError("pandera is not installed. Try `pip install recommenders['dev']`") - -try: - from pyspark.sql.types import StructField, StructType, IntegerType, StringType, FloatType -except ImportError: - pass # so the environment without spark doesn't break - -from recommenders.utils.constants import ( - DEFAULT_USER_COL, - DEFAULT_ITEM_COL, - DEFAULT_RATING_COL, - DEFAULT_TIMESTAMP_COL, - DEFAULT_TITLE_COL, - DEFAULT_GENRE_COL, - DEFAULT_HEADER -) -from recommenders.datasets.download_utils import download_path - -import os -import random -from typing import Optional - -import pandas -from pandera.typing import Series -from pandera import Field - - -class MockMovielensSchema(pa.SchemaModel): - """ - Mock dataset schema to generate fake data for testing purpose. - This schema is configured to mimic the Movielens dataset - - http://files.grouplens.org/datasets/movielens/ml-100k/ - - Dataset schema and generation is configured using pandera. - Please see https://pandera.readthedocs.io/en/latest/schema_models.html - for more information. - """ - # Some notebooks will do a cross join with userID and itemID, - # a sparse range for these IDs can slow down the notebook tests - userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) - itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) - rating: Series[float] = Field(in_range={"min_value": 1, "max_value": 5}) - timestamp: Series[str] = Field(eq="2022-2-22") - title: Series[str] = Field(eq="foo") - genre: Series[str] = Field(eq="genreA|0") - - @classmethod - def get_df( - cls, - size: int = 3, seed: int = 100, - keep_first_n_cols: Optional[int] = None, - keep_title_col: bool = False, keep_genre_col: bool = False, - ) -> pandas.DataFrame: - """Return fake movielens dataset as a Pandas Dataframe with specified rows. - - Args: - size (int): number of rows to generate - seed (int, optional): seeding the pseudo-number generation. Defaults to 100. - keep_first_n_cols (int, optional): keep the first n default movielens columns. - keep_title_col (bool): remove the title column if False. Defaults to True. - keep_genre_col (bool): remove the genre column if False. Defaults to True. 
- - Returns: - pandas.DataFrame: a mock dataset - """ - schema = cls.to_schema() - if keep_first_n_cols is not None: - if keep_first_n_cols < 1 or keep_first_n_cols > len(DEFAULT_HEADER): - raise ValueError(f"Invalid value for 'keep_first_n_cols': {keep_first_n_cols}. Valid range: [1-{len(DEFAULT_HEADER)}]") - schema = schema.remove_columns(DEFAULT_HEADER[keep_first_n_cols:]) - if not keep_title_col: - schema = schema.remove_columns([DEFAULT_TITLE_COL]) - if not keep_genre_col: - schema = schema.remove_columns([DEFAULT_GENRE_COL]) - - random.seed(seed) - # For more information on data synthesis, see https://pandera.readthedocs.io/en/latest/data_synthesis_strategies.html - return schema.example(size=size) - - @classmethod - def get_spark_df( - cls, - spark, - size: int = 3, seed: int = 100, - keep_title_col: bool = False, keep_genre_col: bool = False, - tmp_path: Optional[str] = None, - ): - """Return fake movielens dataset as a Spark Dataframe with specified rows - - Args: - spark (SparkSession): spark session to load the dataframe into - size (int): number of rows to generate - seed (int): seeding the pseudo-number generation. Defaults to 100. - keep_title_col (bool): remove the title column if False. Defaults to False. - keep_genre_col (bool): remove the genre column if False. Defaults to False. - tmp_path (str, optional): path to store files for serialization purpose - when transferring data from python to java. - If None, a temporal path is used instead - - Returns: - pyspark.sql.DataFrame: a mock dataset - """ - pandas_df = cls.get_df(size=size, seed=seed, keep_title_col=True, keep_genre_col=True) - - # generate temp folder - with download_path(tmp_path) as tmp_folder: - filepath = os.path.join(tmp_folder, f"mock_movielens_{size}.csv") - # serialize the pandas.df as a csv to avoid the expensive java <-> python communication - pandas_df.to_csv(filepath, header=False, index=False) - spark_df = spark.read.csv(filepath, schema=cls._get_spark_deserialization_schema()) - # Cache and force trigger action since data-file might be removed. 
- spark_df.cache() - spark_df.count() - - if not keep_title_col: - spark_df = spark_df.drop(DEFAULT_TITLE_COL) - if not keep_genre_col: - spark_df = spark_df.drop(DEFAULT_GENRE_COL) - return spark_df - - @classmethod - def _get_spark_deserialization_schema(cls): - return StructType([ - StructField(DEFAULT_USER_COL, IntegerType()), - StructField(DEFAULT_ITEM_COL, IntegerType()), - StructField(DEFAULT_RATING_COL, FloatType()), - StructField(DEFAULT_TIMESTAMP_COL, StringType()), - StructField(DEFAULT_TITLE_COL, StringType()), - StructField(DEFAULT_GENRE_COL, StringType()), - ]) diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index 00a80f9d1a..88b67322d4 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -3,21 +3,29 @@ import os import re +import random import shutil import warnings import pandas as pd +from typing import Optional from zipfile import ZipFile from recommenders.datasets.download_utils import maybe_download, download_path from recommenders.utils.notebook_utils import is_databricks from recommenders.utils.constants import ( DEFAULT_HEADER, DEFAULT_ITEM_COL, + DEFAULT_USER_COL, + DEFAULT_RATING_COL, + DEFAULT_TIMESTAMP_COL, + DEFAULT_TITLE_COL, + DEFAULT_GENRE_COL, ) try: from pyspark.sql.types import ( StructType, StructField, + StringType, IntegerType, FloatType, LongType @@ -26,6 +34,13 @@ except ImportError: pass # so the environment without spark doesn't break +try: + import pandera as pa + from pandera.typing import Series + from pandera import Field +except ImportError: + pass # so the environment without recommender['dev'] doesn't break + class _DataFormat: def __init__( @@ -150,16 +165,16 @@ def load_pandas_df( Args: size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m", "mock100"). - header* (list or tuple or None): Rating dataset header. - If size is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored and data is rendered using the 'DEFAULT_HEADER' instead. - local_cache_path* (str): Path (directory or a zip file) to cache the downloaded zip file. + header (list or tuple or None): Rating dataset header. + If `size` is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored and data is rendered using the 'DEFAULT_HEADER' instead. + local_cache_path (str): Path (directory or a zip file) to cache the downloaded zip file. If None, all the intermediate files will be stored in a temporary directory and removed after use. + If `size` is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored. title_col (str): Movie title column name. If None, the column will not be loaded. genres_col (str): Genres column name. Genres are '|' separated string. If None, the column will not be loaded. - year_col* (str): Movie release year column name. If None, the column will not be loaded. - - All (*) arguments are not applicable when mock dataset is specified (size = "mock*") + year_col (str): Movie release year column name. If None, the column will not be loaded. + If `size` is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored. Returns: pandas.DataFrame: Movie rating dataset. 
@@ -195,8 +210,6 @@ def load_pandas_df( header = header[:4] if size in MOCK_DATA_FORMAT: - # function-wide import to isolate extra dependencies from the mock schema will use - from recommenders.datasets.mock.movielens import MockMovielensSchema # generate fake data return MockMovielensSchema.get_df( keep_first_n_cols=len(header), @@ -359,19 +372,19 @@ def load_spark_df( Args: spark (pyspark.SparkSession): Spark session. size (str): Size of the data to load. One of ("100k", "1m", "10m", "20m", "mock100"). - header* (list or tuple): Rating dataset header. - If schema is provided, this argument is ignored. - schema* (pyspark.StructType): Dataset schema. - If size is set to any of 'MOCK_DATA_FORMAT', data is rendered in the 'MockMovielensSchema' instead. + header (list or tuple): Rating dataset header. + If `schema` is provided or `size` is set to any of 'MOCK_DATA_FORMAT', this argument is ignored. + schema (pyspark.StructType): Dataset schema. + If `size` is set to any of 'MOCK_DATA_FORMAT', data is rendered in the 'MockMovielensSchema' instead. local_cache_path (str): Path (directory or a zip file) to cache the downloaded zip file. If None, all the intermediate files will be stored in a temporary directory and removed after use. dbutils (Databricks.dbutils): Databricks utility object + If `size` is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored. title_col (str): Title column name. If None, the column will not be loaded. genres_col (str): Genres column name. Genres are '|' separated string. If None, the column will not be loaded. - year_col* (str): Movie release year column name. If None, the column will not be loaded. - - All (*) arguments are not applicable if size is set to any of 'MOCK_DATA_FORMAT' + year_col (str): Movie release year column name. If None, the column will not be loaded. + If `size` is set to any of 'MOCK_DATA_FORMAT', this parameter is ignored. Returns: pyspark.sql.DataFrame: Movie rating dataset. @@ -410,8 +423,6 @@ def load_spark_df( raise ValueError(ERROR_MOVIE_LENS_SIZE) if size in MOCK_DATA_FORMAT: - # function-wide import to isolate extra dependencies from the mock schema will use - from recommenders.datasets.mock.movielens import MockMovielensSchema # generate fake data return MockMovielensSchema.get_spark_df( spark, @@ -560,3 +571,109 @@ def extract_movielens(size, rating_path, item_path, zip_path): shutil.copyfileobj(zf, f) with z.open(DATA_FORMAT[size].item_path) as zf, open(item_path, "wb") as f: shutil.copyfileobj(zf, f) + + +class MockMovielensSchema(pa.SchemaModel): + """ + Mock dataset schema to generate fake data for testing purpose. + This schema is configured to mimic the Movielens dataset + + http://files.grouplens.org/datasets/movielens/ml-100k/ + + Dataset schema and generation is configured using pandera. + Please see https://pandera.readthedocs.io/en/latest/schema_models.html + for more information. 
+ """ + # Some notebooks will do a cross join with userID and itemID, + # a sparse range for these IDs can slow down the notebook tests + userID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) + itemID: Series[int] = Field(in_range={"min_value": 1, "max_value": 10}) + rating: Series[float] = Field(in_range={"min_value": 1, "max_value": 5}) + timestamp: Series[int] + title: Series[str] = Field(eq="foo") + genre: Series[str] = Field(eq="genreA|0") + + @classmethod + def get_df( + cls, + size: int = 3, seed: int = 100, + keep_first_n_cols: Optional[int] = None, + keep_title_col: bool = False, keep_genre_col: bool = False, + ) -> pd.DataFrame: + """Return fake movielens dataset as a Pandas Dataframe with the specified number of rows. + + Args: + size (int): number of rows to generate + seed (int, optional): seed for the pseudo-random number generation. Defaults to 100. + keep_first_n_cols (int, optional): keep the first n default movielens columns. + keep_title_col (bool): remove the title column if False. Defaults to False. + keep_genre_col (bool): remove the genre column if False. Defaults to False. + + Returns: + pandas.DataFrame: a mock dataset + """ + schema = cls.to_schema() + if keep_first_n_cols is not None: + if keep_first_n_cols < 1 or keep_first_n_cols > len(DEFAULT_HEADER): + raise ValueError(f"Invalid value for 'keep_first_n_cols': {keep_first_n_cols}. Valid range: [1-{len(DEFAULT_HEADER)}]") + schema = schema.remove_columns(DEFAULT_HEADER[keep_first_n_cols:]) + if not keep_title_col: + schema = schema.remove_columns([DEFAULT_TITLE_COL]) + if not keep_genre_col: + schema = schema.remove_columns([DEFAULT_GENRE_COL]) + + random.seed(seed) + # For more information on data synthesis, see https://pandera.readthedocs.io/en/latest/data_synthesis_strategies.html + return schema.example(size=size) + + @classmethod + def get_spark_df( + cls, + spark, + size: int = 3, seed: int = 100, + keep_title_col: bool = False, keep_genre_col: bool = False, + tmp_path: Optional[str] = None, + ): + """Return fake movielens dataset as a Spark Dataframe with the specified number of rows. + + Args: + spark (SparkSession): spark session to load the dataframe into + size (int): number of rows to generate + seed (int): seed for the pseudo-random number generation. Defaults to 100. + keep_title_col (bool): remove the title column if False. Defaults to False. + keep_genre_col (bool): remove the genre column if False. Defaults to False. + tmp_path (str, optional): path to store files for serialization purposes + when transferring data from python to java. + If None, a temporary path is used instead + + Returns: + pyspark.sql.DataFrame: a mock dataset + """ + pandas_df = cls.get_df(size=size, seed=seed, keep_title_col=True, keep_genre_col=True) + + # generate temp folder + with download_path(tmp_path) as tmp_folder: + filepath = os.path.join(tmp_folder, f"mock_movielens_{size}.csv") + # serialize the pandas.df as a csv to avoid the expensive java <-> python communication + pandas_df.to_csv(filepath, header=False, index=False) + spark_df = spark.read.csv(filepath, schema=cls._get_spark_deserialization_schema()) + # Cache and force trigger action since data-file might be removed.
+ spark_df.cache() + spark_df.count() + + if not keep_title_col: + spark_df = spark_df.drop(DEFAULT_TITLE_COL) + if not keep_genre_col: + spark_df = spark_df.drop(DEFAULT_GENRE_COL) + return spark_df + + @classmethod + def _get_spark_deserialization_schema(cls): + return StructType([ + StructField(DEFAULT_USER_COL, IntegerType()), + StructField(DEFAULT_ITEM_COL, IntegerType()), + StructField(DEFAULT_RATING_COL, FloatType()), + StructField(DEFAULT_TIMESTAMP_COL, StringType()), + StructField(DEFAULT_TITLE_COL, StringType()), + StructField(DEFAULT_GENRE_COL, StringType()), + ]) diff --git a/tests/unit/recommenders/datasets/mock/__init__.py b/tests/unit/recommenders/datasets/mock/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/unit/recommenders/datasets/mock/test_movielens.py b/tests/unit/recommenders/datasets/mock/test_movielens.py deleted file mode 100644 index bff8e05f62..0000000000 --- a/tests/unit/recommenders/datasets/mock/test_movielens.py +++ /dev/null @@ -1,86 +0,0 @@ -import os - -from recommenders.datasets.mock.movielens import MockMovielensSchema -from recommenders.datasets.movielens import DEFAULT_HEADER -from recommenders.utils.constants import ( - DEFAULT_GENRE_COL, - DEFAULT_TITLE_COL, -) - -import pytest -import pandas -from pytest_mock import MockerFixture - - -@pytest.mark.parametrize("size", [10, 100]) -def test_mock_movielens_schema__has_default_col_names(size): - df = MockMovielensSchema.example(size=size) - for col_name in DEFAULT_HEADER: - assert col_name in df.columns - - -@pytest.mark.parametrize("keep_first_n_cols", [1, 2, 3, 4]) -def test_mock_movielens_schema__get_df_remove_default_col__return_success(keep_first_n_cols): - df = MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols) - assert len(df) > 0 - assert len(df.columns) == keep_first_n_cols - - -@pytest.mark.parametrize("keep_first_n_cols", [-1, 0, 100]) -def test_mock_movielens_schema__get_df_invalid_param__return_failure(keep_first_n_cols): - with pytest.raises(ValueError, match=r"Invalid value.*"): - MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols) - - -@pytest.mark.parametrize("keep_genre_col", [True, False]) -@pytest.mark.parametrize("keep_title_col", [True, False]) -@pytest.mark.parametrize("keep_first_n_cols", [None, 2]) -@pytest.mark.parametrize("seed", [-1]) # seed for pseudo-random # generation -@pytest.mark.parametrize("size", [0, 3, 10]) -def test_mock_movielens_schema__get_df__return_success(size, seed, keep_first_n_cols, keep_title_col, keep_genre_col): - df = MockMovielensSchema.get_df( - size=size, seed=seed, - keep_first_n_cols=keep_first_n_cols, - keep_title_col=keep_title_col, keep_genre_col=keep_genre_col - ) - assert type(df) == pandas.DataFrame - assert len(df) == size - - if keep_title_col: - assert len(df[DEFAULT_TITLE_COL]) == size - if keep_genre_col: - assert len(df[DEFAULT_GENRE_COL]) == size - - -@pytest.mark.spark -@pytest.mark.parametrize("keep_genre_col", [True, False]) -@pytest.mark.parametrize("keep_title_col", [True, False]) -@pytest.mark.parametrize("seed", [101]) # seed for pseudo-random # generation -@pytest.mark.parametrize("size", [0, 3, 10]) -def test_mock_movielens_schema__get_spark_df__return_success(spark, size, seed, keep_title_col, keep_genre_col): - df = MockMovielensSchema.get_spark_df(spark, size=size, seed=seed, keep_title_col=keep_title_col, keep_genre_col=keep_genre_col) - assert df.count() == size - - if keep_title_col: - assert df.schema[DEFAULT_TITLE_COL] - if keep_genre_col: - 
assert df.schema[DEFAULT_GENRE_COL] - - -@pytest.mark.spark -def test_mock_movielens_schema__get_spark_df__store_tmp_file(spark, tmp_path): - data_size = 3 - MockMovielensSchema.get_spark_df(spark, size=data_size, tmp_path=tmp_path) - assert os.path.exists(os.path.join(tmp_path, f"mock_movielens_{data_size}.csv")) - - - -@pytest.mark.spark -def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(spark, mocker: MockerFixture): - data_size = 3 - to_csv_spy = mocker.spy(pandas.DataFrame, "to_csv") - - df = MockMovielensSchema.get_spark_df(spark, size=data_size) - # assertions - to_csv_spy.assert_called_once() - assert df.count() == data_size diff --git a/tests/unit/recommenders/datasets/test_movielens.py b/tests/unit/recommenders/datasets/test_movielens.py index ddba43a580..d8f12771f9 100644 --- a/tests/unit/recommenders/datasets/test_movielens.py +++ b/tests/unit/recommenders/datasets/test_movielens.py @@ -1,10 +1,87 @@ -from recommenders.datasets.movielens import DATA_FORMAT, MOCK_DATA_FORMAT +import os +import pandas +import pytest + +from recommenders.datasets.movielens import MockMovielensSchema from recommenders.datasets.movielens import load_pandas_df, load_spark_df +from recommenders.datasets.movielens import DATA_FORMAT, MOCK_DATA_FORMAT, DEFAULT_HEADER from recommenders.utils.constants import DEFAULT_GENRE_COL, DEFAULT_TITLE_COL -import pandas -import pytest from pandas.core.series import Series +from pytest_mock import MockerFixture + + +@pytest.mark.parametrize("size", [10, 100]) +def test_mock_movielens_schema__has_default_col_names(size): + df = MockMovielensSchema.example(size=size) + for col_name in DEFAULT_HEADER: + assert col_name in df.columns + + +@pytest.mark.parametrize("keep_first_n_cols", [1, 2, 3, 4]) +def test_mock_movielens_schema__get_df_remove_default_col__return_success(keep_first_n_cols): + df = MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols) + assert len(df) > 0 + assert len(df.columns) == keep_first_n_cols + + +@pytest.mark.parametrize("keep_first_n_cols", [-1, 0, 100]) +def test_mock_movielens_schema__get_df_invalid_param__return_failure(keep_first_n_cols): + with pytest.raises(ValueError, match=r"Invalid value.*"): + MockMovielensSchema.get_df(size=3, keep_first_n_cols=keep_first_n_cols) + + +@pytest.mark.parametrize("keep_genre_col", [True, False]) +@pytest.mark.parametrize("keep_title_col", [True, False]) +@pytest.mark.parametrize("keep_first_n_cols", [None, 2]) +@pytest.mark.parametrize("seed", [-1]) # seed for pseudo-random number generation +@pytest.mark.parametrize("size", [0, 3, 10]) +def test_mock_movielens_schema__get_df__return_success(size, seed, keep_first_n_cols, keep_title_col, keep_genre_col): + df = MockMovielensSchema.get_df( + size=size, seed=seed, + keep_first_n_cols=keep_first_n_cols, + keep_title_col=keep_title_col, keep_genre_col=keep_genre_col + ) + assert type(df) == pandas.DataFrame + assert len(df) == size + + if keep_title_col: + assert len(df[DEFAULT_TITLE_COL]) == size + if keep_genre_col: + assert len(df[DEFAULT_GENRE_COL]) == size + + +@pytest.mark.spark +@pytest.mark.parametrize("keep_genre_col", [True, False]) +@pytest.mark.parametrize("keep_title_col", [True, False]) +@pytest.mark.parametrize("seed", [101]) # seed for pseudo-random number generation +@pytest.mark.parametrize("size", [0, 3, 10]) +def test_mock_movielens_schema__get_spark_df__return_success(spark, size, seed,
keep_title_col=keep_title_col, keep_genre_col=keep_genre_col) + assert df.count() == size + + if keep_title_col: + assert df.schema[DEFAULT_TITLE_COL] + if keep_genre_col: + assert df.schema[DEFAULT_GENRE_COL] + + +@pytest.mark.spark +def test_mock_movielens_schema__get_spark_df__store_tmp_file(spark, tmp_path): + data_size = 3 + MockMovielensSchema.get_spark_df(spark, size=data_size, tmp_path=tmp_path) + assert os.path.exists(os.path.join(tmp_path, f"mock_movielens_{data_size}.csv")) + + +@pytest.mark.spark +def test_mock_movielens_schema__get_spark_df__data_serialization_default_param(spark, mocker: MockerFixture): + data_size = 3 + to_csv_spy = mocker.spy(pandas.DataFrame, "to_csv") + + df = MockMovielensSchema.get_spark_df(spark, size=data_size) + # assertions + to_csv_spy.assert_called_once() + assert df.count() == data_size def test_mock_movielens_data__no_name_collision(): From 33c05cdeb562f982fd5263dfb833f4eb7a6c56c1 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Tue, 5 Oct 2021 21:07:41 +0000 Subject: [PATCH 24/27] Remove fake_movielens marker --- recommenders/datasets/movielens.py | 4 +-- tests/unit/examples/test_notebooks_pyspark.py | 26 +++++++++---------- tests/unit/examples/test_notebooks_python.py | 12 +++------ tox.ini | 1 - 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index 88b67322d4..d054bc64fb 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -36,8 +36,8 @@ try: import pandera as pa - from pandera.typing import Series from pandera import Field + from pandera.typing import Series except ImportError: pass # so the environment without recommender['dev'] doesn't break @@ -113,7 +113,7 @@ def item_has_header(self): # Fake data for testing only MOCK_DATA_FORMAT = { - "mock100": {"size": 100, "seed": 0}, + "mock100": {"size": 100, "seed": 6}, } # 100K data genres index to string mapper. For 1m, 10m, and 20m, the genres labels are already in the dataset. 
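A minimal usage sketch of the path the "mock100" entry above enables (an illustration, not part of the diff; it assumes the package is installed with the dev extra so that pandera's synthesis strategies are available):

from recommenders.datasets.movielens import load_pandas_df

# "mock100" skips the download path entirely and dispatches to
# MockMovielensSchema.get_df, with size=100 and seed=6 supplied
# from the MOCK_DATA_FORMAT entry above.
df = load_pandas_df(size="mock100")
assert len(df) == 100
print(df.head())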
diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 46691f3885..15a5a8ad7c 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -30,16 +30,14 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.fake_movielens @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2764.50s in Windows, while in Linux 124.35s" ) -@pytest.mark.parametrize("data_size", ["mock100"]) -def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): +def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_deep_dive"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict( - MOVIELENS_DATA_SIZE=data_size, + MOVIELENS_DATA_SIZE="mock100", COL_USER=DEFAULT_USER_COL, COL_ITEM=DEFAULT_ITEM_COL, COL_RATING=DEFAULT_RATING_COL, @@ -58,29 +56,31 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.fake_movielens -@pytest.mark.parametrize("data_size", ["mock100"]) -def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name, data_size): +def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["evaluation_diversity"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=data_size)) + pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, + parameters=dict( + TOP_K=10, + MOVIELENS_DATA_SIZE="mock100", + COL_USER=DEFAULT_USER_COL, + COL_ITEM=DEFAULT_ITEM_COL, + COL_RATING=DEFAULT_RATING_COL, + )) @pytest.mark.notebooks @pytest.mark.spark -@pytest.mark.fake_movielens @pytest.mark.skipif( sys.platform == "win32", reason="Takes 2409.69s in Windows, while in Linux 138.30s" ) -@pytest.mark.parametrize("data_size", ["mock100"]) -def test_spark_tuning(notebooks, output_notebook, kernel_name, data_size): +def test_spark_tuning(notebooks, output_notebook, kernel_name): notebook_path = notebooks["spark_tuning"] pm.execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict( - MOVIELENS_DATA_SIZE=data_size, + MOVIELENS_DATA_SIZE="mock100", NUMBER_CORES="*", NUMBER_ITERATIONS=3, SUBSET_RATIO=0.5, diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index 021d80fdc3..e9cda6810e 100644 --- a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -50,12 +50,10 @@ def test_baseline_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks -@pytest.mark.fake_movielens -@pytest.mark.parametrize("data_size", ["mock100"]) -def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name, data_size): +def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["surprise_svd_deep_dive"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(MOVIELENS_DATA_SIZE=data_size)) + parameters=dict(MOVIELENS_DATA_SIZE="mock100")) @pytest.mark.notebooks @@ -101,12 +99,10 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks -@pytest.mark.fake_movielens -@pytest.mark.parametrize("data_size", ["mock100"]) -def test_rlrmc_quickstart_runs(notebooks, output_notebook, 
kernel_name, data_size): +def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["rlrmc_quickstart"] pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(rank_parameter=2, MOVIELENS_DATA_SIZE=data_size)) + parameters=dict(rank_parameter=2, MOVIELENS_DATA_SIZE="mock100")) @pytest.mark.notebooks diff --git a/tox.ini b/tox.ini index 7ede574a2e..815e06dc14 100644 --- a/tox.ini +++ b/tox.ini @@ -66,7 +66,6 @@ markers = gpu: mark a test as gpu test spark: mark a test as spark test vw: mark a test as vowpal wabbit test - fake_movielens: mark a test that uses the fake dataset instead testpaths = tests addopts = From 84d83e2e7d2dfe19440df4e558e5c469ecc96f56 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 8 Oct 2021 14:28:53 +0000 Subject: [PATCH 25/27] Add pandera as a core dependency --- recommenders/datasets/movielens.py | 11 ++++------- setup.py | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index d054bc64fb..cf19874f71 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -7,6 +7,9 @@ import shutil import warnings import pandas as pd +import pandera as pa +from pandera import Field +from pandera.typing import Series from typing import Optional from zipfile import ZipFile from recommenders.datasets.download_utils import maybe_download, download_path @@ -21,6 +24,7 @@ DEFAULT_GENRE_COL, ) + try: from pyspark.sql.types import ( StructType, @@ -34,13 +38,6 @@ except ImportError: pass # so the environment without spark doesn't break -try: - import pandera as pa - from pandera import Field - from pandera.typing import Series -except ImportError: - pass # so the environment without recommender['dev'] doesn't break - class _DataFormat: def __init__( diff --git a/setup.py b/setup.py index c0d7d67377..51a330d65d 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "cornac>=1.1.2,<2", "scikit-surprise>=0.19.1,<=1.1.1", "retrying>=1.3.3", + "pandera[strategies]>=0.6.5", # For generating fake datasets ] # shared dependencies @@ -75,7 +76,6 @@ ], "dev": [ "black>=18.6b4,<21", - "pandera[strategies]>=0.6.5", # For generating fake datasets "pytest>=3.6.4", "pytest-cov>=2.12.1", "pytest-mock>=3.6.1", # for access to mock fixtures in pytest From 06ff90124762be8737bc54416e057517cb090d7e Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 8 Oct 2021 14:53:43 +0000 Subject: [PATCH 26/27] Run als quickstart NB on mock100 --- examples/00_quick_start/als_movielens.ipynb | 296 +++++++++--------- tests/unit/examples/test_notebooks_pyspark.py | 8 +- 2 files changed, 158 insertions(+), 146 deletions(-) diff --git a/examples/00_quick_start/als_movielens.ipynb b/examples/00_quick_start/als_movielens.ipynb index 059d7f0ead..1f1b1a0bf0 100644 --- a/examples/00_quick_start/als_movielens.ipynb +++ b/examples/00_quick_start/als_movielens.ipynb @@ -2,46 +2,34 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, "source": [ "Copyright (c) Microsoft Corporation. All rights reserved.\n", "\n", "Licensed under the MIT License." 
- ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "# Running ALS on MovieLens (PySpark)\n", "\n", "Matrix factorization by [ALS](https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/recommendation.html#ALS) (Alternating Least Squares) is a well known collaborative filtering algorithm.\n", "\n", "This notebook provides an example of how to utilize and evaluate ALS PySpark ML (DataFrame-based API) implementation, meant for large-scale distributed datasets. We use a smaller dataset in this example to run ALS efficiently on multiple cores of a [Data Science Virtual Machine](https://azure.microsoft.com/en-gb/services/virtual-machines/data-science-virtual-machines/)." - ] + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](../../SETUP.md) to install the PySpark environment." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "System version: 3.6.8 |Anaconda, Inc.| (default, Dec 30 2018, 01:22:34) \n", - "[GCC 7.3.0]\n", - "Spark version: 2.3.1\n" - ] - } - ], "source": [ "# set the environment path to find Recommenders\n", "import sys\n", @@ -61,74 +49,105 @@ "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "System version: 3.6.8 |Anaconda, Inc.| (default, Dec 30 2018, 01:22:34) \n", + "[GCC 7.3.0]\n", + "Spark version: 2.3.1\n" + ] + } + ], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "Set the default parameters." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 2, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], "source": [ "# top k items to recommend\n", "TOP_K = 10\n", "\n", "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", - "MOVIELENS_DATA_SIZE = '100k'" - ] + "MOVIELENS_DATA_SIZE = '100k'\n", + "\n", + "# Column names for the dataset\n", + "COL_USER = \"UserId\"\n", + "COL_ITEM = \"MovieId\"\n", + "COL_RATING = \"Rating\"\n", + "COL_TIMESTAMP = \"Timestamp\"" + ], + "outputs": [], + "metadata": { + "tags": [ + "parameters" + ] + } }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 0. Set up Spark context\n", "\n", "The following settings work well for debugging locally on VM - change when running on a cluster. We set up a giant single executor with many threads and specify memory cap. " - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 4, - "metadata": {}, - "outputs": [], "source": [ "# the following settings work well for debugging locally on VM - change when running on a cluster\n", "# set up a giant single executor with many threads and specify memory cap\n", "spark = start_or_get_spark(\"ALS PySpark\", memory=\"16g\")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 1. 
Download the MovieLens dataset" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "source": [ + "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", + "schema = StructType(\n", + " (\n", + " StructField(COL_USER, IntegerType()),\n", + " StructField(COL_ITEM, IntegerType()),\n", + " StructField(COL_RATING, FloatType()),\n", + " StructField(COL_TIMESTAMP, LongType()),\n", + " )\n", + ")\n", + "\n", + "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)\n", + "data.show()" + ], "outputs": [ { - "name": "stderr", "output_type": "stream", + "name": "stderr", "text": [ "100%|██████████| 4.81k/4.81k [00:00<00:00, 19.9kKB/s]\n" ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+-------+------+---------+\n", "|UserId|MovieId|Rating|Timestamp|\n", @@ -159,68 +178,53 @@ ] } ], - "source": [ - "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", - "schema = StructType(\n", - " (\n", - " StructField(\"UserId\", IntegerType()),\n", - " StructField(\"MovieId\", IntegerType()),\n", - " StructField(\"Rating\", FloatType()),\n", - " StructField(\"Timestamp\", LongType()),\n", - " )\n", - ")\n", - "\n", - "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)\n", - "data.show()" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 2. Split the data using the Spark random splitter provided in utilities" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "source": [ + "train, test = spark_random_split(data, ratio=0.75, seed=123)\n", + "print (\"N train\", train.cache().count())\n", + "print (\"N test\", test.cache().count())" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "N train 75193\n", "N test 24807\n" ] } ], - "source": [ - "train, test = spark_random_split(data, ratio=0.75, seed=123)\n", - "print (\"N train\", train.cache().count())\n", - "print (\"N test\", test.cache().count())" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 3. Train the ALS model on the training data, and get the top-k recommendations for our testing data\n", "\n", "To predict movie ratings, we use the rating data in the training set as users' explicit feedback. The hyperparameters used in building the model are referenced from [here](http://mymedialite.net/examples/datasets.html). We do not constrain the latent factors (`nonnegative = False`) in order to allow for both positive and negative preferences towards movies.\n", "Timing will vary depending on the machine being used to train." 
- ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 7, - "metadata": {}, - "outputs": [], "source": [ "header = {\n", - " \"userCol\": \"UserId\",\n", - " \"itemCol\": \"MovieId\",\n", - " \"ratingCol\": \"Rating\",\n", + " \"userCol\": COL_USER,\n", + " \"itemCol\": COL_ITEM,\n", + " \"ratingCol\": COL_RATING,\n", "}\n", "\n", "\n", @@ -234,84 +238,88 @@ " seed=42,\n", " **header\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "source": [ + "with Timer() as train_time:\n", + " model = als.fit(train)\n", + "\n", + "print(\"Took {} seconds for training.\".format(train_time.interval))" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Took 3.2410509269684553 seconds for training.\n" ] } ], - "source": [ - "with Timer() as train_time:\n", - " model = als.fit(train)\n", - "\n", - "print(\"Took {} seconds for training.\".format(train_time.interval))" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "In the movie recommendation use case, recommending movies that have been rated by the users do not make sense. Therefore, the rated movies are removed from the recommended items.\n", "\n", "In order to achieve this, we recommend all movies to all users, and then remove the user-movie pairs that exist in the training dataset." - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Took 10.559875106438994 seconds for prediction.\n" - ] - } - ], "source": [ "with Timer() as test_time:\n", "\n", " # Get the cross join of all user-item pairs and score them.\n", - " users = train.select('UserId').distinct()\n", - " items = train.select('MovieId').distinct()\n", + " users = train.select(COL_USER).distinct()\n", + " items = train.select(COL_ITEM).distinct()\n", " user_item = users.crossJoin(items)\n", " dfs_pred = model.transform(user_item)\n", "\n", " # Remove seen items.\n", " dfs_pred_exclude_train = dfs_pred.alias(\"pred\").join(\n", " train.alias(\"train\"),\n", - " (dfs_pred['UserId'] == train['UserId']) & (dfs_pred['MovieId'] == train['MovieId']),\n", + " (dfs_pred[COL_USER] == train[COL_USER]) & (dfs_pred[COL_ITEM] == train[COL_ITEM]),\n", " how='outer'\n", " )\n", "\n", - " top_all = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[\"train.Rating\"].isNull()) \\\n", - " .select('pred.' + 'UserId', 'pred.' + 'MovieId', 'pred.' + \"prediction\")\n", + " top_all = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[f\"train.{COL_RATING}\"].isNull()) \\\n", + " .select('pred.' + COL_USER, 'pred.' + COL_ITEM, 'pred.' 
+ \"prediction\")\n", "\n", " # In Spark, transformations are lazy evaluation\n", " # Use an action to force execute and measure the test time \n", " top_all.cache().count()\n", "\n", "print(\"Took {} seconds for prediction.\".format(test_time.interval))" - ] + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Took 10.559875106438994 seconds for prediction.\n" + ] + } + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 10, - "metadata": {}, + "source": [ + "top_all.show()" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+-------+----------+\n", "|UserId|MovieId|prediction|\n", @@ -342,36 +350,41 @@ ] } ], - "source": [ - "top_all.show()" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 4. Evaluate how well ALS performs" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 11, - "metadata": {}, - "outputs": [], "source": [ - "rank_eval = SparkRankingEvaluation(test, top_all, k = TOP_K, col_user=\"UserId\", col_item=\"MovieId\", \n", - " col_rating=\"Rating\", col_prediction=\"prediction\", \n", + "rank_eval = SparkRankingEvaluation(test, top_all, k = TOP_K, col_user=COL_USER, col_item=COL_ITEM, \n", + " col_rating=COL_RATING, col_prediction=\"prediction\", \n", " relevancy_method=\"top_k\")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "source": [ + "print(\"Model:\\tALS\",\n", + " \"Top K:\\t%d\" % rank_eval.k,\n", + " \"MAP:\\t%f\" % rank_eval.map_at_k(),\n", + " \"NDCG:\\t%f\" % rank_eval.ndcg_at_k(),\n", + " \"Precision@K:\\t%f\" % rank_eval.precision_at_k(),\n", + " \"Recall@K:\\t%f\" % rank_eval.recall_at_k(), sep='\\n')" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Model:\tALS\n", "Top K:\t10\n", @@ -382,30 +395,27 @@ ] } ], - "source": [ - "print(\"Model:\\tALS\",\n", - " \"Top K:\\t%d\" % rank_eval.k,\n", - " \"MAP:\\t%f\" % rank_eval.map_at_k(),\n", - " \"NDCG:\\t%f\" % rank_eval.ndcg_at_k(),\n", - " \"Precision@K:\\t%f\" % rank_eval.precision_at_k(),\n", - " \"Recall@K:\\t%f\" % rank_eval.recall_at_k(), sep='\\n')" - ] + "metadata": {} }, { "cell_type": "markdown", - "metadata": {}, "source": [ "### 5. 
Evaluate rating prediction" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "source": [ + "# Generate predicted ratings.\n", + "prediction = model.transform(test)\n", + "prediction.cache().show()\n" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "+------+-------+------+---------+----------+\n", "|UserId|MovieId|Rating|Timestamp|prediction|\n", @@ -436,20 +446,25 @@ ] } ], - "source": [ - "# Generate predicted ratings.\n", - "prediction = model.transform(test)\n", - "prediction.cache().show()\n" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": 14, - "metadata": {}, + "source": [ + "rating_eval = SparkRatingEvaluation(test, prediction, col_user=COL_USER, col_item=COL_ITEM, \n", + " col_rating=COL_RATING, col_prediction=\"prediction\")\n", + "\n", + "print(\"Model:\\tALS rating prediction\",\n", + " \"RMSE:\\t%f\" % rating_eval.rmse(),\n", + " \"MAE:\\t%f\" % rating_eval.mae(),\n", + " \"Explained variance:\\t%f\" % rating_eval.exp_var(),\n", + " \"R squared:\\t%f\" % rating_eval.rsquared(), sep='\\n')" + ], "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Model:\tALS rating prediction\n", "RMSE:\t0.967296\n", @@ -459,22 +474,11 @@ ] } ], - "source": [ - "rating_eval = SparkRatingEvaluation(test, prediction, col_user=\"UserId\", col_item=\"MovieId\", \n", - " col_rating=\"Rating\", col_prediction=\"prediction\")\n", - "\n", - "print(\"Model:\\tALS rating prediction\",\n", - " \"RMSE:\\t%f\" % rating_eval.rmse(),\n", - " \"MAE:\\t%f\" % rating_eval.mae(),\n", - " \"Explained variance:\\t%f\" % rating_eval.exp_var(),\n", - " \"R squared:\\t%f\" % rating_eval.rsquared(), sep='\\n')" - ] + "metadata": {} }, { "cell_type": "code", "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "if is_jupyter():\n", " # Record results with papermill for tests\n", @@ -490,17 +494,19 @@ " sb.glue(\"rsquared\", rating_eval.rsquared())\n", " sb.glue(\"train_time\", train_time.interval)\n", " sb.glue(\"test_time\", test_time.interval)" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": "code", "execution_count": 17, - "metadata": {}, - "outputs": [], "source": [ "# cleanup spark instance\n", "spark.stop()" - ] + ], + "outputs": [], + "metadata": {} } ], "metadata": { diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 15a5a8ad7c..6ccd970492 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -18,7 +18,13 @@ ) def test_als_pyspark_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_pyspark"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name, + parameters=dict( + MOVIELENS_DATA_SIZE="mock100", + COL_USER=DEFAULT_USER_COL, + COL_ITEM=DEFAULT_ITEM_COL, + COL_RATING=DEFAULT_RATING_COL, + )) @pytest.mark.notebooks From a097dc988bea8438f26363d2639495eedbf83218 Mon Sep 17 00:00:00 2001 From: Jianjie Liu Date: Fri, 8 Oct 2021 16:08:59 +0000 Subject: [PATCH 27/27] Revert "Add pandera as a core dependency" This reverts commit 84d83e2e7d2dfe19440df4e558e5c469ecc96f56. 
--- recommenders/datasets/movielens.py | 11 +++++++---- setup.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index cf19874f71..d054bc64fb 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -7,9 +7,6 @@ import shutil import warnings import pandas as pd -import pandera as pa -from pandera import Field -from pandera.typing import Series from typing import Optional from zipfile import ZipFile from recommenders.datasets.download_utils import maybe_download, download_path @@ -24,7 +21,6 @@ DEFAULT_GENRE_COL, ) - try: from pyspark.sql.types import ( StructType, @@ -38,6 +34,13 @@ except ImportError: pass # so the environment without spark doesn't break +try: + import pandera as pa + from pandera import Field + from pandera.typing import Series +except ImportError: + pass # so the environment without recommender['dev'] doesn't break + class _DataFormat: def __init__( diff --git a/setup.py b/setup.py index 51a330d65d..c0d7d67377 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,6 @@ "cornac>=1.1.2,<2", "scikit-surprise>=0.19.1,<=1.1.1", "retrying>=1.3.3", - "pandera[strategies]>=0.6.5", # For generating fake datasets ] # shared dependencies @@ -76,6 +75,7 @@ ], "dev": [ "black>=18.6b4,<21", + "pandera[strategies]>=0.6.5", # For generating fake datasets "pytest>=3.6.4", "pytest-cov>=2.12.1", "pytest-mock>=3.6.1", # for access to mock fixtures in pytest
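A short Spark-side sketch of the end state of the series (an illustration, not part of the patches; it assumes an install with the spark and dev extras, and the session setup below is plain PySpark rather than an API from this repository):

from pyspark.sql import SparkSession
from recommenders.datasets.movielens import load_spark_df

# Any local SparkSession will do for the synthesized dataset.
spark = SparkSession.builder.master("local[*]").appName("mock-movielens").getOrCreate()

# size="mock100" synthesizes 100 rows locally; under the hood the Spark path
# round-trips through a temporary CSV inside MockMovielensSchema.get_spark_df.
spark_df = load_spark_df(spark, size="mock100")
assert spark_df.count() == 100
spark_df.show(5)
spark.stop()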