diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/.ipynb_checkpoints/quiz-KD-trees-checkpoint.ipynb b/machine_learning/4_clustering_and_retrieval/lecture/week2/.ipynb_checkpoints/quiz-KD-trees-checkpoint.ipynb
new file mode 100644
index 0000000..a83df8c
--- /dev/null
+++ b/machine_learning/4_clustering_and_retrieval/lecture/week2/.ipynb_checkpoints/quiz-KD-trees-checkpoint.ipynb
@@ -0,0 +1,519 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# KD-trees"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 1\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 2\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.58, -2.01],\n",
+ " [ 0.91, 3.98],\n",
+ " [-0.73, 4. ],\n",
+ " [-4.22, 1.16],\n",
+ " [ 4.19, -2.02],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "x1 = np.array([-1.58, 0.91, -0.73, -4.22, 4.19, -0.33])\n",
+ "x2 = np.array([-2.01, 3.98, 4.00, 1.16, -2.02, 2.15])\n",
+ "\n",
+ "x = np.vstack((x1, x2)).T\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "-0.01499999999999968"
+ ]
+ },
+ "execution_count": 89,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mid range of x1\n",
+ "x1_midrange = (x1.max() + x1.min())/2\n",
+ "x1_midrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def get_mid_range(data, column=0):\n",
+ " \"\"\" Get midrange of data by column\n",
+ " - x1: column=0\n",
+ " - x2: column=1\n",
+ " \"\"\"\n",
+ " midrange = (data[:, column].max() + data[:, column].min())/2\n",
+ " return midrange \n",
+ "\n",
+ "def split_by(x, value, column=0):\n",
+ " \"\"\" Split x array by value and column\n",
+ " - x1: column=0\n",
+ " - x2: column=1\n",
+ " \"\"\"\n",
+ " split1 = x[x[:, column] <= value]\n",
+ " split2 = x[x[:, column] > value]\n",
+ " return split1, split2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.58, -2.01],\n",
+ " [-0.73, 4. ],\n",
+ " [-4.22, 1.16],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_midrange = get_mid_range(x)\n",
+ "x1_split1, x1_split2 = split_by(x, x1_midrange)\n",
+ "\n",
+ "# Split values of x1\n",
+ "x1_split1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 0.91, 3.98],\n",
+ " [ 4.19, -2.02]])"
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Split values of x1\n",
+ "x1_split2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 3\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.995\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Mid range of x2 for the 1st split\n",
+ "# x1_split1_x2_midrange = (x1_split1[:, 1].max() + x1_split1[:, 1].min())/2\n",
+ "x1_split1_x2_midrange = get_mid_range(x1_split1, column=1)\n",
+ "\n",
+ "print x1_split1_x2_midrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.98\n"
+ ]
+ }
+ ],
+ "source": [
+ "# # Mid range of x2 for 2nd split\n",
+ "x1_split2_x2_midrange = get_mid_range(x1_split2, column=1)\n",
+ "print x1_split2_x2_midrange"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 4\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.58, -2.01]])"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split1, x1_split1_x2_split2 = split_by(x1_split1, x2_x1_split1_midrange, column=1)\n",
+ "x1_split1_x2_split1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-0.73, 4. ],\n",
+ " [-4.22, 1.16],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# node still has 3 data points\n",
+ "# continue to split\n",
+ "\n",
+ "x1_split1_x2_split2 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2.5800000000000001"
+ ]
+ },
+ "execution_count": 98,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_midrange = get_mid_range(x1_split1_x2_split2, column=1)\n",
+ "x1_split1_x2_split2_midrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-4.22, 1.16],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_x2_split1, x1_split1_x2_split2_x2_split2 = split_by(x1_split1_x2_split2, \n",
+ " x1_split1_x2_split2_midrange, \n",
+ " column=1)\n",
+ "x1_split1_x2_split2_x2_split1\n",
+ "# Continue to split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-0.73, 4. ]])"
+ ]
+ },
+ "execution_count": 100,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_x2_split2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.6549999999999998"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_x2_split1_midrange = get_mid_range(x1_split1_x2_split2_x2_split1, column=1)\n",
+ "x1_split1_x2_split2_x2_split1_midrange"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- After split x1_split1_x2_split2_x2_split1 by 1.654, then we will have 2 more leaves. Data point 4: [-4.22, 1.16] will be the leaves contain the query point (-3, 1.5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 5\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 4.19, -2.02]])"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split2_x2_split1, x1_split2_x2_split2 = split_by(x1_split2, x1_split2_x2_midrange, column=1)\n",
+ "x1_split2_x2_split1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 0.91, 3.98]])"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split2_x2_split2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.11"
+ },
+ "toc": {
+ "toc_cell": false,
+ "toc_number_sections": false,
+ "toc_threshold": "8",
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.04 PM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.04 PM.png
new file mode 100644
index 0000000..7bcc7a9
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.04 PM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.09 PM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.09 PM.png
new file mode 100644
index 0000000..34ffa45
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.09 PM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.17 PM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.17 PM.png
new file mode 100644
index 0000000..d03af14
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-02 at 11.56.17 PM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.10.42 AM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.10.42 AM.png
new file mode 100644
index 0000000..86a5007
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.10.42 AM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.14.16 AM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.14.16 AM.png
new file mode 100644
index 0000000..945be4e
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.14.16 AM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.16.20 AM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.16.20 AM.png
new file mode 100644
index 0000000..8334acf
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.16.20 AM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.16.46 AM.png b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.16.46 AM.png
new file mode 100644
index 0000000..7868137
Binary files /dev/null and b/machine_learning/4_clustering_and_retrieval/lecture/week2/images/Screen Shot 2016-07-03 at 12.16.46 AM.png differ
diff --git a/machine_learning/4_clustering_and_retrieval/lecture/week2/quiz-KD-trees.ipynb b/machine_learning/4_clustering_and_retrieval/lecture/week2/quiz-KD-trees.ipynb
new file mode 100644
index 0000000..a83df8c
--- /dev/null
+++ b/machine_learning/4_clustering_and_retrieval/lecture/week2/quiz-KD-trees.ipynb
@@ -0,0 +1,519 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# KD-trees"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 1\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 2\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.58, -2.01],\n",
+ " [ 0.91, 3.98],\n",
+ " [-0.73, 4. ],\n",
+ " [-4.22, 1.16],\n",
+ " [ 4.19, -2.02],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "x1 = np.array([-1.58, 0.91, -0.73, -4.22, 4.19, -0.33])\n",
+ "x2 = np.array([-2.01, 3.98, 4.00, 1.16, -2.02, 2.15])\n",
+ "\n",
+ "x = np.vstack((x1, x2)).T\n",
+ "x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "-0.01499999999999968"
+ ]
+ },
+ "execution_count": 89,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Mid range of x1\n",
+ "x1_midrange = (x1.max() + x1.min())/2\n",
+ "x1_midrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def get_mid_range(data, column=0):\n",
+ " \"\"\" Get midrange of data by column\n",
+ " - x1: column=0\n",
+ " - x2: column=1\n",
+ " \"\"\"\n",
+ " midrange = (data[:, column].max() + data[:, column].min())/2\n",
+ " return midrange \n",
+ "\n",
+ "def split_by(x, value, column=0):\n",
+ " \"\"\" Split x array by value and column\n",
+ " - x1: column=0\n",
+ " - x2: column=1\n",
+ " \"\"\"\n",
+ " split1 = x[x[:, column] <= value]\n",
+ " split2 = x[x[:, column] > value]\n",
+ " return split1, split2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.58, -2.01],\n",
+ " [-0.73, 4. ],\n",
+ " [-4.22, 1.16],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_midrange = get_mid_range(x)\n",
+ "x1_split1, x1_split2 = split_by(x, x1_midrange)\n",
+ "\n",
+ "# Split values of x1\n",
+ "x1_split1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 0.91, 3.98],\n",
+ " [ 4.19, -2.02]])"
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Split values of x1\n",
+ "x1_split2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 3\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.995\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Mid range of x2 for the 1st split\n",
+ "# x1_split1_x2_midrange = (x1_split1[:, 1].max() + x1_split1[:, 1].min())/2\n",
+ "x1_split1_x2_midrange = get_mid_range(x1_split1, column=1)\n",
+ "\n",
+ "print x1_split1_x2_midrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.98\n"
+ ]
+ }
+ ],
+ "source": [
+ "# # Mid range of x2 for 2nd split\n",
+ "x1_split2_x2_midrange = get_mid_range(x1_split2, column=1)\n",
+ "print x1_split2_x2_midrange"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 4\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-1.58, -2.01]])"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split1, x1_split1_x2_split2 = split_by(x1_split1, x2_x1_split1_midrange, column=1)\n",
+ "x1_split1_x2_split1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-0.73, 4. ],\n",
+ " [-4.22, 1.16],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# node still has 3 data points\n",
+ "# continue to split\n",
+ "\n",
+ "x1_split1_x2_split2 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2.5800000000000001"
+ ]
+ },
+ "execution_count": 98,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_midrange = get_mid_range(x1_split1_x2_split2, column=1)\n",
+ "x1_split1_x2_split2_midrange"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-4.22, 1.16],\n",
+ " [-0.33, 2.15]])"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_x2_split1, x1_split1_x2_split2_x2_split2 = split_by(x1_split1_x2_split2, \n",
+ " x1_split1_x2_split2_midrange, \n",
+ " column=1)\n",
+ "x1_split1_x2_split2_x2_split1\n",
+ "# Continue to split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-0.73, 4. ]])"
+ ]
+ },
+ "execution_count": 100,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_x2_split2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.6549999999999998"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split1_x2_split2_x2_split1_midrange = get_mid_range(x1_split1_x2_split2_x2_split1, column=1)\n",
+ "x1_split1_x2_split2_x2_split1_midrange"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- After split x1_split1_x2_split2_x2_split1 by 1.654, then we will have 2 more leaves. Data point 4: [-4.22, 1.16] will be the leaves contain the query point (-3, 1.5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Question 5\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "*Screenshot taken from [Coursera](https://www.coursera.org/learn/ml-clustering-and-retrieval/exam/YV0W4/representations-and-metrics)*\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Answer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 4.19, -2.02]])"
+ ]
+ },
+ "execution_count": 86,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split2_x2_split1, x1_split2_x2_split2 = split_by(x1_split2, x1_split2_x2_midrange, column=1)\n",
+ "x1_split2_x2_split1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 0.91, 3.98]])"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x1_split2_x2_split2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.11"
+ },
+ "toc": {
+ "toc_cell": false,
+ "toc_number_sections": false,
+ "toc_threshold": "8",
+ "toc_window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}