From 05a465c83282ba2a621fd69c80e74bdc1b6f51e9 Mon Sep 17 00:00:00 2001 From: Tuan Vu Date: Sat, 19 Mar 2016 20:41:54 -0700 Subject: [PATCH] Decision Trees --- ...signment-2-blank-Graphlab-checkpoint.ipynb | 340 ++++++++++++++++-- ...ion-tree-assignment-2-blank-Graphlab.ipynb | 340 ++++++++++++++++-- 2 files changed, 616 insertions(+), 64 deletions(-) diff --git a/machine_learning/3_classification/assigment/week3/.ipynb_checkpoints/module-5-decision-tree-assignment-2-blank-Graphlab-checkpoint.ipynb b/machine_learning/3_classification/assigment/week3/.ipynb_checkpoints/module-5-decision-tree-assignment-2-blank-Graphlab-checkpoint.ipynb index e63d34d..97a4797 100644 --- a/machine_learning/3_classification/assigment/week3/.ipynb_checkpoints/module-5-decision-tree-assignment-2-blank-Graphlab-checkpoint.ipynb +++ b/machine_learning/3_classification/assigment/week3/.ipynb_checkpoints/module-5-decision-tree-assignment-2-blank-Graphlab-checkpoint.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "collapsed": false, "scrolled": false @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": { "collapsed": false, "scrolled": false @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -263,7 +263,7 @@ "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -288,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -357,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -420,7 +420,7 @@ " 'emp_length.n/a']" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -452,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -594,7 +594,7 @@ "[1 rows x 26 columns]" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -612,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -625,7 +625,7 @@ "[0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, ... ]" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -645,7 +645,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -675,7 +675,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": { "collapsed": true }, @@ -716,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": { "collapsed": true }, @@ -746,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": { "collapsed": false }, @@ -822,7 +822,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "metadata": { "collapsed": false }, @@ -879,7 +879,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -920,7 +920,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": { "collapsed": false }, @@ -963,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": { "collapsed": false }, @@ -1038,7 +1038,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": { "collapsed": false }, @@ -1059,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "metadata": { "collapsed": false }, @@ -1122,6 +1122,80 @@ " print 'Number of nodes that should be there : 13' " ] }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.B'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'emp_length.n/a'},\n", + " 'splitting_feature': 'grade.A'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.E'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.D'},\n", + " 'splitting_feature': 'term. 36 months'}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "small_data_decision_tree" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1135,7 +1209,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 35, "metadata": { "collapsed": false }, @@ -1274,6 +1348,176 @@ "my_decision_tree = decision_tree_create(train_data, features, 'safe_loans', max_depth = 6)" ] }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.E'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.D'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.C'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'home_ownership.MORTGAGE'},\n", + " 'splitting_feature': 'emp_length.5 years'},\n", + " 'splitting_feature': 'grade.B'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'emp_length.< 1 year'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'emp_length.n/a'},\n", + " 'splitting_feature': 'grade.A'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.G'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.A'},\n", + " 'splitting_feature': 'emp_length.n/a'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'home_ownership.OWN'},\n", + " 'splitting_feature': 'emp_length.8 years'},\n", + " 'splitting_feature': 'grade.F'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.E'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.D'},\n", + " 'splitting_feature': 'term. 36 months'}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_decision_tree" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1287,7 +1531,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 36, "metadata": { "collapsed": false }, @@ -1319,7 +1563,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 37, "metadata": { "collapsed": false }, @@ -1355,7 +1599,7 @@ " 'term. 60 months': 1L}" ] }, - "execution_count": 29, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1366,7 +1610,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 38, "metadata": { "collapsed": false }, @@ -1392,7 +1636,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 39, "metadata": { "collapsed": false }, @@ -1415,7 +1659,7 @@ "-1" ] }, - "execution_count": 31, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1424,6 +1668,38 @@ "classify(my_decision_tree, test_data[0], annotate=True)" ] }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Split on term. 36 months = 0\n", + "Split on grade.A = 0\n", + "Split on grade.B = 0\n", + "At leaf, predicting -1\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classify(small_data_decision_tree, test_data[0], annotate=True)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/machine_learning/3_classification/assigment/week3/module-5-decision-tree-assignment-2-blank-Graphlab.ipynb b/machine_learning/3_classification/assigment/week3/module-5-decision-tree-assignment-2-blank-Graphlab.ipynb index e63d34d..97a4797 100644 --- a/machine_learning/3_classification/assigment/week3/module-5-decision-tree-assignment-2-blank-Graphlab.ipynb +++ b/machine_learning/3_classification/assigment/week3/module-5-decision-tree-assignment-2-blank-Graphlab.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "collapsed": false, "scrolled": false @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": { "collapsed": false, "scrolled": false @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -263,7 +263,7 @@ "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -288,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -357,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -420,7 +420,7 @@ " 'emp_length.n/a']" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -452,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -594,7 +594,7 @@ "[1 rows x 26 columns]" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -612,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -625,7 +625,7 @@ "[0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, ... ]" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -645,7 +645,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -675,7 +675,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": { "collapsed": true }, @@ -716,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": { "collapsed": true }, @@ -746,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": { "collapsed": false }, @@ -822,7 +822,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "metadata": { "collapsed": false }, @@ -879,7 +879,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -920,7 +920,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": { "collapsed": false }, @@ -963,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": { "collapsed": false }, @@ -1038,7 +1038,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": { "collapsed": false }, @@ -1059,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "metadata": { "collapsed": false }, @@ -1122,6 +1122,80 @@ " print 'Number of nodes that should be there : 13' " ] }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.B'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'emp_length.n/a'},\n", + " 'splitting_feature': 'grade.A'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.E'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.D'},\n", + " 'splitting_feature': 'term. 36 months'}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "small_data_decision_tree" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1135,7 +1209,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 35, "metadata": { "collapsed": false }, @@ -1274,6 +1348,176 @@ "my_decision_tree = decision_tree_create(train_data, features, 'safe_loans', max_depth = 6)" ] }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.E'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.D'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.C'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'home_ownership.MORTGAGE'},\n", + " 'splitting_feature': 'emp_length.5 years'},\n", + " 'splitting_feature': 'grade.B'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'emp_length.< 1 year'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'emp_length.n/a'},\n", + " 'splitting_feature': 'grade.A'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.G'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.A'},\n", + " 'splitting_feature': 'emp_length.n/a'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': False,\n", + " 'left': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': 1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'home_ownership.OWN'},\n", + " 'splitting_feature': 'emp_length.8 years'},\n", + " 'splitting_feature': 'grade.F'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.E'},\n", + " 'prediction': None,\n", + " 'right': {'is_leaf': True,\n", + " 'left': None,\n", + " 'prediction': -1,\n", + " 'right': None,\n", + " 'splitting_feature': None},\n", + " 'splitting_feature': 'grade.D'},\n", + " 'splitting_feature': 'term. 36 months'}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_decision_tree" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1287,7 +1531,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 36, "metadata": { "collapsed": false }, @@ -1319,7 +1563,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 37, "metadata": { "collapsed": false }, @@ -1355,7 +1599,7 @@ " 'term. 60 months': 1L}" ] }, - "execution_count": 29, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1366,7 +1610,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 38, "metadata": { "collapsed": false }, @@ -1392,7 +1636,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 39, "metadata": { "collapsed": false }, @@ -1415,7 +1659,7 @@ "-1" ] }, - "execution_count": 31, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1424,6 +1668,38 @@ "classify(my_decision_tree, test_data[0], annotate=True)" ] }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Split on term. 36 months = 0\n", + "Split on grade.A = 0\n", + "Split on grade.B = 0\n", + "At leaf, predicting -1\n" + ] + }, + { + "data": { + "text/plain": [ + "-1" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "classify(small_data_decision_tree, test_data[0], annotate=True)" + ] + }, { "cell_type": "markdown", "metadata": {},