Removed messed-up pip module in setup.py

splicemachine · Aug 10, 2018 · ae90490 · ae90490
1 parent a81a69f
commit ae90490
Show file tree

Hide file tree

Showing 7 changed files with 192 additions and 17 deletions.
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/new-pysplice.iml b/.idea/new-pysplice.iml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/setup.py b/setup.py
@@ -25,7 +25,11 @@
     "py4j==0.10.7",
     "pyspark==2.3.1",
     "pytest==3.6.1",
-    "six==1.11.0"
+    "six==1.11.0",
+    "mlflow==0.4.2",
+    "graphviz==0.8.4",
+    "pyspark==2.1.0",
+    "numpy==1.14.3"
 ]
 setup(
     name="splicemachine",

diff --git a/splicemachine/ml/zeppelin.py b/splicemachine/ml/zeppelin.py
@@ -39,7 +39,8 @@ def handle_handlers(handler, *args, **kwargs):
     def log_metadata(self, handler, *args, **kwargs):
         if not self.run_uuid:
             with mlflow.start_run():
-                self.run_uuid = (mlflow.active_run().__dict__['_info'].__dict__['_run_uuid'])
+                self.run_uuid = (mlflow.active_run().__dict__[
+                                 '_info'].__dict__['_run_uuid'])
                 print("Logged using handler " + handler)
                 Run.handle_handlers(handler, *args, **kwargs)
         else:
@@ -77,7 +78,8 @@ def show_confusion_matrix(TP, TN, FP, FN):
     :param FN: False Negatives
     """
     confusion_rdd = sc.parallelize([['Predicted', TP, FN], ['Actual', FP, TN]])
-    confusion_matrix = sqlContext.createDataFrame(confusion_rdd, ['', 'Actual', 'Predicted'])
+    confusion_matrix = sqlContext.createDataFrame(
+        confusion_rdd, ['', 'Actual', 'Predicted'])
     confusion_matrix.show()
 
 
@@ -98,7 +100,8 @@ def experiment_maker(experiment_id):
                     e._experiment_id)  # use already created experiment
 
         if not found:
-            _id = mlflow.tracking.create_experiment(experiment_id)  # create new experiment
+            _id = mlflow.tracking.create_experiment(
+                experiment_id)  # create new experiment
             print('Success! Created Experiment')
             os.environ['MLFLOW_EXPERIMENT_ID'] = str(_id)  # use it
     else:
@@ -135,7 +138,7 @@ def input(self, predictions_dataframe):
                                                   self.prediction_column)  # Select the actual and the predicted labels
 
         self.avg_tp.append(pred_v_lab[(pred_v_lab.label == 1) & (
-                pred_v_lab.prediction == 1)].count())  # Add confusion stats
+            pred_v_lab.prediction == 1)].count())  # Add confusion stats
         self.avg_tn.append(
             pred_v_lab[(pred_v_lab.label == 0) & (pred_v_lab.prediction == 0)].count())
         self.avg_fp.append(
@@ -177,13 +180,14 @@ def get_results(self, output_type='dataframe'):
             return computed_metrics
 
         else:
-            metrics_row = Row('TPR', 'SPC', 'PPV', 'NPV', 'FPR', 'FDR', 'FNR', 'ACC', 'F1', 'MCC')
+            metrics_row = Row('TPR', 'SPC', 'PPV', 'NPV',
+                              'FPR', 'FDR', 'FNR', 'ACC', 'F1', 'MCC')
             computed_row = metrics_row(*computed_metrics.values())
             computed_df = sqlContext.createDataFrame([computed_row])
             return computed_df
 
 
-def print_horizantal_line(l):
+def print_horizontal_line(l):
     print("".join(['-' * l]))
 
 
@@ -235,8 +239,9 @@ def visualize(model, feature_column_names, label_names, tree_name, visual=True):
         time.sleep(3)
 
         print('You can find your visualization at "https://docs.google.com/gview?url=https'
-              '://<cluster_name>.splicemachine.io/assets/images/' + tree_name + '.pdf&embedded=tru'
-                                                                                'e#view=fith')
+              '://<cluster_name>.splicemachine.io/assets/images/' +
+              tree_name + '.pdf&embedded=tru'
+              'e#view=fith')
 
     @staticmethod
     def replacer(string, bad, good):
@@ -269,10 +274,13 @@ def add_node(dot, parent, node_hash, root, realroot=False):
                 dot.edge(node_hash, node_id)
             if root.get('children'):
                 if not root['children'][0].get('children'):
-                    DecisionTreeVisualizer.add_node(dot, root['name'], node_id, root['children'][0])
+                    DecisionTreeVisualizer.add_node(
+                        dot, root['name'], node_id, root['children'][0])
                 else:
-                    DecisionTreeVisualizer.add_node(dot, root['name'], node_id, root['children'][0])
-                    DecisionTreeVisualizer.add_node(dot, root['name'], node_id, root['children'][1])
+                    DecisionTreeVisualizer.add_node(
+                        dot, root['name'], node_id, root['children'][0])
+                    DecisionTreeVisualizer.add_node(
+                        dot, root['name'], node_id, root['children'][1])
 
     @staticmethod
     def parse(lines):
@@ -285,12 +293,16 @@ def parse(lines):
         while lines:
 
             if lines[0].startswith('If'):
-                bl = ' '.join(lines.pop(0).split()[1:]).replace('(', '').replace(')', '')
-                block.append({'name': bl, 'children': DecisionTreeVisualizer.parse(lines)})
+                bl = ' '.join(lines.pop(0).split()[1:]).replace(
+                    '(', '').replace(')', '')
+                block.append(
+                    {'name': bl, 'children': DecisionTreeVisualizer.parse(lines)})
 
                 if lines[0].startswith('Else'):
-                    be = ' '.join(lines.pop(0).split()[1:]).replace('(', '').replace(')', '')
-                    block.append({'name': be, 'children': DecisionTreeVisualizer.parse(lines)})
+                    be = ' '.join(lines.pop(0).split()[1:]).replace(
+                        '(', '').replace(')', '')
+                    block.append(
+                        {'name': be, 'children': DecisionTreeVisualizer.parse(lines)})
             elif not lines[0].startswith(('If', 'Else')):
                 block2 = lines.pop(0)
                 block.append({'name': block2})
@@ -314,5 +326,6 @@ def tree_json(tree):
                 break
             if not line:
                 break
-        res = [{'name': 'Root', 'children': DecisionTreeVisualizer.parse(data[1:])}]
+        res = [
+            {'name': 'Root', 'children': DecisionTreeVisualizer.parse(data[1:])}]
         return res[0]