
Commit 0a3494d
ENH: memory mapping works for soma-workflow, but it still needs testing #13
JinpengLI committed Aug 26, 2013
1 parent 08e124b commit 0a3494d
Showing 4 changed files with 17 additions and 15 deletions.
bin/epac_mapper (5 changes: 4 additions & 1 deletion)
@@ -15,6 +15,7 @@ import numpy as np
from epac import conf, StoreFs, MapperSubtrees
from epac.map_reduce.inputs import NodesInput
from epac.map_reduce.engine import SomaWorkflowEngine
from epac.utils import load_dictionary


# load all the class definitions so that classes, such as methods, can be loaded
@@ -102,7 +103,9 @@ if __name__ == "__main__":
# datasets_filepath ="/tmp/tmpO8D3dG_datasets.npz"
# keys="fs:///tmp/tmpXyC_XE/ParPerm/Perm(nb=0)"

Xy = load_datasets(datasets_filepath)
# Xy = load_datasets(datasets_filepath)
Xy = load_dictionary(datasets_filepath)

tree_root_relative_path = SomaWorkflowEngine.tree_root_relative_path
store_fs = StoreFs(tree_root_relative_path)
tree = store_fs.load(key=conf.STORE_EXECUTION_TREE_PREFIX)
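The mapper now loads its dataset through epac.utils.load_dictionary instead of np.load, so large arrays can come back as memory maps. The implementation of load_dictionary is not part of this diff; a minimal sketch of what such a loader could look like, assuming one .npy file per key plus a dict_index.txt listing the keys (dict_index.txt and MEMM_THRESHOLD are names this commit adds to epac/configuration.py; the on-disk layout itself is a guess):

import os
import numpy as np

MEMM_THRESHOLD = 500000000  # mirrors conf.MEMM_THRESHOLD (~500 MB)

def load_dictionary(dirpath):
    # Hypothetical sketch: read the keys from dict_index.txt, then load
    # <key>.npy for each key; files above MEMM_THRESHOLD are opened as
    # read-only memory maps instead of being read into RAM.
    with open(os.path.join(dirpath, "dict_index.txt")) as f:
        keys = [line.strip() for line in f if line.strip()]
    Xy = {}
    for key in keys:
        npy_path = os.path.join(dirpath, key + ".npy")
        if os.path.getsize(npy_path) > MEMM_THRESHOLD:
            Xy[key] = np.load(npy_path, mmap_mode="r")
        else:
            Xy[key] = np.load(npy_path)
    return Xy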
epac/configuration.py (3 changes: 2 additions & 1 deletion)
@@ -32,8 +32,9 @@ class conf:
RESULT_SET = "result_set"
ML_CLASSIFICATION_MODE = None # Set to True to force classification mode
DICT_INDEX_FILE = "dict_index.txt"
# when the data is larger than 500 MB, memory mapping is needed
MEMM_THRESHOLD = 500000000L

@classmethod
def init_ml(cls, **Xy):
## Try to guess if the ML task is classification or regression
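The new conf.MEMM_THRESHOLD is the size above which EPAC switches to memory mapping. A quick way to check whether a given array crosses it (assuming the comparison is against the array's in-memory byte size; whether the actual code compares nbytes or the on-disk file size is not visible in this diff):

import numpy as np
from epac import conf  # conf.MEMM_THRESHOLD is added by this commit

X = np.zeros((10000, 10000))          # float64: 8e8 bytes, about 800 MB
print X.nbytes > conf.MEMM_THRESHOLD  # True, so X would be memory mapped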
epac/map_reduce/engine.py (10 changes: 7 additions & 3 deletions)
@@ -23,6 +23,8 @@
from epac.map_reduce.split_input import SplitNodesInput
from epac.map_reduce.inputs import NodesInput

from epac.utils import save_dictionary
from epac.utils import load_dictionary

class Engine(object):
__metaclass__ = ABCMeta
@@ -148,7 +150,7 @@ def run(self, **Xy):
class SomaWorkflowEngine(LocalEngine):
'''Use soma-workflow to run an epac tree in parallel
'''
dataset_relative_path = "./dataset.npz"
dataset_relative_path = "./dataset"
open_me_by_soma_workflow_gui = "open_me_by_soma_workflow_gui"

def __init__(self,
@@ -275,8 +277,10 @@ def run(self, **Xy):

## Save the database and tree to working directory
## ===============================================
np.savez(os.path.join(tmp_work_dir_path,
SomaWorkflowEngine.dataset_relative_path), **Xy)
# np.savez(os.path.join(tmp_work_dir_path,
# SomaWorkflowEngine.dataset_relative_path), **Xy)
save_dictionary(os.path.join(tmp_work_dir_path,
SomaWorkflowEngine.dataset_relative_path), **Xy)
store = StoreFs(dirpath=os.path.join(
tmp_work_dir_path,
SomaWorkflowEngine.tree_root_relative_path))
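save_dictionary replaces np.savez when the engine writes the dataset to the working directory, and dataset_relative_path loses its .npz suffix, which suggests the data now lives in a directory rather than a single archive. The function body is not shown in this diff; a sketch consistent with the loader above (one .npy per key plus a dict_index.txt index, a hypothetical layout):

import os
import numpy as np

def save_dictionary(dirpath, **Xy):
    # Hypothetical sketch: write each array of **Xy to <key>.npy under
    # dirpath and record the keys in dict_index.txt so a matching
    # load_dictionary can find them again.
    if not os.path.isdir(dirpath):
        os.makedirs(dirpath)
    with open(os.path.join(dirpath, "dict_index.txt"), "w") as f:
        for key in Xy:
            f.write(key + "\n")
    for key, arr in Xy.items():
        np.save(os.path.join(dirpath, key + ".npy"), np.asarray(arr))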
examples/run_a_big_matrix.py (14 changes: 4 additions & 10 deletions)
@@ -40,14 +40,10 @@ def convert2memmap(np_mat):
np.savez("/tmp/data.dat", **Xy)





def load_datasets(datasets_filepath):
Xy = np.load(datasets_filepath)
return {k: Xy[k] for k in Xy.keys()}


Xy = load_datasets("/tmp/data.dat.npz")

from sklearn.svm import SVC
@@ -56,12 +52,10 @@ def load_datasets(datasets_filepath):
SVC(kernel="rbf")]),
n_folds=3)


from epac import LocalEngine
local_engine = LocalEngine(cv_svm, num_processes=2)
cv_svm = local_engine.run(X=X, y=y)
print cv_svm.reduce()

#from epac import LocalEngine
#local_engine = LocalEngine(cv_svm, num_processes=2)
#cv_svm = local_engine.run(X=X, y=y)
#print cv_svm.reduce()

from epac import SomaWorkflowEngine
swf_engine = SomaWorkflowEngine(cv_svm, num_processes=2)
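The example's convert2memmap helper appears only in the hunk header above. A sketch of what such a converter typically does, assuming it round-trips the array through a disk file to obtain an np.memmap (hypothetical body, not the committed one):

import tempfile
import numpy as np

def convert2memmap(np_mat):
    # Hypothetical sketch: copy an in-memory array into a disk-backed
    # np.memmap with the same dtype and shape.
    tmp_file = tempfile.NamedTemporaryFile(suffix=".mem", delete=False)
    mem_mat = np.memmap(tmp_file.name, dtype=np_mat.dtype,
                        mode="w+", shape=np_mat.shape)
    mem_mat[:] = np_mat[:]
    return mem_mat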
