
Commit 0a3494d
ENH: memory mapping works for soma-workflow, but it still needs testing #13
JinpengLI committed Aug 26, 2013
1 parent 08e124b commit 0a3494d
Showing 4 changed files with 17 additions and 15 deletions.
bin/epac_mapper (5 changes: 4 additions & 1 deletion)
@@ -15,6 +15,7 @@ import numpy as np
from epac import conf, StoreFs, MapperSubtrees
from epac.map_reduce.inputs import NodesInput
from epac.map_reduce.engine import SomaWorkflowEngine
from epac.utils import load_dictionary


# load all the class definitions so that classes, such as methods, can be loaded
@@ -102,7 +103,9 @@ if __name__ == "__main__":
# datasets_filepath ="/tmp/tmpO8D3dG_datasets.npz"
# keys="fs:///tmp/tmpXyC_XE/ParPerm/Perm(nb=0)"

Xy = load_datasets(datasets_filepath)
# Xy = load_datasets(datasets_filepath)
Xy = load_dictionary(datasets_filepath)

tree_root_relative_path = SomaWorkflowEngine.tree_root_relative_path
store_fs = StoreFs(tree_root_relative_path)
tree = store_fs.load(key=conf.STORE_EXECUTION_TREE_PREFIX)
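The mapper now loads its dataset through epac.utils.load_dictionary instead of np.load, so large arrays can come back as memory maps. The implementation of load_dictionary is not part of this diff; a minimal sketch of what such a loader could look like, assuming one .npy file per key plus a dict_index.txt listing the keys (dict_index.txt and MEMM_THRESHOLD are names this commit adds to epac/configuration.py; the on-disk layout itself is a guess):

import os
import numpy as np

MEMM_THRESHOLD = 500000000  # mirrors conf.MEMM_THRESHOLD (~500 MB)

def load_dictionary(dirpath):
    # Hypothetical sketch: read the keys from dict_index.txt, then load
    # <key>.npy for each key; files above MEMM_THRESHOLD are opened as
    # read-only memory maps instead of being read into RAM.
    with open(os.path.join(dirpath, "dict_index.txt")) as f:
        keys = [line.strip() for line in f if line.strip()]
    Xy = {}
    for key in keys:
        npy_path = os.path.join(dirpath, key + ".npy")
        if os.path.getsize(npy_path) > MEMM_THRESHOLD:
            Xy[key] = np.load(npy_path, mmap_mode="r")
        else:
            Xy[key] = np.load(npy_path)
    return Xy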
epac/configuration.py (3 changes: 2 additions & 1 deletion)
@@ -32,8 +32,9 @@ class conf:
RESULT_SET = "result_set"
ML_CLASSIFICATION_MODE = None # Set to True to force classification mode
DICT_INDEX_FILE = "dict_index.txt"
# when the data is larger than 500 MB, memory mapping is needed
MEMM_THRESHOLD = 500000000L

@classmethod
def init_ml(cls, **Xy):
## Try to guess if the ML task is classification or regression
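The new conf.MEMM_THRESHOLD is the size above which EPAC switches to memory mapping. A quick way to check whether a given array crosses it (assuming the comparison is against the array's in-memory byte size; whether the actual code compares nbytes or the on-disk file size is not visible in this diff):

import numpy as np
from epac import conf  # conf.MEMM_THRESHOLD is added by this commit

X = np.zeros((10000, 10000))          # float64: 8e8 bytes, about 800 MB
print X.nbytes > conf.MEMM_THRESHOLD  # True, so X would be memory mapped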
epac/map_reduce/engine.py (10 changes: 7 additions & 3 deletions)
@@ -23,6 +23,8 @@
from epac.map_reduce.split_input import SplitNodesInput
from epac.map_reduce.inputs import NodesInput

from epac.utils import save_dictionary
from epac.utils import load_dictionary

class Engine(object):
__metaclass__ = ABCMeta
@@ -148,7 +150,7 @@ def run(self, **Xy):
class SomaWorkflowEngine(LocalEngine):
'''Use soma-workflow to run an epac tree in parallel
'''
dataset_relative_path = "./dataset.npz"
dataset_relative_path = "./dataset"
open_me_by_soma_workflow_gui = "open_me_by_soma_workflow_gui"

def __init__(self,
@@ -275,8 +277,10 @@ def run(self, **Xy):

## Save the database and tree to working directory
## ===============================================
np.savez(os.path.join(tmp_work_dir_path,
SomaWorkflowEngine.dataset_relative_path), **Xy)
# np.savez(os.path.join(tmp_work_dir_path,
# SomaWorkflowEngine.dataset_relative_path), **Xy)
save_dictionary(os.path.join(tmp_work_dir_path,
SomaWorkflowEngine.dataset_relative_path), **Xy)
store = StoreFs(dirpath=os.path.join(
tmp_work_dir_path,
SomaWorkflowEngine.tree_root_relative_path))
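save_dictionary replaces np.savez when the engine writes the dataset to the working directory, and dataset_relative_path loses its .npz suffix, which suggests the data now lives in a directory rather than a single archive. The function body is not shown in this diff; a sketch consistent with the loader above (one .npy per key plus a dict_index.txt index, a hypothetical layout):

import os
import numpy as np

def save_dictionary(dirpath, **Xy):
    # Hypothetical sketch: write each array of **Xy to <key>.npy under
    # dirpath and record the keys in dict_index.txt so a matching
    # load_dictionary can find them again.
    if not os.path.isdir(dirpath):
        os.makedirs(dirpath)
    with open(os.path.join(dirpath, "dict_index.txt"), "w") as f:
        for key in Xy:
            f.write(key + "\n")
    for key, arr in Xy.items():
        np.save(os.path.join(dirpath, key + ".npy"), np.asarray(arr))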
examples/run_a_big_matrix.py (14 changes: 4 additions & 10 deletions)
@@ -40,14 +40,10 @@ def convert2memmap(np_mat):
np.savez("/tmp/data.dat", **Xy)





def load_datasets(datasets_filepath):
Xy = np.load(datasets_filepath)
return {k: Xy[k] for k in Xy.keys()}


Xy = load_datasets("/tmp/data.dat.npz")

from sklearn.svm import SVC
@@ -56,12 +52,10 @@ def load_datasets(datasets_filepath):
SVC(kernel="rbf")]),
n_folds=3)


from epac import LocalEngine
local_engine = LocalEngine(cv_svm, num_processes=2)
cv_svm = local_engine.run(X=X, y=y)
print cv_svm.reduce()

#from epac import LocalEngine
#local_engine = LocalEngine(cv_svm, num_processes=2)
#cv_svm = local_engine.run(X=X, y=y)
#print cv_svm.reduce()

from epac import SomaWorkflowEngine
swf_engine = SomaWorkflowEngine(cv_svm, num_processes=2)
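The example's convert2memmap helper appears only in the hunk header above. A sketch of what such a converter typically does, assuming it round-trips the array through a disk file to obtain an np.memmap (hypothetical body, not the committed one):

import tempfile
import numpy as np

def convert2memmap(np_mat):
    # Hypothetical sketch: copy an in-memory array into a disk-backed
    # np.memmap with the same dtype and shape.
    tmp_file = tempfile.NamedTemporaryFile(suffix=".mem", delete=False)
    mem_mat = np.memmap(tmp_file.name, dtype=np_mat.dtype,
                        mode="w+", shape=np_mat.shape)
    mem_mat[:] = np_mat[:]
    return mem_mat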
