suny-downstate-medical-center · jchen6727 · Aug 1, 2024 · Mar 17, 2024 · Mar 18, 2024 · Mar 19, 2024
diff --git a/doc/source/user_documentation.rst b/doc/source/user_documentation.rst
@@ -2963,3 +2963,22 @@ The ``out_json`` output contains a dictionary which includes the ``loss`` metric
 
 In a multi-objective optimization, the relevant ``PYR_loss``, ``BC_loss``, and ``OLM_loss`` components are additionally included (see ``mo_optuna_search.py``)
 
+8. Parameter Importance Evaluation Using fANOVA
+-----------------------------------------------
+A new feature in the batchtools beta release is the ability to evaluate parameter importance using a functional-ANOVA-inspired algorithm via the `Optuna` and `scikit-learn` libraries.
+(See `the original Hutter paper <http://proceedings.mlr.press/v32/hutter14.pdf>`_  and its `citation <https://automl.github.io/fanova/cite.html>`_)
+
+Currently, only unpaired single-parameter importance with respect to a single metric score is supported, through the ``Analyzer`` object in ``netpyne.batchtools.analysis``, with an example of its usage
+`here <https://github.com/suny-downstate-medical-center/netpyne/tree/batch/netpyne/batchtools/examples/rosenbrock/fanova_rosenbrock>`_:
+
+In its current iteration, running the example requires first generating an output file ``grid.csv`` with ``batch.py`` and then loading that ``grid.csv`` into the ``Analyzer`` object. Calling ``run_analysis`` then generates, for each parameter, a single score indicating the estimated *importance* of the parameter: that is, its estimated effect on the total variance of the model within the given bounds.
+
+.. code-block:: python
+
+        # from analysis.py
+        from netpyne.batchtools.analysis import Analyzer
+
+        analyzer = Analyzer(params = ['x.0', 'x.1', 'x.2', 'x.3'], metrics = ['fx']) # specify the parameter space and metrics of the batch function
+        analyzer.load_file('grid.csv') # load the grid file generated by the batch run
+        results = analyzer.run_analysis() # run fANOVA analysis and store the importance values in a results dictionary
+
diff --git a/netpyne/batchtools/__init__.py b/netpyne/batchtools/__init__.py
@@ -1,19 +1,26 @@
 from netpyne.batchtools.runners import NetpyneRunner
 from batchtk.runtk import dispatchers
-
 from netpyne.batchtools import submits
 from batchtk import runtk
+from netpyne.batchtools.analysis import Analyzer
 
 specs = NetpyneRunner()
+
 from netpyne.batchtools.comm import Comm
 
+comm = Comm()
+
 dispatchers = dispatchers
 submits = submits
 runtk = runtk
 
 
-comm = Comm()
-
+"""
+def analyze_from_file(filename):
+    analyzer = Fanova()
+    analyzer.load_file(filename)
+    analyzer.run_analysis(
+"""
 
 #from ray import tune as space.comm
 #list and lb ub

diff --git a/netpyne/batchtools/analysis.py b/netpyne/batchtools/analysis.py
@@ -0,0 +1,83 @@
+"""Parameter importance analysis for the results of a batchtools search."""
+import pandas
+from collections import namedtuple
+import numpy
+
+from optuna.importance._fanova._fanova import _Fanova
+
+
+class Fanova(object):
+    """Single-parameter importance evaluator wrapping Optuna's private ``_Fanova`` class."""
+
+    def __init__(self, n_trees: int = 64, max_depth: int = 64, seed: int | None = None) -> None:
+        """Create the underlying ``_Fanova`` evaluator.
+
+        n_trees, max_depth and seed are forwarded unchanged; min_samples_split and
+        min_samples_leaf are fixed at 2 and 1.
+        """
+        self._evaluator = _Fanova(
+            n_trees=n_trees,
+            max_depth=max_depth,
+            min_samples_split=2,
+            min_samples_leaf=1,
+            seed=seed,
+        )
+
+    def evaluate(self, X: pandas.DataFrame, y: pandas.DataFrame) -> dict:
+        """Fit the evaluator on (X, y) and return a ``{column name: importance}`` dictionary.
+
+        X holds one column per parameter; y must hold exactly one metric column and
+        as many rows as X, otherwise a ValueError is raised.
+        """
+        # raise instead of assert: asserts are stripped when Python runs with -O
+        if X.shape[0] != y.shape[0]: # all rows must be present
+            raise ValueError("X has {} rows but y has {}".format(X.shape[0], y.shape[0]))
+        if y.shape[1] != 1: # only evaluation for single metric supported
+            raise ValueError("only a single metric is supported, got {}".format(y.shape[1]))
+
+        evaluator = self._evaluator
+        search_spaces = numpy.array([X.min().values, X.max().values]).T # bounds: observed [min, max] per column
+        column_to_encoded_columns = [numpy.atleast_1d(i) for i in range(X.shape[1])] # encoding (no 1 hot/categorical)
+        evaluator.fit(X.values, y.values.ravel(), search_spaces, column_to_encoded_columns)
+        importances = numpy.array(
+            [evaluator.get_importance(i)[0] for i in range(X.shape[1])]
+        )
+        return dict(zip(X.columns, importances))
+
+
+class Analyzer(object):
+    """Load completed batchtools trials from a .csv file and evaluate parameter importance."""
+
+    def __init__(self,
+                 params: list, # list of parameters
+                 metrics: list, # list of metrics
+                 evaluator = None # object exposing evaluate(X, y); a new Fanova() when None
+                 ) -> None:
+        self.params = params
+        self.metrics = metrics
+        self.data = None # set by load_file
+        # the default is built here, per instance: a ``Fanova()`` default argument would be
+        # created once at import time and shared by every Analyzer
+        self.evaluator = Fanova() if evaluator is None else evaluator
+
+    def load_file(self,
+                  filename: str # filename (.csv) containing the completed batchtools trials
+                  ) -> None:
+        """Read the trial table and keep the ``config/<param>`` columns (renamed to the bare
+        parameter names) together with the metric columns in ``self.data``."""
+        data = pandas.read_csv(filename)
+        param_space = data[["config/{}".format(param) for param in self.params]]
+        param_space = param_space.rename(columns={'config/{}'.format(param): param for param in self.params})
+        results = data[self.metrics]
+        self.data = namedtuple('data', ['param_space', 'results'])(param_space, results)
+
+    def run_analysis(self) -> dict:
+        """Evaluate the loaded data and return the evaluator's importance dictionary.
+
+        Raises RuntimeError when called before load_file.
+        """
+        if self.data is None:
+            raise RuntimeError("no data loaded: call load_file() before run_analysis()")
+        return self.evaluator.evaluate(self.data.param_space, self.data.results)
+
+
diff --git a/netpyne/batchtools/docs/batchtools.rst b/netpyne/batchtools/docs/batchtools.rst
@@ -414,3 +414,22 @@ The ``out_json`` output contains a dictionary which includes the ``loss`` metric
 
 In a multi-objective optimization, the relevant ``PYR_loss``, ``BC_loss``, and ``OLM_loss`` components are additionally included (see ``mo_optuna_search.py``)
 
+8. Parameter Importance Evaluation Using fANOVA
+-----------------------------------------------
+A new feature in the batchtools beta release is the ability to evaluate parameter importance using a functional-ANOVA-inspired algorithm via the `Optuna` and `scikit-learn` libraries.
+(See `the original Hutter paper <http://proceedings.mlr.press/v32/hutter14.pdf>`_  and its `citation <https://automl.github.io/fanova/cite.html>`_)
+
+Currently, only unpaired single-parameter importance with respect to a single metric score is supported, through the ``Analyzer`` object in ``netpyne.batchtools.analysis``, with an example of its usage
+`here <https://github.com/suny-downstate-medical-center/netpyne/tree/batch/netpyne/batchtools/examples/rosenbrock/fanova_rosenbrock>`_:
+
+In its current iteration, running the example requires first generating an output file ``grid.csv`` with ``batch.py`` and then loading that ``grid.csv`` into the ``Analyzer`` object. Calling ``run_analysis`` then generates, for each parameter, a single score indicating the estimated *importance* of the parameter: that is, its estimated effect on the total variance of the model within the given bounds.
+
+.. code-block:: python
+
+        # from analysis.py
+        from netpyne.batchtools.analysis import Analyzer
+
+        analyzer = Analyzer(params = ['x.0', 'x.1', 'x.2', 'x.3'], metrics = ['fx']) # specify the parameter space and metrics of the batch function
+        analyzer.load_file('grid.csv') # load the grid file generated by the batch run
+        results = analyzer.run_analysis() # run fANOVA analysis and store the importance values in a results dictionary
+
diff --git a/netpyne/batchtools/examples/CA3/cfg.py b/netpyne/batchtools/examples/CA3/cfg.py
@@ -17,6 +17,9 @@
 cfg.saveJson = True
 cfg.printRunTime = 0.1 
 cfg.recordLFP = None # don't save this
+cfg.simLabel = 'ca3'
+cfg.saveFolder = '.'
+
 
 cfg.analysis['plotRaster'] = {'saveFig': True} # raster ok
 cfg.analysis['plotTraces'] = { } # don't save this

diff --git a/netpyne/batchtools/examples/rosenbrock/fanova_rosenbrock/analysis.py b/netpyne/batchtools/examples/rosenbrock/fanova_rosenbrock/analysis.py
@@ -0,0 +1,6 @@
+from netpyne.batchtools.analysis import Analyzer
+
+analyzer = Analyzer(params = ['x.0', 'x.1', 'x.2', 'x.3'], metrics = ['fx']) # specify the parameter space and metrics of the batch function
+analyzer.load_file('grid.csv') # load the file produced by the batch run (batch.py uses label = 'grid'); adjust the path if it is written elsewhere
+results = analyzer.run_analysis() # run the fANOVA analysis; results maps each parameter name to its importance value
+
diff --git a/netpyne/batchtools/examples/rosenbrock/fanova_rosenbrock/batch.py b/netpyne/batchtools/examples/rosenbrock/fanova_rosenbrock/batch.py
@@ -1,23 +1,24 @@
 from netpyne.batchtools.search import search
+import numpy
 
-params = {'x.0': [0, 3],
-          'x.1': [0, 3],
-          'x.2': [0, 3],
-          'x.3': [0, 3],
+params = {'x.0': numpy.linspace(-1, 3, 5),
+          'x.1': numpy.linspace(-1, 3, 5),
+          'x.2': numpy.linspace(-1, 3, 5),
+          'x.3': numpy.linspace(-1, 3, 5),
           }
 
 # use shell_config if running directly on the machine
 shell_config = {'command': 'python rosenbrock.py',}
 
 search(job_type = 'sh', # or sh
        comm_type = 'socket',
-       label = 'optuna',
+       label = 'grid',
        params = params,
-       output_path = '../optuna_batch',
+       output_path = '../grid_batch',
        checkpoint_path = '../ray',
        run_config = {'command': 'python rosenbrock.py'},
-       num_samples = 9,
+       num_samples = 1,
        metric = 'fx',
        mode = 'min',
-       algorithm = 'optuna',
+       algorithm = 'variant_generator',
        max_concurrent = 3)