Skip to content

Commit

Permalink
extract brainio_{base,collection} into separate repositories
Browse files Browse the repository at this point in the history
  • Loading branch information
mschrimpf committed Jan 16, 2019
1 parent 6d4c678 commit ae86c96
Show file tree
Hide file tree
Showing 41 changed files with 31 additions and 9,197 deletions.
5 changes: 0 additions & 5 deletions .gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

MIT License

Copyright (c) 2017, Jon Prescott-Roy
Copyright (c) 2017, Martin Schrimpf and Jon Prescott-Roy

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
4 changes: 1 addition & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
include LICENSE
include README.rst
include README.md

recursive-include tests *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]

recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif

include brainscore/lookup.db
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ Brain recordings (termed "assemblies", e.g. neural or behavioral)
are packaged in a [standard format](http://xarray.pydata.org/).
This allows metrics (e.g. neural predictivity, RDMs) to operate
on many assemblies without having to be re-written.
Together with http://github.com/dicarlolab/candidate_models, `brainscore`
Together with http://github.com/brain-score/candidate_models, `brainscore`
allows scoring candidate models of the brain on a range of assemblies and metrics.


## Quick setup

Recommended for most users. Use Brain-Score as a library. You will need Python >= 3.6.
Recommended for most users. Use Brain-Score as a library. You will need Python >= 3.6 and pip >= 18.1.

`pip install --process-dependency-links git+https://github.com/dicarlolab/brain-score`
`pip install git+https://github.com/brain-score/brain-score`

To contribute code to Brain-Score, see the [Development Setup](#development-setup).

Expand All @@ -34,7 +34,7 @@ Coordinates:
- neuroid_id (neuroid) object 'Chabo_L_M_5_9' 'Chabo_L_M_6_9' ...
...
$ ...
$ metric = RDMCrossValidated()
$ metric = RDM()
$ score = metric(assembly1=hvm, assembly2=hvm)
Score(aggregation: 2)>
array([1., 0.])
Expand Down Expand Up @@ -66,9 +66,8 @@ Only necessary if you plan to change code.
2. Clone the Git repository to wherever you keep repositories:
* `cd ~/dev`
* `git clone git@github.com:dicarlolab/brain-score.git`
3. Create and activate a Conda environment with relevant packages:
* `conda env create -f environment.yml`
* `conda activate brainscore`
3. Install the dependencies (we suggest doing this in a [conda environment](https://conda.io/docs/user-guide/tasks/manage-environments.html)):
* `pip install -e .`


## License
Expand Down
8 changes: 1 addition & 7 deletions brainscore/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
# -*- coding: utf-8 -*-

__author__ = """Jon Prescott-Roy"""
__email__ = '[email protected]'
__version__ = '0.1.0'

from .fetch import get_assembly, get_stimulus_set
from brainio_collection.fetch import get_assembly, get_stimulus_set

from brainscore.contrib import benchmarks as contrib_benchmarks

Expand Down
238 changes: 0 additions & 238 deletions brainscore/assemblies.py
Original file line number Diff line number Diff line change
@@ -1,242 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import functools
import operator
from collections import OrderedDict, defaultdict

import numpy as np
import peewee
import xarray as xr
from xarray import DataArray

from brainscore.lookup import pwdb
from brainscore.stimuli import StimulusSetModel


class DataPoint(object):
    """One recorded value: the response of a single neuroid (a neuron or
    neuron analogue) to a single presentation of a stimulus."""

    def __init__(self, value, neuroid, presentation):
        # plain attribute storage; no validation is performed
        self.presentation = presentation
        self.neuroid = neuroid
        self.value = value


class DataAssembly(DataArray):
    """A DataAssembly represents a set of data a researcher wishes to work with for
    an analysis or benchmarking task.

    It is an xarray ``DataArray`` whose per-dimension coordinates are gathered
    into a MultiIndex on construction (see ``gather_indexes``), and which adds
    multi-coordinate groupby and selection helpers on top of xarray.
    """

    def __init__(self, *args, **kwargs):
        super(DataAssembly, self).__init__(*args, **kwargs)
        # build a MultiIndex per dimension from that dimension's coordinates
        gather_indexes(self)

    def multi_groupby(self, group_coord_names, *args, **kwargs):
        """Group by several coordinates at once.

        All coordinates in `group_coord_names` must share a single dimension
        (enforced by `_dim_of_group_coords`). Returns a `GroupbyBridge` that
        behaves like an xarray GroupBy but splits the joined group coordinate
        back into the original coordinates on aggregation results.
        """
        delimiter = "|"
        multi_group_name = "multi_group"
        dim = self._dim_of_group_coords(group_coord_names)
        # temporarily fuse the group coordinates into one string-valued coordinate
        tmp_assy = self._join_group_coords(dim, group_coord_names, delimiter, multi_group_name)
        result = tmp_assy.groupby(multi_group_name, *args, **kwargs)
        return GroupbyBridge(result, self, dim, group_coord_names, delimiter, multi_group_name)

    def _join_group_coords(self, dim, group_coord_names, delimiter, multi_group_name):
        """Return a copy of self with the group coordinates joined into one
        delimiter-separated string coordinate named `multi_group_name` on `dim`."""
        tmp_assy = self.copy()
        group_coords = [tmp_assy.coords[c] for c in group_coord_names]
        multi_group_coord = []
        # one joined label per position along `dim`
        for coords in zip(*group_coords):
            multi_group_coord.append(delimiter.join([str(c.values) for c in coords]))
        tmp_assy.coords[multi_group_name] = dim, multi_group_coord
        tmp_assy.set_index(append=True, inplace=True, **{dim: multi_group_name})
        return tmp_assy

    def _dim_of_group_coords(self, group_coord_names):
        """Return the single dimension shared by all group coordinates.

        :raise GroupbyError: if the coordinates span more than one dimension.
        """
        dimses = [self.coords[coord_name].dims for coord_name in group_coord_names]
        dims = [dim for dim_tuple in dimses for dim in dim_tuple]
        if len(set(dims)) == 1:
            return dims[0]
        else:
            raise GroupbyError("All coordinates for grouping must be associated with the same single dimension. ")

    def multisel(self, method=None, tolerance=None, drop=False, **indexers):
        """
        partial workaround to keep multi-indexes and scalar coords
        https://github.com/pydata/xarray/issues/1491, https://github.com/pydata/xarray/pull/1426
        this method might slow things down, use with caution

        Select by one or more coordinate values (like ``sel``) while restoring
        MultiIndexes and scalar coordinates that xarray would otherwise drop.
        """
        # each indexer coordinate must live on exactly one dimension
        indexer_dims = {index: self[index].dims for index in indexers}
        dims = []
        for _dims in indexer_dims.values():
            assert len(_dims) == 1
            dims.append(_dims[0])
        # map every coordinate on those dimensions so we can re-stack later
        coords_dim, dim_coords = {}, defaultdict(list)
        for dim in dims:
            for coord, coord_dims, _ in walk_coords(self):
                if array_is_element(coord_dims, dim):
                    coords_dim[coord] = dim
                    dim_coords[dim].append(coord)

        result = super().sel(method=method, tolerance=tolerance, drop=drop, **indexers)

        # un-drop potentially dropped dims
        for coord, value in indexers.items():
            dim = self[coord].dims
            assert len(dim) == 1
            dim = dim[0]
            if not hasattr(result, coord) and dim not in result.dims:
                result = result.expand_dims(coord)
                result[coord] = [value]

        # stack back together
        stack_dims = list(result.dims)
        for result_dim in stack_dims:
            if result_dim not in self.dims:
                # a coordinate was promoted to a dimension by sel; restack it
                # onto the dimension it originally belonged to
                original_dim = coords_dim[result_dim]
                stack_coords = [coord for coord in dim_coords[original_dim] if hasattr(result, coord)]
                for coord in stack_coords:
                    stack_dims.remove(coord)
                result = result.stack(**{original_dim: stack_coords})
        # add scalar indexer variable
        for index, value in indexers.items():
            if hasattr(result, index):
                continue  # already set, potentially during un-dropping
            dim = indexer_dims[index]
            assert len(dim) == 1
            # broadcast the scalar indexer value along its original dimension
            value = np.repeat(value, len(result[dim[0]]))
            result[index] = dim, value
        return result


class BehavioralAssembly(DataAssembly):
    """A DataAssembly holding behavioral data."""


class NeuroidAssembly(DataAssembly):
    """A DataAssembly holding data recorded from either neurons or
    neuron analogues."""


class NeuronRecordingAssembly(NeuroidAssembly):
    """A NeuroidAssembly holding data recorded from biological neurons."""


class ModelFeaturesAssembly(NeuroidAssembly):
    """A NeuroidAssembly holding activations captured from nodes of a
    machine learning model."""


def coords_for_dim(xr_data, dim, exclude_indexes=True):
    """Return the coordinate variables of `xr_data` that are attached to
    exactly the single dimension `dim`, keyed by coordinate name.

    :param exclude_indexes: if True, skip coordinates that are already
        xarray IndexVariables.
    :return: an OrderedDict mapping coordinate name to coordinate variable.
    """
    selected = OrderedDict()
    for name, variable in xr_data.coords.variables.items():
        if variable.dims != (dim,):
            continue  # attached to another (or more than one) dimension
        if exclude_indexes and isinstance(variable, xr.IndexVariable):
            continue  # already part of an index
        selected[name] = variable
    return selected


def gather_indexes(xr_data):
    """Fold each dimension's coordinates into a MultiIndex, in place.

    This is only necessary as long as xarray cannot persist MultiIndex to netCDF.
    Returns `xr_data` for convenience.
    """
    index_map = {}
    for dim in xr_data.dims:
        dim_coords = coords_for_dim(xr_data, dim)
        if dim_coords:
            index_map[dim] = list(dim_coords)
    if index_map:
        xr_data.set_index(append=True, inplace=True, **index_map)
    return xr_data


class GroupbyBridge(object):
    """Wraps an xarray GroupBy object to allow grouping on multiple coordinates.

    Attribute access is forwarded to the wrapped GroupBy; any callable result
    is wrapped so that, when an aggregation returns an assembly of the same
    type as the original, the joined "multi_group" coordinate is split back
    into the original group coordinates.
    """

    def __init__(self, groupby, assembly, dim, group_coord_names, delimiter, multi_group_name):
        # the underlying xarray GroupBy over the joined coordinate
        self.groupby = groupby
        # the original assembly (used to detect result type)
        self.assembly = assembly
        # the single dimension that all group coordinates live on
        self.dim = dim
        self.group_coord_names = group_coord_names
        # string used to join/split the coordinate values (e.g. "|")
        self.delimiter = delimiter
        # name of the temporary joined coordinate
        self.multi_group_name = multi_group_name

    def __getattr__(self, attr):
        # delegate to the wrapped GroupBy; wrap callables so their results
        # get post-processed by split_group_coords
        result = getattr(self.groupby, attr)
        if callable(result):
            result = self.wrap_groupby(result)
        return result

    def wrap_groupby(self, func):
        """Wrap a GroupBy method so assembly-typed results are un-joined."""
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            if isinstance(result, type(self.assembly)):
                result = self.split_group_coords(result)
            return result

        return wrapper

    def split_group_coords(self, result):
        """Split the joined multi_group coordinate back into the original
        group coordinates and rename the group dimension back to `self.dim`."""
        # each joined label becomes one row; transpose to get one array
        # of values per original coordinate
        split_coords = np.array(
            list(map(lambda s: s.split(self.delimiter) if isinstance(s, str) else [s],
                     result.coords[self.multi_group_name].values))).T
        for coord_name, coord in zip(self.group_coord_names, split_coords):
            result.coords[coord_name] = (self.multi_group_name, coord)
        # rebuild the index from the restored coordinates, then restore the
        # original dimension name
        result.reset_index(self.multi_group_name, drop=True, inplace=True)
        result.set_index(append=True, inplace=True, **{self.multi_group_name: self.group_coord_names})
        result = result.rename({self.multi_group_name: self.dim})
        return result


class GroupbyError(Exception):
    """Raised when a multi-coordinate groupby request is invalid, e.g. the
    grouping coordinates do not share a single dimension."""


class AssemblyModel(peewee.Model):
    """An AssemblyModel stores information about the canonical location where the data
    for a DataAssembly is stored. """
    # registered name of the assembly (looked up by `lookup_assembly`)
    name = peewee.CharField()
    # name of the DataAssembly subclass for this data -- presumably used to
    # re-instantiate the right class on load; TODO confirm against fetch code
    assembly_class = peewee.CharField()
    # the stimulus set that this assembly's data corresponds to
    stimulus_set = peewee.ForeignKeyField(StimulusSetModel, backref="assembly_models")

    class Meta:
        # all lookup tables share the same peewee database handle
        database = pwdb


class AssemblyStoreModel(peewee.Model):
    """An AssemblyStoreModel stores the location of a DataAssembly data file. """
    # storage format of the data file -- TODO confirm allowed values
    assembly_type = peewee.CharField()
    # kind of location `location` refers to (e.g. a URL scheme or service name)
    # -- NOTE(review): semantics not visible here; verify against fetch code
    location_type = peewee.CharField()
    # where the data file can be retrieved from
    location = peewee.CharField()
    # optional unique identifier for the stored file
    unique_name = peewee.CharField(unique=True, null=True, index=True)
    # optional content hash for integrity checking
    sha1 = peewee.CharField(unique=True, null=True, index=True)

    class Meta:
        # all lookup tables share the same peewee database handle
        database = pwdb


class AssemblyStoreMap(peewee.Model):
    """An AssemblyStoreMap links an AssemblyRecord to an AssemblyStore. """
    # the assembly this store entry belongs to
    assembly_model = peewee.ForeignKeyField(AssemblyModel, backref="assembly_store_maps")
    # the stored file backing that assembly
    assembly_store_model = peewee.ForeignKeyField(AssemblyStoreModel, backref="assembly_store_maps")
    # role of this file within the assembly -- TODO confirm allowed values
    role = peewee.CharField()

    class Meta:
        # all lookup tables share the same peewee database handle
        database = pwdb


class AssemblyLookupError(Exception):
    """Raised when no DataAssembly with the requested name is registered
    in the lookup database."""


def lookup_assembly(name):
    """Look up the AssemblyModel record registered under `name`.

    :param name: the registered name of the DataAssembly.
    :return: the matching AssemblyModel row.
    :raise AssemblyLookupError: if no assembly with that name is registered.
    """
    pwdb.connect(reuse_if_open=True)
    try:
        assy = AssemblyModel.get(AssemblyModel.name == name)
    except AssemblyModel.DoesNotExist as e:
        # chain the peewee error so the root cause is preserved in tracebacks
        # (the original code bound `e` but never used it)
        raise AssemblyLookupError("A DataAssembly named " + name + " was not found.") from e
    return assy


def merge_data_arrays(data_arrays):
Expand Down
Loading

0 comments on commit ae86c96

Please sign in to comment.