Skip to content

Commit

Permalink
extract brainio_{base,collection} into separate repositories
Browse files Browse the repository at this point in the history
  • Loading branch information
mschrimpf committed Jan 16, 2019
1 parent 6d4c678 commit ae86c96
Show file tree
Hide file tree
Showing 41 changed files with 31 additions and 9,197 deletions.
5 changes: 0 additions & 5 deletions .gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

MIT License

Copyright (c) 2017, Jon Prescott-Roy
Copyright (c) 2017, Martin Schrimpf and Jon Prescott-Roy

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
4 changes: 1 addition & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
include LICENSE
include README.rst
include README.md

recursive-include tests *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]

recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif

include brainscore/lookup.db
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ Brain recordings (termed "assemblies", e.g. neural or behavioral)
are packaged in a [standard format](http://xarray.pydata.org/).
This allows metrics (e.g. neural predictivity, RDMs) to operate
on many assemblies without having to be re-written.
Together with http://github.com/dicarlolab/candidate_models, `brainscore`
Together with http://github.com/brain-score/candidate_models, `brainscore`
allows scoring candidate models of the brain on a range of assemblies and metrics.


## Quick setup

Recommended for most users. Use Brain-Score as a library. You will need Python >= 3.6.
Recommended for most users. Use Brain-Score as a library. You will need Python >= 3.6 and pip >= 18.1.

`pip install --process-dependency-links git+https://github.com/dicarlolab/brain-score`
`pip install git+https://github.com/brain-score/brain-score`

To contribute code to Brain-Score, see the [Development Setup](#development-setup).

Expand All @@ -34,7 +34,7 @@ Coordinates:
- neuroid_id (neuroid) object 'Chabo_L_M_5_9' 'Chabo_L_M_6_9' ...
...
$ ...
$ metric = RDMCrossValidated()
$ metric = RDM()
$ score = metric(assembly1=hvm, assembly2=hvm)
Score(aggregation: 2)>
array([1., 0.])
Expand Down Expand Up @@ -66,9 +66,8 @@ Only necessary if you plan to change code.
2. Clone the Git repository to wherever you keep repositories:
* `cd ~/dev`
* `git clone git@github.com:dicarlolab/brain-score.git`
3. Create and activate a Conda environment with relevant packages:
* `conda env create -f environment.yml`
* `conda activate brainscore`
3. Install the dependencies (we suggest doing this in a [conda environment](https://conda.io/docs/user-guide/tasks/manage-environments.html)):
* `pip install -e .`


## License
Expand Down
8 changes: 1 addition & 7 deletions brainscore/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
# -*- coding: utf-8 -*-

__author__ = """Jon Prescott-Roy"""
__email__ = '[email protected]'
__version__ = '0.1.0'

from .fetch import get_assembly, get_stimulus_set
from brainio_collection.fetch import get_assembly, get_stimulus_set

from brainscore.contrib import benchmarks as contrib_benchmarks

Expand Down
238 changes: 0 additions & 238 deletions brainscore/assemblies.py
Original file line number Diff line number Diff line change
@@ -1,242 +1,4 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import functools
import operator
from collections import OrderedDict, defaultdict

import numpy as np
import peewee
import xarray as xr
from xarray import DataArray

from brainscore.lookup import pwdb
from brainscore.stimuli import StimulusSetModel


class DataPoint(object):
    """One recorded value: the response of a single neuroid (a neuron or
    neuron analogue) to a single presentation of a stimulus."""

    def __init__(self, value, neuroid, presentation):
        # plain attribute storage; no validation is performed
        self.presentation = presentation
        self.neuroid = neuroid
        self.value = value


class DataAssembly(DataArray):
    """A DataAssembly represents a set of data a researcher wishes to work with for
    an analysis or benchmarking task.

    It is an xarray ``DataArray`` whose per-dimension coordinates are gathered
    into a MultiIndex on construction (see ``gather_indexes``), and which adds
    multi-coordinate groupby and selection helpers on top of xarray.
    """

    def __init__(self, *args, **kwargs):
        super(DataAssembly, self).__init__(*args, **kwargs)
        # build a MultiIndex per dimension from that dimension's coordinates
        gather_indexes(self)

    def multi_groupby(self, group_coord_names, *args, **kwargs):
        """Group by several coordinates at once.

        All coordinates in `group_coord_names` must share a single dimension
        (enforced by `_dim_of_group_coords`). Returns a `GroupbyBridge` that
        behaves like an xarray GroupBy but splits the joined group coordinate
        back into the original coordinates on aggregation results.
        """
        delimiter = "|"
        multi_group_name = "multi_group"
        dim = self._dim_of_group_coords(group_coord_names)
        # temporarily fuse the group coordinates into one string-valued coordinate
        tmp_assy = self._join_group_coords(dim, group_coord_names, delimiter, multi_group_name)
        result = tmp_assy.groupby(multi_group_name, *args, **kwargs)
        return GroupbyBridge(result, self, dim, group_coord_names, delimiter, multi_group_name)

    def _join_group_coords(self, dim, group_coord_names, delimiter, multi_group_name):
        """Return a copy of self with the group coordinates joined into one
        delimiter-separated string coordinate named `multi_group_name` on `dim`."""
        tmp_assy = self.copy()
        group_coords = [tmp_assy.coords[c] for c in group_coord_names]
        multi_group_coord = []
        # one joined label per position along `dim`
        for coords in zip(*group_coords):
            multi_group_coord.append(delimiter.join([str(c.values) for c in coords]))
        tmp_assy.coords[multi_group_name] = dim, multi_group_coord
        tmp_assy.set_index(append=True, inplace=True, **{dim: multi_group_name})
        return tmp_assy

    def _dim_of_group_coords(self, group_coord_names):
        """Return the single dimension shared by all group coordinates.

        :raise GroupbyError: if the coordinates span more than one dimension.
        """
        dimses = [self.coords[coord_name].dims for coord_name in group_coord_names]
        dims = [dim for dim_tuple in dimses for dim in dim_tuple]
        if len(set(dims)) == 1:
            return dims[0]
        else:
            raise GroupbyError("All coordinates for grouping must be associated with the same single dimension. ")

    def multisel(self, method=None, tolerance=None, drop=False, **indexers):
        """
        partial workaround to keep multi-indexes and scalar coords
        https://github.com/pydata/xarray/issues/1491, https://github.com/pydata/xarray/pull/1426
        this method might slow things down, use with caution

        Select by one or more coordinate values (like ``sel``) while restoring
        MultiIndexes and scalar coordinates that xarray would otherwise drop.
        """
        # each indexer coordinate must live on exactly one dimension
        indexer_dims = {index: self[index].dims for index in indexers}
        dims = []
        for _dims in indexer_dims.values():
            assert len(_dims) == 1
            dims.append(_dims[0])
        # map every coordinate on those dimensions so we can re-stack later
        coords_dim, dim_coords = {}, defaultdict(list)
        for dim in dims:
            for coord, coord_dims, _ in walk_coords(self):
                if array_is_element(coord_dims, dim):
                    coords_dim[coord] = dim
                    dim_coords[dim].append(coord)

        result = super().sel(method=method, tolerance=tolerance, drop=drop, **indexers)

        # un-drop potentially dropped dims
        for coord, value in indexers.items():
            dim = self[coord].dims
            assert len(dim) == 1
            dim = dim[0]
            if not hasattr(result, coord) and dim not in result.dims:
                result = result.expand_dims(coord)
                result[coord] = [value]

        # stack back together
        stack_dims = list(result.dims)
        for result_dim in stack_dims:
            if result_dim not in self.dims:
                # a coordinate was promoted to a dimension by sel; restack it
                # onto the dimension it originally belonged to
                original_dim = coords_dim[result_dim]
                stack_coords = [coord for coord in dim_coords[original_dim] if hasattr(result, coord)]
                for coord in stack_coords:
                    stack_dims.remove(coord)
                result = result.stack(**{original_dim: stack_coords})
        # add scalar indexer variable
        for index, value in indexers.items():
            if hasattr(result, index):
                continue  # already set, potentially during un-dropping
            dim = indexer_dims[index]
            assert len(dim) == 1
            # broadcast the scalar indexer value along its original dimension
            value = np.repeat(value, len(result[dim[0]]))
            result[index] = dim, value
        return result


class BehavioralAssembly(DataAssembly):
    """A DataAssembly holding behavioral data."""


class NeuroidAssembly(DataAssembly):
    """A DataAssembly holding data recorded from either neurons or
    neuron analogues."""


class NeuronRecordingAssembly(NeuroidAssembly):
    """A NeuroidAssembly holding data recorded from biological neurons."""


class ModelFeaturesAssembly(NeuroidAssembly):
    """A NeuroidAssembly holding activations captured from nodes of a
    machine learning model."""


def coords_for_dim(xr_data, dim, exclude_indexes=True):
    """Return the coordinate variables of `xr_data` that are attached to
    exactly the single dimension `dim`, keyed by coordinate name.

    :param exclude_indexes: if True, skip coordinates that are already
        xarray IndexVariables.
    :return: an OrderedDict mapping coordinate name to coordinate variable.
    """
    selected = OrderedDict()
    for name, variable in xr_data.coords.variables.items():
        if variable.dims != (dim,):
            continue  # attached to another (or more than one) dimension
        if exclude_indexes and isinstance(variable, xr.IndexVariable):
            continue  # already part of an index
        selected[name] = variable
    return selected


def gather_indexes(xr_data):
    """Fold each dimension's coordinates into a MultiIndex, in place.

    This is only necessary as long as xarray cannot persist MultiIndex to netCDF.
    Returns `xr_data` for convenience.
    """
    index_map = {}
    for dim in xr_data.dims:
        dim_coords = coords_for_dim(xr_data, dim)
        if dim_coords:
            index_map[dim] = list(dim_coords)
    if index_map:
        xr_data.set_index(append=True, inplace=True, **index_map)
    return xr_data


class GroupbyBridge(object):
    """Wraps an xarray GroupBy object to allow grouping on multiple coordinates.

    Attribute access is forwarded to the wrapped GroupBy; any callable result
    is wrapped so that, when an aggregation returns an assembly of the same
    type as the original, the joined "multi_group" coordinate is split back
    into the original group coordinates.
    """

    def __init__(self, groupby, assembly, dim, group_coord_names, delimiter, multi_group_name):
        # the underlying xarray GroupBy over the joined coordinate
        self.groupby = groupby
        # the original assembly (used to detect result type)
        self.assembly = assembly
        # the single dimension that all group coordinates live on
        self.dim = dim
        self.group_coord_names = group_coord_names
        # string used to join/split the coordinate values (e.g. "|")
        self.delimiter = delimiter
        # name of the temporary joined coordinate
        self.multi_group_name = multi_group_name

    def __getattr__(self, attr):
        # delegate to the wrapped GroupBy; wrap callables so their results
        # get post-processed by split_group_coords
        result = getattr(self.groupby, attr)
        if callable(result):
            result = self.wrap_groupby(result)
        return result

    def wrap_groupby(self, func):
        """Wrap a GroupBy method so assembly-typed results are un-joined."""
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            if isinstance(result, type(self.assembly)):
                result = self.split_group_coords(result)
            return result

        return wrapper

    def split_group_coords(self, result):
        """Split the joined multi_group coordinate back into the original
        group coordinates and rename the group dimension back to `self.dim`."""
        # each joined label becomes one row; transpose to get one array
        # of values per original coordinate
        split_coords = np.array(
            list(map(lambda s: s.split(self.delimiter) if isinstance(s, str) else [s],
                     result.coords[self.multi_group_name].values))).T
        for coord_name, coord in zip(self.group_coord_names, split_coords):
            result.coords[coord_name] = (self.multi_group_name, coord)
        # rebuild the index from the restored coordinates, then restore the
        # original dimension name
        result.reset_index(self.multi_group_name, drop=True, inplace=True)
        result.set_index(append=True, inplace=True, **{self.multi_group_name: self.group_coord_names})
        result = result.rename({self.multi_group_name: self.dim})
        return result


class GroupbyError(Exception):
    """Raised when a multi-coordinate groupby request is invalid, e.g. the
    grouping coordinates do not share a single dimension."""


class AssemblyModel(peewee.Model):
    """An AssemblyModel stores information about the canonical location where the data
    for a DataAssembly is stored. """
    # registered name of the assembly (looked up by `lookup_assembly`)
    name = peewee.CharField()
    # name of the DataAssembly subclass for this data -- presumably used to
    # re-instantiate the right class on load; TODO confirm against fetch code
    assembly_class = peewee.CharField()
    # the stimulus set that this assembly's data corresponds to
    stimulus_set = peewee.ForeignKeyField(StimulusSetModel, backref="assembly_models")

    class Meta:
        # all lookup tables share the same peewee database handle
        database = pwdb


class AssemblyStoreModel(peewee.Model):
    """An AssemblyStoreModel stores the location of a DataAssembly data file. """
    # storage format of the data file -- TODO confirm allowed values
    assembly_type = peewee.CharField()
    # kind of location `location` refers to (e.g. a URL scheme or service name)
    # -- NOTE(review): semantics not visible here; verify against fetch code
    location_type = peewee.CharField()
    # where the data file can be retrieved from
    location = peewee.CharField()
    # optional unique identifier for the stored file
    unique_name = peewee.CharField(unique=True, null=True, index=True)
    # optional content hash for integrity checking
    sha1 = peewee.CharField(unique=True, null=True, index=True)

    class Meta:
        # all lookup tables share the same peewee database handle
        database = pwdb


class AssemblyStoreMap(peewee.Model):
    """An AssemblyStoreMap links an AssemblyRecord to an AssemblyStore. """
    # the assembly this store entry belongs to
    assembly_model = peewee.ForeignKeyField(AssemblyModel, backref="assembly_store_maps")
    # the stored file backing that assembly
    assembly_store_model = peewee.ForeignKeyField(AssemblyStoreModel, backref="assembly_store_maps")
    # role of this file within the assembly -- TODO confirm allowed values
    role = peewee.CharField()

    class Meta:
        # all lookup tables share the same peewee database handle
        database = pwdb


class AssemblyLookupError(Exception):
    """Raised when no DataAssembly with the requested name is registered
    in the lookup database."""


def lookup_assembly(name):
    """Look up the AssemblyModel record registered under `name`.

    :param name: the registered name of the DataAssembly.
    :return: the matching AssemblyModel row.
    :raise AssemblyLookupError: if no assembly with that name is registered.
    """
    pwdb.connect(reuse_if_open=True)
    try:
        assy = AssemblyModel.get(AssemblyModel.name == name)
    except AssemblyModel.DoesNotExist as e:
        # chain the peewee error so the root cause is preserved in tracebacks
        # (the original code bound `e` but never used it)
        raise AssemblyLookupError("A DataAssembly named " + name + " was not found.") from e
    return assy


def merge_data_arrays(data_arrays):
Expand Down
Loading

0 comments on commit ae86c96

Please sign in to comment.