THINGS behavioral benchmark and odd-one-out model_helper (#434)
* integrate @linus-md's model-tools PR#70

brain-score/model-tools#70

* Use new load functions

* Update

* Include working notebook

* Add TODOs

* More TODOs

* Clean up imports

* refactor to pass list

* Implement ``calculate_similarity_matrix()``

* Implement preliminary ``calculate_choices()``

* Move benchmark draft to this PR

* Update

* Update benchmark

* Update benchmark.py to 2.0 standards

* Update hebart2023/test.py to 2.0 standards

* Update hebart2023/__init__.py to 2.0 standards

* Update benchmark.py

* Package triplets in assembly

* Add triplet test

* Make data compatible with interface

* Update sample.ipynb

* Update sample.ipynb

* Package choices

* Update benchmark.py

* Update test.py

* Update test.py

* Update behavior.py

* Update test_behavior.py

* Update draft

* Update

* Update behavior.py

* Fixed similarity_matrix indexing

* Scores for 3333 triplets

* sort stimuli

* Update sorting

* Fix stimulus_paths

* Running benchmark

* Delete draft.ipynb

* Update benchmark.py

* Update test.py

* Update test_behavior.py

* Update behavior.py

* add sample triplet

* add vectorized numpy choice function to use with full stimulus set

* Revert "add vectorized numpy choice function to use with full stimulus set"

This reverts commit 5131544.

* Add tests

* Update benchmark and tests

* Finalize behavior tests

* Fix benchmark test

* Remove slow test

* Fix typo in tutorial

* Speed up alexnet test

* Trigger CI

* Update brainscore_vision/benchmarks/hebart2023/test.py

Co-authored-by: Martin Schrimpf <[email protected]>

* Update brainscore_vision/benchmarks/hebart2023/test.py

Co-authored-by: Martin Schrimpf <[email protected]>

* Update brainscore_vision/benchmarks/hebart2023/benchmark.py

Co-authored-by: Martin Schrimpf <[email protected]>

* Update __init__.py

* Update benchmark.py

* Update test.py

* Explain noise ceiling

* Fix ``test_benchmark_registry()``

* Update ceiling

* Update test.py

* Fix typo

* All tests passing again

* Update benchmark.py

* Update test.py

* Add missing import

* load assembly and stimulus set inside test methods

---------

Co-authored-by: Linus Sommer <[email protected]>
Co-authored-by: linus-md <[email protected]>
3 people authored Mar 12, 2024
1 parent 5241295 commit 47ff835
Showing 9 changed files with 281 additions and 21 deletions.
5 changes: 5 additions & 0 deletions brainscore_vision/benchmarks/hebart2023/__init__.py
@@ -0,0 +1,5 @@
from brainscore_vision import benchmark_registry
from .benchmark import Hebart2023Match

benchmark_registry['Hebart2023-match'] = Hebart2023Match

76 changes: 76 additions & 0 deletions brainscore_vision/benchmarks/hebart2023/benchmark.py
@@ -0,0 +1,76 @@
import numpy as np
import pandas as pd
from brainio.stimuli import StimulusSet
from brainscore_vision import load_dataset, load_stimulus_set
from brainscore_vision.benchmarks import BenchmarkBase
from brainscore_vision.benchmark_helpers.screen import place_on_screen
from brainscore_vision.model_interface import BrainModel
from brainscore_vision.metrics import Score

BIBTEX = """@article{10.7554/eLife.82580,
author = {Hebart, Martin N and Contier, Oliver and Teichmann, Lina and Rockter, Adam H and Zheng, Charles Y and Kidder, Alexis and Corriveau, Anna and Vaziri-Pashkam, Maryam and Baker, Chris I},
journal = {eLife},
month = {feb},
pages = {e82580},
title = {THINGS-data, a multimodal collection of large-scale datasets for investigating object representations in human brain and behavior},
volume = 12,
year = 2023
}"""

class Hebart2023Match(BenchmarkBase):
    def __init__(self, similarity_measure='dot'):
        self._visual_degrees = 8
        self._number_of_trials = 1
        self._assembly = load_dataset('Hebart2023')
        self._stimulus_set = load_stimulus_set('Hebart2023')

        # The noise ceiling was computed by averaging the percentage of participants
        # who made the same choice for a given triplet. See the paper for more detail.
        super().__init__(
            identifier=f'Hebart2023Match_{similarity_measure}', version=1,
            ceiling_func=lambda: Score(0.6767),
            parent='Hebart2023',
            bibtex=BIBTEX
        )

    def set_number_of_triplets(self, n):
        self._assembly = self._assembly[:n]

    def __call__(self, candidate: BrainModel):
        # Create the new StimulusSet
        self.triplets = np.array([
            self._assembly.coords["image_1"].values,
            self._assembly.coords["image_2"].values,
            self._assembly.coords["image_3"].values
        ]).T.reshape(-1, 1)

        stimuli_data = [self._stimulus_set.loc[stim] for stim in self.triplets]
        stimuli = pd.concat(stimuli_data)
        stimuli.columns = self._stimulus_set.columns

        stimuli = StimulusSet(stimuli)
        stimuli.identifier = 'Hebart2023'
        stimuli.stimulus_paths = self._stimulus_set.stimulus_paths
        stimuli['stimulus_id'] = stimuli['stimulus_id'].astype(int)

        # Prepare the stimuli
        candidate.start_task(BrainModel.Task.odd_one_out)
        stimuli = place_on_screen(
            stimulus_set=stimuli,
            target_visual_degrees=candidate.visual_degrees(),
            source_visual_degrees=self._visual_degrees
        )

        # Run the model
        choices = candidate.look_at(stimuli, self._number_of_trials)

        # Score the model
        # We chose not to compute error estimates, but you could compute them
        # by splitting the data into five folds and computing the standard deviation.
        correct_choices = choices.values == self._assembly.coords["image_3"].values
        raw_score = np.sum(correct_choices) / len(choices)
        score = (raw_score - 1/3) / (self.ceiling - 1/3)
        score = Score(max(0, score))  # clip below-chance scores; keep Score type so .attrs below works
        score.attrs['raw'] = raw_score
        score.attrs['ceiling'] = self.ceiling
        return score
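The scoring comment above leaves error estimation to the reader. A minimal sketch of the suggested five-fold approach, assuming `correct_choices` is the boolean array computed in `__call__` (the helper name and the fabricated data are illustrative, not part of the benchmark):

import numpy as np

def score_error_estimate(correct_choices: np.ndarray, ceiling: float, n_folds: int = 5) -> float:
    # Split the per-triplet correctness flags into folds and normalize each
    # fold's accuracy the same way the benchmark normalizes its score.
    folds = np.array_split(correct_choices, n_folds)
    fold_scores = [(np.mean(fold) - 1/3) / (ceiling - 1/3) for fold in folds]
    return float(np.std(fold_scores))

# Example with fabricated data: 3333 triplets, ~45% correct
rng = np.random.default_rng(0)
error = score_error_estimate(rng.random(3333) < 0.45, ceiling=0.6767)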
17 changes: 17 additions & 0 deletions brainscore_vision/benchmarks/hebart2023/test.py
@@ -0,0 +1,17 @@
import pytest

from brainscore_vision import load_benchmark, load_model

@pytest.mark.private_access
def test_ceiling():
    benchmark = load_benchmark('Hebart2023-match')
    ceiling = benchmark.ceiling
    assert ceiling == pytest.approx(0.6767, abs=0.0001)

@pytest.mark.private_access
def test_alexnet_consistency():
    benchmark = load_benchmark('Hebart2023-match')
    benchmark.set_number_of_triplets(n=1000)
    model = load_model('alexnet')
    score = benchmark(model)
    assert score == pytest.approx(0.38, abs=0.02)
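For reference, a quick local run mirrors `test_alexnet_consistency` above; subsampling via `set_number_of_triplets` keeps runtime manageable (a sketch using the registry name registered in this PR — the triplet count here is arbitrary):

from brainscore_vision import load_benchmark, load_model

benchmark = load_benchmark('Hebart2023-match')   # registered in __init__.py above
benchmark.set_number_of_triplets(n=300)          # subsample for a quick sanity check
model = load_model('alexnet')
score = benchmark(model)
print(float(score), score.attrs['raw'], score.attrs['ceiling'])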
34 changes: 19 additions & 15 deletions brainscore_vision/data/hebart2023/test.py
@@ -1,38 +1,42 @@
import numpy as np
import pytest

-import brainscore
from brainio.stimuli import StimulusSet

+from brainscore_vision import load_stimulus_set, load_dataset
+from brainscore_vision.model_interface import BehavioralAssembly


@pytest.mark.memory_intense
@pytest.mark.private_access
class TestHebart2023:
-    assembly = brainscore.get_assembly('Hebart2023')
-    stimulus_set = brainscore.get_stimulus_set("Hebart2023")
-
    def test_assembly(self):
-        stimulus_id = self.assembly.coords["stimulus_id"]
-        triplet_id = self.assembly.coords["triplet_id"]
+        assembly = load_dataset('Hebart2023')
+
+        stimulus_id = assembly.coords["stimulus_id"]
+        triplet_id = assembly.coords["triplet_id"]
        assert len(stimulus_id) == len(triplet_id) == 453642
        assert len(np.unique(stimulus_id)) == 1854

-        image_1 = self.assembly.coords["image_1"]
-        image_2 = self.assembly.coords["image_2"]
-        image_3 = self.assembly.coords["image_3"]
+        image_1 = assembly.coords["image_1"]
+        image_2 = assembly.coords["image_2"]
+        image_3 = assembly.coords["image_3"]
        assert len(image_1) == len(image_2) == len(image_3) == 453642

    def test_assembly_stimulusset_ids_match(self):
-        stimulusset_ids = self.stimulus_set['stimulus_id']
+        stimulus_set = load_stimulus_set("Hebart2023")
+        assembly = load_dataset('Hebart2023')
+
+        stimulusset_ids = stimulus_set['stimulus_id']
        for assembly_stimulusid in ['image_1', 'image_2', 'image_3']:
-            assembly_values = self.assembly[assembly_stimulusid].values
+            assembly_values = assembly[assembly_stimulusid].values
            assert set(assembly_values) == set(stimulusset_ids), \
                f"Assembly stimulus id reference '{assembly_stimulusid}' does not match stimulus_set"

    def test_stimulus_set(self):
-        assert len(self.stimulus_set) == 1854
+        stimulus_set = load_stimulus_set("Hebart2023")
+        assert len(stimulus_set) == 1854
        assert {'unique_id', 'stimulus_id', 'filename',
                'WordNet_ID', 'Wordnet_ID2', 'Wordnet_ID3', 'Wordnet_ID4', 'WordNet_synonyms',
                'freq_1', 'freq_2', 'top_down_1', 'top_down_2', 'bottom_up', 'word_freq', 'word_freq_online',
-                'example_image', 'dispersion', 'dominant_part', 'rank'} == set(self.stimulus_set.columns)
-        assert isinstance(self.stimulus_set, StimulusSet)
+                'example_image', 'dispersion', 'dominant_part', 'rank'} == set(stimulus_set.columns)
+        assert isinstance(stimulus_set, StimulusSet)
brainscore_vision/model_helpers/brain_transformation/__init__.py
@@ -2,7 +2,7 @@
from brainscore_vision.model_helpers.brain_transformation.temporal import TemporalIgnore
from brainscore_vision.model_interface import BrainModel
from brainscore_vision.utils import LazyLoad
-from .behavior import BehaviorArbiter, LabelBehavior, ProbabilitiesMapping
+from .behavior import BehaviorArbiter, LabelBehavior, ProbabilitiesMapping, OddOneOut
from .neural import LayerMappedModel, LayerSelection, LayerScores

STANDARD_REGION_BENCHMARKS = {
@@ -44,8 +44,12 @@ def __init__(self, identifier,
        behavioral_readout_layer = behavioral_readout_layer or layers[-1]
        probabilities_behavior = ProbabilitiesMapping(identifier=identifier, activations_model=activations_model,
                                                      layer=behavioral_readout_layer)
+        odd_one_out = OddOneOut(identifier=identifier, activations_model=activations_model,
+                                layer=behavioral_readout_layer)
        self.behavior_model = BehaviorArbiter({BrainModel.Task.label: logits_behavior,
-                                               BrainModel.Task.probabilities: probabilities_behavior})
+                                               BrainModel.Task.probabilities: probabilities_behavior,
+                                               BrainModel.Task.odd_one_out: odd_one_out,
+                                               })
        self.do_behavior = False

    def visual_degrees(self) -> int:
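The registration above means a `ModelCommitment` can now be asked to perform the odd-one-out task directly. The dispatch pattern is roughly the following (a sketch of the arbiter idea, not the actual `BehaviorArbiter` source):

class TaskArbiterSketch:
    """Routes task calls to the handler registered for that task."""
    def __init__(self, mapping):
        self.mapping = mapping  # e.g. {BrainModel.Task.odd_one_out: OddOneOut(...)}
        self.current = None

    def start_task(self, task, *args, **kwargs):
        # Select the handler for this task and let it set itself up.
        self.current = self.mapping[task]
        self.current.start_task(task, *args, **kwargs)

    def look_at(self, stimuli, number_of_trials=1):
        # Forward stimuli to whichever handler start_task selected.
        return self.current.look_at(stimuli, number_of_trials)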
92 changes: 90 additions & 2 deletions brainscore_vision/model_helpers/brain_transformation/behavior.py
@@ -1,11 +1,15 @@
+import os
from collections import OrderedDict
+from typing import Union, List

import numpy as np
-import os
+import pandas as pd
+import xarray as xr
import sklearn.linear_model
import sklearn.multioutput

-from brainio.assemblies import walk_coords, array_is_element, BehavioralAssembly
+from brainio.assemblies import walk_coords, array_is_element, BehavioralAssembly, DataAssembly
from brainio.stimuli import StimulusSet
+from brainscore_vision.model_helpers.utils import make_list
from brainscore_vision.model_interface import BrainModel

@@ -215,3 +219,87 @@ def labels_to_indices(self, labels):
            indices.append(label2index[label])
        index2label = OrderedDict((index, label) for label, index in label2index.items())
        return indices, index2label


class OddOneOut(BrainModel):
    def __init__(self, identifier: str, activations_model, layer: Union[str, List[str]]):
        """
        :param identifier: a string to identify the model
        :param activations_model: the model from which to retrieve representations for stimuli
        :param layer: the single behavioral readout layer or a list of layers to read out of.
        """
        self._identifier = identifier
        self.activations_model = activations_model
        self.readout = make_list(layer)
        self.current_task = BrainModel.Task.odd_one_out
        self.similarity_measure = 'dot'

    @property
    def identifier(self):
        return self._identifier

    def start_task(self, task: BrainModel.Task):
        assert task == BrainModel.Task.odd_one_out
        self.current_task = task

    def look_at(self, triplets, number_of_trials=1):
        # Compute unique features and image paths
        stimuli = triplets.drop_duplicates(subset=['stimulus_id'])
        stimuli = stimuli.sort_values(by='stimulus_id')

        # Get features
        features = self.activations_model(stimuli, layers=self.readout)
        features = features.transpose('presentation', 'neuroid')

        # Compute similarity matrix
        similarity_matrix = self.calculate_similarity_matrix(features)

        # Compute choices
        triplets = np.array(triplets["stimulus_id"])
        assert len(triplets) % 3 == 0, "No. of stimuli must be a multiple of 3"
        choices = self.calculate_choices(similarity_matrix, triplets)

        # Return choices
        choices = BehavioralAssembly(
            choices,
            coords={'stimulus_id': triplets[2::3]},
            dims=['stimulus_id'])

        return choices

    def set_similarity_measure(self, similarity_measure):
        self.similarity_measure = similarity_measure

    def calculate_similarity_matrix(self, features):
        features = features.transpose('presentation', 'neuroid')
        values = features.values
        if self.similarity_measure == 'dot':
            similarity_matrix = np.dot(values, np.transpose(values))
        elif self.similarity_measure == 'cosine':
            row_norms = np.linalg.norm(values, axis=1).reshape(-1, 1)
            norm_product = np.dot(row_norms, row_norms.T)
            dot_product = np.dot(values, np.transpose(values))
            similarity_matrix = dot_product / norm_product
        else:
            raise ValueError(
                f"Unknown similarity_measure {self.similarity_measure} -- expected one of 'dot' or 'cosine'")

        similarity_matrix = DataAssembly(similarity_matrix, coords={
            **{f"{coord}_left": ('presentation_left', values) for coord, _, values in
               walk_coords(features['presentation'])},
            **{f"{coord}_right": ('presentation_right', values) for coord, _, values in
               walk_coords(features['presentation'])}
        }, dims=['presentation_left', 'presentation_right'])
        return similarity_matrix

    def calculate_choices(self, similarity_matrix, triplets):
        triplets = np.array(triplets).reshape(-1, 3)
        choice_predictions = []
        for triplet in triplets:
            i, j, k = triplet
            sims = [similarity_matrix.sel(stimulus_id_left=i, stimulus_id_right=j),
                    similarity_matrix.sel(stimulus_id_left=i, stimulus_id_right=k),
                    similarity_matrix.sel(stimulus_id_left=j, stimulus_id_right=k)]
            idx = triplet[2 - np.argmax(sims)]
            choice_predictions.append(idx)
        return choice_predictions
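The index arithmetic in `calculate_choices` is easy to misread: the model picks the stimulus that is *not* in the most similar pair, and because `sims` is ordered (i,j), (i,k), (j,k), the odd-one-out index is `2 - argmax`. A toy example with made-up similarities:

import numpy as np

sim = np.array([[1.0, 0.9, 0.2],
                [0.9, 1.0, 0.1],
                [0.2, 0.1, 1.0]])           # items 0 and 1 are most alike
triplet = np.array(['img_a', 'img_b', 'img_c'])
sims = [sim[0, 1], sim[0, 2], sim[1, 2]]    # pairs (i,j), (i,k), (j,k)
# (i,j) is the most similar pair, so the third item is the odd one out
assert triplet[2 - np.argmax(sims)] == 'img_c'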
4 changes: 2 additions & 2 deletions docs/source/modules/benchmark_tutorial.rst
@@ -129,8 +129,8 @@ Here is an example of a BehavioralAssembly:
               == len(set(assembly['distractor_object'].values)) == 2
    # upload to S3
-    package_data_assembly(assembly, assembly_identifier=assembly.name, ,
-                          assembly_class='BehavioralAssembly'
+    package_data_assembly(assembly, assembly_identifier=assembly.name,
+                          assembly_class='BehavioralAssembly',
                           stimulus_set_identifier=stimuli.name)  # link to the StimulusSet

In our experience, it is generally a good idea to include as much metadata as possible (on both StimulusSet and
Expand Down
[One changed file — a notebook, per the commit history — could not be displayed by the diff viewer.]
66 changes: 66 additions & 0 deletions tests/test_model_helpers/brain_transformation/test_behavior.py
@@ -5,6 +5,7 @@
import pytest
from pytest import approx

import brainscore_vision
from brainio.assemblies import BehavioralAssembly
from brainio.stimuli import StimulusSet
from brainscore_vision.benchmark_helpers.screen import place_on_screen
@@ -79,6 +80,14 @@ def mock_stimulus_set():
    stimuli.identifier = 'TestLabelBehavior.rgb_1_2'
    return stimuli

def mock_triplet():
    stimuli = StimulusSet({'stimulus_id': ['1', '2', '3'], 'filename': ['rgb1', 'rgb2', 'rgb3']})
    stimuli.stimulus_paths = {'1': os.path.join(os.path.dirname(__file__), 'rgb1.jpg'),
                              '2': os.path.join(os.path.dirname(__file__), 'rgb2.jpg'),
                              '3': os.path.join(os.path.dirname(__file__), 'rgb3.jpg')}
    stimuli.identifier = 'TestLabelBehavior.rgb_1_2_3'
    return stimuli


class TestLogitsBehavior:
"""
@@ -125,3 +134,60 @@ def test_creates_probabilities(self):
                            probabilities.sel(stimulus_id='rgb2', choice='label2').values)
        assert probabilities.sel(stimulus_id='rgb1', choice='label1') + \
               probabilities.sel(stimulus_id='rgb1', choice='label2') == approx(1)


class TestOddOneOut:
    def test_import(self):
        from brainscore_vision.model_helpers.brain_transformation.behavior import OddOneOut

    def test_dot(self):
        from brainscore_vision.model_helpers.brain_transformation.behavior import OddOneOut

        # Set up the task
        activations_model = pytorch_custom()
        brain_model = ModelCommitment(
            identifier=activations_model.identifier,
            activations_model=activations_model,
            layers=["relu2"],
            behavioral_readout_layer='relu2')

        brain_model = OddOneOut(identifier='pytorch-custom',
                                activations_model=activations_model,
                                layer=["relu2"])

        # Test similarity measure functionality
        assert brain_model.similarity_measure == 'dot'

        # Test the task and output
        stimuli = mock_triplet()
        brain_model.start_task(BrainModel.Task.odd_one_out)
        choice = brain_model.look_at(stimuli)
        assert isinstance(choice, BehavioralAssembly)
        assert len(choice.values) == 1

    def test_cosine(self):
        from brainscore_vision.model_helpers.brain_transformation.behavior import OddOneOut

        # Set up the task
        activations_model = pytorch_custom()
        brain_model = ModelCommitment(
            identifier=activations_model.identifier,
            activations_model=activations_model,
            layers=["relu2"],
            behavioral_readout_layer='relu2')

        brain_model = OddOneOut(identifier='pytorch-custom',
                                activations_model=activations_model,
                                layer=["relu2"])

        # Test similarity measure functionality
        brain_model.set_similarity_measure('cosine')
        assert brain_model.similarity_measure == 'cosine'

        # Test the task and output
        stimuli = mock_triplet()
        brain_model.start_task(BrainModel.Task.odd_one_out)
        choice = brain_model.look_at(stimuli)
        assert isinstance(choice, BehavioralAssembly)
        assert len(choice.values) == 1
