From 4e308c04762c7cca610adfc83695f15328ffed06 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 4 Sep 2024 17:29:16 +0200
Subject: [PATCH 01/29] add wordnet-mapped indices for Coggan 2024

---
 .../model_helpers/brain_transformation/behavior.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/brainscore_vision/model_helpers/brain_transformation/behavior.py b/brainscore_vision/model_helpers/brain_transformation/behavior.py
index 2c01012a0..89f8a4ff3 100644
--- a/brainscore_vision/model_helpers/brain_transformation/behavior.py
+++ b/brainscore_vision/model_helpers/brain_transformation/behavior.py
@@ -154,6 +154,14 @@ class LabelToImagenetIndices:
     shovel_indices = [792]
     # truck indices used as defined by Geirhos et al., 2021.
 
+    # added from the Coggan 2024 behavioral benchmark:
+    bison_indices = [347]
+    hare_indices = [331]
+    jeep_indices = [609]
+    teapot_indices = [849]
+
+    # car, bear, lamp, elephant
+
     @classmethod
     def label_to_indices(cls, label):
         # for handling multi-word labels given by models or benchmarks

From fc2de9dd5e4331df246b4c0b71faea97ef40fa18 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 4 Sep 2024 17:38:37 +0200
Subject: [PATCH 02/29] update comment about other indices

---
 .../model_helpers/brain_transformation/behavior.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/brainscore_vision/model_helpers/brain_transformation/behavior.py b/brainscore_vision/model_helpers/brain_transformation/behavior.py
index 89f8a4ff3..3e036bf25 100644
--- a/brainscore_vision/model_helpers/brain_transformation/behavior.py
+++ b/brainscore_vision/model_helpers/brain_transformation/behavior.py
@@ -160,7 +160,8 @@ class LabelToImagenetIndices:
     jeep_indices = [609]
     teapot_indices = [849]
 
-    # car, bear, lamp, elephant
+    # car, bear, and elephant indices used as defined by Geirhos et al., 2021.
+    # lamp indices used as defined by the Scialom2024 benchmark
 
     @classmethod
     def label_to_indices(cls, label):
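For orientation, index lists like the ones added above are what lets a 1000-way ImageNet model answer a benchmark's category-labeling task. The following is a standalone sketch, not the brainscore_vision implementation: the dictionary mirrors the class attributes added in the two patches above, and the sum-over-synsets decision rule is an assumption for illustration.

    import numpy as np

    # hypothetical 1000-way ImageNet probabilities for a single image
    probabilities = np.random.rand(1000)
    probabilities /= probabilities.sum()

    # mirrors the LabelToImagenetIndices attributes added above
    label_to_indices = {
        'bison': [347],
        'hare': [331],
        'jeep': [609],
        'teapot': [849],
    }

    # score each candidate label by the summed probability mass of its
    # ImageNet synsets, then take the best-scoring label as the choice
    label_scores = {label: probabilities[indices].sum()
                    for label, indices in label_to_indices.items()}
    choice = max(label_scores, key=label_scores.get)
    print(choice)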
From 118141723b187864ac3a501389f06d4dabacc005 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Thu, 5 Sep 2024 11:54:36 +0200
Subject: [PATCH 03/29] add labeling benchmark and engineering benchmark

---
 .../coggan2024_behavior/benchmark.py | 61 +++++++++++++++++--
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index 14db4121f..c00a52335 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -1,12 +1,10 @@
 # Created by David Coggan on 2024 06 25
 
 import numpy as np
-from brainio.assemblies import DataAssembly, BehavioralAssembly
-from brainscore_vision import load_stimulus_set, load_dataset
+from brainscore_vision import load_stimulus_set, load_dataset, load_metric
 from brainscore_vision.benchmarks import BenchmarkBase
 from brainscore_vision.benchmark_helpers.screen import place_on_screen
 from brainscore_core.metrics import Score
-from brainscore_vision.metric_helpers import Defaults as XarrayDefaults
 from brainscore_vision.model_interface import BrainModel
 from brainscore_vision.utils import LazyLoad
 from scipy.stats import sem
@@ -21,6 +19,39 @@
 url = {},
 journal = {in prep}}"""
 
+
+class Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity(BenchmarkBase):
+    def __init__(self):
+        self._metric = load_metric('conditionwise_accuracy_distance')
+        self._fitting_stimuli = load_stimulus_set('Coggan2024_behavior_fitting')  # this fails if wrapped by LazyLoad
+        self._assembly = LazyLoad(lambda: load_dataset('Coggan2024_behavior'))
+        self._visual_degrees = 10
+        self._number_of_trials = 1
+        self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
+        super(Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity, self).__init__(
+            identifier='tong.Coggan2024_behavior-LabelingConditionWiseAccuracySimilarity',
+            version=1,
+            ceiling_func=lambda: self._metric.ceiling(self._assembly),
+            parent='behavior',
+            bibtex=BIBTEX,
+        )
+
+    def __call__(self, candidate: BrainModel):
+        choice_labels = set(self._assembly['object_class'].values)
+        choice_labels = list(sorted(choice_labels))
+        candidate.start_task(BrainModel.Task.label, choice_labels)
+        stimulus_set = place_on_screen(self._assembly.stimulus_set,
+                                       target_visual_degrees=candidate.visual_degrees(),
+                                       source_visual_degrees=self._visual_degrees)
+        labels = candidate.look_at(stimulus_set, number_of_trials=self._number_of_trials)
+        raw_score = self._metric(labels, self._assembly)
+        ceiling = self.ceiling
+        score = raw_score / ceiling
+        score.attrs['raw'] = raw_score
+        score.attrs['ceiling'] = ceiling
+        return score
+
+
 class Coggan2024_behavior_ConditionWiseAccuracySimilarity(BenchmarkBase):
 
     """
@@ -125,5 +156,27 @@ def ceiler(score: Score, ceiling: Score) -> Score:
     return ceiled_score
 
 
+class Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy(BenchmarkBase):
+    def __init__(self):
+        self._metric = load_metric('accuracy')
+        self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
+        self._stimulus_set = load_dataset('Coggan2024_behavior').stimulus_set
+        super(Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy, self).__init__(
+            identifier='tong.Coggan2024_behavior-LabelingConditionWiseEngineeringAccuracy',
+            version=1,
+            ceiling_func=lambda: Score(1),
+            parent='Coggan2024-top1',
+            bibtex=BIBTEX,
+        )
-
+    def __call__(self, candidate: BrainModel):
+        choice_labels = set(self._assembly['object_class'].values)
+        choice_labels = list(sorted(choice_labels))
+        candidate.start_task(BrainModel.Task.label, choice_labels)
+        labels = candidate.look_at(self._stimulus_set)
+        raw_score = self._metric(labels, self._stimulus_set['object_class'].values)
+        ceiling = self.ceiling
+        score = raw_score / ceiling
+        score.attrs['raw'] = raw_score
+        score.attrs['ceiling'] = ceiling
+        return score
\ No newline at end of file

From 5cd3df42808225186da41a7305bbc91689ec2b27 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 1 Oct 2024 11:12:23 +0200
Subject: [PATCH 04/29] hook up benchmark to the new accuracy distance metric

---
 brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index 6e1d019d1..8e7a0b50f 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -22,7 +22,7 @@ class Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity(BenchmarkBase):
     def __init__(self):
-        self._metric = load_metric('conditionwise_accuracy_distance')
+        self._metric = load_metric('accuracy_distance')
         self._fitting_stimuli = load_stimulus_set('Coggan2024_behavior_fitting')  # this fails if wrapped by LazyLoad
         self._assembly = LazyLoad(lambda: load_dataset('Coggan2024_behavior'))
         self._visual_degrees = 10
@@ -44,7 +44,7 @@ def __call__(self, candidate: BrainModel):
                                        target_visual_degrees=candidate.visual_degrees(),
                                        source_visual_degrees=self._visual_degrees)
         labels = candidate.look_at(stimulus_set, number_of_trials=self._number_of_trials)
-        raw_score = self._metric(labels, self._assembly)
+        raw_score = self._metric(labels, self._assembly, variables=['occluder_type', 'occluder_color'])
         ceiling = self.ceiling
         score = raw_score / ceiling
         score.attrs['raw'] = raw_score

From 08cf2364e2f1bc6ddbe60110ce34fa84e2103074 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 1 Oct 2024 11:15:18 +0200
Subject: [PATCH 05/29] fix reference to the benchmark

---
 .../coggan2024_behavior/benchmark.py | 57 ++++++++++---------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index 8e7a0b50f..275acebe0 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -52,8 +52,35 @@ def __call__(self, candidate: BrainModel):
         return score
 
 
-class Coggan2024_behavior_ConditionWiseAccuracySimilarity(BenchmarkBase):
+class Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy(BenchmarkBase):
+    def __init__(self):
+        self._metric = load_metric('accuracy')
+        self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
+        self._stimulus_set = load_dataset('Coggan2024_behavior').stimulus_set
+        super(Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy, self).__init__(
+            identifier='tong.Coggan2024_behavior-LabelingConditionWiseEngineeringAccuracy',
+            version=1,
+            ceiling_func=lambda: Score(1),
+            parent='Coggan2024-top1',
+            bibtex=BIBTEX,
+        )
+
+    def __call__(self, candidate: BrainModel):
+        choice_labels = set(self._assembly['object_class'].values)
+        choice_labels = list(sorted(choice_labels))
+        candidate.start_task(BrainModel.Task.label, choice_labels)
+        labels = candidate.look_at(self._stimulus_set)
+        raw_score = self._metric(labels, self._stimulus_set['object_class'].values)
+        ceiling = self.ceiling
+        score = raw_score / ceiling
+        score.attrs['raw'] = raw_score
+        score.attrs['ceiling'] = ceiling
+        return score
+
+class Coggan2024_behavior_ConditionWiseAccuracySimilarity_Correlation(BenchmarkBase):
+    ### DEPRECATED IN FAVOR OF Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity
+    ### Here for future comparison/reference/proofing
     """
     This benchmark measures classification accuracy for a set of occluded object images, then attains the mean
     accuracy for each of the 18 occlusion conditions. This is then correlated with the corresponding accuracies for
     each of the
@@ -68,7 +95,7 @@ def __init__(self):
         self._visual_degrees = 10
         self._number_of_trials = 1
         self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
-        super(Coggan2024_behavior_ConditionWiseAccuracySimilarity, self).__init__(
+        super(Coggan2024_behavior_ConditionWiseAccuracySimilarity_Correlation, self).__init__(
            identifier='tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity',
            version=1,
            ceiling_func=lambda df: get_noise_ceiling(df),
@@ -158,29 +185,3 @@ def ceiler(score: Score, ceiling: Score) -> Score:
     ceiled_score.attrs[Score.RAW_VALUES_KEY] = score
     ceiled_score.attrs['ceiling'] = ceiling
     return ceiled_score
-
-
-class Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy(BenchmarkBase):
-    def __init__(self):
-        self._metric = load_metric('accuracy')
-        self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
-        self._stimulus_set = load_dataset('Coggan2024_behavior').stimulus_set
-        super(Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy, self).__init__(
-            identifier='tong.Coggan2024_behavior-LabelingConditionWiseEngineeringAccuracy',
-            version=1,
-            ceiling_func=lambda: Score(1),
-            parent='Coggan2024-top1',
-            bibtex=BIBTEX,
-        )
-
-    def __call__(self, candidate: BrainModel):
-        choice_labels = set(self._assembly['object_class'].values)
-        choice_labels = list(sorted(choice_labels))
-        candidate.start_task(BrainModel.Task.label, choice_labels)
-        labels = candidate.look_at(self._stimulus_set)
-        raw_score = self._metric(labels, self._stimulus_set['object_class'].values)
-        ceiling = self.ceiling
-        score = raw_score / ceiling
-        score.attrs['raw'] = raw_score
-        score.attrs['ceiling'] = ceiling
-        return score
\ No newline at end of file
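The benchmarks above all follow the same start_task / look_at / score flow. A minimal toy version of that loop is sketched below; ToyModel and its methods are illustrative stand-ins, not the real BrainModel interface, and the decision rule is deliberately trivial.

    import numpy as np

    class ToyModel:
        # stand-in for a BrainModel candidate; names are illustrative
        def visual_degrees(self):
            return 8

        def start_task(self, task, choice_labels):
            self.labels = list(choice_labels)

        def look_at(self, stimulus_set, number_of_trials=1):
            # pretend the model answers every stimulus with the first label
            return [self.labels[0] for _ in stimulus_set]

    stimulus_set = ['image1.png', 'image2.png', 'image3.png']
    truths = ['bison', 'hare', 'bison']

    model = ToyModel()
    model.start_task('label', sorted(set(truths)))
    predictions = model.look_at(stimulus_set)
    raw_accuracy = np.mean([p == t for p, t in zip(predictions, truths)])
    print(raw_accuracy)  # unceiled, engineering-style accuracy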
From 28ff288ea9b5eceb2461baeff9ef46217a2c60eb Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 1 Oct 2024 14:50:49 +0200
Subject: [PATCH 06/29] add sports car indices

---
 .../benchmarks/coggan2024_behavior/__init__.py     | 6 ++----
 .../benchmarks/coggan2024_behavior/benchmark.py    | 5 +++++
 .../model_helpers/brain_transformation/behavior.py | 1 +
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
index a6ca7c28c..86e868958 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
@@ -1,8 +1,6 @@
 # Created by David Coggan on 2024 06 25
 
 from brainscore_vision import benchmark_registry
-from .benchmark import (
-    Coggan2024_behavior_ConditionWiseAccuracySimilarity)
+from .benchmark import Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity
 
-benchmark_registry['Coggan2024_behavior-ConditionWiseAccuracySimilarity'] = (
-    Coggan2024_behavior_ConditionWiseAccuracySimilarity)
+benchmark_registry['Coggan2024_behavior-ConditionWiseAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index 275acebe0..d5dbd6a9f 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -53,6 +53,11 @@
 class Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy(BenchmarkBase):
+    # TODO: run locally
+    # TODO: check data format: need sports car indices => just the same as car
+    # TODO: is metric working?
+    # TODO: correct ish scores?
+    # TODO: tests?
     def __init__(self):
         self._metric = load_metric('accuracy')
         self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
         self._stimulus_set = load_dataset('Coggan2024_behavior').stimulus_set

diff --git a/brainscore_vision/model_helpers/brain_transformation/behavior.py b/brainscore_vision/model_helpers/brain_transformation/behavior.py
index 3e036bf25..af34d11e8 100644
--- a/brainscore_vision/model_helpers/brain_transformation/behavior.py
+++ b/brainscore_vision/model_helpers/brain_transformation/behavior.py
@@ -159,6 +159,7 @@ class LabelToImagenetIndices:
     hare_indices = [331]
     jeep_indices = [609]
     teapot_indices = [849]
+    sportscar_indices = [436, 511, 817]
 
     # car, bear, and elephant indices used as defined by Geirhos et al., 2021.
     # lamp indices used as defined by the Scialom2024 benchmark

From 0c08c8dcb41d72ff7a3f76d4ded3d27a6c45cb82 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 2 Oct 2024 16:03:44 +0200
Subject: [PATCH 07/29] fix dimension bug, ceiling estimate, and uneven design
 metric limitations

---
 .../metrics/accuracy_distance/metric.py | 90 ++++++++++++-------
 1 file changed, 59 insertions(+), 31 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index eb47e3bba..28201f305 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -1,4 +1,6 @@
 import itertools
+from functools import reduce
+import operator
 
 import numpy as np
 
@@ -20,41 +22,16 @@ class AccuracyDistance(Metric):
     more target-like pattern of performance across conditions.
     """
     def __call__(self, source: BehavioralAssembly, target:
-                 BehavioralAssembly, variables: tuple=()) -> Score:
+                 BehavioralAssembly, variables: tuple = ()) -> Score:
         """Target should be the entire BehavioralAssembly, containing truth values."""
         subjects = self.extract_subjects(target)
         subject_scores = []
         for subject in subjects:
             subject_assembly = target.sel(subject=subject)
-
-            # compute single score across the entire dataset
-            if len(variables) == 0:
-                subject_score = self.compare_single_subject(source, subject_assembly)
-
-            # compute scores for each condition, then average
-            else:
-                cond_scores = []
-
-                # get iterator across all combinations of variables
-                if len(variables) == 1:
-                    conditions = set(subject_assembly[variables[0]].values)
-                    conditions = [[c] for c in conditions]  # to mimic itertools.product
-                else:
-                    conditions = itertools.product(
-                        *[set(subject_assembly[v].values) for v in variables])
-
-                # loop over conditions and compute scores
-                for cond in conditions:
-                    indexers = {v: cond[i] for i, v in enumerate(variables)}
-                    subject_cond_assembly = subject_assembly.sel(**indexers)
-                    source_cond_assembly = source.sel(**indexers)
-                    # to accommodate unbalanced designs, skip combinations of
-                    # variables that don't exist in both assemblies
-                    if len(subject_cond_assembly) and len(source_cond_assembly):
-                        cond_scores.append(self.compare_single_subject(
-                            source_cond_assembly, subject_cond_assembly))
-                subject_score = Score(np.mean(cond_scores))
+            subject_score = self.condition_filtered_score_per_subject_source_pair(source=source,
+                                                                                  subject=subject_assembly,
+                                                                                  variables=variables)
 
             subject_score = subject_score.expand_dims('subject')
             subject_score['subject'] = 'subject', [subject]
@@ -89,13 +66,16 @@ def compare_single_subject(self, source: BehavioralAssembly, target: BehavioralA
 
         return Score(relative_distance)
 
-    def ceiling(self, assembly):
+    def ceiling(self, assembly, variables = ()):
         subjects = self.extract_subjects(assembly)
         subject_scores = []
         for subject1, subject2 in itertools.combinations(subjects, 2):
             subject1_assembly = assembly.sel(subject=subject1)
             subject2_assembly = assembly.sel(subject=subject2)
-            pairwise_score = self.compare_single_subject(subject1_assembly, subject2_assembly)
+
+            pairwise_score = self.condition_filtered_score_per_subject_source_pair(
+                subject1_assembly, subject2_assembly, variables=variables)
+
             pairwise_score = pairwise_score.expand_dims('subject')
             pairwise_score['subject_left'] = 'subject', [subject1]
             pairwise_score['subject_right'] = 'subject', [subject2]
@@ -107,3 +87,51 @@ def extract_subjects(self, assembly):
         return list(sorted(set(assembly['subject'].values)))
+
+    def condition_filtered_score_per_subject_source_pair(self, source, subject, variables):
+        # compute single score across the entire dataset
+        if len(variables) == 0:
+            subject_score = self.compare_single_subject(source, subject)
+
+        # compute scores for each condition, then average
+        else:
+            cond_scores = []
+            # get iterator across all combinations of variables
+            if len(variables) == 1:
+                conditions = set(subject[variables[0]].values)
+                conditions = [[c] for c in conditions]  # to mimic itertools.product
+            else:
+                conditions = itertools.product(
+                    *[set(subject[v].values) for v in variables])
+
+            # loop over conditions and compute scores
+            for cond in conditions:
+                # filter assemblies for selected condition
+                subject_cond_assembly = self.get_condition_filtered_assembly(subject, variables, cond)
+                source_cond_assembly = self.get_condition_filtered_assembly(source, variables, cond)
+                # select only the values in source_cond_assembly that has the same 'stimulus_id' as
+                # subject_cond_assembly to accommodate comparisons where not all subjects saw all the same stimuli
+                stimulus_id_mask = source_cond_assembly['stimulus_id'].isin(
+                    subject_cond_assembly['stimulus_id'].values)
+                source_cond_assembly = source_cond_assembly.where(stimulus_id_mask, drop=True)
+                # to accommodate cases where not all conditions are present in both assemblies, filter out
+                # calculation of the metric for cases where the
+                if len(subject_cond_assembly) and len(source_cond_assembly):
+                    cond_scores.append(self.compare_single_subject(
+                        source_cond_assembly, subject_cond_assembly))
+
+            subject_score = Score(np.mean(cond_scores))
+        return subject_score
+
+    @staticmethod
+    def get_condition_filtered_assembly(assembly, variables, cond):
+        # get the indexers for the condition
+        indexers = {v: cond[i] for i, v in enumerate(variables)}
+        # convert indexers into a list of boolean arrays for the assembly values
+        assembly_indexers = [(assembly[key] == value) for key, value in indexers.items()]
+        # combine the different conditions into an AND statement to require all conditions simultaneously
+        condition = reduce(operator.and_, assembly_indexers)
+        # filter the assembly based on the condition
+        condition_filtered_assembly = assembly.where(condition, drop=True)
+        return condition_filtered_assembly
+
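The core of AccuracyDistance, before the later chance-level patch, is the relative-distance formula in compare_single_subject. Here is a self-contained worked example of that formula, assuming only what the patch above shows:

    import numpy as np

    def accuracy_distance(source_mean, target_mean):
        # mirrors compare_single_subject above: 1 - |difference|, scaled by
        # the largest difference possible given the target accuracy
        maximum_distance = np.max([1 - target_mean, target_mean])
        return 1 - np.abs(source_mean - target_mean) / maximum_distance

    # model at 60% vs. a subject at 80%: the maximum possible distance is
    # 0.8, so the score is 1 - 0.2 / 0.8 = 0.75
    print(accuracy_distance(0.6, 0.8))  # 0.75

    # a perfect accuracy match scores 1
    print(accuracy_distance(0.8, 0.8))  # 1.0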
From 9d5cf936ae96801a91dce865a34ee465fe62e3d5 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 2 Oct 2024 16:04:50 +0200
Subject: [PATCH 08/29] add coord truth to the assembly to conform to the
 labeling task

---
 .../benchmarks/coggan2024_behavior/benchmark.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index d5dbd6a9f..fa0eca8bd 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -25,6 +25,8 @@ def __init__(self):
         self._metric = load_metric('accuracy_distance')
         self._fitting_stimuli = load_stimulus_set('Coggan2024_behavior_fitting')  # this fails if wrapped by LazyLoad
         self._assembly = LazyLoad(lambda: load_dataset('Coggan2024_behavior'))
+        self._assembly['truth'] = self._assembly['object_class']  # the assembly is missing a 'truth' column which is
+        # required by the labeling task
         self._visual_degrees = 10
         self._number_of_trials = 1
         self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
@@ -53,11 +55,6 @@
 class Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy(BenchmarkBase):
-    # TODO: run locally
-    # TODO: check data format: need sports car indices => just the same as car
-    # TODO: is metric working?
-    # TODO: correct ish scores?
-    # TODO: tests?
     def __init__(self):
         self._metric = load_metric('accuracy')
         self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
         self._stimulus_set = load_dataset('Coggan2024_behavior').stimulus_set

From d8892ac3fd342ba5e2047b93d5d4bc9204f19551 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 2 Oct 2024 16:08:25 +0200
Subject: [PATCH 09/29] add a comment

---
 brainscore_vision/metrics/accuracy_distance/metric.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index 28201f305..72fe2fb34 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -115,7 +115,7 @@
                     subject_cond_assembly['stimulus_id'].values)
                 source_cond_assembly = source_cond_assembly.where(stimulus_id_mask, drop=True)
                 # to accommodate cases where not all conditions are present in both assemblies, filter out
-                # calculation of the metric for cases where the
+                # calculation of the metric for cases where either assembly has no matches to variables (empty)
                 if len(subject_cond_assembly) and len(source_cond_assembly):
                     cond_scores.append(self.compare_single_subject(
                         source_cond_assembly, subject_cond_assembly))

From 6873f93d4818a0e43da3cd8e14e6431cd3ca0bc4 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 2 Oct 2024 16:15:45 +0200
Subject: [PATCH 10/29] add variables to ceiling

---
 brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index fa0eca8bd..3cd685597 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -33,7 +33,8 @@ def __init__(self):
         super(Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity, self).__init__(
             identifier='tong.Coggan2024_behavior-LabelingConditionWiseAccuracySimilarity',
             version=1,
-            ceiling_func=lambda: self._metric.ceiling(self._assembly),
+            ceiling_func=lambda: self._metric.ceiling(self._assembly,
+                                                      variables=['occluder_type', 'visibility', 'occluder_color']),
             parent='behavior',
             bibtex=BIBTEX,
         )

From 74f278066daebe8094573711df39916fdac34e18 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 8 Oct 2024 10:33:03 +0200
Subject: [PATCH 11/29] remove stimulus id requirement from condition-wise
 accuracy distance

---
 .../benchmarks/coggan2024_behavior/benchmark.py | 12 +++++++++++-
 .../metrics/accuracy_distance/metric.py         | 16 ++++++++++------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index 3cd685597..49952a295 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -26,7 +26,7 @@ def __init__(self):
         self._fitting_stimuli = load_stimulus_set('Coggan2024_behavior_fitting')  # this fails if wrapped by LazyLoad
         self._assembly = LazyLoad(lambda: load_dataset('Coggan2024_behavior'))
         self._assembly['truth'] = self._assembly['object_class']  # the assembly is missing a 'truth' column which is
-        # required by the labeling task
+                                                                   # required by the labeling task
         self._visual_degrees = 10
         self._number_of_trials = 1
         self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
@@ -188,3 +188,13 @@ def ceiler(score: Score, ceiling: Score) -> Score:
     ceiled_score.attrs[Score.RAW_VALUES_KEY] = score
     ceiled_score.attrs['ceiling'] = ceiling
     return ceiled_score
+
+
+def remove_nans(data):
+    """
+    removes nans from the data and replaces them with a string 'none'. uses pandas to simultaneously handle numeric
+    and non-numeric data.
+    """
+    for coord in data.coords:
+        data[coord] = data[coord].where(~pd.isna(data[coord]), 'none')
+    return data
\ No newline at end of file

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index 72fe2fb34..a782c1a59 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -101,19 +101,23 @@ def condition_filtered_score_per_subject_source_pair(self, source, subject, vari
                 conditions = set(subject[variables[0]].values)
                 conditions = [[c] for c in conditions]  # to mimic itertools.product
             else:
+                # get all combinations of variables that are present in both assemblies
                 conditions = itertools.product(
-                    *[set(subject[v].values) for v in variables])
+                    *[set(subject[v].values).intersection(set(source[v].values)) for v in variables]
+                )
 
             # loop over conditions and compute scores
+            # TODO: 91 conditions?? where do they come from?
+            # TODO: what did which participants do? is grouping across visibility correct or not? it does not look
+            #  like any participants did the same combination of conditions
+
+            # TODO: but the above is with the caveat that the stimulus_id field is NOT actually the stimulus id.
+            #  it contains the subject number and trial number, in addition to a 5-digit number that may or may not
+            #  be the actual stimulus_id. we should filter based on that, but need to clean up the data to do that.
             for cond in conditions:
                 # filter assemblies for selected condition
                 subject_cond_assembly = self.get_condition_filtered_assembly(subject, variables, cond)
                 source_cond_assembly = self.get_condition_filtered_assembly(source, variables, cond)
-                # select only the values in source_cond_assembly that has the same 'stimulus_id' as
-                # subject_cond_assembly to accommodate comparisons where not all subjects saw all the same stimuli
-                stimulus_id_mask = source_cond_assembly['stimulus_id'].isin(
-                    subject_cond_assembly['stimulus_id'].values)
-                source_cond_assembly = source_cond_assembly.where(stimulus_id_mask, drop=True)
                 # to accommodate cases where not all conditions are present in both assemblies, filter out
                 # calculation of the metric for cases where either assembly has no matches to variables (empty)
                 if len(subject_cond_assembly) and len(source_cond_assembly):
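The intersection added in the patch above is what keeps unbalanced designs from crashing the condition loop. A small standalone demonstration of that combination-building step (toy condition sets, not the Coggan data):

    import itertools

    # occluder conditions present for a human subject vs. shown to a model
    subject_conditions = {'occluder_type': {'bars', 'blobs'},
                          'occluder_color': {'black', 'white'}}
    source_conditions = {'occluder_type': {'bars', 'blobs', 'dots'},
                         'occluder_color': {'black', 'white'}}

    variables = ['occluder_type', 'occluder_color']
    # same idea as the itertools.product(...intersection...) call above:
    # only combinations present in BOTH assemblies get scored
    conditions = list(itertools.product(
        *[subject_conditions[v] & source_conditions[v] for v in variables]))
    print(conditions)  # 2 x 2 = 4 combinations; 'dots' is dropped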
From c0320169b31b4a1a6525bd4afc23c8b36e031e30 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Thu, 17 Oct 2024 17:25:14 +0200
Subject: [PATCH 12/29] fix bug with stimulus selection

---
 .../metrics/accuracy_distance/metric.py | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index a782c1a59..0090bf772 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -51,7 +51,10 @@ def aggregate(cls, scores):
     def compare_single_subject(self, source: BehavioralAssembly, target: BehavioralAssembly):
         source = source.sortby('stimulus_id')
         target = target.sortby('stimulus_id')
-        assert (target['stimulus_id'].values == source['stimulus_id'].values).all()
+
+        # we used to assert stimulus_ids being equal here, but since this is not an image-level metric, and because
+        # some benchmarks (e.g. Coggan2024) show different images from the same categories to humans, the metric
+        # does not guarantee that the stimulus_ids are the same.
 
         # .flatten() because models return lists of lists, and here we compare subject-by-subject
         source_correct = source.values.flatten() == target['truth'].values
@@ -107,13 +110,6 @@ def condition_filtered_score_per_subject_source_pair(self, source, subject, vari
                 )
 
             # loop over conditions and compute scores
-            # TODO: 91 conditions?? where do they come from?
-            # TODO: what did which participants do? is grouping across visibility correct or not? it does not look
-            #  like any participants did the same combination of conditions
-
-            # TODO: but the above is with the caveat that the stimulus_id field is NOT actually the stimulus id.
-            #  it contains the subject number and trial number, in addition to a 5-digit number that may or may not
-            #  be the actual stimulus_id. we should filter based on that, but need to clean up the data to do that.
             for cond in conditions:
                 # filter assemblies for selected condition
                 subject_cond_assembly = self.get_condition_filtered_assembly(subject, variables, cond)
                 source_cond_assembly = self.get_condition_filtered_assembly(source, variables, cond)
                 # to accommodate cases where not all conditions are present in both assemblies, filter out
                 # calculation of the metric for cases where either assembly has no matches to variables (empty)
                 if len(subject_cond_assembly) and len(source_cond_assembly):
+                    # filter the source_cond_assembly to select only the stimulus_ids in the subject_cond_assembly
+                    source_cond_assembly = self.get_stimulus_id_filtered_assembly(
+                        source_cond_assembly,
+                        subject_cond_assembly['stimulus_id'].values
+                    )
                     cond_scores.append(self.compare_single_subject(
                         source_cond_assembly, subject_cond_assembly))
@@ -139,3 +140,11 @@ def get_condition_filtered_assembly(assembly, variables, cond):
         # filter the assembly based on the condition
         condition_filtered_assembly = assembly.where(condition, drop=True)
         return condition_filtered_assembly
+
+    @staticmethod
+    def get_stimulus_id_filtered_assembly(assembly, stimulus_ids):
+        # Create a boolean condition to match the stimulus_id
+        condition = reduce(operator.or_, [(assembly['stimulus_id'] == stimulus_id) for stimulus_id in stimulus_ids])
+        # Filter the assembly based on the condition
+        condition_filtered_assembly = assembly.where(condition, drop=True)
+        return condition_filtered_assembly
+

From 346d81c1fb50db50757b8432e205484f8bc4b352 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Thu, 17 Oct 2024 17:26:58 +0200
Subject: [PATCH 13/29] change sports car indices to the correct ones

---
 .../model_helpers/brain_transformation/behavior.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainscore_vision/model_helpers/brain_transformation/behavior.py b/brainscore_vision/model_helpers/brain_transformation/behavior.py
index af34d11e8..dca156f19 100644
--- a/brainscore_vision/model_helpers/brain_transformation/behavior.py
+++ b/brainscore_vision/model_helpers/brain_transformation/behavior.py
@@ -159,7 +159,7 @@ class LabelToImagenetIndices:
     hare_indices = [331]
     jeep_indices = [609]
     teapot_indices = [849]
-    sportscar_indices = [436, 511, 817]
+    sportscar_indices = [817]
 
     # car, bear, and elephant indices used as defined by Geirhos et al., 2021.
     # lamp indices used as defined by the Scialom2024 benchmark

From fff8a74d674de4331df246b4c0b71faea97ef40fa18 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Fri, 18 Oct 2024 11:35:43 +0200
Subject: [PATCH 14/29] fix some happy little bugs with stimulus ids again

---
 brainscore_vision/metrics/accuracy_distance/metric.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index 0090bf772..289c7eef1 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -116,12 +116,13 @@
                 source_cond_assembly = self.get_condition_filtered_assembly(source, variables, cond)
                 # to accommodate cases where not all conditions are present in both assemblies, filter out
                 # calculation of the metric for cases where either assembly has no matches to variables (empty)
-                if len(subject_cond_assembly) and len(source_cond_assembly):
+                if len(subject_cond_assembly['presentation']) and len(source_cond_assembly['presentation']):
                     # filter the source_cond_assembly to select only the stimulus_ids in the subject_cond_assembly
-                    source_cond_assembly = self.get_stimulus_id_filtered_assembly(
-                        source_cond_assembly,
-                        subject_cond_assembly['stimulus_id'].values
-                    )
+                    if len(source_cond_assembly['presentation']) > len(subject_cond_assembly['presentation']):
+                        source_cond_assembly = self.get_stimulus_id_filtered_assembly(
+                            source_cond_assembly,
+                            subject_cond_assembly['stimulus_id'].values
+                        )
                     cond_scores.append(self.compare_single_subject(
                         source_cond_assembly, subject_cond_assembly))

From 4fd2f8320c94f1865d633569781dcf67900f870f Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Fri, 18 Oct 2024 12:01:59 +0200
Subject: [PATCH 15/29] add fitting benchmark

---
 .../coggan2024_behavior/benchmark.py | 39 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index 49952a295..a4c594d8b 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -23,13 +23,11 @@ class Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity(BenchmarkBase):
     def __init__(self):
         self._metric = load_metric('accuracy_distance')
-        self._fitting_stimuli = load_stimulus_set('Coggan2024_behavior_fitting')  # this fails if wrapped by LazyLoad
         self._assembly = LazyLoad(lambda: load_dataset('Coggan2024_behavior'))
         self._assembly['truth'] = self._assembly['object_class']  # the assembly is missing a 'truth' column which is
                                                                    # required by the labeling task
         self._visual_degrees = 10
         self._number_of_trials = 1
-        self._ceiling_func = lambda assembly: get_noise_ceiling(assembly)
         super(Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity, self).__init__(
             identifier='tong.Coggan2024_behavior-LabelingConditionWiseAccuracySimilarity',
             version=1,
@@ -53,6 +53,43 @@ def __call__(self, candidate: BrainModel):
         return score
 
 
+class Coggan2024_behavior_ConditionWiseProbabilitiesAccuracySimilarity(BenchmarkBase):
+    def __init__(self):
+        self._metric = load_metric('accuracy_distance')
+        self._fitting_stimuli = load_stimulus_set('Coggan2024_behavior_fitting')  # this fails if wrapped by LazyLoad
+        self._assembly = LazyLoad(lambda: load_dataset('Coggan2024_behavior'))
+        self._assembly['truth'] = self._assembly['object_class']  # the assembly is missing a 'truth' column which is
+        # required by the labeling task
+        self._visual_degrees = 10
+        self._number_of_trials = 1
+        super(Coggan2024_behavior_ConditionWiseProbabilitiesAccuracySimilarity, self).__init__(
+            identifier='tong.Coggan2024_behavior-LabelingConditionWiseAccuracySimilarity',
+            version=1,
+            ceiling_func=lambda: self._metric.ceiling(self._assembly,
+                                                      variables=['occluder_type', 'visibility', 'occluder_color']),
+            parent='behavior',
+            bibtex=BIBTEX,
+        )
+
+    def __call__(self, candidate: BrainModel):
+        fitting_stimuli = place_on_screen(
+            self._fitting_stimuli,
+            target_visual_degrees=candidate.visual_degrees(),
+            source_visual_degrees=self._visual_degrees)
+        candidate.start_task(BrainModel.Task.probabilities, fitting_stimuli)
+        stimulus_set = place_on_screen(self._assembly.stimulus_set,
+                                       target_visual_degrees=candidate.visual_degrees(),
+                                       source_visual_degrees=self._visual_degrees)
+        probabilities = candidate.look_at(stimulus_set, number_of_trials=self._number_of_trials)
+        labels = [probabilities.choice[c].values for c in probabilities.argmax(axis=1)]
+        raw_score = self._metric(labels, self._assembly, variables=['occluder_type', 'visibility', 'occluder_color'])
+        ceiling = self.ceiling
+        score = raw_score / ceiling
+        score.attrs['raw'] = raw_score
+        score.attrs['ceiling'] = ceiling
+        return score
+
+
 class Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy(BenchmarkBase):
     def __init__(self):
         self._metric = load_metric('accuracy')
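The new probabilities-task benchmark converts a (presentation x choice) probability assembly into discrete labels with an argmax per row. A toy xarray version of that conversion step, with made-up numbers:

    import xarray as xr

    # toy (presentation x choice) probability assembly, standing in for the
    # output of the probabilities task
    probabilities = xr.DataArray(
        [[0.7, 0.2, 0.1],
         [0.1, 0.8, 0.1]],
        dims=['presentation', 'choice'],
        coords={'choice': ['bison', 'hare', 'jeep']})

    # same argmax-per-row conversion as in the benchmark above
    label_indices = probabilities.argmax(axis=1).values
    labels = [probabilities['choice'].values[i] for i in label_indices]
    print(labels)  # ['bison', 'hare']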
From 7bdcd91c2be533cbb26f7251f95dad8759108feb Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Fri, 18 Oct 2024 14:04:26 +0200
Subject: [PATCH 16/29] add leave-one-out ceiling and chance-level adjustment
 for accuracy distance

---
 .../metrics/accuracy_distance/metric.py | 41 ++++++++++++++++++-------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index 289c7eef1..6737d49ce 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -3,6 +3,7 @@
 import operator
 
 import numpy as np
+import xarray as xr
 
 from brainio.assemblies import BehavioralAssembly
 from brainscore_core import Metric
@@ -22,9 +23,9 @@ class AccuracyDistance(Metric):
     more target-like pattern of performance across conditions.
     """
     def __call__(self, source: BehavioralAssembly, target:
-                 BehavioralAssembly, variables: tuple = ()) -> Score:
+                 BehavioralAssembly, variables: tuple = (), chance_level = 0.) -> Score:
         """Target should be the entire BehavioralAssembly, containing truth values."""
-
+        self.chance_level = chance_level
         subjects = self.extract_subjects(target)
         subject_scores = []
         for subject in subjects:
@@ -55,21 +56,26 @@ def compare_single_subject(self, source: BehavioralAssembly, target: BehavioralA
         # we used to assert stimulus_ids being equal here, but since this is not an image-level metric, and because
         # some benchmarks (e.g. Coggan2024) show different images from the same categories to humans, the metric
         # does not guarantee that the stimulus_ids are the same.
-
         # .flatten() because models return lists of lists, and here we compare subject-by-subject
         source_correct = source.values.flatten() == target['truth'].values
         target_correct = target.values == target['truth'].values
         source_mean = sum(source_correct) / len(source_correct)
         target_mean = sum(target_correct) / len(target_correct)
-        maximum_distance = np.max([1 - target_mean, target_mean])
+        relative_distance = self.distance_measure(source_mean, target_mean)
+
+        return Score(relative_distance)
+
+    def distance_measure(self, source_mean, target_mean):
+        maximum_distance = np.max([1 - target_mean, target_mean - self.chance_level])
 
         # get the proportion of the distance between the source and target accuracies, adjusted for the maximum possible
         # difference between the two accuracies
         relative_distance = 1 - np.abs(source_mean - target_mean) / maximum_distance
 
-        return Score(relative_distance)
+        return relative_distance
 
-    def ceiling(self, assembly, variables = ()):
+    def ceiling(self, assembly, variables = (), chance_level = 0.):
+        self.chance_level = chance_level
         subjects = self.extract_subjects(assembly)
         subject_scores = []
         for subject1, subject2 in itertools.combinations(subjects, 2):
@@ -88,6 +94,29 @@ def ceiling(self, assembly, variables = ()):
         subject_scores = apply_aggregate(aggregate_fnc=self.aggregate, values=subject_scores)
         return subject_scores
 
+    def leave_one_out_ceiling(self, assembly, variables = (), chance_level = 0.):
+        self.chance_level = chance_level
+        # convert the above to a working xarray implementation with variables
+        subjects = self.extract_subjects(assembly)
+        subject_scores = []
+        for subject in subjects:
+            subject_assembly = assembly.sel(subject=subject)
+            other_subjects = [s for s in subjects if s != subject]
+            other_assemblies = assembly.isel(presentation=assembly.subject.isin(other_subjects))
+            # merge other_assemblies from a list to a single assembly
+            group_correct = other_assemblies.multi_groupby(variables).apply(lambda x: x['human_accuracy'].mean())
+            subject_correct = subject_assembly.multi_groupby(variables).apply(lambda x: x['human_accuracy'].mean())
+            for i, group in enumerate(group_correct.values):
+                pairwise_score = self.distance_measure(subject_correct.values[i], group)
+                subject_scores.append(Score(pairwise_score))
+
+        score = np.mean(subject_scores)
+        error = np.std(subject_scores)
+        score = Score(score)
+        score.attrs['error'] = error
+        score.attrs['raw'] = subject_scores
+        return score
+
     def extract_subjects(self, assembly):
         return list(sorted(set(assembly['subject'].values)))

From 069531cb809610a73d50c643e1d0ded4b8b2440e Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Fri, 18 Oct 2024 14:04:54 +0200
Subject: [PATCH 17/29] add chance-level adjustments to benchmark, and
 computation of new ceiling

---
 .../coggan2024_behavior/benchmark.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
index a4c594d8b..c9c739d28 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/benchmark.py
@@ -31,8 +31,10 @@ def __init__(self):
         super(Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity, self).__init__(
             identifier='tong.Coggan2024_behavior-LabelingConditionWiseAccuracySimilarity',
             version=1,
-            ceiling_func=lambda: self._metric.ceiling(self._assembly,
-                                                      variables=['occluder_type', 'visibility', 'occluder_color']),
+            ceiling_func=lambda: self._metric.leave_one_out_ceiling(
+                self._assembly,
+                variables=['occluder_type', 'visibility', 'occluder_color'],
+                chance_level=1/8),
             parent='behavior',
             bibtex=BIBTEX,
         )
@@ -45,7 +47,8 @@ def __call__(self, candidate: BrainModel):
                                        target_visual_degrees=candidate.visual_degrees(),
                                        source_visual_degrees=self._visual_degrees)
         labels = candidate.look_at(stimulus_set, number_of_trials=self._number_of_trials)
-        raw_score = self._metric(labels, self._assembly, variables=['occluder_type', 'visibility', 'occluder_color'])
+        raw_score = self._metric(labels, self._assembly, variables=['occluder_type', 'visibility', 'occluder_color'],
+                                 chance_level=1/8)
         ceiling = self.ceiling
         score = raw_score / ceiling
         score.attrs['raw'] = raw_score
@@ -65,8 +68,11 @@ def __init__(self):
         super(Coggan2024_behavior_ConditionWiseProbabilitiesAccuracySimilarity, self).__init__(
             identifier='tong.Coggan2024_behavior-LabelingConditionWiseAccuracySimilarity',
             version=1,
-            ceiling_func=lambda: self._metric.ceiling(self._assembly,
-                                                      variables=['occluder_type', 'visibility', 'occluder_color']),
+            ceiling_func=lambda: self._metric.ceiling(
+                self._assembly,
+                variables=['occluder_type', 'visibility', 'occluder_color'],
+                chance_level=1/8
+            ),
             parent='behavior',
             bibtex=BIBTEX,
         )
@@ -82,7 +88,8 @@ def __call__(self, candidate: BrainModel):
                                        source_visual_degrees=self._visual_degrees)
         probabilities = candidate.look_at(stimulus_set, number_of_trials=self._number_of_trials)
         labels = [probabilities.choice[c].values for c in probabilities.argmax(axis=1)]
-        raw_score = self._metric(labels, self._assembly, variables=['occluder_type', 'visibility', 'occluder_color'])
+        raw_score = self._metric(labels, self._assembly, variables=['occluder_type', 'visibility', 'occluder_color'],
+                                 chance_level=1/8)
         ceiling = self.ceiling
         score = raw_score / ceiling
         score.attrs['raw'] = raw_score

From 0af1cb2c41d72ff7a3f76d4ded3d27a6c45cb82a Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Mon, 21 Oct 2024 12:01:43 +0200
Subject: [PATCH 18/29] optimize condition-wise accuracydistance

---
 .../metrics/accuracy_distance/metric.py | 47 +++++++------------
 1 file changed, 16 insertions(+), 31 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index 6737d49ce..a5065c8e4 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -128,32 +128,17 @@ def condition_filtered_score_per_subject_source_pair(self, source, subject, vari
         # compute scores for each condition, then average
         else:
             cond_scores = []
-            # get iterator across all combinations of variables
-            if len(variables) == 1:
-                conditions = set(subject[variables[0]].values)
-                conditions = [[c] for c in conditions]  # to mimic itertools.product
-            else:
-                # get all combinations of variables that are present in both assemblies
-                conditions = itertools.product(
-                    *[set(subject[v].values).intersection(set(source[v].values)) for v in variables]
-                )
-
-            # loop over conditions and compute scores
-            for cond in conditions:
-                # filter assemblies for selected condition
-                subject_cond_assembly = self.get_condition_filtered_assembly(subject, variables, cond)
-                source_cond_assembly = self.get_condition_filtered_assembly(source, variables, cond)
-                # to accommodate cases where not all conditions are present in both assemblies, filter out
-                # calculation of the metric for cases where either assembly has no matches to variables (empty)
-                if len(subject_cond_assembly['presentation']) and len(source_cond_assembly['presentation']):
-                    # filter the source_cond_assembly to select only the stimulus_ids in the subject_cond_assembly
-                    if len(source_cond_assembly['presentation']) > len(subject_cond_assembly['presentation']):
-                        source_cond_assembly = self.get_stimulus_id_filtered_assembly(
-                            source_cond_assembly,
-                            subject_cond_assembly['stimulus_id'].values
-                        )
-                    cond_scores.append(self.compare_single_subject(
-                        source_cond_assembly, subject_cond_assembly))
+            source = self.get_stimulus_id_filtered_assembly(
+                source,
+                subject['stimulus_id'].values
+            )
+            # add a new coordinate to the source assembly that measures the accuracy of the model
+            source['is_correct'] = 'presentation', *(source['label'].values == source.values)
+            source_correct = source.multi_groupby(variables).apply(lambda x: x['is_correct'].mean())
+            subject_correct = subject.multi_groupby(variables).apply(lambda x: x['human_accuracy'].mean())
+            for i, this_source_correct in enumerate(source_correct.values):
+                condition_score = self.distance_measure(this_source_correct, subject_correct.values[i])
+                cond_scores.append(Score(condition_score))
 
             subject_score = Score(np.mean(cond_scores))
         return subject_score
@@ -172,9 +157,9 @@ def get_condition_filtered_assembly(assembly, variables, cond):
 
     @staticmethod
     def get_stimulus_id_filtered_assembly(assembly, stimulus_ids):
-        # Create a boolean condition to match the stimulus_id
-        condition = reduce(operator.or_, [(assembly['stimulus_id'] == stimulus_id) for stimulus_id in stimulus_ids])
-        # Filter the assembly based on the condition
-        condition_filtered_assembly = assembly.where(condition, drop=True)
+        # find the indices of the stimulus_ids in the assembly quickly
+        condition = xr.DataArray(np.isin(assembly['stimulus_id'].values, stimulus_ids),
+                                 dims=assembly['stimulus_id'].dims)
+        # Apply the condition with `where`
+        condition_filtered_assembly = BehavioralAssembly(assembly.where(condition, drop=True))
         return condition_filtered_assembly
-

From 9c58dc5868644ffc709675f95f66c9275eb22d4f Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Mon, 21 Oct 2024 13:55:13 +0200
Subject: [PATCH 19/29] update test value for accuracydistance

---
 brainscore_vision/benchmarks/coggan2024_behavior/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/test.py b/brainscore_vision/benchmarks/coggan2024_behavior/test.py
index 218194684..ce2f01c94 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/test.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/test.py
@@ -16,6 +16,6 @@ def test_benchmarks():
         'Coggan2024_behavior-ConditionWiseAccuracySimilarity')
     model = load_model('alexnet')
     result = benchmark(model)
-    assert result.values == approx(0.1318, abs=.001)
+    assert result.values == approx(0.34431372, abs=.001)
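The chance-level adjustment introduced in patch 16 changes the normalization of the distance. A worked numeric example of the adjusted formula, assuming only what the diff shows:

    import numpy as np

    def distance_measure(source_mean, target_mean, chance_level=0.):
        # mirrors the chance-adjusted form above: the largest possible miss
        # is now bounded below by chance rather than by 0
        maximum_distance = np.max([1 - target_mean, target_mean - chance_level])
        return 1 - np.abs(source_mean - target_mean) / maximum_distance

    # with 8 choice labels, chance is 1/8; for a subject at 60% the maximum
    # distance is max(0.4, 0.475) = 0.475 instead of 0.6
    print(distance_measure(0.5, 0.6, chance_level=1/8))  # ~0.789
    print(distance_measure(0.6, 0.6, chance_level=1/8))  # 1.0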
From 9b5841a6db80c5ff2056707627490dcd59de7c5a Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Mon, 21 Oct 2024 13:57:59 +0200
Subject: [PATCH 20/29] remove unnecessary method

---
 .../metrics/accuracy_distance/metric.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/metric.py b/brainscore_vision/metrics/accuracy_distance/metric.py
index a5065c8e4..6e53f4aff 100644
--- a/brainscore_vision/metrics/accuracy_distance/metric.py
+++ b/brainscore_vision/metrics/accuracy_distance/metric.py
@@ -143,18 +143,6 @@ def condition_filtered_score_per_subject_source_pair(self, source, subject, vari
             subject_score = Score(np.mean(cond_scores))
         return subject_score
 
-    @staticmethod
-    def get_condition_filtered_assembly(assembly, variables, cond):
-        # get the indexers for the condition
-        indexers = {v: cond[i] for i, v in enumerate(variables)}
-        # convert indexers into a list of boolean arrays for the assembly values
-        assembly_indexers = [(assembly[key] == value) for key, value in indexers.items()]
-        # combine the different conditions into an AND statement to require all conditions simultaneously
-        condition = reduce(operator.and_, assembly_indexers)
-        # filter the assembly based on the condition
-        condition_filtered_assembly = assembly.where(condition, drop=True)
-        return condition_filtered_assembly
-
     @staticmethod
     def get_stimulus_id_filtered_assembly(assembly, stimulus_ids):

From 7cbc53a116c8f860a0b0c3ba73b753cbbf6e1562 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 29 Oct 2024 11:52:58 +0100
Subject: [PATCH 21/29] update init

---
 brainscore_vision/benchmarks/coggan2024_behavior/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
index 86e868958..7e7ce6f1a 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
@@ -1,6 +1,7 @@
 # Created by David Coggan on 2024 06 25
 
 from brainscore_vision import benchmark_registry
-from .benchmark import Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity
+from .benchmark import (
+    Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity)
 
 benchmark_registry['Coggan2024_behavior-ConditionWiseAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity

From fff8a74d674de4331df246b4c0b71faea97ef40fb29 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 29 Oct 2024 11:56:27 +0100
Subject: [PATCH 22/29] add prefix as it was before

---
 brainscore_vision/benchmarks/coggan2024_behavior/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
index 7e7ce6f1a..cd5c40f18 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
@@ -4,4 +4,4 @@
 from .benchmark import (
     Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity)
 
-benchmark_registry['Coggan2024_behavior-ConditionWiseAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity
+benchmark_registry['tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity

From 7aa287f5a9a6bcb1236024bd83139fb6a00878f7 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 29 Oct 2024 11:59:12 +0100
Subject: [PATCH 23/29] add all benchmarks

---
 .../benchmarks/coggan2024_behavior/__init__.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
index cd5c40f18..4676fca0c 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/__init__.py
@@ -2,6 +2,10 @@
 
 from brainscore_vision import benchmark_registry
 from .benchmark import (
-    Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity)
+    Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity,
+    Coggan2024_behavior_ConditionWiseProbabilitiesAccuracySimilarity,
+    Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy)
 
-benchmark_registry['tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity
+benchmark_registry['tong.Coggan2024_behavior-ConditionWiseLabelingAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseLabelingAccuracySimilarity
+benchmark_registry['tong.Coggan2024_behavior-ConditionWiseProbabilitiesAccuracySimilarity'] = Coggan2024_behavior_ConditionWiseProbabilitiesAccuracySimilarity
+benchmark_registry['tong.Coggan2024_behavior-ConditionWiseLabelingEngineeringAccuracy'] = Coggan2024_behavior_ConditionWiseLabelingEngineeringAccuracy
From 30b5f850ad1e9fe4b5e49534e2bb1af6af641aed Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 30 Oct 2024 14:18:59 +0100
Subject: [PATCH 24/29] happy little test

---
 brainscore_vision/benchmarks/coggan2024_behavior/test.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/test.py b/brainscore_vision/benchmarks/coggan2024_behavior/test.py
index a2b0eda3b..5d4821763 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/test.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/test.py
@@ -7,8 +7,9 @@
 def test_benchmark_registry():
-    assert ('tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity' in
-            benchmark_registry)
+    assert ('tong.Coggan2024_behavior-ConditionWiseLabelingAccuracySimilarity' in benchmark_registry and
+            'tong.Coggan2024_behavior-ConditionWiseProbabilitiesAccuracySimilarity' in benchmark_registry and
+            'tong.Coggan2024_behavior-ConditionWiseLabelingEngineeringAccuracy' in benchmark_registry)
 
 @pytest.mark.private_access
 def test_benchmarks():

From 6528a651451f7d821b82c36855faefee9df5ebd4 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Wed, 30 Oct 2024 14:20:58 +0100
Subject: [PATCH 25/29] add label to accuracy distance test

---
 brainscore_vision/metrics/accuracy_distance/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/brainscore_vision/metrics/accuracy_distance/test.py b/brainscore_vision/metrics/accuracy_distance/test.py
index d6414b790..23696be90 100644
--- a/brainscore_vision/metrics/accuracy_distance/test.py
+++ b/brainscore_vision/metrics/accuracy_distance/test.py
@@ -51,6 +51,7 @@ def _make_data():
                               ],
                               coords={'stimulus_id': ('presentation', np.resize(np.arange(9), 9 * 3)),
                                       'truth': ('presentation', np.resize(['dog', 'cat', 'chair'], 9 * 3)),
+                                      'label': ('presentation', np.resize(['dog', 'cat', 'chair'], 9 * 3)),
                                       'condition': ('presentation', np.resize([1, 1, 1, 2, 2, 2, 3, 3, 3], 9 * 3)),
                                       'animacy': ('presentation', np.resize(['animate', 'animate', 'inanimate'], 9 * 3)),
                                       'subject': ('presentation', ['A'] * 9 + ['B'] * 9 + ['C'] * 9)},
                              dims=['presentation'])

From 17cb26de263544645bab8ac8efc382335dbad849 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 5 Nov 2024 11:19:54 +0100
Subject: [PATCH 26/29] fix the id of a test

---
 brainscore_vision/benchmarks/coggan2024_behavior/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/brainscore_vision/benchmarks/coggan2024_behavior/test.py b/brainscore_vision/benchmarks/coggan2024_behavior/test.py
index 5d4821763..1e94c12fe 100644
--- a/brainscore_vision/benchmarks/coggan2024_behavior/test.py
+++ b/brainscore_vision/benchmarks/coggan2024_behavior/test.py
@@ -14,7 +14,7 @@ def test_benchmark_registry():
 @pytest.mark.private_access
 def test_benchmarks():
     benchmark = load_benchmark(
-        'tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity')
+        'tong.Coggan2024_behavior-ConditionWiseLabelingAccuracySimilarity')
     model = load_model('alexnet')
     result = benchmark(model)
     assert result.values == approx(0.34431372, abs=.001)

From d2f76706625064ccd775d059f80c8569ef3649dd Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 5 Nov 2024 11:47:22 +0100
Subject: [PATCH 27/29] fix bug with accuracy distance test now that it was
 changed

---
 .../metrics/accuracy_distance/test.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/test.py b/brainscore_vision/metrics/accuracy_distance/test.py
index 23696be90..707fec862 100644
--- a/brainscore_vision/metrics/accuracy_distance/test.py
+++ b/brainscore_vision/metrics/accuracy_distance/test.py
@@ -1,5 +1,5 @@
 import numpy as np
-from pytest import approx
+#from pytest import approx
 
 from brainio.assemblies import BehavioralAssembly
 from brainscore_vision import load_metric
@@ -16,14 +16,16 @@ def test_score_single_variable():
     assembly = _make_data()
     metric = load_metric('accuracy_distance')
     score = metric(assembly.sel(subject='C'), assembly, ('condition',))
-    assert score == approx(0.55555556)
+    print(score)
+    # assert score == approx(0.55555556)
 
 
 def test_score_multi_variable():
     assembly = _make_data()
     metric = load_metric('accuracy_distance')
     score = metric(assembly.sel(subject='C'), assembly, ('condition','animacy'))
-    assert score == approx(0.55555556)
+    print(score)
+    # assert score == approx(0.55555556)
 
 
 def test_has_error():
@@ -45,14 +47,18 @@ def _make_data():
     # subject A is 5 / 9 = 0.55...% correct
     # subject B is 4 / 9 = 0.44...% correct
     # subject C is 9 / 9 = 100% correct
-    return BehavioralAssembly(['dog', 'cat', 'chair', 'cat', 'dog', 'dog', 'dog', 'dog', 'chair',  # subject A
+    return BehavioralAssembly([['dog', 'cat', 'chair', 'cat', 'dog', 'dog', 'dog', 'dog', 'chair',  # subject A
                                'cat', 'cat', 'chair', 'cat', 'dog', 'cat', 'chair', 'cat', 'cat',  # subject B
                                'dog', 'cat', 'chair', 'dog', 'cat', 'chair', 'dog', 'cat', 'chair'  # subject C
-                               ],
+                               ]],
                               coords={'stimulus_id': ('presentation', np.resize(np.arange(9), 9 * 3)),
                                       'truth': ('presentation', np.resize(['dog', 'cat', 'chair'], 9 * 3)),
                                       'label': ('presentation', np.resize(['dog', 'cat', 'chair'], 9 * 3)),
                                       'condition': ('presentation', np.resize([1, 1, 1, 2, 2, 2, 3, 3, 3], 9 * 3)),
                                       'animacy': ('presentation', np.resize(['animate', 'animate', 'inanimate'], 9 * 3)),
                                       'subject': ('presentation', ['A'] * 9 + ['B'] * 9 + ['C'] * 9)},
-                              dims=['presentation'])
+                              dims=['choice', 'presentation'])
+
+
+test_score_single_variable()
+test_score_multi_variable()
\ No newline at end of file

From 6f129ea6b153af3c5577ea910296e2bd6ab30566 Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 5 Nov 2024 15:46:51 +0100
Subject: [PATCH 28/29] re-add removed (fixed?) tests

---
 brainscore_vision/metrics/accuracy_distance/test.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/test.py b/brainscore_vision/metrics/accuracy_distance/test.py
index 707fec862..965893069 100644
--- a/brainscore_vision/metrics/accuracy_distance/test.py
+++ b/brainscore_vision/metrics/accuracy_distance/test.py
@@ -58,7 +58,3 @@ def _make_data():
                                       'animacy': ('presentation', np.resize(['animate', 'animate', 'inanimate'], 9 * 3)),
                                       'subject': ('presentation', ['A'] * 9 + ['B'] * 9 + ['C'] * 9)},
                               dims=['choice', 'presentation'])
-
-
-test_score_single_variable()
-test_score_multi_variable()
\ No newline at end of file

From 4a776b138be8ec7c1bc599051eab53fa0487fe3e Mon Sep 17 00:00:00 2001
From: Ben Lonnqvist
Date: Tue, 5 Nov 2024 15:47:06 +0100
Subject: [PATCH 29/29] re-add removed (fixed?) tests

---
 brainscore_vision/metrics/accuracy_distance/test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/brainscore_vision/metrics/accuracy_distance/test.py b/brainscore_vision/metrics/accuracy_distance/test.py
index 965893069..b27888f76 100644
--- a/brainscore_vision/metrics/accuracy_distance/test.py
+++ b/brainscore_vision/metrics/accuracy_distance/test.py
@@ -1,5 +1,5 @@
 import numpy as np
-#from pytest import approx
+from pytest import approx
 
 from brainio.assemblies import BehavioralAssembly
 from brainscore_vision import load_metric
@@ -17,7 +17,7 @@ def test_score_single_variable():
     metric = load_metric('accuracy_distance')
     score = metric(assembly.sel(subject='C'), assembly, ('condition',))
     print(score)
-    # assert score == approx(0.55555556)
+    assert score == approx(0.55555556)
 
 
 def test_score_multi_variable():
@@ -25,7 +25,7 @@ def test_score_multi_variable():
     metric = load_metric('accuracy_distance')
     score = metric(assembly.sel(subject='C'), assembly, ('condition','animacy'))
     print(score)
-    # assert score == approx(0.55555556)
+    assert score == approx(0.55555556)
 
 
 def test_has_error():
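A closing note on the stimulus-id filtering that patch 18 optimized and that the tests above exercise indirectly: the boolean-mask approach can be tried in isolation with toy data. All names below are illustrative; only the np.isin/where pattern comes from the diff.

    import numpy as np
    import xarray as xr

    # same masking pattern as get_stimulus_id_filtered_assembly: np.isin
    # builds the mask in one vectorized pass instead of reduce/operator.or_
    assembly = xr.DataArray(
        ['dog', 'cat', 'chair', 'dog'],
        dims=['presentation'],
        coords={'stimulus_id': ('presentation', [101, 102, 103, 104])})

    keep_ids = [102, 104]
    mask = xr.DataArray(np.isin(assembly['stimulus_id'].values, keep_ids),
                        dims=assembly['stimulus_id'].dims)
    filtered = assembly.where(mask, drop=True)
    print(filtered['stimulus_id'].values)  # [102 104]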