diff --git a/migrations/2020-12-20_pkl_to_nc.py b/migrations/2020-12-20_pkl_to_nc.py
new file mode 100644
index 000000000..517502d9b
--- /dev/null
+++ b/migrations/2020-12-20_pkl_to_nc.py
@@ -0,0 +1,119 @@
+"""One-off migration: convert pickled test fixtures to netCDF files.
+
+``local_pkls`` converts the pickles stored under ``tests/test_metrics``;
+``s3_pkls`` converts the pickles stored in the ``brain-score-tests`` S3 bucket.
+"""
+import pickle
+from pathlib import Path
+
+import boto3
+from brainio_base.assemblies import BehavioralAssembly
+from brainio_collection.packaging import write_netcdf
+
+
+local_pkl_names = [
+    'alexnet-probabilities.pkl',
+    'resnet34-probabilities.pkl',
+    'resnet18-probabilities.pkl'
+]
+
+
+s3_pkl_names = [
+    "alexnet-freemanziemba2013.aperture-private.pkl",
+    "alexnet-majaj2015.private-features.12.pkl",
+    "CORnetZ-rajalingham2018public.pkl",
+    "cornet_s-kar2019.pkl",
+    "alexnet-sanghavi2020-features.12.pkl",
+    "alexnet-sanghavijozwik2020-features.12.pkl",
+    "alexnet-sanghavimurty2020-features.12.pkl",
+    "alexnet-rajalingham2020-features.12.pkl",
+]
+
+
+def local_pkls():
+    """Write a ``.nc`` file next to every pickle listed in ``local_pkl_names``.
+
+    Pickles whose ``.nc`` counterpart already exists are skipped.
+    """
+    target_dir_path = Path(__file__).parents[1] / "tests" / "test_metrics"
+    for pkl_name in local_pkl_names:
+        pkl_path = target_dir_path / pkl_name
+        nc_path = pkl_path.with_suffix(".nc")
+        if nc_path.exists():
+            print(f"{nc_path} already exists. ")
+            continue
+        print(f"{nc_path} does not exist. ")
+        with open(pkl_path, 'rb') as f:
+            unpickled = pickle.load(f)
+        # write netcdf
+        write_netcdf(BehavioralAssembly(unpickled["data"]), str(nc_path))
+
+
+def s3_pkls():
+    """Convert every pickle listed in ``s3_pkl_names`` to netCDF and upload it.
+
+    For each name whose ``.nc`` object is missing from the bucket: download the
+    pickle into ``test_pkl`` next to this script (unless it is already there),
+    write the ``.nc`` file with ``write_netcdf`` (unless it is already there),
+    and upload the ``.nc`` file under the same key prefix.
+    """
+    session = boto3.session.Session(profile_name="dicarlolab_jjpr")
+    s3 = session.client("s3")
+    bucket_name = "brain-score-tests"
+
+    def exists(key):
+        """Return whether ``key`` is present in the bucket."""
+        try:
+            s3.head_object(Bucket=bucket_name, Key=key)
+        except s3.exceptions.ClientError as e:
+            # HEAD responses carry no body, so a missing key surfaces as a
+            # generic ClientError with code "404" rather than as NoSuchKey.
+            error_code = e.response.get("Error", {}).get("Code")
+            if error_code in ("404", "NoSuchKey", "NotFound"):
+                return False
+            # anything else (e.g. 403) is a real failure, not a missing key
+            raise
+        return True
+
+    prefix_path = Path("tests", "test_benchmarks")
+    target_dir_path = Path(__file__).parent / "test_pkl"
+    # download_file writes into this directory, so make sure it exists
+    target_dir_path.mkdir(parents=True, exist_ok=True)
+
+    for pkl_name in s3_pkl_names:
+        pkl_path = Path(pkl_name)
+        nc_path = pkl_path.with_suffix(".nc")
+        # S3 keys use forward slashes regardless of the local path separator
+        object_key_pkl = (prefix_path / pkl_path).as_posix()
+        object_key_nc = (prefix_path / nc_path).as_posix()
+        target_file_pkl = target_dir_path / pkl_path
+        target_file_nc = target_dir_path / nc_path
+
+        if exists(object_key_nc):
+            print(f"{object_key_nc} already exists. ")
+            continue
+        print(f"{object_key_nc} does not exist. ")
+        if not target_file_nc.exists():
+            if not target_file_pkl.exists():
+                # fetch file
+                s3.download_file(bucket_name, object_key_pkl, str(target_file_pkl))
+            # unpickle
+            # NOTE(review): pickle.load can run arbitrary code; assumes the bucket is trusted
+            with open(target_file_pkl, 'rb') as f:
+                unpickled = pickle.load(f)
+            # write netcdf
+            write_netcdf(unpickled["data"], str(target_file_nc))
+        # upload
+        s3.upload_file(str(target_file_nc), bucket_name, object_key_nc)
+
+
+def main():
+    """Run the local conversion first, then the S3 conversion."""
+    # assert xarray is 0.12.3 (NOTE(review): no version check is actually performed)
+    local_pkls()
+    s3_pkls()
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/setup.py b/setup.py
index 831b9c5b9..d7a1283ab 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
     "tensorflow",
     "result_caching @ git+https://github.com/mschrimpf/result_caching",
     "jupyter",
-    "pandas==0.25.3",
+    "pandas",
     "pybtex",
     'peewee',
     'psycopg2-binary'
diff --git a/test_setup.sh b/test_setup.sh
index f3c017e86..2675e3462 100755
--- a/test_setup.sh
+++ b/test_setup.sh
@@ -3,7 +3,7 @@
 # get directory of this script (i.e.
tests), following https://stackoverflow.com/a/246128/2225200 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -for f in alexnet-freemanziemba2013.aperture-private.pkl alexnet-majaj2015.private-features.12.pkl CORnetZ-rajalingham2018public.pkl cornet_s-kar2019.pkl alexnet-sanghavi2020-features.12.pkl alexnet-sanghavijozwik2020-features.12.pkl alexnet-sanghavimurty2020-features.12.pkl alexnet-rajalingham2020-features.12.pkl +for f in alexnet-freemanziemba2013.aperture-private.nc alexnet-majaj2015.private-features.12.nc CORnetZ-rajalingham2018public.nc cornet_s-kar2019.nc alexnet-sanghavi2020-features.12.nc alexnet-sanghavijozwik2020-features.12.nc alexnet-sanghavimurty2020-features.12.nc alexnet-rajalingham2020-features.12.nc do aws --no-sign-request s3 cp s3://brain-score-tests/tests/test_benchmarks/${f} ${SCRIPT_DIR}/tests/test_benchmarks/ done diff --git a/tests/test_benchmarks/test___init__.py b/tests/test_benchmarks/test___init__.py index 8ae25d025..aa292f2ff 100644 --- a/tests/test_benchmarks/test___init__.py +++ b/tests/test_benchmarks/test___init__.py @@ -1,5 +1,4 @@ import os -import pickle import numpy as np import pytest @@ -7,10 +6,12 @@ from pathlib import Path from pytest import approx from typing import List, Tuple +import xarray as xr from brainscore.benchmarks import benchmark_pool, public_benchmark_pool, evaluation_benchmark_pool from brainscore.model_interface import BrainModel from tests.test_benchmarks import PrecomputedFeatures +from brainio_base.assemblies import BehavioralAssembly class TestPoolList: @@ -148,7 +149,7 @@ class TestPrecomputed: ('movshon.FreemanZiemba2013.V2-pls', approx(.459283, abs=.005)), ]) def test_FreemanZiemba2013(self, benchmark, expected): - self.run_test(benchmark=benchmark, file='alexnet-freemanziemba2013.aperture-private.pkl', expected=expected) + self.run_test(benchmark=benchmark, file='alexnet-freemanziemba2013.aperture-private.nc', expected=expected) @pytest.mark.memory_intense 
@pytest.mark.parametrize('benchmark, expected', [ @@ -156,13 +157,12 @@ def test_FreemanZiemba2013(self, benchmark, expected): ('dicarlo.MajajHong2015.IT-pls', approx(.584053, abs=.005)), ]) def test_MajajHong2015(self, benchmark, expected): - self.run_test(benchmark=benchmark, file='alexnet-majaj2015.private-features.12.pkl', expected=expected) + self.run_test(benchmark=benchmark, file='alexnet-majaj2015.private-features.12.nc', expected=expected) def run_test(self, benchmark, file, expected): benchmark = benchmark_pool[benchmark] precomputed_features = Path(__file__).parent / file - with open(precomputed_features, 'rb') as f: - precomputed_features = pickle.load(f)['data'] + precomputed_features = BehavioralAssembly(xr.load_dataarray(precomputed_features)) precomputed_features = precomputed_features.stack(presentation=['stimulus_path']) precomputed_paths = list(map(lambda f: Path(f).name, precomputed_features['stimulus_path'].values)) # attach stimulus set meta @@ -184,9 +184,8 @@ def run_test(self, benchmark, file, expected): @pytest.mark.slow def test_Kar2019ost_cornet_s(self): benchmark = benchmark_pool['dicarlo.Kar2019-ost'] - precomputed_features = Path(__file__).parent / 'cornet_s-kar2019.pkl' - with open(precomputed_features, 'rb') as f: - precomputed_features = pickle.load(f)['data'] + precomputed_features = Path(__file__).parent / 'cornet_s-kar2019.nc' + precomputed_features = BehavioralAssembly(xr.load_dataarray(precomputed_features)) precomputed_features = PrecomputedFeatures(precomputed_features, visual_degrees=8) # score score = benchmark(precomputed_features).raw @@ -194,9 +193,8 @@ def test_Kar2019ost_cornet_s(self): def test_Rajalingham2018public(self): # load features - precomputed_features = Path(__file__).parent / 'CORnetZ-rajalingham2018public.pkl' - with open(precomputed_features, 'rb') as f: - precomputed_features = pickle.load(f)['data'] + precomputed_features = Path(__file__).parent / 'CORnetZ-rajalingham2018public.nc' + 
precomputed_features = BehavioralAssembly(xr.load_dataarray(precomputed_features)) precomputed_features = PrecomputedFeatures(precomputed_features, visual_degrees=8, # doesn't matter, features are already computed ) @@ -212,7 +210,7 @@ def test_Rajalingham2018public(self): ('dicarlo.Sanghavi2020.IT-pls', approx(.611347, abs=.015)), ]) def test_Sanghavi2020(self, benchmark, expected): - self.run_test(benchmark=benchmark, file='alexnet-sanghavi2020-features.12.pkl', expected=expected) + self.run_test(benchmark=benchmark, file='alexnet-sanghavi2020-features.12.nc', expected=expected) @pytest.mark.memory_intense @pytest.mark.slow @@ -221,7 +219,7 @@ def test_Sanghavi2020(self, benchmark, expected): ('dicarlo.SanghaviJozwik2020.IT-pls', approx(.590543, abs=.005)), ]) def test_SanghaviJozwik2020(self, benchmark, expected): - self.run_test(benchmark=benchmark, file='alexnet-sanghavijozwik2020-features.12.pkl', expected=expected) + self.run_test(benchmark=benchmark, file='alexnet-sanghavijozwik2020-features.12.nc', expected=expected) @pytest.mark.memory_intense @pytest.mark.parametrize('benchmark, expected', [ @@ -229,7 +227,7 @@ def test_SanghaviJozwik2020(self, benchmark, expected): ('dicarlo.SanghaviMurty2020.IT-pls', approx(.53006, abs=.015)), ]) def test_SanghaviMurty2020(self, benchmark, expected): - self.run_test(benchmark=benchmark, file='alexnet-sanghavimurty2020-features.12.pkl', expected=expected) + self.run_test(benchmark=benchmark, file='alexnet-sanghavimurty2020-features.12.nc', expected=expected) @pytest.mark.memory_intense @pytest.mark.slow @@ -237,7 +235,7 @@ def test_SanghaviMurty2020(self, benchmark, expected): ('dicarlo.Rajalingham2020.IT-pls', approx(.147549, abs=.01)), ]) def test_Rajalingham2020(self, benchmark, expected): - self.run_test(benchmark=benchmark, file='alexnet-rajalingham2020-features.12.pkl', expected=expected) + self.run_test(benchmark=benchmark, file='alexnet-rajalingham2020-features.12.nc', expected=expected) class TestVisualDegrees: 
diff --git a/tests/test_benchmarks/test_rajalingham2018.py b/tests/test_benchmarks/test_rajalingham2018.py index b3da8adf1..dae2e8c0f 100644 --- a/tests/test_benchmarks/test_rajalingham2018.py +++ b/tests/test_benchmarks/test_rajalingham2018.py @@ -1,7 +1,9 @@ import numpy as np import os +from pathlib import Path import pandas as pd +import xarray as xr import pytest from pytest import approx @@ -25,9 +27,8 @@ def test_ceiling(self): ]) def test_precomputed(self, model, expected_score): benchmark = DicarloRajalingham2018I2n() - probabilities = pd.read_pickle(os.path.join(os.path.dirname(__file__), '..', 'test_metrics', - f'{model}-probabilities.pkl'))['data'] - probabilities = BehavioralAssembly(probabilities) + probabilities = Path(__file__).parent.parent / 'test_metrics' / f'{model}-probabilities.nc' + probabilities = BehavioralAssembly(xr.load_dataarray(probabilities)) candidate = PrecomputedProbabilities(probabilities) score = benchmark(candidate) assert score.raw.sel(aggregation='center') == approx(expected_score, abs=.005) diff --git a/tests/test_metrics/alexnet-probabilities.nc b/tests/test_metrics/alexnet-probabilities.nc new file mode 100644 index 000000000..fc5111922 Binary files /dev/null and b/tests/test_metrics/alexnet-probabilities.nc differ diff --git a/tests/test_metrics/alexnet-probabilities.pkl b/tests/test_metrics/alexnet-probabilities.pkl deleted file mode 100644 index 6dac86ad6..000000000 Binary files a/tests/test_metrics/alexnet-probabilities.pkl and /dev/null differ diff --git a/tests/test_metrics/resnet18-probabilities.nc b/tests/test_metrics/resnet18-probabilities.nc new file mode 100644 index 000000000..0e732d630 Binary files /dev/null and b/tests/test_metrics/resnet18-probabilities.nc differ diff --git a/tests/test_metrics/resnet18-probabilities.pkl b/tests/test_metrics/resnet18-probabilities.pkl deleted file mode 100644 index c18976c0d..000000000 Binary files a/tests/test_metrics/resnet18-probabilities.pkl and /dev/null differ diff 
--git a/tests/test_metrics/resnet34-probabilities.nc b/tests/test_metrics/resnet34-probabilities.nc new file mode 100644 index 000000000..2b157ec03 Binary files /dev/null and b/tests/test_metrics/resnet34-probabilities.nc differ diff --git a/tests/test_metrics/resnet34-probabilities.pkl b/tests/test_metrics/resnet34-probabilities.pkl deleted file mode 100644 index b5c34059c..000000000 Binary files a/tests/test_metrics/resnet34-probabilities.pkl and /dev/null differ diff --git a/tests/test_metrics/test_behavior.py b/tests/test_metrics/test_behavior.py index e75dc9bee..61ce4c7a6 100644 --- a/tests/test_metrics/test_behavior.py +++ b/tests/test_metrics/test_behavior.py @@ -1,6 +1,8 @@ import os +from pathlib import Path import pandas as pd +import xarray as xr import pytest from pytest import approx @@ -20,9 +22,8 @@ class TestI2N: def test_model(self, model, expected_score): # assemblies objectome = load_assembly() - probabilities = pd.read_pickle(os.path.join(os.path.dirname(__file__), - f'{model}-probabilities.pkl'))['data'] - probabilities = BehavioralAssembly(probabilities) + probabilities = Path(__file__).parent / f'{model}-probabilities.nc' + probabilities = BehavioralAssembly(xr.load_dataarray(probabilities)) # metric i2n = I2n() score = i2n(probabilities, objectome) diff --git a/tests/test_metrics/test_transformations.py b/tests/test_metrics/test_transformations.py index 74384eab0..ea73b9787 100644 --- a/tests/test_metrics/test_transformations.py +++ b/tests/test_metrics/test_transformations.py @@ -154,14 +154,19 @@ def test_no_expand_raw_level(self): class RawMetricPlaceholder(Metric): def __call__(self, assembly, *args, **kwargs): result = Score([assembly.values[0]], dims=['dim']) - raw = result.copy() - raw['dim_id'] = 'dim', [assembly.values[1]] - raw['division_coord'] = 'dim', [assembly.values[2]] + raw = Score(result.copy(), coords={ + 'dim_id': ('dim', [assembly.values[1]]), + 'division_coord': ('dim', [assembly.values[2]]) + }) result.attrs['raw'] = 
raw return result metric = RawMetricPlaceholder() result = transformation(assembly, apply=metric) + assert result.dims == ("division_coord", "dim") assert hasattr(result, 'raw') - assert 'division_coord' not in result.raw # no dimension + assert result.raw.dims == ("dim",) + assert 'division_coord' not in result.raw.dims # no dimension assert hasattr(result.raw, 'division_coord') # but a level + assert result.raw["dim"].variable.level_names == ["dim_id", "division_coord"] +