From 12f1e1c8eeed4db79ddc08509dabc9183cc00ea9 Mon Sep 17 00:00:00 2001 From: Mike Ferguson Date: Thu, 16 May 2024 08:52:10 -0400 Subject: [PATCH 1/6] added cvt_cvt-w24-384-in22k_finetuned-in1k_4 model --- .../__init__.py | 9 ++ .../model.py | 135 ++++++++++++++++++ .../requirements.txt | 6 + .../test.py | 8 ++ 4 files changed, 158 insertions(+) create mode 100644 brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/__init__.py create mode 100644 brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py create mode 100644 brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt create mode 100644 brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/test.py diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/__init__.py b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/__init__.py new file mode 100644 index 000000000..7059cbedf --- /dev/null +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/__init__.py @@ -0,0 +1,9 @@ +from brainscore_vision import model_registry +from brainscore_vision.model_helpers.brain_transformation import ModelCommitment +from .model import get_model, get_layers + + +model_registry['cvt_cvt-w24-384-in22k_finetuned-in1k_4'] = \ + lambda: ModelCommitment(identifier='cvt_cvt-w24-384-in22k_finetuned-in1k_4', + activations_model=get_model('cvt_cvt-w24-384-in22k_finetuned-in1k_4'), + layers=get_layers('cvt_cvt-w24-384-in22k_finetuned-in1k_4')) \ No newline at end of file diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py new file mode 100644 index 000000000..5f3e6d454 --- /dev/null +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py @@ -0,0 +1,135 @@ +from model_helpers.check_submission import check_models +import functools +from transformers import AutoFeatureExtractor, CvtForImageClassification +from model_helpers.activations.pytorch import PytorchWrapper +from PIL import Image +import numpy as np +import torch + +""" +Template module for a base model submission to brain-score +""" + + +def get_model(name): + assert name == 'cvt_cvt-w24-384-in22k_finetuned-in1k_4' + # https://huggingface.co/models?sort=downloads&search=cvt + image_size = 384 + processor = AutoFeatureExtractor.from_pretrained('microsoft/cvt-w24-384-22k') + model = CvtForImageClassification.from_pretrained('microsoft/cvt-w24-384-22k') + preprocessing = functools.partial(load_preprocess_images, processor=processor, image_size=image_size) + wrapper = PytorchWrapper(identifier=name, model=model, preprocessing=preprocessing) + wrapper.image_size = image_size + + return wrapper + + +def get_layers(name): + assert name == 'cvt_cvt-w24-384-in22k_finetuned-in1k_4' + layers = [] + layers += [f'cvt.encoder.stages.0.layers.{i}' for i in range(2)] + layers += [f'cvt.encoder.stages.1.layers.{i}' for i in range(2)] + layers += [f'cvt.encoder.stages.2.layers.{i}' for i in range(20)] + layers += ['layernorm'] + layers = ['cvt.encoder.stages.2.layers.3'] + return layers + + +def get_bibtex(model_identifier): + """ + A method returning the bibtex reference of the requested model as a string. + """ + return '' + + +def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs): + images = load_images(image_filepaths) + # images = [, ...] + images = [image.resize((image_size, image_size)) for image in images] + if processor is not None: + images = [processor(images=image, return_tensors="pt", **kwargs) for image in images] + if len(images[0].keys()) != 1: + raise NotImplementedError(f'unknown processor for getting model {processor}') + assert list(images[0].keys())[0] == 'pixel_values' + images = [image['pixel_values'] for image in images] + images = torch.cat(images) + images = images.cpu().numpy() + else: + images = preprocess_images(images, image_size=image_size, **kwargs) + return images + + +def load_images(image_filepaths): + return [load_image(image_filepath) for image_filepath in image_filepaths] + + +def load_image(image_filepath): + with Image.open(image_filepath) as pil_image: + if 'L' not in pil_image.mode.upper() and 'A' not in pil_image.mode.upper() \ + and 'P' not in pil_image.mode.upper(): # not binary and not alpha and not palletized + # work around to https://github.com/python-pillow/Pillow/issues/1144, + # see https://stackoverflow.com/a/30376272/2225200 + return pil_image.copy() + else: # make sure potential binary images are in RGB + rgb_image = Image.new("RGB", pil_image.size) + rgb_image.paste(pil_image) + return rgb_image + + +def preprocess_images(images, image_size, **kwargs): + preprocess = torchvision_preprocess_input(image_size, **kwargs) + images = [preprocess(image) for image in images] + images = np.concatenate(images) + return images + + +def torchvision_preprocess_input(image_size, **kwargs): + from torchvision import transforms + return transforms.Compose([ + transforms.Resize((image_size, image_size)), + torchvision_preprocess(**kwargs), + ]) + + +def torchvision_preprocess(normalize_mean=(0.485, 0.456, 0.406), normalize_std=(0.229, 0.224, 0.225)): + from torchvision import transforms + return transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=normalize_mean, std=normalize_std), + lambda img: img.unsqueeze(0) + ]) + + +def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3): + ''' + Create a static video with the same image in all frames. + Args: + image (PIL.Image.Image): Input image. + num_frames (int): Number of frames in the video. + Returns: + result (np.ndarray): np array of frames of shape (num_frames, height, width, 3). + ''' + frames = [] + for _ in range(num_frames): + frame = np.array(image) + if normalize_0to1: + frame = frame / 255. + if channel_dim == 1: + frame = frame.transpose(2, 0, 1) + frames.append(frame) + return np.stack(frames) + + +if __name__ == '__main__': + # Use this method to ensure the correctness of the BaseModel implementations. + # It executes a mock run of brain-score benchmarks. + check_models.check_base_models(__name__) + +""" +Notes on the error: + +- 'channel_x' key error: +# 'embeddings.patch_embeddings.projection', +https://github.com/search?q=repo%3Abrain-score%2Fmodel-tools%20channel_x&type=code + +""" \ No newline at end of file diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt new file mode 100644 index 000000000..5b02f2ff1 --- /dev/null +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt @@ -0,0 +1,6 @@ +numpy +torch +transformers +ssl +PIL +functools \ No newline at end of file diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/test.py b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/test.py new file mode 100644 index 000000000..c48b4a7e7 --- /dev/null +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/test.py @@ -0,0 +1,8 @@ +import pytest +import brainscore_vision + + +@pytest.mark.travis_slow +def test_has_identifier(): + model = brainscore_vision.load_model('cvt_cvt-w24-384-in22k_finetuned-in1k_4') + assert model.identifier == 'cvt_cvt-w24-384-in22k_finetuned-in1k_4' \ No newline at end of file From b5689c5dbc01f26a24c28d5414b215c839c63e7e Mon Sep 17 00:00:00 2001 From: Mike Ferguson Date: Thu, 16 May 2024 09:04:00 -0400 Subject: [PATCH 2/6] fixes model_helpers import --- .../models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py index 5f3e6d454..87e28fcf2 100644 --- a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py @@ -1,7 +1,7 @@ -from model_helpers.check_submission import check_models +from brainscore_vision.model_helpers.check_submission import check_models import functools from transformers import AutoFeatureExtractor, CvtForImageClassification -from model_helpers.activations.pytorch import PytorchWrapper +from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper from PIL import Image import numpy as np import torch From 080a8b3ada194fcd3599ad30c0e39964f895124c Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Thu, 16 May 2024 10:48:33 -0400 Subject: [PATCH 3/6] Update brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py Co-authored-by: Martin Schrimpf --- .../model.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py index 87e28fcf2..1f64a84a1 100644 --- a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py @@ -39,7 +39,25 @@ def get_bibtex(model_identifier): """ A method returning the bibtex reference of the requested model as a string. """ - return '' + return """@article{DBLP:journals/corr/abs-2103-15808, + author = {Haiping Wu and + Bin Xiao and + Noel Codella and + Mengchen Liu and + Xiyang Dai and + Lu Yuan and + Lei Zhang}, + title = {CvT: Introducing Convolutions to Vision Transformers}, + journal = {CoRR}, + volume = {abs/2103.15808}, + year = {2021}, + url = {https://arxiv.org/abs/2103.15808}, + eprinttype = {arXiv}, + eprint = {2103.15808}, + timestamp = {Tue, 18 Oct 2022 08:35:30 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2103-15808.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +}""" def load_preprocess_images(image_filepaths, image_size, processor=None, **kwargs): From decada2cb169854a848ca4412ea4211de360f4d7 Mon Sep 17 00:00:00 2001 From: Michael Ferguson Date: Thu, 16 May 2024 10:48:43 -0400 Subject: [PATCH 4/6] Update brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt Co-authored-by: Martin Schrimpf --- .../cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt index 5b02f2ff1..aaf762ea5 100644 --- a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt @@ -2,5 +2,4 @@ numpy torch transformers ssl -PIL -functools \ No newline at end of file +PIL \ No newline at end of file From 9fb6a023a335f16014251ee66cc297a6aa7bfce1 Mon Sep 17 00:00:00 2001 From: Mike Ferguson Date: Thu, 16 May 2024 11:43:28 -0400 Subject: [PATCH 5/6] PR comments --- .../cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py | 13 +++++++++++-- .../requirements.txt | 1 - 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py index 1f64a84a1..900f6573f 100644 --- a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/model.py @@ -1,10 +1,11 @@ from brainscore_vision.model_helpers.check_submission import check_models import functools +import numpy as np +import torch from transformers import AutoFeatureExtractor, CvtForImageClassification from brainscore_vision.model_helpers.activations.pytorch import PytorchWrapper from PIL import Image -import numpy as np -import torch + """ Template module for a base model submission to brain-score @@ -143,6 +144,14 @@ def create_static_video(image, num_frames, normalize_0to1=False, channel_dim=3): # It executes a mock run of brain-score benchmarks. check_models.check_base_models(__name__) + +''' +Below Notes are from the original model file from Brain-Score 1.0, and +kept in this file for posterity. + +''' + + """ Notes on the error: diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt index aaf762ea5..304640474 100644 --- a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt @@ -1,5 +1,4 @@ numpy torch transformers -ssl PIL \ No newline at end of file From 7649a7ef8d4f7326c9c45173769a78beabb48870 Mon Sep 17 00:00:00 2001 From: Mike Ferguson Date: Thu, 16 May 2024 12:00:45 -0400 Subject: [PATCH 6/6] PR comments --- .../cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt index 304640474..08b60182a 100644 --- a/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt +++ b/brainscore_vision/models/cvt_cvt-w24-384-in22k_finetuned-in1k_4/requirements.txt @@ -1,4 +1,3 @@ numpy torch -transformers -PIL \ No newline at end of file +transformers \ No newline at end of file