DeepGaze I, updates
Signed-off-by: Matthias Kümmerer <[email protected]>
matthias-k committed Jun 7, 2021
1 parent a49c464 commit e146c0c
Showing 5 changed files with 70 additions and 20 deletions.
21 changes: 18 additions & 3 deletions README.md
@@ -1,6 +1,6 @@
# DeepGaze II and DeepGaze IIE
# DeepGaze I, DeepGaze II and DeepGaze IIE

This repository contains the pytorch implementations of DeepGaze II and DeepGaze IIE
This repository contains the pytorch implementations of DeepGaze I, DeepGaze II and DeepGaze IIE

This is how to use the pretrained DeepGaze IIE model:

@@ -9,9 +9,12 @@ from scipy.misc import face
from scipy.ndimage import zoom
import torch

import deepgaze_pytorch

DEVICE = 'cuda'

model = deepgaze_pytorch.deepgaze2e(pretrained=True).to(DEVICE)
# you can use DeepGazeI or DeepGazeIIE
model = deepgaze_pytorch.DeepGazeIIE(pretrained=True).to(DEVICE)

image = face()

@@ -29,8 +32,20 @@ centerbias_tensor = torch.tensor([centerbias]).to(DEVICE)

log_density_prediction = model(image_tensor, centerbias_tensor)
```
Please note that all DeepGaze models have been trained on the MIT1003 dataset, which has a resolution of 35 pixels per degree of visual angle and an image size of mostly 1024 pixels on the longer side. Depending on how your images have been presented, you might have to downscale or upscale them before passing them to the DeepGaze models.
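
As a rough illustration (not part of the repository): assuming you know the pixels-per-degree value of your own presentation setup (the value below is hypothetical), rescaling an image to the MIT1003 resolution could look like this:

```python
from scipy.ndimage import zoom

# hypothetical value: pixels per degree of visual angle in *your* setup
your_pixels_per_degree = 50

# rescale height and width so the image matches the 35 px/deg of MIT1003,
# keep the color channels unchanged
scale_factor = 35 / your_pixels_per_degree
rescaled_image = zoom(image, (scale_factor, scale_factor, 1), order=1)
```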

### Notes about the implementations

* `DeepGaze I`: Please note that the included DeepGaze I model is not exactly the one from the original paper. The original model used Caffe for AlexNet and Theano for the linear readout and was trained using the SFO optimizer. Here, we use the torch implementation of AlexNet (without any adaptations) and the DeepGaze II torch implementation with a simple linear readout network (see the snippet below). The model has been retrained with Adam, but still on the same dataset (all images of MIT1003 that are of size 1024x768). Also, we no longer use the sparsity penalty.
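
For reference, the simple linear readout mentioned above is just a single bias-free 1×1 convolution over the 256 AlexNet feature channels; this snippet mirrors the `readout_network` defined in the new `deepgaze_pytorch/deepgaze1.py` added in this commit:

```python
from collections import OrderedDict
import torch.nn as nn

# DeepGaze I readout: one bias-free 1x1 convolution mapping the
# 256 AlexNet feature channels to a single saliency channel
readout_network = nn.Sequential(OrderedDict([
    ('conv0', nn.Conv2d(256, 1, (1, 1), bias=False)),
]))
```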


### References

If you use these models, please cite the corresponding papers:

* DeepGaze I: [Kümmerer, M., Theis, L., & Bethge, M. (2015). Deep Gaze I: Boosting Saliency Prediction with Feature Maps Trained on ImageNet. ICLR Workshop Track](http://arxiv.org/abs/1411.1045)
* DeepGaze II: [Kümmerer, M., Wallis, T. S. A., Gatys, L. A., & Bethge, M. (2017). Understanding Low- and High-Level Contributions to Fixation Prediction. 4789–4798.](http://openaccess.thecvf.com/content_iccv_2017/html/Kummerer_Understanding_Low-_and_ICCV_2017_paper.html)
* DeepGaze IIE: [Linardos, A., Kümmerer, M., Press, O., & Bethge, M. (2021). Calibrated prediction in and out-of-domain for state-of-the-art saliency modeling. ArXiv:2105.12441 [Cs]](http://arxiv.org/abs/2105.12441)
4 changes: 2 additions & 2 deletions deepgaze_pytorch/__init__.py
@@ -1,2 +1,2 @@
from .deepgaze import DeepGazeII, DeepGazeIII, FeatureExtractor
from .deepgaze2e import deepgaze2e
from .deepgaze1 import DeepGazeI
from .deepgaze2e import DeepGazeIIE
34 changes: 34 additions & 0 deletions deepgaze_pytorch/deepgaze1.py
@@ -0,0 +1,34 @@
from collections import OrderedDict

import torch
import torch.nn as nn

from torch.utils import model_zoo

from .features.alexnet import RGBalexnet
from .modules import FeatureExtractor, Finalizer, DeepGazeII as TorchDeepGazeII


class DeepGazeI(TorchDeepGazeII):
"""DeepGaze I model
Kümmerer, M., Theis, L., & Bethge, M. (2015). Deep Gaze I: Boosting Saliency Prediction with Feature Maps Trained on ImageNet. ICLR Workshop Track. http://arxiv.org/abs/1411.1045
"""
def __init__(self, pretrained=True):
features = RGBalexnet()
feature_extractor = FeatureExtractor(features, ['1.features.10'])

readout_network = nn.Sequential(OrderedDict([
('conv0', nn.Conv2d(256, 1, (1, 1), bias=False)),
]))

super().__init__(
features=feature_extractor,
readout_network=readout_network,
downsample=2,
readout_factor=4,
saliency_map_factor=4,
)

if pretrained:
raise NotImplementedError()
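
A usage sketch (not part of the commit itself): since loading pretrained weights is not implemented yet in this version (`pretrained=True` raises `NotImplementedError`), the new model can only be instantiated untrained, e.g.:

```python
import deepgaze_pytorch

# build DeepGaze I without pretrained weights
model = deepgaze_pytorch.DeepGazeI(pretrained=False)
```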
31 changes: 16 additions & 15 deletions deepgaze_pytorch/deepgaze2e.py
@@ -9,7 +9,7 @@

from torch.utils import model_zoo

from .deepgaze import FeatureExtractor, Finalizer, DeepGazeIIIMixture, MixtureModel
from .modules import FeatureExtractor, Finalizer, DeepGazeIIIMixture, MixtureModel

from .layers import (
Conv2dMultiInput,
@@ -69,12 +69,12 @@ def build_saliency_network(input_channels):
('conv0', nn.Conv2d(input_channels, 8, (1, 1), bias=False)),
('bias0', Bias(8)),
('softplus0', nn.Softplus()),

('layernorm1', LayerNorm(8)),
('conv1', nn.Conv2d(8, 16, (1, 1), bias=False)),
('bias1', Bias(16)),
('softplus1', nn.Softplus()),

('layernorm2', LayerNorm(16)),
('conv2', nn.Conv2d(16, 1, (1, 1), bias=False)),
('bias2', Bias(1)),
@@ -88,19 +88,16 @@ def build_fixation_selection_network():
('conv0', Conv2dMultiInput([1, 0], 128, (1, 1), bias=False)),
('bias0', Bias(128)),
('softplus0', nn.Softplus()),

('layernorm1', LayerNorm(128)),
('conv1', nn.Conv2d(128, 16, (1, 1), bias=False)),
('bias1', Bias(16)),
('softplus1', nn.Softplus()),

#('layernorm2', LayerNorm(16)),

('conv2', nn.Conv2d(16, 1, (1, 1), bias=False)),
]))




def build_deepgaze_mixture(backbone_config, components=10):
feature_class = import_class(backbone_config['type'])
features = feature_class()
@@ -133,15 +130,19 @@ def build_deepgaze_mixture(backbone_config, components=10):
)


def deepgaze2e(pretrained=True):
    backbone_models = [build_deepgaze_mixture(backbone_config, components=3 * 10) for backbone_config in BACKBONES]
    model = MixtureModel(backbone_models)

    if pretrained:
        model.load_state_dict(model_zoo.load_url('https://github.com/matthias-k/DeepGaze/releases/download/v1.0.0/deepgaze2e.pth'))

    return model

class DeepGazeIIE(MixtureModel):
    """DeepGazeIIE model

    :note
    See Linardos, A., Kümmerer, M., Press, O., & Bethge, M. (2021). Calibrated prediction in and out-of-domain for state-of-the-art saliency modeling. ArXiv:2105.12441 [Cs], http://arxiv.org/abs/2105.12441
    """
    def __init__(self, pretrained=True):
        # we average over 3 instances per backbone, each instance has 10 crossvalidation folds
        backbone_models = [build_deepgaze_mixture(backbone_config, components=3 * 10) for backbone_config in BACKBONES]
        super().__init__(backbone_models)

        if pretrained:
            self.load_state_dict(model_zoo.load_url('https://github.com/matthias-k/DeepGaze/releases/download/v1.0.0/deepgaze2e.pth'))


def import_class(name):
File renamed without changes.
