Commit 6a685b7

Merge pull request #52 from matthias-k/bugfix-hdf5-filenames-for-substimuli

Bugfix hdf5 filenames for substimuli
matthias-k authored Mar 2, 2024
2 parents a34a69e + 761fe10 commit 6a685b7
Showing 4 changed files with 151 additions and 47 deletions.
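Background: the precomputed models previously derived lookup keys with get_minimal_unique_filenames. For a subset of stimuli, the minimal unique filenames can come out shorter than the keys that were written for the full stimulus set, so predictions could no longer be found. A minimal sketch of the failure mode, with invented paths (the real keys depend on how the predictions were exported):

    from pysaliency.utils import get_minimal_unique_filenames

    full_filenames = ['set_01/img_001.png', 'set_02/img_001.png']

    # exported keys keep the directory part, since the basenames collide:
    get_minimal_unique_filenames(full_filenames)
    # e.g. ['set_01/img_001.png', 'set_02/img_001.png']

    # for a sub-stimuli object holding only the first file, the directory is
    # no longer needed for uniqueness and gets stripped:
    get_minimal_unique_filenames(full_filenames[:1])
    # e.g. ['img_001.png']  -- no longer matches the stored key

The new helpers in this commit instead shorten the filenames only as far as needed to match the keys that actually exist.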
16 changes: 13 additions & 3 deletions pysaliency/datasets.py
@@ -1156,12 +1156,22 @@ def __init__(self, stimuli, attributes=None):
def __len__(self):
return len(self.stimuli)

+    def _get_attribute_for_stimulus_subset(self, index):
+        sub_attributes = {}
+        for attribute_name, attribute_value in self.attributes.items():
+            if isinstance(index, (list, np.ndarray)) and not isinstance(attribute_value, np.ndarray):
+                sub_attributes[attribute_name] = [attribute_value[i] for i in index]
+            else:
+                sub_attributes[attribute_name] = attribute_value[index]
+
+        return sub_attributes

def __getitem__(self, index):
if isinstance(index, slice):
-            attributes = {key: value[index] for key, value in self.attributes.items()}
+            attributes = self._get_attribute_for_stimulus_subset(index)
return ObjectStimuli([self.stimulus_objects[i] for i in range(len(self))[index]], attributes=attributes)
elif isinstance(index, list):
-            attributes = {key: value[index] for key, value in self.attributes.items()}
+            attributes = self._get_attribute_for_stimulus_subset(index)
return ObjectStimuli([self.stimulus_objects[i] for i in index], attributes=attributes)
else:
return self.stimulus_objects[index]
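Note on the new helper: list-valued attributes cannot be indexed with a list directly, so _get_attribute_for_stimulus_subset selects elements explicitly in that case, while numpy arrays use fancy indexing. A small sketch of the two paths (values invented):

    import numpy as np

    attributes = {'dva': [1.0, 2.0, 3.0], 'label': np.array([0, 1, 0])}
    index = [0, 2]

    # list attribute + list index -> explicit element selection
    [attributes['dva'][i] for i in index]   # [1.0, 3.0]

    # ndarray attribute -> fancy indexing handles the list directly
    attributes['label'][index]              # array([0, 0])

For slices, attribute_value[index] works for lists and arrays alike, so no special case is needed.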
@@ -1337,7 +1347,7 @@ def __getitem__(self, index):
if isinstance(index, (list, np.ndarray)):
filenames = [self.filenames[i] for i in index]
shapes = [self.shapes[i] for i in index]
-            attributes = {key: [value[i] for i in index] for key, value in self.attributes.items()}
+            attributes = self._get_attribute_for_stimulus_subset(index)
return type(self)(filenames=filenames, shapes=shapes, attributes=attributes, cached=self.cached)
else:
return self.stimulus_objects[index]
77 changes: 52 additions & 25 deletions pysaliency/precomputed_models.py
@@ -1,4 +1,4 @@
-from __future__ import print_function, division, absolute_import
+from __future__ import absolute_import, division, print_function

import glob
import os.path
@@ -8,14 +8,14 @@

import numpy as np
from imageio import imread
-from scipy.special import logsumexp
from scipy.io import loadmat
+from scipy.special import logsumexp
from tqdm import tqdm

+from .datasets import FileStimuli, get_image_hash
from .models import Model
from .saliency_map_models import SaliencyMapModel
-from .datasets import get_image_hash, FileStimuli
-from .utils import get_minimal_unique_filenames
+from .utils import full_split, get_minimal_unique_filenames


def get_stimuli_filenames(stimuli):
@@ -28,6 +28,44 @@ def get_stimuli_filenames(stimuli):
return stimuli.filenames


+def get_keys_from_filenames(filenames, keys):
+    """checks how far filenames have to be shortened to get the correct hdf5 or other keys"""
+    first_filename_parts = full_split(filenames[0])
+    for part_index in range(len(first_filename_parts)):
+        remaining_filename = os.path.join(*first_filename_parts[part_index:])
+        if remaining_filename in keys:
+            break
+    else:
+        raise ValueError('No common prefix found from {}'.format(filenames[0]))
+
+    filename_keys = []
+    for filename in filenames:
+        filename_parts = full_split(filename)
+        remaining_filename = os.path.join(*filename_parts[part_index:])
+        filename_keys.append(remaining_filename)
+
+    return filename_keys
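For illustration, a sketch of what get_keys_from_filenames computes, with invented paths: leading path components are stripped from the first filename until the remainder matches an existing key, and the same cut is then applied to all filenames:

    filenames = ['/data/experiment/set_01/img_001', '/data/experiment/set_01/img_002']
    keys = ['set_01/img_001', 'set_01/img_002', 'set_02/img_001']
    get_keys_from_filenames(filenames, keys)
    # -> ['set_01/img_001', 'set_01/img_002']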


+def get_keys_from_filenames_with_prefix(filenames, keys):
+    """checks how far filenames have to be shortened to get the correct hdf5 or other keys, where the keys might have a prefix"""
+    first_key_parts = full_split(keys[0])
+
+    for key_part_index in range(len(first_key_parts)):
+        remaining_keys = [os.path.join(*full_split(key)[key_part_index:]) for key in keys]
+        try:
+            filename_keys = get_keys_from_filenames(filenames, remaining_keys)
+        except ValueError:
+            continue
+        else:
+            full_filename_keys = []
+            for key, filename_key in zip(keys, filename_keys):
+                full_filename_keys.append(os.path.join(*full_split(key)[:key_part_index], filename_key))
+            return full_filename_keys
+
+    raise ValueError('No common prefix found from {} and {}'.format(filenames[0], keys[0]))
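And a sketch for the prefixed variant, again with invented paths: the keys may carry extra leading components (for example an archive root directory), which are ignored for matching and re-attached in the returned keys:

    filenames = ['/stimuli/set_01/img_001']
    keys = ['predictions/set_01/img_001', 'predictions/set_02/img_001']
    get_keys_from_filenames_with_prefix(filenames, keys)
    # -> ['predictions/set_01/img_001']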


def export_model_to_hdf5(model, stimuli, filename, compression=9, overwrite=True, flush=False):
"""Export pysaliency model predictions for stimuli into hdf5 file
@@ -83,8 +121,8 @@ def _file_for_stimulus(self, stimulus):

try:
stimulus_index = self.stimuli.stimulus_ids.index(stimulus_id)
-        except IndexError:
-            raise IndexError("Stimulus id '{}' not found in stimuli!".format(stimulus_id))
+        except IndexError as exc:
+            raise IndexError("Stimulus id '{}' not found in stimuli!".format(stimulus_id)) from exc

return self.files[stimulus_index]

@@ -114,8 +152,8 @@ def __init__(self, stimuli, directory, **kwargs):
files = [os.path.relpath(filename, start=directory) for filename in glob.glob(os.path.join(directory, '**', '*'), recursive=True)]
stems = [os.path.splitext(f)[0] for f in files]

-        stimuli_files = get_minimal_unique_filenames(stimulus_filenames)
-        stimuli_stems = [os.path.splitext(f)[0] for f in stimuli_files]
+        stimuli_stems = [os.path.splitext(f)[0] for f in stimulus_filenames]
+        stimuli_stems = get_keys_from_filenames(stimuli_stems, stems)

if not set(stimuli_stems).issubset(stems):
missing_predictions = set(stimuli_stems).difference(stems)
@@ -197,14 +235,6 @@ def get_keys_recursive(group, prefix=''):

return keys

-def get_stimulus_key(stimulus_name, all_keys):
-    matching_keys = [key for key in all_keys if key.endswith(stimulus_name)]
-    if len(matching_keys) == 0:
-        raise ValueError(f"Stimulus {stimulus_name} not found in hdf5 file!")
-    elif len(matching_keys) > 1:
-        raise ValueError(f"Stimulus {stimulus_name} not unique in hdf5 file!")
-    return matching_keys[0]


class HDF5SaliencyMapModel(SaliencyMapModel):
""" exposes a HDF5 file with saliency maps as pysaliency model
@@ -220,23 +250,20 @@ def __init__(self, stimuli, filename, check_shape=True, **kwargs):
self.filename = filename
self.check_shape = check_shape

-        self.names = get_minimal_unique_filenames(
-            get_stimuli_filenames(stimuli)
-        )

import h5py
self.hdf5_file = h5py.File(self.filename, 'r')
self.all_keys = get_keys_recursive(self.hdf5_file)

+        self.names = get_keys_from_filenames(get_stimuli_filenames(stimuli), self.all_keys)

def _saliency_map(self, stimulus):
stimulus_id = get_image_hash(stimulus)
stimulus_index = self.stimuli.stimulus_ids.index(stimulus_id)
-        stimulus_filename = self.names[stimulus_index]
-        stimulus_key = get_stimulus_key(stimulus_filename, self.all_keys)
+        stimulus_key = self.names[stimulus_index]
smap = self.hdf5_file[stimulus_key][:]
if not smap.shape == (stimulus.shape[0], stimulus.shape[1]):
if self.check_shape:
-                warnings.warn('Wrong shape for stimulus {}'.format(stimulus_key))
+                warnings.warn('Wrong shape for stimulus {}'.format(stimulus_key), stacklevel=4)
return smap
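A hedged usage sketch (filename invented, stimuli assumed to be a FileStimuli whose filenames correspond to the exported predictions): since keys are now resolved against the actual contents of the HDF5 file, the model also works when constructed with a subset of the stimuli that the predictions were exported for:

    from pysaliency.precomputed_models import HDF5SaliencyMapModel

    sub_stimuli = stimuli[[0, 2]]
    model = HDF5SaliencyMapModel(sub_stimuli, 'saliency_maps.hdf5')
    smap = model.saliency_map(sub_stimuli[0])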


@@ -302,8 +329,8 @@ def __init__(self, stimuli, archive_file, *args, **kwargs):
files = [f for f in files if '__macosx' not in f.lower()]
stems = [os.path.splitext(f)[0] for f in files]

-        stimuli_files = get_minimal_unique_filenames(get_stimuli_filenames(stimuli))
-        stimuli_stems = [os.path.splitext(f)[0] for f in stimuli_files]
+        stimuli_stems = [os.path.splitext(f)[0] for f in get_stimuli_filenames(stimuli)]
+        stimuli_stems = get_keys_from_filenames_with_prefix(stimuli_stems, stems)

prediction_filenames = []
for stimuli_stem in stimuli_stems:
10 changes: 10 additions & 0 deletions tests/test_datasets.py
@@ -561,6 +561,11 @@ def test_stimuli_attributes(stimuli_with_attributes, tmp_path):
assert stimuli_with_attributes.attributes['dva'][:5] == partial_stimuli.attributes['dva']
assert stimuli_with_attributes.attributes['some_strings'][:5] == partial_stimuli.attributes['some_strings']

+    partial_stimuli = stimuli_with_attributes[[1, 2, 6]]
+    assert stimuli_with_attributes.attributes.keys() == partial_stimuli.attributes.keys()
+    assert list(np.array(stimuli_with_attributes.attributes['dva'])[[1, 2, 6]]) == partial_stimuli.attributes['dva']
+    assert list(np.array(stimuli_with_attributes.attributes['some_strings'])[[1, 2, 6]]) == partial_stimuli.attributes['some_strings']


@pytest.fixture
def file_stimuli_with_attributes(tmpdir):
@@ -601,6 +606,11 @@ def test_file_stimuli_attributes(file_stimuli_with_attributes, tmp_path):
assert file_stimuli_with_attributes.attributes['dva'][:5] == partial_stimuli.attributes['dva']
assert file_stimuli_with_attributes.attributes['some_strings'][:5] == partial_stimuli.attributes['some_strings']

+    partial_stimuli = file_stimuli_with_attributes[[1, 2, 6]]
+    assert file_stimuli_with_attributes.attributes.keys() == partial_stimuli.attributes.keys()
+    assert list(np.array(file_stimuli_with_attributes.attributes['dva'])[[1, 2, 6]]) == partial_stimuli.attributes['dva']
+    assert list(np.array(file_stimuli_with_attributes.attributes['some_strings'])[[1, 2, 6]]) == partial_stimuli.attributes['some_strings']


def test_concatenate_stimuli_with_attributes(stimuli_with_attributes, file_stimuli_with_attributes):
concatenated_stimuli = pysaliency.datasets.concatenate_stimuli([stimuli_with_attributes, file_stimuli_with_attributes])
95 changes: 76 additions & 19 deletions tests/test_precomputed_models.py
@@ -1,24 +1,28 @@
-from __future__ import division, print_function, absolute_import, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals

import os
import pathlib
import zipfile

+import numpy as np
import pytest

from imageio import imsave
-import numpy as np

import pysaliency
from pysaliency import export_model_to_hdf5


+class TestSaliencyMapModel(pysaliency.SaliencyMapModel):
+    def _saliency_map(self, stimulus):
+        stimulus_data = pysaliency.datasets.as_stimulus(stimulus).stimulus_data
+        return np.array(stimulus_data, dtype=float)


@pytest.fixture
def file_stimuli(tmpdir):
filenames = []
for i in range(3):
-        # TODO: change back to stimulus_... once this is supported again
-        filename = tmpdir.join('_stimulus_{:04d}.png'.format(i))
+        filename = tmpdir.join('stimulus_{:04d}.png'.format(i))
imsave(str(filename), np.random.randint(low=0, high=255, size=(100, 100, 3), dtype=np.uint8))
filenames.append(str(filename))

@@ -37,8 +41,7 @@ def stimuli_with_filenames(tmpdir):
filenames = []
stimuli = []
for i in range(3):
-        # TODO: change back to stimulus_... once this is supported again
-        filename = tmpdir.join('_stimulus_{:04d}.png'.format(i))
+        filename = tmpdir.join('stimulus_{:04d}.png'.format(i))
stimuli.append(np.random.randint(low=0, high=255, size=(100, 100, 3), dtype=np.uint8))
filenames.append(str(filename))

@@ -61,6 +64,14 @@ def stimuli(file_stimuli, stimuli_with_filenames, request):
raise ValueError(request.param)


+@pytest.fixture
+def sub_stimuli(stimuli):
+    unique_filenames = pysaliency.utils.get_minimal_unique_filenames(
+        pysaliency.precomputed_models.get_stimuli_filenames(stimuli)
+    )
+    return stimuli[[i for i, f in enumerate(unique_filenames) if f.startswith('sub_directory_0001')]]


@pytest.fixture
def saliency_maps_in_directory(file_stimuli, tmpdir):
stimuli_files = pysaliency.utils.get_minimal_unique_filenames(file_stimuli.filenames)
@@ -80,7 +91,7 @@ def saliency_maps_in_directory(file_stimuli, tmpdir):


def test_export_model_to_hdf5(stimuli, tmpdir):
-    model = pysaliency.UniformModel()
+    model = pysaliency.models.SaliencyMapNormalizingModel(TestSaliencyMapModel())
filename = str(tmpdir.join('model.hdf5'))
export_model_to_hdf5(model, stimuli, filename)

@@ -89,6 +100,16 @@ def test_export_model_to_hdf5(stimuli, tmpdir):
np.testing.assert_allclose(model.log_density(s), model2.log_density(s))


+def test_hdf5_model_sub_stimuli(stimuli, sub_stimuli, tmpdir):
+    model = pysaliency.models.SaliencyMapNormalizingModel(TestSaliencyMapModel())
+    filename = str(tmpdir.join('model.hdf5'))
+    export_model_to_hdf5(model, stimuli, filename)
+
+    model2 = pysaliency.HDF5Model(sub_stimuli, filename)
+    for s in sub_stimuli:
+        np.testing.assert_allclose(model.log_density(s), model2.log_density(s))


def test_export_model_overwrite(file_stimuli, tmpdir):
model1 = pysaliency.GaussianSaliencyMapModel(width=0.1)
model2 = pysaliency.GaussianSaliencyMapModel(width=0.8)
@@ -124,35 +145,71 @@ def test_export_model_no_overwrite(file_stimuli, tmpdir):
np.testing.assert_allclose(model2.saliency_map(s), model3.saliency_map(s))


-def test_saliency_map_model_from_directory(file_stimuli, saliency_maps_in_directory):
+def test_saliency_map_model_from_directory(stimuli, saliency_maps_in_directory):
directory, predictions = saliency_maps_in_directory
-    model = pysaliency.SaliencyMapModelFromDirectory(file_stimuli, directory)
+    model = pysaliency.SaliencyMapModelFromDirectory(stimuli, directory)

-    for stimulus_index, stimulus in enumerate(file_stimuli):
+    for stimulus_index, stimulus in enumerate(stimuli):
expected = predictions[stimulus_index]
actual = model.saliency_map(stimulus)
np.testing.assert_equal(actual, expected)

-@pytest.mark.skip("currently archivemodels can't handle same stimuli names in directory and subdirectory")
-def test_saliency_map_model_from_archive(file_stimuli, saliency_maps_in_directory, tmpdir):

+def test_saliency_map_model_from_directory_sub_stimuli(stimuli, sub_stimuli, saliency_maps_in_directory):
+    directory, predictions = saliency_maps_in_directory
+    full_model = pysaliency.SaliencyMapModelFromDirectory(stimuli, directory)
+    sub_model = pysaliency.SaliencyMapModelFromDirectory(sub_stimuli, directory)
+
+    for stimulus in sub_stimuli:
+        expected = full_model.saliency_map(stimulus)
+        actual = sub_model.saliency_map(stimulus)
+        np.testing.assert_equal(actual, expected)


+def test_saliency_map_model_from_archive(stimuli, saliency_maps_in_directory, tmpdir):
directory, predictions = saliency_maps_in_directory

archive = tmpdir / 'predictions.zip'

# from https://stackoverflow.com/a/1855118
def zipdir(path, ziph):
-        for root, dirs, files in os.walk(path):
+        for root, _, files in os.walk(path):
for file in files:
-            ziph.write(os.path.join(root, file),
-                       os.path.relpath(os.path.join(root, file),
+            ziph.write(os.path.join(root, file),
+                       os.path.relpath(os.path.join(root, file),
os.path.join(path, '..')))

with zipfile.ZipFile(str(archive), 'w', zipfile.ZIP_DEFLATED) as zipf:
zipdir(str(directory), zipf)

-    model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(file_stimuli, str(archive))
+    model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(stimuli, str(archive))

-    for stimulus_index, stimulus in enumerate(file_stimuli):
+    for stimulus_index, stimulus in enumerate(stimuli):
expected = predictions[stimulus_index]
actual = model.saliency_map(stimulus)
np.testing.assert_equal(actual, expected)


+def test_saliency_map_model_from_archive_sub_stimuli(stimuli, sub_stimuli, saliency_maps_in_directory, tmpdir):
+    directory, predictions = saliency_maps_in_directory
+
+    archive = tmpdir / 'predictions.zip'
+
+    # from https://stackoverflow.com/a/1855118
+    def zipdir(path, ziph):
+        for root, _, files in os.walk(path):
+            for file in files:
+                ziph.write(os.path.join(root, file),
+                           os.path.relpath(os.path.join(root, file),
+                                           os.path.join(path, '..')))
+
+    with zipfile.ZipFile(str(archive), 'w', zipfile.ZIP_DEFLATED) as zipf:
+        zipdir(str(directory), zipf)
+
+    full_model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(stimuli, str(archive))
+    sub_model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(sub_stimuli, str(archive))
+
+    for stimulus in sub_stimuli:
+        expected = full_model.saliency_map(stimulus)
+        actual = sub_model.saliency_map(stimulus)
+        np.testing.assert_equal(actual, expected)
