diff --git a/pysaliency/precomputed_models.py b/pysaliency/precomputed_models.py
index 8fbb0d4..e6ab5b8 100644
--- a/pysaliency/precomputed_models.py
+++ b/pysaliency/precomputed_models.py
@@ -1,4 +1,4 @@
-from __future__ import print_function, division, absolute_import
+from __future__ import absolute_import, division, print_function
 
 import glob
 import os.path
@@ -8,14 +8,14 @@
 
 import numpy as np
 from imageio import imread
-from scipy.special import logsumexp
 from scipy.io import loadmat
+from scipy.special import logsumexp
 from tqdm import tqdm
 
+from .datasets import FileStimuli, get_image_hash
 from .models import Model
 from .saliency_map_models import SaliencyMapModel
-from .datasets import get_image_hash, FileStimuli
-from .utils import get_minimal_unique_filenames
+from .utils import full_split, get_minimal_unique_filenames
 
 
 def get_stimuli_filenames(stimuli):
@@ -28,6 +28,44 @@ def get_stimuli_filenames(stimuli):
     return stimuli.filenames
 
 
+def get_keys_from_filenames(filenames, keys):
+    """checks how many leading path components have to be stripped from the filenames to obtain the correct hdf5 or other keys"""
+    first_filename_parts = full_split(filenames[0])
+    for part_index in range(len(first_filename_parts)):
+        remaining_filename = os.path.join(*first_filename_parts[part_index:])
+        if remaining_filename in keys:
+            break
+    else:
+        raise ValueError('No common prefix found from {}'.format(filenames[0]))
+
+    filename_keys = []
+    for filename in filenames:
+        filename_parts = full_split(filename)
+        remaining_filename = os.path.join(*filename_parts[part_index:])
+        filename_keys.append(remaining_filename)
+
+    return filename_keys
+
+
+def get_keys_from_filenames_with_prefix(filenames, keys):
+    """like get_keys_from_filenames, but handles keys that may carry an additional prefix"""
+    first_key_parts = full_split(keys[0])
+
+    for key_part_index in range(len(first_key_parts)):
+        remaining_keys = [os.path.join(*full_split(key)[key_part_index:]) for key in keys]
+        try:
+            filename_keys = get_keys_from_filenames(filenames, remaining_keys)
+        except ValueError:
+            continue
+        else:
+            full_filename_keys = []
+            for key, filename_key in zip(keys, filename_keys):
+                full_filename_keys.append(os.path.join(*full_split(key)[:key_part_index], filename_key))
+            return full_filename_keys
+
+    raise ValueError('No common prefix found from {} and {}'.format(filenames[0], keys[0]))
+
+
 def export_model_to_hdf5(model, stimuli, filename, compression=9, overwrite=True, flush=False):
     """Export pysaliency model predictions for stimuli into hdf5 file
 
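The two helpers above are the core of this change: instead of shortening stimulus filenames with get_minimal_unique_filenames and hoping the results happen to match the prediction keys, the filenames are now matched against the actual keys. A minimal sketch of the intended behaviour (hypothetical paths, assuming all filenames live at the same directory depth):

    from pysaliency.precomputed_models import get_keys_from_filenames

    filenames = [
        '/data/stimuli/sub_directory_0001/stimulus_0000.png',
        '/data/stimuli/sub_directory_0002/stimulus_0001.png',
    ]
    keys = [
        'sub_directory_0001/stimulus_0000.png',
        'sub_directory_0002/stimulus_0001.png',
    ]

    # leading path components are stripped from the first filename until it
    # matches one of the keys; the same cut is then applied to all filenames
    assert get_keys_from_filenames(filenames, keys) == keys
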
@@ -83,8 +121,8 @@ def _file_for_stimulus(self, stimulus):
 
         try:
             stimulus_index = self.stimuli.stimulus_ids.index(stimulus_id)
-        except IndexError:
-            raise IndexError("Stimulus id '{}' not found in stimuli!".format(stimulus_id))
+        except IndexError as exc:
+            raise IndexError("Stimulus id '{}' not found in stimuli!".format(stimulus_id)) from exc
 
         return self.files[stimulus_index]
@@ -114,8 +152,8 @@ def __init__(self, stimuli, directory, **kwargs):
         files = [os.path.relpath(filename, start=directory) for filename in glob.glob(os.path.join(directory, '**', '*'), recursive=True)]
         stems = [os.path.splitext(f)[0] for f in files]
 
-        stimuli_files = get_minimal_unique_filenames(stimulus_filenames)
-        stimuli_stems = [os.path.splitext(f)[0] for f in stimuli_files]
+        stimuli_stems = [os.path.splitext(f)[0] for f in stimulus_filenames]
+        stimuli_stems = get_keys_from_filenames(stimuli_stems, stems)
 
         if not set(stimuli_stems).issubset(stems):
             missing_predictions = set(stimuli_stems).difference(stems)
@@ -197,14 +235,6 @@ def get_keys_recursive(group, prefix=''):
     return keys
 
 
-def get_stimulus_key(stimulus_name, all_keys):
-    matching_keys = [key for key in all_keys if key.endswith(stimulus_name)]
-    if len(matching_keys) == 0:
-        raise ValueError(f"Stimulus {stimulus_name} not found in hdf5 file!")
-    elif len(matching_keys) > 1:
-        raise ValueError(f"Stimulus {stimulus_name} not unique in hdf5 file!")
-    return matching_keys[0]
-
 
 class HDF5SaliencyMapModel(SaliencyMapModel):
     """ exposes a HDF5 file with saliency maps as pysaliency model
@@ -220,23 +250,20 @@ def __init__(self, stimuli, filename, check_shape=True, **kwargs):
         self.filename = filename
         self.check_shape = check_shape
 
-        self.names = get_minimal_unique_filenames(
-            get_stimuli_filenames(stimuli)
-        )
-
         import h5py
         self.hdf5_file = h5py.File(self.filename, 'r')
 
         self.all_keys = get_keys_recursive(self.hdf5_file)
 
+        self.names = get_keys_from_filenames(get_stimuli_filenames(stimuli), self.all_keys)
+
     def _saliency_map(self, stimulus):
         stimulus_id = get_image_hash(stimulus)
         stimulus_index = self.stimuli.stimulus_ids.index(stimulus_id)
-        stimulus_filename = self.names[stimulus_index]
-        stimulus_key = get_stimulus_key(stimulus_filename, self.all_keys)
+        stimulus_key = self.names[stimulus_index]
         smap = self.hdf5_file[stimulus_key][:]
         if not smap.shape == (stimulus.shape[0], stimulus.shape[1]):
             if self.check_shape:
-                warnings.warn('Wrong shape for stimulus {}'.format(stimulus_key))
+                warnings.warn('Wrong shape for stimulus {}'.format(stimulus_key), stacklevel=4)
         return smap
@@ -302,8 +329,8 @@ def __init__(self, stimuli, archive_file, *args, **kwargs):
         files = [f for f in files if '__macosx' not in f.lower()]
         stems = [os.path.splitext(f)[0] for f in files]
 
-        stimuli_files = get_minimal_unique_filenames(get_stimuli_filenames(stimuli))
-        stimuli_stems = [os.path.splitext(f)[0] for f in stimuli_files]
+        stimuli_stems = [os.path.splitext(f)[0] for f in get_stimuli_filenames(stimuli)]
+        stimuli_stems = get_keys_from_filenames_with_prefix(stimuli_stems, stems)
 
         prediction_filenames = []
         for stimuli_stem in stimuli_stems:
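For archives, the keys themselves may carry a prefix, such as the name of the top-level directory inside the zip file. A sketch of how the prefix variant handles this (hypothetical paths; it assumes all keys share the same prefix):

    from pysaliency.precomputed_models import get_keys_from_filenames_with_prefix

    filenames = [
        '/data/stimuli/sub_directory_0001/stimulus_0000',
        '/data/stimuli/sub_directory_0002/stimulus_0001',
    ]
    keys = [
        'predictions/sub_directory_0001/stimulus_0000',
        'predictions/sub_directory_0002/stimulus_0001',
    ]

    # leading key components are dropped until get_keys_from_filenames
    # succeeds, then the dropped prefix is re-attached to the matched keys
    assert get_keys_from_filenames_with_prefix(filenames, keys) == keys
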
diff --git a/tests/test_precomputed_models.py b/tests/test_precomputed_models.py
index 1b25a57..4382f54 100644
--- a/tests/test_precomputed_models.py
+++ b/tests/test_precomputed_models.py
@@ -1,24 +1,28 @@
-from __future__ import division, print_function, absolute_import, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import os
 import pathlib
 import zipfile
 
+import numpy as np
 import pytest
-
 from imageio import imsave
-import numpy as np
 
 import pysaliency
 from pysaliency import export_model_to_hdf5
 
 
+class TestSaliencyMapModel(pysaliency.SaliencyMapModel):
+    def _saliency_map(self, stimulus):
+        stimulus_data = pysaliency.datasets.as_stimulus(stimulus).stimulus_data
+        return np.array(stimulus_data, dtype=float)
+
+
 @pytest.fixture
 def file_stimuli(tmpdir):
     filenames = []
     for i in range(3):
-        # TODO: change back to stimulus_... once this is supported again
-        filename = tmpdir.join('_stimulus_{:04d}.png'.format(i))
+        filename = tmpdir.join('stimulus_{:04d}.png'.format(i))
         imsave(str(filename), np.random.randint(low=0, high=255, size=(100, 100, 3), dtype=np.uint8))
         filenames.append(str(filename))
 
@@ -37,8 +41,7 @@ def stimuli_with_filenames(tmpdir):
     filenames = []
     stimuli = []
     for i in range(3):
-        # TODO: change back to stimulus_... once this is supported again
-        filename = tmpdir.join('_stimulus_{:04d}.png'.format(i))
+        filename = tmpdir.join('stimulus_{:04d}.png'.format(i))
         stimuli.append(np.random.randint(low=0, high=255, size=(100, 100, 3), dtype=np.uint8))
         filenames.append(str(filename))
 
@@ -61,6 +64,14 @@
     raise ValueError(request.param)
 
 
+@pytest.fixture
+def sub_stimuli(stimuli):
+    unique_filenames = pysaliency.utils.get_minimal_unique_filenames(
+        pysaliency.precomputed_models.get_stimuli_filenames(stimuli)
+    )
+    return stimuli[[i for i, f in enumerate(unique_filenames) if f.startswith('sub_directory_0001')]]
+
+
 @pytest.fixture
 def saliency_maps_in_directory(file_stimuli, tmpdir):
     stimuli_files = pysaliency.utils.get_minimal_unique_filenames(file_stimuli.filenames)
@@ -80,7 +91,7 @@
 
 def test_export_model_to_hdf5(stimuli, tmpdir):
-    model = pysaliency.UniformModel()
+    model = pysaliency.models.SaliencyMapNormalizingModel(TestSaliencyMapModel())
     filename = str(tmpdir.join('model.hdf5'))
 
     export_model_to_hdf5(model, stimuli, filename)
 
@@ -89,6 +100,16 @@
         np.testing.assert_allclose(model.log_density(s), model2.log_density(s))
 
 
+def test_hdf5_model_sub_stimuli(stimuli, sub_stimuli, tmpdir):
+    model = pysaliency.models.SaliencyMapNormalizingModel(TestSaliencyMapModel())
+    filename = str(tmpdir.join('model.hdf5'))
+    export_model_to_hdf5(model, stimuli, filename)
+
+    model2 = pysaliency.HDF5Model(sub_stimuli, filename)
+    for s in sub_stimuli:
+        np.testing.assert_allclose(model.log_density(s), model2.log_density(s))
+
+
 def test_export_model_overwrite(file_stimuli, tmpdir):
     model1 = pysaliency.GaussianSaliencyMapModel(width=0.1)
     model2 = pysaliency.GaussianSaliencyMapModel(width=0.8)
@@ -124,35 +145,71 @@
         np.testing.assert_allclose(model2.saliency_map(s), model3.saliency_map(s))
 
 
-def test_saliency_map_model_from_directory(file_stimuli, saliency_maps_in_directory):
+def test_saliency_map_model_from_directory(stimuli, saliency_maps_in_directory):
     directory, predictions = saliency_maps_in_directory
-    model = pysaliency.SaliencyMapModelFromDirectory(file_stimuli, directory)
+    model = pysaliency.SaliencyMapModelFromDirectory(stimuli, directory)
 
-    for stimulus_index, stimulus in enumerate(file_stimuli):
+    for stimulus_index, stimulus in enumerate(stimuli):
         expected = predictions[stimulus_index]
         actual = model.saliency_map(stimulus)
         np.testing.assert_equal(actual, expected)
 
 
-@pytest.mark.skip("currently archivemodels can't handle same stimuli names in directory and subdirectory")
-def test_saliency_map_model_from_archive(file_stimuli, saliency_maps_in_directory, tmpdir):
+
+def test_saliency_map_model_from_directory_sub_stimuli(stimuli, sub_stimuli, saliency_maps_in_directory):
+    directory, predictions = saliency_maps_in_directory
+    full_model = pysaliency.SaliencyMapModelFromDirectory(stimuli, directory)
+    sub_model = pysaliency.SaliencyMapModelFromDirectory(sub_stimuli, directory)
+
+    for stimulus in sub_stimuli:
+        expected = full_model.saliency_map(stimulus)
+        actual = sub_model.saliency_map(stimulus)
+        np.testing.assert_equal(actual, expected)
+
+
+def test_saliency_map_model_from_archive(stimuli, saliency_maps_in_directory, tmpdir):
     directory, predictions = saliency_maps_in_directory
 
     archive = tmpdir / 'predictions.zip'
 
     # from https://stackoverflow.com/a/1855118
     def zipdir(path, ziph):
-        for root, dirs, files in os.walk(path):
+        for root, _, files in os.walk(path):
             for file in files:
-                ziph.write(os.path.join(root, file),
-                           os.path.relpath(os.path.join(root, file),
+                ziph.write(os.path.join(root, file),
+                           os.path.relpath(os.path.join(root, file),
                                            os.path.join(path, '..')))
-
+
     with zipfile.ZipFile(str(archive), 'w', zipfile.ZIP_DEFLATED) as zipf:
         zipdir(str(directory), zipf)
 
-    model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(file_stimuli, str(archive))
+    model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(stimuli, str(archive))
 
-    for stimulus_index, stimulus in enumerate(file_stimuli):
+    for stimulus_index, stimulus in enumerate(stimuli):
         expected = predictions[stimulus_index]
         actual = model.saliency_map(stimulus)
         np.testing.assert_equal(actual, expected)
+
+
+def test_saliency_map_model_from_archive_sub_stimuli(stimuli, sub_stimuli, saliency_maps_in_directory, tmpdir):
+    directory, predictions = saliency_maps_in_directory
+
+    archive = tmpdir / 'predictions.zip'
+
+    # from https://stackoverflow.com/a/1855118
+    def zipdir(path, ziph):
+        for root, _, files in os.walk(path):
+            for file in files:
+                ziph.write(os.path.join(root, file),
+                           os.path.relpath(os.path.join(root, file),
+                                           os.path.join(path, '..')))
+
+    with zipfile.ZipFile(str(archive), 'w', zipfile.ZIP_DEFLATED) as zipf:
+        zipdir(str(directory), zipf)
+
+    full_model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(stimuli, str(archive))
+    sub_model = pysaliency.precomputed_models.SaliencyMapModelFromArchive(sub_stimuli, str(archive))
+
+    for stimulus in sub_stimuli:
+        expected = full_model.saliency_map(stimulus)
+        actual = sub_model.saliency_map(stimulus)
+        np.testing.assert_equal(actual, expected)
\ No newline at end of file
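
Taken together, the tests above pin down the behaviour this change enables: a model built for a subset of the stimuli resolves the same predictions as a model built for the full set, because keys are now matched against the keys actually present in the prediction store. A minimal usage sketch (hypothetical filenames and HDF5 file; sub-stimuli are selected by index, as in the sub_stimuli fixture):

    import pysaliency

    stimuli = pysaliency.FileStimuli(filenames=[
        'stimuli/sub_directory_0000/stimulus_0000.png',
        'stimuli/sub_directory_0001/stimulus_0001.png',
    ])

    model = pysaliency.HDF5Model(stimuli, 'predictions.hdf5')

    # selecting a subset no longer changes the keys used for lookup, so the
    # sub-model returns the same predictions as the full model
    sub_stimuli = stimuli[[1]]
    sub_model = pysaliency.HDF5Model(sub_stimuli, 'predictions.hdf5')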