Skip to content

Commit

Permalink
pass keyword arguments to read_hdf5 (#86)
Browse files Browse the repository at this point in the history
Signed-off-by: Matthias Kümmerer <[email protected]>
  • Loading branch information
matthias-k authored Sep 25, 2024
1 parent 17a2757 commit c2ea6de
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Feature: The RARE2007 model is now available as `pysaliency.external_models.RARE2007`. Its execution requires MATLAB.
* matlab scripts are now called with the `-batch` option instead of `-nodisplay -nosplash -r`, which should behave better.
* Enhancement: preloaded stimulus ids are passed on to subsets of Stimuli and FileStimuli.
* Feature: `pysaliency.read_hdf5` now takes additional keyword arguments which are passed to the respective class methods. This allows, e.g., to load `FileStimuli` with caching disabled.


* 0.2.22:
Expand Down
20 changes: 10 additions & 10 deletions pysaliency/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,30 @@


@cached(WeakValueDictionary())
def _read_hdf5_from_file(source, **kwargs):
    """Open an HDF5 file on disk and load the pysaliency object stored in it.

    Results are memoized in a ``WeakValueDictionary`` so repeated loads of the
    same file return the same live object while it is referenced elsewhere.

    Parameters
    ----------
    source : str or pathlib.Path
        Path of the HDF5 file to read.
    **kwargs
        Forwarded to :func:`read_hdf5` and from there to the matching
        ``read_hdf5`` classmethod (e.g. ``cached=False`` for ``FileStimuli``).

    Returns
    -------
    The deserialized dataset object (``Stimuli``, ``Fixations``, ...).
    """
    # NOTE(review): depending on how `cached` builds its key, calls that differ
    # only in **kwargs may share a cache entry — verify when kwargs matter.
    # h5py is imported lazily so the module works without it until HDF5 I/O is used.
    import h5py
    with h5py.File(source, 'r') as hdf5_file:
        return read_hdf5(hdf5_file, **kwargs)


def read_hdf5(source, **kwargs):
    """Load a pysaliency dataset object from an HDF5 file or group.

    Parameters
    ----------
    source : str, pathlib.Path, or h5py group-like object
        Path of an HDF5 file, or an already-opened HDF5 node whose
        ``attrs['type']`` names the stored class.
    **kwargs
        Passed through to the matching class's ``read_hdf5`` method
        (e.g. ``cached=False`` to load ``FileStimuli`` without caching).

    Returns
    -------
    An instance of the class named by the ``type`` attribute.

    Raises
    ------
    ValueError
        If the ``type`` attribute names an unknown class.
    """
    # Path-like inputs go through the cached file loader, which reopens
    # the file and calls back into this function with the open handle.
    if isinstance(source, (str, pathlib.Path)):
        return _read_hdf5_from_file(source, **kwargs)

    data_type = decode_string(source.attrs['type'])

    # Dispatch to the deserializer of the stored class; a dict lookup keeps
    # the mapping in one place instead of a long if/elif chain.
    readers = {
        'Fixations': Fixations.read_hdf5,
        'ScanpathFixations': ScanpathFixations.read_hdf5,
        'FixationTrains': FixationTrains.read_hdf5,
        'Scanpaths': Scanpaths.read_hdf5,
        'Stimuli': Stimuli.read_hdf5,
        'FileStimuli': FileStimuli.read_hdf5,
    }
    try:
        reader = readers[data_type]
    except KeyError:
        raise ValueError("Invalid HDF content type:", data_type)
    return reader(source, **kwargs)

Expand Down
13 changes: 13 additions & 0 deletions tests/datasets/test_stimuli.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,19 @@ def test_file_stimuli_attributes(file_stimuli_with_attributes, tmp_path):
assert list(np.array(file_stimuli_with_attributes.attributes['some_strings'])[mask]) == partial_stimuli.attributes['some_strings']


def test_file_stimuli_readhdf5_cached(file_stimuli_with_attributes, tmp_path):
    """read_hdf5 forwards keyword arguments: `cached=False` disables caching."""
    hdf5_path = str(tmp_path / 'stimuli.hdf5')
    file_stimuli_with_attributes.to_hdf5(hdf5_path)

    # Default load keeps caching enabled.
    loaded_default = pysaliency.read_hdf5(hdf5_path)
    assert loaded_default.cached

    # The keyword argument must reach FileStimuli.read_hdf5.
    loaded_uncached = pysaliency.read_hdf5(hdf5_path, cached=False)
    assert not loaded_uncached.cached


def test_concatenate_stimuli_with_attributes(stimuli_with_attributes, file_stimuli_with_attributes):
concatenated_stimuli = pysaliency.datasets.concatenate_stimuli([stimuli_with_attributes, file_stimuli_with_attributes])

Expand Down

0 comments on commit c2ea6de

Please sign in to comment.