diff --git a/pysaliency/datasets/__init__.py b/pysaliency/datasets/__init__.py index 343a666..fe2eb65 100644 --- a/pysaliency/datasets/__init__.py +++ b/pysaliency/datasets/__init__.py @@ -1,37 +1,14 @@ -import json import pathlib -import warnings from typing import Dict, List, Optional, Union from weakref import WeakValueDictionary import numpy as np from boltons.cacheutils import cached -from tqdm import tqdm -from ..utils import remove_trailing_nans -from ..utils.variable_length_array import VariableLengthArray, concatenate_variable_length_arrays +from .fixations import Fixations, FixationTrains, scanpaths_from_fixations +from .scanpaths import Scanpaths from .stimuli import FileStimuli, ObjectStimuli, Stimuli, StimuliStimulus, Stimulus, as_stimulus, check_prediction_shape, get_image_hash -from .utils import decode_string, hdf5_wrapper, create_hdf5_dataset - - -def _split_crossval(fixations, part, partcount): - xs = [] - ys = [] - ts = [] - ns = [] - subjects = [] - N = int((fixations.train_ns.max()+1)/partcount) - for n in range(N): - this_inds = np.nonzero(fixations.train_ns == partcount*n+part)[0] - new_inds = this_inds - for i in new_inds: - xs.append(fixations.train_xs[i]) - ys.append(fixations.train_ys[i]) - ts.append(fixations.train_ts[i]) - ns.append(n) - subjects.append(fixations.train_subjects[i]) - new_fixations = Fixations.from_fixation_trains(xs, ys, ts, ns, subjects) - return new_fixations +from .utils import _load_attribute_dict_from_hdf5, concatenate_attributes, create_hdf5_dataset, decode_string, get_merged_attribute_list, hdf5_wrapper @cached(WeakValueDictionary()) @@ -61,1215 +38,6 @@ def read_hdf5(source): raise ValueError("Invalid HDF content type:", data_type) -class Fixations(object): - """Capsules the fixations of a dataset and provides different methods - of accessing them, e.g. in fixation trains, as conditional fixations - or just all fixations at once. - - Fixations consist of: - x: the x-position of the fixation - y: the y-position of the fixation - t: the time of the fixation - x_hist: the previous x-positions in the history of this fixation - y_hist: the previous y-positions in the history of this fixation - t_hist: the previous times in the history of this fixation - subject: the subject who made the fixation - n: the number of the stimuli (optional, only needed when evaluating not on single images) - - Fixations support slicing via fixations[indices] as a shortcut for fixations.filter. - - Although all fixations have a history of previous fixations, these histories - do not have to form a set of fixation sequences. For example, if a fixation - has a previous fixation, this previous fixation does not have to be as a - fixation of its on in the dataset. This is important because otherwise - a lot of useful filtering operations would not be possible (e.g. filter - for all fixations with at least one previous fixation to calculate - saccade lengths). If you need fixation trains, use the subclass - `FixationTrains`. 
- """ - __attributes__ = ['subjects'] - - def __init__(self, - x: Union[List, np.ndarray], - y: Union[List, np.ndarray], - t: Union[List, np.ndarray], - x_hist: Union[List, VariableLengthArray], - y_hist: Union[List, VariableLengthArray], - t_hist: Union[List, VariableLengthArray], - n: Union[List, np.ndarray], - subjects: Optional[Union[List, np.ndarray]] = None, - attributes: Optional[Dict[str, Union[np.ndarray, VariableLengthArray]]] = None): - - self.x = np.asarray(x) - self.y = np.asarray(y) - self.t = np.asarray(t) - self.n = np.asarray(n) - - # would be nice, is not yet supported. But we can simply pass the VariableLengthArray instead - # if isinstance(x_hist, list): - # x_hist = VariableLengthArray(x_hist) - # self.lengths = x_hist.lengths - if isinstance(x_hist, (list, np.ndarray)): - x_hist = np.array(x_hist) - self.lengths = (1 - np.isnan(x_hist)).sum(axis=-1) - x_hist = VariableLengthArray(x_hist, lengths=self.lengths) - elif isinstance(x_hist, VariableLengthArray): - self.lengths = x_hist.lengths - - - y_hist = self._as_variable_length_array(y_hist) - t_hist = self._as_variable_length_array(t_hist) - - if subjects is not None: - subjects = np.asarray(subjects) - - self.x_hist = x_hist - self.y_hist = y_hist - self.t_hist = t_hist - self.n = n - self.subjects = subjects - - if not len(self.x) == len(self.y) == len(self.t) == len(self.x_hist) == len(self.y_hist) == len(self.t_hist) == len(self.n): - raise ValueError("Lengths of fixations have to match") - if self.subjects is not None and not len(self.x) == len(self.subjects): - raise ValueError("Length of subjects has to match number of fixations") - - if attributes is not None: - self.__attributes__ = list(self.__attributes__) - for name, value in attributes.items(): - if name not in self.__attributes__: - self.__attributes__.append(name) - if not len(value) == len(self.x): - raise ValueError(f"Length of attribute '{name}' has to match number of fixations") - setattr(self, name, value) - - - def _check_lengths(self, other: VariableLengthArray): - if not len(self) == len(other): - raise ValueError("Length of scanpaths has to match") - if not np.all(self.lengths == other.lengths): - raise ValueError("Lengths of scanpaths have to match") - - def _as_variable_length_array(self, data: Union[np.ndarray, VariableLengthArray]) -> VariableLengthArray: - if not isinstance(data, VariableLengthArray): - data = VariableLengthArray(data, self.lengths) - - self._check_lengths(data) - - return data - - @classmethod - def create_without_history(cls, x, y, n, subjects=None): - """ Create new fixation object from fixation data without time and optionally - without subject information - """ - N = len(x) - t = np.zeros(N) - x_hist = np.empty((N, 1))*np.nan - y_hist = np.empty((N, 1))*np.nan - t_hist = np.empty((N, 1))*np.nan - if subjects is None: - subjects = np.ones(N) - return cls(x, y, t, x_hist, y_hist, t_hist, n, subjects) - - @classmethod - def from_fixation_matrices(cls, matrices): - """ - create new Fixation object with fixations from fixation matrices - - Often, fixations are stored in fixation matrices: For each stimulus, there - is a matrix of the same size as the image which ones in each fixated location - and zero everywhere else. This method allows to construct a `Fixation` instance - from such fixation matrices. 
- - >>> matrix1 = np.zeros((10,10)) - >>> matrix1[5, 2] = 1 - >>> matrix1[3, 3] = 1 - >>> matrix2 = np.zeros((20, 30)) - >>> matrix2[10, 20] = 1 - >>> fixations = pysaliency.Fixation.from_fixation_matrices( - [matrix1, - matrix2]) - """ - xs = [] - ys = [] - ns = [] - for _n, matrix in enumerate(matrices): - y, x = np.nonzero(matrix) - n = [_n] * len(y) - xs.append(x) - ys.append(y) - ns.append(n) - x = np.hstack(xs).astype(float) - y = np.hstack(ys).astype(float) - n = np.hstack(ns) - return cls.create_without_history(x, y, n) - - @classmethod - def concatenate(cls, fixations): - kwargs = {} - for key in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n', 'subjects']: - kwargs[key] = concatenate_attributes(getattr(f, key) for f in fixations) - - attributes = _get_merged_attribute_list([f.__attributes__ for f in fixations]) - attribute_dict = {} - for key in attributes: - if key == 'subjects': - continue - attribute_dict[key] = concatenate_attributes(getattr(f, key) for f in fixations) - - kwargs['attributes'] = attribute_dict - - new_fixations = cls(**kwargs) - - return new_fixations - - def __getitem__(self, indices): - return self.filter(indices) - - def __len__(self): - return len(self.x) - - def filter(self, inds): - """ - Create new fixations object which contains only the fixations with indexes in inds - - .. note:: - The fixation trains of the object are left as is. Filtering the fixation trains - is not possible as the indices may include only some fixation of a fixation train. - - The attributes `consistent_fixation_trains` tracks whether a `Fixations` instance - still has consistent fixation trains. The return of this function will be marked - to have inconsistent fixation trains. If you need to filter with consistent - fixation trains, use `Fixations.filter_fixation_trains`. - """ - - kwargs = {} - other_attributes = {} - - def filter_array(name): - print("Filtering", name) - kwargs[name] = getattr(self, name)[inds].copy() - - for name in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n']: - filter_array(name) - - for name in self.__attributes__: - filter_array(name) - if name != 'subjects': - other_attributes[name] = kwargs.pop(name) - - new_fix = Fixations(**kwargs) - for key, value in other_attributes.items(): - setattr(new_fix, key, value) - new_fix.__attributes__ = list(self.__attributes__) - return new_fix - - def _get_previous_values(self, name, index): - """return fixations.name[np.arange(len(fixations.name)),index]""" - a = getattr(self, name) - inds = np.arange(len(a)) - if index >= 0: - return a[inds, index] - else: - indexes = self.lengths + index - return a[inds, indexes] - - def get_saccade(self, index = -1): - """ - Return saccades for all fixations. - - @type index: integer - @param index: index of the saccade to return. `index==-1` returns the - the last saccades of all fixations etc. 
- - @return dx, dy, dt of the saccade - - Example: - - dx, dy, dt = fixations.get_saccade(-1) - mean_saccade_length = np.sqrt(dx**2+dy**2).mean() - """ - - if index > 0: - raise NotImplemented() - if index == -1: - x1 = self.x - y1 = self.y - t1 = self.t - else: - x1 = self._get_previous_values('x_hist', index+1) - y1 = self._get_previous_values('y_hist', index+1) - t1 = self._get_previous_values('t_hist', index+1) - dx = x1 - self._get_previous_values('x_hist', index) - dy = y1 - self._get_previous_values('y_hist', index) - dt = t1 - self._get_previous_values('t_hist', index) - return dx, dy, dt - #return np.vstack((dy,dx)).T - - @property - def x_int(self): - """ x coordinates of the fixations, converted to integers """ - return np.asarray(self.x, dtype=int) - - @property - def y_int(self): - """ y coordinates of the fixations, converted to integers """ - return np.asarray(self.y, dtype=int) - - @property - def subject_count(self): - return self.subjects.max()+1 - - def copy(self): - cfix = Fixations(self.x.copy(), self.y.copy(), self.t.copy(), - self.x_hist.copy(), self.y_hist.copy(), self.t_hist.copy(), - self.n.copy(), self.subjects.copy() if self.subjects is not None else None) - cfix.__attributes__ = list(self.__attributes__) - for name in self.__attributes__: - setattr(cfix, name, getattr(self, name).copy()) - return cfix - - @classmethod - def FixationsWithoutHistory(cls, x, y, t, n, subjects): - x_hist = np.empty((len(x), 1)) - x_hist[:] = np.nan - y_hist = np.empty((len(x), 1)) - y_hist[:] = np.nan - t_hist = np.empty((len(x), 1)) - t_hist[:] = np.nan - return cls(x, y, t, x_hist, y_hist, t_hist, n, subjects) - - @hdf5_wrapper(mode='w') - def to_hdf5(self, target): - """ Write fixations to hdf5 file or hdf5 group - """ - - target.attrs['type'] = np.string_('Fixations') - target.attrs['version'] = np.string_('1.1') - - variable_length_arrays = [] - - for attribute in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n', 'lengths'] + self.__attributes__: - data = getattr(self, attribute) - if isinstance(data, VariableLengthArray): - variable_length_arrays.append(attribute) - data = data._data - target.create_dataset(attribute, data=data) - - target.attrs['__attributes__'] = np.string_(json.dumps(self.__attributes__)) - target.attrs['__variable_length_arrays__'] = np.string_(json.dumps(sorted(variable_length_arrays))) - - @classmethod - @hdf5_wrapper(mode='r') - def read_hdf5(cls, source): - """ Read fixations from hdf5 file or hdf5 group """ - - # TODO: rewrite to use constructor instead of manipulating the object directly - - data_type = decode_string(source.attrs['type']) - data_version = decode_string(source.attrs['version']) - - if data_type != 'Fixations': - raise ValueError("Invalid type! Expected 'Fixations', got", data_type) - - if data_version not in ['1.0', '1.1']: - raise ValueError("Invalid version! Expected '1.0', got", data_version) - - data = {key: source[key][...] for key in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n', 'subjects']} - fixations = cls(**data) - - json_attributes = source.attrs['__attributes__'] - if not isinstance(json_attributes, str): - json_attributes = json_attributes.decode('utf8') - __attributes__ = json.loads(json_attributes) - fixations.__attributes__ = list(__attributes__) - - if data_version == '1.1': - lengths = source['lengths'][...] 
- - json_variable_length_arrays = source.attrs['__variable_length_arrays__'] - if not isinstance(json_variable_length_arrays, str): - json_variable_length_arrays = json_variable_length_arrays.decode('utf8') - variable_length_arrays = json.loads(json_variable_length_arrays) - - else: - lengths = fixations.lengths - variable_length_arrays = ['x_hist', 'y_hist', 't_hist'] + [key for key in __attributes__ if key.endswith('_hist')] - - for key in __attributes__: - data = source[key][...] - - if key in variable_length_arrays: - data = VariableLengthArray(data, lengths) - - setattr(fixations, key, data) - - return fixations - - -class FixationTrains(Fixations): - """ - Capsules the fixations of a dataset as fixation trains. - - Additionally to `Fixations`, `FixationTrains`-instances - have the attributes - train_xs: 2d array (number_of_trains, maximum_length_of_train) - train_ys: 2d array (number_of_trains, maximum_length_of_train) - train_ts: 2d array (number_of_trains, maximum_length_of_train) - train_ns: 1d array (number_of_trains) - train_subjects: 1d array (number_of_trains) - - scanpath_attributes: dictionary of attributes applying to full scanpaths, e.g. task - {attribute_name: $NUM_SCANPATHS-length-list} - scanpath attributes will automatically also become attributes - scanpath_fixation_attribute: dictionary of attributes applying to fixations in the scanpath, e.g. duration - {attribute_name: $NUM_SCANPATH x $NUM_FIXATIONS_IN_SCANPATH} - scanpath fixation attributes will generate two attributes: the value for each fixation - and the history for previous fixations. E.g. a scanpath fixation attribute "durations" will generate - an attribute "durations" and an attribute "durations_hist" - - """ - def __init__(self, train_xs, train_ys, train_ts, train_ns, train_subjects, scanpath_attributes=None, scanpath_fixation_attributes=None, attributes=None, scanpath_attribute_mapping=None): - self.__attributes__ = list(self.__attributes__) - self.__attributes__.append('scanpath_index') - - scanpath_attributes = scanpath_attributes or {} - scanpath_attribute_mapping = scanpath_attribute_mapping or {} - scanpath_fixation_attributes = scanpath_fixation_attributes or {} - - if 'subject' in scanpath_attributes and train_subjects is not None: - raise ValueError("subject should not be in scanpath_attributes if train_subjects is specified") - if 'subject' not in scanpath_attributes: - scanpath_attributes['subject'] = train_subjects - - if 'ts' in scanpath_fixation_attributes and train_ts is not None: - raise ValueError("ts should not be in scanpath_fixation_attributes if train_ts is specified") - if 'ts' not in scanpath_fixation_attributes: - scanpath_fixation_attributes['ts'] = train_ts - scanpath_attribute_mapping['ts'] = 't' - - lengths = [len(remove_trailing_nans(xs)) for xs in train_xs] - - scanpaths = Scanpaths( - xs=train_xs, - ys=train_ys, - n=train_ns, - lengths=lengths, - scanpath_attributes=scanpath_attributes, - fixation_attributes=scanpath_fixation_attributes, - attribute_mapping=scanpath_attribute_mapping, - ) - - self.scanpaths = scanpaths - - N_fixations = scanpaths.lengths.sum() - max_scanpath_length = scanpaths.lengths.max() if len(self.scanpaths) else 0 - max_history_length = max(max_scanpath_length - 1, 0) - - - # Create conditional fixations - self.x = np.empty(N_fixations) - self.y = np.empty(N_fixations) - self.t = np.empty(N_fixations) - self.x_hist = np.empty((N_fixations, max_history_length)) - self.y_hist = np.empty((N_fixations, max_history_length)) - self.t_hist = 
np.empty((N_fixations, max_history_length)) - self.x_hist[:] = np.nan - self.y_hist[:] = np.nan - self.t_hist[:] = np.nan - self.n = np.empty(N_fixations, dtype=int) - self.lengths = np.empty(N_fixations, dtype=int) - # self.train_lengths = np.empty(len(self.train_xs), dtype=int) - self.subjects = np.empty(N_fixations, dtype=int) - self.scanpath_index = np.empty(N_fixations, dtype=int) - - out_index = 0 - # TODO: maybe implement in numba? - # probably best: have function fill_fixation_data(scanpath_data, fixation_data, hist_data=None) - for train_index in range(len(self.scanpaths)): - #fix_length = len(remove_trailing_nans(self.train_xs[train_index])) - # self.train_lengths[train_index] = fix_length - for fix_index in range(self.scanpaths.lengths[train_index]): - self.x[out_index] = self.scanpaths.xs[train_index][fix_index] - self.y[out_index] = self.scanpaths.ys[train_index][fix_index] - self.t[out_index] = self.scanpaths.ts[train_index][fix_index] - self.n[out_index] = self.scanpaths.n[train_index] - self.subjects[out_index] = self.scanpaths.scanpath_attributes['subject'][train_index] - self.lengths[out_index] = fix_index - self.scanpath_index[out_index] = train_index - self.x_hist[out_index][:fix_index] = self.scanpaths.xs[train_index][:fix_index] - self.y_hist[out_index][:fix_index] = self.scanpaths.ys[train_index][:fix_index] - self.t_hist[out_index][:fix_index] = self.scanpaths.ts[train_index][:fix_index] - out_index += 1 - - # TODO: this should become irrelevant once FixationTrains is also completely upgraded to VariableLengthArrays - self.x_hist = VariableLengthArray(self.x_hist, self.lengths) - self.y_hist = VariableLengthArray(self.y_hist, self.lengths) - self.t_hist = VariableLengthArray(self.t_hist, self.lengths) - - #if scanpath_attributes is not None: - # assert isinstance(scanpath_attributes, dict) - # self.scanpath_attributes = {key: np.array(value) for key, value in scanpath_attributes.items()} - # for key, value in self.scanpath_attributes.items(): - # assert len(value) == len(self.scanpaths) - #else: - # self.scanpath_attributes = {} - - #if scanpath_fixation_attributes is not None: - # assert isinstance(scanpath_fixation_attributes, dict) - # self.scanpath_fixation_attributes = {} - # for key, value in scanpath_fixation_attributes.items(): - # self.scanpath_fixation_attributes[key] = self._as_variable_length_scanpath_array(value) - #else: - # self.scanpath_fixation_attributes = {} - - # self.scanpath_attribute_mapping = self.scanpaths.attribute_mapp - - - if attributes is None: - attributes = {} - elif attributes: - warnings.warn("don't use attributes for FixationTrains, use scanpath_attributes or scanpath_fixation_attributes instead!", stacklevel=2) - - self.auto_attributes = [] - - for attribute_name, value in self.scanpaths.scanpath_attributes.items(): - if attribute_name == 'subject': - continue - new_attribute_name = self.scanpaths.attribute_mapping.get(attribute_name, attribute_name) - if new_attribute_name in attributes: - raise ValueError("attribute name clash: {new_attribute_name}".format(new_attribute_name=new_attribute_name)) - attribute_shape = [] if not value.any() else np.asarray(value[0]).shape - attributes[new_attribute_name] = np.empty([N_fixations] + list(attribute_shape), dtype=value.dtype) - self.auto_attributes.append(new_attribute_name) - - out_index = 0 - for train_index in range(len(self.scanpaths)): - for fix_index in range(self.scanpaths.lengths[train_index]): - attributes[new_attribute_name][out_index] = 
self.scanpaths.scanpath_attributes[attribute_name][train_index] - out_index += 1 - - - for attribute_name, value in self.scanpaths.fixation_attributes.items(): - if attribute_name == 'ts': - continue - new_attribute_name = self.scanpaths.attribute_mapping.get(attribute_name, attribute_name) - if new_attribute_name in attributes: - raise ValueError("attribute name clash: {new_attribute_name}".format(new_attribute_name=new_attribute_name)) - attributes[new_attribute_name] = np.empty(N_fixations) - self.auto_attributes.append(new_attribute_name) - - hist_attribute_name = new_attribute_name + '_hist' - if hist_attribute_name in attributes: - raise ValueError("attribute name clash: {hist_attribute_name}".format(hist_attribute_name=hist_attribute_name)) - attributes[hist_attribute_name] = np.full((N_fixations, max_history_length), fill_value=np.nan) - self.auto_attributes.append(hist_attribute_name) - - out_index = 0 - for train_index in range(len(self.scanpaths)): - for fix_index in range(self.scanpaths.lengths[train_index]): - attributes[new_attribute_name][out_index] = self.scanpaths.fixation_attributes[attribute_name][train_index, fix_index] - attributes[hist_attribute_name][out_index][:fix_index] = self.scanpaths.fixation_attributes[attribute_name][train_index, :fix_index] - out_index += 1 - - attributes[hist_attribute_name] = VariableLengthArray(attributes[hist_attribute_name], self.lengths) - - if attributes: - self.__attributes__ = list(self.__attributes__) - for key, value in attributes.items(): - assert key != 'subjects' - assert key != 'scanpath_index' - assert key != 't' - assert len(value) == len(self.x) - self.__attributes__.append(key) - if not isinstance(value, VariableLengthArray): - value = np.array(value) - setattr(self, key, value) - - self.full_nonfixations = None - - # def _check_train_lengths(self, other: VariableLengthArray): - # if not len(self.train_lengths) == len(other): - # raise ValueError("Length of scanpaths has to match") - # if not np.all(self.train_lengths == other.lengths): - # raise ValueError("Lengths of scanpaths have to match") - - # def _as_variable_length_scanpath_array(self, data: Union[np.ndarray, VariableLengthArray]) -> VariableLengthArray: - # if not isinstance(data, VariableLengthArray): - # data = VariableLengthArray(data, self.train_lengths) - - # self._check_train_lengths(data) - - # return data - - @property - def train_xs(self) -> VariableLengthArray: - return self.scanpaths.xs - - @property - def train_ys(self) -> VariableLengthArray: - return self.scanpaths.ys - - @property - def train_ts(self) -> VariableLengthArray: - return self.scanpaths.ts - - @property - def train_ns(self) -> np.ndarray: - return self.scanpaths.n - - @property - def train_subjects(self) -> VariableLengthArray: - return self.scanpaths.subject - - @property - def train_lengths(self) -> np.ndarray: - return self.scanpaths.lengths - - @property - def scanpath_attributes(self) -> Dict[str, np.ndarray]: - return { - key: value for key, value in self.scanpaths.scanpath_attributes.items() if key != 'subject' - } - - @property - def scanpath_fixation_attributes(self) -> Dict[str, VariableLengthArray]: - return { - key: value for key, value in self.scanpaths.fixation_attributes.items() if key != 'ts' - } - - @property - def scanpath_attribute_mapping(self) -> Dict[str, str]: - return { - key: value for key, value in self.scanpaths.attribute_mapping.items() if key != 'ts' - } - - @classmethod - def concatenate(cls, fixation_trains): - kwargs = {} - - for key in ['train_xs', 
'train_ys', 'train_ts', 'train_ns', 'train_subjects']: - kwargs[key] = concatenate_attributes(getattr(f, key) for f in fixation_trains) - - def _real_attributes(scanpaths: FixationTrains): - return [attribute_name for attribute_name in scanpaths.__attributes__ if attribute_name not in scanpaths.auto_attributes + ['scanpath_index']] - - def _mapped_attribute_name(attribute_name: str, scanpaths: FixationTrains): - names = [s.scanpath_attribute_mapping.get(attribute_name) for s in scanpaths] - if len(set(names)) > 1: - raise ValueError(f"inconsistent attribute name mappings for '{attribute_name}': {names}") - - return names[0] - - attributes = _get_merged_attribute_list([_real_attributes(f) for f in fixation_trains]) - attribute_dict = {} - for key in attributes: - if key == 'subjects': - continue - attribute_dict[key] = concatenate_attributes(getattr(f, key) for f in fixation_trains) - - kwargs['attributes'] = attribute_dict - - scanpath_attribute_names = _get_merged_attribute_list([list(f.scanpath_attributes) for f in fixation_trains]) - - kwargs['scanpath_attributes'] = {} - kwargs['scanpath_attribute_mapping'] = {} - for name in scanpath_attribute_names: - kwargs['scanpath_attributes'][name] = concatenate_attributes(f.scanpath_attributes[name] for f in fixation_trains) - mapped_name = _mapped_attribute_name(name, fixation_trains) - if mapped_name is not None: - kwargs['scanpath_attribute_mapping'][name] = mapped_name - - scanpath_fixation_attribute_names = _get_merged_attribute_list([list(f.scanpath_fixation_attributes) for f in fixation_trains]) - - kwargs['scanpath_fixation_attributes'] = {} - for name in scanpath_fixation_attribute_names: - kwargs['scanpath_fixation_attributes'][name] = concatenate_attributes(f.scanpath_fixation_attributes[name] for f in fixation_trains) - mapped_name = _mapped_attribute_name(name, fixation_trains) - if mapped_name is not None: - kwargs['scanpath_attribute_mapping'][name] = mapped_name - - new_fixations = cls(**kwargs) - - return new_fixations - - - def set_scanpath_attribute(self, name, data, fixation_attribute_name=None): - """Sets a scanpath attribute - name: name of scanpath attribute - data: data of scanpath attribute, has to be of same length as number of scanpaths - fixation_attribute: name of automatically generated fixation attribute if it should be different than scanpath attribute name - """ - if not len(data) == len(self.train_xs): - raise ValueError(f'Length of scanpath attribute data has to match number of scanpaths: {len(data)} != {len(self.train_xs)}') - self.scanpath_attributes[name] = data - - if fixation_attribute_name is not None: - self.scanpath_attribute_mapping[name] = fixation_attribute_name - - new_attribute_name = self.scanpath_attribute_mapping.get(name, name) - if new_attribute_name in self.attributes and new_attribute_name not in self.auto_attributes: - raise ValueError("attribute name clash: {new_attribute_name}".format(new_attribute_name=new_attribute_name)) - - attribute_shape = np.asarray(data[0]).shape - self.attributes[new_attribute_name] = np.empty([len(self.train_xs)] + list(attribute_shape), dtype=data.dtype) - if new_attribute_name not in self.auto_attributes: - self.auto_attributes.append(new_attribute_name) - - out_index = 0 - for train_index in range(len(self.train_xs)): - fix_length = (1 - np.isnan(self.train_xs[train_index])).sum() - for _ in range(fix_length): - self.attributes[new_attribute_name][out_index] = self.scanpath_attributes[name][train_index] - out_index += 1 - - def copy(self): - 
copied_attributes = {} - for attribute_name in self.__attributes__: - if attribute_name in ['subjects', 'scanpath_index'] + self.auto_attributes: - continue - copied_attributes[attribute_name] = getattr(self, attribute_name).copy() - copied_scanpaths = FixationTrains( - train_xs=self.train_xs.copy(), - train_ys=self.train_ys.copy(), - train_ts=self.train_ts.copy(), - train_ns=self.train_ns.copy(), - train_subjects=self.train_subjects.copy(), - scanpath_attributes={ - key: value.copy() for key, value in self.scanpath_attributes.items() - } if self.scanpath_attributes else None, - scanpath_fixation_attributes={ - key: value.copy() for key, value in self.scanpath_fixation_attributes.items() - } if self.scanpath_fixation_attributes else None, - scanpath_attribute_mapping=dict(self.scanpath_attribute_mapping), - attributes=copied_attributes if copied_attributes else None, - ) - return copied_scanpaths - - def filter_fixation_trains(self, indices): - """ - Create new fixations object which contains only the fixation trains indicated. - """ - - filtered_scanpaths = self.scanpaths[indices] - - scanpath_indices = np.arange(len(self.scanpaths), dtype=int)[indices] - fixation_indices = np.in1d(self.scanpath_index, scanpath_indices) - - attributes = { - attribute_name: getattr(self, attribute_name)[fixation_indices] for attribute_name in self.__attributes__ if attribute_name not in ['subjects', 'scanpath_index'] + self.auto_attributes - } - - return type(self)( - train_xs=filtered_scanpaths.xs, - train_ys=filtered_scanpaths.ys, - train_ts=None, # filtered_scanpaths.ts, - train_ns=filtered_scanpaths.n, - train_subjects=None, # filtered_scanpaths.subject, - scanpath_attributes=filtered_scanpaths.scanpath_attributes, - scanpath_fixation_attributes=filtered_scanpaths.fixation_attributes, - scanpath_attribute_mapping=dict(filtered_scanpaths.attribute_mapping), - attributes=attributes, - ) - - - def fixation_trains(self): - """Yield for every fixation train of the dataset: - xs, ys, ts, n, subject - """ - for i in range(len(self.train_xs)): - length = (1 - np.isnan(self.train_xs[i])).sum() - xs = self.train_xs[i][:length] - ys = self.train_ys[i][:length] - ts = self.train_ts[i][:length] - n = self.train_ns[i] - subject = self.train_subjects[i] - yield xs, ys, ts, n, subject - - @classmethod - def from_fixation_trains(cls, xs, ys, ts, ns, subjects, attributes=None, scanpath_attributes=None, scanpath_fixation_attributes=None, scanpath_attribute_mapping=None): - """ Create Fixation object from fixation trains. - - xs, ys, ts: Lists of array_like of double. Each array has to contain - the data from one fixation train. - - ns, subjects: lists of int. 
ns has to contain the image index for - each fixation train, subjects the subject index for each - fixation train - """ - maxlength = max([len(x_train) for x_train in xs]) - train_xs = np.empty((len(xs), maxlength)) - train_xs[:] = np.nan - train_ys = np.empty((len(xs), maxlength)) - train_ys[:] = np.nan - train_ts = np.empty((len(xs), maxlength)) - train_ts[:] = np.nan - train_ns = np.empty(len(xs), dtype=int) - train_subjects = np.empty(len(xs), dtype=int) - - padded_scanpath_fixation_attributes = {} - if scanpath_fixation_attributes is not None: - for key, value in scanpath_fixation_attributes.items(): - assert len(value) == len(xs) - if isinstance(value, list): - padded_scanpath_fixation_attributes[key] = np.full((len(xs), maxlength), fill_value=np.nan, dtype=float) - - for i in range(len(train_xs)): - length = len(xs[i]) - train_xs[i, :length] = xs[i] - train_ys[i, :length] = ys[i] - train_ts[i, :length] = ts[i] - train_ns[i] = ns[i] - train_subjects[i] = subjects[i] - for attribute_name in padded_scanpath_fixation_attributes.keys(): - padded_scanpath_fixation_attributes[attribute_name][i, :length] = scanpath_fixation_attributes[attribute_name][i] - - return cls( - train_xs, - train_ys, - train_ts, - train_ns, - train_subjects, - attributes=attributes, - scanpath_attributes=scanpath_attributes, - scanpath_fixation_attributes=padded_scanpath_fixation_attributes, - scanpath_attribute_mapping=scanpath_attribute_mapping) - - def generate_crossval(self, splitcount = 10): - train_xs_training = [] - train_xs_eval = [] - train_ys_training = [] - train_ys_eval = [] - train_ts_training = [] - train_ts_eval = [] - train_ns_training = [] - train_ns_eval = [] - train_subjects_training = [] - train_subjects_eval = [] - # We have to make the crossvalidation data - # reproducible. Therefore we use a - # RandomState with fixed seed for the shuffling. 
- rs = np.random.RandomState(42) - for n in range(self.n.max()+1): - inds = np.nonzero(self.train_ns == n)[0] - rs.shuffle(inds) - parts = np.array_split(inds, splitcount) - for eval_index in range(splitcount): - for index in range(splitcount): - part = parts[index] - if len(part) == 0: - continue - xs = self.train_xs[part] - ys = self.train_ys[part] - ts = self.train_ts[part] - ns = self.train_ns[part] - subjects = self.train_subjects[part] - if index == eval_index: - train_xs_eval.append(xs) - train_ys_eval.append(ys) - train_ts_eval.append(ts) - train_ns_eval.append(ns * splitcount + eval_index) - train_subjects_eval.append(subjects) - else: - train_xs_training.append(xs) - train_ys_training.append(ys) - train_ts_training.append(ts) - train_ns_training.append(ns * splitcount + eval_index) - train_subjects_training.append(subjects) - train_xs_eval = np.vstack(train_xs_eval) - train_ys_eval = np.vstack(train_ys_eval) - train_ts_eval = np.vstack(train_ts_eval) - train_ns_eval = np.hstack(train_ns_eval) - train_subjects_eval = np.hstack(train_subjects_eval) - train_xs_training = np.vstack(train_xs_training) - train_ys_training = np.vstack(train_ys_training) - train_ts_training = np.vstack(train_ts_training) - train_ns_training = np.hstack(train_ns_training) - train_subjects_training = np.hstack(train_subjects_training) - fixations_training = type(self).from_fixation_trains(train_xs_training, train_ys_training, - train_ts_training, train_ns_training, - train_subjects_training) - fixations_evaluation = type(self).from_fixation_trains(train_xs_eval, train_ys_eval, - train_ts_eval, train_ns_eval, - train_subjects_eval) - return fixations_training, fixations_evaluation - - def shuffle_fixations(self, stimuli=None): - new_indices = [] - new_ns = [] - if stimuli: - widths = np.asarray([s[1] for s in stimuli.sizes]).astype(float) - heights = np.asarray([s[0] for s in stimuli.sizes]).astype(float) - x_factors = [] - y_factors = [] - for n in range(self.n.max()+1): - inds = np.nonzero(~(self.n == n))[0] - new_indices.extend(inds) - new_ns.extend([n]*len(inds)) - if stimuli: - other_ns = self.n[inds] - x_factors.extend(stimuli.sizes[n][1]/widths[other_ns]) - y_factors.extend(stimuli.sizes[n][0]/heights[other_ns]) - new_fixations = self[new_indices] - new_fixations.n = np.asarray(new_ns) - if stimuli: - x_factors = np.asarray(x_factors) - y_factors = np.asarray(y_factors) - new_fixations.x = x_factors*new_fixations.x - new_fixations.x_hist = x_factors[:, np.newaxis]*new_fixations.x_hist - new_fixations.y = y_factors*new_fixations.y - new_fixations.y_hist = y_factors[:, np.newaxis]*new_fixations.y_hist - return new_fixations - - def shuffle_fixation_trains(self, stimuli=None): - """ - - """ - if not self.consistent_fixation_trains: - raise ValueError('Cannot shuffle fixation trains as fixation trains not consistent!') - train_xs = [] - train_ys = [] - train_ts = [] - train_ns = [] - train_subjects = [] - for n in range(self.n.max()+1): - inds = ~(self.train_ns == n) - train_xs.append(self.train_xs[inds]) - train_ys.append(self.train_ys[inds]) - train_ts.append(self.train_ts[inds]) - train_ns.append(np.ones(inds.sum(), dtype=int)*n) - train_subjects.append(self.train_subjects[inds]) - train_xs = np.vstack(train_xs) - train_ys = np.vstack(train_ys) - train_ts = np.vstack(train_ts) - train_ns = np.hstack(train_ns) - train_subjects = np.hstack(train_subjects) - full_nonfixations = type(self)(train_xs, train_ys, train_ts, train_ns, train_subjects) - #self.full_nonfixations = full_nonfixations - return 
full_nonfixations - - def generate_full_nonfixations(self, stimuli=None): - """ - Generate nonfixational distribution from this - fixation object by using all fixation trains of - other images. The individual fixation trains - will be left intact. - - .. warning:: - This function operates on the fixation trains. - Therefore, for filtered fixation objects it - might return wrong results. - """ - if self.full_nonfixations is not None: - print("Reusing nonfixations!") - return self.full_nonfixations - train_xs = [] - train_ys = [] - train_ts = [] - train_ns = [] - train_subjects = [] - for n in range(self.n.max()+1): - inds = ~(self.train_ns == n) - train_xs.append(self.train_xs[inds]) - train_ys.append(self.train_ys[inds]) - train_ts.append(self.train_ts[inds]) - train_ns.append(np.ones(inds.sum(), dtype=int)*n) - train_subjects.append(self.train_subjects[inds]) - train_xs = np.vstack(train_xs) - train_ys = np.vstack(train_ys) - train_ts = np.vstack(train_ts) - train_ns = np.hstack(train_ns) - train_subjects = np.hstack(train_subjects) - full_nonfixations = type(self)(train_xs, train_ys, train_ts, train_ns, train_subjects) - self.full_nonfixations = full_nonfixations - return full_nonfixations - - def generate_nonfixation_partners(self, seed=42): - """Generate nonfixational distribution from this - fixation object such that for every fixation in the - original fixation object there is a corresponding - fixation on the same image but on a different - position that comes from some other fixation. - - This destroys the temporal ordering of the fixation - trains.""" - train_xs = self.train_xs.copy() - train_ys = self.train_ys.copy() - train_ts = self.train_ts.copy() - train_ns = self.train_ns.copy() - train_subjects = self.train_subjects.copy() - rs = np.random.RandomState(seed) - for train_index in range(len(train_ns)): - n = train_ns[train_index] - inds = np.nonzero(self.n != n)[0] - length = (1 - np.isnan(train_xs[train_index])).sum() - for i in range(length): - new_fix_index = rs.choice(inds) - train_xs[train_index][i] = self.x[new_fix_index] - train_ys[train_index][i] = self.y[new_fix_index] - train_ts[train_index][i] = self.t[new_fix_index] - return type(self)(train_xs, train_ys, train_ts, train_ns, train_subjects) - - @hdf5_wrapper(mode='w') - def to_hdf5(self, target): - """ Write fixationtrains to hdf5 file or hdf5 group - """ - - target.attrs['type'] = np.string_('FixationTrains') - target.attrs['version'] = np.string_('1.3') - - variable_length_arrays = [] - - for attribute in ['train_xs', 'train_ys', 'train_ts', 'train_ns', 'train_subjects', 'train_lengths'] + self.__attributes__: - if attribute in ['subjects', 'scanpath_index'] + self.auto_attributes: - continue - - data = getattr(self, attribute) - if isinstance(data, VariableLengthArray): - variable_length_arrays.append(attribute) - data = data._data - target.create_dataset(attribute, data=data) - - saved_attributes = [attribute_name for attribute_name in self.__attributes__ if attribute_name not in self.auto_attributes] - target.attrs['__attributes__'] = np.string_(json.dumps(saved_attributes)) - - target.attrs['scanpath_attribute_mapping'] = np.string_(json.dumps(self.scanpath_attribute_mapping)) - - scanpath_attributes_group = target.create_group('scanpath_attributes') - for attribute_name, attribute_value in self.scanpath_attributes.items(): - scanpath_attributes_group.create_dataset(attribute_name, data=attribute_value) - scanpath_attributes_group.attrs['__attributes__'] = 
np.string_(json.dumps(sorted(self.scanpath_attributes.keys()))) - - scanpath_fixation_attributes_group = target.create_group('scanpath_fixation_attributes') - for attribute_name, attribute_value in self.scanpath_fixation_attributes.items(): - scanpath_fixation_attributes_group.create_dataset(attribute_name, data=attribute_value._data) - scanpath_fixation_attributes_group.attrs['__attributes__'] = np.string_(json.dumps(sorted(self.scanpath_fixation_attributes.keys()))) - - - @classmethod - @hdf5_wrapper(mode='r') - def read_hdf5(cls, source): - """ Read train fixations from hdf5 file or hdf5 group """ - - data_type = decode_string(source.attrs['type']) - data_version = decode_string(source.attrs['version']) - - if data_type != 'FixationTrains': - raise ValueError("Invalid type! Expected 'FixationTrains', got", data_type) - - valid_versions = ['1.0', '1.1', '1.2', '1.3'] - if data_version not in valid_versions: - raise ValueError("Invalid version! Expected one of {}, got {}".format(', '.join(valid_versions), data_version)) - - data = {key: source[key][...] for key in ['train_xs', 'train_ys', 'train_ts', 'train_ns', 'train_subjects']} - - json_attributes = decode_string(source.attrs['__attributes__']) - - attribute_names = list(json.loads(json_attributes)) - - attributes = {} - for key in attribute_names: - if key in ['subjects', 'scanpath_index']: - continue - - attributes[key] = source[key][...] - - data['attributes'] = attributes - - if data_version < '1.1': - data['scanpath_attributes'] = {} - else: - data['scanpath_attributes'] = _load_attribute_dict_from_hdf5(source['scanpath_attributes']) - - if data_version < '1.2': - data['scanpath_fixation_attributes'] = {} - data['scanpath_attribute_mapping'] = {} - else: - data['scanpath_fixation_attributes'] = _load_attribute_dict_from_hdf5(source['scanpath_fixation_attributes']) - data['scanpath_attribute_mapping'] = json.loads(decode_string(source.attrs['scanpath_attribute_mapping'])) - - if data_version < '1.3': - train_lengths = np.array([len(remove_trailing_nans(data['train_xs'][i])) for i in range(len(data['train_xs']))]) - else: - train_lengths = source['train_lengths'][...] - - data['scanpath_fixation_attributes'] = { - key: VariableLengthArray(value, train_lengths) for key, value in data['scanpath_fixation_attributes'].items() - } - - fixations = cls(**data) - - return fixations - - -class Scanpaths(object): - """ - Represents a collection of scanpaths. - - Attributes: - xs (VariableLengthArray): The x-coordinates of the scanpaths. - ys (VariableLengthArray): The y-coordinates of the scanpaths. - n (np.ndarray): The image index - lengths (np.ndarray): The lengths of each scanpath. - scanpath_attributes (dict): Additional attributes associated with the scanpaths. - fixation_attributes (dict): Additional attributes associated with the fixations in the scanpaths. - attribute_mapping (dict): Mapping of attribute names to their corresponding values, will be used when creating `Fixations` instances from the `Scanpaths` instance. 
- for example {'durations': 'duration'} - """ - - xs: VariableLengthArray - ys: VariableLengthArray - n: np.ndarray - - def __init__(self, - xs: Union[np.ndarray, VariableLengthArray], - ys: Union[np.ndarray, VariableLengthArray], - n: np.ndarray, - lengths=None, - scanpath_attributes: Optional[Dict[str, np.ndarray]] = None, - fixation_attributes: Optional[Dict[str, Union[np.ndarray, VariableLengthArray]]]=None, - attribute_mapping=Dict[str, str]): - - self.n = np.asarray(n) - - if not isinstance(xs, VariableLengthArray): - self.xs = VariableLengthArray(xs, lengths) - else: - self.xs = xs - - if lengths is not None: - if not np.all(self.xs.lengths == lengths): - raise ValueError("Lengths of xs and lengths do not match") - - self.lengths = self.xs.lengths.copy() - - self.ys = self._as_variable_length_array(ys) - - if not len(self.xs) == len(self.ys) == len(self.n): - raise ValueError("Length of xs, ys, ts and n has to match") - - # setting scanpath attributes - - scanpath_attributes = scanpath_attributes or {} - self.scanpath_attributes = {key: np.array(value) for key, value in scanpath_attributes.items()} - - for key, value in self.scanpath_attributes.items(): - if not len(value) == len(self.xs): - raise ValueError(f"Length of scanpath attribute {key} has to match number of scanpaths, but got {len(value)} != {len(self.xs)}") - - # setting fixation attributes - - fixation_attributes = fixation_attributes or {} - - self.fixation_attributes = {key: self._as_variable_length_array(value) for key, value in fixation_attributes.items()} - - self.attribute_mapping = attribute_mapping or {} - - def _check_lengths(self, other: VariableLengthArray): - if not len(self) == len(other): - raise ValueError("Length of scanpaths has to match") - if not np.all(self.lengths == other.lengths): - raise ValueError("Lengths of scanpaths have to match") - - def _as_variable_length_array(self, data: Union[np.ndarray, VariableLengthArray]) -> VariableLengthArray: - if not isinstance(data, VariableLengthArray): - data = VariableLengthArray(data, self.lengths) - - self._check_lengths(data) - - return data - - def __len__(self): - return len(self.xs) - - @property - def ts(self) -> VariableLengthArray: - return self.fixation_attributes['ts'] - - @property - def subject(self) -> VariableLengthArray: - return self.scanpath_attributes['subject'] - - - @hdf5_wrapper(mode='w') - def to_hdf5(self, target): - """ Write scanpaths to hdf5 file or hdf5 group - """ - target.attrs['type'] = np.string_('Scanpaths') - target.attrs['version'] = np.string_('1.0') - - target.create_dataset('xs', data=self.xs._data) - target.create_dataset('ys', data=self.ys._data) - target.create_dataset('n', data=self.n) - target.create_dataset('lengths', data=self.lengths) - - scanpath_attributes_group = target.create_group('scanpath_attributes') - for attribute_name, attribute_value in self.scanpath_attributes.items(): - create_hdf5_dataset(scanpath_attributes_group, attribute_name, attribute_value) - scanpath_attributes_group.attrs['__attributes__'] = np.string_(json.dumps(sorted(self.scanpath_attributes.keys()))) - - fixation_attributes_group = target.create_group('fixation_attributes') - for attribute_name, attribute_value in self.fixation_attributes.items(): - fixation_attributes_group.create_dataset(attribute_name, data=attribute_value._data) - fixation_attributes_group.attrs['__attributes__'] = np.string_(json.dumps(sorted(self.fixation_attributes.keys()))) - - target.attrs['attribute_mapping'] = np.string_(json.dumps(self.attribute_mapping)) - 
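(Editorial aside, not part of the diff: `Scanpaths` and its `to_hdf5`/`read_hdf5` pair above move into `pysaliency/datasets/scanpaths.py`, per the new imports in `__init__.py`. Below is a minimal round-trip sketch, not the library's documented usage: it assumes `hdf5_wrapper` also accepts a plain filename, that `Scanpaths` is importable from `pysaliency.datasets`, and the file name `scanpaths.hdf5` is hypothetical.)

import numpy as np
from pysaliency.datasets import Scanpaths

# two scanpaths of lengths 3 and 2, NaN-padded in the same way as train_xs/train_ys above
xs = np.array([[0.0, 10.0, 20.0], [5.0, 15.0, np.nan]])
ys = np.array([[1.0, 11.0, 21.0], [6.0, 16.0, np.nan]])

scanpaths = Scanpaths(
    xs=xs,
    ys=ys,
    n=np.array([0, 1]),                                 # stimulus index per scanpath
    lengths=np.array([3, 2]),
    scanpath_attributes={'subject': np.array([0, 1])},  # one value per scanpath
    attribute_mapping={},                               # passed explicitly for this sketch
)

scanpaths.to_hdf5('scanpaths.hdf5')                     # hypothetical file name
restored = Scanpaths.read_hdf5('scanpaths.hdf5')
assert len(restored) == len(scanpaths)
assert np.all(restored.lengths == scanpaths.lengths)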
- - @classmethod - @hdf5_wrapper(mode='r') - def read_hdf5(cls, source): - data_type = decode_string(source.attrs['type']) - data_version = decode_string(source.attrs['version']) - - if data_type != 'Scanpaths': - raise ValueError("Invalid type! Expected 'Scanpaths', got", data_type) - - valid_versions = ['1.0'] - if data_version not in valid_versions: - raise ValueError("Invalid version! Expected one of {}, got {}".format(', '.join(valid_versions), data_version)) - - lengths = source['lengths'][...] - xs = VariableLengthArray(source['xs'][...], lengths) - ys = VariableLengthArray(source['ys'][...], lengths) - n = source['n'][...] - - scanpath_attributes = _load_attribute_dict_from_hdf5(source['scanpath_attributes']) - - fixation_attributes_group = source['fixation_attributes'] - json_attributes = fixation_attributes_group.attrs['__attributes__'] - if not isinstance(json_attributes, str): - json_attributes = json_attributes.decode('utf8') - __attributes__ = json.loads(json_attributes) - - fixation_attributes = {attribute: VariableLengthArray(fixation_attributes_group[attribute][...], lengths) for attribute in __attributes__} - - return cls( - xs=xs, - ys=ys, - n=n, - lengths=lengths, - scanpath_attributes=scanpath_attributes, - fixation_attributes=fixation_attributes, - attribute_mapping=json.loads(decode_string(source.attrs['attribute_mapping'])) - ) - - def __getitem__(self, index): - # TODO - # - integer to return single scanpath - # - 2d index to return single Fixation (for now via index of scanpath and index of fixation in scanpath) - # - 2d index array to return Fixations instance (for now via index of scanpath and index of fixation in scanpath) - - if isinstance(index, tuple): - raise NotImplementedError("Not implemented yet") - elif isinstance(index, int): - raise NotImplementedError("Not implemented yet") - else: - return type(self)(self.xs[index], self.ys[index], self.n[index], self.lengths[index], - scanpath_attributes={key: value[index] for key, value in self.scanpath_attributes.items()}, - fixation_attributes={key: value[index] for key, value in self.fixation_attributes.items()}, - attribute_mapping=self.attribute_mapping) - - def create_subset(stimuli, fixations, stimuli_indices): """Create subset of stimuli and fixations using only stimuli with given indices. 
@@ -1313,24 +81,9 @@ def create_subset(stimuli, fixations, stimuli_indices): return new_stimuli, new_fixations -def _get_merged_attribute_list(attributes): - all_attributes = set(attributes[0]) - common_attributes = set(attributes[0]) - - for _attributes in attributes[1:]: - all_attributes = all_attributes.union(_attributes) - common_attributes = common_attributes.intersection(_attributes) - - if common_attributes != all_attributes: - lost_attributes = all_attributes.difference(common_attributes) - warnings.warn(f"Discarding attributes which are not present everywhere: {lost_attributes}", stacklevel=4) - - return sorted(common_attributes) - - def concatenate_stimuli(stimuli): attributes = {} - for key in _get_merged_attribute_list([list(s.attributes.keys()) for s in stimuli]): + for key in get_merged_attribute_list([list(s.attributes.keys()) for s in stimuli]): attributes[key] = concatenate_attributes(s.attributes[key] for s in stimuli) if all(isinstance(s, FileStimuli) for s in stimuli): @@ -1339,32 +92,6 @@ def concatenate_stimuli(stimuli): return ObjectStimuli(sum([s.stimulus_objects for s in stimuli], []), attributes=attributes) -def concatenate_attributes(attributes): - attributes = list(attributes) - - if isinstance(attributes[0], VariableLengthArray): - return concatenate_variable_length_arrays(attributes) - - attributes = [np.array(a) for a in attributes] - for a in attributes: - assert len(a.shape) == len(attributes[0].shape) - - if len(attributes[0].shape) == 1: - return np.hstack(attributes) - - else: - assert len(attributes[0].shape) == 2 - max_cols = max(a.shape[1] for a in attributes) - padded_attributes = [] - for a in attributes: - if a.shape[1] < max_cols: - padding = np.empty((a.shape[0], max_cols-a.shape[1]), dtype=a.dtype) - padding[:] = np.nan - padded_attributes.append(np.hstack((a, padding))) - else: - padded_attributes.append(a) - return np.vstack(padded_attributes) - #np.testing.assert_allclose(concatenate_attributes([[0], [1, 2, 3]]), [0,1,2,3]) #np.testing.assert_allclose(concatenate_attributes([[[0]], [[1],[2], [3]]]), [[0],[1],[2],[3]]) #np.testing.assert_allclose(concatenate_attributes([[[0.,1.]], [[1.],[2.], [3.]]]), [[0, 1],[1,np.nan],[2,np.nan],[3,np.nan]]) @@ -1485,145 +212,4 @@ def create_nonfixations(stimuli, fixations, index, adjust_n = True, adjust_histo if adjust_n: non_fixations.n = np.ones(len(non_fixations.n), dtype=int)*index - return non_fixations - - -def _scanpath_from_fixation_index(fixations, fixation_index, scanpath_attribute_names, scanpath_fixation_attribute_names): - history_length = fixations.lengths[fixation_index] - xs = np.hstack(( - fixations.x_hist[fixation_index, :history_length], - [fixations.x[fixation_index]] - )) - - ys = np.hstack(( - fixations.y_hist[fixation_index, :history_length], - [fixations.y[fixation_index]] - )) - - ts = np.hstack(( - fixations.t_hist[fixation_index, :history_length], - [fixations.t[fixation_index]] - )) - - n = fixations.n[fixation_index] - - subject = fixations.subjects[fixation_index] - - scanpath_attributes = { - attribute: getattr(fixations, attribute)[fixation_index] - for attribute in scanpath_attribute_names - } - - scanpath_fixation_attributes = {} - for attribute in scanpath_fixation_attribute_names: - attribute_value = np.hstack(( - getattr(fixations, '{attribute}_hist'.format(attribute=attribute))[fixation_index, :history_length], - [getattr(fixations, attribute)[fixation_index]] - )) - scanpath_fixation_attributes[attribute] = attribute_value - - - return xs, ys, ts, n, subject, 
scanpath_attributes, scanpath_fixation_attributes - - -def scanpaths_from_fixations(fixations, verbose=False): - """ reconstructs scanpaths (FixationTrains) from fixation which originally came from scanpaths. - - when called as in - - scanpaths, indices = scanpaths_from_fixations(fixations) - - you will get scanpathhs[indices] == fixations. - - :note - only works if the original scanpaths only used scanpath_attributes and scanpath_fixation_attribute, - but not attributes (which should not be used for scanpaths anyway). - """ - if 'scanpath_index' not in fixations.__attributes__: - raise NotImplementedError("Fixations with scanpath_index attribute required!") - - scanpath_xs = [] - scanpath_ys = [] - scanpath_ts = [] - scanpath_ns = [] - scanpath_subjects = [] - __attributes__ = [attribute for attribute in fixations.__attributes__ if attribute != 'subjects' and attribute != 'scanpath_index' and not attribute.endswith('_hist')] - __scanpath_attributes__ = [attribute for attribute in __attributes__ if '{attribute}_hist'.format(attribute=attribute) not in fixations.__attributes__] - __scanpath_fixation_attributes__ = [attribute for attribute in __attributes__ if attribute not in __scanpath_attributes__] - - scanpath_fixation_attributes = {attribute: [] for attribute in __scanpath_fixation_attributes__} - scanpath_attributes = {attribute: [] for attribute in __scanpath_attributes__} - - attribute_shapes = { - attribute: getattr(fixations, attribute)[0].shape for attribute in __attributes__ - } - - __all_attributes__ = __attributes__ + ['{attribute}_hist'.format(attribute=attribute) for attribute in __scanpath_fixation_attributes__] - - indices = np.ones(len(fixations), dtype=int) * -1 - fixation_counter = 0 - - for scanpath_index in tqdm(sorted(np.unique(fixations.scanpath_index)), disable=not verbose): - scanpath_indices = fixations.scanpath_index == scanpath_index - scanpath_integer_indices = np.nonzero(scanpath_indices)[0] - lengths = fixations.lengths[scanpath_indices] - - # build scanpath up to maximum length - maximum_length = max(lengths) - _index_of_maximum_length = np.argmax(lengths) - index_of_maximum_length = scanpath_integer_indices[_index_of_maximum_length] - - xs, ys, ts, n, subject, this_scanpath_attributes, this_scanpath_fixation_attributes = _scanpath_from_fixation_index( - fixations, - index_of_maximum_length, - __scanpath_attributes__, - __scanpath_fixation_attributes__ - ) - - scanpath_xs.append(xs) - scanpath_ys.append(ys) - scanpath_ts.append(ts) - scanpath_ns.append(n) - scanpath_subjects.append(subject) - - for attribute, value in this_scanpath_fixation_attributes.items(): - scanpath_fixation_attributes[attribute].append(value) - for attribute, value in this_scanpath_attributes.items(): - scanpath_attributes[attribute].append(value) - - # build indices - - for index_in_scanpath in range(maximum_length+1): - if index_in_scanpath in lengths: - # add index to indices - index_in_fixations = scanpath_integer_indices[list(lengths).index(index_in_scanpath)] - - # there might be one fixation multiple times in fixations. 
- indices_in_fixations = scanpath_integer_indices[lengths == index_in_scanpath] - indices[indices_in_fixations] = fixation_counter + index_in_scanpath - - fixation_counter += len(xs) - - scanpath_attributes = { - attribute: np.array(value) for attribute, value in scanpath_attributes.items() - } - - return FixationTrains.from_fixation_trains( - xs=scanpath_xs, - ys=scanpath_ys, - ts=scanpath_ts, - ns=scanpath_ns, - subjects=scanpath_subjects, - scanpath_attributes=scanpath_attributes, - scanpath_fixation_attributes=scanpath_fixation_attributes - ), indices - - -def _load_attribute_dict_from_hdf5(attribute_group): - json_attributes = attribute_group.attrs['__attributes__'] - if not isinstance(json_attributes, str): - json_attributes = json_attributes.decode('utf8') - __attributes__ = json.loads(json_attributes) - - attributes = {attribute: attribute_group[attribute][...] for attribute in __attributes__} - return attributes \ No newline at end of file + return non_fixations \ No newline at end of file diff --git a/pysaliency/datasets/fixations.py b/pysaliency/datasets/fixations.py index d61a9fc..e514885 100644 --- a/pysaliency/datasets/fixations.py +++ b/pysaliency/datasets/fixations.py @@ -1,23 +1,1182 @@ import json -import os -import pathlib import warnings -from collections.abc import Sequence -from functools import wraps -from hashlib import sha1 from typing import Dict, List, Optional, Union -from weakref import WeakValueDictionary import numpy as np -from boltons.cacheutils import cached +from tqdm import tqdm -from ..utils.variable_length_array import VariableLengthArray, concatenate_variable_length_arrays +from ..utils import remove_trailing_nans +from ..utils.variable_length_array import VariableLengthArray +from .scanpaths import Scanpaths +from .utils import _load_attribute_dict_from_hdf5, concatenate_attributes, decode_string, get_merged_attribute_list, hdf5_wrapper -try: - from imageio.v3 import imread -except ImportError: - from imageio import imread -from PIL import Image -from tqdm import tqdm -from ..utils import LazyList, remove_trailing_nans \ No newline at end of file +class Fixations(object): + """Encapsulates the fixations of a dataset and provides different methods + of accessing them, e.g. in fixation trains, as conditional fixations + or just all fixations at once. + + Fixations consist of: + x: the x-position of the fixation + y: the y-position of the fixation + t: the time of the fixation + x_hist: the previous x-positions in the history of this fixation + y_hist: the previous y-positions in the history of this fixation + t_hist: the previous times in the history of this fixation + subject: the subject who made the fixation + n: the index of the stimulus (optional, only needed when not evaluating on single images) + + Fixations support slicing via fixations[indices] as a shortcut for fixations.filter. + + Although all fixations have a history of previous fixations, these histories + do not have to form a set of fixation sequences. For example, if a fixation + has a previous fixation, this previous fixation does not have to appear as a + fixation of its own in the dataset. This is important because otherwise + a lot of useful filtering operations would not be possible (e.g. filtering + for all fixations with at least one previous fixation to calculate + saccade lengths). If you need fixation trains, use the subclass + `FixationTrains`.
+    """
+    __attributes__ = ['subjects']
+
+    def __init__(self,
+                 x: Union[List, np.ndarray],
+                 y: Union[List, np.ndarray],
+                 t: Union[List, np.ndarray],
+                 x_hist: Union[List, VariableLengthArray],
+                 y_hist: Union[List, VariableLengthArray],
+                 t_hist: Union[List, VariableLengthArray],
+                 n: Union[List, np.ndarray],
+                 subjects: Optional[Union[List, np.ndarray]] = None,
+                 attributes: Optional[Dict[str, Union[np.ndarray, VariableLengthArray]]] = None):
+
+        self.x = np.asarray(x)
+        self.y = np.asarray(y)
+        self.t = np.asarray(t)
+        self.n = np.asarray(n)
+
+        # supporting plain lists directly would be nice, but is not yet implemented.
+        # For now, a VariableLengthArray can simply be passed instead:
+        # if isinstance(x_hist, list):
+        #     x_hist = VariableLengthArray(x_hist)
+        #     self.lengths = x_hist.lengths
+        if isinstance(x_hist, (list, np.ndarray)):
+            x_hist = np.array(x_hist)
+            self.lengths = (1 - np.isnan(x_hist)).sum(axis=-1)
+            x_hist = VariableLengthArray(x_hist, lengths=self.lengths)
+        elif isinstance(x_hist, VariableLengthArray):
+            self.lengths = x_hist.lengths
+
+        y_hist = self._as_variable_length_array(y_hist)
+        t_hist = self._as_variable_length_array(t_hist)
+
+        if subjects is not None:
+            subjects = np.asarray(subjects)
+
+        self.x_hist = x_hist
+        self.y_hist = y_hist
+        self.t_hist = t_hist
+        self.subjects = subjects
+
+        if not len(self.x) == len(self.y) == len(self.t) == len(self.x_hist) == len(self.y_hist) == len(self.t_hist) == len(self.n):
+            raise ValueError("Lengths of fixations have to match")
+        if self.subjects is not None and not len(self.x) == len(self.subjects):
+            raise ValueError("Length of subjects has to match number of fixations")
+
+        if attributes is not None:
+            self.__attributes__ = list(self.__attributes__)
+            for name, value in attributes.items():
+                if name not in self.__attributes__:
+                    self.__attributes__.append(name)
+                if not len(value) == len(self.x):
+                    raise ValueError(f"Length of attribute '{name}' has to match number of fixations")
+                setattr(self, name, value)
+
+    def _check_lengths(self, other: VariableLengthArray):
+        if not len(self) == len(other):
+            raise ValueError("Length of scanpaths has to match")
+        if not np.all(self.lengths == other.lengths):
+            raise ValueError("Lengths of scanpaths have to match")
+
+    def _as_variable_length_array(self, data: Union[np.ndarray, VariableLengthArray]) -> VariableLengthArray:
+        if not isinstance(data, VariableLengthArray):
+            data = VariableLengthArray(data, self.lengths)
+
+        self._check_lengths(data)
+
+        return data
+
+    @classmethod
+    def create_without_history(cls, x, y, n, subjects=None):
+        """ Create a new fixation object from fixation data without time and optionally
+            without subject information
+        """
+        N = len(x)
+        t = np.zeros(N)
+        x_hist = np.empty((N, 1))*np.nan
+        y_hist = np.empty((N, 1))*np.nan
+        t_hist = np.empty((N, 1))*np.nan
+        if subjects is None:
+            subjects = np.ones(N)
+        return cls(x, y, t, x_hist, y_hist, t_hist, n, subjects)
+
+    @classmethod
+    def from_fixation_matrices(cls, matrices):
+        """
+        Create a new Fixations object from fixation matrices.
+
+        Often, fixations are stored in fixation matrices: For each stimulus, there
+        is a matrix of the same size as the image which has ones in each fixated
+        location and zeros everywhere else. This method allows constructing a
+        `Fixations` instance from such fixation matrices.
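+        Note that matrix rows index the y-coordinate and matrix columns the
+        x-coordinate, matching the `y, x = np.nonzero(matrix)` call in the
+        implementation.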
+
+        >>> matrix1 = np.zeros((10,10))
+        >>> matrix1[5, 2] = 1
+        >>> matrix1[3, 3] = 1
+        >>> matrix2 = np.zeros((20, 30))
+        >>> matrix2[10, 20] = 1
+        >>> fixations = pysaliency.Fixations.from_fixation_matrices(
+        ...     [matrix1,
+        ...      matrix2])
+        """
+        xs = []
+        ys = []
+        ns = []
+        for _n, matrix in enumerate(matrices):
+            y, x = np.nonzero(matrix)
+            n = [_n] * len(y)
+            xs.append(x)
+            ys.append(y)
+            ns.append(n)
+        x = np.hstack(xs).astype(float)
+        y = np.hstack(ys).astype(float)
+        n = np.hstack(ns)
+        return cls.create_without_history(x, y, n)
+
+    @classmethod
+    def concatenate(cls, fixations):
+        kwargs = {}
+        for key in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n', 'subjects']:
+            kwargs[key] = concatenate_attributes(getattr(f, key) for f in fixations)
+
+        attributes = get_merged_attribute_list([f.__attributes__ for f in fixations])
+        attribute_dict = {}
+        for key in attributes:
+            if key == 'subjects':
+                continue
+            attribute_dict[key] = concatenate_attributes(getattr(f, key) for f in fixations)
+
+        kwargs['attributes'] = attribute_dict
+
+        new_fixations = cls(**kwargs)
+
+        return new_fixations
+
+    def __getitem__(self, indices):
+        return self.filter(indices)
+
+    def __len__(self):
+        return len(self.x)
+
+    def filter(self, inds):
+        """
+        Create a new fixations object which contains only the fixations with indices in inds
+
+        .. note::
+            The fixation trains of the object are left as is. Filtering the fixation trains
+            is not possible as the indices may include only some fixations of a fixation train.
+
+            The attribute `consistent_fixation_trains` tracks whether a `Fixations` instance
+            still has consistent fixation trains. The result of this function will be marked
+            to have inconsistent fixation trains. If you need to filter with consistent
+            fixation trains, use `Fixations.filter_fixation_trains`.
+        """
+
+        kwargs = {}
+        other_attributes = {}
+
+        def filter_array(name):
+            kwargs[name] = getattr(self, name)[inds].copy()
+
+        for name in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n']:
+            filter_array(name)
+
+        for name in self.__attributes__:
+            filter_array(name)
+            if name != 'subjects':
+                other_attributes[name] = kwargs.pop(name)
+
+        new_fix = Fixations(**kwargs)
+        for key, value in other_attributes.items():
+            setattr(new_fix, key, value)
+        new_fix.__attributes__ = list(self.__attributes__)
+        return new_fix
+
+    def _get_previous_values(self, name, index):
+        """return fixations.name[np.arange(len(fixations.name)),index]"""
+        a = getattr(self, name)
+        inds = np.arange(len(a))
+        if index >= 0:
+            return a[inds, index]
+        else:
+            indexes = self.lengths + index
+            return a[inds, indexes]
+
+    def get_saccade(self, index = -1):
+        """
+        Return saccades for all fixations.
+
+        @type index: integer
+        @param index: index of the saccade to return. `index==-1` returns
+                      the last saccades of all fixations, etc.
+
+        @return dx, dy, dt of the saccade
+
+        Example:
+
+            dx, dy, dt = fixations.get_saccade(-1)
+            mean_saccade_length = np.sqrt(dx**2+dy**2).mean()
+        """
+
+        if index > 0:
+            raise NotImplementedError()
+        if index == -1:
+            x1 = self.x
+            y1 = self.y
+            t1 = self.t
+        else:
+            x1 = self._get_previous_values('x_hist', index+1)
+            y1 = self._get_previous_values('y_hist', index+1)
+            t1 = self._get_previous_values('t_hist', index+1)
+        dx = x1 - self._get_previous_values('x_hist', index)
+        dy = y1 - self._get_previous_values('y_hist', index)
+        dt = t1 - self._get_previous_values('t_hist', index)
+        return dx, dy, dt
+
+    @property
+    def x_int(self):
+        """ x coordinates of the fixations, converted to integers """
+        return np.asarray(self.x, dtype=int)
+
+    @property
+    def y_int(self):
+        """ y coordinates of the fixations, converted to integers """
+        return np.asarray(self.y, dtype=int)
+
+    @property
+    def subject_count(self):
+        return self.subjects.max()+1
+
+    def copy(self):
+        cfix = Fixations(self.x.copy(), self.y.copy(), self.t.copy(),
+                         self.x_hist.copy(), self.y_hist.copy(), self.t_hist.copy(),
+                         self.n.copy(), self.subjects.copy() if self.subjects is not None else None)
+        cfix.__attributes__ = list(self.__attributes__)
+        for name in self.__attributes__:
+            setattr(cfix, name, getattr(self, name).copy())
+        return cfix
+
+    @classmethod
+    def FixationsWithoutHistory(cls, x, y, t, n, subjects):
+        x_hist = np.empty((len(x), 1))
+        x_hist[:] = np.nan
+        y_hist = np.empty((len(x), 1))
+        y_hist[:] = np.nan
+        t_hist = np.empty((len(x), 1))
+        t_hist[:] = np.nan
+        return cls(x, y, t, x_hist, y_hist, t_hist, n, subjects)
+
+    @hdf5_wrapper(mode='w')
+    def to_hdf5(self, target):
+        """ Write fixations to hdf5 file or hdf5 group
+        """
+
+        target.attrs['type'] = np.string_('Fixations')
+        target.attrs['version'] = np.string_('1.1')
+
+        variable_length_arrays = []
+
+        for attribute in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n', 'lengths'] + self.__attributes__:
+            data = getattr(self, attribute)
+            if isinstance(data, VariableLengthArray):
+                variable_length_arrays.append(attribute)
+                data = data._data
+            target.create_dataset(attribute, data=data)
+
+        target.attrs['__attributes__'] = np.string_(json.dumps(self.__attributes__))
+        target.attrs['__variable_length_arrays__'] = np.string_(json.dumps(sorted(variable_length_arrays)))
+
+    @classmethod
+    @hdf5_wrapper(mode='r')
+    def read_hdf5(cls, source):
+        """ Read fixations from hdf5 file or hdf5 group """
+
+        # TODO: rewrite to use constructor instead of manipulating the object directly
+
+        data_type = decode_string(source.attrs['type'])
+        data_version = decode_string(source.attrs['version'])
+
+        if data_type != 'Fixations':
+            raise ValueError("Invalid type! Expected 'Fixations', got", data_type)
+
+        if data_version not in ['1.0', '1.1']:
+            raise ValueError("Invalid version! Expected '1.0' or '1.1', got", data_version)
+
+        data = {key: source[key][...] for key in ['x', 'y', 't', 'x_hist', 'y_hist', 't_hist', 'n', 'subjects']}
+        fixations = cls(**data)
+
+        json_attributes = source.attrs['__attributes__']
+        if not isinstance(json_attributes, str):
+            json_attributes = json_attributes.decode('utf8')
+        __attributes__ = json.loads(json_attributes)
+        fixations.__attributes__ = list(__attributes__)
+
+        if data_version == '1.1':
+            lengths = source['lengths'][...]
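+            # Version 1.1 files additionally record which datasets are NaN-padded
+            # matrices; together with `lengths` this is used below to reconstruct
+            # the VariableLengthArrays (x_hist, y_hist, t_hist, ...).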
+ + json_variable_length_arrays = source.attrs['__variable_length_arrays__'] + if not isinstance(json_variable_length_arrays, str): + json_variable_length_arrays = json_variable_length_arrays.decode('utf8') + variable_length_arrays = json.loads(json_variable_length_arrays) + + else: + lengths = fixations.lengths + variable_length_arrays = ['x_hist', 'y_hist', 't_hist'] + [key for key in __attributes__ if key.endswith('_hist')] + + for key in __attributes__: + data = source[key][...] + + if key in variable_length_arrays: + data = VariableLengthArray(data, lengths) + + setattr(fixations, key, data) + + return fixations + + +class FixationTrains(Fixations): + """ + Capsules the fixations of a dataset as fixation trains. + + Additionally to `Fixations`, `FixationTrains`-instances + have the attributes + train_xs: 2d array (number_of_trains, maximum_length_of_train) + train_ys: 2d array (number_of_trains, maximum_length_of_train) + train_ts: 2d array (number_of_trains, maximum_length_of_train) + train_ns: 1d array (number_of_trains) + train_subjects: 1d array (number_of_trains) + + scanpath_attributes: dictionary of attributes applying to full scanpaths, e.g. task + {attribute_name: $NUM_SCANPATHS-length-list} + scanpath attributes will automatically also become attributes + scanpath_fixation_attribute: dictionary of attributes applying to fixations in the scanpath, e.g. duration + {attribute_name: $NUM_SCANPATH x $NUM_FIXATIONS_IN_SCANPATH} + scanpath fixation attributes will generate two attributes: the value for each fixation + and the history for previous fixations. E.g. a scanpath fixation attribute "durations" will generate + an attribute "durations" and an attribute "durations_hist" + + """ + def __init__(self, train_xs, train_ys, train_ts, train_ns, train_subjects, scanpath_attributes=None, scanpath_fixation_attributes=None, attributes=None, scanpath_attribute_mapping=None): + self.__attributes__ = list(self.__attributes__) + self.__attributes__.append('scanpath_index') + + scanpath_attributes = scanpath_attributes or {} + scanpath_attribute_mapping = scanpath_attribute_mapping or {} + scanpath_fixation_attributes = scanpath_fixation_attributes or {} + + if 'subject' in scanpath_attributes and train_subjects is not None: + raise ValueError("subject should not be in scanpath_attributes if train_subjects is specified") + if 'subject' not in scanpath_attributes: + scanpath_attributes['subject'] = train_subjects + + if 'ts' in scanpath_fixation_attributes and train_ts is not None: + raise ValueError("ts should not be in scanpath_fixation_attributes if train_ts is specified") + if 'ts' not in scanpath_fixation_attributes: + scanpath_fixation_attributes['ts'] = train_ts + scanpath_attribute_mapping['ts'] = 't' + + lengths = [len(remove_trailing_nans(xs)) for xs in train_xs] + + scanpaths = Scanpaths( + xs=train_xs, + ys=train_ys, + n=train_ns, + lengths=lengths, + scanpath_attributes=scanpath_attributes, + fixation_attributes=scanpath_fixation_attributes, + attribute_mapping=scanpath_attribute_mapping, + ) + + self.scanpaths = scanpaths + + N_fixations = scanpaths.lengths.sum() + max_scanpath_length = scanpaths.lengths.max() if len(self.scanpaths) else 0 + max_history_length = max(max_scanpath_length - 1, 0) + + + # Create conditional fixations + self.x = np.empty(N_fixations) + self.y = np.empty(N_fixations) + self.t = np.empty(N_fixations) + self.x_hist = np.empty((N_fixations, max_history_length)) + self.y_hist = np.empty((N_fixations, max_history_length)) + self.t_hist = 
np.empty((N_fixations, max_history_length)) + self.x_hist[:] = np.nan + self.y_hist[:] = np.nan + self.t_hist[:] = np.nan + self.n = np.empty(N_fixations, dtype=int) + self.lengths = np.empty(N_fixations, dtype=int) + # self.train_lengths = np.empty(len(self.train_xs), dtype=int) + self.subjects = np.empty(N_fixations, dtype=int) + self.scanpath_index = np.empty(N_fixations, dtype=int) + + out_index = 0 + # TODO: maybe implement in numba? + # probably best: have function fill_fixation_data(scanpath_data, fixation_data, hist_data=None) + for train_index in range(len(self.scanpaths)): + #fix_length = len(remove_trailing_nans(self.train_xs[train_index])) + # self.train_lengths[train_index] = fix_length + for fix_index in range(self.scanpaths.lengths[train_index]): + self.x[out_index] = self.scanpaths.xs[train_index][fix_index] + self.y[out_index] = self.scanpaths.ys[train_index][fix_index] + self.t[out_index] = self.scanpaths.ts[train_index][fix_index] + self.n[out_index] = self.scanpaths.n[train_index] + self.subjects[out_index] = self.scanpaths.scanpath_attributes['subject'][train_index] + self.lengths[out_index] = fix_index + self.scanpath_index[out_index] = train_index + self.x_hist[out_index][:fix_index] = self.scanpaths.xs[train_index][:fix_index] + self.y_hist[out_index][:fix_index] = self.scanpaths.ys[train_index][:fix_index] + self.t_hist[out_index][:fix_index] = self.scanpaths.ts[train_index][:fix_index] + out_index += 1 + + # TODO: this should become irrelevant once FixationTrains is also completely upgraded to VariableLengthArrays + self.x_hist = VariableLengthArray(self.x_hist, self.lengths) + self.y_hist = VariableLengthArray(self.y_hist, self.lengths) + self.t_hist = VariableLengthArray(self.t_hist, self.lengths) + + #if scanpath_attributes is not None: + # assert isinstance(scanpath_attributes, dict) + # self.scanpath_attributes = {key: np.array(value) for key, value in scanpath_attributes.items()} + # for key, value in self.scanpath_attributes.items(): + # assert len(value) == len(self.scanpaths) + #else: + # self.scanpath_attributes = {} + + #if scanpath_fixation_attributes is not None: + # assert isinstance(scanpath_fixation_attributes, dict) + # self.scanpath_fixation_attributes = {} + # for key, value in scanpath_fixation_attributes.items(): + # self.scanpath_fixation_attributes[key] = self._as_variable_length_scanpath_array(value) + #else: + # self.scanpath_fixation_attributes = {} + + # self.scanpath_attribute_mapping = self.scanpaths.attribute_mapp + + + if attributes is None: + attributes = {} + elif attributes: + warnings.warn("don't use attributes for FixationTrains, use scanpath_attributes or scanpath_fixation_attributes instead!", stacklevel=2) + + self.auto_attributes = [] + + for attribute_name, value in self.scanpaths.scanpath_attributes.items(): + if attribute_name == 'subject': + continue + new_attribute_name = self.scanpaths.attribute_mapping.get(attribute_name, attribute_name) + if new_attribute_name in attributes: + raise ValueError("attribute name clash: {new_attribute_name}".format(new_attribute_name=new_attribute_name)) + attribute_shape = [] if not value.any() else np.asarray(value[0]).shape + attributes[new_attribute_name] = np.empty([N_fixations] + list(attribute_shape), dtype=value.dtype) + self.auto_attributes.append(new_attribute_name) + + out_index = 0 + for train_index in range(len(self.scanpaths)): + for fix_index in range(self.scanpaths.lengths[train_index]): + attributes[new_attribute_name][out_index] = 
self.scanpaths.scanpath_attributes[attribute_name][train_index] + out_index += 1 + + + for attribute_name, value in self.scanpaths.fixation_attributes.items(): + if attribute_name == 'ts': + continue + new_attribute_name = self.scanpaths.attribute_mapping.get(attribute_name, attribute_name) + if new_attribute_name in attributes: + raise ValueError("attribute name clash: {new_attribute_name}".format(new_attribute_name=new_attribute_name)) + attributes[new_attribute_name] = np.empty(N_fixations) + self.auto_attributes.append(new_attribute_name) + + hist_attribute_name = new_attribute_name + '_hist' + if hist_attribute_name in attributes: + raise ValueError("attribute name clash: {hist_attribute_name}".format(hist_attribute_name=hist_attribute_name)) + attributes[hist_attribute_name] = np.full((N_fixations, max_history_length), fill_value=np.nan) + self.auto_attributes.append(hist_attribute_name) + + out_index = 0 + for train_index in range(len(self.scanpaths)): + for fix_index in range(self.scanpaths.lengths[train_index]): + attributes[new_attribute_name][out_index] = self.scanpaths.fixation_attributes[attribute_name][train_index, fix_index] + attributes[hist_attribute_name][out_index][:fix_index] = self.scanpaths.fixation_attributes[attribute_name][train_index, :fix_index] + out_index += 1 + + attributes[hist_attribute_name] = VariableLengthArray(attributes[hist_attribute_name], self.lengths) + + if attributes: + self.__attributes__ = list(self.__attributes__) + for key, value in attributes.items(): + assert key != 'subjects' + assert key != 'scanpath_index' + assert key != 't' + assert len(value) == len(self.x) + self.__attributes__.append(key) + if not isinstance(value, VariableLengthArray): + value = np.array(value) + setattr(self, key, value) + + self.full_nonfixations = None + + # def _check_train_lengths(self, other: VariableLengthArray): + # if not len(self.train_lengths) == len(other): + # raise ValueError("Length of scanpaths has to match") + # if not np.all(self.train_lengths == other.lengths): + # raise ValueError("Lengths of scanpaths have to match") + + # def _as_variable_length_scanpath_array(self, data: Union[np.ndarray, VariableLengthArray]) -> VariableLengthArray: + # if not isinstance(data, VariableLengthArray): + # data = VariableLengthArray(data, self.train_lengths) + + # self._check_train_lengths(data) + + # return data + + @property + def train_xs(self) -> VariableLengthArray: + return self.scanpaths.xs + + @property + def train_ys(self) -> VariableLengthArray: + return self.scanpaths.ys + + @property + def train_ts(self) -> VariableLengthArray: + return self.scanpaths.ts + + @property + def train_ns(self) -> np.ndarray: + return self.scanpaths.n + + @property + def train_subjects(self) -> VariableLengthArray: + return self.scanpaths.subject + + @property + def train_lengths(self) -> np.ndarray: + return self.scanpaths.lengths + + @property + def scanpath_attributes(self) -> Dict[str, np.ndarray]: + return { + key: value for key, value in self.scanpaths.scanpath_attributes.items() if key != 'subject' + } + + @property + def scanpath_fixation_attributes(self) -> Dict[str, VariableLengthArray]: + return { + key: value for key, value in self.scanpaths.fixation_attributes.items() if key != 'ts' + } + + @property + def scanpath_attribute_mapping(self) -> Dict[str, str]: + return { + key: value for key, value in self.scanpaths.attribute_mapping.items() if key != 'ts' + } + + @classmethod + def concatenate(cls, fixation_trains): + kwargs = {} + + for key in ['train_xs', 
'train_ys', 'train_ts', 'train_ns', 'train_subjects']: + kwargs[key] = concatenate_attributes(getattr(f, key) for f in fixation_trains) + + def _real_attributes(scanpaths: FixationTrains): + return [attribute_name for attribute_name in scanpaths.__attributes__ if attribute_name not in scanpaths.auto_attributes + ['scanpath_index']] + + def _mapped_attribute_name(attribute_name: str, scanpaths: FixationTrains): + names = [s.scanpath_attribute_mapping.get(attribute_name) for s in scanpaths] + if len(set(names)) > 1: + raise ValueError(f"inconsistent attribute name mappings for '{attribute_name}': {names}") + + return names[0] + + attributes = get_merged_attribute_list([_real_attributes(f) for f in fixation_trains]) + attribute_dict = {} + for key in attributes: + if key == 'subjects': + continue + attribute_dict[key] = concatenate_attributes(getattr(f, key) for f in fixation_trains) + + kwargs['attributes'] = attribute_dict + + scanpath_attribute_names = get_merged_attribute_list([list(f.scanpath_attributes) for f in fixation_trains]) + + kwargs['scanpath_attributes'] = {} + kwargs['scanpath_attribute_mapping'] = {} + for name in scanpath_attribute_names: + kwargs['scanpath_attributes'][name] = concatenate_attributes(f.scanpath_attributes[name] for f in fixation_trains) + mapped_name = _mapped_attribute_name(name, fixation_trains) + if mapped_name is not None: + kwargs['scanpath_attribute_mapping'][name] = mapped_name + + scanpath_fixation_attribute_names = get_merged_attribute_list([list(f.scanpath_fixation_attributes) for f in fixation_trains]) + + kwargs['scanpath_fixation_attributes'] = {} + for name in scanpath_fixation_attribute_names: + kwargs['scanpath_fixation_attributes'][name] = concatenate_attributes(f.scanpath_fixation_attributes[name] for f in fixation_trains) + mapped_name = _mapped_attribute_name(name, fixation_trains) + if mapped_name is not None: + kwargs['scanpath_attribute_mapping'][name] = mapped_name + + new_fixations = cls(**kwargs) + + return new_fixations + + + def set_scanpath_attribute(self, name, data, fixation_attribute_name=None): + """Sets a scanpath attribute + name: name of scanpath attribute + data: data of scanpath attribute, has to be of same length as number of scanpaths + fixation_attribute: name of automatically generated fixation attribute if it should be different than scanpath attribute name + """ + if not len(data) == len(self.train_xs): + raise ValueError(f'Length of scanpath attribute data has to match number of scanpaths: {len(data)} != {len(self.train_xs)}') + self.scanpath_attributes[name] = data + + if fixation_attribute_name is not None: + self.scanpath_attribute_mapping[name] = fixation_attribute_name + + new_attribute_name = self.scanpath_attribute_mapping.get(name, name) + if new_attribute_name in self.attributes and new_attribute_name not in self.auto_attributes: + raise ValueError("attribute name clash: {new_attribute_name}".format(new_attribute_name=new_attribute_name)) + + attribute_shape = np.asarray(data[0]).shape + self.attributes[new_attribute_name] = np.empty([len(self.train_xs)] + list(attribute_shape), dtype=data.dtype) + if new_attribute_name not in self.auto_attributes: + self.auto_attributes.append(new_attribute_name) + + out_index = 0 + for train_index in range(len(self.train_xs)): + fix_length = (1 - np.isnan(self.train_xs[train_index])).sum() + for _ in range(fix_length): + self.attributes[new_attribute_name][out_index] = self.scanpath_attributes[name][train_index] + out_index += 1 + + def copy(self): + copied_attributes = 
{} + for attribute_name in self.__attributes__: + if attribute_name in ['subjects', 'scanpath_index'] + self.auto_attributes: + continue + copied_attributes[attribute_name] = getattr(self, attribute_name).copy() + copied_scanpaths = FixationTrains( + train_xs=self.train_xs.copy(), + train_ys=self.train_ys.copy(), + train_ts=self.train_ts.copy(), + train_ns=self.train_ns.copy(), + train_subjects=self.train_subjects.copy(), + scanpath_attributes={ + key: value.copy() for key, value in self.scanpath_attributes.items() + } if self.scanpath_attributes else None, + scanpath_fixation_attributes={ + key: value.copy() for key, value in self.scanpath_fixation_attributes.items() + } if self.scanpath_fixation_attributes else None, + scanpath_attribute_mapping=dict(self.scanpath_attribute_mapping), + attributes=copied_attributes if copied_attributes else None, + ) + return copied_scanpaths + + def filter_fixation_trains(self, indices): + """ + Create new fixations object which contains only the fixation trains indicated. + """ + + filtered_scanpaths = self.scanpaths[indices] + + scanpath_indices = np.arange(len(self.scanpaths), dtype=int)[indices] + fixation_indices = np.in1d(self.scanpath_index, scanpath_indices) + + attributes = { + attribute_name: getattr(self, attribute_name)[fixation_indices] for attribute_name in self.__attributes__ if attribute_name not in ['subjects', 'scanpath_index'] + self.auto_attributes + } + + return type(self)( + train_xs=filtered_scanpaths.xs, + train_ys=filtered_scanpaths.ys, + train_ts=None, # filtered_scanpaths.ts, + train_ns=filtered_scanpaths.n, + train_subjects=None, # filtered_scanpaths.subject, + scanpath_attributes=filtered_scanpaths.scanpath_attributes, + scanpath_fixation_attributes=filtered_scanpaths.fixation_attributes, + scanpath_attribute_mapping=dict(filtered_scanpaths.attribute_mapping), + attributes=attributes, + ) + + + def fixation_trains(self): + """Yield for every fixation train of the dataset: + xs, ys, ts, n, subject + """ + for i in range(len(self.train_xs)): + length = (1 - np.isnan(self.train_xs[i])).sum() + xs = self.train_xs[i][:length] + ys = self.train_ys[i][:length] + ts = self.train_ts[i][:length] + n = self.train_ns[i] + subject = self.train_subjects[i] + yield xs, ys, ts, n, subject + + @classmethod + def from_fixation_trains(cls, xs, ys, ts, ns, subjects, attributes=None, scanpath_attributes=None, scanpath_fixation_attributes=None, scanpath_attribute_mapping=None): + """ Create Fixation object from fixation trains. + - xs, ys, ts: Lists of array_like of double. Each array has to contain + the data from one fixation train. + - ns, subjects: lists of int. 
ns has to contain the image index for + each fixation train, subjects the subject index for each + fixation train + """ + maxlength = max([len(x_train) for x_train in xs]) + train_xs = np.empty((len(xs), maxlength)) + train_xs[:] = np.nan + train_ys = np.empty((len(xs), maxlength)) + train_ys[:] = np.nan + train_ts = np.empty((len(xs), maxlength)) + train_ts[:] = np.nan + train_ns = np.empty(len(xs), dtype=int) + train_subjects = np.empty(len(xs), dtype=int) + + padded_scanpath_fixation_attributes = {} + if scanpath_fixation_attributes is not None: + for key, value in scanpath_fixation_attributes.items(): + assert len(value) == len(xs) + if isinstance(value, list): + padded_scanpath_fixation_attributes[key] = np.full((len(xs), maxlength), fill_value=np.nan, dtype=float) + + for i in range(len(train_xs)): + length = len(xs[i]) + train_xs[i, :length] = xs[i] + train_ys[i, :length] = ys[i] + train_ts[i, :length] = ts[i] + train_ns[i] = ns[i] + train_subjects[i] = subjects[i] + for attribute_name in padded_scanpath_fixation_attributes.keys(): + padded_scanpath_fixation_attributes[attribute_name][i, :length] = scanpath_fixation_attributes[attribute_name][i] + + return cls( + train_xs, + train_ys, + train_ts, + train_ns, + train_subjects, + attributes=attributes, + scanpath_attributes=scanpath_attributes, + scanpath_fixation_attributes=padded_scanpath_fixation_attributes, + scanpath_attribute_mapping=scanpath_attribute_mapping) + + def generate_crossval(self, splitcount = 10): + train_xs_training = [] + train_xs_eval = [] + train_ys_training = [] + train_ys_eval = [] + train_ts_training = [] + train_ts_eval = [] + train_ns_training = [] + train_ns_eval = [] + train_subjects_training = [] + train_subjects_eval = [] + # We have to make the crossvalidation data + # reproducible. Therefore we use a + # RandomState with fixed seed for the shuffling. 
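+        # For every fold `eval_index`, the fixation trains of stimulus n are
+        # relabelled to stimulus index n * splitcount + eval_index; the held-out
+        # part goes into the evaluation dataset, all other parts into training.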
+ rs = np.random.RandomState(42) + for n in range(self.n.max()+1): + inds = np.nonzero(self.train_ns == n)[0] + rs.shuffle(inds) + parts = np.array_split(inds, splitcount) + for eval_index in range(splitcount): + for index in range(splitcount): + part = parts[index] + if len(part) == 0: + continue + xs = self.train_xs[part] + ys = self.train_ys[part] + ts = self.train_ts[part] + ns = self.train_ns[part] + subjects = self.train_subjects[part] + if index == eval_index: + train_xs_eval.append(xs) + train_ys_eval.append(ys) + train_ts_eval.append(ts) + train_ns_eval.append(ns * splitcount + eval_index) + train_subjects_eval.append(subjects) + else: + train_xs_training.append(xs) + train_ys_training.append(ys) + train_ts_training.append(ts) + train_ns_training.append(ns * splitcount + eval_index) + train_subjects_training.append(subjects) + train_xs_eval = np.vstack(train_xs_eval) + train_ys_eval = np.vstack(train_ys_eval) + train_ts_eval = np.vstack(train_ts_eval) + train_ns_eval = np.hstack(train_ns_eval) + train_subjects_eval = np.hstack(train_subjects_eval) + train_xs_training = np.vstack(train_xs_training) + train_ys_training = np.vstack(train_ys_training) + train_ts_training = np.vstack(train_ts_training) + train_ns_training = np.hstack(train_ns_training) + train_subjects_training = np.hstack(train_subjects_training) + fixations_training = type(self).from_fixation_trains(train_xs_training, train_ys_training, + train_ts_training, train_ns_training, + train_subjects_training) + fixations_evaluation = type(self).from_fixation_trains(train_xs_eval, train_ys_eval, + train_ts_eval, train_ns_eval, + train_subjects_eval) + return fixations_training, fixations_evaluation + + def shuffle_fixations(self, stimuli=None): + new_indices = [] + new_ns = [] + if stimuli: + widths = np.asarray([s[1] for s in stimuli.sizes]).astype(float) + heights = np.asarray([s[0] for s in stimuli.sizes]).astype(float) + x_factors = [] + y_factors = [] + for n in range(self.n.max()+1): + inds = np.nonzero(~(self.n == n))[0] + new_indices.extend(inds) + new_ns.extend([n]*len(inds)) + if stimuli: + other_ns = self.n[inds] + x_factors.extend(stimuli.sizes[n][1]/widths[other_ns]) + y_factors.extend(stimuli.sizes[n][0]/heights[other_ns]) + new_fixations = self[new_indices] + new_fixations.n = np.asarray(new_ns) + if stimuli: + x_factors = np.asarray(x_factors) + y_factors = np.asarray(y_factors) + new_fixations.x = x_factors*new_fixations.x + new_fixations.x_hist = x_factors[:, np.newaxis]*new_fixations.x_hist + new_fixations.y = y_factors*new_fixations.y + new_fixations.y_hist = y_factors[:, np.newaxis]*new_fixations.y_hist + return new_fixations + + def shuffle_fixation_trains(self, stimuli=None): + """ + + """ + if not self.consistent_fixation_trains: + raise ValueError('Cannot shuffle fixation trains as fixation trains not consistent!') + train_xs = [] + train_ys = [] + train_ts = [] + train_ns = [] + train_subjects = [] + for n in range(self.n.max()+1): + inds = ~(self.train_ns == n) + train_xs.append(self.train_xs[inds]) + train_ys.append(self.train_ys[inds]) + train_ts.append(self.train_ts[inds]) + train_ns.append(np.ones(inds.sum(), dtype=int)*n) + train_subjects.append(self.train_subjects[inds]) + train_xs = np.vstack(train_xs) + train_ys = np.vstack(train_ys) + train_ts = np.vstack(train_ts) + train_ns = np.hstack(train_ns) + train_subjects = np.hstack(train_subjects) + full_nonfixations = type(self)(train_xs, train_ys, train_ts, train_ns, train_subjects) + #self.full_nonfixations = full_nonfixations + return 
full_nonfixations + + def generate_full_nonfixations(self, stimuli=None): + """ + Generate nonfixational distribution from this + fixation object by using all fixation trains of + other images. The individual fixation trains + will be left intact. + + .. warning:: + This function operates on the fixation trains. + Therefore, for filtered fixation objects it + might return wrong results. + """ + if self.full_nonfixations is not None: + print("Reusing nonfixations!") + return self.full_nonfixations + train_xs = [] + train_ys = [] + train_ts = [] + train_ns = [] + train_subjects = [] + for n in range(self.n.max()+1): + inds = ~(self.train_ns == n) + train_xs.append(self.train_xs[inds]) + train_ys.append(self.train_ys[inds]) + train_ts.append(self.train_ts[inds]) + train_ns.append(np.ones(inds.sum(), dtype=int)*n) + train_subjects.append(self.train_subjects[inds]) + train_xs = np.vstack(train_xs) + train_ys = np.vstack(train_ys) + train_ts = np.vstack(train_ts) + train_ns = np.hstack(train_ns) + train_subjects = np.hstack(train_subjects) + full_nonfixations = type(self)(train_xs, train_ys, train_ts, train_ns, train_subjects) + self.full_nonfixations = full_nonfixations + return full_nonfixations + + def generate_nonfixation_partners(self, seed=42): + """Generate nonfixational distribution from this + fixation object such that for every fixation in the + original fixation object there is a corresponding + fixation on the same image but on a different + position that comes from some other fixation. + + This destroys the temporal ordering of the fixation + trains.""" + train_xs = self.train_xs.copy() + train_ys = self.train_ys.copy() + train_ts = self.train_ts.copy() + train_ns = self.train_ns.copy() + train_subjects = self.train_subjects.copy() + rs = np.random.RandomState(seed) + for train_index in range(len(train_ns)): + n = train_ns[train_index] + inds = np.nonzero(self.n != n)[0] + length = (1 - np.isnan(train_xs[train_index])).sum() + for i in range(length): + new_fix_index = rs.choice(inds) + train_xs[train_index][i] = self.x[new_fix_index] + train_ys[train_index][i] = self.y[new_fix_index] + train_ts[train_index][i] = self.t[new_fix_index] + return type(self)(train_xs, train_ys, train_ts, train_ns, train_subjects) + + @hdf5_wrapper(mode='w') + def to_hdf5(self, target): + """ Write fixationtrains to hdf5 file or hdf5 group + """ + + target.attrs['type'] = np.string_('FixationTrains') + target.attrs['version'] = np.string_('1.3') + + variable_length_arrays = [] + + for attribute in ['train_xs', 'train_ys', 'train_ts', 'train_ns', 'train_subjects', 'train_lengths'] + self.__attributes__: + if attribute in ['subjects', 'scanpath_index'] + self.auto_attributes: + continue + + data = getattr(self, attribute) + if isinstance(data, VariableLengthArray): + variable_length_arrays.append(attribute) + data = data._data + target.create_dataset(attribute, data=data) + + saved_attributes = [attribute_name for attribute_name in self.__attributes__ if attribute_name not in self.auto_attributes] + target.attrs['__attributes__'] = np.string_(json.dumps(saved_attributes)) + + target.attrs['scanpath_attribute_mapping'] = np.string_(json.dumps(self.scanpath_attribute_mapping)) + + scanpath_attributes_group = target.create_group('scanpath_attributes') + for attribute_name, attribute_value in self.scanpath_attributes.items(): + scanpath_attributes_group.create_dataset(attribute_name, data=attribute_value) + scanpath_attributes_group.attrs['__attributes__'] = 
np.string_(json.dumps(sorted(self.scanpath_attributes.keys()))) + + scanpath_fixation_attributes_group = target.create_group('scanpath_fixation_attributes') + for attribute_name, attribute_value in self.scanpath_fixation_attributes.items(): + scanpath_fixation_attributes_group.create_dataset(attribute_name, data=attribute_value._data) + scanpath_fixation_attributes_group.attrs['__attributes__'] = np.string_(json.dumps(sorted(self.scanpath_fixation_attributes.keys()))) + + + @classmethod + @hdf5_wrapper(mode='r') + def read_hdf5(cls, source): + """ Read train fixations from hdf5 file or hdf5 group """ + + data_type = decode_string(source.attrs['type']) + data_version = decode_string(source.attrs['version']) + + if data_type != 'FixationTrains': + raise ValueError("Invalid type! Expected 'FixationTrains', got", data_type) + + valid_versions = ['1.0', '1.1', '1.2', '1.3'] + if data_version not in valid_versions: + raise ValueError("Invalid version! Expected one of {}, got {}".format(', '.join(valid_versions), data_version)) + + data = {key: source[key][...] for key in ['train_xs', 'train_ys', 'train_ts', 'train_ns', 'train_subjects']} + + json_attributes = decode_string(source.attrs['__attributes__']) + + attribute_names = list(json.loads(json_attributes)) + + attributes = {} + for key in attribute_names: + if key in ['subjects', 'scanpath_index']: + continue + + attributes[key] = source[key][...] + + data['attributes'] = attributes + + if data_version < '1.1': + data['scanpath_attributes'] = {} + else: + data['scanpath_attributes'] = _load_attribute_dict_from_hdf5(source['scanpath_attributes']) + + if data_version < '1.2': + data['scanpath_fixation_attributes'] = {} + data['scanpath_attribute_mapping'] = {} + else: + data['scanpath_fixation_attributes'] = _load_attribute_dict_from_hdf5(source['scanpath_fixation_attributes']) + data['scanpath_attribute_mapping'] = json.loads(decode_string(source.attrs['scanpath_attribute_mapping'])) + + if data_version < '1.3': + train_lengths = np.array([len(remove_trailing_nans(data['train_xs'][i])) for i in range(len(data['train_xs']))]) + else: + train_lengths = source['train_lengths'][...] 
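+    # `train_lengths` holds the true length of every scanpath; it is used below to
+    # wrap the NaN-padded scanpath fixation attributes into VariableLengthArrays.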
+
+    data['scanpath_fixation_attributes'] = {
+        key: VariableLengthArray(value, train_lengths) for key, value in data['scanpath_fixation_attributes'].items()
+    }
+
+    fixations = cls(**data)
+
+    return fixations
+
+
+def _scanpath_from_fixation_index(fixations, fixation_index, scanpath_attribute_names, scanpath_fixation_attribute_names):
+    history_length = fixations.lengths[fixation_index]
+    xs = np.hstack((
+        fixations.x_hist[fixation_index, :history_length],
+        [fixations.x[fixation_index]]
+    ))
+
+    ys = np.hstack((
+        fixations.y_hist[fixation_index, :history_length],
+        [fixations.y[fixation_index]]
+    ))
+
+    ts = np.hstack((
+        fixations.t_hist[fixation_index, :history_length],
+        [fixations.t[fixation_index]]
+    ))
+
+    n = fixations.n[fixation_index]
+
+    subject = fixations.subjects[fixation_index]
+
+    scanpath_attributes = {
+        attribute: getattr(fixations, attribute)[fixation_index]
+        for attribute in scanpath_attribute_names
+    }
+
+    scanpath_fixation_attributes = {}
+    for attribute in scanpath_fixation_attribute_names:
+        attribute_value = np.hstack((
+            getattr(fixations, '{attribute}_hist'.format(attribute=attribute))[fixation_index, :history_length],
+            [getattr(fixations, attribute)[fixation_index]]
+        ))
+        scanpath_fixation_attributes[attribute] = attribute_value
+
+    return xs, ys, ts, n, subject, scanpath_attributes, scanpath_fixation_attributes
+
+
+def scanpaths_from_fixations(fixations, verbose=False):
+    """ Reconstructs scanpaths (FixationTrains) from fixations which originally came from scanpaths.
+
+    When called as in
+
+        scanpaths, indices = scanpaths_from_fixations(fixations)
+
+    you will get scanpaths[indices] == fixations.
+
+    :note:
+        This only works if the original scanpaths only used scanpath_attributes and scanpath_fixation_attributes,
+        but not attributes (which should not be used for scanpaths anyway).
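+
+    The reconstruction can be sanity-checked with (a sketch, assuming numpy
+    is imported as np):
+
+        scanpaths, indices = scanpaths_from_fixations(fixations)
+        np.testing.assert_array_equal(scanpaths.x[indices], fixations.x)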
+ """ + if 'scanpath_index' not in fixations.__attributes__: + raise NotImplementedError("Fixations with scanpath_index attribute required!") + + scanpath_xs = [] + scanpath_ys = [] + scanpath_ts = [] + scanpath_ns = [] + scanpath_subjects = [] + __attributes__ = [attribute for attribute in fixations.__attributes__ if attribute != 'subjects' and attribute != 'scanpath_index' and not attribute.endswith('_hist')] + __scanpath_attributes__ = [attribute for attribute in __attributes__ if '{attribute}_hist'.format(attribute=attribute) not in fixations.__attributes__] + __scanpath_fixation_attributes__ = [attribute for attribute in __attributes__ if attribute not in __scanpath_attributes__] + + scanpath_fixation_attributes = {attribute: [] for attribute in __scanpath_fixation_attributes__} + scanpath_attributes = {attribute: [] for attribute in __scanpath_attributes__} + + attribute_shapes = { + attribute: getattr(fixations, attribute)[0].shape for attribute in __attributes__ + } + + __all_attributes__ = __attributes__ + ['{attribute}_hist'.format(attribute=attribute) for attribute in __scanpath_fixation_attributes__] + + indices = np.ones(len(fixations), dtype=int) * -1 + fixation_counter = 0 + + for scanpath_index in tqdm(sorted(np.unique(fixations.scanpath_index)), disable=not verbose): + scanpath_indices = fixations.scanpath_index == scanpath_index + scanpath_integer_indices = np.nonzero(scanpath_indices)[0] + lengths = fixations.lengths[scanpath_indices] + + # build scanpath up to maximum length + maximum_length = max(lengths) + _index_of_maximum_length = np.argmax(lengths) + index_of_maximum_length = scanpath_integer_indices[_index_of_maximum_length] + + xs, ys, ts, n, subject, this_scanpath_attributes, this_scanpath_fixation_attributes = _scanpath_from_fixation_index( + fixations, + index_of_maximum_length, + __scanpath_attributes__, + __scanpath_fixation_attributes__ + ) + + scanpath_xs.append(xs) + scanpath_ys.append(ys) + scanpath_ts.append(ts) + scanpath_ns.append(n) + scanpath_subjects.append(subject) + + for attribute, value in this_scanpath_fixation_attributes.items(): + scanpath_fixation_attributes[attribute].append(value) + for attribute, value in this_scanpath_attributes.items(): + scanpath_attributes[attribute].append(value) + + # build indices + + for index_in_scanpath in range(maximum_length+1): + if index_in_scanpath in lengths: + # add index to indices + index_in_fixations = scanpath_integer_indices[list(lengths).index(index_in_scanpath)] + + # there might be one fixation multiple times in fixations. 
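+                # (e.g. because the dataset was created by concatenating
+                # overlapping subsets); all of these duplicates are mapped to the
+                # same position in the reconstructed scanpaths below.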
+
+                indices_in_fixations = scanpath_integer_indices[lengths == index_in_scanpath]
+                indices[indices_in_fixations] = fixation_counter + index_in_scanpath
+
+        fixation_counter += len(xs)
+
+    scanpath_attributes = {
+        attribute: np.array(value) for attribute, value in scanpath_attributes.items()
+    }
+
+    return FixationTrains.from_fixation_trains(
+        xs=scanpath_xs,
+        ys=scanpath_ys,
+        ts=scanpath_ts,
+        ns=scanpath_ns,
+        subjects=scanpath_subjects,
+        scanpath_attributes=scanpath_attributes,
+        scanpath_fixation_attributes=scanpath_fixation_attributes
+    ), indices
\ No newline at end of file
diff --git a/pysaliency/datasets/scanpaths.py b/pysaliency/datasets/scanpaths.py
new file mode 100644
index 0000000..b004a30
--- /dev/null
+++ b/pysaliency/datasets/scanpaths.py
@@ -0,0 +1,177 @@
+import json
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+
+from ..utils.variable_length_array import VariableLengthArray, concatenate_variable_length_arrays
+from .utils import create_hdf5_dataset, decode_string, hdf5_wrapper, _load_attribute_dict_from_hdf5
+
+
+class Scanpaths(object):
+    """
+    Represents a collection of scanpaths.
+
+    Attributes:
+        xs (VariableLengthArray): The x-coordinates of the scanpaths.
+        ys (VariableLengthArray): The y-coordinates of the scanpaths.
+        n (np.ndarray): The image index of each scanpath.
+        lengths (np.ndarray): The length of each scanpath.
+        scanpath_attributes (dict): Additional attributes associated with the scanpaths.
+        fixation_attributes (dict): Additional attributes associated with the fixations in the scanpaths.
+        attribute_mapping (dict): Maps attribute names to the names of the fixation attributes
+            that will be generated when creating `Fixations` instances from the `Scanpaths` instance,
+            for example {'durations': 'duration'}.
+    """
+
+    xs: VariableLengthArray
+    ys: VariableLengthArray
+    n: np.ndarray
+
+    def __init__(self,
+                 xs: Union[np.ndarray, VariableLengthArray],
+                 ys: Union[np.ndarray, VariableLengthArray],
+                 n: np.ndarray,
+                 lengths=None,
+                 scanpath_attributes: Optional[Dict[str, np.ndarray]] = None,
+                 fixation_attributes: Optional[Dict[str, Union[np.ndarray, VariableLengthArray]]] = None,
+                 attribute_mapping: Optional[Dict[str, str]] = None):
+
+        self.n = np.asarray(n)
+
+        if not isinstance(xs, VariableLengthArray):
+            self.xs = VariableLengthArray(xs, lengths)
+        else:
+            self.xs = xs
+
+        if lengths is not None:
+            if not np.all(self.xs.lengths == lengths):
+                raise ValueError("Lengths of xs and lengths do not match")
+
+        self.lengths = self.xs.lengths.copy()
+
+        self.ys = self._as_variable_length_array(ys)
+
+        if not len(self.xs) == len(self.ys) == len(self.n):
+            raise ValueError("Length of xs, ys and n has to match")
+
+        # setting scanpath attributes
+
+        scanpath_attributes = scanpath_attributes or {}
+        self.scanpath_attributes = {key: np.array(value) for key, value in scanpath_attributes.items()}
+
+        for key, value in self.scanpath_attributes.items():
+            if not len(value) == len(self.xs):
+                raise ValueError(f"Length of scanpath attribute {key} has to match number of scanpaths, but got {len(value)} != {len(self.xs)}")
+
+        # setting fixation attributes
+
+        fixation_attributes = fixation_attributes or {}
+
+        self.fixation_attributes = {key: self._as_variable_length_array(value) for key, value in fixation_attributes.items()}
+
+        self.attribute_mapping = attribute_mapping or {}
+
+    def _check_lengths(self, other: VariableLengthArray):
+        if not len(self) == len(other):
+            raise ValueError("Length of scanpaths has to match")
+        if not np.all(self.lengths ==
other.lengths): + raise ValueError("Lengths of scanpaths have to match") + + def _as_variable_length_array(self, data: Union[np.ndarray, VariableLengthArray]) -> VariableLengthArray: + if not isinstance(data, VariableLengthArray): + data = VariableLengthArray(data, self.lengths) + + self._check_lengths(data) + + return data + + def __len__(self): + return len(self.xs) + + @property + def ts(self) -> VariableLengthArray: + return self.fixation_attributes['ts'] + + @property + def subject(self) -> VariableLengthArray: + return self.scanpath_attributes['subject'] + + + @hdf5_wrapper(mode='w') + def to_hdf5(self, target): + """ Write scanpaths to hdf5 file or hdf5 group + """ + target.attrs['type'] = np.string_('Scanpaths') + target.attrs['version'] = np.string_('1.0') + + target.create_dataset('xs', data=self.xs._data) + target.create_dataset('ys', data=self.ys._data) + target.create_dataset('n', data=self.n) + target.create_dataset('lengths', data=self.lengths) + + scanpath_attributes_group = target.create_group('scanpath_attributes') + for attribute_name, attribute_value in self.scanpath_attributes.items(): + create_hdf5_dataset(scanpath_attributes_group, attribute_name, attribute_value) + scanpath_attributes_group.attrs['__attributes__'] = np.string_(json.dumps(sorted(self.scanpath_attributes.keys()))) + + fixation_attributes_group = target.create_group('fixation_attributes') + for attribute_name, attribute_value in self.fixation_attributes.items(): + fixation_attributes_group.create_dataset(attribute_name, data=attribute_value._data) + fixation_attributes_group.attrs['__attributes__'] = np.string_(json.dumps(sorted(self.fixation_attributes.keys()))) + + target.attrs['attribute_mapping'] = np.string_(json.dumps(self.attribute_mapping)) + + + @classmethod + @hdf5_wrapper(mode='r') + def read_hdf5(cls, source): + data_type = decode_string(source.attrs['type']) + data_version = decode_string(source.attrs['version']) + + if data_type != 'Scanpaths': + raise ValueError("Invalid type! Expected 'Scanpaths', got", data_type) + + valid_versions = ['1.0'] + if data_version not in valid_versions: + raise ValueError("Invalid version! Expected one of {}, got {}".format(', '.join(valid_versions), data_version)) + + lengths = source['lengths'][...] + xs = VariableLengthArray(source['xs'][...], lengths) + ys = VariableLengthArray(source['ys'][...], lengths) + n = source['n'][...] 
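+        # Attribute groups store one dataset per attribute; the list of attribute
+        # names is serialized as JSON in the group's '__attributes__' attribute.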
+ + scanpath_attributes = _load_attribute_dict_from_hdf5(source['scanpath_attributes']) + + fixation_attributes_group = source['fixation_attributes'] + json_attributes = fixation_attributes_group.attrs['__attributes__'] + if not isinstance(json_attributes, str): + json_attributes = json_attributes.decode('utf8') + __attributes__ = json.loads(json_attributes) + + fixation_attributes = {attribute: VariableLengthArray(fixation_attributes_group[attribute][...], lengths) for attribute in __attributes__} + + return cls( + xs=xs, + ys=ys, + n=n, + lengths=lengths, + scanpath_attributes=scanpath_attributes, + fixation_attributes=fixation_attributes, + attribute_mapping=json.loads(decode_string(source.attrs['attribute_mapping'])) + ) + + def __getitem__(self, index): + # TODO + # - integer to return single scanpath + # - 2d index to return single Fixation (for now via index of scanpath and index of fixation in scanpath) + # - 2d index array to return Fixations instance (for now via index of scanpath and index of fixation in scanpath) + + if isinstance(index, tuple): + raise NotImplementedError("Not implemented yet") + elif isinstance(index, int): + raise NotImplementedError("Not implemented yet") + else: + return type(self)(self.xs[index], self.ys[index], self.n[index], self.lengths[index], + scanpath_attributes={key: value[index] for key, value in self.scanpath_attributes.items()}, + fixation_attributes={key: value[index] for key, value in self.fixation_attributes.items()}, + attribute_mapping=self.attribute_mapping) \ No newline at end of file diff --git a/pysaliency/datasets/utils.py b/pysaliency/datasets/utils.py index 10bb50b..379df8f 100644 --- a/pysaliency/datasets/utils.py +++ b/pysaliency/datasets/utils.py @@ -59,4 +59,70 @@ def create_hdf5_dataset(target, name, data): dtype=h5py.special_dtype(vlen=str) ) else: - target.create_dataset(name, data=data) \ No newline at end of file + target.create_dataset(name, data=data) + + +def get_merged_attribute_list(attributes): + all_attributes = set(attributes[0]) + common_attributes = set(attributes[0]) + + for _attributes in attributes[1:]: + all_attributes = all_attributes.union(_attributes) + common_attributes = common_attributes.intersection(_attributes) + + if common_attributes != all_attributes: + lost_attributes = all_attributes.difference(common_attributes) + warnings.warn(f"Discarding attributes which are not present everywhere: {lost_attributes}", stacklevel=4) + + return sorted(common_attributes) + +def _load_attribute_dict_from_hdf5(attribute_group): + json_attributes = attribute_group.attrs['__attributes__'] + if not isinstance(json_attributes, str): + json_attributes = json_attributes.decode('utf8') + __attributes__ = json.loads(json_attributes) + + attributes = {attribute: attribute_group[attribute][...] 
for attribute in __attributes__}
+    return attributes
+
+
+def concatenate_attributes(attributes):
+    attributes = list(attributes)
+
+    if isinstance(attributes[0], VariableLengthArray):
+        return concatenate_variable_length_arrays(attributes)
+
+    attributes = [np.array(a) for a in attributes]
+    for a in attributes:
+        assert len(a.shape) == len(attributes[0].shape)
+
+    if len(attributes[0].shape) == 1:
+        return np.hstack(attributes)
+
+    else:
+        assert len(attributes[0].shape) == 2
+        max_cols = max(a.shape[1] for a in attributes)
+        padded_attributes = []
+        for a in attributes:
+            if a.shape[1] < max_cols:
+                padding = np.empty((a.shape[0], max_cols-a.shape[1]), dtype=a.dtype)
+                padding[:] = np.nan
+                padded_attributes.append(np.hstack((a, padding)))
+            else:
+                padded_attributes.append(a)
+        return np.vstack(padded_attributes)
\ No newline at end of file
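# ---------------------------------------------------------------------------
# Editor's sketch (not part of the diff above): a minimal, self-contained
# illustration of how `concatenate_attributes` pads 2d attribute arrays of
# different widths with NaN columns before stacking them. The example arrays
# are made up, and the import path is an assumption based on the module layout
# shown in this diff.

import numpy as np

from pysaliency.datasets.utils import concatenate_attributes

a = np.array([[1.0, 2.0],
              [3.0, 4.0]])       # attribute values for two scanpaths of length 2
b = np.array([[5.0, 6.0, 7.0]])  # attribute values for one scanpath of length 3

merged = concatenate_attributes([a, b])
# `a` is padded with one NaN column so that both arrays have three columns:
# [[ 1.,  2., nan],
#  [ 3.,  4., nan],
#  [ 5.,  6.,  7.]]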