From 6c57b5ca2b11c9a0f75b01eb59a2c81e00095bb6 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Wed, 4 Oct 2023 09:46:41 -0400 Subject: [PATCH 1/9] adjust eps for whitening in case of very small magnitude data --- src/spikeinterface/preprocessing/whiten.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index cb2346ba68..c8eece2623 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -68,7 +68,7 @@ def __init__( M = np.asarray(M) else: W, M = compute_whitening_matrix( - recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um, eps=1e-8 + recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um ) BasePreprocessor.__init__(self, recording, dtype=dtype_) @@ -122,7 +122,7 @@ def get_traces(self, start_frame, end_frame, channel_indices): whiten = define_function_from_class(source_class=WhitenRecording, name="whiten") -def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=1e-8): +def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None): """ Compute whitening matrix @@ -167,6 +167,20 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r cov = data.T @ data cov = cov / data.shape[0] + # Here we determine eps used below to avoid division by zero. + # Typically we can assume that data is in units of + # microvolts, but this is not always the case. When data + # is float type and scaled down to very small values, then the + # default eps=1e-6 can be too large, resulting in incorrect + # whitening. We therefore check to see if the data is float + # type and we estimate a more reasonable eps in the case + # where the data is on a scale less than 1. + eps = 1e-6 # the default + if data.dtype.kind == "f": + median_data_sqr = np.median(data ** 2) # use the square because cov (and hence S) scales as the square + if median_data_sqr < 1 and median_data_sqr > 0: + eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data + if mode == "global": U, S, Ut = np.linalg.svd(cov, full_matrices=True) W = (U @ np.diag(1 / np.sqrt(S + eps))) @ Ut From 39ef079b0d8778e27a0500830464259e38aa528b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 13:52:43 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/preprocessing/whiten.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index c8eece2623..49ca3c7926 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -67,9 +67,7 @@ def __init__( if M is not None: M = np.asarray(M) else: - W, M = compute_whitening_matrix( - recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um - ) + W, M = compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um) BasePreprocessor.__init__(self, recording, dtype=dtype_) @@ -175,11 +173,11 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r # whitening. We therefore check to see if the data is float # type and we estimate a more reasonable eps in the case # where the data is on a scale less than 1. - eps = 1e-6 # the default + eps = 1e-6 # the default if data.dtype.kind == "f": - median_data_sqr = np.median(data ** 2) # use the square because cov (and hence S) scales as the square + median_data_sqr = np.median(data**2) # use the square because cov (and hence S) scales as the square if median_data_sqr < 1 and median_data_sqr > 0: - eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data + eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data if mode == "global": U, S, Ut = np.linalg.svd(cov, full_matrices=True) From 2b7d2cea7b6a90b9f807b2322fee98ffa6464248 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Wed, 4 Oct 2023 11:22:11 -0400 Subject: [PATCH 3/9] adjust whiten tests to not use eps arg --- src/spikeinterface/preprocessing/tests/test_whiten.py | 6 +++--- src/spikeinterface/preprocessing/whiten.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/preprocessing/tests/test_whiten.py b/src/spikeinterface/preprocessing/tests/test_whiten.py index 0848c1a176..40674a08f4 100644 --- a/src/spikeinterface/preprocessing/tests/test_whiten.py +++ b/src/spikeinterface/preprocessing/tests/test_whiten.py @@ -20,13 +20,13 @@ def test_whiten(): print(rec.get_channel_locations()) random_chunk_kwargs = {} - W, M = compute_whitening_matrix(rec, "global", random_chunk_kwargs, apply_mean=False, radius_um=None, eps=1e-8) + W, M = compute_whitening_matrix(rec, "global", random_chunk_kwargs, apply_mean=False, radius_um=None) print(W) print(M) with pytest.raises(AssertionError): - W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=None, eps=1e-8) - W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=25, eps=1e-8) + W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=None) + W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=25) # W must be sparse np.sum(W == 0) == 6 diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index c8eece2623..5300b97de3 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -171,11 +171,11 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r # Typically we can assume that data is in units of # microvolts, but this is not always the case. When data # is float type and scaled down to very small values, then the - # default eps=1e-6 can be too large, resulting in incorrect + # default eps=1e-8 can be too large, resulting in incorrect # whitening. We therefore check to see if the data is float # type and we estimate a more reasonable eps in the case # where the data is on a scale less than 1. - eps = 1e-6 # the default + eps = 1e-8 # the default if data.dtype.kind == "f": median_data_sqr = np.median(data ** 2) # use the square because cov (and hence S) scales as the square if median_data_sqr < 1 and median_data_sqr > 0: From b2337d0d65497f1cb0cc6ef6c18f21c0b3fc5ac4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 15:24:27 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/preprocessing/whiten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index 8e8bf3e9cb..afa3227e76 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -173,7 +173,7 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r # whitening. We therefore check to see if the data is float # type and we estimate a more reasonable eps in the case # where the data is on a scale less than 1. - eps = 1e-8 # the default + eps = 1e-8 # the default if data.dtype.kind == "f": median_data_sqr = np.median(data**2) # use the square because cov (and hence S) scales as the square if median_data_sqr < 1 and median_data_sqr > 0: From 2f7176469b2b06b04baf36f8f8d2ff704cdf2095 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Wed, 4 Oct 2023 11:32:10 -0400 Subject: [PATCH 5/9] use "mV" in comment --- src/spikeinterface/preprocessing/whiten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index afa3227e76..7cd4766f2b 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -167,7 +167,7 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r # Here we determine eps used below to avoid division by zero. # Typically we can assume that data is in units of - # microvolts, but this is not always the case. When data + # mV, but this is not always the case. When data # is float type and scaled down to very small values, then the # default eps=1e-8 can be too large, resulting in incorrect # whitening. We therefore check to see if the data is float From ba81bb111a992320427219fbdad3ff4c72ae9003 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Fri, 6 Oct 2023 17:10:07 -0400 Subject: [PATCH 6/9] Update src/spikeinterface/preprocessing/whiten.py Co-authored-by: Alessio Buccino --- src/spikeinterface/preprocessing/whiten.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index 7cd4766f2b..cae7fe6334 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -166,8 +166,8 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r cov = cov / data.shape[0] # Here we determine eps used below to avoid division by zero. - # Typically we can assume that data is in units of - # mV, but this is not always the case. When data + # Typically we can assume that data is either unscaled integers or in units of + # uV, but this is not always the case. When data # is float type and scaled down to very small values, then the # default eps=1e-8 can be too large, resulting in incorrect # whitening. We therefore check to see if the data is float From 0699434f6573f744dddd4a4ffe3c6b0b2f7e67d8 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 25 Oct 2023 17:50:25 +0200 Subject: [PATCH 7/9] Expose eps at the function level and clarify options --- src/spikeinterface/preprocessing/whiten.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index cae7fe6334..54f6e0e903 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -29,6 +29,10 @@ class WhitenRecording(BasePreprocessor): Apply a scaling factor to fit the integer range. This is used when the dtype is an integer, so that the output is scaled. For example, a value of `int_scale=200` will scale the traces value to a standard deviation of 200. + eps : float, default 1e-8 + Small epsilon to regularize SVD. + If None, eps is estimated from the data. If the data is float type and scaled down to very small values, + then the eps is automatically set to a small fraction of the median of the squared data. W : 2d np.array Pre-computed whitening matrix, by default None M : 1d np.array or None @@ -52,6 +56,7 @@ def __init__( mode="global", radius_um=100.0, int_scale=None, + eps=1e-8, W=None, M=None, **random_chunk_kwargs, @@ -67,7 +72,9 @@ def __init__( if M is not None: M = np.asarray(M) else: - W, M = compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um) + W, M = compute_whitening_matrix( + recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um, eps=eps + ) BasePreprocessor.__init__(self, recording, dtype=dtype_) @@ -120,7 +127,7 @@ def get_traces(self, start_frame, end_frame, channel_indices): whiten = define_function_from_class(source_class=WhitenRecording, name="whiten") -def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None): +def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=1e-8): """ Compute whitening matrix @@ -173,8 +180,7 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r # whitening. We therefore check to see if the data is float # type and we estimate a more reasonable eps in the case # where the data is on a scale less than 1. - eps = 1e-8 # the default - if data.dtype.kind == "f": + if data.dtype.kind == "f" or eps is None: median_data_sqr = np.median(data**2) # use the square because cov (and hence S) scales as the square if median_data_sqr < 1 and median_data_sqr > 0: eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data From 4a6a6b91d4f8cd6c3e48ee719e58d81b8c9735e8 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 26 Oct 2023 11:58:02 +0200 Subject: [PATCH 8/9] Use eps=None by default --- src/spikeinterface/preprocessing/whiten.py | 39 ++++++++++++---------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index 54f6e0e903..ec3a3e91a9 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -15,27 +15,27 @@ class WhitenRecording(BasePreprocessor): ---------- recording: RecordingExtractor The recording extractor to be whitened. - dtype: None or dtype + dtype: None or dtype, default: None If None the the parent dtype is kept. For integer dtype a int_scale must be also given. - mode: 'global' / 'local' + mode: 'global' / 'local', default: 'global' 'global' use the entire covariance matrix to compute the W matrix 'local' use local covariance (by radius) to compute the W matrix - radius_um: None or float + radius_um: None or float, default: None Used for mode = 'local' to get the neighborhood - apply_mean: bool + apply_mean: bool, default: False Substract or not the mean matrix M before the dot product with W. - int_scale : None or float + int_scale : None or float, default: None Apply a scaling factor to fit the integer range. This is used when the dtype is an integer, so that the output is scaled. For example, a value of `int_scale=200` will scale the traces value to a standard deviation of 200. - eps : float, default 1e-8 + eps : float or None, default: None Small epsilon to regularize SVD. - If None, eps is estimated from the data. If the data is float type and scaled down to very small values, - then the eps is automatically set to a small fraction of the median of the squared data. - W : 2d np.array - Pre-computed whitening matrix, by default None - M : 1d np.array or None + If None, eps is default to 1e-8. If the data is float type and scaled down to very small values, + then the eps is automatically set to a small fraction (1e-3) of the median of the squared data. + W : 2d np.array, default: None + Pre-computed whitening matrix + M : 1d np.array or None, default: None Pre-computed means. M can be None when previously computed with apply_mean=False **random_chunk_kwargs : Keyword arguments for `spikeinterface.core.get_random_data_chunk()` function @@ -56,7 +56,7 @@ def __init__( mode="global", radius_um=100.0, int_scale=None, - eps=1e-8, + eps=None, W=None, M=None, **random_chunk_kwargs, @@ -127,7 +127,7 @@ def get_traces(self, start_frame, end_frame, channel_indices): whiten = define_function_from_class(source_class=WhitenRecording, name="whiten") -def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=1e-8): +def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=None): """ Compute whitening matrix @@ -145,10 +145,11 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r Keyword arguments for get_random_data_chunks() apply_mean : bool If True, the mean is removed prior to computing the covariance - radius_um : float, optional - Used for mode = 'local' to get the neighborhood, by default None - eps : float, optional - Small epsilon to regularize SVD, by default 1e-8 + radius_um : float, default: None + Used for mode = 'local' to get the neighborhood + eps : float, default: None + Small epsilon to regularize SVD. If None, the default is set to 1e-8, but if the data is float type and scaled + down to very small values, eps is automatically set to a small fraction (1e-3) of the median of the squared data. Returns ------- @@ -180,7 +181,9 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r # whitening. We therefore check to see if the data is float # type and we estimate a more reasonable eps in the case # where the data is on a scale less than 1. - if data.dtype.kind == "f" or eps is None: + if eps is None: + eps = 1e-8 + if data.dtype.kind == "f": median_data_sqr = np.median(data**2) # use the square because cov (and hence S) scales as the square if median_data_sqr < 1 and median_data_sqr > 0: eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data From d93ba0fe48e3395fc940d0a0f05e483631dc5651 Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 26 Oct 2023 12:19:42 +0200 Subject: [PATCH 9/9] Update src/spikeinterface/preprocessing/whiten.py --- src/spikeinterface/preprocessing/whiten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py index ec3a3e91a9..ac80f58182 100644 --- a/src/spikeinterface/preprocessing/whiten.py +++ b/src/spikeinterface/preprocessing/whiten.py @@ -31,7 +31,7 @@ class WhitenRecording(BasePreprocessor): For example, a value of `int_scale=200` will scale the traces value to a standard deviation of 200. eps : float or None, default: None Small epsilon to regularize SVD. - If None, eps is default to 1e-8. If the data is float type and scaled down to very small values, + If None, eps is defaulted to 1e-8. If the data is float type and scaled down to very small values, then the eps is automatically set to a small fraction (1e-3) of the median of the squared data. W : 2d np.array, default: None Pre-computed whitening matrix