From 6c57b5ca2b11c9a0f75b01eb59a2c81e00095bb6 Mon Sep 17 00:00:00 2001
From: Jeremy Magland <jmagland@flatironinstitute.org>
Date: Wed, 4 Oct 2023 09:46:41 -0400
Subject: [PATCH 1/9] adjust eps for whitening in case of very small magnitude
 data

---
 src/spikeinterface/preprocessing/whiten.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index cb2346ba68..c8eece2623 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -68,7 +68,7 @@ def __init__(
                 M = np.asarray(M)
         else:
             W, M = compute_whitening_matrix(
-                recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um, eps=1e-8
+                recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um
             )
 
         BasePreprocessor.__init__(self, recording, dtype=dtype_)
@@ -122,7 +122,7 @@ def get_traces(self, start_frame, end_frame, channel_indices):
 whiten = define_function_from_class(source_class=WhitenRecording, name="whiten")
 
 
-def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=1e-8):
+def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None):
     """
     Compute whitening matrix
 
@@ -167,6 +167,20 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     cov = data.T @ data
     cov = cov / data.shape[0]
 
+    # Here we determine eps used below to avoid division by zero.
+    # Typically we can assume that data is in units of
+    # microvolts, but this is not always the case. When data
+    # is float type and scaled down to very small values, then the
+    # default eps=1e-6 can be too large, resulting in incorrect
+    # whitening. We therefore check to see if the data is float
+    # type and we estimate a more reasonable eps in the case
+    # where the data is on a scale less than 1.
+    eps = 1e-6 # the default
+    if data.dtype.kind == "f":
+        median_data_sqr = np.median(data ** 2) # use the square because cov (and hence S) scales as the square
+        if median_data_sqr < 1 and median_data_sqr > 0:
+            eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data
+
     if mode == "global":
         U, S, Ut = np.linalg.svd(cov, full_matrices=True)
         W = (U @ np.diag(1 / np.sqrt(S + eps))) @ Ut

From 39ef079b0d8778e27a0500830464259e38aa528b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Oct 2023 13:52:43 +0000
Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spikeinterface/preprocessing/whiten.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index c8eece2623..49ca3c7926 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -67,9 +67,7 @@ def __init__(
             if M is not None:
                 M = np.asarray(M)
         else:
-            W, M = compute_whitening_matrix(
-                recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um
-            )
+            W, M = compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um)
 
         BasePreprocessor.__init__(self, recording, dtype=dtype_)
 
@@ -175,11 +173,11 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     # whitening. We therefore check to see if the data is float
     # type and we estimate a more reasonable eps in the case
     # where the data is on a scale less than 1.
-    eps = 1e-6 # the default
+    eps = 1e-6  # the default
     if data.dtype.kind == "f":
-        median_data_sqr = np.median(data ** 2) # use the square because cov (and hence S) scales as the square
+        median_data_sqr = np.median(data**2)  # use the square because cov (and hence S) scales as the square
         if median_data_sqr < 1 and median_data_sqr > 0:
-            eps = max(1e-16, median_data_sqr * 1e-3) # use a small fraction of the median of the squared data
+            eps = max(1e-16, median_data_sqr * 1e-3)  # use a small fraction of the median of the squared data
 
     if mode == "global":
         U, S, Ut = np.linalg.svd(cov, full_matrices=True)

From 2b7d2cea7b6a90b9f807b2322fee98ffa6464248 Mon Sep 17 00:00:00 2001
From: Jeremy Magland <jmagland@flatironinstitute.org>
Date: Wed, 4 Oct 2023 11:22:11 -0400
Subject: [PATCH 3/9] adjust whiten tests to not use eps arg

---
 src/spikeinterface/preprocessing/tests/test_whiten.py | 6 +++---
 src/spikeinterface/preprocessing/whiten.py            | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/spikeinterface/preprocessing/tests/test_whiten.py b/src/spikeinterface/preprocessing/tests/test_whiten.py
index 0848c1a176..40674a08f4 100644
--- a/src/spikeinterface/preprocessing/tests/test_whiten.py
+++ b/src/spikeinterface/preprocessing/tests/test_whiten.py
@@ -20,13 +20,13 @@ def test_whiten():
 
     print(rec.get_channel_locations())
     random_chunk_kwargs = {}
-    W, M = compute_whitening_matrix(rec, "global", random_chunk_kwargs, apply_mean=False, radius_um=None, eps=1e-8)
+    W, M = compute_whitening_matrix(rec, "global", random_chunk_kwargs, apply_mean=False, radius_um=None)
     print(W)
     print(M)
 
     with pytest.raises(AssertionError):
-        W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=None, eps=1e-8)
-    W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=25, eps=1e-8)
+        W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=None)
+    W, M = compute_whitening_matrix(rec, "local", random_chunk_kwargs, apply_mean=False, radius_um=25)
     # W must be sparse
     np.sum(W == 0) == 6
 
diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index c8eece2623..5300b97de3 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -171,11 +171,11 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     # Typically we can assume that data is in units of
     # microvolts, but this is not always the case. When data
     # is float type and scaled down to very small values, then the
-    # default eps=1e-6 can be too large, resulting in incorrect
+    # default eps=1e-8 can be too large, resulting in incorrect
     # whitening. We therefore check to see if the data is float
     # type and we estimate a more reasonable eps in the case
     # where the data is on a scale less than 1.
-    eps = 1e-6 # the default
+    eps = 1e-8 # the default
     if data.dtype.kind == "f":
         median_data_sqr = np.median(data ** 2) # use the square because cov (and hence S) scales as the square
         if median_data_sqr < 1 and median_data_sqr > 0:

From b2337d0d65497f1cb0cc6ef6c18f21c0b3fc5ac4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 4 Oct 2023 15:24:27 +0000
Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spikeinterface/preprocessing/whiten.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index 8e8bf3e9cb..afa3227e76 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -173,7 +173,7 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     # whitening. We therefore check to see if the data is float
     # type and we estimate a more reasonable eps in the case
     # where the data is on a scale less than 1.
-    eps = 1e-8 # the default
+    eps = 1e-8  # the default
     if data.dtype.kind == "f":
         median_data_sqr = np.median(data**2)  # use the square because cov (and hence S) scales as the square
         if median_data_sqr < 1 and median_data_sqr > 0:

From 2f7176469b2b06b04baf36f8f8d2ff704cdf2095 Mon Sep 17 00:00:00 2001
From: Jeremy Magland <magland@users.noreply.github.com>
Date: Wed, 4 Oct 2023 11:32:10 -0400
Subject: [PATCH 5/9] use "mV" in comment

---
 src/spikeinterface/preprocessing/whiten.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index afa3227e76..7cd4766f2b 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -167,7 +167,7 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
 
     # Here we determine eps used below to avoid division by zero.
     # Typically we can assume that data is in units of
-    # microvolts, but this is not always the case. When data
+    # mV, but this is not always the case. When data
     # is float type and scaled down to very small values, then the
     # default eps=1e-8 can be too large, resulting in incorrect
     # whitening. We therefore check to see if the data is float

From ba81bb111a992320427219fbdad3ff4c72ae9003 Mon Sep 17 00:00:00 2001
From: Jeremy Magland <magland@users.noreply.github.com>
Date: Fri, 6 Oct 2023 17:10:07 -0400
Subject: [PATCH 6/9] Update src/spikeinterface/preprocessing/whiten.py

Co-authored-by: Alessio Buccino <alejoe9187@gmail.com>
---
 src/spikeinterface/preprocessing/whiten.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index 7cd4766f2b..cae7fe6334 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -166,8 +166,8 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     cov = cov / data.shape[0]
 
     # Here we determine eps used below to avoid division by zero.
-    # Typically we can assume that data is in units of
-    # mV, but this is not always the case. When data
+    # Typically we can assume that data is either unscaled integers or in units of
+    # uV, but this is not always the case. When data
     # is float type and scaled down to very small values, then the
     # default eps=1e-8 can be too large, resulting in incorrect
     # whitening. We therefore check to see if the data is float

From 0699434f6573f744dddd4a4ffe3c6b0b2f7e67d8 Mon Sep 17 00:00:00 2001
From: Alessio Buccino <alejoe9187@gmail.com>
Date: Wed, 25 Oct 2023 17:50:25 +0200
Subject: [PATCH 7/9] Expose eps at the function level and clarify options

---
 src/spikeinterface/preprocessing/whiten.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index cae7fe6334..54f6e0e903 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -29,6 +29,10 @@ class WhitenRecording(BasePreprocessor):
         Apply a scaling factor to fit the integer range.
         This is used when the dtype is an integer, so that the output is scaled.
         For example, a value of `int_scale=200` will scale the traces value to a standard deviation of 200.
+    eps : float, default 1e-8
+        Small epsilon to regularize SVD.
+        If None, eps is estimated from the data. If the data is float type and scaled down to very small values,
+        then the eps is automatically set to a small fraction of the median of the squared data.
     W : 2d np.array
         Pre-computed whitening matrix, by default None
     M : 1d np.array or None
@@ -52,6 +56,7 @@ def __init__(
         mode="global",
         radius_um=100.0,
         int_scale=None,
+        eps=1e-8,
         W=None,
         M=None,
         **random_chunk_kwargs,
@@ -67,7 +72,9 @@ def __init__(
             if M is not None:
                 M = np.asarray(M)
         else:
-            W, M = compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um)
+            W, M = compute_whitening_matrix(
+                recording, mode, random_chunk_kwargs, apply_mean, radius_um=radius_um, eps=eps
+            )
 
         BasePreprocessor.__init__(self, recording, dtype=dtype_)
 
@@ -120,7 +127,7 @@ def get_traces(self, start_frame, end_frame, channel_indices):
 whiten = define_function_from_class(source_class=WhitenRecording, name="whiten")
 
 
-def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None):
+def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=1e-8):
     """
     Compute whitening matrix
 
@@ -173,8 +180,7 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     # whitening. We therefore check to see if the data is float
     # type and we estimate a more reasonable eps in the case
     # where the data is on a scale less than 1.
-    eps = 1e-8  # the default
-    if data.dtype.kind == "f":
+    if data.dtype.kind == "f" or eps is None:
         median_data_sqr = np.median(data**2)  # use the square because cov (and hence S) scales as the square
         if median_data_sqr < 1 and median_data_sqr > 0:
             eps = max(1e-16, median_data_sqr * 1e-3)  # use a small fraction of the median of the squared data

From 4a6a6b91d4f8cd6c3e48ee719e58d81b8c9735e8 Mon Sep 17 00:00:00 2001
From: Alessio Buccino <alejoe9187@gmail.com>
Date: Thu, 26 Oct 2023 11:58:02 +0200
Subject: [PATCH 8/9] Use eps=None by default

---
 src/spikeinterface/preprocessing/whiten.py | 39 ++++++++++++----------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index 54f6e0e903..ec3a3e91a9 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -15,27 +15,27 @@ class WhitenRecording(BasePreprocessor):
     ----------
     recording: RecordingExtractor
         The recording extractor to be whitened.
-    dtype: None or dtype
+    dtype: None or dtype, default: None
         If None the the parent dtype is kept.
         For integer dtype a int_scale must be also given.
-    mode: 'global' / 'local'
+    mode: 'global' / 'local', default: 'global'
         'global' use the entire covariance matrix to compute the W matrix
         'local' use local covariance (by radius) to compute the W matrix
-    radius_um: None or float
+    radius_um: None or float, default: None
         Used for mode = 'local' to get the neighborhood
-    apply_mean: bool
+    apply_mean: bool, default: False
         Substract or not the mean matrix M before the dot product with W.
-    int_scale : None or float
+    int_scale : None or float, default: None
         Apply a scaling factor to fit the integer range.
         This is used when the dtype is an integer, so that the output is scaled.
         For example, a value of `int_scale=200` will scale the traces value to a standard deviation of 200.
-    eps : float, default 1e-8
+    eps : float or None, default: None
         Small epsilon to regularize SVD.
-        If None, eps is estimated from the data. If the data is float type and scaled down to very small values,
-        then the eps is automatically set to a small fraction of the median of the squared data.
-    W : 2d np.array
-        Pre-computed whitening matrix, by default None
-    M : 1d np.array or None
+        If None, eps is default to 1e-8. If the data is float type and scaled down to very small values,
+        then the eps is automatically set to a small fraction (1e-3) of the median of the squared data.
+    W : 2d np.array, default: None
+        Pre-computed whitening matrix
+    M : 1d np.array or None, default: None
         Pre-computed means.
         M can be None when previously computed with apply_mean=False
     **random_chunk_kwargs : Keyword arguments for `spikeinterface.core.get_random_data_chunk()` function
@@ -56,7 +56,7 @@ def __init__(
         mode="global",
         radius_um=100.0,
         int_scale=None,
-        eps=1e-8,
+        eps=None,
         W=None,
         M=None,
         **random_chunk_kwargs,
@@ -127,7 +127,7 @@ def get_traces(self, start_frame, end_frame, channel_indices):
 whiten = define_function_from_class(source_class=WhitenRecording, name="whiten")
 
 
-def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=1e-8):
+def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, radius_um=None, eps=None):
     """
     Compute whitening matrix
 
@@ -145,10 +145,11 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
         Keyword arguments for  get_random_data_chunks()
     apply_mean : bool
         If True, the mean is removed prior to computing the covariance
-    radius_um : float, optional
-        Used for mode = 'local' to get the neighborhood, by default None
-    eps : float, optional
-        Small epsilon to regularize SVD, by default 1e-8
+    radius_um : float, default: None
+        Used for mode = 'local' to get the neighborhood
+    eps : float, default: None
+        Small epsilon to regularize SVD. If None, the default is set to 1e-8, but if the data is float type and scaled
+        down to very small values, eps is automatically set to a small fraction (1e-3) of the median of the squared data.
 
     Returns
     -------
@@ -180,7 +181,9 @@ def compute_whitening_matrix(recording, mode, random_chunk_kwargs, apply_mean, r
     # whitening. We therefore check to see if the data is float
     # type and we estimate a more reasonable eps in the case
     # where the data is on a scale less than 1.
-    if data.dtype.kind == "f" or eps is None:
+    if eps is None:
+        eps = 1e-8
+    if data.dtype.kind == "f":
         median_data_sqr = np.median(data**2)  # use the square because cov (and hence S) scales as the square
         if median_data_sqr < 1 and median_data_sqr > 0:
             eps = max(1e-16, median_data_sqr * 1e-3)  # use a small fraction of the median of the squared data

From d93ba0fe48e3395fc940d0a0f05e483631dc5651 Mon Sep 17 00:00:00 2001
From: Alessio Buccino <alejoe9187@gmail.com>
Date: Thu, 26 Oct 2023 12:19:42 +0200
Subject: [PATCH 9/9] Update src/spikeinterface/preprocessing/whiten.py

---
 src/spikeinterface/preprocessing/whiten.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spikeinterface/preprocessing/whiten.py b/src/spikeinterface/preprocessing/whiten.py
index ec3a3e91a9..ac80f58182 100644
--- a/src/spikeinterface/preprocessing/whiten.py
+++ b/src/spikeinterface/preprocessing/whiten.py
@@ -31,7 +31,7 @@ class WhitenRecording(BasePreprocessor):
         For example, a value of `int_scale=200` will scale the traces value to a standard deviation of 200.
     eps : float or None, default: None
         Small epsilon to regularize SVD.
-        If None, eps is default to 1e-8. If the data is float type and scaled down to very small values,
+        If None, eps is defaulted to 1e-8. If the data is float type and scaled down to very small values,
         then the eps is automatically set to a small fraction (1e-3) of the median of the squared data.
     W : 2d np.array, default: None
         Pre-computed whitening matrix