From 68fe2ba08f9c41b3feaf7866fee934291d78f7ea Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 09:26:40 +0200 Subject: [PATCH 01/25] OMP with SVD decomposition --- .../sortingcomponents/matching/circus.py | 307 ++++++++++++++++++ .../sortingcomponents/matching/method_list.py | 3 +- 2 files changed, 309 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index a19e7b71b5..e86c913976 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -478,6 +478,313 @@ def main_function(cls, traces, d): return spikes +class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): + """ + Orthogonal Matching Pursuit inspired from Spyking Circus sorter + + https://elifesciences.org/articles/34518 + + This is an Orthogonal Template Matching algorithm. For speed and + memory optimization, templates are automatically sparsified. Signal + is convolved with the templates, and as long as some scalar products + are higher than a given threshold, we use a Cholesky decomposition + to compute the optimal amplitudes needed to reconstruct the signal. + + IMPORTANT NOTE: small chunks are more efficient for such Peeler, + consider using 100ms chunk + + Parameters + ---------- + amplitude: tuple + (Minimal, Maximal) amplitudes allowed for every template + omp_min_sps: float + Stopping criteria of the OMP algorithm, in percentage of the norm + noise_levels: array + The noise levels, for every channels. If None, they will be automatically + computed + random_chunk_kwargs: dict + Parameters for computing noise levels, if not provided (sub optimal) + sparse_kwargs: dict + Parameters to extract a sparsity mask from the waveform_extractor, if not + already sparse. 
+ ----- + """ + + _default_params = { + "amplitudes": [0.6, 2], + "omp_min_sps": 0.1, + "waveform_extractor": None, + "templates": None, + "overlaps": None, + "norms": None, + "random_chunk_kwargs": {}, + "noise_levels": None, + "rank" : 3, + "sparse_kwargs": {"method": "ptp", "threshold": 1}, + "ignored_ids": [], + "vicinity": 0, + } + + @classmethod + def _prepare_templates(cls, d): + waveform_extractor = d["waveform_extractor"] + num_templates = len(d["waveform_extractor"].sorting.unit_ids) + + if not waveform_extractor.is_sparse(): + sparsity = compute_sparsity(waveform_extractor, **d["sparse_kwargs"]).mask + else: + sparsity = waveform_extractor.sparsity.mask + + templates = waveform_extractor.get_all_templates(mode="median").copy() + + temporal, singular, spatial = np.linalg.svd(templates, full_matrices=False) + + # Keep only the strongest components + rank = d['rank'] + d['templates'] = {} + d["norms"] = np.zeros(num_templates, dtype=np.float32) + d['sparsities'] = {} + d["norms"] = np.linalg.norm(templates, axis=(1, 2)) + for i in range(num_templates): + d['sparsities'][i] = np.arange(templates.shape[2]) + d['templates'][i] = templates[i] / d["norms"][i] + + temporal = temporal[:, :, :rank] + d["temporal"] = np.flip(temporal, axis=1) + d["singular"] = singular[:, :rank] + d["spatial"] = spatial[:, :rank, :] + + d['temporal'] /= d['norms'][:, np.newaxis, np.newaxis] + + d["spatial"] = np.moveaxis(d['spatial'][:, :rank, :], [0, 1, 2], [1, 0, 2]) + d['temporal'] = np.moveaxis(d['temporal'][:, :, :rank], [0, 1, 2], [1, 2, 0]) + d['singular'] = d['singular'].T[:, :, np.newaxis] + return d + + @classmethod + def initialize_and_check_kwargs(cls, recording, kwargs): + d = cls._default_params.copy() + d.update(kwargs) + + # assert isinstance(d['waveform_extractor'], WaveformExtractor) + + for v in ["omp_min_sps"]: + assert (d[v] >= 0) and (d[v] <= 1), f"{v} should be in [0, 1]" + + d["num_channels"] = d["waveform_extractor"].recording.get_num_channels() + d["num_samples"] = d["waveform_extractor"].nsamples + d["nbefore"] = d["waveform_extractor"].nbefore + d["nafter"] = d["waveform_extractor"].nafter + d["sampling_frequency"] = d["waveform_extractor"].recording.get_sampling_frequency() + d["vicinity"] *= d["num_samples"] + + if d["noise_levels"] is None: + print("CircusOMPPeeler : noise should be computed outside") + d["noise_levels"] = get_noise_levels(recording, **d["random_chunk_kwargs"], return_scaled=False) + + if d["templates"] is None: + d = cls._prepare_templates(d) + else: + for key in ["norms", "sparsities"]: + assert d[key] is not None, "If templates are provided, %d should also be there" % key + + d["num_templates"] = len(d["templates"]) + + if d["overlaps"] is None: + d["overlaps"] = compute_overlaps(d["templates"], d["num_samples"], d["num_channels"], d["sparsities"]) + + d["ignored_ids"] = np.array(d["ignored_ids"]) + + omp_min_sps = d["omp_min_sps"] + # nb_active_channels = np.array([len(d['sparsities'][count]) for count in range(d['num_templates'])]) + d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) + + return d + + @classmethod + def serialize_method_kwargs(cls, kwargs): + kwargs = dict(kwargs) + # remove waveform_extractor + kwargs.pop("waveform_extractor") + return kwargs + + @classmethod + def unserialize_in_worker(cls, kwargs): + return kwargs + + @classmethod + def get_margin(cls, recording, kwargs): + margin = 2 * max(kwargs["nbefore"], kwargs["nafter"]) + return margin + + @classmethod + def main_function(cls, traces, d): + 
templates = d["templates"] + num_templates = d["num_templates"] + num_channels = d["num_channels"] + num_samples = d["num_samples"] + overlaps = d["overlaps"] + norms = d["norms"] + nbefore = d["nbefore"] + nafter = d["nafter"] + omp_tol = np.finfo(np.float32).eps + num_samples = d["nafter"] + d["nbefore"] + neighbor_window = num_samples - 1 + min_amplitude, max_amplitude = d["amplitudes"] + sparsities = d["sparsities"] + ignored_ids = d["ignored_ids"] + stop_criteria = d["stop_criteria"] + vicinity = d["vicinity"] + rank = d['rank'] + + num_timesteps = len(traces) + + num_peaks = num_timesteps - num_samples + 1 + conv_shape = (num_templates, num_peaks) + scalar_products = np.zeros(conv_shape, dtype=np.float32) + + # Filter using overlap-and-add convolution + spatially_filtered_data = np.matmul(d['spatial'], traces.T[np.newaxis, :, :]) + scaled_filtered_data = spatially_filtered_data * d['singular'] + objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d['temporal'], axes=2, mode="valid") + scalar_products += np.sum(objective_by_rank, axis=0) + + if len(ignored_ids) > 0: + scalar_products[ignored_ids] = -np.inf + + num_spikes = 0 + + spikes = np.empty(scalar_products.size, dtype=spike_dtype) + idx_lookup = np.arange(scalar_products.size).reshape(num_templates, -1) + + M = np.zeros((100, 100), dtype=np.float32) + + all_selections = np.empty((2, scalar_products.size), dtype=np.int32) + final_amplitudes = np.zeros(scalar_products.shape, dtype=np.float32) + num_selection = 0 + + full_sps = scalar_products.copy() + + neighbors = {} + cached_overlaps = {} + + is_valid = scalar_products > stop_criteria + all_amplitudes = np.zeros(0, dtype=np.float32) + is_in_vicinity = np.zeros(0, dtype=np.int32) + + while np.any(is_valid): + best_amplitude_ind = scalar_products[is_valid].argmax() + best_cluster_ind, peak_index = np.unravel_index(idx_lookup[is_valid][best_amplitude_ind], idx_lookup.shape) + + if num_selection > 0: + delta_t = selection[1] - peak_index + idx = np.where((delta_t < neighbor_window) & (delta_t > -num_samples))[0] + myline = num_samples + delta_t[idx] + + if not best_cluster_ind in cached_overlaps: + cached_overlaps[best_cluster_ind] = overlaps[best_cluster_ind].toarray() + + if num_selection == M.shape[0]: + Z = np.zeros((2 * num_selection, 2 * num_selection), dtype=np.float32) + Z[:num_selection, :num_selection] = M + M = Z + + M[num_selection, idx] = cached_overlaps[best_cluster_ind][selection[0, idx], myline] + + if vicinity == 0: + scipy.linalg.solve_triangular( + M[:num_selection, :num_selection], + M[num_selection, :num_selection], + trans=0, + lower=1, + overwrite_b=True, + check_finite=False, + ) + + v = nrm2(M[num_selection, :num_selection]) ** 2 + Lkk = 1 - v + if Lkk <= omp_tol: # selected atoms are dependent + break + M[num_selection, num_selection] = np.sqrt(Lkk) + else: + is_in_vicinity = np.where(np.abs(delta_t) < vicinity)[0] + + if len(is_in_vicinity) > 0: + L = M[is_in_vicinity, :][:, is_in_vicinity] + + M[num_selection, is_in_vicinity] = scipy.linalg.solve_triangular( + L, M[num_selection, is_in_vicinity], trans=0, lower=1, overwrite_b=True, check_finite=False + ) + + v = nrm2(M[num_selection, is_in_vicinity]) ** 2 + Lkk = 1 - v + if Lkk <= omp_tol: # selected atoms are dependent + break + M[num_selection, num_selection] = np.sqrt(Lkk) + else: + M[num_selection, num_selection] = 1.0 + else: + M[0, 0] = 1 + + all_selections[:, num_selection] = [best_cluster_ind, peak_index] + num_selection += 1 + + selection = all_selections[:, :num_selection] + 
res_sps = full_sps[selection[0], selection[1]] + + if True: # vicinity == 0: + all_amplitudes, _ = potrs(M[:num_selection, :num_selection], res_sps, lower=True, overwrite_b=False) + all_amplitudes /= norms[selection[0]] + else: + # This is not working, need to figure out why + is_in_vicinity = np.append(is_in_vicinity, num_selection - 1) + all_amplitudes = np.append(all_amplitudes, np.float32(1)) + L = M[is_in_vicinity, :][:, is_in_vicinity] + all_amplitudes[is_in_vicinity], _ = potrs(L, res_sps[is_in_vicinity], lower=True, overwrite_b=False) + all_amplitudes[is_in_vicinity] /= norms[selection[0][is_in_vicinity]] + + diff_amplitudes = all_amplitudes - final_amplitudes[selection[0], selection[1]] + modified = np.where(np.abs(diff_amplitudes) > omp_tol)[0] + final_amplitudes[selection[0], selection[1]] = all_amplitudes + + for i in modified: + tmp_best, tmp_peak = selection[:, i] + diff_amp = diff_amplitudes[i] * norms[tmp_best] + + if not tmp_best in cached_overlaps: + cached_overlaps[tmp_best] = overlaps[tmp_best].toarray() + + if not tmp_peak in neighbors.keys(): + idx = [max(0, tmp_peak - num_samples), min(num_peaks, tmp_peak + neighbor_window)] + tdx = [num_samples + idx[0] - tmp_peak, num_samples + idx[1] - tmp_peak] + neighbors[tmp_peak] = {"idx": idx, "tdx": tdx} + + idx = neighbors[tmp_peak]["idx"] + tdx = neighbors[tmp_peak]["tdx"] + + to_add = diff_amp * cached_overlaps[tmp_best][:, tdx[0] : tdx[1]] + scalar_products[:, idx[0] : idx[1]] -= to_add + + is_valid = scalar_products > stop_criteria + + is_valid = (final_amplitudes > min_amplitude) * (final_amplitudes < max_amplitude) + valid_indices = np.where(is_valid) + + num_spikes = len(valid_indices[0]) + spikes["sample_index"][:num_spikes] = valid_indices[1] + d["nbefore"] + spikes["channel_index"][:num_spikes] = 0 + spikes["cluster_index"][:num_spikes] = valid_indices[0] + spikes["amplitude"][:num_spikes] = final_amplitudes[valid_indices[0], valid_indices[1]] + + spikes = spikes[:num_spikes] + order = np.argsort(spikes["sample_index"]) + spikes = spikes[order] + + return spikes + + + + class CircusPeeler(BaseTemplateMatchingEngine): """ diff --git a/src/spikeinterface/sortingcomponents/matching/method_list.py b/src/spikeinterface/sortingcomponents/matching/method_list.py index bedc04a9d5..46c4a53872 100644 --- a/src/spikeinterface/sortingcomponents/matching/method_list.py +++ b/src/spikeinterface/sortingcomponents/matching/method_list.py @@ -1,6 +1,6 @@ from .naive import NaiveMatching from .tdc import TridesclousPeeler -from .circus import CircusPeeler, CircusOMPPeeler +from .circus import CircusPeeler, CircusOMPPeeler, CircusOMPSVDPeeler from .wobble import WobbleMatch matching_methods = { @@ -8,5 +8,6 @@ "tridesclous": TridesclousPeeler, "circus": CircusPeeler, "circus-omp": CircusOMPPeeler, + 'circus-omp-svd' : CircusOMPSVDPeeler, "wobble": WobbleMatch, } From cc4720460127960d5d8cf16248690b3323c6c4a9 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 10:49:57 +0200 Subject: [PATCH 02/25] Increase default rank --- .../sortingcomponents/matching/circus.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index e86c913976..bc378fb9a2 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -519,7 +519,7 @@ class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): "norms": None, 
"random_chunk_kwargs": {}, "noise_levels": None, - "rank" : 3, + "rank" : 10, "sparse_kwargs": {"method": "ptp", "threshold": 1}, "ignored_ids": [], "vicinity": 0, @@ -537,17 +537,20 @@ def _prepare_templates(cls, d): templates = waveform_extractor.get_all_templates(mode="median").copy() - temporal, singular, spatial = np.linalg.svd(templates, full_matrices=False) - # Keep only the strongest components rank = d['rank'] d['templates'] = {} d["norms"] = np.zeros(num_templates, dtype=np.float32) d['sparsities'] = {} - d["norms"] = np.linalg.norm(templates, axis=(1, 2)) - for i in range(num_templates): - d['sparsities'][i] = np.arange(templates.shape[2]) - d['templates'][i] = templates[i] / d["norms"][i] + + for count in range(num_templates): + template = templates[count][:, sparsity[count]] + (d["sparsities"][count],) = np.nonzero(sparsity[count]) + d["norms"][count] = np.linalg.norm(template) + templates[count][:, ~sparsity[count]] = 0 + d["templates"][count] = template / d["norms"][count] + + temporal, singular, spatial = np.linalg.svd(templates, full_matrices=False) temporal = temporal[:, :, :rank] d["temporal"] = np.flip(temporal, axis=1) @@ -631,7 +634,6 @@ def main_function(cls, traces, d): num_samples = d["nafter"] + d["nbefore"] neighbor_window = num_samples - 1 min_amplitude, max_amplitude = d["amplitudes"] - sparsities = d["sparsities"] ignored_ids = d["ignored_ids"] stop_criteria = d["stop_criteria"] vicinity = d["vicinity"] From 10c33c1c8645aa7e144bdb8efbc06b993c79c4b0 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 12:01:10 +0200 Subject: [PATCH 03/25] To be tried --- src/spikeinterface/sortingcomponents/matching/circus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index bc378fb9a2..8c002a5cc7 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -601,6 +601,7 @@ def initialize_and_check_kwargs(cls, recording, kwargs): omp_min_sps = d["omp_min_sps"] # nb_active_channels = np.array([len(d['sparsities'][count]) for count in range(d['num_templates'])]) d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) + #d['stop_criteria'] = omp_min_sps * np.maximum(d['norms'], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) return d @@ -635,7 +636,7 @@ def main_function(cls, traces, d): neighbor_window = num_samples - 1 min_amplitude, max_amplitude = d["amplitudes"] ignored_ids = d["ignored_ids"] - stop_criteria = d["stop_criteria"] + stop_criteria = d["stop_criteria"]#[:, np.newaxis] vicinity = d["vicinity"] rank = d['rank'] From b2a9b70abeb1fccbfa73e51f604253c0f02c81c0 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 16:33:17 +0200 Subject: [PATCH 04/25] WIP --- src/spikeinterface/sortingcomponents/matching/circus.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index 8c002a5cc7..482d36956f 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -519,7 +519,7 @@ class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): "norms": None, "random_chunk_kwargs": {}, "noise_levels": None, - "rank" : 10, + "rank" : 5, "sparse_kwargs": {"method": "ptp", "threshold": 1}, "ignored_ids": [], "vicinity": 
0, @@ -599,9 +599,8 @@ def initialize_and_check_kwargs(cls, recording, kwargs): d["ignored_ids"] = np.array(d["ignored_ids"]) omp_min_sps = d["omp_min_sps"] - # nb_active_channels = np.array([len(d['sparsities'][count]) for count in range(d['num_templates'])]) - d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) - #d['stop_criteria'] = omp_min_sps * np.maximum(d['norms'], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) + #d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) + d['stop_criteria'] = omp_min_sps * np.maximum(d['norms'], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) return d @@ -636,7 +635,7 @@ def main_function(cls, traces, d): neighbor_window = num_samples - 1 min_amplitude, max_amplitude = d["amplitudes"] ignored_ids = d["ignored_ids"] - stop_criteria = d["stop_criteria"]#[:, np.newaxis] + stop_criteria = d["stop_criteria"][:, np.newaxis] vicinity = d["vicinity"] rank = d['rank'] From 3c94594fdd5ee6a58c2635a2f9a8dba9c8ce500d Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 17:01:51 +0200 Subject: [PATCH 05/25] Working with circus2 --- .../sorters/internal/spyking_circus2.py | 2 +- .../clustering/clustering_tools.py | 7 ++-- .../sortingcomponents/matching/circus.py | 37 ++++++++++--------- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/spikeinterface/sorters/internal/spyking_circus2.py b/src/spikeinterface/sorters/internal/spyking_circus2.py index db3d88f116..7097b9e56b 100644 --- a/src/spikeinterface/sorters/internal/spyking_circus2.py +++ b/src/spikeinterface/sorters/internal/spyking_circus2.py @@ -152,7 +152,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): matching_job_params["chunk_duration"] = "100ms" spikes = find_spikes_from_templates( - recording_f, method="circus-omp", method_kwargs=matching_params, **matching_job_params + recording_f, method="circus-omp-svd", method_kwargs=matching_params, **matching_job_params ) if verbose: diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index b87bbc7cee..99836fa293 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -602,8 +602,6 @@ def remove_duplicates_via_matching( "noise_levels": noise_levels, "amplitudes": [0.95, 1.05], "omp_min_sps": 0.1, - "templates": None, - "overlaps": None, } ) @@ -618,7 +616,7 @@ def remove_duplicates_via_matching( method_kwargs.update({"ignored_ids": ignore_ids + [i]}) spikes, computed = find_spikes_from_templates( - sub_recording, method="circus-omp", method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs + sub_recording, method="circus-omp-svd", method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs ) method_kwargs.update( { @@ -626,6 +624,9 @@ def remove_duplicates_via_matching( "templates": computed["templates"], "norms": computed["norms"], "sparsities": computed["sparsities"], + "temporal" : computed["temporal"], + "spatial" : computed["spatial"], + "singular" : computed["singular"], } ) valid = (spikes["sample_index"] >= half_marging) * (spikes["sample_index"] < duration + half_marging) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index 482d36956f..e955687ed7 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ 
b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -514,9 +514,6 @@ class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): "amplitudes": [0.6, 2], "omp_min_sps": 0.1, "waveform_extractor": None, - "templates": None, - "overlaps": None, - "norms": None, "random_chunk_kwargs": {}, "noise_levels": None, "rank" : 5, @@ -537,28 +534,34 @@ def _prepare_templates(cls, d): templates = waveform_extractor.get_all_templates(mode="median").copy() - # Keep only the strongest components - rank = d['rank'] - d['templates'] = {} - d["norms"] = np.zeros(num_templates, dtype=np.float32) + #First, we set masked channels to 0 d['sparsities'] = {} - for count in range(num_templates): template = templates[count][:, sparsity[count]] (d["sparsities"][count],) = np.nonzero(sparsity[count]) - d["norms"][count] = np.linalg.norm(template) templates[count][:, ~sparsity[count]] = 0 - d["templates"][count] = template / d["norms"][count] + # Then we keep only the strongest components + rank = d['rank'] temporal, singular, spatial = np.linalg.svd(templates, full_matrices=False) - - temporal = temporal[:, :, :rank] - d["temporal"] = np.flip(temporal, axis=1) + d["temporal"] = temporal[:, :, :rank] d["singular"] = singular[:, :rank] d["spatial"] = spatial[:, :rank, :] - d['temporal'] /= d['norms'][:, np.newaxis, np.newaxis] + # We reconstruct the approximated templates + templates = np.matmul(d["temporal"] * d["singular"][:, np.newaxis, :], d["spatial"]) + + d["temporal"] = np.flip(temporal, axis=1) + d['templates'] = {} + d["norms"] = np.zeros(num_templates, dtype=np.float32) + + # And get the norms, saving compressed templates for CC matrix + for count in range(num_templates): + template = templates[count][:, sparsity[count]] + d["norms"][count] = np.linalg.norm(template) + d["templates"][count] = template / d["norms"][count] + d['temporal'] /= d['norms'][:, np.newaxis, np.newaxis] d["spatial"] = np.moveaxis(d['spatial'][:, :rank, :], [0, 1, 2], [1, 0, 2]) d['temporal'] = np.moveaxis(d['temporal'][:, :, :rank], [0, 1, 2], [1, 2, 0]) d['singular'] = d['singular'].T[:, :, np.newaxis] @@ -585,15 +588,15 @@ def initialize_and_check_kwargs(cls, recording, kwargs): print("CircusOMPPeeler : noise should be computed outside") d["noise_levels"] = get_noise_levels(recording, **d["random_chunk_kwargs"], return_scaled=False) - if d["templates"] is None: + if "templates" not in d: d = cls._prepare_templates(d) else: - for key in ["norms", "sparsities"]: + for key in ["norms", "sparsities", 'temporal', 'spatial', 'singular']: assert d[key] is not None, "If templates are provided, %d should also be there" % key d["num_templates"] = len(d["templates"]) - if d["overlaps"] is None: + if "overlaps" not in d: d["overlaps"] = compute_overlaps(d["templates"], d["num_samples"], d["num_channels"], d["sparsities"]) d["ignored_ids"] = np.array(d["ignored_ids"]) From 46149ef0730a8965f2ae612e9672419a18dc674c Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 22:29:35 +0200 Subject: [PATCH 06/25] Put OMP with SVD as default --- .../sorters/internal/spyking_circus2.py | 2 +- .../clustering/clustering_tools.py | 2 +- .../sortingcomponents/matching/circus.py | 315 ------------------ .../sortingcomponents/matching/method_list.py | 1 - 4 files changed, 2 insertions(+), 318 deletions(-) diff --git a/src/spikeinterface/sorters/internal/spyking_circus2.py b/src/spikeinterface/sorters/internal/spyking_circus2.py index 7097b9e56b..db3d88f116 100644 --- a/src/spikeinterface/sorters/internal/spyking_circus2.py +++ 
b/src/spikeinterface/sorters/internal/spyking_circus2.py @@ -152,7 +152,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): matching_job_params["chunk_duration"] = "100ms" spikes = find_spikes_from_templates( - recording_f, method="circus-omp-svd", method_kwargs=matching_params, **matching_job_params + recording_f, method="circus-omp", method_kwargs=matching_params, **matching_job_params ) if verbose: diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 99836fa293..7a2af09942 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -616,7 +616,7 @@ def remove_duplicates_via_matching( method_kwargs.update({"ignored_ids": ignore_ids + [i]}) spikes, computed = find_spikes_from_templates( - sub_recording, method="circus-omp-svd", method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs + sub_recording, method="circus-omp", method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs ) method_kwargs.update( { diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index e955687ed7..aeac69fc86 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -195,321 +195,6 @@ class CircusOMPPeeler(BaseTemplateMatchingEngine): ----- """ - _default_params = { - "amplitudes": [0.6, 2], - "omp_min_sps": 0.1, - "waveform_extractor": None, - "templates": None, - "overlaps": None, - "norms": None, - "random_chunk_kwargs": {}, - "noise_levels": None, - "sparse_kwargs": {"method": "ptp", "threshold": 1}, - "ignored_ids": [], - "vicinity": 0, - } - - @classmethod - def _prepare_templates(cls, d): - waveform_extractor = d["waveform_extractor"] - num_templates = len(d["waveform_extractor"].sorting.unit_ids) - - if not waveform_extractor.is_sparse(): - sparsity = compute_sparsity(waveform_extractor, **d["sparse_kwargs"]).mask - else: - sparsity = waveform_extractor.sparsity.mask - - templates = waveform_extractor.get_all_templates(mode="median").copy() - - d["sparsities"] = {} - d["templates"] = {} - d["norms"] = np.zeros(num_templates, dtype=np.float32) - - for count, unit_id in enumerate(waveform_extractor.sorting.unit_ids): - template = templates[count][:, sparsity[count]] - (d["sparsities"][count],) = np.nonzero(sparsity[count]) - d["norms"][count] = np.linalg.norm(template) - d["templates"][count] = template / d["norms"][count] - - return d - - @classmethod - def initialize_and_check_kwargs(cls, recording, kwargs): - d = cls._default_params.copy() - d.update(kwargs) - - # assert isinstance(d['waveform_extractor'], WaveformExtractor) - - for v in ["omp_min_sps"]: - assert (d[v] >= 0) and (d[v] <= 1), f"{v} should be in [0, 1]" - - d["num_channels"] = d["waveform_extractor"].recording.get_num_channels() - d["num_samples"] = d["waveform_extractor"].nsamples - d["nbefore"] = d["waveform_extractor"].nbefore - d["nafter"] = d["waveform_extractor"].nafter - d["sampling_frequency"] = d["waveform_extractor"].recording.get_sampling_frequency() - d["vicinity"] *= d["num_samples"] - - if d["noise_levels"] is None: - print("CircusOMPPeeler : noise should be computed outside") - d["noise_levels"] = get_noise_levels(recording, **d["random_chunk_kwargs"], return_scaled=False) - - if d["templates"] is None: - d = cls._prepare_templates(d) - else: - for key 
in ["norms", "sparsities"]: - assert d[key] is not None, "If templates are provided, %d should also be there" % key - - d["num_templates"] = len(d["templates"]) - - if d["overlaps"] is None: - d["overlaps"] = compute_overlaps(d["templates"], d["num_samples"], d["num_channels"], d["sparsities"]) - - d["ignored_ids"] = np.array(d["ignored_ids"]) - - omp_min_sps = d["omp_min_sps"] - # nb_active_channels = np.array([len(d['sparsities'][count]) for count in range(d['num_templates'])]) - d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) - - return d - - @classmethod - def serialize_method_kwargs(cls, kwargs): - kwargs = dict(kwargs) - # remove waveform_extractor - kwargs.pop("waveform_extractor") - return kwargs - - @classmethod - def unserialize_in_worker(cls, kwargs): - return kwargs - - @classmethod - def get_margin(cls, recording, kwargs): - margin = 2 * max(kwargs["nbefore"], kwargs["nafter"]) - return margin - - @classmethod - def main_function(cls, traces, d): - templates = d["templates"] - num_templates = d["num_templates"] - num_channels = d["num_channels"] - num_samples = d["num_samples"] - overlaps = d["overlaps"] - norms = d["norms"] - nbefore = d["nbefore"] - nafter = d["nafter"] - omp_tol = np.finfo(np.float32).eps - num_samples = d["nafter"] + d["nbefore"] - neighbor_window = num_samples - 1 - min_amplitude, max_amplitude = d["amplitudes"] - sparsities = d["sparsities"] - ignored_ids = d["ignored_ids"] - stop_criteria = d["stop_criteria"] - vicinity = d["vicinity"] - - if "cached_fft_kernels" not in d: - d["cached_fft_kernels"] = {"fshape": 0} - - cached_fft_kernels = d["cached_fft_kernels"] - - num_timesteps = len(traces) - - num_peaks = num_timesteps - num_samples + 1 - - traces = traces.T - - dummy_filter = np.empty((num_channels, num_samples), dtype=np.float32) - dummy_traces = np.empty((num_channels, num_timesteps), dtype=np.float32) - - fshape, axes = get_scipy_shape(dummy_filter, traces, axes=1) - fft_cache = {"full": sp_fft.rfftn(traces, fshape, axes=axes)} - - scalar_products = np.empty((num_templates, num_peaks), dtype=np.float32) - - flagged_chunk = cached_fft_kernels["fshape"] != fshape[0] - - for i in range(num_templates): - if i not in ignored_ids: - if i not in cached_fft_kernels or flagged_chunk: - kernel_filter = np.ascontiguousarray(templates[i][::-1].T) - cached_fft_kernels.update({i: sp_fft.rfftn(kernel_filter, fshape, axes=axes)}) - cached_fft_kernels["fshape"] = fshape[0] - - fft_cache.update({"mask": sparsities[i], "template": cached_fft_kernels[i]}) - - convolution = fftconvolve_with_cache(dummy_filter, dummy_traces, fft_cache, axes=1, mode="valid") - if len(convolution) > 0: - scalar_products[i] = convolution.sum(0) - else: - scalar_products[i] = 0 - - if len(ignored_ids) > 0: - scalar_products[ignored_ids] = -np.inf - - num_spikes = 0 - - spikes = np.empty(scalar_products.size, dtype=spike_dtype) - idx_lookup = np.arange(scalar_products.size).reshape(num_templates, -1) - - M = np.zeros((100, 100), dtype=np.float32) - - all_selections = np.empty((2, scalar_products.size), dtype=np.int32) - final_amplitudes = np.zeros(scalar_products.shape, dtype=np.float32) - num_selection = 0 - - full_sps = scalar_products.copy() - - neighbors = {} - cached_overlaps = {} - - is_valid = scalar_products > stop_criteria - all_amplitudes = np.zeros(0, dtype=np.float32) - is_in_vicinity = np.zeros(0, dtype=np.int32) - - while np.any(is_valid): - best_amplitude_ind = scalar_products[is_valid].argmax() - best_cluster_ind, peak_index = 
np.unravel_index(idx_lookup[is_valid][best_amplitude_ind], idx_lookup.shape) - - if num_selection > 0: - delta_t = selection[1] - peak_index - idx = np.where((delta_t < neighbor_window) & (delta_t > -num_samples))[0] - myline = num_samples + delta_t[idx] - - if not best_cluster_ind in cached_overlaps: - cached_overlaps[best_cluster_ind] = overlaps[best_cluster_ind].toarray() - - if num_selection == M.shape[0]: - Z = np.zeros((2 * num_selection, 2 * num_selection), dtype=np.float32) - Z[:num_selection, :num_selection] = M - M = Z - - M[num_selection, idx] = cached_overlaps[best_cluster_ind][selection[0, idx], myline] - - if vicinity == 0: - scipy.linalg.solve_triangular( - M[:num_selection, :num_selection], - M[num_selection, :num_selection], - trans=0, - lower=1, - overwrite_b=True, - check_finite=False, - ) - - v = nrm2(M[num_selection, :num_selection]) ** 2 - Lkk = 1 - v - if Lkk <= omp_tol: # selected atoms are dependent - break - M[num_selection, num_selection] = np.sqrt(Lkk) - else: - is_in_vicinity = np.where(np.abs(delta_t) < vicinity)[0] - - if len(is_in_vicinity) > 0: - L = M[is_in_vicinity, :][:, is_in_vicinity] - - M[num_selection, is_in_vicinity] = scipy.linalg.solve_triangular( - L, M[num_selection, is_in_vicinity], trans=0, lower=1, overwrite_b=True, check_finite=False - ) - - v = nrm2(M[num_selection, is_in_vicinity]) ** 2 - Lkk = 1 - v - if Lkk <= omp_tol: # selected atoms are dependent - break - M[num_selection, num_selection] = np.sqrt(Lkk) - else: - M[num_selection, num_selection] = 1.0 - else: - M[0, 0] = 1 - - all_selections[:, num_selection] = [best_cluster_ind, peak_index] - num_selection += 1 - - selection = all_selections[:, :num_selection] - res_sps = full_sps[selection[0], selection[1]] - - if True: # vicinity == 0: - all_amplitudes, _ = potrs(M[:num_selection, :num_selection], res_sps, lower=True, overwrite_b=False) - all_amplitudes /= norms[selection[0]] - else: - # This is not working, need to figure out why - is_in_vicinity = np.append(is_in_vicinity, num_selection - 1) - all_amplitudes = np.append(all_amplitudes, np.float32(1)) - L = M[is_in_vicinity, :][:, is_in_vicinity] - all_amplitudes[is_in_vicinity], _ = potrs(L, res_sps[is_in_vicinity], lower=True, overwrite_b=False) - all_amplitudes[is_in_vicinity] /= norms[selection[0][is_in_vicinity]] - - diff_amplitudes = all_amplitudes - final_amplitudes[selection[0], selection[1]] - modified = np.where(np.abs(diff_amplitudes) > omp_tol)[0] - final_amplitudes[selection[0], selection[1]] = all_amplitudes - - for i in modified: - tmp_best, tmp_peak = selection[:, i] - diff_amp = diff_amplitudes[i] * norms[tmp_best] - - if not tmp_best in cached_overlaps: - cached_overlaps[tmp_best] = overlaps[tmp_best].toarray() - - if not tmp_peak in neighbors.keys(): - idx = [max(0, tmp_peak - num_samples), min(num_peaks, tmp_peak + neighbor_window)] - tdx = [num_samples + idx[0] - tmp_peak, num_samples + idx[1] - tmp_peak] - neighbors[tmp_peak] = {"idx": idx, "tdx": tdx} - - idx = neighbors[tmp_peak]["idx"] - tdx = neighbors[tmp_peak]["tdx"] - - to_add = diff_amp * cached_overlaps[tmp_best][:, tdx[0] : tdx[1]] - scalar_products[:, idx[0] : idx[1]] -= to_add - - is_valid = scalar_products > stop_criteria - - is_valid = (final_amplitudes > min_amplitude) * (final_amplitudes < max_amplitude) - valid_indices = np.where(is_valid) - - num_spikes = len(valid_indices[0]) - spikes["sample_index"][:num_spikes] = valid_indices[1] + d["nbefore"] - spikes["channel_index"][:num_spikes] = 0 - spikes["cluster_index"][:num_spikes] = 
valid_indices[0] - spikes["amplitude"][:num_spikes] = final_amplitudes[valid_indices[0], valid_indices[1]] - - spikes = spikes[:num_spikes] - order = np.argsort(spikes["sample_index"]) - spikes = spikes[order] - - return spikes - - -class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): - """ - Orthogonal Matching Pursuit inspired from Spyking Circus sorter - - https://elifesciences.org/articles/34518 - - This is an Orthogonal Template Matching algorithm. For speed and - memory optimization, templates are automatically sparsified. Signal - is convolved with the templates, and as long as some scalar products - are higher than a given threshold, we use a Cholesky decomposition - to compute the optimal amplitudes needed to reconstruct the signal. - - IMPORTANT NOTE: small chunks are more efficient for such Peeler, - consider using 100ms chunk - - Parameters - ---------- - amplitude: tuple - (Minimal, Maximal) amplitudes allowed for every template - omp_min_sps: float - Stopping criteria of the OMP algorithm, in percentage of the norm - noise_levels: array - The noise levels, for every channels. If None, they will be automatically - computed - random_chunk_kwargs: dict - Parameters for computing noise levels, if not provided (sub optimal) - sparse_kwargs: dict - Parameters to extract a sparsity mask from the waveform_extractor, if not - already sparse. - ----- - """ - _default_params = { "amplitudes": [0.6, 2], "omp_min_sps": 0.1, diff --git a/src/spikeinterface/sortingcomponents/matching/method_list.py b/src/spikeinterface/sortingcomponents/matching/method_list.py index 46c4a53872..c00c0a1fd3 100644 --- a/src/spikeinterface/sortingcomponents/matching/method_list.py +++ b/src/spikeinterface/sortingcomponents/matching/method_list.py @@ -8,6 +8,5 @@ "tridesclous": TridesclousPeeler, "circus": CircusPeeler, "circus-omp": CircusOMPPeeler, - 'circus-omp-svd' : CircusOMPSVDPeeler, "wobble": WobbleMatch, } From f21d80bf3cb34e5f39d59a7692a0c594025ea7b8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 20:32:10 +0000 Subject: [PATCH 07/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../clustering/clustering_tools.py | 6 +-- .../sortingcomponents/matching/circus.py | 44 +++++++++---------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 7a2af09942..c1b635fdaf 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -624,9 +624,9 @@ def remove_duplicates_via_matching( "templates": computed["templates"], "norms": computed["norms"], "sparsities": computed["sparsities"], - "temporal" : computed["temporal"], - "spatial" : computed["spatial"], - "singular" : computed["singular"], + "temporal": computed["temporal"], + "spatial": computed["spatial"], + "singular": computed["singular"], } ) valid = (spikes["sample_index"] >= half_marging) * (spikes["sample_index"] < duration + half_marging) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index aeac69fc86..d2b02ea15d 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -201,7 +201,7 @@ class 
CircusOMPPeeler(BaseTemplateMatchingEngine): "waveform_extractor": None, "random_chunk_kwargs": {}, "noise_levels": None, - "rank" : 5, + "rank": 5, "sparse_kwargs": {"method": "ptp", "threshold": 1}, "ignored_ids": [], "vicinity": 0, @@ -219,37 +219,37 @@ def _prepare_templates(cls, d): templates = waveform_extractor.get_all_templates(mode="median").copy() - #First, we set masked channels to 0 - d['sparsities'] = {} + # First, we set masked channels to 0 + d["sparsities"] = {} for count in range(num_templates): template = templates[count][:, sparsity[count]] (d["sparsities"][count],) = np.nonzero(sparsity[count]) templates[count][:, ~sparsity[count]] = 0 # Then we keep only the strongest components - rank = d['rank'] + rank = d["rank"] temporal, singular, spatial = np.linalg.svd(templates, full_matrices=False) d["temporal"] = temporal[:, :, :rank] d["singular"] = singular[:, :rank] d["spatial"] = spatial[:, :rank, :] - + # We reconstruct the approximated templates templates = np.matmul(d["temporal"] * d["singular"][:, np.newaxis, :], d["spatial"]) d["temporal"] = np.flip(temporal, axis=1) - d['templates'] = {} + d["templates"] = {} d["norms"] = np.zeros(num_templates, dtype=np.float32) - + # And get the norms, saving compressed templates for CC matrix for count in range(num_templates): template = templates[count][:, sparsity[count]] d["norms"][count] = np.linalg.norm(template) - d["templates"][count] = template / d["norms"][count] - - d['temporal'] /= d['norms'][:, np.newaxis, np.newaxis] - d["spatial"] = np.moveaxis(d['spatial'][:, :rank, :], [0, 1, 2], [1, 0, 2]) - d['temporal'] = np.moveaxis(d['temporal'][:, :, :rank], [0, 1, 2], [1, 2, 0]) - d['singular'] = d['singular'].T[:, :, np.newaxis] + d["templates"][count] = template / d["norms"][count] + + d["temporal"] /= d["norms"][:, np.newaxis, np.newaxis] + d["spatial"] = np.moveaxis(d["spatial"][:, :rank, :], [0, 1, 2], [1, 0, 2]) + d["temporal"] = np.moveaxis(d["temporal"][:, :, :rank], [0, 1, 2], [1, 2, 0]) + d["singular"] = d["singular"].T[:, :, np.newaxis] return d @classmethod @@ -276,7 +276,7 @@ def initialize_and_check_kwargs(cls, recording, kwargs): if "templates" not in d: d = cls._prepare_templates(d) else: - for key in ["norms", "sparsities", 'temporal', 'spatial', 'singular']: + for key in ["norms", "sparsities", "temporal", "spatial", "singular"]: assert d[key] is not None, "If templates are provided, %d should also be there" % key d["num_templates"] = len(d["templates"]) @@ -287,8 +287,8 @@ def initialize_and_check_kwargs(cls, recording, kwargs): d["ignored_ids"] = np.array(d["ignored_ids"]) omp_min_sps = d["omp_min_sps"] - #d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) - d['stop_criteria'] = omp_min_sps * np.maximum(d['norms'], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) + # d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) + d["stop_criteria"] = omp_min_sps * np.maximum(d["norms"], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) return d @@ -325,18 +325,18 @@ def main_function(cls, traces, d): ignored_ids = d["ignored_ids"] stop_criteria = d["stop_criteria"][:, np.newaxis] vicinity = d["vicinity"] - rank = d['rank'] + rank = d["rank"] num_timesteps = len(traces) num_peaks = num_timesteps - num_samples + 1 conv_shape = (num_templates, num_peaks) scalar_products = np.zeros(conv_shape, dtype=np.float32) - + # Filter using overlap-and-add convolution - spatially_filtered_data = np.matmul(d['spatial'], traces.T[np.newaxis, :, :]) - 
scaled_filtered_data = spatially_filtered_data * d['singular'] - objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d['temporal'], axes=2, mode="valid") + spatially_filtered_data = np.matmul(d["spatial"], traces.T[np.newaxis, :, :]) + scaled_filtered_data = spatially_filtered_data * d["singular"] + objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d["temporal"], axes=2, mode="valid") scalar_products += np.sum(objective_by_rank, axis=0) if len(ignored_ids) > 0: @@ -473,8 +473,6 @@ def main_function(cls, traces, d): return spikes - - class CircusPeeler(BaseTemplateMatchingEngine): """ From a275bcaaf14819e64aa24a78a504b134f1d9288e Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 22:32:57 +0200 Subject: [PATCH 08/25] Patch --- src/spikeinterface/sortingcomponents/matching/method_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spikeinterface/sortingcomponents/matching/method_list.py b/src/spikeinterface/sortingcomponents/matching/method_list.py index c00c0a1fd3..bedc04a9d5 100644 --- a/src/spikeinterface/sortingcomponents/matching/method_list.py +++ b/src/spikeinterface/sortingcomponents/matching/method_list.py @@ -1,6 +1,6 @@ from .naive import NaiveMatching from .tdc import TridesclousPeeler -from .circus import CircusPeeler, CircusOMPPeeler, CircusOMPSVDPeeler +from .circus import CircusPeeler, CircusOMPPeeler from .wobble import WobbleMatch matching_methods = { From 85eb432c16a0719520a8dcbb24d2c8bb2c804d60 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Tue, 26 Sep 2023 22:44:15 +0200 Subject: [PATCH 09/25] Cleaning useless functions --- .../clustering/clustering_tools.py | 6 -- .../sortingcomponents/matching/circus.py | 95 ------------------- 2 files changed, 101 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index c1b635fdaf..5ff74db3e7 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -546,7 +546,6 @@ def remove_duplicates_via_matching( from spikeinterface.core import NumpySorting from spikeinterface.core import extract_waveforms from spikeinterface.core import get_global_tmp_folder - from spikeinterface.sortingcomponents.matching.circus import get_scipy_shape import string, random, shutil, os from pathlib import Path @@ -591,11 +590,6 @@ def remove_duplicates_via_matching( chunk_size = duration + 3 * margin - dummy_filter = np.empty((num_chans, duration), dtype=np.float32) - dummy_traces = np.empty((num_chans, chunk_size), dtype=np.float32) - - fshape, axes = get_scipy_shape(dummy_filter, dummy_traces, axes=1) - method_kwargs.update( { "waveform_extractor": waveform_extractor, diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index d2b02ea15d..ec6ef3a292 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -35,101 +35,6 @@ ################# # Circus peeler # -################# - -from scipy.fft._helper import _init_nd_shape_and_axes - -try: - from scipy.signal.signaltools import _init_freq_conv_axes, _apply_conv_mode -except Exception: - from scipy.signal._signaltools import _init_freq_conv_axes, _apply_conv_mode -from scipy import linalg, fft as sp_fft - - -def get_scipy_shape(in1, in2, mode="full", axes=None, calc_fast_len=True): - 
in1 = np.asarray(in1) - in2 = np.asarray(in2) - - if in1.ndim == in2.ndim == 0: # scalar inputs - return in1 * in2 - elif in1.ndim != in2.ndim: - raise ValueError("in1 and in2 should have the same dimensionality") - elif in1.size == 0 or in2.size == 0: # empty arrays - return np.array([]) - - in1, in2, axes = _init_freq_conv_axes(in1, in2, mode, axes, sorted_axes=False) - - s1 = in1.shape - s2 = in2.shape - - shape = [max((s1[i], s2[i])) if i not in axes else s1[i] + s2[i] - 1 for i in range(in1.ndim)] - - if not len(axes): - return in1 * in2 - - complex_result = in1.dtype.kind == "c" or in2.dtype.kind == "c" - - if calc_fast_len: - # Speed up FFT by padding to optimal size. - fshape = [sp_fft.next_fast_len(shape[a], not complex_result) for a in axes] - else: - fshape = shape - - return fshape, axes - - -def fftconvolve_with_cache(in1, in2, cache, mode="full", axes=None): - in1 = np.asarray(in1) - in2 = np.asarray(in2) - - if in1.ndim == in2.ndim == 0: # scalar inputs - return in1 * in2 - elif in1.ndim != in2.ndim: - raise ValueError("in1 and in2 should have the same dimensionality") - elif in1.size == 0 or in2.size == 0: # empty arrays - return np.array([]) - - in1, in2, axes = _init_freq_conv_axes(in1, in2, mode, axes, sorted_axes=False) - - s1 = in1.shape - s2 = in2.shape - - shape = [max((s1[i], s2[i])) if i not in axes else s1[i] + s2[i] - 1 for i in range(in1.ndim)] - - ret = _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True) - - return _apply_conv_mode(ret, s1, s2, mode, axes) - - -def _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True): - if not len(axes): - return in1 * in2 - - complex_result = in1.dtype.kind == "c" or in2.dtype.kind == "c" - - if calc_fast_len: - # Speed up FFT by padding to optimal size. - fshape = [sp_fft.next_fast_len(shape[a], not complex_result) for a in axes] - else: - fshape = shape - - if not complex_result: - fft, ifft = sp_fft.rfftn, sp_fft.irfftn - else: - fft, ifft = sp_fft.fftn, sp_fft.ifftn - - sp1 = cache["full"][cache["mask"]] - sp2 = cache["template"] - - # sp2 = fft(in2[cache['mask']], fshape, axes=axes) - ret = ifft(sp1 * sp2, fshape, axes=axes) - - if calc_fast_len: - fslice = tuple([slice(sz) for sz in shape]) - ret = ret[fslice] - - return ret - def compute_overlaps(templates, num_samples, num_channels, sparsities): num_templates = len(templates) From 15ae43215bf5a3b49a52081e18ad8ba3810bce15 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 20:44:37 +0000 Subject: [PATCH 10/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/sortingcomponents/matching/circus.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index ec6ef3a292..7bef8358de 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -36,6 +36,7 @@ ################# # Circus peeler # + def compute_overlaps(templates, num_samples, num_channels, sparsities): num_templates = len(templates) From 41155a1835f348d9181501d823cd78fca5cf6191 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Wed, 27 Sep 2023 13:36:15 +0200 Subject: [PATCH 11/25] Changing the internal representation of overlaps --- .../clustering/clustering_tools.py | 4 +- .../sortingcomponents/matching/circus.py | 78 +++++++++++++------ 2 files changed, 
59 insertions(+), 23 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 5ff74db3e7..032694a47e 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -617,10 +617,12 @@ def remove_duplicates_via_matching( "overlaps": computed["overlaps"], "templates": computed["templates"], "norms": computed["norms"], - "sparsities": computed["sparsities"], "temporal": computed["temporal"], "spatial": computed["spatial"], "singular": computed["singular"], + "units_overlaps": computed["units_overlaps"], + "unit_overlaps_indices": computed["unit_overlaps_indices"], + "sparsity_mask": computed["sparsity_mask"], } ) valid = (spikes["sample_index"] >= half_marging) * (spikes["sample_index"] < duration + half_marging) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index ec6ef3a292..ffc2a225e8 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -122,14 +122,20 @@ def _prepare_templates(cls, d): else: sparsity = waveform_extractor.sparsity.mask + d['sparsity_mask'] = sparsity + units_overlaps = np.sum( + np.logical_and(sparsity[:, np.newaxis, :], sparsity[np.newaxis, :, :]), axis=2 + ) + d['units_overlaps'] = units_overlaps > 0 + d['unit_overlaps_indices'] = {} + for i in range(num_templates): + d['unit_overlaps_indices'][i], = np.nonzero(d['units_overlaps'][i]) + templates = waveform_extractor.get_all_templates(mode="median").copy() # First, we set masked channels to 0 - d["sparsities"] = {} for count in range(num_templates): - template = templates[count][:, sparsity[count]] - (d["sparsities"][count],) = np.nonzero(sparsity[count]) - templates[count][:, ~sparsity[count]] = 0 + templates[count][:, ~d['sparsity_mask'][count]] = 0 # Then we keep only the strongest components rank = d["rank"] @@ -141,19 +147,45 @@ def _prepare_templates(cls, d): # We reconstruct the approximated templates templates = np.matmul(d["temporal"] * d["singular"][:, np.newaxis, :], d["spatial"]) - d["temporal"] = np.flip(temporal, axis=1) d["templates"] = {} d["norms"] = np.zeros(num_templates, dtype=np.float32) # And get the norms, saving compressed templates for CC matrix for count in range(num_templates): - template = templates[count][:, sparsity[count]] + template = templates[count][:, d['sparsity_mask'][count]] d["norms"][count] = np.linalg.norm(template) d["templates"][count] = template / d["norms"][count] d["temporal"] /= d["norms"][:, np.newaxis, np.newaxis] - d["spatial"] = np.moveaxis(d["spatial"][:, :rank, :], [0, 1, 2], [1, 0, 2]) - d["temporal"] = np.moveaxis(d["temporal"][:, :, :rank], [0, 1, 2], [1, 2, 0]) + d["temporal"] = np.flip(d["temporal"], axis=1) + + d['overlaps'] = [] + for i in range(num_templates): + num_overlaps = np.sum(d['units_overlaps'][i]) + overlapping_units = np.where(d['units_overlaps'][i])[0] + + # Reconstruct unit template from SVD Matrices + data = d['temporal'][i] * d['singular'][i][np.newaxis, :] + template_i = np.matmul(data, d['spatial'][i, :, :]) + template_i = np.flipud(template_i) + + unit_overlaps = np.zeros([num_overlaps, 2*d['num_samples'] - 1], dtype=np.float32) + + for count, j in enumerate(overlapping_units): + overlapped_channels = d['sparsity_mask'][j] + visible_i = template_i[:, overlapped_channels] + + 
spatial_filters = d['spatial'][j, :, overlapped_channels] + spatially_filtered_template = np.matmul(visible_i, spatial_filters) + visible_i = spatially_filtered_template * d['singular'][j] + + for rank in range(visible_i.shape[1]): + unit_overlaps[count, :] += np.convolve(visible_i[:, rank], d['temporal'][j][:, rank], mode='full') + + d['overlaps'].append(unit_overlaps) + + d["spatial"] = np.moveaxis(d["spatial"], [0, 1, 2], [1, 0, 2]) + d["temporal"] = np.moveaxis(d["temporal"], [0, 1, 2], [1, 2, 0]) d["singular"] = d["singular"].T[:, :, np.newaxis] return d @@ -181,14 +213,10 @@ def initialize_and_check_kwargs(cls, recording, kwargs): if "templates" not in d: d = cls._prepare_templates(d) else: - for key in ["norms", "sparsities", "temporal", "spatial", "singular"]: + for key in ["norms", "temporal", "spatial", "singular", "units_overlaps", "sparsity_mask", "unit_overlaps_indices"]: assert d[key] is not None, "If templates are provided, %d should also be there" % key d["num_templates"] = len(d["templates"]) - - if "overlaps" not in d: - d["overlaps"] = compute_overlaps(d["templates"], d["num_samples"], d["num_channels"], d["sparsities"]) - d["ignored_ids"] = np.array(d["ignored_ids"]) omp_min_sps = d["omp_min_sps"] @@ -252,7 +280,7 @@ def main_function(cls, traces, d): spikes = np.empty(scalar_products.size, dtype=spike_dtype) idx_lookup = np.arange(scalar_products.size).reshape(num_templates, -1) - M = np.zeros((100, 100), dtype=np.float32) + M = np.zeros((num_templates, num_templates), dtype=np.float32) all_selections = np.empty((2, scalar_products.size), dtype=np.int32) final_amplitudes = np.zeros(scalar_products.shape, dtype=np.float32) @@ -273,18 +301,24 @@ def main_function(cls, traces, d): if num_selection > 0: delta_t = selection[1] - peak_index - idx = np.where((delta_t < neighbor_window) & (delta_t > -num_samples))[0] + idx = np.where((delta_t < neighbor_window) & (delta_t >= -num_samples))[0] myline = num_samples + delta_t[idx] + myindices = selection[0, idx] - if not best_cluster_ind in cached_overlaps: - cached_overlaps[best_cluster_ind] = overlaps[best_cluster_ind].toarray() + local_overlaps = overlaps[best_cluster_ind] + overlapping_templates = d['unit_overlaps_indices'][best_cluster_ind] if num_selection == M.shape[0]: Z = np.zeros((2 * num_selection, 2 * num_selection), dtype=np.float32) Z[:num_selection, :num_selection] = M M = Z - M[num_selection, idx] = cached_overlaps[best_cluster_ind][selection[0, idx], myline] + mask = np.isin(myindices, overlapping_templates) + a, b = myindices[mask], myline[mask] + + table = np.zeros(num_templates, dtype=int) + table[overlapping_templates] = np.arange(len(overlapping_templates)) + M[num_selection, myindices[mask]] = local_overlaps[table[a], b] if vicinity == 0: scipy.linalg.solve_triangular( @@ -346,8 +380,8 @@ def main_function(cls, traces, d): tmp_best, tmp_peak = selection[:, i] diff_amp = diff_amplitudes[i] * norms[tmp_best] - if not tmp_best in cached_overlaps: - cached_overlaps[tmp_best] = overlaps[tmp_best].toarray() + local_overlaps = overlaps[tmp_best] + overlapping_templates = d['units_overlaps'][tmp_best] if not tmp_peak in neighbors.keys(): idx = [max(0, tmp_peak - num_samples), min(num_peaks, tmp_peak + neighbor_window)] @@ -357,8 +391,8 @@ def main_function(cls, traces, d): idx = neighbors[tmp_peak]["idx"] tdx = neighbors[tmp_peak]["tdx"] - to_add = diff_amp * cached_overlaps[tmp_best][:, tdx[0] : tdx[1]] - scalar_products[:, idx[0] : idx[1]] -= to_add + to_add = diff_amp * local_overlaps[:, tdx[0] : tdx[1]] + 
scalar_products[overlapping_templates, idx[0] : idx[1]] -= to_add is_valid = scalar_products > stop_criteria From 97aff7f6754e7c4d333b95629552fe37151bf24f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 11:36:51 +0000 Subject: [PATCH 12/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../sortingcomponents/matching/circus.py | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index e047cbdd31..5924d3bc18 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -123,20 +123,18 @@ def _prepare_templates(cls, d): else: sparsity = waveform_extractor.sparsity.mask - d['sparsity_mask'] = sparsity - units_overlaps = np.sum( - np.logical_and(sparsity[:, np.newaxis, :], sparsity[np.newaxis, :, :]), axis=2 - ) - d['units_overlaps'] = units_overlaps > 0 - d['unit_overlaps_indices'] = {} + d["sparsity_mask"] = sparsity + units_overlaps = np.sum(np.logical_and(sparsity[:, np.newaxis, :], sparsity[np.newaxis, :, :]), axis=2) + d["units_overlaps"] = units_overlaps > 0 + d["unit_overlaps_indices"] = {} for i in range(num_templates): - d['unit_overlaps_indices'][i], = np.nonzero(d['units_overlaps'][i]) + (d["unit_overlaps_indices"][i],) = np.nonzero(d["units_overlaps"][i]) templates = waveform_extractor.get_all_templates(mode="median").copy() # First, we set masked channels to 0 for count in range(num_templates): - templates[count][:, ~d['sparsity_mask'][count]] = 0 + templates[count][:, ~d["sparsity_mask"][count]] = 0 # Then we keep only the strongest components rank = d["rank"] @@ -153,37 +151,37 @@ def _prepare_templates(cls, d): # And get the norms, saving compressed templates for CC matrix for count in range(num_templates): - template = templates[count][:, d['sparsity_mask'][count]] + template = templates[count][:, d["sparsity_mask"][count]] d["norms"][count] = np.linalg.norm(template) d["templates"][count] = template / d["norms"][count] d["temporal"] /= d["norms"][:, np.newaxis, np.newaxis] d["temporal"] = np.flip(d["temporal"], axis=1) - d['overlaps'] = [] + d["overlaps"] = [] for i in range(num_templates): - num_overlaps = np.sum(d['units_overlaps'][i]) - overlapping_units = np.where(d['units_overlaps'][i])[0] + num_overlaps = np.sum(d["units_overlaps"][i]) + overlapping_units = np.where(d["units_overlaps"][i])[0] # Reconstruct unit template from SVD Matrices - data = d['temporal'][i] * d['singular'][i][np.newaxis, :] - template_i = np.matmul(data, d['spatial'][i, :, :]) + data = d["temporal"][i] * d["singular"][i][np.newaxis, :] + template_i = np.matmul(data, d["spatial"][i, :, :]) template_i = np.flipud(template_i) - unit_overlaps = np.zeros([num_overlaps, 2*d['num_samples'] - 1], dtype=np.float32) + unit_overlaps = np.zeros([num_overlaps, 2 * d["num_samples"] - 1], dtype=np.float32) for count, j in enumerate(overlapping_units): - overlapped_channels = d['sparsity_mask'][j] + overlapped_channels = d["sparsity_mask"][j] visible_i = template_i[:, overlapped_channels] - spatial_filters = d['spatial'][j, :, overlapped_channels] + spatial_filters = d["spatial"][j, :, overlapped_channels] spatially_filtered_template = np.matmul(visible_i, spatial_filters) - visible_i = spatially_filtered_template * d['singular'][j] - + visible_i = 
spatially_filtered_template * d["singular"][j] + for rank in range(visible_i.shape[1]): - unit_overlaps[count, :] += np.convolve(visible_i[:, rank], d['temporal'][j][:, rank], mode='full') + unit_overlaps[count, :] += np.convolve(visible_i[:, rank], d["temporal"][j][:, rank], mode="full") - d['overlaps'].append(unit_overlaps) + d["overlaps"].append(unit_overlaps) d["spatial"] = np.moveaxis(d["spatial"], [0, 1, 2], [1, 0, 2]) d["temporal"] = np.moveaxis(d["temporal"], [0, 1, 2], [1, 2, 0]) @@ -214,7 +212,15 @@ def initialize_and_check_kwargs(cls, recording, kwargs): if "templates" not in d: d = cls._prepare_templates(d) else: - for key in ["norms", "temporal", "spatial", "singular", "units_overlaps", "sparsity_mask", "unit_overlaps_indices"]: + for key in [ + "norms", + "temporal", + "spatial", + "singular", + "units_overlaps", + "sparsity_mask", + "unit_overlaps_indices", + ]: assert d[key] is not None, "If templates are provided, %d should also be there" % key d["num_templates"] = len(d["templates"]) @@ -307,7 +313,7 @@ def main_function(cls, traces, d): myindices = selection[0, idx] local_overlaps = overlaps[best_cluster_ind] - overlapping_templates = d['unit_overlaps_indices'][best_cluster_ind] + overlapping_templates = d["unit_overlaps_indices"][best_cluster_ind] if num_selection == M.shape[0]: Z = np.zeros((2 * num_selection, 2 * num_selection), dtype=np.float32) @@ -382,7 +388,7 @@ def main_function(cls, traces, d): diff_amp = diff_amplitudes[i] * norms[tmp_best] local_overlaps = overlaps[tmp_best] - overlapping_templates = d['units_overlaps'][tmp_best] + overlapping_templates = d["units_overlaps"][tmp_best] if not tmp_peak in neighbors.keys(): idx = [max(0, tmp_peak - num_samples), min(num_peaks, tmp_peak + neighbor_window)] From 8da6b79daa95bc4148123e76742607fb82b23fb3 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Wed, 27 Sep 2023 13:59:41 +0200 Subject: [PATCH 13/25] Keeping the two matching engines for more tests before merging and final decision --- .../clustering/clustering_tools.py | 39 +- .../sortingcomponents/matching/circus.py | 410 +++++++++++++++++- .../sortingcomponents/matching/method_list.py | 5 +- 3 files changed, 436 insertions(+), 18 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 032694a47e..455af3ddfd 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -539,6 +539,7 @@ def remove_duplicates_via_matching( method_kwargs={}, job_kwargs={}, tmp_folder=None, + method='circus-omp' ): from spikeinterface.sortingcomponents.matching import find_spikes_from_templates from spikeinterface import get_noise_levels @@ -610,21 +611,31 @@ def remove_duplicates_via_matching( method_kwargs.update({"ignored_ids": ignore_ids + [i]}) spikes, computed = find_spikes_from_templates( - sub_recording, method="circus-omp", method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs - ) - method_kwargs.update( - { - "overlaps": computed["overlaps"], - "templates": computed["templates"], - "norms": computed["norms"], - "temporal": computed["temporal"], - "spatial": computed["spatial"], - "singular": computed["singular"], - "units_overlaps": computed["units_overlaps"], - "unit_overlaps_indices": computed["unit_overlaps_indices"], - "sparsity_mask": computed["sparsity_mask"], - } + sub_recording, method=method, method_kwargs=method_kwargs, extra_outputs=True, 
**job_kwargs ) + if method == 'circus-omp-vsd': + method_kwargs.update( + { + "overlaps": computed["overlaps"], + "templates": computed["templates"], + "norms": computed["norms"], + "temporal": computed["temporal"], + "spatial": computed["spatial"], + "singular": computed["singular"], + "units_overlaps": computed["units_overlaps"], + "unit_overlaps_indices": computed["unit_overlaps_indices"], + "sparsity_mask": computed["sparsity_mask"], + } + ) + elif method == 'circus-omp': + method_kwargs.update( + { + "overlaps": computed["overlaps"], + "templates": computed["templates"], + "norms": computed["norms"], + "sparsities": computed["sparsities"] + } + ) valid = (spikes["sample_index"] >= half_marging) * (spikes["sample_index"] < duration + half_marging) if np.sum(valid) > 0: if np.sum(valid) == 1: diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index e047cbdd31..08be0985f1 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -33,8 +33,100 @@ from .main import BaseTemplateMatchingEngine -################# -# Circus peeler # + +from scipy.fft._helper import _init_nd_shape_and_axes + +try: + from scipy.signal.signaltools import _init_freq_conv_axes, _apply_conv_mode +except Exception: + from scipy.signal._signaltools import _init_freq_conv_axes, _apply_conv_mode +from scipy import linalg, fft as sp_fft + + +def get_scipy_shape(in1, in2, mode="full", axes=None, calc_fast_len=True): + in1 = np.asarray(in1) + in2 = np.asarray(in2) + + if in1.ndim == in2.ndim == 0: # scalar inputs + return in1 * in2 + elif in1.ndim != in2.ndim: + raise ValueError("in1 and in2 should have the same dimensionality") + elif in1.size == 0 or in2.size == 0: # empty arrays + return np.array([]) + + in1, in2, axes = _init_freq_conv_axes(in1, in2, mode, axes, sorted_axes=False) + + s1 = in1.shape + s2 = in2.shape + + shape = [max((s1[i], s2[i])) if i not in axes else s1[i] + s2[i] - 1 for i in range(in1.ndim)] + + if not len(axes): + return in1 * in2 + + complex_result = in1.dtype.kind == "c" or in2.dtype.kind == "c" + + if calc_fast_len: + # Speed up FFT by padding to optimal size. + fshape = [sp_fft.next_fast_len(shape[a], not complex_result) for a in axes] + else: + fshape = shape + + return fshape, axes + + +def fftconvolve_with_cache(in1, in2, cache, mode="full", axes=None): + in1 = np.asarray(in1) + in2 = np.asarray(in2) + + if in1.ndim == in2.ndim == 0: # scalar inputs + return in1 * in2 + elif in1.ndim != in2.ndim: + raise ValueError("in1 and in2 should have the same dimensionality") + elif in1.size == 0 or in2.size == 0: # empty arrays + return np.array([]) + + in1, in2, axes = _init_freq_conv_axes(in1, in2, mode, axes, sorted_axes=False) + + s1 = in1.shape + s2 = in2.shape + + shape = [max((s1[i], s2[i])) if i not in axes else s1[i] + s2[i] - 1 for i in range(in1.ndim)] + + ret = _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True) + + return _apply_conv_mode(ret, s1, s2, mode, axes) + + +def _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True): + if not len(axes): + return in1 * in2 + + complex_result = in1.dtype.kind == "c" or in2.dtype.kind == "c" + + if calc_fast_len: + # Speed up FFT by padding to optimal size. 
+ fshape = [sp_fft.next_fast_len(shape[a], not complex_result) for a in axes] + else: + fshape = shape + + if not complex_result: + fft, ifft = sp_fft.rfftn, sp_fft.irfftn + else: + fft, ifft = sp_fft.fftn, sp_fft.ifftn + + sp1 = cache["full"][cache["mask"]] + sp2 = cache["template"] + + # sp2 = fft(in2[cache['mask']], fshape, axes=axes) + ret = ifft(sp1 * sp2, fshape, axes=axes) + + if calc_fast_len: + fslice = tuple([slice(sz) for sz in shape]) + ret = ret[fslice] + + return ret + def compute_overlaps(templates, num_samples, num_channels, sparsities): @@ -101,6 +193,320 @@ class CircusOMPPeeler(BaseTemplateMatchingEngine): ----- """ + _default_params = { + "amplitudes": [0.6, 2], + "omp_min_sps": 0.1, + "waveform_extractor": None, + "templates": None, + "overlaps": None, + "norms": None, + "random_chunk_kwargs": {}, + "noise_levels": None, + "sparse_kwargs": {"method": "ptp", "threshold": 1}, + "ignored_ids": [], + "vicinity": 0, + } + + @classmethod + def _prepare_templates(cls, d): + waveform_extractor = d["waveform_extractor"] + num_templates = len(d["waveform_extractor"].sorting.unit_ids) + + if not waveform_extractor.is_sparse(): + sparsity = compute_sparsity(waveform_extractor, **d["sparse_kwargs"]).mask + else: + sparsity = waveform_extractor.sparsity.mask + + templates = waveform_extractor.get_all_templates(mode="median").copy() + + d["sparsities"] = {} + d["templates"] = {} + d["norms"] = np.zeros(num_templates, dtype=np.float32) + + for count, unit_id in enumerate(waveform_extractor.sorting.unit_ids): + template = templates[count][:, sparsity[count]] + (d["sparsities"][count],) = np.nonzero(sparsity[count]) + d["norms"][count] = np.linalg.norm(template) + d["templates"][count] = template / d["norms"][count] + + return d + + @classmethod + def initialize_and_check_kwargs(cls, recording, kwargs): + d = cls._default_params.copy() + d.update(kwargs) + + # assert isinstance(d['waveform_extractor'], WaveformExtractor) + + for v in ["omp_min_sps"]: + assert (d[v] >= 0) and (d[v] <= 1), f"{v} should be in [0, 1]" + + d["num_channels"] = d["waveform_extractor"].recording.get_num_channels() + d["num_samples"] = d["waveform_extractor"].nsamples + d["nbefore"] = d["waveform_extractor"].nbefore + d["nafter"] = d["waveform_extractor"].nafter + d["sampling_frequency"] = d["waveform_extractor"].recording.get_sampling_frequency() + d["vicinity"] *= d["num_samples"] + + if d["noise_levels"] is None: + print("CircusOMPPeeler : noise should be computed outside") + d["noise_levels"] = get_noise_levels(recording, **d["random_chunk_kwargs"], return_scaled=False) + + if d["templates"] is None: + d = cls._prepare_templates(d) + else: + for key in ["norms", "sparsities"]: + assert d[key] is not None, "If templates are provided, %d should also be there" % key + + d["num_templates"] = len(d["templates"]) + + if d["overlaps"] is None: + d["overlaps"] = compute_overlaps(d["templates"], d["num_samples"], d["num_channels"], d["sparsities"]) + + d["ignored_ids"] = np.array(d["ignored_ids"]) + + omp_min_sps = d["omp_min_sps"] + # nb_active_channels = np.array([len(d['sparsities'][count]) for count in range(d['num_templates'])]) + d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) + + return d + + @classmethod + def serialize_method_kwargs(cls, kwargs): + kwargs = dict(kwargs) + # remove waveform_extractor + kwargs.pop("waveform_extractor") + return kwargs + + @classmethod + def unserialize_in_worker(cls, kwargs): + return kwargs + + @classmethod + def get_margin(cls, 
recording, kwargs): + margin = 2 * max(kwargs["nbefore"], kwargs["nafter"]) + return margin + + @classmethod + def main_function(cls, traces, d): + templates = d["templates"] + num_templates = d["num_templates"] + num_channels = d["num_channels"] + num_samples = d["num_samples"] + overlaps = d["overlaps"] + norms = d["norms"] + nbefore = d["nbefore"] + nafter = d["nafter"] + omp_tol = np.finfo(np.float32).eps + num_samples = d["nafter"] + d["nbefore"] + neighbor_window = num_samples - 1 + min_amplitude, max_amplitude = d["amplitudes"] + sparsities = d["sparsities"] + ignored_ids = d["ignored_ids"] + stop_criteria = d["stop_criteria"] + vicinity = d["vicinity"] + + if "cached_fft_kernels" not in d: + d["cached_fft_kernels"] = {"fshape": 0} + + cached_fft_kernels = d["cached_fft_kernels"] + + num_timesteps = len(traces) + + num_peaks = num_timesteps - num_samples + 1 + + traces = traces.T + + dummy_filter = np.empty((num_channels, num_samples), dtype=np.float32) + dummy_traces = np.empty((num_channels, num_timesteps), dtype=np.float32) + + fshape, axes = get_scipy_shape(dummy_filter, traces, axes=1) + fft_cache = {"full": sp_fft.rfftn(traces, fshape, axes=axes)} + + scalar_products = np.empty((num_templates, num_peaks), dtype=np.float32) + + flagged_chunk = cached_fft_kernels["fshape"] != fshape[0] + + for i in range(num_templates): + if i not in ignored_ids: + if i not in cached_fft_kernels or flagged_chunk: + kernel_filter = np.ascontiguousarray(templates[i][::-1].T) + cached_fft_kernels.update({i: sp_fft.rfftn(kernel_filter, fshape, axes=axes)}) + cached_fft_kernels["fshape"] = fshape[0] + + fft_cache.update({"mask": sparsities[i], "template": cached_fft_kernels[i]}) + + convolution = fftconvolve_with_cache(dummy_filter, dummy_traces, fft_cache, axes=1, mode="valid") + if len(convolution) > 0: + scalar_products[i] = convolution.sum(0) + else: + scalar_products[i] = 0 + + if len(ignored_ids) > 0: + scalar_products[ignored_ids] = -np.inf + + num_spikes = 0 + + spikes = np.empty(scalar_products.size, dtype=spike_dtype) + idx_lookup = np.arange(scalar_products.size).reshape(num_templates, -1) + + M = np.zeros((100, 100), dtype=np.float32) + + all_selections = np.empty((2, scalar_products.size), dtype=np.int32) + final_amplitudes = np.zeros(scalar_products.shape, dtype=np.float32) + num_selection = 0 + + full_sps = scalar_products.copy() + + neighbors = {} + cached_overlaps = {} + + is_valid = scalar_products > stop_criteria + all_amplitudes = np.zeros(0, dtype=np.float32) + is_in_vicinity = np.zeros(0, dtype=np.int32) + + while np.any(is_valid): + best_amplitude_ind = scalar_products[is_valid].argmax() + best_cluster_ind, peak_index = np.unravel_index(idx_lookup[is_valid][best_amplitude_ind], idx_lookup.shape) + + if num_selection > 0: + delta_t = selection[1] - peak_index + idx = np.where((delta_t < neighbor_window) & (delta_t > -num_samples))[0] + myline = num_samples + delta_t[idx] + + if not best_cluster_ind in cached_overlaps: + cached_overlaps[best_cluster_ind] = overlaps[best_cluster_ind].toarray() + + if num_selection == M.shape[0]: + Z = np.zeros((2 * num_selection, 2 * num_selection), dtype=np.float32) + Z[:num_selection, :num_selection] = M + M = Z + + M[num_selection, idx] = cached_overlaps[best_cluster_ind][selection[0, idx], myline] + + if vicinity == 0: + scipy.linalg.solve_triangular( + M[:num_selection, :num_selection], + M[num_selection, :num_selection], + trans=0, + lower=1, + overwrite_b=True, + check_finite=False, + ) + + v = nrm2(M[num_selection, :num_selection]) ** 2 + 
Lkk = 1 - v + if Lkk <= omp_tol: # selected atoms are dependent + break + M[num_selection, num_selection] = np.sqrt(Lkk) + else: + is_in_vicinity = np.where(np.abs(delta_t) < vicinity)[0] + + if len(is_in_vicinity) > 0: + L = M[is_in_vicinity, :][:, is_in_vicinity] + + M[num_selection, is_in_vicinity] = scipy.linalg.solve_triangular( + L, M[num_selection, is_in_vicinity], trans=0, lower=1, overwrite_b=True, check_finite=False + ) + + v = nrm2(M[num_selection, is_in_vicinity]) ** 2 + Lkk = 1 - v + if Lkk <= omp_tol: # selected atoms are dependent + break + M[num_selection, num_selection] = np.sqrt(Lkk) + else: + M[num_selection, num_selection] = 1.0 + else: + M[0, 0] = 1 + + all_selections[:, num_selection] = [best_cluster_ind, peak_index] + num_selection += 1 + + selection = all_selections[:, :num_selection] + res_sps = full_sps[selection[0], selection[1]] + + if True: # vicinity == 0: + all_amplitudes, _ = potrs(M[:num_selection, :num_selection], res_sps, lower=True, overwrite_b=False) + all_amplitudes /= norms[selection[0]] + else: + # This is not working, need to figure out why + is_in_vicinity = np.append(is_in_vicinity, num_selection - 1) + all_amplitudes = np.append(all_amplitudes, np.float32(1)) + L = M[is_in_vicinity, :][:, is_in_vicinity] + all_amplitudes[is_in_vicinity], _ = potrs(L, res_sps[is_in_vicinity], lower=True, overwrite_b=False) + all_amplitudes[is_in_vicinity] /= norms[selection[0][is_in_vicinity]] + + diff_amplitudes = all_amplitudes - final_amplitudes[selection[0], selection[1]] + modified = np.where(np.abs(diff_amplitudes) > omp_tol)[0] + final_amplitudes[selection[0], selection[1]] = all_amplitudes + + for i in modified: + tmp_best, tmp_peak = selection[:, i] + diff_amp = diff_amplitudes[i] * norms[tmp_best] + + if not tmp_best in cached_overlaps: + cached_overlaps[tmp_best] = overlaps[tmp_best].toarray() + + if not tmp_peak in neighbors.keys(): + idx = [max(0, tmp_peak - num_samples), min(num_peaks, tmp_peak + neighbor_window)] + tdx = [num_samples + idx[0] - tmp_peak, num_samples + idx[1] - tmp_peak] + neighbors[tmp_peak] = {"idx": idx, "tdx": tdx} + + idx = neighbors[tmp_peak]["idx"] + tdx = neighbors[tmp_peak]["tdx"] + + to_add = diff_amp * cached_overlaps[tmp_best][:, tdx[0] : tdx[1]] + scalar_products[:, idx[0] : idx[1]] -= to_add + + is_valid = scalar_products > stop_criteria + + is_valid = (final_amplitudes > min_amplitude) * (final_amplitudes < max_amplitude) + valid_indices = np.where(is_valid) + + num_spikes = len(valid_indices[0]) + spikes["sample_index"][:num_spikes] = valid_indices[1] + d["nbefore"] + spikes["channel_index"][:num_spikes] = 0 + spikes["cluster_index"][:num_spikes] = valid_indices[0] + spikes["amplitude"][:num_spikes] = final_amplitudes[valid_indices[0], valid_indices[1]] + + spikes = spikes[:num_spikes] + order = np.argsort(spikes["sample_index"]) + spikes = spikes[order] + + return spikes + +class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): + """ + Orthogonal Matching Pursuit inspired from Spyking Circus sorter + + https://elifesciences.org/articles/34518 + + This is an Orthogonal Template Matching algorithm. For speed and + memory optimization, templates are automatically sparsified. Signal + is convolved with the templates, and as long as some scalar products + are higher than a given threshold, we use a Cholesky decomposition + to compute the optimal amplitudes needed to reconstruct the signal. 
+ + IMPORTANT NOTE: small chunks are more efficient for such Peeler, + consider using 100ms chunk + + Parameters + ---------- + amplitude: tuple + (Minimal, Maximal) amplitudes allowed for every template + omp_min_sps: float + Stopping criteria of the OMP algorithm, in percentage of the norm + noise_levels: array + The noise levels, for every channels. If None, they will be automatically + computed + random_chunk_kwargs: dict + Parameters for computing noise levels, if not provided (sub optimal) + sparse_kwargs: dict + Parameters to extract a sparsity mask from the waveform_extractor, if not + already sparse. + ----- + """ + _default_params = { "amplitudes": [0.6, 2], "omp_min_sps": 0.1, diff --git a/src/spikeinterface/sortingcomponents/matching/method_list.py b/src/spikeinterface/sortingcomponents/matching/method_list.py index bedc04a9d5..99c2817338 100644 --- a/src/spikeinterface/sortingcomponents/matching/method_list.py +++ b/src/spikeinterface/sortingcomponents/matching/method_list.py @@ -1,6 +1,6 @@ from .naive import NaiveMatching from .tdc import TridesclousPeeler -from .circus import CircusPeeler, CircusOMPPeeler +from .circus import CircusPeeler, CircusOMPPeeler, CircusOMPSVDPeeler from .wobble import WobbleMatch matching_methods = { @@ -8,5 +8,6 @@ "tridesclous": TridesclousPeeler, "circus": CircusPeeler, "circus-omp": CircusOMPPeeler, + 'circus-omp-svd' : CircusOMPSVDPeeler, "wobble": WobbleMatch, -} +} \ No newline at end of file From a6b4774000159f8db5439072acc8bdec4757d26b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 12:00:19 +0000 Subject: [PATCH 14/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../clustering/clustering_tools.py | 14 ++++---------- .../sortingcomponents/matching/circus.py | 2 +- .../sortingcomponents/matching/method_list.py | 4 ++-- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 455af3ddfd..17c38e2f8a 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -533,13 +533,7 @@ def remove_duplicates( def remove_duplicates_via_matching( - waveform_extractor, - noise_levels, - peak_labels, - method_kwargs={}, - job_kwargs={}, - tmp_folder=None, - method='circus-omp' + waveform_extractor, noise_levels, peak_labels, method_kwargs={}, job_kwargs={}, tmp_folder=None, method="circus-omp" ): from spikeinterface.sortingcomponents.matching import find_spikes_from_templates from spikeinterface import get_noise_levels @@ -613,7 +607,7 @@ def remove_duplicates_via_matching( spikes, computed = find_spikes_from_templates( sub_recording, method=method, method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs ) - if method == 'circus-omp-vsd': + if method == "circus-omp-vsd": method_kwargs.update( { "overlaps": computed["overlaps"], @@ -627,13 +621,13 @@ def remove_duplicates_via_matching( "sparsity_mask": computed["sparsity_mask"], } ) - elif method == 'circus-omp': + elif method == "circus-omp": method_kwargs.update( { "overlaps": computed["overlaps"], "templates": computed["templates"], "norms": computed["norms"], - "sparsities": computed["sparsities"] + "sparsities": computed["sparsities"], } ) valid = (spikes["sample_index"] >= half_marging) * (spikes["sample_index"] 
< duration + half_marging) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index e7bdcd161c..502c887ac4 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -128,7 +128,6 @@ def _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True): return ret - def compute_overlaps(templates, num_samples, num_channels, sparsities): num_templates = len(templates) @@ -475,6 +474,7 @@ def main_function(cls, traces, d): return spikes + class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): """ Orthogonal Matching Pursuit inspired from Spyking Circus sorter diff --git a/src/spikeinterface/sortingcomponents/matching/method_list.py b/src/spikeinterface/sortingcomponents/matching/method_list.py index 99c2817338..d982943126 100644 --- a/src/spikeinterface/sortingcomponents/matching/method_list.py +++ b/src/spikeinterface/sortingcomponents/matching/method_list.py @@ -8,6 +8,6 @@ "tridesclous": TridesclousPeeler, "circus": CircusPeeler, "circus-omp": CircusOMPPeeler, - 'circus-omp-svd' : CircusOMPSVDPeeler, + "circus-omp-svd": CircusOMPSVDPeeler, "wobble": WobbleMatch, -} \ No newline at end of file +} From 257c74c856254f8ed31365f0629b53baf844fb74 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Wed, 27 Sep 2023 15:52:38 +0200 Subject: [PATCH 15/25] Slight misalignment --- .../sortingcomponents/matching/circus.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index e7bdcd161c..04d780bb6b 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -714,8 +714,8 @@ def main_function(cls, traces, d): if num_selection > 0: delta_t = selection[1] - peak_index - idx = np.where((delta_t < neighbor_window) & (delta_t >= -num_samples))[0] - myline = num_samples + delta_t[idx] + idx = np.where((delta_t < num_samples) & (delta_t > -num_samples))[0] + myline = neighbor_window + delta_t[idx] myindices = selection[0, idx] local_overlaps = overlaps[best_cluster_ind] @@ -731,7 +731,7 @@ def main_function(cls, traces, d): table = np.zeros(num_templates, dtype=int) table[overlapping_templates] = np.arange(len(overlapping_templates)) - M[num_selection, myindices[mask]] = local_overlaps[table[a], b] + M[num_selection, idx[mask]] = local_overlaps[table[a], b] if vicinity == 0: scipy.linalg.solve_triangular( @@ -797,8 +797,8 @@ def main_function(cls, traces, d): overlapping_templates = d["units_overlaps"][tmp_best] if not tmp_peak in neighbors.keys(): - idx = [max(0, tmp_peak - num_samples), min(num_peaks, tmp_peak + neighbor_window)] - tdx = [num_samples + idx[0] - tmp_peak, num_samples + idx[1] - tmp_peak] + idx = [max(0, tmp_peak - neighbor_window), min(num_peaks, tmp_peak + num_samples)] + tdx = [neighbor_window + idx[0] - tmp_peak, num_samples + idx[1] - tmp_peak - 1] neighbors[tmp_peak] = {"idx": idx, "tdx": tdx} idx = neighbors[tmp_peak]["idx"] From 0a2c0f618b11374558f536147845a1cbc6710661 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Wed, 27 Sep 2023 16:01:21 +0200 Subject: [PATCH 16/25] Default SVD Peeler is now good to go --- src/spikeinterface/sorters/internal/spyking_circus2.py | 2 +- .../sortingcomponents/clustering/clustering_tools.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/spikeinterface/sorters/internal/spyking_circus2.py b/src/spikeinterface/sorters/internal/spyking_circus2.py index db3d88f116..7097b9e56b 100644 --- a/src/spikeinterface/sorters/internal/spyking_circus2.py +++ b/src/spikeinterface/sorters/internal/spyking_circus2.py @@ -152,7 +152,7 @@ def _run_from_folder(cls, sorter_output_folder, params, verbose): matching_job_params["chunk_duration"] = "100ms" spikes = find_spikes_from_templates( - recording_f, method="circus-omp", method_kwargs=matching_params, **matching_job_params + recording_f, method="circus-omp-svd", method_kwargs=matching_params, **matching_job_params ) if verbose: diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 17c38e2f8a..273b1402fe 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -533,7 +533,7 @@ def remove_duplicates( def remove_duplicates_via_matching( - waveform_extractor, noise_levels, peak_labels, method_kwargs={}, job_kwargs={}, tmp_folder=None, method="circus-omp" + waveform_extractor, noise_levels, peak_labels, method_kwargs={}, job_kwargs={}, tmp_folder=None, method="circus-omp-svd" ): from spikeinterface.sortingcomponents.matching import find_spikes_from_templates from spikeinterface import get_noise_levels From 5fbc88d416f863784ee7ed890c45f04726d4dc5a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:01:43 +0000 Subject: [PATCH 17/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../sortingcomponents/clustering/clustering_tools.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index 273b1402fe..af3a9cb86a 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -533,7 +533,13 @@ def remove_duplicates( def remove_duplicates_via_matching( - waveform_extractor, noise_levels, peak_labels, method_kwargs={}, job_kwargs={}, tmp_folder=None, method="circus-omp-svd" + waveform_extractor, + noise_levels, + peak_labels, + method_kwargs={}, + job_kwargs={}, + tmp_folder=None, + method="circus-omp-svd", ): from spikeinterface.sortingcomponents.matching import find_spikes_from_templates from spikeinterface import get_noise_levels From 9f45f2e5757e9f3dcb890a65d69bdecbca8c7eb6 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Wed, 27 Sep 2023 17:35:31 +0200 Subject: [PATCH 18/25] Enhance the clustering --- .../sorters/internal/spyking_circus2.py | 2 +- .../clustering/random_projections.py | 106 +++++++++--------- .../sortingcomponents/features_from_peaks.py | 27 +++-- 3 files changed, 71 insertions(+), 64 deletions(-) diff --git a/src/spikeinterface/sorters/internal/spyking_circus2.py b/src/spikeinterface/sorters/internal/spyking_circus2.py index 7097b9e56b..55a36d26d5 100644 --- a/src/spikeinterface/sorters/internal/spyking_circus2.py +++ b/src/spikeinterface/sorters/internal/spyking_circus2.py @@ -20,7 +20,7 @@ class Spykingcircus2Sorter(ComponentsBasedSorter): sorter_name = "spykingcircus2" _default_params = { - "general": {"ms_before": 2, "ms_after": 2, "radius_um": 75}, + "general": {"ms_before": 2, 
"ms_after": 2, "radius_um": 100}, "waveforms": {"max_spikes_per_unit": 200, "overwrite": True, "sparse": True, "method": "ptp", "threshold": 1}, "filtering": {"dtype": "float32"}, "detection": {"peak_sign": "neg", "detect_threshold": 5}, diff --git a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py index be8ecd6702..8c0cab07c6 100644 --- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py +++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py @@ -18,7 +18,9 @@ from .clustering_tools import remove_duplicates, remove_duplicates_via_matching, remove_duplicates_via_dip from spikeinterface.core import NumpySorting from spikeinterface.core import extract_waveforms -from spikeinterface.sortingcomponents.features_from_peaks import compute_features_from_peaks, EnergyFeature +from spikeinterface.sortingcomponents.waveforms.savgol_denoiser import SavGolDenoiser +from spikeinterface.sortingcomponents.features_from_peaks import RandomProjectionsFeature +from spikeinterface.core.node_pipeline import run_node_pipeline, ExtractDenseWaveforms, PeakRetriever class RandomProjectionClustering: @@ -34,17 +36,17 @@ class RandomProjectionClustering: "cluster_selection_method": "leaf", }, "cleaning_kwargs": {}, + "waveforms" : {"ms_before" : 2, "ms_after" : 2, "max_spikes_per_unit": 100}, "radius_um": 100, - "max_spikes_per_unit": 200, "selection_method": "closest_to_centroid", - "nb_projections": {"ptp": 8, "energy": 2}, - "ms_before": 1.5, - "ms_after": 1.5, + "nb_projections": 10, + "ms_before": 1, + "ms_after": 1, "random_seed": 42, - "shared_memory": False, - "min_values": {"ptp": 0, "energy": 0}, + "smoothing_kwargs" : {"window_length_ms" : 1}, + "shared_memory": True, "tmp_folder": None, - "job_kwargs": {"n_jobs": os.cpu_count(), "chunk_memory": "10M", "verbose": True, "progress_bar": True}, + "job_kwargs": {"n_jobs": os.cpu_count(), "chunk_memory": "100M", "verbose": True, "progress_bar": True}, } @classmethod @@ -74,50 +76,52 @@ def main_function(cls, recording, peaks, params): np.random.seed(d["random_seed"]) - features_params = {} - features_list = [] - - noise_snippets = None - - for proj_type in ["ptp", "energy"]: - if d["nb_projections"][proj_type] > 0: - features_list += [f"random_projections_{proj_type}"] - - if d["min_values"][proj_type] == "auto": - if noise_snippets is None: - num_segments = recording.get_num_segments() - num_chunks = 3 * d["max_spikes_per_unit"] // num_segments - noise_snippets = get_random_data_chunks( - recording, num_chunks_per_segment=num_chunks, chunk_size=num_samples, seed=42 - ) - noise_snippets = noise_snippets.reshape(num_chunks, num_samples, num_chans) - - if proj_type == "energy": - data = np.linalg.norm(noise_snippets, axis=1) - min_values = np.median(data, axis=0) - elif proj_type == "ptp": - data = np.ptp(noise_snippets, axis=1) - min_values = np.median(data, axis=0) - elif d["min_values"][proj_type] > 0: - min_values = d["min_values"][proj_type] - else: - min_values = None - - projections = np.random.randn(num_chans, d["nb_projections"][proj_type]) - features_params[f"random_projections_{proj_type}"] = { - "radius_um": params["radius_um"], - "projections": projections, - "min_values": min_values, - } - - features_data = compute_features_from_peaks( - recording, peaks, features_list, features_params, ms_before=1, ms_after=1, **params["job_kwargs"] + if params["tmp_folder"] is None: + name = 
"".join(random.choices(string.ascii_uppercase + string.digits, k=8)) + tmp_folder = get_global_tmp_folder() / name + else: + tmp_folder = Path(params["tmp_folder"]).absolute() + + ### Then we extract the SVD features + node0 = PeakRetriever(recording, peaks) + node1 = ExtractDenseWaveforms(recording, parents=[node0], return_output=False, + ms_before=params['ms_before'], + ms_after=params['ms_after'] ) - if len(features_data) > 1: - hdbscan_data = np.hstack((features_data[0], features_data[1])) - else: - hdbscan_data = features_data[0] + node2 = SavGolDenoiser(recording, parents=[node0, node1], return_output=False, **params['smoothing_kwargs']) + + projections = np.random.randn(num_chans, d["nb_projections"]) + projections -= projections.mean(0) + projections /= projections.std(0) + + nbefore = int(params['ms_before'] * fs / 1000) + nafter = int(params['ms_after'] * fs / 1000) + nsamples = nbefore + nafter + + import scipy + x = np.random.randn(100, nsamples, num_chans).astype(np.float32) + x = scipy.signal.savgol_filter(x, node2.window_length, node2.order, axis=1) + + ptps = np.ptp(x, axis=1) + a, b = np.histogram(ptps.flatten(), np.linspace(0, 100, 1000)) + ydata = np.cumsum(a)/a.sum() + xdata = b[1:] + + from scipy.optimize import curve_fit + def sigmoid(x, L ,x0, k, b): + y = L / (1 + np.exp(-k*(x-x0))) + b + return (y) + + p0 = [max(ydata), np.median(xdata), 1, min(ydata)] # this is an mandatory initial guess + popt, pcov = curve_fit(sigmoid, xdata, ydata, p0) + + node3 = RandomProjectionsFeature(recording, parents=[node0, node2], return_output=True, + projections=projections, radius_um=params['radius_um']) + + pipeline_nodes = [node0, node1, node2, node3] + + hdbscan_data = run_node_pipeline(recording, pipeline_nodes, params["job_kwargs"]) import sklearn @@ -132,7 +136,7 @@ def main_function(cls, recording, peaks, params): all_indices = np.arange(0, peak_labels.size) - max_spikes = params["max_spikes_per_unit"] + max_spikes = params['waveforms']["max_spikes_per_unit"] selection_method = params["selection_method"] for unit_ind in labels: diff --git a/src/spikeinterface/sortingcomponents/features_from_peaks.py b/src/spikeinterface/sortingcomponents/features_from_peaks.py index bd82ffa0a6..2f1acb6a19 100644 --- a/src/spikeinterface/sortingcomponents/features_from_peaks.py +++ b/src/spikeinterface/sortingcomponents/features_from_peaks.py @@ -184,41 +184,44 @@ def __init__( return_output=True, parents=None, projections=None, - radius_um=150.0, - min_values=None, + sigmoid=None, + radius_um=None ): PipelineNode.__init__(self, recording, return_output=return_output, parents=parents) self.projections = projections - self.radius_um = radius_um - self.min_values = min_values - + self.sigmoid = sigmoid self.contact_locations = recording.get_channel_locations() self.channel_distance = get_channel_distances(recording) self.neighbours_mask = self.channel_distance < radius_um - - self._kwargs.update(dict(projections=projections, radius_um=radius_um, min_values=min_values)) - + self.radius_um = radius_um + self._kwargs.update(dict(projections=projections, sigmoid=sigmoid, radius_um=radius_um)) self._dtype = recording.get_dtype() def get_dtype(self): return self._dtype + def _sigmoid(self, x): + L, x0, k, b = self.sigmoid + y = L / (1 + np.exp(-k*(x-x0))) + b + return y + def compute(self, traces, peaks, waveforms): all_projections = np.zeros((peaks.size, self.projections.shape[1]), dtype=self._dtype) + for main_chan in np.unique(peaks["channel_index"]): (idx,) = np.nonzero(peaks["channel_index"] == 
main_chan) (chan_inds,) = np.nonzero(self.neighbours_mask[main_chan]) local_projections = self.projections[chan_inds, :] - wf_ptp = (waveforms[idx][:, :, chan_inds]).ptp(axis=1) + wf_ptp = np.ptp(waveforms[idx][:, :, chan_inds], axis=1) - if self.min_values is not None: - wf_ptp = (wf_ptp / self.min_values[chan_inds]) ** 4 + if self.sigmoid is not None: + wf_ptp *= self._sigmoid(wf_ptp) denom = np.sum(wf_ptp, axis=1) mask = denom != 0 - all_projections[idx[mask]] = np.dot(wf_ptp[mask], local_projections) / (denom[mask][:, np.newaxis]) + return all_projections From 3cbf8f8fc8267ff0bffd8c340514db983e059a0c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Sep 2023 15:36:51 +0000 Subject: [PATCH 19/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../clustering/random_projections.py | 38 +++++++++++-------- .../sortingcomponents/features_from_peaks.py | 8 ++-- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py index 8c0cab07c6..f8cad2cf3f 100644 --- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py +++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py @@ -36,14 +36,14 @@ class RandomProjectionClustering: "cluster_selection_method": "leaf", }, "cleaning_kwargs": {}, - "waveforms" : {"ms_before" : 2, "ms_after" : 2, "max_spikes_per_unit": 100}, + "waveforms": {"ms_before": 2, "ms_after": 2, "max_spikes_per_unit": 100}, "radius_um": 100, "selection_method": "closest_to_centroid", "nb_projections": 10, "ms_before": 1, "ms_after": 1, "random_seed": 42, - "smoothing_kwargs" : {"window_length_ms" : 1}, + "smoothing_kwargs": {"window_length_ms": 1}, "shared_memory": True, "tmp_folder": None, "job_kwargs": {"n_jobs": os.cpu_count(), "chunk_memory": "100M", "verbose": True, "progress_bar": True}, @@ -84,40 +84,46 @@ def main_function(cls, recording, peaks, params): ### Then we extract the SVD features node0 = PeakRetriever(recording, peaks) - node1 = ExtractDenseWaveforms(recording, parents=[node0], return_output=False, - ms_before=params['ms_before'], - ms_after=params['ms_after'] + node1 = ExtractDenseWaveforms( + recording, parents=[node0], return_output=False, ms_before=params["ms_before"], ms_after=params["ms_after"] ) - node2 = SavGolDenoiser(recording, parents=[node0, node1], return_output=False, **params['smoothing_kwargs']) + node2 = SavGolDenoiser(recording, parents=[node0, node1], return_output=False, **params["smoothing_kwargs"]) projections = np.random.randn(num_chans, d["nb_projections"]) projections -= projections.mean(0) projections /= projections.std(0) - nbefore = int(params['ms_before'] * fs / 1000) - nafter = int(params['ms_after'] * fs / 1000) + nbefore = int(params["ms_before"] * fs / 1000) + nafter = int(params["ms_after"] * fs / 1000) nsamples = nbefore + nafter import scipy + x = np.random.randn(100, nsamples, num_chans).astype(np.float32) x = scipy.signal.savgol_filter(x, node2.window_length, node2.order, axis=1) ptps = np.ptp(x, axis=1) a, b = np.histogram(ptps.flatten(), np.linspace(0, 100, 1000)) - ydata = np.cumsum(a)/a.sum() + ydata = np.cumsum(a) / a.sum() xdata = b[1:] from scipy.optimize import curve_fit - def sigmoid(x, L ,x0, k, b): - y = L / (1 + np.exp(-k*(x-x0))) + b - return (y) - p0 = [max(ydata), np.median(xdata), 1, min(ydata)] # this 
is an mandatory initial guess + def sigmoid(x, L, x0, k, b): + y = L / (1 + np.exp(-k * (x - x0))) + b + return y + + p0 = [max(ydata), np.median(xdata), 1, min(ydata)] # this is an mandatory initial guess popt, pcov = curve_fit(sigmoid, xdata, ydata, p0) - node3 = RandomProjectionsFeature(recording, parents=[node0, node2], return_output=True, - projections=projections, radius_um=params['radius_um']) + node3 = RandomProjectionsFeature( + recording, + parents=[node0, node2], + return_output=True, + projections=projections, + radius_um=params["radius_um"], + ) pipeline_nodes = [node0, node1, node2, node3] @@ -136,7 +142,7 @@ def sigmoid(x, L ,x0, k, b): all_indices = np.arange(0, peak_labels.size) - max_spikes = params['waveforms']["max_spikes_per_unit"] + max_spikes = params["waveforms"]["max_spikes_per_unit"] selection_method = params["selection_method"] for unit_ind in labels: diff --git a/src/spikeinterface/sortingcomponents/features_from_peaks.py b/src/spikeinterface/sortingcomponents/features_from_peaks.py index 2f1acb6a19..b534c2356d 100644 --- a/src/spikeinterface/sortingcomponents/features_from_peaks.py +++ b/src/spikeinterface/sortingcomponents/features_from_peaks.py @@ -185,7 +185,7 @@ def __init__( parents=None, projections=None, sigmoid=None, - radius_um=None + radius_um=None, ): PipelineNode.__init__(self, recording, return_output=return_output, parents=parents) @@ -203,12 +203,12 @@ def get_dtype(self): def _sigmoid(self, x): L, x0, k, b = self.sigmoid - y = L / (1 + np.exp(-k*(x-x0))) + b + y = L / (1 + np.exp(-k * (x - x0))) + b return y def compute(self, traces, peaks, waveforms): all_projections = np.zeros((peaks.size, self.projections.shape[1]), dtype=self._dtype) - + for main_chan in np.unique(peaks["channel_index"]): (idx,) = np.nonzero(peaks["channel_index"] == main_chan) (chan_inds,) = np.nonzero(self.neighbours_mask[main_chan]) @@ -221,7 +221,7 @@ def compute(self, traces, peaks, waveforms): denom = np.sum(wf_ptp, axis=1) mask = denom != 0 all_projections[idx[mask]] = np.dot(wf_ptp[mask], local_projections) / (denom[mask][:, np.newaxis]) - + return all_projections From daddd8cef722a35233dbed530e14775de87b8caa Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Thu, 28 Sep 2023 09:16:51 +0200 Subject: [PATCH 20/25] Adding a lookup table --- .../sortingcomponents/matching/circus.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index 5775589321..1d13eca1df 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -128,6 +128,7 @@ def _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True): return ret + def compute_overlaps(templates, num_samples, num_channels, sparsities): num_templates = len(templates) @@ -474,7 +475,6 @@ def main_function(cls, traces, d): return spikes - class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): """ Orthogonal Matching Pursuit inspired from Spyking Circus sorter @@ -632,6 +632,12 @@ def initialize_and_check_kwargs(cls, recording, kwargs): d["num_templates"] = len(d["templates"]) d["ignored_ids"] = np.array(d["ignored_ids"]) + d["unit_overlaps_tables"] = {} + for i in range(d["num_templates"]): + d["unit_overlaps_tables"][i] = np.zeros(d["num_templates"], dtype=int) + d["unit_overlaps_tables"][i][d["unit_overlaps_indices"][i]] = np.arange(len(d["unit_overlaps_indices"][i])) + + omp_min_sps = d["omp_min_sps"] # 
d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) d["stop_criteria"] = omp_min_sps * np.maximum(d["norms"], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) @@ -720,6 +726,7 @@ def main_function(cls, traces, d): local_overlaps = overlaps[best_cluster_ind] overlapping_templates = d["unit_overlaps_indices"][best_cluster_ind] + table = d["unit_overlaps_tables"][best_cluster_ind] if num_selection == M.shape[0]: Z = np.zeros((2 * num_selection, 2 * num_selection), dtype=np.float32) @@ -728,9 +735,6 @@ def main_function(cls, traces, d): mask = np.isin(myindices, overlapping_templates) a, b = myindices[mask], myline[mask] - - table = np.zeros(num_templates, dtype=int) - table[overlapping_templates] = np.arange(len(overlapping_templates)) M[num_selection, idx[mask]] = local_overlaps[table[a], b] if vicinity == 0: From d7dcbe05f082f5ecd93d9233b9f5ca30ae51a8f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Sep 2023 07:17:14 +0000 Subject: [PATCH 21/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spikeinterface/sortingcomponents/matching/circus.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index 1d13eca1df..44c394aec9 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -128,7 +128,6 @@ def _freq_domain_conv(in1, in2, axes, shape, cache, calc_fast_len=True): return ret - def compute_overlaps(templates, num_samples, num_channels, sparsities): num_templates = len(templates) @@ -475,6 +474,7 @@ def main_function(cls, traces, d): return spikes + class CircusOMPSVDPeeler(BaseTemplateMatchingEngine): """ Orthogonal Matching Pursuit inspired from Spyking Circus sorter @@ -637,7 +637,6 @@ def initialize_and_check_kwargs(cls, recording, kwargs): d["unit_overlaps_tables"][i] = np.zeros(d["num_templates"], dtype=int) d["unit_overlaps_tables"][i][d["unit_overlaps_indices"][i]] = np.arange(len(d["unit_overlaps_indices"][i])) - omp_min_sps = d["omp_min_sps"] # d["stop_criteria"] = omp_min_sps * np.sqrt(d["noise_levels"].sum() * d["num_samples"]) d["stop_criteria"] = omp_min_sps * np.maximum(d["norms"], np.sqrt(d["noise_levels"].sum() * d["num_samples"])) From d623da38f38924b9c5857abdeccf16891c729bc7 Mon Sep 17 00:00:00 2001 From: Pierre Yger Date: Thu, 28 Sep 2023 10:21:08 +0200 Subject: [PATCH 22/25] typos for cleaning via matching --- .../clustering/clustering_tools.py | 2 +- .../clustering/random_projections.py | 2 +- .../sortingcomponents/matching/circus.py | 15 ++++++++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py index af3a9cb86a..28a1a63065 100644 --- a/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py +++ b/src/spikeinterface/sortingcomponents/clustering/clustering_tools.py @@ -613,7 +613,7 @@ def remove_duplicates_via_matching( spikes, computed = find_spikes_from_templates( sub_recording, method=method, method_kwargs=method_kwargs, extra_outputs=True, **job_kwargs ) - if method == "circus-omp-vsd": + if method == "circus-omp-svd": method_kwargs.update( { "overlaps": computed["overlaps"], diff --git 
a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py index f8cad2cf3f..df9290a1f5 100644 --- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py +++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py @@ -127,7 +127,7 @@ def sigmoid(x, L, x0, k, b): pipeline_nodes = [node0, node1, node2, node3] - hdbscan_data = run_node_pipeline(recording, pipeline_nodes, params["job_kwargs"]) + hdbscan_data = run_node_pipeline(recording, pipeline_nodes, params["job_kwargs"], job_name="extracting features") import sklearn diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index 1d13eca1df..9e02aa4ff6 100644 --- a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -686,13 +686,18 @@ def main_function(cls, traces, d): scalar_products = np.zeros(conv_shape, dtype=np.float32) # Filter using overlap-and-add convolution - spatially_filtered_data = np.matmul(d["spatial"], traces.T[np.newaxis, :, :]) - scaled_filtered_data = spatially_filtered_data * d["singular"] - objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d["temporal"], axes=2, mode="valid") - scalar_products += np.sum(objective_by_rank, axis=0) - if len(ignored_ids) > 0: + mask = ~np.isin(np.arange(num_templates), ignored_ids) + spatially_filtered_data = np.matmul(d["spatial"][:, mask, :], traces.T[np.newaxis, :, :]) + scaled_filtered_data = spatially_filtered_data * d["singular"][:, mask, :] + objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d["temporal"][:, mask, :], axes=2, mode="valid") + scalar_products[mask] += np.sum(objective_by_rank, axis=0) scalar_products[ignored_ids] = -np.inf + else: + spatially_filtered_data = np.matmul(d["spatial"], traces.T[np.newaxis, :, :]) + scaled_filtered_data = spatially_filtered_data * d["singular"] + objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d["temporal"], axes=2, mode="valid") + scalar_products += np.sum(objective_by_rank, axis=0) num_spikes = 0 From fdb84668137ba71b1ca36787032551da52764842 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Sep 2023 08:21:36 +0000 Subject: [PATCH 23/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../sortingcomponents/clustering/random_projections.py | 4 +++- src/spikeinterface/sortingcomponents/matching/circus.py | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/spikeinterface/sortingcomponents/clustering/random_projections.py b/src/spikeinterface/sortingcomponents/clustering/random_projections.py index df9290a1f5..864548e7d4 100644 --- a/src/spikeinterface/sortingcomponents/clustering/random_projections.py +++ b/src/spikeinterface/sortingcomponents/clustering/random_projections.py @@ -127,7 +127,9 @@ def sigmoid(x, L, x0, k, b): pipeline_nodes = [node0, node1, node2, node3] - hdbscan_data = run_node_pipeline(recording, pipeline_nodes, params["job_kwargs"], job_name="extracting features") + hdbscan_data = run_node_pipeline( + recording, pipeline_nodes, params["job_kwargs"], job_name="extracting features" + ) import sklearn diff --git a/src/spikeinterface/sortingcomponents/matching/circus.py b/src/spikeinterface/sortingcomponents/matching/circus.py index b963447ba2..358691cd25 100644 --- 
a/src/spikeinterface/sortingcomponents/matching/circus.py +++ b/src/spikeinterface/sortingcomponents/matching/circus.py @@ -687,16 +687,18 @@ def main_function(cls, traces, d): # Filter using overlap-and-add convolution if len(ignored_ids) > 0: mask = ~np.isin(np.arange(num_templates), ignored_ids) - spatially_filtered_data = np.matmul(d["spatial"][:, mask, :], traces.T[np.newaxis, :, :]) + spatially_filtered_data = np.matmul(d["spatial"][:, mask, :], traces.T[np.newaxis, :, :]) scaled_filtered_data = spatially_filtered_data * d["singular"][:, mask, :] - objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d["temporal"][:, mask, :], axes=2, mode="valid") + objective_by_rank = scipy.signal.oaconvolve( + scaled_filtered_data, d["temporal"][:, mask, :], axes=2, mode="valid" + ) scalar_products[mask] += np.sum(objective_by_rank, axis=0) scalar_products[ignored_ids] = -np.inf else: spatially_filtered_data = np.matmul(d["spatial"], traces.T[np.newaxis, :, :]) scaled_filtered_data = spatially_filtered_data * d["singular"] objective_by_rank = scipy.signal.oaconvolve(scaled_filtered_data, d["temporal"], axes=2, mode="valid") - scalar_products += np.sum(objective_by_rank, axis=0) + scalar_products += np.sum(objective_by_rank, axis=0) num_spikes = 0 From 986fe6f50fd33a81fd3bc8ff26e05db22964bf5d Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Thu, 28 Sep 2023 13:15:14 +0200 Subject: [PATCH 24/25] CellExplorer: fix tests and deprecation (#2048) * CellExplorer: fix tests and deprecation * Drop session_info_matfile_path from __init__ --- .../cellexplorersortingextractor.py | 21 ------------------- .../tests/test_cellexplorerextractor.py | 2 +- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/src/spikeinterface/extractors/cellexplorersortingextractor.py b/src/spikeinterface/extractors/cellexplorersortingextractor.py index b40b998103..31241a4147 100644 --- a/src/spikeinterface/extractors/cellexplorersortingextractor.py +++ b/src/spikeinterface/extractors/cellexplorersortingextractor.py @@ -40,7 +40,6 @@ def __init__( sampling_frequency: float | None = None, session_info_file_path: str | Path | None = None, spikes_matfile_path: str | Path | None = None, - session_info_matfile_path: str | Path | None = None, ): try: from pymatreader import read_mat @@ -67,26 +66,6 @@ def __init__( ) file_path = spikes_matfile_path if file_path is None else file_path - if session_info_matfile_path is not None: - # Raise an error if the warning period has expired - deprecation_issued = datetime.datetime(2023, 4, 1) - deprecation_deadline = deprecation_issued + datetime.timedelta(days=180) - if datetime.datetime.now() > deprecation_deadline: - raise ValueError( - "The session_info_matfile_path argument is no longer supported in. Use session_info_file_path instead." - ) - - # Otherwise, issue a DeprecationWarning - else: - warnings.warn( - "The session_info_matfile_path argument is deprecated and will be removed in six months. 
" - "Use session_info_file_path instead.", - DeprecationWarning, - ) - session_info_file_path = ( - session_info_matfile_path if session_info_file_path is None else session_info_file_path - ) - self.spikes_cellinfo_path = Path(file_path) self.session_path = self.spikes_cellinfo_path.parent self.session_id = self.spikes_cellinfo_path.stem.split(".")[0] diff --git a/src/spikeinterface/extractors/tests/test_cellexplorerextractor.py b/src/spikeinterface/extractors/tests/test_cellexplorerextractor.py index 35de8a23e2..c4c8d0c993 100644 --- a/src/spikeinterface/extractors/tests/test_cellexplorerextractor.py +++ b/src/spikeinterface/extractors/tests/test_cellexplorerextractor.py @@ -26,7 +26,7 @@ class CellExplorerSortingTest(SortingCommonTestSuite, unittest.TestCase): ( "cellexplorer/dataset_2/20170504_396um_0um_merge.spikes.cellinfo.mat", { - "session_info_matfile_path": local_folder + "session_info_file_path": local_folder / "cellexplorer/dataset_2/20170504_396um_0um_merge.sessionInfo.mat" }, ), From 719ffc9466f2f5f91ed14129fd514379a4c5962f Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 28 Sep 2023 13:15:39 +0200 Subject: [PATCH 25/25] minor corrections to matlab documentation (#2047) --- doc/how_to/load_matlab_data.rst | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/how_to/load_matlab_data.rst b/doc/how_to/load_matlab_data.rst index aaca718096..e12d83810a 100644 --- a/doc/how_to/load_matlab_data.rst +++ b/doc/how_to/load_matlab_data.rst @@ -30,7 +30,7 @@ Here, we present a MATLAB code that creates a random dataset and writes it to a Loading Data in SpikeInterface ------------------------------ -After executing the above MATLAB code, a binary file named `your_data_as_a_binary.bin` will be created in your MATLAB directory. To load this file in Python, you'll need its full path. +After executing the above MATLAB code, a binary file named :code:`your_data_as_a_binary.bin` will be created in your MATLAB directory. To load this file in Python, you'll need its full path. Use the following Python script to load the binary data into SpikeInterface: @@ -55,7 +55,7 @@ Use the following Python script to load the binary data into SpikeInterface: # Load data using SpikeInterface recording = si.read_binary(file_path, sampling_frequency=sampling_frequency, - num_channels=num_channels, dtype=dtype) + num_channels=num_channels, dtype=dtype) # Confirm that the data was loaded correctly by comparing the data shapes and see they match the MATLAB data print(recording.get_num_frames(), recording.get_num_channels()) @@ -65,18 +65,18 @@ Follow the steps above to seamlessly import your MATLAB data into SpikeInterface Common Pitfalls & Tips ---------------------- -1. **Data Shape**: Make sure your MATLAB data matrix's first dimension is samples/time and the second is channels. If your time is in the second dimension, use `time_axis=1` in `si.read_binary()`. +1. **Data Shape**: Make sure your MATLAB data matrix's first dimension is samples/time and the second is channels. If your time is in the second dimension, use :code:`time_axis=1` in :code:`si.read_binary()`. 2. **File Path**: Always double-check the Python file path. 3. **Data Type Consistency**: Ensure data types between MATLAB and Python are consistent. MATLAB's `double` is equivalent to Numpy's `float64`. 4. **Sampling Frequency**: Set the appropriate sampling frequency in Hz for SpikeInterface. -5. **Transition to Python**: Moving from MATLAB to Python can be challenging. 
For newcomers to Python, consider reviewing numpy's [Numpy for MATLAB Users](https://numpy.org/doc/stable/user/numpy-for-matlab-users.html) guide. +5. **Transition to Python**: Moving from MATLAB to Python can be challenging. For newcomers to Python, consider reviewing numpy's `Numpy for MATLAB Users <https://numpy.org/doc/stable/user/numpy-for-matlab-users.html>`_ guide. Using gains and offsets for integer data ---------------------------------------- Raw data formats often store data as integer values for memory efficiency. To give these integers meaningful physical units, you can apply a gain and an offset. -In SpikeInterface, you can use the `gain_to_uV` and `offset_to_uV` parameters, since traces are handled in microvolts (uV). Both parameters can be integrated into the `read_binary` function. -If your data in MATLAB is stored as `int16`, and you know the gain and offset, you can use the following code to load the data: +In SpikeInterface, you can use the :code:`gain_to_uV` and :code:`offset_to_uV` parameters, since traces are handled in microvolts (uV). Both parameters can be integrated into the :code:`read_binary` function. +If your data in MATLAB is stored as :code:`int16`, and you know the gain and offset, you can use the following code to load the data: .. code-block:: python @@ -90,7 +90,8 @@ If your data in MATLAB is stored as `int16`, and you know the gain and offset, y num_channels=num_channels, dtype=dtype_int, gain_to_uV=gain_to_uV, offset_to_uV=offset_to_uV) - recording.get_traces(return_scaled=True) # Return traces in micro volts (uV) + recording.get_traces() # Return traces in original units [type: int] + recording.get_traces(return_scaled=True) # Return traces in micro volts (uV) [type: float] This will equip your recording object with capabilities to convert the data to float values in uV using the :code:`get_traces()` method with the :code:`return_scaled` parameter set to :code:`True`.
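Editor's note (not part of the patches above): the gain/offset mechanism described in this documentation change amounts to scaling the integer samples as ``traces * gain_to_uV + offset_to_uV``. The short sketch below illustrates that relation for a binary file; the file name, gain, and offset values are placeholders reused from the example above, not values defined by these patches.

.. code-block:: python

    import numpy as np
    import spikeinterface as si

    # Placeholder parameters, adjust to your own MATLAB export
    file_path = "your_data_as_a_binary.bin"
    sampling_frequency = 30_000.0
    num_channels = 384
    gain_to_uV = 0.195
    offset_to_uV = 0

    recording = si.read_binary(file_path, sampling_frequency=sampling_frequency,
                               num_channels=num_channels, dtype="int16",
                               gain_to_uV=gain_to_uV, offset_to_uV=offset_to_uV)

    raw = recording.get_traces(end_frame=1000)                         # integer samples
    scaled = recording.get_traces(end_frame=1000, return_scaled=True)  # float values in uV

    # return_scaled applies the gain and offset to the integer samples
    np.testing.assert_allclose(scaled, raw.astype("float32") * gain_to_uV + offset_to_uV)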