From 2c1ebca773c365ba5c2c8a71c6470d2792ff5f25 Mon Sep 17 00:00:00 2001
From: JoeZiminski <Joseph.j.ziminski@gmail.com>
Date: Mon, 16 Dec 2024 20:46:15 +0000
Subject: [PATCH] Tidying up, begin fixing alignment alg.

---
 .../alignment_utils.py                        | 137 ++++++++++++++----
 .../session_alignment.py                      |  88 +----------
 2 files changed, 119 insertions(+), 106 deletions(-)

diff --git a/src/spikeinterface/preprocessing/inter_session_alignment/alignment_utils.py b/src/spikeinterface/preprocessing/inter_session_alignment/alignment_utils.py
index 710ce67a14..69f196e6fa 100644
--- a/src/spikeinterface/preprocessing/inter_session_alignment/alignment_utils.py
+++ b/src/spikeinterface/preprocessing/inter_session_alignment/alignment_utils.py
@@ -1,8 +1,3 @@
-from signal import signal
-
-from toolz import first
-from torch.onnx.symbolic_opset11 import chunk
-
 from spikeinterface import BaseRecording
 import numpy as np
 from spikeinterface.sortingcomponents.motion.motion_utils import make_2d_motion_histogram
@@ -62,9 +57,7 @@ def get_activity_histogram(
         peak_locations,
         weight_with_amplitude=False,
         direction="y",
-        bin_s=(
-            bin_s if bin_s is not None else recording.get_duration(segment_index=0)
-        ),  # TODO: doube cehck is this already scaling?
+        bin_s=(bin_s if bin_s is not None else recording.get_duration(segment_index=0)),
         bin_um=None,
         hist_margin_um=None,
         spatial_bin_edges=spatial_bin_edges,
@@ -95,15 +88,20 @@ def get_bin_centers(bin_edges):
 
 def estimate_chunk_size(scaled_activity_histogram):
     """
-    Get an estimate of chunk size such that
-    the 80th percentile of the firing rate will be
-    estimated within 10% 90% of the time,
+    Estimate a chunk size based on the firing rate. Intuitively, we
+    want longer chunk size to better estimate low firing rates. The
+    estimation computes a summary of the firing rates for the session
+    by taking the value 25% of the max of the activity histogram.
+
+    Then, assuming Poisson firing (and using the CLT), the chunk size
+    that will estimate this firing rate to within 10% of its value,
+    90% of the time, is computed.
 
-    I think a better way is to take the peaks above half width and find the min.
-    Or just to take the 50th percentile...? NO. Because all peaks might be similar heights
+    Parameters
+    ----------
 
-    corrected based on assumption
-    of Poisson firing (based on CLT).
+    scaled_activity_histogram: np.ndarray
+        The activity histogram scaled to firing rate in Hz.
 
     TODO
     ----
@@ -162,7 +160,7 @@ def get_chunked_hist_supremum(chunked_session_histograms):
 
     min_hist = np.min(chunked_session_histograms, axis=0)
 
-    scaled_range = (max_hist - min_hist) / max_hist  # TODO: no idea if this is a good idea or not
+    scaled_range = (max_hist - min_hist) / (max_hist + 1e-12)
 
     return max_hist, scaled_range
 
@@ -201,28 +199,31 @@ def get_chunked_hist_eigenvector(chunked_session_histograms):
     """
     TODO: a little messy with the 2D stuff. Will probably deprecate anyway.
     """
-    if chunked_session_histograms.shape[0] == 1:  # TODO: handle elsewhere
+    if chunked_session_histograms.shape[0] == 1:
         return chunked_session_histograms.squeeze(), None
 
     is_2d = chunked_session_histograms.ndim == 3
 
     if is_2d:
-        num_hist, num_spat_bin, num_amp_bin = chunked_histograms.shape
+        num_hist, num_spat_bin, num_amp_bin = chunked_session_histograms.shape
         chunked_session_histograms = np.reshape(chunked_session_histograms, (num_hist, num_spat_bin * num_amp_bin))
 
     A = chunked_session_histograms
     S = (1 / A.shape[0]) * A.T @ A
 
-    U, S, Vh = np.linalg.svd(S)  # TODO: this is already symmetric PSD so use eig
+    L, U = np.linalg.eigh(S)
 
-    first_eigenvector = U[:, 0] * np.sqrt(S[0])
-    first_eigenvector = np.abs(first_eigenvector)  # sometimes the eigenvector can be negative
+    first_eigenvector = U[:, -1] * np.sqrt(L[-1])
+    first_eigenvector = np.abs(first_eigenvector)  # sometimes the eigenvector is negative
 
+    # Project all vectors (histograms) onto the principal component,
+    # then take the standard deviation in each dimension (over bins)
     v1 = first_eigenvector[:, np.newaxis]
-    reconstruct = (A @ v1) @ v1.T
-    v1_std = np.std(np.sqrt(reconstruct), axis=0, ddof=0)  # TODO: double check sqrt works out
+    projection_onto_v1 = (A @ v1 @ v1.T) / (v1.T @ v1)
 
-    if is_2d:
+    v1_std = np.std(projection_onto_v1, axis=0)
+
+    if is_2d:  # TODO: double check this
         first_eigenvector = np.reshape(first_eigenvector, (num_spat_bin, num_amp_bin))
         v1_std = np.reshape(v1_std, (num_spat_bin, num_amp_bin))
 
@@ -423,7 +424,9 @@ def compute_histogram_crosscorrelation(
                 windowed_histogram_i = session_histogram_list[i, :] * window
                 windowed_histogram_j = session_histogram_list[j, :] * window
 
-                xcorr = np.correlate(windowed_histogram_i, windowed_histogram_j, mode="full")
+                xcorr = np.correlate(
+                    windowed_histogram_i, windowed_histogram_j, mode="full"
+                )  # TODO: add weight option.
 
                 if num_shifts_block:
                     window_indices = np.arange(center_bin - num_shifts_block, center_bin + num_shifts_block)
@@ -495,3 +498,79 @@ def shift_array_fill_zeros(array: np.ndarray, shift: int) -> np.ndarray:
     cut_padded_array = padded_hist[abs_shift:] if shift >= 0 else padded_hist[:-abs_shift]
 
     return cut_padded_array
+
+
+def akima_interpolate_nonrigid_shifts(
+    non_rigid_shifts: np.ndarray,
+    non_rigid_window_centers: np.ndarray,
+    spatial_bin_centers: np.ndarray,
+):
+    """
+    Perform Akima spline interpolation on a set of non-rigid shifts.
+    The non-rigid shifts are per segment of the probe, each segment
+    containing a number of channels. Interpolating these non-rigid
+    shifts to the spatial bin centers gives a more accurate shift
+    per channel.
+
+    Parameters
+    ----------
+    non_rigid_shifts : np.ndarray
+    non_rigid_window_centers : np.ndarray
+    spatial_bin_centers : np.ndarray
+
+    Returns
+    -------
+    interp_nonrigid_shifts : np.ndarray
+        An array (length num_spatial_bins) of shifts
+        interpolated from the non-rigid shifts.
+
+    TODO
+    ----
+    Requires scipy >= 1.14 (for the Akima1DInterpolator `extrapolate` argument).
+    """
+    from scipy.interpolate import Akima1DInterpolator
+
+    x = non_rigid_window_centers
+    xs = spatial_bin_centers
+
+    num_sessions = non_rigid_shifts.shape[0]
+    num_bins = spatial_bin_centers.shape[0]
+
+    interp_nonrigid_shifts = np.zeros((num_sessions, num_bins))
+    for ses_idx in range(num_sessions):
+
+        y = non_rigid_shifts[ses_idx]
+        y_new = Akima1DInterpolator(x, y, method="akima", extrapolate=True)(xs)
+        interp_nonrigid_shifts[ses_idx, :] = y_new
+
+    return interp_nonrigid_shifts
+
+
+def get_shifts_from_session_matrix(alignment_order: str, session_offsets_matrix: np.ndarray):
+    """
+    Given a matrix of displacements between all sessions, find the
+    shifts (one per session) to bring the sessions into alignment.
+
+    Parameters
+    ----------
+    alignment_order : "to_middle" or "to_session_N" where
+        "N" is the 1-based number of the session to align to.
+    session_offsets_matrix : np.ndarray
+        The num_sessions x num_sessions symmetric matrix
+        of displacements between all sessions, generated by
+        `_compute_session_alignment()`.
+
+    Returns
+    -------
+    optimal_shift_indices : np.ndarray
+        A 1 x num_sessions array of shifts to apply to
+        each session in order to bring all sessions into
+        alignment.
+    """
+    if alignment_order == "to_middle":
+        optimal_shift_indices = -np.mean(session_offsets_matrix, axis=0)
+    else:
+        ses_idx = int(alignment_order.split("_")[-1]) - 1
+        optimal_shift_indices = -session_offsets_matrix[ses_idx, :, :]
+
+    return optimal_shift_indices
diff --git a/src/spikeinterface/preprocessing/inter_session_alignment/session_alignment.py b/src/spikeinterface/preprocessing/inter_session_alignment/session_alignment.py
index 330c485433..3ee7eac8d1 100644
--- a/src/spikeinterface/preprocessing/inter_session_alignment/session_alignment.py
+++ b/src/spikeinterface/preprocessing/inter_session_alignment/session_alignment.py
@@ -211,7 +211,7 @@ def align_sessions(
     interpolate_motion_kwargs = copy.deepcopy(interpolate_motion_kwargs)
 
     # Ensure list lengths match and all channel locations are the same across recordings.
-    _check_align_sesssions_inputs(
+    _check_align_sessions_inputs(
         recordings_list, peaks_list, peak_locations_list, alignment_order, estimate_histogram_kwargs
     )
 
@@ -894,11 +894,11 @@ def _compute_session_alignment(
     nonrigid_session_offsets_matrix = alignment_utils.compute_histogram_crosscorrelation(
         shifted_histograms, non_rigid_windows, **compute_alignment_kwargs
     )
-    non_rigid_shifts = _get_shifts_from_session_matrix(alignment_order, nonrigid_session_offsets_matrix)
+    non_rigid_shifts = alignment_utils.get_shifts_from_session_matrix(alignment_order, nonrigid_session_offsets_matrix)
 
     # Akima interpolate the nonrigid bins if required.
     if akima_interp_nonrigid:
-        interp_nonrigid_shifts = _akima_interpolate_nonrigid_shifts(
+        interp_nonrigid_shifts = alignment_utils.akima_interpolate_nonrigid_shifts(
             non_rigid_shifts, non_rigid_window_centers, spatial_bin_centers
         )
         shifts = rigid_shifts + interp_nonrigid_shifts
@@ -944,83 +944,9 @@ def _estimate_rigid_alignment(
         rigid_window,
         **compute_alignment_kwargs,
     )
-    optimal_shift_indices = _get_shifts_from_session_matrix(alignment_order, rigid_session_offsets_matrix)
-
-    return optimal_shift_indices
-
-
-def _akima_interpolate_nonrigid_shifts(
-    non_rigid_shifts: np.ndarray,
-    non_rigid_window_centers: np.ndarray,
-    spatial_bin_centers: np.ndarray,
-):
-    """
-    Perform Akima spline interpolation on a set of non-rigid shifts.
-    The non-rigid shifts are per segment of the probe, each segment
-    containing a number of channels. Interpolating these non-rigid
-    shifts to the spatial bin centers gives a more accurate shift
-    per channel.
-
-    Parameters
-    ----------
-    non_rigid_shifts : np.ndarray
-    non_rigid_window_centers : np.ndarray
-    spatial_bin_centers : np.ndarray
-
-    Returns
-    -------
-    interp_nonrigid_shifts : np.ndarray
-        An array (length num_spatial_bins) of shifts
-        interpolated from the non-rigid shifts.
-
-    TODO
-    ----
-    requires scipy 14
-    """
-    from scipy.interpolate import Akima1DInterpolator
-
-    x = non_rigid_window_centers
-    xs = spatial_bin_centers
-
-    num_sessions = non_rigid_shifts.shape[0]
-    num_bins = spatial_bin_centers.shape[0]
-
-    interp_nonrigid_shifts = np.zeros((num_sessions, num_bins))
-    for ses_idx in range(num_sessions):
-
-        y = non_rigid_shifts[ses_idx]
-        y_new = Akima1DInterpolator(x, y, method="akima", extrapolate=True)(xs)
-        interp_nonrigid_shifts[ses_idx, :] = y_new
-
-    return interp_nonrigid_shifts
-
-
-def _get_shifts_from_session_matrix(alignment_order: str, session_offsets_matrix: np.ndarray):
-    """
-    Given a matrix of displacements between all sessions, find the
-    shifts (one per session) to bring the sessions into alignment.
-
-    Parameters
-    ----------
-    alignment_order : "to_middle" or "to_session_X" where
-        "N" is the number of the session to align to.
-    session_offsets_matrix : np.ndarray
-        The num_sessions x num_sessions symmetric matrix
-        of displacements between all sessions, generated by
-        `_compute_session_alignment()`.
-
-    Returns
-    -------
-    optimal_shift_indices : np.ndarray
-        A 1 x num_sessions array of shifts to apply to
-        each session in order to bring all sessions into
-        alignment.
-    """
-    if alignment_order == "to_middle":
-        optimal_shift_indices = -np.mean(session_offsets_matrix, axis=0)
-    else:
-        ses_idx = int(alignment_order.split("_")[-1]) - 1
-        optimal_shift_indices = -session_offsets_matrix[ses_idx, :, :]
+    optimal_shift_indices = alignment_utils.get_shifts_from_session_matrix(
+        alignment_order, rigid_session_offsets_matrix
+    )
 
     return optimal_shift_indices
 
@@ -1030,7 +956,7 @@ def _get_shifts_from_session_matrix(alignment_order: str, session_offsets_matrix
 # -----------------------------------------------------------------------------
 
 
-def _check_align_sesssions_inputs(
+def _check_align_sessions_inputs(
     recordings_list: list[BaseRecording],
     peaks_list: list[np.ndarray],
     peak_locations_list: list[np.ndarray],