diff --git a/doc/conf.py b/doc/conf.py index 1a5dfb3ec7..4373ec3c36 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -67,6 +67,7 @@ 'numpydoc', 'sphinx.ext.autosectionlabel', 'sphinx_design', + 'sphinxcontrib.jquery', "sphinx.ext.intersphinx", "sphinx.ext.extlinks", "IPython.sphinxext.ipython_directive", diff --git a/doc/index.rst b/doc/index.rst index 080a6ba8ac..96dea055cb 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -58,6 +58,7 @@ SpikeInterface is made of several modules to deal with different aspects of the development/development whatisnew authors + references Other resources diff --git a/doc/references.rst b/doc/references.rst new file mode 100644 index 0000000000..ace51db951 --- /dev/null +++ b/doc/references.rst @@ -0,0 +1,129 @@ +How to Cite +=========== + +If you like SpikeInterface, please star us on `GitHub <https://github.com/SpikeInterface/spikeinterface>`_! +*giving us a star gives a measure of the level of use and interest, which goes a long way to getting funding* + +Please cite SpikeInterface in your papers with our eLife paper: [Buccino]_ + +SpikeInterface stands on the shoulders of giants! +Each method in SpikeInterface draws on (or directly runs) independently-created methods. +Please try to reference the individual works that are important for your analysis pipeline. +If you notice a missing reference, please let us know by `submitting an issue <https://github.com/SpikeInterface/spikeinterface/issues/new>`_ on GitHub. 
+ +Preprocessing Module +-------------------- +If you use one of the following preprocessing methods, please cite the appropriate source: + +- :code:`phase_shift` or :code:`highpass_spatial_filter` [IBL]_ +- :code:`detect_bad_channels(method='coherence+psd')` [IBL]_ +- :code:`common_reference` [Rolston]_ + +Motion Correction +^^^^^^^^^^^^^^^^^ +If you use the :code:`correct_motion` method in the preprocessing module, please cite [Garcia]_ +as well as the references that correspond to the :code:`preset` you used: + +- :code:`nonrigid_accurate` [Windolf]_ [Varol]_ +- :code:`nonrigid_fast_and_accurate` [Windolf]_ [Varol]_ [Pachitariu]_ +- :code:`rigid_fast` *no additional citation needed* +- :code:`kilosort_like` [Pachitariu]_ + +Sorters Module +-------------- +If you use one of the following spike sorting algorithms (i.e. you use the :code:`run_sorter()` method), +please include the appropriate citation for the :code:`sorter_name` parameter you use: +*Note: unless otherwise stated, the reference given is to be used for all versions of the sorter* + +- :code:`combinato` [Niediek]_ +- :code:`hdsort` [Diggelmann]_ +- :code:`herdingspikes` [Muthmann]_ [Hilgen]_ +- :code:`kilosort` [Pachitariu]_ +- :code:`mountainsort` [Chung]_ +- :code:`spykingcircus` [Yger]_ +- :code:`wavclus` [Chaure]_ +- :code:`yass` [Lee]_ + +Qualitymetrics Module +--------------------- +If you use the :code:`qualitymetrics` module, i.e. you use the :code:`analyzer.compute()` +or :code:`compute_quality_metrics()` methods, please include the citations for the :code:`metric_names` that were particularly +important for your research: + +- :code:`amplitude_cutoff` or :code:`isi_violation` [Hill]_ +- :code:`amplitude_median` or :code:`sliding_rp_violation` [IBL]_ +- :code:`drift` [Siegle]_ +- :code:`rp_violation` [Llobet]_ +- :code:`sd_ratio` [Pouzat]_ +- :code:`snr` [Lemon]_ [Jackson]_ +- :code:`synchrony` [Grun]_ + +If you use the :code:`qualitymetrics.pca_metrics` module, i.e. 
you use the +:code:`compute_pc_metrics()` method, please include the citations for the :code:`metric_names` that were particularly +important for your research: + +- :code:`d_prime` [Hill]_ +- :code:`isolation_distance` or :code:`l_ratio` [Schmitzer-Torbert]_ +- :code:`nearest_neighbor` or :code:`nn_isolation` or :code:`nn_noise_overlap` [Chung]_ [Siegle]_ +- :code:`silhouette` [Rousseeuw]_ [Hruschka]_ + +Curation Module +--------------- +If you use the :code:`get_potential_auto_merge` method from the curation module, please cite [Llobet]_ + +References +---------- + +.. [Buccino] `SpikeInterface, a unified framework for spike sorting. 2020. `_ + +.. [Buzsaki] `The Log-Dynamic Brain: How Skewed Distributions Affect Network Operations. 2014. `_ + +.. [Chaure] `A novel and fully automatic spike-sorting implementation with variable number of features. 2018. `_ + +.. [Chung] `A Fully Automated Approach to Spike Sorting. 2017. `_ + +.. [Diggelmann] `Automatic spike sorting for high-density microelectrode arrays. 2018. `_ + +.. [Garcia] `A Modular Implementation to Handle and Benchmark Drift Correction for High-Density Extracellular Recordings. 2024. `_ + +.. [Grun] `Impact of higher-order correlations on coincidence distributions of massively parallel data. 2007. `_ + +.. [Harris] `Temporal interaction between single spikes and complex spike bursts in hippocampal pyramidal cells. 2001. `_ + +.. [Hilgen] `Unsupervised Spike Sorting for Large-Scale, High-Density Multielectrode Arrays. 2017. `_ + +.. [Hill] `Quality Metrics to Accompany Spike Sorting of Extracellular Signals. 2011. `_ + +.. [Hruschka] `Evolutionary algorithms for clustering gene-expression data. 2004. `_ + +.. [IBL] `Spike sorting pipeline for the International Brain Laboratory. 2022. `_ + +.. [Jackson] Quantitative assessment of extracellular multichannel recording quality using measures of cluster separation. Society of Neuroscience Abstract. 2005. + +.. [Lee] `YASS: Yet another spike sorter. 2017. 
`_ + +.. [Lemon] Methods for neuronal recording in conscious animals. IBRO Handbook Series. 1984. + +.. [Llobet] `Automatic post-processing and merging of multiple spike-sorting analyses with Lussac. 2022. `_ + +.. [Muthmann] `Spike Detection for Large Neural Populations Using High Density Multielectrode Arrays. 2015. `_ + +.. [Niediek] `Reliable Analysis of Single-Unit Recordings from the Human Brain under Noisy Conditions: Tracking Neurons over Hours. 2016. `_ + +.. [Pachitariu] `Spike sorting with Kilosort4. 2024. `_ + +.. [Pouzat] `Using noise signature to optimize spike-sorting and to assess neuronal classification quality. 2002. `_ + +.. [Rolston] `Common median referencing for improved action potential detection with multielectrode arrays. 2009. `_ + +.. [Rousseeuw] `Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. 1987. `_ + +.. [Schmitzer-Torbert] `Neuronal Activity in the Rodent Dorsal Striatum in Sequential Navigation: Separation of Spatial and Reward Responses on the Multiple T Task. 2004. `_ + +.. [Siegle] `Survey of Spiking in the Mouse Visual System Reveals Functional Hierarchy. 2021. `_ + +.. [Varol] `Decentralized Motion Inference and Registration of Neuropixel Data. 2021. `_ + +.. [Windolf] `Robust Online Multiband Drift Estimation in Electrophysiology Data. 2022. `_ + +.. [Yger] `A spike sorting toolbox for up to thousands of electrodes validated with ground truth recordings in vitro and in vivo. 2018. 
`_ diff --git a/pyproject.toml b/pyproject.toml index d040a4a36b..a3551d0451 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ full = [ "h5py", "pandas", "xarray", - "scipy<1.13", + "scipy", "scikit-learn", "networkx", "distinctipy", diff --git a/src/spikeinterface/generation/__init__.py b/src/spikeinterface/generation/__init__.py index d521f9dd9b..eae6320e8d 100644 --- a/src/spikeinterface/generation/__init__.py +++ b/src/spikeinterface/generation/__init__.py @@ -13,5 +13,8 @@ ) from .template_database import ( - fetch_templates_from_database, + fetch_template_object_from_database, + fetch_templates_database_info, + list_available_datasets_in_template_database, + query_templates_from_database, ) diff --git a/src/spikeinterface/generation/template_database.py b/src/spikeinterface/generation/template_database.py index a9b5ef0301..e1cba07c8e 100644 --- a/src/spikeinterface/generation/template_database.py +++ b/src/spikeinterface/generation/template_database.py @@ -1,12 +1,134 @@ -from spikeinterface.core.template import Templates import zarr +import functools +import numpy as np + +from spikeinterface.core.template import Templates + +@functools.cache +def fetch_template_object_from_database(dataset="test_templates.zarr") -> Templates: + """ + Fetch a template dataset from the spikeinterface template database. + A dataset is a collection of templates with associated metadata for one specific recording. -def fetch_templates_from_database(dataset="test_templates.zarr") -> Templates: + Parameters + ---------- + dataset : str, default: "test_templates.zarr" + The name of the dataset to fetch. + The dataset must be available in the spikeinterface template database. 
+ Returns + ------- + Templates + The templates object built from the zarr group of the requested dataset. + """ s3_path = f"s3://spikeinterface-template-database/{dataset}/" zarr_group = zarr.open_consolidated(s3_path, storage_options={"anon": True}) templates_object = Templates.from_zarr_group(zarr_group) return templates_object + + +@functools.cache +def fetch_templates_database_info() -> "pandas.DataFrame": + """ + Fetch the information about the templates in the spikeinterface template database. + + Returns + ------- + pd.DataFrame + Dataframe containing the template information. + """ + import pandas as pd + + s3_path = "s3://spikeinterface-template-database/templates.csv" + df = pd.read_csv(s3_path, storage_options={"anon": True}) + + return df + + +def list_available_datasets_in_template_database() -> list: + """ + List all available datasets in the spikeinterface template database. + + Returns + ------- + list + List of available datasets. + """ + df = fetch_templates_database_info() + datasets = np.unique(df["dataset"]).tolist() + + return datasets + + +def query_templates_from_database(template_df: "pandas.DataFrame", verbose: bool = False) -> Templates: + """ + Retrieve templates from the spikeinterface template database (set verbose=True to print how many datasets are fetched). + + Parameters + ---------- + template_df : pd.DataFrame + Dataframe containing the template information, obtained by slicing/querying the output of fetch_templates_database_info. + + Returns + ------- + Templates + The templates object gathering the selected templates from all requested datasets. 
+ """ + import pandas as pd + + templates_array = [] + requested_datasets = np.unique(template_df["dataset"]).tolist() + if verbose: + print(f"Fetching templates from {len(requested_datasets)} datasets") + + nbefore = None + sampling_frequency = None + channel_locations = None + probe = None + channel_ids = None + + for dataset in requested_datasets: + templates = fetch_template_object_from_database(dataset) + + # check consistency across datasets (the first dataset provides the reference values) + if nbefore is None: + nbefore = templates.nbefore + if channel_locations is None: + channel_locations = templates.get_channel_locations() + if sampling_frequency is None: + sampling_frequency = templates.sampling_frequency + if probe is None: + probe = templates.probe + if channel_ids is None: + channel_ids = templates.channel_ids + current_nbefore = templates.nbefore + current_channel_locations = templates.get_channel_locations() + current_sampling_frequency = templates.sampling_frequency + + assert ( + current_nbefore == nbefore + ), f"Number of samples before the peak is not consistent across datasets: {current_nbefore} != {nbefore}" + assert ( + current_sampling_frequency == sampling_frequency + ), f"Sampling frequency is not consistent across datasets: {current_sampling_frequency} != {sampling_frequency}" + assert np.array_equal( + current_channel_locations - current_channel_locations[0],  # relative to the first channel, so a global offset is ignored + channel_locations - channel_locations[0], + ), "Channel locations are not consistent across datasets" + + template_indices = template_df[template_df["dataset"] == dataset]["template_index"] + templates_array.append(templates.templates_array[template_indices, :, :]) + + templates_array = np.concatenate(templates_array, axis=0) + templates = Templates( + templates_array, + sampling_frequency=sampling_frequency, + channel_ids=channel_ids, + nbefore=nbefore, + probe=probe, + ) + + return templates diff --git a/src/spikeinterface/generation/tests/test_template_database.py b/src/spikeinterface/generation/tests/test_template_database.py 
new file mode 100644 index 0000000000..757018de89 --- /dev/null +++ b/src/spikeinterface/generation/tests/test_template_database.py @@ -0,0 +1,51 @@ +import numpy as np + +from spikeinterface.core.template import Templates + +from spikeinterface.generation import ( + fetch_template_object_from_database, + fetch_templates_database_info, + list_available_datasets_in_template_database, + query_templates_from_database, +) + + +def test_fetch_template_object_from_database(): + + available_datasets = list_available_datasets_in_template_database() + assert len(available_datasets) > 0 + + templates = fetch_template_object_from_database("test_templates.zarr") + assert isinstance(templates, Templates) + + assert templates.num_units == 100 + assert templates.num_channels == 384 + + +def test_fetch_templates_database_info(): + import pandas as pd + + templates_info = fetch_templates_database_info() + + assert isinstance(templates_info, pd.DataFrame) + + assert "dataset" in templates_info.columns + + +def test_query_templates_from_database(): + templates_info = fetch_templates_database_info() + + templates_info = templates_info.iloc[::15]  # keep every 15th row of the info table + num_selected = len(templates_info) + + templates = query_templates_from_database(templates_info) + + assert isinstance(templates, Templates) + + assert templates.num_units == num_selected  # one template is expected per selected row + + +if __name__ == "__main__": + test_fetch_template_object_from_database() + test_fetch_templates_database_info() + test_query_templates_from_database() diff --git a/src/spikeinterface/generation/tests/test_template_fetch.py b/src/spikeinterface/generation/tests/test_template_fetch.py deleted file mode 100644 index a7cc31af44..0000000000 --- a/src/spikeinterface/generation/tests/test_template_fetch.py +++ /dev/null @@ -1,13 +0,0 @@ -import pytest -from spikeinterface.generation import fetch_templates_from_database -from spikeinterface.core.template import Templates - - -def test_basic_call(): - - templates = fetch_templates_from_database() - - assert 
isinstance(templates, Templates) - - assert templates.num_units == 100 - assert templates.num_channels == 384