From 3203642b0982af679abcf466e834d3cb294d56bd Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Mon, 27 Nov 2023 14:10:19 -0800
Subject: [PATCH 01/22] added simple script to iterate through raw data and check for a uuid

---
 .../convert_dataset.py | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
new file mode 100644
index 0000000..2dbd46c
--- /dev/null
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -0,0 +1,25 @@
+from pathlib import Path
+from typing import Union
+from neuroconv.utils import load_dict_from_file
+
+
+def dataset_to_nwb(processed_path: Union[str, Path], raw_dir_path: Union[str, Path], output_dir_path: Union[str, Path]):
+    processed_path = Path(processed_path)
+    raw_dir_path = Path(raw_dir_path)
+    output_dir_path = Path(output_dir_path)
+    output_dir_path.mkdir(parents=True, exist_ok=True)
+
+    for experimental_folder in raw_dir_path.iterdir():
+        assert experimental_folder.is_dir(), f"{experimental_folder} is not a directory"
+        for session_folder in experimental_folder.iterdir():
+            assert session_folder.is_dir(), f"{session_folder} is not a directory"
+            results_file = session_folder / "proc" / "results00.yaml"
+            results = load_dict_from_file(results_file)
+            assert "uuid" in results, f"UUID not found in {results_file}"
+
+
+if __name__ == "__main__":
+    processed_path = "/path/to/processed"
+    raw_dir_path = "/path/to/raw"
+    output_dir_path = "/path/to/output"
+    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path)
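This first script assumes a fixed experiment/session/proc layout in which every session's results00.yaml carries a "uuid" key. A throwaway fixture along these lines exercises the traversal without the real Datta data; the tree layout and the field name come from the script itself, while the paths and the fake uuid here are made up:

from pathlib import Path
import tempfile

import yaml

# Build a minimal fake raw-data tree: one experiment, one session, one results file.
raw_dir_path = Path(tempfile.mkdtemp())
proc_dir = raw_dir_path / "experiment_00" / "session_00" / "proc"
proc_dir.mkdir(parents=True)
with open(proc_dir / "results00.yaml", "w") as file:
    yaml.dump({"uuid": "0123456789abcdef"}, file)

# dataset_to_nwb("/path/to/processed", raw_dir_path, raw_dir_path / "out")
# should now walk the tree without tripping any of the assertions.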
From 5232bb9eba94d0b98f212badd9bc30385bf5b094 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Mon, 27 Nov 2023 17:01:35 -0800
Subject: [PATCH 02/22] fixed bugs with dataset conversion script

---
 .../convert_dataset.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 2dbd46c..f8452e6 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -10,16 +10,16 @@ def dataset_to_nwb(processed_path: Union[str, Path], raw_dir_path: Union[str, Pa
     output_dir_path.mkdir(parents=True, exist_ok=True)
 
     for experimental_folder in raw_dir_path.iterdir():
-        assert experimental_folder.is_dir(), f"{experimental_folder} is not a directory"
-        for session_folder in experimental_folder.iterdir():
-            assert session_folder.is_dir(), f"{session_folder} is not a directory"
-            results_file = session_folder / "proc" / "results00.yaml"
-            results = load_dict_from_file(results_file)
-            assert "uuid" in results, f"UUID not found in {results_file}"
+        if experimental_folder.is_dir():
+            for session_folder in experimental_folder.iterdir():
+                if session_folder.is_dir():
+                    results_file = session_folder / "proc" / "results_00.yaml"
+                    results = load_dict_from_file(results_file)
+                    assert "uuid" in results, f"UUID not found in {results_file}"
 
 
 if __name__ == "__main__":
-    processed_path = "/path/to/processed"
-    raw_dir_path = "/path/to/raw"
-    output_dir_path = "/path/to/output"
+    processed_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/dopamine-reinforces-spontaneous-behavior")
+    raw_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/formatted_raw")
+    output_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/conversion_nwb/")
     dataset_to_nwb(processed_path, raw_dir_path, output_dir_path)

From 0449d8186c4abd20fe6ff2e7255eea13a790b7b0 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Tue, 28 Nov 2023 12:35:41 -0800
Subject: [PATCH 03/22] downgrade dandi to fix conflict

---
 frozen_dependencies.txt | 3 ++-
 requirements.txt        | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/frozen_dependencies.txt b/frozen_dependencies.txt
index 7d9660b..116e3f5 100644
--- a/frozen_dependencies.txt
+++ b/frozen_dependencies.txt
@@ -32,7 +32,7 @@ colorcet==3.0.1
 comm==0.1.4
 contourpy==1.1.0
 cycler==0.11.0
-dandi==0.55.1
+dandi==0.58.0
 dandischema==0.8.4
 debugpy==1.6.7
 decorator==5.1.1
@@ -120,6 +120,7 @@ nbformat==5.9.2
 ndx-events==0.2.0
 ndx-grayscalevolume==0.0.2
 ndx-icephys-meta==0.1.0
+ndx-moseq @ git+https://github.com/pauladkisson/ndx-moseq.git@cac0b4003525b3ac902fed9a68d90ca459a211f8
 ndx-photometry @ git+https://github.com/catalystneuro/ndx-photometry.git@7ea9d755ceac9524125f50ab528b403b135c4530
 -e git+https://github.com/rly/ndx-pose.git@f9dd18a8290897e48bdd6ebeedcc0a7095d86265#egg=ndx_pose
 ndx-spectrum==0.2.2
diff --git a/requirements.txt b/requirements.txt
index 1549440..78b781c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+dandi==0.58.0
 hdmf==3.6.1
 colorcet
 cellpose

From 658f6fed792b53481a1991aa7f9f8f50f04df9ad Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Tue, 28 Nov 2023 12:50:16 -0800
Subject: [PATCH 04/22] added skip sessions to catch problematic sessions

---
 .../convert_dataset.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index f8452e6..e513e65 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -3,7 +3,12 @@ from neuroconv.utils import load_dict_from_file
 
 
-def dataset_to_nwb(processed_path: Union[str, Path], raw_dir_path: Union[str, Path], output_dir_path: Union[str, Path]):
+def dataset_to_nwb(
+    processed_path: Union[str, Path],
+    raw_dir_path: Union[str, Path],
+    output_dir_path: Union[str, Path],
+    skip_sessions: set,
+):
     processed_path = Path(processed_path)
     raw_dir_path = Path(raw_dir_path)
     output_dir_path = Path(output_dir_path)
@@ -12,7 +17,7 @@ def dataset_to_nwb(processed_path: Union[str, Pa
     for experimental_folder in raw_dir_path.iterdir():
         if experimental_folder.is_dir():
             for session_folder in experimental_folder.iterdir():
-                if session_folder.is_dir():
+                if session_folder.is_dir() and session_folder.name not in skip_sessions:
                     results_file = session_folder / "proc" / "results_00.yaml"
                     results = load_dict_from_file(results_file)
                     assert "uuid" in results, f"UUID not found in {results_file}"
@@ -22,4 +27,7 @@ if __name__ == "__main__":
     processed_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/dopamine-reinforces-spontaneous-behavior")
     raw_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/formatted_raw")
     output_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/conversion_nwb/")
-    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path)
+    skip_sessions = {
+        "session_20210420113646-974717",  # missing everything except depth video
+    }
+    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path, skip_sessions)
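The next patch fills this skip set in by hand from failed runs. A scan along these lines could produce the same catalogue automatically; find_skippable_sessions is a hypothetical helper sketched here, not code from the series:

from pathlib import Path


def find_skippable_sessions(raw_dir_path: Path) -> dict:
    """Map each unconvertible session folder name to the reason it fails."""
    skippable = {}
    for experimental_folder in raw_dir_path.iterdir():
        if not experimental_folder.is_dir():
            continue
        for session_folder in experimental_folder.iterdir():
            if not session_folder.is_dir():
                continue
            proc_folder = session_folder / "proc"
            if not proc_folder.is_dir():
                skippable[session_folder.name] = "missing proc folder"
            elif not (proc_folder / "results_00.yaml").exists():
                skippable[session_folder.name] = "proc folder missing results_00.yaml"
    return skippable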
From 5827148bbf9f56324b888dc00e42c5f7c3639995 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Tue, 28 Nov 2023 21:48:25 -0800
Subject: [PATCH 05/22] added the rest of the skips

---
 .../convert_dataset.py | 36 +++++++++++++++----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index e513e65..37c1654 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 from typing import Union
 from neuroconv.utils import load_dict_from_file
+from tqdm import tqdm
 
 
 def dataset_to_nwb(
@@ -14,8 +15,8 @@ def dataset_to_nwb(
     output_dir_path = Path(output_dir_path)
     output_dir_path.mkdir(parents=True, exist_ok=True)
 
-    for experimental_folder in raw_dir_path.iterdir():
-        if experimental_folder.is_dir():
+    for experimental_folder in tqdm(list(raw_dir_path.iterdir())):
+        if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments:
             for session_folder in experimental_folder.iterdir():
                 if session_folder.is_dir() and session_folder.name not in skip_sessions:
                     results_file = session_folder / "proc" / "results_00.yaml"
                     results = load_dict_from_file(results_file)
                     assert "uuid" in results, f"UUID not found in {results_file}"
@@ -24,10 +25,33 @@ if __name__ == "__main__":
-    processed_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/dopamine-reinforces-spontaneous-behavior")
-    raw_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/formatted_raw")
-    output_dir_path = Path("/Volumes/T7/CatalystNeuro/NWB/Datta/conversion_nwb/")
+    processed_path = Path("NWB/DattaConv/processed_data")
+    raw_dir_path = Path("NWB/DattaConv/raw_data")
+    output_dir_path = Path("NWB/DattaConv/conversion_output")
+    skip_experiments = {
+        "keypoint",  # no proc folder for keypoints
+    }
     skip_sessions = {
-        "session_20210420113646-974717",  # missing everything except depth video
+        "session_20210420113646-974717",  # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video
+        "session_20210309134748-687283",  # _aggregate_results_arhmm_excitation_03: missing everything except depth video
+        "session_20210224083612-947426",  # _aggregate_results_arhmm_excitation_03: missing proc folder
+        "session_20210224094428-535503",  # _aggregate_results_arhmm_excitation_03: missing proc folder
+        "session_20210309120607-939403",  # _aggregate_results_arhmm_excitation_03: proc folder empty
+        "session_20201109130417-162983",  # _aggregate_results_arhmm_excitation_01: proc folder empty
+        "session_20220308114215-760303",  # _aggregate_results_arhmm_scalar_03: missing proc folder
+        "session_20211217102637-612299",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
+        "session_20211202155132-245700",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
+        "session_20210128093041-475933",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20210215185110-281693",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20210208173229-833584",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20210201115439-569392",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20200729112540-313279",  # _aggregate_results_arhmm_07: missing everything except depth video
+        "session_20200810085750-497237",  # _aggregate_results_arhmm_07: missing everything except depth video
+        "session_20200730090228-985303",  # _aggregate_results_arhmm_07: missing everything except depth video
+        "session_20201207093653-476370",  # _aggregate_results_arhmm_excitation_02: missing everything except depth video
+        "session_20210426143230-310843",  # _aggregate_results_arhmm_09: missing everything except depth video
+        "session_20210429135801-758690",  # _aggregate_results_arhmm_09: missing everything except depth video
+        "session_20191111130454-333065",  # _aggregate_results_arhmm_05: missing proc folder
+        "session_20191111130847-263894",  # _aggregate_results_arhmm_05: missing proc folder
     }
     dataset_to_nwb(processed_path, raw_dir_path, output_dir_path, skip_sessions)

From 1ab5b960e1d20d871081cb02e89ed8493bcf893b Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Wed, 29 Nov 2023 17:33:06 -0800
Subject: [PATCH 06/22] added checks for processed data

---
 .../convert_dataset.py | 46 ++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 37c1654..da1b431 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -2,6 +2,31 @@
 from typing import Union
 from neuroconv.utils import load_dict_from_file
 from tqdm import tqdm
+import pandas as pd
+
+folder_name_to_experiment_type = {
+    "_aggregate_results_arhmm_03": "reinforcement",
+    "_aggregate_results_arhmm_04": "reinforcement",
+    "_aggregate_results_arhmm_05": "reinforcement",
+    "_aggregate_results_arhmm_06": "reinforcement",
+    "_aggregate_results_arhmm_07": "reinforcement",
+    "_aggregate_results_arhmm_08": "reinforcement",
+    "_aggregate_results_arhmm_09": "reinforcement",
+    "_aggregate_results_arhmm_11": "reinforcement",
+    "_aggregate_results_arhmm_photometry_02": "reinforcement-photometry",
+    "_aggregate_results_arhmm_photometry_03": "reinforcement-photometry",
+    "_aggregate_results_arhmm_scalar_01": "velocity-modulation",
+    "_aggregate_results_arhmm_scalar_03": "velocity-modulation",
+    "_aggregate_results_arhmm_excitation_01": "reinforcement",
+    "_aggregate_results_arhmm_excitation_02": "reinforcement",
+    "_aggregate_results_arhmm_excitation_03": "reinforcement",
+    "_aggregate_results_arhmm_photometry_excitation_02": "reinforcement-photometry",
+    "_aggregate_results_arhmm_excitation_pulsed_01": "reinforcement",
+    "_aggregate_results_arhmm_photometry_excitation_pulsed_01": "reinforcement-photometry",
+    "_aggregate_results_arhmm_photometry_06": "photometry",
+    "_aggregate_results_arhmm_photometry_07": "photometry",
+    "_aggregate_results_arhmm_photometry_08": "photometry",
+}
 
 
 def dataset_to_nwb(
@@ -17,11 +42,30 @@ def dataset_to_nwb(
     for experimental_folder in tqdm(list(raw_dir_path.iterdir())):
         if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments:
+            experiment_type = folder_name_to_experiment_type[experimental_folder.name]
             for session_folder in experimental_folder.iterdir():
                 if session_folder.is_dir() and session_folder.name not in skip_sessions:
                     results_file = session_folder / "proc" / "results_00.yaml"
                     results = load_dict_from_file(results_file)
-                    assert "uuid" in results, f"UUID not found in {results_file}"
+                    raw_uuid = results["uuid"]
+                    if experiment_type == "photometry":
+                        file_paths = [processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet"]
+                    elif experiment_type == "reinforcement":
+                        file_paths = [processed_path / "optoda_raw_data/closed_loop_behavior.parquet"]
+                    elif experiment_type == "reinforcement-photometry":
+                        file_paths = [
+                            processed_path / "optoda_raw_data/closed_loop_behavior.parquet",
+                            processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet",
+                        ]
+                    elif experiment_type == "velocity-modulation":
+                        file_paths = [
+                            processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet"
+                        ]
+                    for file_path in file_paths:
+                        session_df = pd.read_parquet(file_path, columns=["uuid"], filters=[("uuid", "==", raw_uuid)])
+                        assert raw_uuid in set(
+                            session_df["uuid"]
+                        ), f"UUID {raw_uuid} for {session_folder} not found in {file_path}"

From edd0f2c410aee74ce06b646481633f8577416107 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Wed, 29 Nov 2023 17:57:22 -0800
Subject: [PATCH 07/22] refactored missing uuids to show all at the end

---
 .../convert_dataset.py | 34 +++++++++++--------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index da1b431..bbf1013 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -40,6 +40,19 @@ def dataset_to_nwb(
     output_dir_path = Path(output_dir_path)
     output_dir_path.mkdir(parents=True, exist_ok=True)
 
+    photometry_uuids = pd.read_parquet(
+        processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet", columns=["uuid"]
+    )
+    photometry_uuids = set(photometry_uuids["uuid"])
+    reinforcement_uuids = pd.read_parquet(
+        processed_path / "optoda_raw_data/closed_loop_behavior.parquet", columns=["uuid"]
+    )
+    reinforcement_uuids = set(reinforcement_uuids["uuid"])
+    velocity_uuids = pd.read_parquet(
+        processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet", columns=["uuid"]
+    )
+    velocity_uuids = set(velocity_uuids["uuid"])
+    missing_uuids = []
     for experimental_folder in tqdm(list(raw_dir_path.iterdir())):
         if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments:
             experiment_type = folder_name_to_experiment_type[experimental_folder.name]
@@ -49,23 +62,16 @@ def dataset_to_nwb(
                     results = load_dict_from_file(results_file)
                     raw_uuid = results["uuid"]
                     if experiment_type == "photometry":
-                        file_paths = [processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet"]
+                        processed_uuids = photometry_uuids
                     elif experiment_type == "reinforcement":
-                        file_paths = [processed_path / "optoda_raw_data/closed_loop_behavior.parquet"]
+                        processed_uuids = reinforcement_uuids
                     elif experiment_type == "reinforcement-photometry":
-                        file_paths = [
-                            processed_path / "optoda_raw_data/closed_loop_behavior.parquet",
-                            processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet",
-                        ]
+                        processed_uuids = photometry_uuids.union(reinforcement_uuids)
                     elif experiment_type == "velocity-modulation":
-                        file_paths = [
-                            processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet"
-                        ]
-                    for file_path in file_paths:
-                        session_df = pd.read_parquet(file_path, columns=["uuid"], filters=[("uuid", "==", raw_uuid)])
-                        assert raw_uuid in set(
-                            session_df["uuid"]
-                        ), f"UUID {raw_uuid} for {session_folder} not found in {file_path}"
+                        processed_uuids = velocity_uuids
+                    if raw_uuid not in processed_uuids:
+                        missing_uuids.append(raw_uuid)
+    print(missing_uuids)
 
 
 if __name__ == "__main__":
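This refactor trades the previous patch's per-session filtered reads for three up-front column reads plus constant-time set membership. Both patterns side by side, against a placeholder table.parquet:

import pandas as pd

path = "table.parquet"  # placeholder

# Per-row check: pyarrow pushes the uuid predicate down into the file scan,
# but the file is re-opened and re-scanned once per session.
def uuid_in_file(uuid: str) -> bool:
    df = pd.read_parquet(path, columns=["uuid"], filters=[("uuid", "==", uuid)])
    return not df.empty

# Read-once check: one column scan up front, then O(1) membership tests.
processed_uuids = set(pd.read_parquet(path, columns=["uuid"])["uuid"])

def uuid_in_set(uuid: str) -> bool:
    return uuid in processed_uuids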
if __name__ == "__main__": From c3b846a490be5c11c67aea8595fb02b129cada05 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 30 Nov 2023 08:45:34 -0800 Subject: [PATCH 08/22] save mising uuids to yaml --- .../markowitz_gillis_nature_2023/convert_dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index bbf1013..cbfacca 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -3,6 +3,7 @@ from neuroconv.utils import load_dict_from_file from tqdm import tqdm import pandas as pd +import yaml folder_name_to_experiment_type = { "_aggregate_results_arhmm_03": "reinforcement", @@ -71,7 +72,10 @@ def dataset_to_nwb( processed_uuids = velocity_uuids if raw_uuid not in processed_uuids: missing_uuids.append(raw_uuid) - print(missing_uuids) + + # Save missing_uuids to a YAML file + with open(processed_path / "missing_uuids.yaml", "w") as file: + yaml.dump(missing_uuids, file) if __name__ == "__main__": From 93da2a75aa0a7d54a30b60d3e3a6cbd563ec17fa Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 30 Nov 2023 08:55:48 -0800 Subject: [PATCH 09/22] assert no misclasses --- .../markowitz_gillis_nature_2023/convert_dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index cbfacca..e2ceeeb 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -53,6 +53,7 @@ def dataset_to_nwb( processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet", columns=["uuid"] ) velocity_uuids = set(velocity_uuids["uuid"]) + all_uuids = photometry_uuids.union(reinforcement_uuids).union(velocity_uuids) missing_uuids = [] for experimental_folder in tqdm(list(raw_dir_path.iterdir())): if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments: @@ -71,6 +72,9 @@ def dataset_to_nwb( elif experiment_type == "velocity-modulation": processed_uuids = velocity_uuids if raw_uuid not in processed_uuids: + assert ( + raw_uuid not in all_uuids + ), f"expermental folder {experimental_folder.name} with uuid {raw_uuid} is not classified correctly" missing_uuids.append(raw_uuid) # Save missing_uuids to a YAML file From 66392cad94788e6e967042132d49f021b5a148d6 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 30 Nov 2023 11:38:40 -0800 Subject: [PATCH 10/22] check missing and extra uuids separately --- .../convert_dataset.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index e2ceeeb..ea59464 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -53,8 +53,9 @@ def dataset_to_nwb( processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet", columns=["uuid"] ) velocity_uuids = set(velocity_uuids["uuid"]) - all_uuids = photometry_uuids.union(reinforcement_uuids).union(velocity_uuids) - missing_uuids = [] + all_processed_uuids = 
photometry_uuids.union(reinforcement_uuids).union(velocity_uuids) + all_raw_uuids = set() + extra_uuids = [] for experimental_folder in tqdm(list(raw_dir_path.iterdir())): if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments: experiment_type = folder_name_to_experiment_type[experimental_folder.name] @@ -63,6 +64,7 @@ def dataset_to_nwb( results_file = session_folder / "proc" / "results_00.yaml" results = load_dict_from_file(results_file) raw_uuid = results["uuid"] + all_raw_uuids.add(raw_uuid) if experiment_type == "photometry": processed_uuids = photometry_uuids elif experiment_type == "reinforcement": @@ -73,11 +75,16 @@ def dataset_to_nwb( processed_uuids = velocity_uuids if raw_uuid not in processed_uuids: assert ( - raw_uuid not in all_uuids + raw_uuid not in all_processed_uuids ), f"expermental folder {experimental_folder.name} with uuid {raw_uuid} is not classified correctly" - missing_uuids.append(raw_uuid) + extra_uuids.append(raw_uuid) + + # Save extra_uuids to a YAML file + with open(processed_path / "extra_uuids.yaml", "w") as file: + yaml.dump(extra_uuids, file) # Save missing_uuids to a YAML file + missing_uuids = all_processed_uuids.difference(all_raw_uuids) with open(processed_path / "missing_uuids.yaml", "w") as file: yaml.dump(missing_uuids, file) From 46a50e3d55181cfb618e000c3bb79c58cb5adc81 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 30 Nov 2023 11:48:27 -0800 Subject: [PATCH 11/22] add more detail to missing uuids --- .../markowitz_gillis_nature_2023/convert_dataset.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py index ea59464..388f551 100644 --- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py @@ -84,7 +84,14 @@ def dataset_to_nwb( yaml.dump(extra_uuids, file) # Save missing_uuids to a YAML file - missing_uuids = all_processed_uuids.difference(all_raw_uuids) + missing_photometry_uuids = list(photometry_uuids.difference(all_raw_uuids)) + missing_reinforcement_uuids = list(reinforcement_uuids.difference(all_raw_uuids)) + missing_velocity_uuids = list(velocity_uuids.difference(all_raw_uuids)) + missing_uuids = dict( + photometry=missing_photometry_uuids, + reinforcement=missing_reinforcement_uuids, + velocity=missing_velocity_uuids, + ) with open(processed_path / "missing_uuids.yaml", "w") as file: yaml.dump(missing_uuids, file) From 6603a38093f55d881f933f1f5aeb5b660a91785a Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 30 Nov 2023 17:16:26 -0800 Subject: [PATCH 12/22] refactored dataset conversion script to actually convert data --- .../check_dataset_uuids.py | 129 ++++++++++++++++++ .../convert_dataset.py | 49 +++---- 2 files changed, 154 insertions(+), 24 deletions(-) create mode 100644 src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py new file mode 100644 index 0000000..388f551 --- /dev/null +++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py @@ -0,0 +1,129 @@ +from pathlib import Path +from typing import Union +from neuroconv.utils import load_dict_from_file +from tqdm import tqdm +import pandas as pd +import yaml + +folder_name_to_experiment_type 
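For reference, the report written above now groups unmatched uuids by processed dataframe; with made-up uuids, yaml.dump renders it roughly as shown in the comments:

import yaml

# Shape of missing_uuids.yaml, with hypothetical uuid values.
missing_uuids = dict(
    photometry=["0b0c5161-4b6f-4f2a-9da1-demo0000001"],
    reinforcement=[],
    velocity=["7f3a9e2d-8c4b-4d1e-b2a3-demo0000002"],
)
print(yaml.dump(missing_uuids))
# photometry:
# - 0b0c5161-4b6f-4f2a-9da1-demo0000001
# reinforcement: []
# velocity:
# - 7f3a9e2d-8c4b-4d1e-b2a3-demo0000002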
From 6603a38093f55d881f933f1f5aeb5b660a91785a Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 30 Nov 2023 17:16:26 -0800
Subject: [PATCH 12/22] refactored dataset conversion script to actually convert data

---
 .../check_dataset_uuids.py | 129 ++++++++++++
 .../convert_dataset.py     |  49 +++----
 2 files changed, 154 insertions(+), 24 deletions(-)
 create mode 100644 src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py
new file mode 100644
index 0000000..388f551
--- /dev/null
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/check_dataset_uuids.py
@@ -0,0 +1,129 @@
+from pathlib import Path
+from typing import Union
+from neuroconv.utils import load_dict_from_file
+from tqdm import tqdm
+import pandas as pd
+import yaml
+
+folder_name_to_experiment_type = {
+    "_aggregate_results_arhmm_03": "reinforcement",
+    "_aggregate_results_arhmm_04": "reinforcement",
+    "_aggregate_results_arhmm_05": "reinforcement",
+    "_aggregate_results_arhmm_06": "reinforcement",
+    "_aggregate_results_arhmm_07": "reinforcement",
+    "_aggregate_results_arhmm_08": "reinforcement",
+    "_aggregate_results_arhmm_09": "reinforcement",
+    "_aggregate_results_arhmm_11": "reinforcement",
+    "_aggregate_results_arhmm_photometry_02": "reinforcement-photometry",
+    "_aggregate_results_arhmm_photometry_03": "reinforcement-photometry",
+    "_aggregate_results_arhmm_scalar_01": "velocity-modulation",
+    "_aggregate_results_arhmm_scalar_03": "velocity-modulation",
+    "_aggregate_results_arhmm_excitation_01": "reinforcement",
+    "_aggregate_results_arhmm_excitation_02": "reinforcement",
+    "_aggregate_results_arhmm_excitation_03": "reinforcement",
+    "_aggregate_results_arhmm_photometry_excitation_02": "reinforcement-photometry",
+    "_aggregate_results_arhmm_excitation_pulsed_01": "reinforcement",
+    "_aggregate_results_arhmm_photometry_excitation_pulsed_01": "reinforcement-photometry",
+    "_aggregate_results_arhmm_photometry_06": "photometry",
+    "_aggregate_results_arhmm_photometry_07": "photometry",
+    "_aggregate_results_arhmm_photometry_08": "photometry",
+}
+
+
+def dataset_to_nwb(
+    processed_path: Union[str, Path],
+    raw_dir_path: Union[str, Path],
+    output_dir_path: Union[str, Path],
+    skip_sessions: set,
+):
+    processed_path = Path(processed_path)
+    raw_dir_path = Path(raw_dir_path)
+    output_dir_path = Path(output_dir_path)
+    output_dir_path.mkdir(parents=True, exist_ok=True)
+
+    photometry_uuids = pd.read_parquet(
+        processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet", columns=["uuid"]
+    )
+    photometry_uuids = set(photometry_uuids["uuid"])
+    reinforcement_uuids = pd.read_parquet(
+        processed_path / "optoda_raw_data/closed_loop_behavior.parquet", columns=["uuid"]
+    )
+    reinforcement_uuids = set(reinforcement_uuids["uuid"])
+    velocity_uuids = pd.read_parquet(
+        processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet", columns=["uuid"]
+    )
+    velocity_uuids = set(velocity_uuids["uuid"])
+    all_processed_uuids = photometry_uuids.union(reinforcement_uuids).union(velocity_uuids)
+    all_raw_uuids = set()
+    extra_uuids = []
+    for experimental_folder in tqdm(list(raw_dir_path.iterdir())):
+        if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments:
+            experiment_type = folder_name_to_experiment_type[experimental_folder.name]
+            for session_folder in experimental_folder.iterdir():
+                if session_folder.is_dir() and session_folder.name not in skip_sessions:
+                    results_file = session_folder / "proc" / "results_00.yaml"
+                    results = load_dict_from_file(results_file)
+                    raw_uuid = results["uuid"]
+                    all_raw_uuids.add(raw_uuid)
+                    if experiment_type == "photometry":
+                        processed_uuids = photometry_uuids
+                    elif experiment_type == "reinforcement":
+                        processed_uuids = reinforcement_uuids
+                    elif experiment_type == "reinforcement-photometry":
+                        processed_uuids = photometry_uuids.union(reinforcement_uuids)
+                    elif experiment_type == "velocity-modulation":
+                        processed_uuids = velocity_uuids
+                    if raw_uuid not in processed_uuids:
+                        assert (
+                            raw_uuid not in all_processed_uuids
+                        ), f"experimental folder {experimental_folder.name} with uuid {raw_uuid} is not classified correctly"
+                        extra_uuids.append(raw_uuid)
+
+    # Save extra_uuids to a YAML file
+    with open(processed_path / "extra_uuids.yaml", "w") as file:
+        yaml.dump(extra_uuids, file)
+
+    # Save missing_uuids to a YAML file
+    missing_photometry_uuids = list(photometry_uuids.difference(all_raw_uuids))
+    missing_reinforcement_uuids = list(reinforcement_uuids.difference(all_raw_uuids))
+    missing_velocity_uuids = list(velocity_uuids.difference(all_raw_uuids))
+    missing_uuids = dict(
+        photometry=missing_photometry_uuids,
+        reinforcement=missing_reinforcement_uuids,
+        velocity=missing_velocity_uuids,
+    )
+    with open(processed_path / "missing_uuids.yaml", "w") as file:
+        yaml.dump(missing_uuids, file)
+
+
+if __name__ == "__main__":
+    processed_path = Path("NWB/DattaConv/processed_data")
+    raw_dir_path = Path("NWB/DattaConv/raw_data")
+    output_dir_path = Path("NWB/DattaConv/conversion_output")
+    skip_experiments = {
+        "keypoint",  # no proc folder for keypoints
+    }
+    skip_sessions = {
+        "session_20210420113646-974717",  # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video
+        "session_20210309134748-687283",  # _aggregate_results_arhmm_excitation_03: missing everything except depth video
+        "session_20210224083612-947426",  # _aggregate_results_arhmm_excitation_03: missing proc folder
+        "session_20210224094428-535503",  # _aggregate_results_arhmm_excitation_03: missing proc folder
+        "session_20210309120607-939403",  # _aggregate_results_arhmm_excitation_03: proc folder empty
+        "session_20201109130417-162983",  # _aggregate_results_arhmm_excitation_01: proc folder empty
+        "session_20220308114215-760303",  # _aggregate_results_arhmm_scalar_03: missing proc folder
+        "session_20211217102637-612299",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
+        "session_20211202155132-245700",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
+        "session_20210128093041-475933",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20210215185110-281693",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20210208173229-833584",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20210201115439-569392",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
+        "session_20200729112540-313279",  # _aggregate_results_arhmm_07: missing everything except depth video
+        "session_20200810085750-497237",  # _aggregate_results_arhmm_07: missing everything except depth video
+        "session_20200730090228-985303",  # _aggregate_results_arhmm_07: missing everything except depth video
+        "session_20201207093653-476370",  # _aggregate_results_arhmm_excitation_02: missing everything except depth video
+        "session_20210426143230-310843",  # _aggregate_results_arhmm_09: missing everything except depth video
+        "session_20210429135801-758690",  # _aggregate_results_arhmm_09: missing everything except depth video
+        "session_20191111130454-333065",  # _aggregate_results_arhmm_05: missing proc folder
+        "session_20191111130847-263894",  # _aggregate_results_arhmm_05: missing proc folder
+    }
+    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path, skip_sessions)
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 388f551..3c265bb 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -4,6 +4,7 @@
 from tqdm import tqdm
 import pandas as pd
 import yaml
+from .convert_session import session_to_nwb
 
 folder_name_to_experiment_type = {
     "_aggregate_results_arhmm_03": "reinforcement",
@@ -35,6 +36,7 @@ def dataset_to_nwb(
     raw_dir_path: Union[str, Path],
     output_dir_path: Union[str, Path],
     skip_sessions: set,
+    num_sessions: int = None,
 ):
     processed_path = Path(processed_path)
     raw_dir_path = Path(raw_dir_path)
@@ -54,30 +56,29 @@ def dataset_to_nwb(
     )
     velocity_uuids = set(velocity_uuids["uuid"])
     all_processed_uuids = photometry_uuids.union(reinforcement_uuids).union(velocity_uuids)
-    all_raw_uuids = set()
-    extra_uuids = []
-    for experimental_folder in tqdm(list(raw_dir_path.iterdir())):
-        if experimental_folder.is_dir() and experimental_folder.name not in skip_experiments:
-            experiment_type = folder_name_to_experiment_type[experimental_folder.name]
-            for session_folder in experimental_folder.iterdir():
-                if session_folder.is_dir() and session_folder.name not in skip_sessions:
-                    results_file = session_folder / "proc" / "results_00.yaml"
-                    results = load_dict_from_file(results_file)
-                    raw_uuid = results["uuid"]
-                    all_raw_uuids.add(raw_uuid)
-                    if experiment_type == "photometry":
-                        processed_uuids = photometry_uuids
-                    elif experiment_type == "reinforcement":
-                        processed_uuids = reinforcement_uuids
-                    elif experiment_type == "reinforcement-photometry":
-                        processed_uuids = photometry_uuids.union(reinforcement_uuids)
-                    elif experiment_type == "velocity-modulation":
-                        processed_uuids = velocity_uuids
-                    if raw_uuid not in processed_uuids:
-                        assert (
-                            raw_uuid not in all_processed_uuids
-                        ), f"experimental folder {experimental_folder.name} with uuid {raw_uuid} is not classified correctly"
-                        extra_uuids.append(raw_uuid)
+    experimental_folders = [
+        folder for folder in raw_dir_path.iterdir() if folder.is_dir() and folder.name not in skip_experiments
+    ]
+    for experimental_folder in tqdm(experimental_folders):
+        experiment_type = folder_name_to_experiment_type[experimental_folder.name]
+        session_folders = [
+            folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions
+        ]
+        if num_sessions is not None:
+            session_folders = session_folders[:num_sessions]
+        for session_folder in session_folders:
+            results_file = session_folder / "proc" / "results_00.yaml"
+            results = load_dict_from_file(results_file)
+            session_uuid = results["uuid"]
+            if session_uuid not in all_processed_uuids:
+                continue
+            session_to_nwb(
+                session_uuid=session_uuid,
+                experiment_type=experiment_type,
+                processed_path=processed_path,
+                raw_path=session_folder,
+                output_dir_path=output_dir_path,
+            )
 
     # Save extra_uuids to a YAML file
     with open(processed_path / "extra_uuids.yaml", "w") as file:
From 53aa7cc7d9b91685169f819eafa4e610982a5d1c Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 30 Nov 2023 17:22:28 -0800
Subject: [PATCH 13/22] made sure skipped sessions aren't counted for num_sessions

---
 .../markowitz_gillis_nature_2023/convert_dataset.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 3c265bb..5155955 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -64,8 +64,9 @@ def dataset_to_nwb(
         session_folders = [
             folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions
         ]
-        if num_sessions is not None:
-            session_folders = session_folders[:num_sessions]
+        if num_sessions is None:
+            num_sessions = len(session_folders) + 1
+        session_num = 0
         for session_folder in session_folders:
             results_file = session_folder / "proc" / "results_00.yaml"
             results = load_dict_from_file(results_file)
@@ -79,6 +80,9 @@ def dataset_to_nwb(
                 raw_path=session_folder,
                 output_dir_path=output_dir_path,
             )
+            session_num += 1
+            if session_num >= num_sessions:
+                break
 
     # Save extra_uuids to a YAML file
     with open(processed_path / "extra_uuids.yaml", "w") as file:
@@ -127,4 +131,4 @@ if __name__ == "__main__":
         "session_20191111130454-333065",  # _aggregate_results_arhmm_05: missing proc folder
         "session_20191111130847-263894",  # _aggregate_results_arhmm_05: missing proc folder
     }
-    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path, skip_sessions)
+    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path, skip_sessions, num_sessions=1)

From 61e34360ff0c12b3409f9c6b43e1dbe4e1da8ba4 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 30 Nov 2023 17:23:57 -0800
Subject: [PATCH 14/22] fixed import

---
 .../markowitz_gillis_nature_2023/convert_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 5155955..3e4c3c1 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -4,7 +4,7 @@
 from tqdm import tqdm
 import pandas as pd
 import yaml
-from .convert_session import session_to_nwb
+from datta_lab_to_nwb.markowitz_gillis_nature_2023.convert_session import session_to_nwb
 
 folder_name_to_experiment_type = {
     "_aggregate_results_arhmm_03": "reinforcement",

From 62f9cba1b4083d9f965f6255c078678862de56c9 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 30 Nov 2023 17:26:34 -0800
Subject: [PATCH 15/22] fixed requirements

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 78b781c..acfef3f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ cellpose
 seaborn
 ndx-events
 ndx-photometry @ git+https://github.com/catalystneuro/ndx-photometry.git@7ea9d755ceac9524125f50ab528b403b135c4530
-ndx-moseq @ git+https://github.com/pauladkisson/ndx-moseq.git@cac0b4003525b3ac902fed9a68d90ca459a211f8
+ndx-depth-moseq @ git+https://github.com/catalystneuro/ndx-depth-moseq.git@main
 pyarrow
 neuroconv
 nwbwidgets

From dcf722c6788e94f617b2d9608ae945ec8b6969f5 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Thu, 30 Nov 2023 17:34:58 -0800
Subject: [PATCH 16/22] removed vestigial uuid code

---
 .../convert_dataset.py | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 3e4c3c1..3c27c78 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -68,6 +68,7 @@ def dataset_to_nwb(
             num_sessions = len(session_folders) + 1
         session_num = 0
         for session_folder in session_folders:
+            print(f"Processing {session_folder.name}")
            results_file = session_folder / "proc" / "results_00.yaml"
             results = load_dict_from_file(results_file)
             session_uuid = results["uuid"]
@@ -84,22 +85,6 @@ def dataset_to_nwb(
             session_num += 1
             if session_num >= num_sessions:
                 break
-
-    # Save extra_uuids to a YAML file
-    with open(processed_path / "extra_uuids.yaml", "w") as file:
-        yaml.dump(extra_uuids, file)
-
-    # Save missing_uuids to a YAML file
-    missing_photometry_uuids = list(photometry_uuids.difference(all_raw_uuids))
-    missing_reinforcement_uuids = list(reinforcement_uuids.difference(all_raw_uuids))
-    missing_velocity_uuids = list(velocity_uuids.difference(all_raw_uuids))
-    missing_uuids = dict(
-        photometry=missing_photometry_uuids,
-        reinforcement=missing_reinforcement_uuids,
-        velocity=missing_velocity_uuids,
-    )
-    with open(processed_path / "missing_uuids.yaml", "w") as file:
-        yaml.dump(missing_uuids, file)
 
 
 if __name__ == "__main__":

From 9842a3dd8728031b528fce312a24efa3d8f9b12f Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 1 Dec 2023 11:26:30 -0800
Subject: [PATCH 17/22] added Us for missing subjects in the sex_map

---
 .../preconversion/extract_metadata.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py
index 91212e0..d0fa9b2 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py
@@ -195,6 +195,11 @@
     "dms-dlight-12": "M",
     "dms-dlight-13": "M",
     "dms-dlight-14": "M",
+    "5891": "U",
+    "5892": "U",
+    "5893": "U",
+    "5894": "U",
+    "vta-nacc-ctrl-7": "U",
 }

From 10d87d8b4b35f97cf6147515c4cc3cb74632c2b4 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 1 Dec 2023 11:46:31 -0800
Subject: [PATCH 18/22] removed example uuids to extract all metadata

---
 .../preconversion/extract_metadata.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py
index d0fa9b2..2ad173e 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/preconversion/extract_metadata.py
@@ -634,18 +634,16 @@
     reinforcement_session_metadata, reinforcement_subject_metadata = extract_reinforcement_metadata(
         data_path,
-        example_uuids=reinforcement_examples,
     )
     photometry_session_metadata, photometry_subject_metadata = extract_photometry_metadata(
         data_path,
-        example_uuids=photometry_examples,
     )
     (
         reinforcement_photometry_session_metadata,
         reinforcement_photometry_subject_metadata,
     ) = extract_reinforcement_photometry_metadata(data_path, example_uuids=reinforcement_photometry_examples)
     velocity_session_metadata, velocity_subject_metadata = extract_velocity_modulation_metadata(
-        data_path, example_uuids=velocity_modulation_examples
+        data_path,
     )
     keypoint_session_metadata, keypoint_subject_metadata = extract_keypoint_metadata(data_path)

From 62509e25e2edf2d7aca8646d090e35b62dfecdbe Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 1 Dec 2023 13:44:27 -0800
Subject: [PATCH 19/22] delete output directory each run to keep things clean

---
 .../markowitz_gillis_nature_2023/convert_dataset.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 3c27c78..3002aa3 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import yaml
 from datta_lab_to_nwb.markowitz_gillis_nature_2023.convert_session import session_to_nwb
+import shutil
 
 folder_name_to_experiment_type = {
     "_aggregate_results_arhmm_03": "reinforcement",
@@ -90,6 +91,10 @@ if __name__ == "__main__":
     processed_path = Path("NWB/DattaConv/processed_data")
     raw_dir_path = Path("NWB/DattaConv/raw_data")
     output_dir_path = Path("NWB/DattaConv/conversion_output")
+    if output_dir_path.exists():
+        shutil.rmtree(
+            output_dir_path, ignore_errors=True
+        )  # ignore errors due to MacOS race condition (https://github.com/python/cpython/issues/81441)
     skip_experiments = {
         "keypoint",  # no proc folder for keypoints
     }
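ignore_errors=True sidesteps the macOS race referenced in the comment above, but it can also hide a partially deleted tree. A bounded retry loop is one alternative; rmtree_with_retries below is a sketch, not code from this repository:

import shutil
import time
from pathlib import Path


def rmtree_with_retries(path: Path, attempts: int = 3, delay_s: float = 0.5) -> None:
    """Delete a directory tree, retrying on transient OS errors."""
    for attempt in range(attempts):
        try:
            shutil.rmtree(path)
            return
        except FileNotFoundError:
            return  # already gone, nothing to do
        except OSError:
            if attempt == attempts - 1:
                raise
            time.sleep(delay_s)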
From 99d61b2579317de61b93b57c7ca369667c5086d8 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 1 Dec 2023 13:52:33 -0800
Subject: [PATCH 20/22] _aggregate_results_arhmm_photometry_excitation_pulsed_01 actually shows up in the reinforcement dataframe, not photometry

---
 .../markowitz_gillis_nature_2023/convert_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index 3002aa3..bf3ce46 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -25,7 +25,7 @@
     "_aggregate_results_arhmm_excitation_03": "reinforcement",
     "_aggregate_results_arhmm_photometry_excitation_02": "reinforcement-photometry",
     "_aggregate_results_arhmm_excitation_pulsed_01": "reinforcement",
-    "_aggregate_results_arhmm_photometry_excitation_pulsed_01": "reinforcement-photometry",
+    "_aggregate_results_arhmm_photometry_excitation_pulsed_01": "reinforcement",
     "_aggregate_results_arhmm_photometry_06": "photometry",
     "_aggregate_results_arhmm_photometry_07": "photometry",
     "_aggregate_results_arhmm_photometry_08": "photometry",

From c17c549641f6db627ab74d525158686b931cbf96 Mon Sep 17 00:00:00 2001
From: pauladkisson
Date: Fri, 1 Dec 2023 14:07:19 -0800
Subject: [PATCH 21/22] fixed bug with alignment for photometry only sessions

---
 .../markowitz_gillis_nature_2023/convert_session.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
index 38b44cb..5ffc1ae 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
@@ -118,7 +118,8 @@ def session_to_nwb(
         source_data["MoseqExtract"]["alignment_path"] = str(alignment_path)
         source_data["BehavioralSyllable"]["alignment_path"] = str(alignment_path)
         source_data["DepthVideo"]["alignment_path"] = str(alignment_path)
-        source_data["Optogenetic"]["alignment_path"] = str(alignment_path)
+        if "reinforcement" in session_metadata.keys():
+            source_data["Optogenetic"]["alignment_path"] = str(alignment_path)
     source_data["BehavioralSyllable"]["file_path"] = str(behavioral_syllable_path)
     if experiment_type == "velocity-modulation":
         conversion_options["BehavioralSyllable"] = dict(velocity_modulation=True)

From 3a76dcf8dc347ab1749583229b98413b4d965ce4 Mon Sep 17 00:00:00 2001
From: Cody Baker
Date: Wed, 6 Dec 2023 15:37:11 -0500
Subject: [PATCH 22/22] debugs

---
 requirements.txt                       |   4 +-
 .../convert_dataset.py                 | 158 +++++++++++++-----
 .../convert_session.py                 |  36 +++-
 .../optogeneticinterface.py            |  11 +-
 .../rawfiberphotometryinterface.py     |   4 +-
 .../requirements.txt                   |   3 -
 6 files changed, 154 insertions(+), 62 deletions(-)
 delete mode 100644 src/datta_lab_to_nwb/markowitz_gillis_nature_2023/requirements.txt

diff --git a/requirements.txt b/requirements.txt
index acfef3f..d3eaa17 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-dandi==0.58.0
+dandi
 hdmf==3.6.1
 colorcet
 cellpose
@@ -8,6 +8,4 @@ ndx-photometry @ git+https://github.com/catalystneuro/ndx-photometry.git@7ea9d75
 ndx-depth-moseq @ git+https://github.com/catalystneuro/ndx-depth-moseq.git@main
 pyarrow
 neuroconv
-nwbwidgets
-nwbinspector
 pre-commit
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
index bf3ce46..fece989 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_dataset.py
@@ -1,11 +1,14 @@
+import traceback
+import json
 from pathlib import Path
 from typing import Union
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+import pandas as pd
 from neuroconv.utils import load_dict_from_file
 from tqdm import tqdm
-import pandas as pd
-import yaml
-from datta_lab_to_nwb.markowitz_gillis_nature_2023.convert_session import session_to_nwb
-import shutil
+
+from datta_lab_to_nwb.markowitz_gillis_nature_2023.convert_session import _safe_session_to_nwb
 
 folder_name_to_experiment_type = {
     "_aggregate_results_arhmm_03": "reinforcement",
@@ -32,73 +35,131 @@
 }
 
 
+def get_all_processed_uuids(
+    *,
+    processed_path: Union[str, Path],
+    output_dir_path: Union[str, Path],
+) -> set:
+    processed_path = Path(processed_path)
+    output_dir_path = Path(output_dir_path)
+
+    uuid_file_path = output_dir_path.parent / "all_processed_uuids.txt"
+
+    if uuid_file_path.exists():
+        with open(file=uuid_file_path, mode="r") as io:
+            all_processed_uuids = set(json.load(fp=io))
+        return all_processed_uuids
+
+    photometry_uuids = pd.read_parquet(
+        processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet", columns=["uuid"]
+    )
+    unique_photometry_uuids = set(photometry_uuids["uuid"])
+    del photometry_uuids
+
+    reinforcement_uuids = pd.read_parquet(
+        processed_path / "optoda_raw_data/closed_loop_behavior.parquet", columns=["uuid"]
+    )
+    unique_reinforcement_uuids = set(reinforcement_uuids["uuid"])
+    del reinforcement_uuids
+
+    velocity_uuids = pd.read_parquet(
+        processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet", columns=["uuid"]
+    )
+    unique_velocity_uuids = set(velocity_uuids["uuid"])
+    del velocity_uuids
+
+    all_processed_uuids = unique_photometry_uuids | unique_reinforcement_uuids | unique_velocity_uuids
+
+    with open(file=uuid_file_path, mode="w") as io:
+        json.dump(obj=list(all_processed_uuids), fp=io)
+    return all_processed_uuids
+
+
 def dataset_to_nwb(
+    *,
     processed_path: Union[str, Path],
     raw_dir_path: Union[str, Path],
     output_dir_path: Union[str, Path],
     skip_sessions: set,
-    num_sessions: int = None,
+    number_of_jobs: int,
+    num_sessions_per_experiment: int = None,
 ):
     processed_path = Path(processed_path)
     raw_dir_path = Path(raw_dir_path)
     output_dir_path = Path(output_dir_path)
-    output_dir_path.mkdir(parents=True, exist_ok=True)
 
-    photometry_uuids = pd.read_parquet(
-        processed_path / "dlight_raw_data/dlight_photometry_processed_full.parquet", columns=["uuid"]
-    )
-    photometry_uuids = set(photometry_uuids["uuid"])
-    reinforcement_uuids = pd.read_parquet(
-        processed_path / "optoda_raw_data/closed_loop_behavior.parquet", columns=["uuid"]
-    )
-    reinforcement_uuids = set(reinforcement_uuids["uuid"])
-    velocity_uuids = pd.read_parquet(
-        processed_path / "optoda_raw_data/closed_loop_behavior_velocity_conditioned.parquet", columns=["uuid"]
-    )
-    velocity_uuids = set(velocity_uuids["uuid"])
-    all_processed_uuids = photometry_uuids.union(reinforcement_uuids).union(velocity_uuids)
+    log_folder_path = output_dir_path.parent / "logs"
+    log_folder_path.mkdir(exist_ok=True)
+
+    missing_folder_path = output_dir_path.parent / "missing"
+    missing_folder_path.mkdir(exist_ok=True)
+
+    all_processed_uuids = get_all_processed_uuids(processed_path=processed_path, output_dir_path=output_dir_path)
+
     experimental_folders = [
-        folder for folder in raw_dir_path.iterdir() if folder.is_dir() and folder.name not in skip_experiments
+        folder
+        for folder in raw_dir_path.iterdir()
+        if folder.is_dir() and folder.name not in skip_experiments and folder.name.startswith("_")
     ]
-    for experimental_folder in tqdm(experimental_folders):
+    for experimental_folder in tqdm(iterable=experimental_folders, position=0, desc="Converting experiments..."):
         experiment_type = folder_name_to_experiment_type[experimental_folder.name]
         session_folders = [
             folder for folder in experimental_folder.iterdir() if folder.is_dir() and folder.name not in skip_sessions
         ]
-        if num_sessions is None:
-            num_sessions = len(session_folders) + 1
+        if num_sessions_per_experiment is None:
+            num_sessions_per_experiment = len(session_folders) + 1
         session_num = 0
-        for session_folder in session_folders:
-            print(f"Processing {session_folder.name}")
-            results_file = session_folder / "proc" / "results_00.yaml"
-            results = load_dict_from_file(results_file)
-            session_uuid = results["uuid"]
-            if session_uuid not in all_processed_uuids:
-                continue
-            session_to_nwb(
-                session_uuid=session_uuid,
-                experiment_type=experiment_type,
-                processed_path=processed_path,
-                raw_path=session_folder,
-                output_dir_path=output_dir_path,
-            )
-            session_num += 1
-            if session_num >= num_sessions:
-                break
+
+        futures = list()
+        with ProcessPoolExecutor(max_workers=number_of_jobs) as executor:
+            for session_folder in session_folders:
+                error_identifier_base = f"{experimental_folder.name}_{session_folder.name}"
+
+                results_file = session_folder / "proc" / "results_00.yaml"
+
+                if not results_file.exists():
+                    (missing_folder_path / f"{error_identifier_base}.txt").touch()
+                    continue
+
+                results = load_dict_from_file(results_file)
+                session_uuid = results["uuid"]
+                if session_uuid not in all_processed_uuids:
+                    continue
+
+                futures.append(
+                    executor.submit(
+                        _safe_session_to_nwb,
+                        session_uuid=session_uuid,
+                        experiment_type=experiment_type,
+                        processed_path=processed_path,
+                        raw_path=session_folder,
+                        output_dir_path=output_dir_path,
+                        log_file_path=log_folder_path / f"{error_identifier_base}_{session_uuid}.txt",
+                    ),
+                )
+
+                session_num += 1
+                if session_num >= num_sessions_per_experiment:
+                    break
+
+            parallel_iterable = tqdm(
+                iterable=as_completed(futures), position=1, desc="Converting sessions in parallel..."
+            )
+            for _ in parallel_iterable:
+                pass
@@ -106,37 +167,45 @@
 
 
 if __name__ == "__main__":
-    processed_path = Path("NWB/DattaConv/processed_data")
-    raw_dir_path = Path("NWB/DattaConv/raw_data")
-    output_dir_path = Path("NWB/DattaConv/conversion_output")
-    if output_dir_path.exists():
-        shutil.rmtree(
-            output_dir_path, ignore_errors=True
-        )  # ignore errors due to MacOS race condition (https://github.com/python/cpython/issues/81441)
+    number_of_jobs = 4
+
+    processed_path = Path("E:/Datta/dopamine-reinforces-spontaneous-behavior")
+    raw_dir_path = Path("E:/Datta")
+    output_dir_path = Path("E:/datta_output/files")
+
     skip_experiments = {
         "keypoint",  # no proc folder for keypoints
     }
-    skip_sessions = {
+    temporary_skip_sessions = {
         "session_20210420113646-974717",  # _aggregate_results_arhmm_photometry_excitation_pulsed_01: missing everything except depth video
         "session_20210309134748-687283",  # _aggregate_results_arhmm_excitation_03: missing everything except depth video
         "session_20210224083612-947426",  # _aggregate_results_arhmm_excitation_03: missing proc folder
         "session_20210224094428-535503",  # _aggregate_results_arhmm_excitation_03: missing proc folder
         "session_20210309120607-939403",  # _aggregate_results_arhmm_excitation_03: proc folder empty
         "session_20201109130417-162983",  # _aggregate_results_arhmm_excitation_01: proc folder empty
         "session_20220308114215-760303",  # _aggregate_results_arhmm_scalar_03: missing proc folder
         "session_20211217102637-612299",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
         "session_20211202155132-245700",  # _aggregate_results_arhmm_photometry_06: missing everything except ir video
         "session_20210128093041-475933",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
         "session_20210215185110-281693",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
         "session_20210208173229-833584",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
         "session_20210201115439-569392",  # _aggregate_results_arhmm_photometry_02: missing everything except ir video
         "session_20200729112540-313279",  # _aggregate_results_arhmm_07: missing everything except depth video
         "session_20200810085750-497237",  # _aggregate_results_arhmm_07: missing everything except depth video
         "session_20200730090228-985303",  # _aggregate_results_arhmm_07: missing everything except depth video
         "session_20201207093653-476370",  # _aggregate_results_arhmm_excitation_02: missing everything except depth video
         "session_20210426143230-310843",  # _aggregate_results_arhmm_09: missing everything except depth video
         "session_20210429135801-758690",  # _aggregate_results_arhmm_09: missing everything except depth video
         "session_20191111130454-333065",  # _aggregate_results_arhmm_05: missing proc folder
         "session_20191111130847-263894",  # _aggregate_results_arhmm_05: missing proc folder
+        "session_20200720110309-817092",
+        "session_20210115130943-880998",
     }
-    dataset_to_nwb(processed_path, raw_dir_path, output_dir_path, skip_sessions, num_sessions=1)
+    dataset_to_nwb(
+        processed_path=processed_path,
+        raw_dir_path=raw_dir_path,
+        output_dir_path=output_dir_path,
+        skip_sessions=temporary_skip_sessions,
+        number_of_jobs=number_of_jobs,
+        num_sessions_per_experiment=1,
+    )
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
index 5ffc1ae..e7e177f 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/convert_session.py
@@ -1,18 +1,42 @@
 """Primary script to run to convert an entire session for of data using the NWBConverter."""
-# Standard Library
-from pathlib import Path
 import shutil
+import traceback
+from pathlib import Path
 from typing import Union, Literal
 
-# Third Party
 from neuroconv.utils import dict_deep_update, load_dict_from_file
 from pynwb import NWBHDF5IO
 
-# Local
 from datta_lab_to_nwb.markowitz_gillis_nature_2023.postconversion import reproduce_fig1d
 from datta_lab_to_nwb.markowitz_gillis_nature_2023.nwbconverter import DattaNWBConverter
 
 
+def _safe_session_to_nwb(
+    *,
+    session_uuid: str,
+    processed_path: Union[str, Path],
+    raw_path: Union[str, Path],
+    output_dir_path: Union[str, Path],
+    experiment_type: Literal["reinforcement", "photometry", "reinforcement-photometry", "velocity-modulation"],
+    log_file_path: Path,
+    processed_only: bool = False,
+    stub_test: bool = False,
+):
+    try:
+        session_to_nwb(
+            session_uuid=session_uuid,
+            processed_path=processed_path,
+            raw_path=raw_path,
+            output_dir_path=output_dir_path,
+            experiment_type=experiment_type,
+            processed_only=processed_only,
+            stub_test=stub_test,
+        )
+    except Exception as exception:
+        with open(file=log_file_path, mode="w") as io:
+            io.write(f"{type(exception)}: {str(exception)}\n\n{traceback.format_exc()}")
+
+
 def session_to_nwb(
     session_uuid: str,
     processed_path: Union[str, Path],
@@ -139,7 +163,9 @@ def session_to_nwb(
     metadata = dict_deep_update(metadata, paper_metadata)
 
     # Run conversion
-    converter.run_conversion(metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options)
+    converter.run_conversion(
+        metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options, overwrite=True
+    )
 
 
 if __name__ == "__main__":
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py
index a26efda..cc8c150 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/optogeneticinterface.py
@@ -115,6 +115,12 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation:
             ],
         )
 
+        # Reconstruct optogenetic series from feedback status
+        if pd.isnull(metadata["Optogenetics"]["stim_frequency_Hz"]):  # cts stim
+            data, timestamps = self.reconstruct_cts_stim(metadata, session_df, session_timestamps)
+        else:  # pulsed stim
+            data, timestamps = self.reconstruct_pulsed_stim(metadata, session_df, session_timestamps)
+
         device = nwbfile.create_device(
             name="OptoEngineMRL",
             description="Optogenetic stimulator (Opto Engine MRL-III-635; SKU: RD-635-00500-CWM-SD-03-LED-0)",
@@ -127,11 +133,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict, velocity_modulation:
             excitation_lambda=635.0,
             location=metadata["Optogenetics"]["area"],
         )
-        # Reconstruct optogenetic series from feedback status
-        if pd.isnull(metadata["Optogenetics"]["stim_frequency_Hz"]):  # cts stim
-            data, timestamps = self.reconstruct_cts_stim(metadata, session_df, session_timestamps)
-        else:  # pulsed stim
-            data, timestamps = self.reconstruct_pulsed_stim(metadata, session_df, session_timestamps)
         id2sorted_index = metadata["BehavioralSyllable"]["id2sorted_index"]
         target_syllables = [id2sorted_index[syllable_id] for syllable_id in metadata["Optogenetics"]["target_syllable"]]
         ogen_series = OptogeneticSeries(
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py
index fd76cd8..1fbb2fb 100644
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py
+++ b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/rawfiberphotometryinterface.py
@@ -132,7 +132,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
             ),
             data=H5DataIO(commanded_signal, compression=True),
             frequency=metadata["FiberPhotometry"]["signal_freq"],
-            power=metadata["FiberPhotometry"]["signal_amp"],  # TODO: Fix this in ndx-photometry
+            power=float(metadata["FiberPhotometry"]["signal_amp"]),  # TODO: Fix this in ndx-photometry
             timestamps=H5DataIO(timestamps, compression=True),
             unit="volts",
         )
@@ -144,7 +144,7 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: dict) -> None:
             ),
             data=H5DataIO(commanded_reference, compression=True),
             frequency=metadata["FiberPhotometry"]["reference_freq"],
-            power=metadata["FiberPhotometry"]["reference_amp"],  # TODO: Fix this in ndx-photometry
+            power=float(metadata["FiberPhotometry"]["reference_amp"]),  # TODO: Fix this in ndx-photometry
             timestamps=commanded_signal_series.timestamps,
             unit="volts",
         )
diff --git a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/requirements.txt b/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/requirements.txt
deleted file mode 100644
index 458b8a2..0000000
--- a/src/datta_lab_to_nwb/markowitz_gillis_nature_2023/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-nwb-conversion-tools==0.11.1 # Example of specific pinned dependecy
-some-extra-package==1.11.3 # Example of another extra package that's necessary for the current conversion
-roiextractors @ git+https://github.com/catalystneuro/roiextractors.git@8db5f9cb3a7ee5efee49b7fd0b694c7a8105519a # Github pinned dependency
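The _safe_session_to_nwb wrapper plus ProcessPoolExecutor arrangement in this final patch isolates failures so that one bad session cannot abort the whole run. The same pattern in miniature, with a toy convert standing in for the real conversion:

import traceback
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path


def convert(session_id: str) -> None:
    # Stand-in for the real conversion; fails for some inputs on purpose.
    if session_id.endswith("3"):
        raise ValueError(f"bad session {session_id}")


def safe_convert(session_id: str, log_file_path: Path) -> None:
    # Catch everything in the worker and write a per-session log instead of raising.
    try:
        convert(session_id)
    except Exception as exception:
        with open(log_file_path, mode="w") as io:
            io.write(f"{type(exception)}: {exception}\n\n{traceback.format_exc()}")


if __name__ == "__main__":
    log_folder = Path("logs")
    log_folder.mkdir(exist_ok=True)
    sessions = [f"session_{i}" for i in range(8)]
    with ProcessPoolExecutor(max_workers=4) as executor:
        futures = [
            executor.submit(safe_convert, session, log_folder / f"{session}.txt")
            for session in sessions
        ]
        for future in as_completed(futures):
            future.result()  # only re-raises unexpected pool-level errors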