From 1727705c636a5c48b99eeaf66bac4bbf9742a0cc Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 5 Sep 2023 22:16:28 -0500 Subject: [PATCH 1/8] add `validation` argument to dandi upload --- element_interface/dandi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/element_interface/dandi.py b/element_interface/dandi.py index 078e58a..22486b3 100644 --- a/element_interface/dandi.py +++ b/element_interface/dandi.py @@ -12,6 +12,7 @@ def upload_to_dandi( api_key: str = None, sync: bool = False, existing: str = "refresh", + validation: str = "required", shell=True, # without this param, subprocess interprets first arg as file/dir ): """Upload NWB files to DANDI Archive @@ -27,6 +28,7 @@ def upload_to_dandi( sync (str, optional): If True, delete all files in archive that are not present in the local directory. existing (str, optional): see full description from `dandi upload --help` + validation (str, optional): [require|skip|ignore] see full description from `dandi upload --help` """ working_directory = working_directory or os.path.curdir @@ -84,4 +86,5 @@ def upload_to_dandi( dandi_instance="dandi-staging" if staging else "dandi", existing=existing, sync=sync, + validation=validation, ) From 6582a2dfd3a157d3910086063f0dc6101fd2670c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 5 Sep 2023 22:24:18 -0500 Subject: [PATCH 2/8] remove dry run --- element_interface/dandi.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/element_interface/dandi.py b/element_interface/dandi.py index 22486b3..ef9fd13 100644 --- a/element_interface/dandi.py +++ b/element_interface/dandi.py @@ -59,11 +59,6 @@ def upload_to_dandi( shell=shell, ) - subprocess.run( - ["dandi", "organize", "-d", dandiset_directory, data_directory, "-f", "dry"], - shell=shell, # without this param, subprocess interprets first arg as file/dir - ) - subprocess.run( [ "dandi", From 94f663de10b550ea7c2aaf9fa5287536175e4190 Mon Sep 17 00:00:00 2001 From: A-Baji Date: Thu, 16 Nov 2023 14:18:36 -0600 Subject: [PATCH 3/8] revert: :memo: revert docs dark mode cell text color --- docs/src/.overrides/assets/stylesheets/extra.css | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/src/.overrides/assets/stylesheets/extra.css b/docs/src/.overrides/assets/stylesheets/extra.css index 17eb6b5..70875fc 100644 --- a/docs/src/.overrides/assets/stylesheets/extra.css +++ b/docs/src/.overrides/assets/stylesheets/extra.css @@ -92,7 +92,6 @@ html a[title="YouTube"].md-social__link svg { /* --md-footer-fg-color: var(--dj-white); */ } -[data-md-color-scheme="slate"] td, -th { +[data-md-color-scheme="slate"] .jupyter-wrapper .Table Td { color: var(--dj-black) } From a2429940928f027a36b88095568b04849df6f649 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 1 Feb 2024 13:39:05 -0600 Subject: [PATCH 4/8] feat(utils): decorator for results memoization for expensive function calls --- element_interface/utils.py | 69 +++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/element_interface/utils.py b/element_interface/utils.py index 14d4eee..c3832f4 100644 --- a/element_interface/utils.py +++ b/element_interface/utils.py @@ -5,7 +5,9 @@ import pathlib import sys import uuid - +import json +import pickle +from datetime import datetime from datajoint.utils import to_camel_case logger = logging.getLogger("datajoint") @@ -187,3 +189,68 @@ def __exit__(self, *args): logger.setLevel(self.prev_log_level) sys.stdout.close() sys.stdout = self._original_stdout + + +def 
memoized_result(parameters: dict, output_directory: str): + """ + This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory. + If the function is called with the same parameters and the output files in the directory remain unchanged, + it returns the cached results; otherwise, it executes the function and caches the new results along with metadata. + Conditions for robust usage: + - the "output_directory" is to store exclusively the resulting files generated by this function call only, not a shared space with other functions/processes + - the "parameters" passed to the decorator captures the true and uniqueness of the arguments to be used in the decorated function call + Args: + parameters: parameters that would identify a unique function call + output_directory: directory location for the output files + + Returns: a decorator to enable a function call to memoize/cached the resulting files + """ + + def decorator(func): + def wrapped(*args, **kwargs): + output_dir = _to_Path(output_directory) + input_hash = dict_to_uuid(parameters) + input_hash_fp = output_dir / f".{input_hash}.json" + # check if results already exist (from previous identical run) + output_dir_files_hash = dict_to_uuid( + { + f.relative_to(output_dir).as_posix(): f.stat().st_size + for f in output_dir.rglob("*") + if f.name != f".{input_hash}.json" + } + ) + if input_hash_fp.exists(): + with open(input_hash_fp, "r") as f: + meta = json.load(f) + if str(output_dir_files_hash) == meta["output_dir_files_hash"]: + logger.info(f"Existing results found, skip '{func.__name__}'") + with open(output_dir / f".{input_hash}_results.pickle", "rb") as f: + results = pickle.load(f) + return results + # no results - trigger the run + logger.info(f"No existing results found, calling '{func.__name__}'") + start_time = datetime.utcnow() + results = func(*args, **kwargs) + + with open(output_dir / f".{input_hash}_results.pickle", "wb") as f: + pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL) + + meta = { + "output_dir_files_hash": dict_to_uuid( + { + f.relative_to(output_dir).as_posix(): f.stat().st_size + for f in output_dir.rglob("*") + if f.name != f".{input_hash}.json" + } + ), + "start_time": start_time, + "completion_time": datetime.utcnow(), + } + with open(input_hash_fp, "w") as f: + json.dump(meta, f, default=str) + + return results + + return wrapped + + return decorator From c908bc21bfa506ba03a68c2e4892b934cb9defad Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 1 Feb 2024 16:24:58 -0600 Subject: [PATCH 5/8] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index b18b774..65c4718 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ +datajoint dandi>=0.56.0 numpy From 81f1831695f8278291dd7083c9c6e308c5f357b1 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 1 Feb 2024 16:25:50 -0600 Subject: [PATCH 6/8] Revert "Update requirements.txt" This reverts commit c908bc21bfa506ba03a68c2e4892b934cb9defad. 
--- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 65c4718..b18b774 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -datajoint dandi>=0.56.0 numpy From 48d6aa7fb1168bfcb4db3256a3a1936aaf037c4d Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 2 Feb 2024 12:09:51 -0600 Subject: [PATCH 7/8] Revert "feat(utils): decorator for results memoization for expensive function calls" This reverts commit a2429940928f027a36b88095568b04849df6f649. --- element_interface/utils.py | 69 +------------------------------------- 1 file changed, 1 insertion(+), 68 deletions(-) diff --git a/element_interface/utils.py b/element_interface/utils.py index c3832f4..14d4eee 100644 --- a/element_interface/utils.py +++ b/element_interface/utils.py @@ -5,9 +5,7 @@ import pathlib import sys import uuid -import json -import pickle -from datetime import datetime + from datajoint.utils import to_camel_case logger = logging.getLogger("datajoint") @@ -189,68 +187,3 @@ def __exit__(self, *args): logger.setLevel(self.prev_log_level) sys.stdout.close() sys.stdout = self._original_stdout - - -def memoized_result(parameters: dict, output_directory: str): - """ - This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory. - If the function is called with the same parameters and the output files in the directory remain unchanged, - it returns the cached results; otherwise, it executes the function and caches the new results along with metadata. - Conditions for robust usage: - - the "output_directory" is to store exclusively the resulting files generated by this function call only, not a shared space with other functions/processes - - the "parameters" passed to the decorator captures the true and uniqueness of the arguments to be used in the decorated function call - Args: - parameters: parameters that would identify a unique function call - output_directory: directory location for the output files - - Returns: a decorator to enable a function call to memoize/cached the resulting files - """ - - def decorator(func): - def wrapped(*args, **kwargs): - output_dir = _to_Path(output_directory) - input_hash = dict_to_uuid(parameters) - input_hash_fp = output_dir / f".{input_hash}.json" - # check if results already exist (from previous identical run) - output_dir_files_hash = dict_to_uuid( - { - f.relative_to(output_dir).as_posix(): f.stat().st_size - for f in output_dir.rglob("*") - if f.name != f".{input_hash}.json" - } - ) - if input_hash_fp.exists(): - with open(input_hash_fp, "r") as f: - meta = json.load(f) - if str(output_dir_files_hash) == meta["output_dir_files_hash"]: - logger.info(f"Existing results found, skip '{func.__name__}'") - with open(output_dir / f".{input_hash}_results.pickle", "rb") as f: - results = pickle.load(f) - return results - # no results - trigger the run - logger.info(f"No existing results found, calling '{func.__name__}'") - start_time = datetime.utcnow() - results = func(*args, **kwargs) - - with open(output_dir / f".{input_hash}_results.pickle", "wb") as f: - pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL) - - meta = { - "output_dir_files_hash": dict_to_uuid( - { - f.relative_to(output_dir).as_posix(): f.stat().st_size - for f in output_dir.rglob("*") - if f.name != f".{input_hash}.json" - } - ), - "start_time": start_time, - "completion_time": datetime.utcnow(), - } - with open(input_hash_fp, "w") as f: - json.dump(meta, f, 
default=str) - - return results - - return wrapped - - return decorator From b5803e7216d7cfa12d36fc8cc76ecf37fdfd026b Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 20 May 2024 14:49:42 -0500 Subject: [PATCH 8/8] feat: handle PrairieView new multi-page tif format --- element_interface/prairie_view_loader.py | 104 ++++++++++++++++------- 1 file changed, 75 insertions(+), 29 deletions(-) diff --git a/element_interface/prairie_view_loader.py b/element_interface/prairie_view_loader.py index 6dcda90..f43f7ca 100644 --- a/element_interface/prairie_view_loader.py +++ b/element_interface/prairie_view_loader.py @@ -90,7 +90,7 @@ def get_prairieview_filenames( f".//Sequence/Frame{plane_search}/File{channel_search}" ) - fnames = [f.attrib["filename"] for f in frames] + fnames = np.unique([f.attrib["filename"] for f in frames]).tolist() return fnames if not return_pln_chn else (fnames, plane_idx, channel) def write_single_bigtiff( @@ -114,46 +114,90 @@ def write_single_bigtiff( if output_tiff_fullpath.exists() and not overwrite: return output_tiff_fullpath - if not caiman_compatible: - with tifffile.TiffWriter( - output_tiff_fullpath, - bigtiff=True, - ) as tiff_writer: - try: - for input_file in tiff_names: - with tifffile.TiffFile( - self.prairieview_dir / input_file - ) as tffl: - assert len(tffl.pages) == 1 - tiff_writer.write( - tffl.pages[0].asarray(), - metadata={ - "axes": "YX", - "'fps'": self.meta["frame_rate"], - }, - ) - except Exception as e: - raise Exception(f"Error in processing tiff file {input_file}: {e}") - else: - combined_data = [] + if self.meta["is_multipage"]: + # For multi-page tiff - the pages are organized as: + # (channel x slice x frame) - each page is (height x width) + # - TODO: verify this is the case for Bruker multi-page tiff + # This implementation is partially based on the reference code from `scanreader` package - https://github.com/atlab/scanreader + # See: https://github.com/atlab/scanreader/blob/2a021a85fca011c17e553d0e1c776998d3f2b2d8/scanreader/scans.py#L337 + slice_step = self.meta["num_channels"] + frame_step = self.meta["num_channels"] * self.meta["num_planes"] + slice_idx = self.meta["plane_indices"].index(plane_idx) + channel_idx = self.meta["channels"].index(channel) + + page_indices = [frame_idx * frame_step + slice_idx * slice_step + channel_idx + for frame_idx in range(self.meta["num_frames"])] + + combined_data = np.empty([self.meta["num_frames"], + self.meta["height_in_pixels"], + self.meta["width_in_pixels"]], + dtype=int) + start_page = 0 try: for input_file in tiff_names: with tifffile.TiffFile(self.prairieview_dir / input_file) as tffl: - assert len(tffl.pages) == 1 - combined_data.append(tffl.pages[0].asarray()) + # Get indices in this tiff file and in output array + final_page_in_file = start_page + len(tffl.pages) + is_page_in_file = lambda page: page in range(start_page, final_page_in_file) + pages_in_file = filter(is_page_in_file, page_indices) + file_indices = [page - start_page for page in pages_in_file] + global_indices = [is_page_in_file(page) for page in page_indices] + + # Read from this tiff file (if needed) + if len(file_indices) > 0: + # this line looks a bit ugly but is memory efficient. 
Do not separate + combined_data[global_indices] = tffl.asarray(key=file_indices) + start_page += len(tffl.pages) except Exception as e: raise Exception(f"Error in processing tiff file {input_file}: {e}") - combined_data = np.dstack(combined_data).transpose( - 2, 0, 1 - ) # (frame x height x width) - tifffile.imwrite( output_tiff_fullpath, combined_data, metadata={"axes": "TYX", "'fps'": self.meta["frame_rate"]}, bigtiff=True, ) + else: + if not caiman_compatible: + with tifffile.TiffWriter( + output_tiff_fullpath, + bigtiff=True, + ) as tiff_writer: + try: + for input_file in tiff_names: + with tifffile.TiffFile( + self.prairieview_dir / input_file + ) as tffl: + assert len(tffl.pages) == 1 + tiff_writer.write( + tffl.pages[0].asarray(), + metadata={ + "axes": "YX", + "'fps'": self.meta["frame_rate"], + }, + ) + except Exception as e: + raise Exception(f"Error in processing tiff file {input_file}: {e}") + else: + combined_data = [] + try: + for input_file in tiff_names: + with tifffile.TiffFile(self.prairieview_dir / input_file) as tffl: + assert len(tffl.pages) == 1 + combined_data.append(tffl.pages[0].asarray()) + except Exception as e: + raise Exception(f"Error in processing tiff file {input_file}: {e}") + + combined_data = np.dstack(combined_data).transpose( + 2, 0, 1 + ) # (frame x height x width) + + tifffile.imwrite( + output_tiff_fullpath, + combined_data, + metadata={"axes": "TYX", "'fps'": self.meta["frame_rate"]}, + bigtiff=True, + ) return output_tiff_fullpath @@ -167,6 +211,7 @@ def _extract_prairieview_metadata(xml_filepath: str): bidirectional_scan = False # Does not support bidirectional roi = 0 + is_multipage = xml_root.find(".//Sequence/Frame/File/[@page]") is not None recording_start_time = xml_root.find(".//Sequence/[@cycle='1']").attrib.get("time") # Get all channels and find unique values @@ -310,6 +355,7 @@ def _extract_prairieview_metadata(xml_filepath: str): frame_period=frame_period, bidirectional=bidirectional_scan, bidirectional_z=bidirection_z, + is_multipage=is_multipage, scan_datetime=scan_datetime, usecs_per_line=usec_per_line, scan_duration=total_scan_duration,
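
A hedged usage sketch of the `validation` argument added in PATCH 1. Only `api_key`, `sync`, `existing`, `validation`, and `shell` are visible in the diff; the leading arguments below (`data_directory`, `dandiset_id`, `staging`, `working_directory`) are inferred from the docstring and function body, and every value is a placeholder. Per the docstring, `validation` takes the `dandi upload --help` choices require|skip|ignore and is forwarded to the underlying upload call in the last hunk of the patch:

    from element_interface.dandi import upload_to_dandi

    upload_to_dandi(
        data_directory="/data/nwb_exports",    # placeholder: directory of NWB files to organize and upload
        dandiset_id="000000",                  # placeholder dandiset identifier
        staging=True,                          # upload to the dandi-staging instance
        working_directory="/tmp/dandi_work",   # defaults to the current directory when omitted
        api_key="PERSONAL-DANDI-API-KEY",      # placeholder API key
        existing="refresh",
        validation="skip",                     # new argument; passed through to the dandi upload call
    )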
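
PATCH 4 introduces (and PATCH 7 later reverts) the `memoized_result` decorator factory; below is a minimal sketch of how it would be applied, assuming a hypothetical `run_motion_correction` step and placeholder paths — only `memoized_result` and its `parameters`/`output_directory` arguments come from the patch:

    from element_interface.utils import memoized_result

    # Hypothetical expensive step; per the docstring it should write its outputs
    # only into `output_dir`, which must already exist and not be shared with
    # other functions or processes.
    def run_motion_correction(raw_file, rigid=True):
        ...
        return {"shifts_file": "shifts.npy"}

    params = {"raw_file": "scan0001.tif", "rigid": True}   # uniquely identifies this call
    output_dir = "/data/processed/scan0001"

    run = memoized_result(parameters=params, output_directory=output_dir)(run_motion_correction)
    results = run("scan0001.tif", rigid=True)   # first call: executes, pickles the results, records file hashes
    results = run("scan0001.tif", rigid=True)   # repeat call with unchanged output_dir: returns the cached results

Wrapping at call time (rather than with `@memoized_result(...)` at definition time) keeps the `parameters` dict tied to the specific invocation being memoized.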
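
The multi-page branch in PATCH 8 assumes pages are ordered channel-fastest, then slice, then frame (the patch itself carries a TODO to verify this for Bruker multi-page TIFFs). A small worked sketch of the page-index arithmetic, with illustrative counts only:

    # Illustrative geometry: 2 channels, 3 planes (slices), 4 frames.
    num_channels, num_planes, num_frames = 2, 3, 4

    slice_step = num_channels               # pages between consecutive slices
    frame_step = num_channels * num_planes  # pages between consecutive frames

    def pages_for(channel_idx, slice_idx):
        return [
            frame_idx * frame_step + slice_idx * slice_step + channel_idx
            for frame_idx in range(num_frames)
        ]

    # Channel 1 of slice 2 lands on pages 5, 11, 17, 23 of the concatenated stack.
    assert pages_for(channel_idx=1, slice_idx=2) == [5, 11, 17, 23]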