From 1727705c636a5c48b99eeaf66bac4bbf9742a0cc Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Tue, 5 Sep 2023 22:16:28 -0500
Subject: [PATCH 1/6] add `validation` argument to dandi upload

---
 element_interface/dandi.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/element_interface/dandi.py b/element_interface/dandi.py
index 078e58a..22486b3 100644
--- a/element_interface/dandi.py
+++ b/element_interface/dandi.py
@@ -12,6 +12,7 @@ def upload_to_dandi(
     api_key: str = None,
     sync: bool = False,
     existing: str = "refresh",
+    validation: str = "required",
     shell=True,  # without this param, subprocess interprets first arg as file/dir
 ):
     """Upload NWB files to DANDI Archive
@@ -27,6 +28,7 @@
         sync (str, optional): If True, delete all files in archive that are not
             present in the local directory.
         existing (str, optional): see full description from `dandi upload --help`
+        validation (str, optional): [require|skip|ignore] see full description from `dandi upload --help`
     """
 
     working_directory = working_directory or os.path.curdir
@@ -84,4 +86,5 @@ def upload_to_dandi(
         dandi_instance="dandi-staging" if staging else "dandi",
         existing=existing,
         sync=sync,
+        validation=validation,
     )

From 6582a2dfd3a157d3910086063f0dc6101fd2670c Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Tue, 5 Sep 2023 22:24:18 -0500
Subject: [PATCH 2/6] remove dry run

---
 element_interface/dandi.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/element_interface/dandi.py b/element_interface/dandi.py
index 22486b3..ef9fd13 100644
--- a/element_interface/dandi.py
+++ b/element_interface/dandi.py
@@ -59,11 +59,6 @@
         shell=shell,
     )
 
-    subprocess.run(
-        ["dandi", "organize", "-d", dandiset_directory, data_directory, "-f", "dry"],
-        shell=shell,  # without this param, subprocess interprets first arg as file/dir
-    )
-
     subprocess.run(
         [
             "dandi",
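Note on PATCH 1/6: the new `validation` argument is passed straight through to the underlying dandi upload call, so a caller can require, skip, or ignore NWB validation per upload. A minimal usage sketch, assuming the parameter names not shown in the hunks above (`data_directory`, `dandiset_id`) match those used elsewhere in the function, with placeholder paths, IDs, and API key:

    from element_interface.dandi import upload_to_dandi

    upload_to_dandi(
        data_directory="/path/to/nwb_files",  # placeholder local directory of NWB files
        dandiset_id="000000",                 # placeholder dandiset ID
        staging=True,                         # target the DANDI staging instance
        api_key="xxxxxxxx",                   # placeholder DANDI API key
        sync=False,
        existing="refresh",
        validation="required",                # new argument; see `dandi upload --help` for the option values
    )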
From a2429940928f027a36b88095568b04849df6f649 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Thu, 1 Feb 2024 13:39:05 -0600
Subject: [PATCH 3/6] feat(utils): decorator for results memoization for expensive function calls

---
 element_interface/utils.py | 69 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/element_interface/utils.py b/element_interface/utils.py
index 14d4eee..c3832f4 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -5,7 +5,9 @@
 import pathlib
 import sys
 import uuid
-
+import json
+import pickle
+from datetime import datetime
 from datajoint.utils import to_camel_case
 
 logger = logging.getLogger("datajoint")
@@ -187,3 +189,68 @@ def __exit__(self, *args):
         logger.setLevel(self.prev_log_level)
         sys.stdout.close()
         sys.stdout = self._original_stdout
+
+
+def memoized_result(parameters: dict, output_directory: str):
+    """
+    This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory.
+    If the function is called with the same parameters and the output files in the directory remain unchanged,
+    it returns the cached results; otherwise, it executes the function and caches the new results along with metadata.
+    Conditions for robust usage:
+    - the "output_directory" is to store exclusively the resulting files generated by this function call, not a shared space with other functions/processes
+    - the "parameters" passed to the decorator fully and uniquely capture the arguments to be used in the decorated function call
+    Args:
+        parameters: parameters that would identify a unique function call
+        output_directory: directory location for the output files
+
+    Returns: a decorator to enable a function call to memoize/cache the resulting files
+    """
+
+    def decorator(func):
+        def wrapped(*args, **kwargs):
+            output_dir = _to_Path(output_directory)
+            input_hash = dict_to_uuid(parameters)
+            input_hash_fp = output_dir / f".{input_hash}.json"
+            # check if results already exist (from previous identical run)
+            output_dir_files_hash = dict_to_uuid(
+                {
+                    f.relative_to(output_dir).as_posix(): f.stat().st_size
+                    for f in output_dir.rglob("*")
+                    if f.name != f".{input_hash}.json"
+                }
+            )
+            if input_hash_fp.exists():
+                with open(input_hash_fp, "r") as f:
+                    meta = json.load(f)
+                if str(output_dir_files_hash) == meta["output_dir_files_hash"]:
+                    logger.info(f"Existing results found, skip '{func.__name__}'")
+                    with open(output_dir / f".{input_hash}_results.pickle", "rb") as f:
+                        results = pickle.load(f)
+                    return results
+            # no results - trigger the run
+            logger.info(f"No existing results found, calling '{func.__name__}'")
+            start_time = datetime.utcnow()
+            results = func(*args, **kwargs)
+
+            with open(output_dir / f".{input_hash}_results.pickle", "wb") as f:
+                pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+            meta = {
+                "output_dir_files_hash": dict_to_uuid(
+                    {
+                        f.relative_to(output_dir).as_posix(): f.stat().st_size
+                        for f in output_dir.rglob("*")
+                        if f.name != f".{input_hash}.json"
+                    }
+                ),
+                "start_time": start_time,
+                "completion_time": datetime.utcnow(),
+            }
+            with open(input_hash_fp, "w") as f:
+                json.dump(meta, f, default=str)
+
+            return results
+
+        return wrapped
+
+    return decorator

From c908bc21bfa506ba03a68c2e4892b934cb9defad Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Thu, 1 Feb 2024 16:24:58 -0600
Subject: [PATCH 4/6] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index b18b774..65c4718 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
+datajoint
 dandi>=0.56.0
 numpy

From 81f1831695f8278291dd7083c9c6e308c5f357b1 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Thu, 1 Feb 2024 16:25:50 -0600
Subject: [PATCH 5/6] Revert "Update requirements.txt"

This reverts commit c908bc21bfa506ba03a68c2e4892b934cb9defad.
---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 65c4718..b18b774 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-datajoint
 dandi>=0.56.0
 numpy
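Note on PATCH 3/6: `memoized_result` is a decorator factory; it is called with the identifying `parameters` and the `output_directory` and returns the decorator proper. The cache key combines a hash of `parameters` with a hash of the relative path and size of every file under `output_directory`, so the decorated function's own arguments only affect cache lookup through `parameters`. A minimal usage sketch with made-up parameter values and paths:

    from element_interface.utils import memoized_result

    params = {"method": "suite2p", "tau": 1.25}  # hypothetical analysis parameters
    output_dir = "/data/processed/scan_001"      # should hold only this step's output files

    @memoized_result(parameters=params, output_directory=output_dir)
    def run_segmentation():
        # ... expensive computation that writes its result files into output_dir ...
        return {"n_rois": 120}

    results = run_segmentation()  # first call: runs, pickles the result, records file hashes
    results = run_segmentation()  # same parameters, unchanged files: returns the cached result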
From 48d6aa7fb1168bfcb4db3256a3a1936aaf037c4d Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Fri, 2 Feb 2024 12:09:51 -0600
Subject: [PATCH 6/6] Revert "feat(utils): decorator for results memoization for expensive function calls"

This reverts commit a2429940928f027a36b88095568b04849df6f649.
---
 element_interface/utils.py | 69 +-------------------------------------
 1 file changed, 1 insertion(+), 68 deletions(-)

diff --git a/element_interface/utils.py b/element_interface/utils.py
index c3832f4..14d4eee 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -5,9 +5,7 @@
 import pathlib
 import sys
 import uuid
-import json
-import pickle
-from datetime import datetime
+
 from datajoint.utils import to_camel_case
 
 logger = logging.getLogger("datajoint")
@@ -189,68 +187,3 @@ def __exit__(self, *args):
         logger.setLevel(self.prev_log_level)
         sys.stdout.close()
         sys.stdout = self._original_stdout
-
-
-def memoized_result(parameters: dict, output_directory: str):
-    """
-    This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory.
-    If the function is called with the same parameters and the output files in the directory remain unchanged,
-    it returns the cached results; otherwise, it executes the function and caches the new results along with metadata.
-    Conditions for robust usage:
-    - the "output_directory" is to store exclusively the resulting files generated by this function call, not a shared space with other functions/processes
-    - the "parameters" passed to the decorator fully and uniquely capture the arguments to be used in the decorated function call
-    Args:
-        parameters: parameters that would identify a unique function call
-        output_directory: directory location for the output files
-
-    Returns: a decorator to enable a function call to memoize/cache the resulting files
-    """
-
-    def decorator(func):
-        def wrapped(*args, **kwargs):
-            output_dir = _to_Path(output_directory)
-            input_hash = dict_to_uuid(parameters)
-            input_hash_fp = output_dir / f".{input_hash}.json"
-            # check if results already exist (from previous identical run)
-            output_dir_files_hash = dict_to_uuid(
-                {
-                    f.relative_to(output_dir).as_posix(): f.stat().st_size
-                    for f in output_dir.rglob("*")
-                    if f.name != f".{input_hash}.json"
-                }
-            )
-            if input_hash_fp.exists():
-                with open(input_hash_fp, "r") as f:
-                    meta = json.load(f)
-                if str(output_dir_files_hash) == meta["output_dir_files_hash"]:
-                    logger.info(f"Existing results found, skip '{func.__name__}'")
-                    with open(output_dir / f".{input_hash}_results.pickle", "rb") as f:
-                        results = pickle.load(f)
-                    return results
-            # no results - trigger the run
-            logger.info(f"No existing results found, calling '{func.__name__}'")
-            start_time = datetime.utcnow()
-            results = func(*args, **kwargs)
-
-            with open(output_dir / f".{input_hash}_results.pickle", "wb") as f:
-                pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)
-
-            meta = {
-                "output_dir_files_hash": dict_to_uuid(
-                    {
-                        f.relative_to(output_dir).as_posix(): f.stat().st_size
-                        for f in output_dir.rglob("*")
-                        if f.name != f".{input_hash}.json"
-                    }
-                ),
-                "start_time": start_time,
-                "completion_time": datetime.utcnow(),
-            }
-            with open(input_hash_fp, "w") as f:
-                json.dump(meta, f, default=str)
-
-            return results
-
-        return wrapped
-
-    return decorator
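For reference, the freshness check that PATCH 6/6 reverts can be reproduced on its own. A sketch, assuming `dict_to_uuid` is the existing helper in `element_interface.utils` that the decorator calls, and using placeholder parameters and directory:

    import json
    import pathlib

    from element_interface.utils import dict_to_uuid

    output_dir = pathlib.Path("/data/processed/scan_001")          # placeholder output directory
    input_hash = dict_to_uuid({"method": "suite2p", "tau": 1.25})  # placeholder parameters
    hash_file = output_dir / f".{input_hash}.json"

    # Hash of each entry's relative path and size, excluding the metadata file itself,
    # mirroring the computation inside the (reverted) memoized_result decorator.
    current_hash = dict_to_uuid(
        {
            f.relative_to(output_dir).as_posix(): f.stat().st_size
            for f in output_dir.rglob("*")
            if f.name != f".{input_hash}.json"
        }
    )

    is_fresh = (
        hash_file.exists()
        and str(current_hash) == json.loads(hash_file.read_text())["output_dir_files_hash"]
    )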