From a4617190989fb55dc913defad544b52814006357 Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 17 Mar 2023 15:27:48 -0300 Subject: [PATCH 01/47] add code for extracting and reshaping metadata from dynamic paths --- requirements-minimal.txt | 1 + src/neuroconv/utils/__init__.py | 2 +- src/neuroconv/utils/globbing.py | 111 ++++---------- .../test_utils/test_globbing_utils.py | 145 +++++++----------- 4 files changed, 88 insertions(+), 171 deletions(-) diff --git a/requirements-minimal.txt b/requirements-minimal.txt index a5c5a4071..532689ebe 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -10,3 +10,4 @@ psutil>=5.8.0 tqdm>=4.60.0 dandi>=0.46.2 pandas +parse \ No newline at end of file diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index a21cab39d..ea96f7d6b 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -5,7 +5,7 @@ exist_dict_in_list, load_dict_from_file, ) -from .globbing import decompose_f_string, parse_f_string +from .globbing import parse_glob_directory from .json_schema import ( NWBMetaDataEncoder, fill_defaults, diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index 2f84b7abe..e28330d3e 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -1,89 +1,42 @@ -import re -from typing import List -from warnings import warn +from glob import glob +import os +from parse import parse -def decompose_f_string(f_string: str) -> (List[str], List[str]): - """ - Decompose an f-string into the list of variable names and the separators between them. - An f-string is any string that contains enclosed curly brackets around text. - A variable is defined as the text expression within the enclosed curly brackets. - The separators are the strings remnants that surround the variables. 
+def parse_glob_directory(path, format_): + path = str(path) + for filepath in glob(os.path.join(path, "**", "*"), recursive=True): + print(filepath) + filepath = filepath[len(path) + 1:] + result = parse(format_, filepath) + if result: + yield filepath, result.named - An example f-string and components would be: 'This is {an} f-string!', with variable 'an' and separators - 'This is ' and ' f-string!'. - An instance of this example would be: 'This is definitely a good f-string!' with variable value 'definitely a good'. +from collections import defaultdict - Example - ------- - variable_names, separators = decompose_f_string(f_string="a/{x}b{y}/c{z}") - # variable_names = ["x", "y", "z"] - # separators = ["a/", "b", "/c"", ""] - """ - matches = re.findall("{.*?}", f_string) # {.*?} optionally matches any characters enclosed by curly brackets - variable_names = [match.lstrip("{").rstrip("}") for match in matches] - assert not any( - (variable_name == "" for variable_name in variable_names) - ), "Empty variable name detected in f-string! Please ensure there is text between all enclosing '{' and '}'." - pattern = "^.*?{|}.*?{|}.*?$" - # Description: patttern matches the all expressions outside of curly bracket enclosures - # .*?{ optionally matches any characters optionally before curly bracket opening - # | logical 'or' - # }.*?{ between a curly bracket closure and opening - # | - # }.*? after a closure - separators = [x.rstrip("{").lstrip("}") for x in re.findall(pattern=pattern, string=f_string)] - if any((separator == "" for separator in separators[1:-1])): - warn( - "There is an empty separator between two variables in the f-string! " - "The f-string will not be uniquely invertible." - ) - return variable_names, separators +def ddict(): + return defaultdict(ddict) -def parse_f_string(string: str, f_string: str): - """ - Given an instance of an f-string rule, extract the values of the variables specified by the f-string. 
+def unddict(d): + if isinstance(d, defaultdict): + return {key: unddict(value) for key, value in d.items()} + else: + return d - Recovery of values is only possible in cases where the string instance is uniquely invertible, - which requires at a minimum requires... - 1) Separators between all curly bracket enclosures, *e.g.*, '{var1}{var2}' is not allowed. - An easy way to resolve this is to add a unique separator between them, *i.e.*, '{var1}-{var2}'. - 2) The separator character(s) cannot also occur within the variable values, *e.g.*, '{var1}b{var2}' on - instance 'sub_01bsess_040122' where var1='sub_01 and' and var2='sess_040122'. Since the separator is a single - character 'b' which also occurs in the instance of var1, it cannot be determined which occurrence is the - proper separator. - Resolving this relies on choosing unique separators between variables in the f-string rule; either a single - character that you know will never occur in any of your instances, or preferably a sequence of characters - that would not occur together. In the example above, a simple separator of '-' would suffice, but if other - instances might include that, such as var1='sub-05', then a sequential separator of '--' would work instead. - - Parameters - ---------- - string : str - An instance of the f-string rule. - fstring : str - String containing non-empty substrings enclosed by "{" and "}". - These correspond to the names of variables thought to encode the actual filename string. - """ - variable_names, separators = decompose_f_string(f_string=f_string) - pattern = "^" + "(.+)".join(separators) + "$" # (.+) matches any non-empty sequence of characters - pattern_match = re.findall(pattern=pattern, string=string) - assert pattern_match, "Unable to match f-string pattern to string! Please double check both structures." 
- variable_values = pattern_match[0] - for idx in range(len(variable_values) - 1): - assert ( - separators[idx + 1] not in variable_values[idx] - ), "Adjacent variable values contain the separator character! The f-string is not uniquely invertible." - values = dict() - for variable_name, variable_value in zip(variable_names, variable_values): - if variable_value != values.get(variable_name, variable_value): - raise ValueError( - f"Duplicated variable placements for '{variable_name}' in f-string do not match in instance! " - f"Expected '{values[variable_name]}' but found '{variable_value}'." - ) - values.update({variable_name: variable_value}) - return values +def unpack_experiment_dynamic_paths(data_directory, source_data_spec): + out = ddict() + for interface, source_data in source_data_spec.items(): + for path_type in ("file_path", "folder_path"): + if path_type in source_data: + for path, metadata in parse_glob_directory(data_directory, source_data["file_path"]): + key = tuple(sorted(metadata.items())) + out[key]["source_data"][interface][path_type] = path + if "session_id" in metadata: + out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] + if "subject_id" in metadata: + out[key]["metadata"]["Subject"]["subject_id"] = metadata["subject_id"] + return list(unddict(out).values()) \ No newline at end of file diff --git a/tests/test_minimal/test_utils/test_globbing_utils.py b/tests/test_minimal/test_utils/test_globbing_utils.py index 9c306203d..9baa01677 100644 --- a/tests/test_minimal/test_utils/test_globbing_utils.py +++ b/tests/test_minimal/test_utils/test_globbing_utils.py @@ -1,91 +1,54 @@ -from hdmf.testing import TestCase - -from neuroconv.utils import decompose_f_string, parse_f_string - - -class TestGlobbingAssertions(TestCase): - def test_decompose_f_string_assertion(self): - with self.assertRaisesWith( - exc_type=AssertionError, - exc_msg=( - "Empty variable name detected in f-string! 
Please ensure there is text between all " - "enclosing '{' and '}'." - ), - ): - decompose_f_string(f_string="a/{x}b{y}/c{z}d{}") - - def test_decompose_f_string_separators_assertion(self): - with self.assertWarnsWith( - warn_type=UserWarning, - exc_msg=( - "There is an empty separator between two variables in the f-string! " - "The f-string will not be uniquely invertible." - ), - ): - decompose_f_string(f_string="a/{x}{y}/c{z}") - - def test_parse_f_string_non_invertible_assertion(self): - with self.assertRaisesWith( - exc_type=AssertionError, - exc_msg=( - "Adjacent variable values contain the separator character! The f-string is not uniquely invertible." - ), - ): - parse_f_string(string="a/foobbar/cthat", f_string="a/{x}b{y}/c{z}") - - def test_parse_f_string_bad_structure_assertion(self): - with self.assertRaisesWith( - exc_type=AssertionError, - exc_msg="Unable to match f-string pattern to string! Please double check both structures.", - ): - parse_f_string(string="just/plain/wrong", f_string="a/{x}b{y}/c{z}") - - def test_parse_f_string_duplicated_mismatch_assertion(self): - with self.assertRaisesWith( - exc_type=ValueError, - exc_msg=( - "Duplicated variable placements for 'x' in f-string do not match in instance! " - "Expected 'foo' but found 'wrong'." 
- ), - ): - parse_f_string(string="a/foobthat/cbar/sub-wrong", f_string="a/{x}b{y}/c{z}/sub-{x}") - - -def test_decompose_f_string(): - variable_names, _ = decompose_f_string(f_string="a/{x}b{y}/c{z}") - assert variable_names == ["x", "y", "z"] - - -def test_decompose_f_string_separators(): - _, separators = decompose_f_string(f_string="a/{x}b{y}/c") - assert separators == ["a/", "b", "/c"] - - -def test_decompose_f_string_separators_leading(): - _, separators = decompose_f_string(f_string="{start}a/{x}b{y}/c") - assert separators == ["", "a/", "b", "/c"] - - -def test_decompose_f_string_separators_trailing(): - _, separators = decompose_f_string(f_string="a/{x}b{y}/c{end}") - assert separators == ["a/", "b", "/c", ""] - - -def test_parse_f_string(): - f_string_values = parse_f_string(string="a/foobthat/cbar", f_string="a/{x}b{y}/c{z}") - assert f_string_values == dict(x="foo", y="that", z="bar") - - -def test_parse_f_string_leading_value(): - f_string_values = parse_f_string(string="123a/foobthat/cbar", f_string="{start}a/{x}b{y}/c{z}") - assert f_string_values == dict(start="123", x="foo", y="that", z="bar") - - -def test_parse_f_string_no_trailing_value(): - f_string_values = parse_f_string(string="a/foobthat/c", f_string="a/{x}b{y}/c") - assert f_string_values == dict(x="foo", y="that") - - -def test_parse_f_string_duplicates(): - f_string_values = parse_f_string(string="a/foobthat/cbar/sub-foo", f_string="a/{x}b{y}/c{z}/sub-{x}") - assert f_string_values == dict(x="foo", y="that", z="bar") +from pathlib import Path + +from neuroconv.utils.globbing import unpack_experiment_dynamic_paths + + +def test_unpack_experiment_dynamic_paths(tmpdir): + base = Path(tmpdir) + for subject_id in ("001", "002"): + Path.mkdir(base / f"sub-{subject_id}") + for session_id in ("101", "102"): + Path.mkdir(base / f"sub-{subject_id}" / f"session_{session_id}") + (base / f"sub-{subject_id}" / f"session_{session_id}" / "abc").touch() + (base / f"sub-{subject_id}" / 
f"session_{session_id}" / "xyz").touch() + + out = unpack_experiment_dynamic_paths( + base, + dict( + aa=dict(file_path="sub-{subject_id:3}/session_{session_id:3}/abc"), + bb=dict(file_path="sub-{subject_id:3}/session_{session_id:3}/xyz"), + ), + ) + + print(out) + + assert out == [ + { + "source_data": { + "aa": {"file_path": "sub-002/session_101/abc"}, + "bb": {"file_path": "sub-002/session_101/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, + }, + { + "source_data": { + "aa": {"file_path": "sub-002/session_102/abc"}, + "bb": {"file_path": "sub-002/session_102/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, + }, + { + "source_data": { + "aa": {"file_path": "sub-001/session_101/abc"}, + "bb": {"file_path": "sub-001/session_101/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, + }, + { + "source_data": { + "aa": {"file_path": "sub-001/session_102/abc"}, + "bb": {"file_path": "sub-001/session_102/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, + }, + ] From c12b3890565a1aa50e469905d733e9669f1bbd88 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Mar 2023 18:29:51 +0000 Subject: [PATCH 02/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- requirements-minimal.txt | 2 +- src/neuroconv/utils/globbing.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements-minimal.txt b/requirements-minimal.txt index 532689ebe..faf23d20b 100644 --- a/requirements-minimal.txt +++ b/requirements-minimal.txt @@ -10,4 +10,4 @@ psutil>=5.8.0 tqdm>=4.60.0 dandi>=0.46.2 pandas -parse \ No newline at end of file +parse diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index e28330d3e..a0c6d61ba 100644 --- 
a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -1,5 +1,5 @@ -from glob import glob import os +from glob import glob from parse import parse @@ -8,11 +8,12 @@ def parse_glob_directory(path, format_): path = str(path) for filepath in glob(os.path.join(path, "**", "*"), recursive=True): print(filepath) - filepath = filepath[len(path) + 1:] + filepath = filepath[len(path) + 1 :] result = parse(format_, filepath) if result: yield filepath, result.named + from collections import defaultdict @@ -39,4 +40,4 @@ def unpack_experiment_dynamic_paths(data_directory, source_data_spec): out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] if "subject_id" in metadata: out[key]["metadata"]["Subject"]["subject_id"] = metadata["subject_id"] - return list(unddict(out).values()) \ No newline at end of file + return list(unddict(out).values()) From 3ab6c3f6542161d4ee48a49e0dc83ecaae4ef81d Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 17 Mar 2023 15:30:02 -0300 Subject: [PATCH 03/47] clean up a bit --- src/neuroconv/utils/globbing.py | 4 +--- tests/test_minimal/test_utils/test_globbing_utils.py | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index e28330d3e..442ebaf51 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -1,3 +1,4 @@ +from collections import defaultdict from glob import glob import os @@ -5,7 +6,6 @@ def parse_glob_directory(path, format_): - path = str(path) for filepath in glob(os.path.join(path, "**", "*"), recursive=True): print(filepath) filepath = filepath[len(path) + 1:] @@ -13,8 +13,6 @@ def parse_glob_directory(path, format_): if result: yield filepath, result.named -from collections import defaultdict - def ddict(): return defaultdict(ddict) diff --git a/tests/test_minimal/test_utils/test_globbing_utils.py b/tests/test_minimal/test_utils/test_globbing_utils.py index 9baa01677..d0b723908 100644 
--- a/tests/test_minimal/test_utils/test_globbing_utils.py +++ b/tests/test_minimal/test_utils/test_globbing_utils.py @@ -20,8 +20,6 @@ def test_unpack_experiment_dynamic_paths(tmpdir): ), ) - print(out) - assert out == [ { "source_data": { From 54c8193b9006d719bcfbc736f3336c308c1ad211 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Mar 2023 18:31:09 +0000 Subject: [PATCH 04/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/globbing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index 6bcf75df0..49534aaef 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -1,5 +1,5 @@ -from collections import defaultdict import os +from collections import defaultdict from glob import glob from parse import parse @@ -14,8 +14,6 @@ def parse_glob_directory(path, format_): yield filepath, result.named - - def ddict(): return defaultdict(ddict) From a78c0ec2c1a4e3bd6646540da5d2b61b7d696ff9 Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 17 Mar 2023 16:57:19 -0300 Subject: [PATCH 05/47] added some docstrings --- src/neuroconv/utils/globbing.py | 42 ++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index 49534aaef..57c3cb1a5 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -1,13 +1,32 @@ import os from collections import defaultdict from glob import glob +from typing import Union from parse import parse +from neuroconv.utils import FilePathType, FolderPathType -def parse_glob_directory(path, format_): + +def parse_glob_directory( + path: Union[FilePathType, FolderPathType], + format_: str +): + """Find matching paths and return those paths and extracted metadata + 
+ Parameters + ---------- + path: path-like + Start the recursive search here. + format_: str + An f-string formatted query. + + Returns + ------- + + """ + path = str(path) for filepath in glob(os.path.join(path, "**", "*"), recursive=True): - print(filepath) filepath = filepath[len(path) + 1 :] result = parse(format_, filepath) if result: @@ -15,17 +34,34 @@ def parse_glob_directory(path, format_): def ddict(): + """Create a defaultdict of defaultdicts""" return defaultdict(ddict) def unddict(d): + """Turn a ddict into a normal dictionary""" if isinstance(d, defaultdict): return {key: unddict(value) for key, value in d.items()} else: return d -def unpack_experiment_dynamic_paths(data_directory, source_data_spec): +def unpack_experiment_dynamic_paths( + data_directory: FolderPathType, + source_data_spec: dict, +): + """ + + Parameters + ---------- + data_directory : path-like + Directory where the data are. Start the resursive search here. + source_data_spec : dict + Source spec. + Returns + ------- + + """ out = ddict() for interface, source_data in source_data_spec.items(): for path_type in ("file_path", "folder_path"): From ad827e7e9e274c0bdf9a9dd2d3176f7eee704fd5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Mar 2023 19:58:49 +0000 Subject: [PATCH 06/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/globbing.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index 57c3cb1a5..fdb30c444 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -8,10 +8,7 @@ from neuroconv.utils import FilePathType, FolderPathType -def parse_glob_directory( - path: Union[FilePathType, FolderPathType], - format_: str -): +def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str): """Find 
matching paths and return those paths and extracted metadata Parameters From edce0ab38b885407ab8402ea8d9b6cb08bb7b341 Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 17 Mar 2023 17:22:10 -0300 Subject: [PATCH 07/47] fix import error --- src/neuroconv/utils/globbing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index 57c3cb1a5..81d93cdfb 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -5,7 +5,7 @@ from parse import parse -from neuroconv.utils import FilePathType, FolderPathType +from .types import FilePathType, FolderPathType def parse_glob_directory( From 7d8d2168f034cc8c3730451bb4e562afe1de953d Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Mar 2023 15:53:35 -0500 Subject: [PATCH 08/47] fix tests to handle lists of arbitrary order --- .../test_utils/test_globbing_utils.py | 70 ++++++++++++------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/tests/test_minimal/test_utils/test_globbing_utils.py b/tests/test_minimal/test_utils/test_globbing_utils.py index d0b723908..f138784f5 100644 --- a/tests/test_minimal/test_utils/test_globbing_utils.py +++ b/tests/test_minimal/test_utils/test_globbing_utils.py @@ -3,6 +3,21 @@ from neuroconv.utils.globbing import unpack_experiment_dynamic_paths +# helper functions to test for equivalence between set-like lists of dicts. 
+def freeze(obj): + if isinstance(obj, dict): + return frozenset((k, freeze(v)) for k, v in obj.items()) + elif isinstance(obj, list): + return frozenset(freeze(x) for x in obj) + return obj + + +def are_equivalent_lists(list1, list2): + set1 = set(freeze(x) for x in list1) + set2 = set(freeze(x) for x in list2) + return set1 == set2 + + def test_unpack_experiment_dynamic_paths(tmpdir): base = Path(tmpdir) for subject_id in ("001", "002"): @@ -20,33 +35,36 @@ def test_unpack_experiment_dynamic_paths(tmpdir): ), ) - assert out == [ - { - "source_data": { - "aa": {"file_path": "sub-002/session_101/abc"}, - "bb": {"file_path": "sub-002/session_101/xyz"}, + assert are_equivalent_lists( + out, + [ + { + "source_data": { + "aa": {"file_path": "sub-002/session_101/abc"}, + "bb": {"file_path": "sub-002/session_101/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, }, - "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, - }, - { - "source_data": { - "aa": {"file_path": "sub-002/session_102/abc"}, - "bb": {"file_path": "sub-002/session_102/xyz"}, + { + "source_data": { + "aa": {"file_path": "sub-002/session_102/abc"}, + "bb": {"file_path": "sub-002/session_102/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, }, - "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, - }, - { - "source_data": { - "aa": {"file_path": "sub-001/session_101/abc"}, - "bb": {"file_path": "sub-001/session_101/xyz"}, + { + "source_data": { + "aa": {"file_path": "sub-001/session_101/abc"}, + "bb": {"file_path": "sub-001/session_101/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, }, - "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, - }, - { - "source_data": { - "aa": {"file_path": "sub-001/session_102/abc"}, - "bb": {"file_path": "sub-001/session_102/xyz"}, + { + 
"source_data": { + "aa": {"file_path": "sub-001/session_102/abc"}, + "bb": {"file_path": "sub-001/session_102/xyz"}, + }, + "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, }, - "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, - }, - ] + ] + ) From 90fd79248d9959f254303093c7cc9fde8b33b24a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Mar 2023 20:53:56 +0000 Subject: [PATCH 09/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_minimal/test_utils/test_globbing_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_minimal/test_utils/test_globbing_utils.py b/tests/test_minimal/test_utils/test_globbing_utils.py index f138784f5..20c994eb9 100644 --- a/tests/test_minimal/test_utils/test_globbing_utils.py +++ b/tests/test_minimal/test_utils/test_globbing_utils.py @@ -66,5 +66,5 @@ def test_unpack_experiment_dynamic_paths(tmpdir): }, "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, }, - ] + ], ) From 1e844f94b547813800a31721231dc53a6ac8ddf6 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Mar 2023 15:55:38 -0500 Subject: [PATCH 10/47] fix typo --- src/neuroconv/utils/globbing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/globbing.py index f166f3ea2..0c1ec0c2b 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/globbing.py @@ -52,7 +52,7 @@ def unpack_experiment_dynamic_paths( Parameters ---------- data_directory : path-like - Directory where the data are. Start the resursive search here. + Directory where the data are. Start the recursive search here. source_data_spec : dict Source spec. 
Returns From 3cbd2f5d7e0135c6ffef49d1de1193c81ecbe5e7 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Mar 2023 16:19:16 -0500 Subject: [PATCH 11/47] a bit of renaming --- CHANGELOG.md | 1 + src/neuroconv/utils/__init__.py | 2 +- .../utils/{globbing.py => path_expansion.py} | 32 +++++++++---------- ...globbing_utils.py => test_expand_paths.py} | 12 ++++--- 4 files changed, 26 insertions(+), 21 deletions(-) rename src/neuroconv/utils/{globbing.py => path_expansion.py} (78%) rename tests/test_minimal/test_utils/{test_globbing_utils.py => test_expand_paths.py} (90%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b598eb7f..8291799a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ * `neuroconv.utils.jsonschema.get_schema_from_method_signature` can now support the `Dict[str, str]` typehint, which allows `DataInterface.__init__` and `.run_conversion` to handle dictionary arguments. [PR #360](https://github.com/catalystneuro/neuroconv/pull/360) * Added `neuroconv.tools.testing.data_interface_mixins` module, which contains test suites for different types of DataInterfaces [PR #357](https://github.com/catalystneuro/neuroconv/pull/357) +* Added `expand_paths`. [PR #377](https://github.com/catalystneuro/neuroconv/pull/377) ### Testing * The tests for `automatic_dandi_upload` now follow up-to-date DANDI validation rules for file name conventions. 
[PR #310](https://github.com/catalystneuro/neuroconv/pull/310) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index ea96f7d6b..ec4ee401c 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -5,7 +5,7 @@ exist_dict_in_list, load_dict_from_file, ) -from .globbing import parse_glob_directory +from .path_expansion import parse_glob_directory, expand_paths from .json_schema import ( NWBMetaDataEncoder, fill_defaults, diff --git a/src/neuroconv/utils/globbing.py b/src/neuroconv/utils/path_expansion.py similarity index 78% rename from src/neuroconv/utils/globbing.py rename to src/neuroconv/utils/path_expansion.py index 0c1ec0c2b..942c9d98e 100644 --- a/src/neuroconv/utils/globbing.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,14 +1,14 @@ import os from collections import defaultdict from glob import glob -from typing import Union +from typing import Union, Tuple from parse import parse from .types import FilePathType, FolderPathType -def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str): +def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str) -> Tuple: """Find matching paths and return those paths and extracted metadata Parameters @@ -18,8 +18,11 @@ def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str format_: str An f-string formatted query. 
- Returns - ------- + Yields + ------ + tuple: + filepath: str + metadata: dict """ path = str(path) @@ -30,22 +33,19 @@ def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str yield filepath, result.named -def ddict(): +def _ddict(): """Create a defaultdict of defaultdicts""" - return defaultdict(ddict) + return defaultdict(_ddict) -def unddict(d): +def _unddict(d): """Turn a ddict into a normal dictionary""" - if isinstance(d, defaultdict): - return {key: unddict(value) for key, value in d.items()} - else: - return d + return {key: _unddict(value) for key, value in d.items()} if isinstance(d, defaultdict) else d -def unpack_experiment_dynamic_paths( - data_directory: FolderPathType, - source_data_spec: dict, +def expand_paths( + data_directory: FolderPathType, + source_data_spec: dict, ): """ @@ -59,7 +59,7 @@ def unpack_experiment_dynamic_paths( ------- """ - out = ddict() + out = _ddict() for interface, source_data in source_data_spec.items(): for path_type in ("file_path", "folder_path"): if path_type in source_data: @@ -70,4 +70,4 @@ def unpack_experiment_dynamic_paths( out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] if "subject_id" in metadata: out[key]["metadata"]["Subject"]["subject_id"] = metadata["subject_id"] - return list(unddict(out).values()) + return list(_unddict(out).values()) diff --git a/tests/test_minimal/test_utils/test_globbing_utils.py b/tests/test_minimal/test_utils/test_expand_paths.py similarity index 90% rename from tests/test_minimal/test_utils/test_globbing_utils.py rename to tests/test_minimal/test_utils/test_expand_paths.py index 20c994eb9..ddba345a0 100644 --- a/tests/test_minimal/test_utils/test_globbing_utils.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -1,9 +1,9 @@ from pathlib import Path -from neuroconv.utils.globbing import unpack_experiment_dynamic_paths +from neuroconv.utils import expand_paths -# helper functions to test for equivalence between set-like lists 
of dicts. +# helper functions to test for equivalence between set-like lists of dicts. def freeze(obj): if isinstance(obj, dict): return frozenset((k, freeze(v)) for k, v in obj.items()) @@ -18,7 +18,9 @@ def are_equivalent_lists(list1, list2): return set1 == set2 -def test_unpack_experiment_dynamic_paths(tmpdir): +def test_expand_paths(tmpdir): + + # set up directory for parsing base = Path(tmpdir) for subject_id in ("001", "002"): Path.mkdir(base / f"sub-{subject_id}") @@ -27,7 +29,8 @@ def test_unpack_experiment_dynamic_paths(tmpdir): (base / f"sub-{subject_id}" / f"session_{session_id}" / "abc").touch() (base / f"sub-{subject_id}" / f"session_{session_id}" / "xyz").touch() - out = unpack_experiment_dynamic_paths( + # run path parsing + out = expand_paths( base, dict( aa=dict(file_path="sub-{subject_id:3}/session_{session_id:3}/abc"), @@ -35,6 +38,7 @@ def test_unpack_experiment_dynamic_paths(tmpdir): ), ) + # test results assert are_equivalent_lists( out, [ From 363ed88d00bf4a8019683acdb0fb94bd80208859 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Mar 2023 21:19:30 +0000 Subject: [PATCH 12/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/__init__.py | 2 +- src/neuroconv/utils/path_expansion.py | 6 +++--- tests/test_minimal/test_utils/test_expand_paths.py | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index ec4ee401c..f14730e3a 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -5,7 +5,6 @@ exist_dict_in_list, load_dict_from_file, ) -from .path_expansion import parse_glob_directory, expand_paths from .json_schema import ( NWBMetaDataEncoder, fill_defaults, @@ -16,6 +15,7 @@ get_schema_from_method_signature, unroot_schema, ) +from .path_expansion import expand_paths, parse_glob_directory 
from .types import ( ArrayType, FilePathType, diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 942c9d98e..4c5e3acec 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,7 +1,7 @@ import os from collections import defaultdict from glob import glob -from typing import Union, Tuple +from typing import Tuple, Union from parse import parse @@ -44,8 +44,8 @@ def _unddict(d): def expand_paths( - data_directory: FolderPathType, - source_data_spec: dict, + data_directory: FolderPathType, + source_data_spec: dict, ): """ diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index ddba345a0..608453d37 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -19,7 +19,6 @@ def are_equivalent_lists(list1, list2): def test_expand_paths(tmpdir): - # set up directory for parsing base = Path(tmpdir) for subject_id in ("001", "002"): From 1be8ca1a440db8827b3d16211e6e7717580ae872 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Mar 2023 16:23:56 -0500 Subject: [PATCH 13/47] add some comments --- src/neuroconv/utils/path_expansion.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 942c9d98e..0ad16b300 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -9,7 +9,8 @@ def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str) -> Tuple: - """Find matching paths and return those paths and extracted metadata + """ + Match path to an fstring format and extract metadata from the path. Parameters ---------- @@ -48,6 +49,7 @@ def expand_paths( source_data_spec: dict, ): """ + Match paths in a directory to specs and extract metadata from the paths. 
Parameters ---------- From 7968e0db61730619e9e512af7d57727d29173a42 Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 22 Mar 2023 16:50:04 -0500 Subject: [PATCH 14/47] generalize paths for windows --- src/neuroconv/utils/path_expansion.py | 2 +- .../test_utils/test_expand_paths.py | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 6df79de06..3c20af972 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -28,7 +28,7 @@ def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str """ path = str(path) for filepath in glob(os.path.join(path, "**", "*"), recursive=True): - filepath = filepath[len(path) + 1 :] + filepath = filepath[len(path) + 1:] result = parse(format_, filepath) if result: yield filepath, result.named diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index 608453d37..f10ded6e6 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -1,3 +1,4 @@ +import os from pathlib import Path from neuroconv.utils import expand_paths @@ -32,8 +33,8 @@ def test_expand_paths(tmpdir): out = expand_paths( base, dict( - aa=dict(file_path="sub-{subject_id:3}/session_{session_id:3}/abc"), - bb=dict(file_path="sub-{subject_id:3}/session_{session_id:3}/xyz"), + aa=dict(file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "abc")), + bb=dict(file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "xyz")), ), ) @@ -43,29 +44,29 @@ def test_expand_paths(tmpdir): [ { "source_data": { - "aa": {"file_path": "sub-002/session_101/abc"}, - "bb": {"file_path": "sub-002/session_101/xyz"}, + "aa": {"file_path": os.path.join("sub-002","session_101", "abc")}, + "bb": {"file_path": os.path.join("sub-002", "session_101", "xyz")}, }, 
"metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, }, { "source_data": { - "aa": {"file_path": "sub-002/session_102/abc"}, - "bb": {"file_path": "sub-002/session_102/xyz"}, + "aa": {"file_path": os.path.join("sub-002", "session_102", "abc")}, + "bb": {"file_path": os.path.join("sub-002", "session_102", "xyz")}, }, "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, }, { "source_data": { - "aa": {"file_path": "sub-001/session_101/abc"}, - "bb": {"file_path": "sub-001/session_101/xyz"}, + "aa": {"file_path": os.path.join("sub-001", "session_101", "abc")}, + "bb": {"file_path": os.path.join("sub-001", "session_101", "xyz")}, }, "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, }, { "source_data": { - "aa": {"file_path": "sub-001/session_102/abc"}, - "bb": {"file_path": "sub-001/session_102/xyz"}, + "aa": {"file_path": os.path.join("sub-001", "session_102", "abc")}, + "bb": {"file_path": os.path.join("sub-001", "session_102", "xyz")}, }, "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, }, From b6e376ab45595ca1ce23c71a24454aa30cd5067c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Mar 2023 21:50:19 +0000 Subject: [PATCH 15/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/path_expansion.py | 2 +- tests/test_minimal/test_utils/test_expand_paths.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 3c20af972..6df79de06 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -28,7 +28,7 @@ def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str """ path = str(path) for filepath in glob(os.path.join(path, "**", "*"), recursive=True): 
- filepath = filepath[len(path) + 1:] + filepath = filepath[len(path) + 1 :] result = parse(format_, filepath) if result: yield filepath, result.named diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index f10ded6e6..1b0529ccb 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -44,7 +44,7 @@ def test_expand_paths(tmpdir): [ { "source_data": { - "aa": {"file_path": os.path.join("sub-002","session_101", "abc")}, + "aa": {"file_path": os.path.join("sub-002", "session_101", "abc")}, "bb": {"file_path": os.path.join("sub-002", "session_101", "xyz")}, }, "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, From f8c8624ff98c6cbc0fda69e4c04dc44c1a48a8c2 Mon Sep 17 00:00:00 2001 From: bendichter Date: Thu, 23 Mar 2023 11:54:40 -0500 Subject: [PATCH 16/47] create LocalPathExpander and GoogleDrivePathExpander --- src/neuroconv/tools/google_drive.py | 93 ++++++++++++++++++ src/neuroconv/utils/__init__.py | 2 +- src/neuroconv/utils/path_expansion.py | 132 ++++++++++++++++---------- 3 files changed, 175 insertions(+), 52 deletions(-) create mode 100644 src/neuroconv/tools/google_drive.py diff --git a/src/neuroconv/tools/google_drive.py b/src/neuroconv/tools/google_drive.py new file mode 100644 index 000000000..83d6157f7 --- /dev/null +++ b/src/neuroconv/tools/google_drive.py @@ -0,0 +1,93 @@ +import os.path + +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + +from ..utils.path_expansion import AbstractPathExpander + + +# If modifying these scopes, delete the file token.json. 
+SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly', 'https://www.googleapis.com/auth/drive.readonly'] + + +class GoogleDrivePathExpander(AbstractPathExpander): + """ + + Example + ------- + >>> from neuroconv.tools.google_drive import GoogleDrivePathExpander + >>> path_expander = GoogleDrivePathExpander("/path/to/credentials.json") + >>> list(path_expander.list_directory("1XssGXlQhDco4n8QPYzKX7pkQQKFcRUug")) + >>> path_expander.expand_paths( + ... dict( + ... spikeglx=dict( + ... folder="1XssGXlQhDco4n8QPYzKX7pkQQKFcRUug", + ... paths=dict(file_path="sub-{subject_id}/sub-{subject_id}_ses-{session_id}") + ... ) + ... ) + ... ) + + """ + def __init__(self, credentials_file_path: str): + """ + Initialize a new GoogleDrive. + + Parameters + ---------- + credentials_file_path : str + Path to credentials.json + """ + + creds = None + # The file token.json stores the user's access and refresh tokens, and is + # created automatically when the authorization flow completes for the first + # time. + if os.path.exists('token.json'): + creds = Credentials.from_authorized_user_file('token.json', SCOPES) + # If there are no (valid) credentials available, let the user log in. + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file(credentials_file_path, SCOPES) + creds = flow.run_local_server(port=0) + # Save the credentials for the next run + with open('token.json', 'w') as token: + token.write(creds.to_json()) + + try: + self.service = build('drive', 'v3', credentials=creds) + except HttpError as error: + # TODO(developer) - Handle errors from drive API. + print(f'An error occurred: {error}') + + def list_directory(self, folder): + """ + + Parameters + ---------- + folder: str + String of seemingly random characters in URL of Google Drive folder. 
+ + Yields + ------ + str + + """ + + def _list_dir(item_id: str = None, current_path: str = ""): + query = f"'{item_id}' in parents" + results = self.service.files().list(q=query, fields="nextPageToken, files(id, name, mimeType)").execute() + items = results.get("files", []) + + for item in items: + # If the item is a folder, recursively traverse its contents + new_path = f"{current_path}/{item['name']}" + if item["mimeType"] == "application/vnd.google-apps.folder": + yield from _list_dir(item["id"], new_path) + else: + yield new_path[1:] + yield from _list_dir(folder) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index f14730e3a..8a2c5b972 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -15,7 +15,7 @@ get_schema_from_method_signature, unroot_schema, ) -from .path_expansion import expand_paths, parse_glob_directory +from path_expansion import LocalPathExpander from .types import ( ArrayType, FilePathType, diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 3c20af972..6b8fc97f5 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,41 +1,28 @@ +import abc import os from collections import defaultdict from glob import glob -from typing import Tuple, Union +from typing import Union, List, Dict from parse import parse from .types import FilePathType, FolderPathType -def parse_glob_directory(path: Union[FilePathType, FolderPathType], format_: str) -> Tuple: +def _ddict(): """ - Match path to an fstring format and extract metadata from the path. - - Parameters - ---------- - path: path-like - Start the recursive search here. - format_: str - An f-string formatted query. + Create a defaultdict of defaultdicts - Yields - ------ - tuple: - filepath: str - metadata: dict + This allows you to easily nest hierarchical dictionaries. 
For example, this syntax + >>> a = dict(b=dict(c=dict(d=5))) - """ - path = str(path) - for filepath in glob(os.path.join(path, "**", "*"), recursive=True): - filepath = filepath[len(path) + 1:] - result = parse(format_, filepath) - if result: - yield filepath, result.named + becomes + >>> a = _ddict()["b"]["c"]["d"] = 5 + It becomes particularly useful when modifying an existing hierarchical dictionary, + because the next level is only created if it does not already exist. -def _ddict(): - """Create a defaultdict of defaultdicts""" + """ return defaultdict(_ddict) @@ -44,32 +31,75 @@ def _unddict(d): return {key: _unddict(value) for key, value in d.items()} if isinstance(d, defaultdict) else d -def expand_paths( - data_directory: FolderPathType, - source_data_spec: dict, -): - """ - Match paths in a directory to specs and extract metadata from the paths. +class AbstractPathExpander(abc.ABC): - Parameters - ---------- - data_directory : path-like - Directory where the data are. Start the recursive search here. - source_data_spec : dict - Source spec. 
- Returns - ------- + @ abc.abstractmethod + def __init__(self, *args, **kwargs): + pass - """ - out = _ddict() - for interface, source_data in source_data_spec.items(): - for path_type in ("file_path", "folder_path"): - if path_type in source_data: - for path, metadata in parse_glob_directory(data_directory, source_data["file_path"]): - key = tuple(sorted(metadata.items())) - out[key]["source_data"][interface][path_type] = path - if "session_id" in metadata: - out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] - if "subject_id" in metadata: - out[key]["metadata"]["Subject"]["subject_id"] = metadata["subject_id"] - return list(_unddict(out).values()) + def extract_metadata(self, folder, format_: str): + for filepath in self.list_directory(folder): + result = parse(format_, filepath) + if result: + yield filepath, result.named + + @abc.abstractmethod + def list_directory(self, folder): + """ + List all folders and files in a directory recursively + + Yields + ------ + str + + """ + pass + + def expand_paths(self, source_data_spec: dict) -> List[Dict]: + """ + Match paths in a directory to specs and extract metadata from the paths. + + Parameters + ---------- + folder + source_data_spec : dict + Source spec. + + Returns + ------- + + Examples + -------- + >>> path_expander.expand_paths( + ... dict( + ... spikeglx=dict( + ... folder="source_folder", + ... paths=dict( + ... file_path="sub-{subject_id}/sub-{subject_id}_ses-{session_id}" + ... ) + ... ) + ... ) + ... 
) + + """ + out = _ddict() + for interface, source_data in source_data_spec.items(): + for path_type in ("file_path", "folder_path"): + if path_type in source_data: + for path, metadata in self.extract_metadata(source_data["folder"], source_data["paths"][path_type]): + key = tuple(sorted(metadata.items())) + out[key]["source_data"][interface][path_type] = path + if "session_id" in metadata: + out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] + if "subject_id" in metadata: + out[key]["metadata"]["Subject"]["subject_id"] = metadata["subject_id"] + return list(_unddict(out).values()) + + +class LocalPathExpander(object): + def __init__(self): + pass + + def list_directory(self, folder: Union[FilePathType, FolderPathType]): + li = glob(os.path.join(str(folder), "**", "*"), recursive=True) + yield(x[len(folder) + 1:] for x in li) From ef073f93d89398c659450a95098b50c33a194008 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 23 Mar 2023 16:55:39 +0000 Subject: [PATCH 17/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/google_drive.py | 15 ++++++++------- src/neuroconv/utils/__init__.py | 3 ++- src/neuroconv/utils/path_expansion.py | 7 +++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/neuroconv/tools/google_drive.py b/src/neuroconv/tools/google_drive.py index 83d6157f7..806a09ec1 100644 --- a/src/neuroconv/tools/google_drive.py +++ b/src/neuroconv/tools/google_drive.py @@ -8,9 +8,8 @@ from ..utils.path_expansion import AbstractPathExpander - # If modifying these scopes, delete the file token.json. 
-SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly', 'https://www.googleapis.com/auth/drive.readonly'] +SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly", "https://www.googleapis.com/auth/drive.readonly"] class GoogleDrivePathExpander(AbstractPathExpander): @@ -31,6 +30,7 @@ class GoogleDrivePathExpander(AbstractPathExpander): ... ) """ + def __init__(self, credentials_file_path: str): """ Initialize a new GoogleDrive. @@ -45,8 +45,8 @@ def __init__(self, credentials_file_path: str): # The file token.json stores the user's access and refresh tokens, and is # created automatically when the authorization flow completes for the first # time. - if os.path.exists('token.json'): - creds = Credentials.from_authorized_user_file('token.json', SCOPES) + if os.path.exists("token.json"): + creds = Credentials.from_authorized_user_file("token.json", SCOPES) # If there are no (valid) credentials available, let the user log in. if not creds or not creds.valid: if creds and creds.expired and creds.refresh_token: @@ -55,14 +55,14 @@ def __init__(self, credentials_file_path: str): flow = InstalledAppFlow.from_client_secrets_file(credentials_file_path, SCOPES) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open('token.json', 'w') as token: + with open("token.json", "w") as token: token.write(creds.to_json()) try: - self.service = build('drive', 'v3', credentials=creds) + self.service = build("drive", "v3", credentials=creds) except HttpError as error: # TODO(developer) - Handle errors from drive API. 
- print(f'An error occurred: {error}') + print(f"An error occurred: {error}") def list_directory(self, folder): """ @@ -90,4 +90,5 @@ def _list_dir(item_id: str = None, current_path: str = ""): yield from _list_dir(item["id"], new_path) else: yield new_path[1:] + yield from _list_dir(folder) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index 8a2c5b972..2ddeaae69 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -1,3 +1,5 @@ +from path_expansion import LocalPathExpander + from .checks import calculate_regular_series_rate from .dict import ( append_replace_dict_in_list, @@ -15,7 +17,6 @@ get_schema_from_method_signature, unroot_schema, ) -from path_expansion import LocalPathExpander from .types import ( ArrayType, FilePathType, diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 6b8fc97f5..caa4b55f7 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -2,7 +2,7 @@ import os from collections import defaultdict from glob import glob -from typing import Union, List, Dict +from typing import Dict, List, Union from parse import parse @@ -32,8 +32,7 @@ def _unddict(d): class AbstractPathExpander(abc.ABC): - - @ abc.abstractmethod + @abc.abstractmethod def __init__(self, *args, **kwargs): pass @@ -102,4 +101,4 @@ def __init__(self): def list_directory(self, folder: Union[FilePathType, FolderPathType]): li = glob(os.path.join(str(folder), "**", "*"), recursive=True) - yield(x[len(folder) + 1:] for x in li) + yield (x[len(folder) + 1 :] for x in li) From 1785efae8ed98fcd365b674fbfe341824638d382 Mon Sep 17 00:00:00 2001 From: Garrett Date: Fri, 7 Apr 2023 14:40:23 -0700 Subject: [PATCH 18/47] Fix tests and code for latest path_expansion exports and arguments --- src/neuroconv/utils/__init__.py | 2 +- src/neuroconv/utils/path_expansion.py | 9 +++++---- .../test_utils/test_expand_paths.py | 19 +++++++++++++------ 3 files 
changed, 19 insertions(+), 11 deletions(-) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index 2ddeaae69..0623dc82b 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -1,4 +1,4 @@ -from path_expansion import LocalPathExpander +from .path_expansion import LocalPathExpander from .checks import calculate_regular_series_rate from .dict import ( diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index caa4b55f7..b90cf8440 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -85,7 +85,7 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: for interface, source_data in source_data_spec.items(): for path_type in ("file_path", "folder_path"): if path_type in source_data: - for path, metadata in self.extract_metadata(source_data["folder"], source_data["paths"][path_type]): + for path, metadata in self.extract_metadata(source_data["folder"], source_data[path_type]): key = tuple(sorted(metadata.items())) out[key]["source_data"][interface][path_type] = path if "session_id" in metadata: @@ -95,10 +95,11 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: return list(_unddict(out).values()) -class LocalPathExpander(object): +class LocalPathExpander(AbstractPathExpander): def __init__(self): pass def list_directory(self, folder: Union[FilePathType, FolderPathType]): - li = glob(os.path.join(str(folder), "**", "*"), recursive=True) - yield (x[len(folder) + 1 :] for x in li) + folder_str = str(folder) + li = glob(os.path.join(folder_str, "**", "*"), recursive=True) + return (x[len(folder_str) + 1 :] for x in li) diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index 1b0529ccb..485409c62 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -1,8 +1,7 @@ import os from 
pathlib import Path -from neuroconv.utils import expand_paths - +from neuroconv.utils import LocalPathExpander # helper functions to test for equivalence between set-like lists of dicts. def freeze(obj): @@ -20,6 +19,9 @@ def are_equivalent_lists(list1, list2): def test_expand_paths(tmpdir): + + expander = LocalPathExpander() + # set up directory for parsing base = Path(tmpdir) for subject_id in ("001", "002"): @@ -30,11 +32,16 @@ def test_expand_paths(tmpdir): (base / f"sub-{subject_id}" / f"session_{session_id}" / "xyz").touch() # run path parsing - out = expand_paths( - base, + out = expander.expand_paths( dict( - aa=dict(file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "abc")), - bb=dict(file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "xyz")), + aa=dict( + folder=base, + file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "abc") + ), + bb=dict( + folder=base, + file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "xyz") + ), ), ) From 622e50714a61cc32f848c7d2720f89c08ab0a766 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sat, 8 Apr 2023 11:06:50 -0400 Subject: [PATCH 19/47] Update src/neuroconv/utils/path_expansion.py --- src/neuroconv/utils/path_expansion.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index b90cf8440..88b71257c 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -32,9 +32,6 @@ def _unddict(d): class AbstractPathExpander(abc.ABC): - @abc.abstractmethod - def __init__(self, *args, **kwargs): - pass def extract_metadata(self, folder, format_: str): for filepath in self.list_directory(folder): From cabd7a60247b7e42adaffe19f7ce07b374ec1467 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sat, 8 Apr 2023 11:07:10 -0400 Subject: [PATCH 20/47] Update src/neuroconv/utils/path_expansion.py --- src/neuroconv/utils/path_expansion.py | 2 -- 1 
file changed, 2 deletions(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 88b71257c..bce940f6a 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -93,8 +93,6 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: class LocalPathExpander(AbstractPathExpander): - def __init__(self): - pass def list_directory(self, folder: Union[FilePathType, FolderPathType]): folder_str = str(folder) From 593e0c67afce26935ad2574eb6b780a09eda1136 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Apr 2023 15:42:29 +0000 Subject: [PATCH 21/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/__init__.py | 3 +-- src/neuroconv/utils/path_expansion.py | 2 -- tests/test_minimal/test_utils/test_expand_paths.py | 12 +++--------- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index f6c0ec00f..6b2041426 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -1,5 +1,3 @@ -from .path_expansion import LocalPathExpander - from .checks import calculate_regular_series_rate from .dict import ( append_replace_dict_in_list, @@ -16,6 +14,7 @@ get_schema_from_method_signature, unroot_schema, ) +from .path_expansion import LocalPathExpander from .types import ( ArrayType, FilePathType, diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index bce940f6a..54343f148 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -32,7 +32,6 @@ def _unddict(d): class AbstractPathExpander(abc.ABC): - def extract_metadata(self, folder, format_: str): for filepath in self.list_directory(folder): result = parse(format_, filepath) @@ -93,7 +92,6 @@ def expand_paths(self, 
source_data_spec: dict) -> List[Dict]: class LocalPathExpander(AbstractPathExpander): - def list_directory(self, folder: Union[FilePathType, FolderPathType]): folder_str = str(folder) li = glob(os.path.join(folder_str, "**", "*"), recursive=True) diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index 485409c62..04469beca 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -3,6 +3,7 @@ from neuroconv.utils import LocalPathExpander + # helper functions to test for equivalence between set-like lists of dicts. def freeze(obj): if isinstance(obj, dict): @@ -19,7 +20,6 @@ def are_equivalent_lists(list1, list2): def test_expand_paths(tmpdir): - expander = LocalPathExpander() # set up directory for parsing @@ -34,14 +34,8 @@ def test_expand_paths(tmpdir): # run path parsing out = expander.expand_paths( dict( - aa=dict( - folder=base, - file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "abc") - ), - bb=dict( - folder=base, - file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "xyz") - ), + aa=dict(folder=base, file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "abc")), + bb=dict(folder=base, file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "xyz")), ), ) From 14570cb76cded1310ff60fafbee1761a553b2b0b Mon Sep 17 00:00:00 2001 From: Garrett Date: Mon, 10 Apr 2023 09:32:49 -0700 Subject: [PATCH 22/47] Return absolute paths --- src/neuroconv/utils/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index bce940f6a..fb90d22e9 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -84,7 +84,7 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: if path_type in source_data: for path, metadata in 
self.extract_metadata(source_data["folder"], source_data[path_type]): key = tuple(sorted(metadata.items())) - out[key]["source_data"][interface][path_type] = path + out[key]["source_data"][interface][path_type] = os.path.join(source_data["folder"], path) # return the absolute path if "session_id" in metadata: out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] if "subject_id" in metadata: From 2d80dfe91ac5b0c4bd33153685c8bfe75a67fe67 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Apr 2023 16:36:38 +0000 Subject: [PATCH 23/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/path_expansion.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 4e93ac0b9..3f14bc5e0 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -83,7 +83,9 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: if path_type in source_data: for path, metadata in self.extract_metadata(source_data["folder"], source_data[path_type]): key = tuple(sorted(metadata.items())) - out[key]["source_data"][interface][path_type] = os.path.join(source_data["folder"], path) # return the absolute path + out[key]["source_data"][interface][path_type] = os.path.join( + source_data["folder"], path + ) # return the absolute path if "session_id" in metadata: out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] if "subject_id" in metadata: From 86e69225a3978d8e6d556775ee61c35326258e6d Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 14 Apr 2023 12:00:18 -0400 Subject: [PATCH 24/47] Update src/neuroconv/utils/path_expansion.py Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- src/neuroconv/utils/path_expansion.py | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 3f14bc5e0..8a44c07b6 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -95,6 +95,4 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: class LocalPathExpander(AbstractPathExpander): def list_directory(self, folder: Union[FilePathType, FolderPathType]): - folder_str = str(folder) - li = glob(os.path.join(folder_str, "**", "*"), recursive=True) - return (x[len(folder_str) + 1 :] for x in li) + return (str(path.relative_to(folder)) for path in Path(folder).rglob("*")) From d84755a42d0c2ced3e1a148214b0525b81bb3f6d Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 14 Apr 2023 12:07:32 -0400 Subject: [PATCH 25/47] add DeepDict, rmv google drive path expander --- src/neuroconv/tools/google_drive.py | 94 ---------------------- src/neuroconv/utils/dict.py | 33 ++++++++ src/neuroconv/utils/path_expansion.py | 31 +------ tests/test_minimal/test_utils/test_dict.py | 45 +++++++++++ 4 files changed, 82 insertions(+), 121 deletions(-) delete mode 100644 src/neuroconv/tools/google_drive.py create mode 100644 tests/test_minimal/test_utils/test_dict.py diff --git a/src/neuroconv/tools/google_drive.py b/src/neuroconv/tools/google_drive.py deleted file mode 100644 index 806a09ec1..000000000 --- a/src/neuroconv/tools/google_drive.py +++ /dev/null @@ -1,94 +0,0 @@ -import os.path - -from google.auth.transport.requests import Request -from google.oauth2.credentials import Credentials -from google_auth_oauthlib.flow import InstalledAppFlow -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError - -from ..utils.path_expansion import AbstractPathExpander - -# If modifying these scopes, delete the file token.json. 
-SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly", "https://www.googleapis.com/auth/drive.readonly"] - - -class GoogleDrivePathExpander(AbstractPathExpander): - """ - - Example - ------- - >>> from neuroconv.tools.google_drive import GoogleDrivePathExpander - >>> path_expander = GoogleDrivePathExpander("/path/to/credentials.json") - >>> list(path_expander.list_directory("1XssGXlQhDco4n8QPYzKX7pkQQKFcRUug")) - >>> path_expander.expand_paths( - ... dict( - ... spikeglx=dict( - ... folder="1XssGXlQhDco4n8QPYzKX7pkQQKFcRUug", - ... paths=dict(file_path="sub-{subject_id}/sub-{subject_id}_ses-{session_id}") - ... ) - ... ) - ... ) - - """ - - def __init__(self, credentials_file_path: str): - """ - Initialize a new GoogleDrive. - - Parameters - ---------- - credentials_file_path : str - Path to credentials.json - """ - - creds = None - # The file token.json stores the user's access and refresh tokens, and is - # created automatically when the authorization flow completes for the first - # time. - if os.path.exists("token.json"): - creds = Credentials.from_authorized_user_file("token.json", SCOPES) - # If there are no (valid) credentials available, let the user log in. - if not creds or not creds.valid: - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - else: - flow = InstalledAppFlow.from_client_secrets_file(credentials_file_path, SCOPES) - creds = flow.run_local_server(port=0) - # Save the credentials for the next run - with open("token.json", "w") as token: - token.write(creds.to_json()) - - try: - self.service = build("drive", "v3", credentials=creds) - except HttpError as error: - # TODO(developer) - Handle errors from drive API. - print(f"An error occurred: {error}") - - def list_directory(self, folder): - """ - - Parameters - ---------- - folder: str - String of seemingly random characters in URL of Google Drive folder. 
- - Yields - ------ - str - - """ - - def _list_dir(item_id: str = None, current_path: str = ""): - query = f"'{item_id}' in parents" - results = self.service.files().list(q=query, fields="nextPageToken, files(id, name, mimeType)").execute() - items = results.get("files", []) - - for item in items: - # If the item is a folder, recursively traverse its contents - new_path = f"{current_path}/{item['name']}" - if item["mimeType"] == "application/vnd.google-apps.folder": - yield from _list_dir(item["id"], new_path) - else: - yield new_path[1:] - - yield from _list_dir(folder) diff --git a/src/neuroconv/utils/dict.py b/src/neuroconv/utils/dict.py index cea962030..2360b3913 100644 --- a/src/neuroconv/utils/dict.py +++ b/src/neuroconv/utils/dict.py @@ -1,8 +1,11 @@ import collections.abc import json import warnings +from collections import defaultdict from copy import deepcopy +from ctypes import Union from pathlib import Path +from typing import Any import numpy as np import yaml @@ -198,3 +201,33 @@ def dict_deep_update( dict_to_update[key_to_update] = update_values return dict_to_update + + +class DeepDict(defaultdict): + """A defaultdict of defaultdicts""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(lambda: DeepDict(), *args, **kwargs) + for key, value in self.items(): + if isinstance(value, dict): + self[key] = DeepDict(value) + + def deep_update(self, other: Union[dict, "DeepDict"]) -> None: + for key, value in other.items(): + if key in self and isinstance(self[key], dict) and isinstance(value, dict): + self[key].deep_update(value) + else: + self[key] = value + + def to_dict(self) -> dict: + def _to_dict(d: Union[dict, "DeepDict"]) -> dict: + """Turn a DeepDict into a normal dictionary""" + return {key: _to_dict(value) for key, value in d.items()} if isinstance(d, dict) else d + + return _to_dict(self) + + def __dict__(self) -> dict: + return self.to_dict() + + def __repr__(self) -> str: + return "DeepDict: " + 
dict.__repr__(self.to_dict()) \ No newline at end of file diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 8a44c07b6..f29f99750 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,36 +1,14 @@ import abc import os -from collections import defaultdict -from glob import glob +from pathlib import Path from typing import Dict, List, Union from parse import parse +from .dict import DeepDict from .types import FilePathType, FolderPathType -def _ddict(): - """ - Create a defaultdict of defaultdicts - - This allows you to easily nest hierarchical dictionaries. For example, this syntax - >>> a = dict(b=dict(c=dict(d=5))) - - becomes - >>> a = _ddict()["b"]["c"]["d"] = 5 - - It becomes particularly useful when modifying an existing hierarchical dictionary, - because the next level is only created if it does not already exist. - - """ - return defaultdict(_ddict) - - -def _unddict(d): - """Turn a ddict into a normal dictionary""" - return {key: _unddict(value) for key, value in d.items()} if isinstance(d, defaultdict) else d - - class AbstractPathExpander(abc.ABC): def extract_metadata(self, folder, format_: str): for filepath in self.list_directory(folder): @@ -56,7 +34,6 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: Parameters ---------- - folder source_data_spec : dict Source spec. @@ -77,7 +54,7 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: ... 
) """ - out = _ddict() + out = DeepDict() for interface, source_data in source_data_spec.items(): for path_type in ("file_path", "folder_path"): if path_type in source_data: @@ -90,7 +67,7 @@ def expand_paths(self, source_data_spec: dict) -> List[Dict]: out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] if "subject_id" in metadata: out[key]["metadata"]["Subject"]["subject_id"] = metadata["subject_id"] - return list(_unddict(out).values()) + return list(dict(out).values()) class LocalPathExpander(AbstractPathExpander): diff --git a/tests/test_minimal/test_utils/test_dict.py b/tests/test_minimal/test_utils/test_dict.py new file mode 100644 index 000000000..4e80bd5ba --- /dev/null +++ b/tests/test_minimal/test_utils/test_dict.py @@ -0,0 +1,45 @@ +import unittest + +from neuroconv.utils.dict import DeepDict + + +class TestDeepDict(unittest.TestCase): + def setUp(self): + self.dd = DeepDict() + self.dd["a"]["b"]["c"] = 42 + self.data = {"a": {"b": {"c": 42}}} + + def test_getitem(self): + self.assertEqual(self.dd["a"]["b"]["c"], 42) + + def test_getitem_hashable(self): + dd = DeepDict() + dd["key1"][1][(3,)] = 42 + self.assertEqual(dd["key1"][1][(3,)], 42) + + def test_missing_key(self): + dd = DeepDict() + self.assertIsInstance(dd["non_existent"], DeepDict) + + def test_to_dict(self): + expected = self.data + self.assertEqual(self.dd.to_dict(), expected) + + def test_dict_magic(self): + expected = self.data + self.assertEqual(dict(self.dd), expected) + + def test_recursive_conversion(self): + dd = DeepDict(self.data) + self.assertIsInstance(dd["a"], DeepDict) + self.assertIsInstance(dd["a"]["b"], DeepDict) + + def test_repr(self): + expected_repr = "DeepDict: {'a': {'b': {'c': 42}}}" + self.assertEqual(repr(self.dd), expected_repr) + + def test_deep_update(self): + update_data = {"a": {"b": {"d": 55}, "e": {"f": 66}}, "g": {"h": 77}} + self.dd.deep_update(update_data) + expected = {"a": {"b": {"c": 42, "d": 55}, "e": {"f": 66}}, "g": {"h": 77}} + 
self.assertEqual(dict(self.dd), expected) \ No newline at end of file From e0209108949bb4aa3931effc29e47bb628cf0f9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Apr 2023 16:07:46 +0000 Subject: [PATCH 26/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/dict.py | 2 +- tests/test_minimal/test_utils/test_dict.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/utils/dict.py b/src/neuroconv/utils/dict.py index 2360b3913..9ba6a8364 100644 --- a/src/neuroconv/utils/dict.py +++ b/src/neuroconv/utils/dict.py @@ -230,4 +230,4 @@ def __dict__(self) -> dict: return self.to_dict() def __repr__(self) -> str: - return "DeepDict: " + dict.__repr__(self.to_dict()) \ No newline at end of file + return "DeepDict: " + dict.__repr__(self.to_dict()) diff --git a/tests/test_minimal/test_utils/test_dict.py b/tests/test_minimal/test_utils/test_dict.py index 4e80bd5ba..81ca8dcf4 100644 --- a/tests/test_minimal/test_utils/test_dict.py +++ b/tests/test_minimal/test_utils/test_dict.py @@ -42,4 +42,4 @@ def test_deep_update(self): update_data = {"a": {"b": {"d": 55}, "e": {"f": 66}}, "g": {"h": 77}} self.dd.deep_update(update_data) expected = {"a": {"b": {"c": 42, "d": 55}, "e": {"f": 66}}, "g": {"h": 77}} - self.assertEqual(dict(self.dd), expected) \ No newline at end of file + self.assertEqual(dict(self.dd), expected) From 49352343ebdc1846d7f2ea2642ef35996f60daab Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 17 Apr 2023 13:07:03 -0400 Subject: [PATCH 27/47] Update src/neuroconv/utils/path_expansion.py Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- src/neuroconv/utils/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index f29f99750..5d12078f5 100644 --- 
a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -17,7 +17,7 @@ def extract_metadata(self, folder, format_: str): yield filepath, result.named @abc.abstractmethod - def list_directory(self, folder): + def list_directory(self, folder: Union[FilePathType, FolderPathType]) -> Tuple[str]: """ List all folders and files in a directory recursively From 8aad555be20149f07ae1bd5965a10bb0e7f59a6a Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 17 Apr 2023 13:07:52 -0400 Subject: [PATCH 28/47] Update src/neuroconv/utils/path_expansion.py --- src/neuroconv/utils/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 5d12078f5..c03037a8b 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,7 +1,7 @@ import abc import os from pathlib import Path -from typing import Dict, List, Union +from typing import Dict, List, Union, Tuple from parse import parse From db81aa6e888d4593b043311046619d940e153b0d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Apr 2023 21:11:56 +0000 Subject: [PATCH 29/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/utils/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index c03037a8b..3abae1c32 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,7 +1,7 @@ import abc import os from pathlib import Path -from typing import Dict, List, Union, Tuple +from typing import Dict, List, Tuple, Union from parse import parse From 248678eb207be7e788239c4d1cdf3d138d76d6ca Mon Sep 17 00:00:00 2001 From: bendichter Date: Mon, 17 Apr 2023 18:42:44 -0400 Subject: [PATCH 
30/47] fix tests for absolute paths update typehints to include DeepDict --- src/neuroconv/utils/path_expansion.py | 2 +- .../test_utils/test_expand_paths.py | 76 ++++++++----------- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 3abae1c32..897b1c7df 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -28,7 +28,7 @@ def list_directory(self, folder: Union[FilePathType, FolderPathType]) -> Tuple[s """ pass - def expand_paths(self, source_data_spec: dict) -> List[Dict]: + def expand_paths(self, source_data_spec: dict) -> List[DeepDict]: """ Match paths in a directory to specs and extract metadata from the paths. diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index 04469beca..9d9043d57 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -4,21 +4,6 @@ from neuroconv.utils import LocalPathExpander -# helper functions to test for equivalence between set-like lists of dicts. 
-def freeze(obj): - if isinstance(obj, dict): - return frozenset((k, freeze(v)) for k, v in obj.items()) - elif isinstance(obj, list): - return frozenset(freeze(x) for x in obj) - return obj - - -def are_equivalent_lists(list1, list2): - set1 = set(freeze(x) for x in list1) - set2 = set(freeze(x) for x in list2) - return set1 == set2 - - def test_expand_paths(tmpdir): expander = LocalPathExpander() @@ -39,37 +24,38 @@ def test_expand_paths(tmpdir): ), ) - # test results - assert are_equivalent_lists( - out, - [ - { - "source_data": { - "aa": {"file_path": os.path.join("sub-002", "session_101", "abc")}, - "bb": {"file_path": os.path.join("sub-002", "session_101", "xyz")}, - }, - "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, + expected = [ + { + "source_data": { + "aa": {"file_path": str(base / "sub-002" / "session_101" / "abc")}, + "bb": {"file_path": str(base / "sub-002" / "session_101" / "xyz")}, }, - { - "source_data": { - "aa": {"file_path": os.path.join("sub-002", "session_102", "abc")}, - "bb": {"file_path": os.path.join("sub-002", "session_102", "xyz")}, - }, - "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, + "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, + }, + { + "source_data": { + "aa": {"file_path": str(base / "sub-002" / "session_102" / "abc")}, + "bb": {"file_path": str(base / "sub-002" / "session_102" / "xyz")}, }, - { - "source_data": { - "aa": {"file_path": os.path.join("sub-001", "session_101", "abc")}, - "bb": {"file_path": os.path.join("sub-001", "session_101", "xyz")}, - }, - "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, + "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, + }, + { + "source_data": { + "aa": {"file_path": str(base / "sub-001" / "session_101" / "abc")}, + "bb": {"file_path": str(base / "sub-001" / "session_101" / "xyz")}, }, - { - "source_data": { - "aa": 
{"file_path": os.path.join("sub-001", "session_102", "abc")}, - "bb": {"file_path": os.path.join("sub-001", "session_102", "xyz")}, - }, - "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, + "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, + }, + { + "source_data": { + "aa": {"file_path": str(base / "sub-001" / "session_102" / "abc")}, + "bb": {"file_path": str(base / "sub-001" / "session_102" / "xyz")}, }, - ], - ) + "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, + } + ] + + # test results + for x in out: + assert x in expected + assert len(out) == len(expected) From 5ff50ec04bb90455d973d9f3eac2e2b67c605e34 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Apr 2023 22:42:57 +0000 Subject: [PATCH 31/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_minimal/test_utils/test_expand_paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index 9d9043d57..9dcb7b9e2 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -52,7 +52,7 @@ def test_expand_paths(tmpdir): "bb": {"file_path": str(base / "sub-001" / "session_102" / "xyz")}, }, "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, - } + }, ] # test results From d5aa48df306b77f24f9d3f51dc125ccf321da455 Mon Sep 17 00:00:00 2001 From: bendichter Date: Mon, 17 Apr 2023 18:44:24 -0400 Subject: [PATCH 32/47] rmv unused import --- src/neuroconv/utils/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index 897b1c7df..fdb62bb30 100644 --- 
a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -1,7 +1,7 @@ import abc import os from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import List, Tuple, Union from parse import parse From 861b45b4261e20ca6db2360a671132d73873e373 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 19 Apr 2023 13:27:58 -0400 Subject: [PATCH 33/47] Update src/neuroconv/utils/path_expansion.py Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- src/neuroconv/utils/path_expansion.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index fdb62bb30..ba0a095e4 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -11,6 +11,8 @@ class AbstractPathExpander(abc.ABC): def extract_metadata(self, folder, format_: str): + format_ = format_.replace("\", os.sep) + format_ = format_.replace("/", os.sep) # our f-string uses '/' to communicate os-independent separators for filepath in self.list_directory(folder): result = parse(format_, filepath) if result: From c95ca0732644985f4c652c34621c83eb6aa7de0c Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 19 Apr 2023 13:28:09 -0400 Subject: [PATCH 34/47] Update tests/test_minimal/test_utils/test_expand_paths.py Co-authored-by: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> --- tests/test_minimal/test_utils/test_expand_paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_utils/test_expand_paths.py index 9dcb7b9e2..d211c9803 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_utils/test_expand_paths.py @@ -19,8 +19,8 @@ def test_expand_paths(tmpdir): # run path parsing out = expander.expand_paths( dict( - aa=dict(folder=base, file_path=os.path.join("sub-{subject_id:3}", 
"session_{session_id:3}", "abc")), - bb=dict(folder=base, file_path=os.path.join("sub-{subject_id:3}", "session_{session_id:3}", "xyz")), + aa=dict(folder=base, file_path="sub-{subject_id:3}/session_{session_id:3}/abc"), + bb=dict(folder=base, file_path="sub-{subject_id:3}/session_{session_id:3}/xyz"), ), ) From 33a0786da5b8dc69866173c557aba32225885c5c Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Wed, 19 Apr 2023 14:36:27 -0400 Subject: [PATCH 35/47] Update path_expansion.py --- src/neuroconv/utils/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/utils/path_expansion.py index ba0a095e4..0981ee16c 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/utils/path_expansion.py @@ -11,7 +11,7 @@ class AbstractPathExpander(abc.ABC): def extract_metadata(self, folder, format_: str): - format_ = format_.replace("\", os.sep) + format_ = format_.replace("\\", os.sep) # Actual character is a single back-slash; first is an escape for that format_ = format_.replace("/", os.sep) # our f-string uses '/' to communicate os-independent separators for filepath in self.list_directory(folder): result = parse(format_, filepath) From 0318e86cebd8f99b0a5d288e76b37c74c7c3bd1a Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 20 Apr 2023 10:20:35 -0400 Subject: [PATCH 36/47] move to tools --- src/neuroconv/tools/__init__.py | 1 + .../{utils => tools}/path_expansion.py | 20 +++++++++---------- src/neuroconv/utils/__init__.py | 2 +- .../test_expand_paths.py | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) rename src/neuroconv/{utils => tools}/path_expansion.py (81%) rename tests/test_minimal/{test_utils => test_tools}/test_expand_paths.py (97%) diff --git a/src/neuroconv/tools/__init__.py b/src/neuroconv/tools/__init__.py index 1a5227213..5da6c6d72 100644 --- a/src/neuroconv/tools/__init__.py +++ 
b/src/neuroconv/tools/__init__.py @@ -1 +1,2 @@ from .importing import get_package +from .path_expansion import LocalPathExpander diff --git a/src/neuroconv/utils/path_expansion.py b/src/neuroconv/tools/path_expansion.py similarity index 81% rename from src/neuroconv/utils/path_expansion.py rename to src/neuroconv/tools/path_expansion.py index 0981ee16c..d6728e61a 100644 --- a/src/neuroconv/utils/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -1,12 +1,12 @@ import abc import os from pathlib import Path -from typing import List, Tuple, Union +from typing import List, Iterable, Union from parse import parse +from pydantic import FilePath, DirectoryPath -from .dict import DeepDict -from .types import FilePathType, FolderPathType +from ..utils import DeepDict class AbstractPathExpander(abc.ABC): @@ -19,14 +19,14 @@ def extract_metadata(self, folder, format_: str): yield filepath, result.named @abc.abstractmethod - def list_directory(self, folder: Union[FilePathType, FolderPathType]) -> Tuple[str]: + def list_directory(self, folder: Union[FilePath, DirectoryPath]) -> Iterable[str]: """ - List all folders and files in a directory recursively + List all folders and files in a directory recursively. Yields - ------ - str - + ------- + sub_paths : iterable of strings + Generator that yields all sub-paths of file and folders from the common root `folder`. """ pass @@ -41,6 +41,7 @@ def expand_paths(self, source_data_spec: dict) -> List[DeepDict]: Returns ------- + deep_dicts : list of DeepDict objects Examples -------- @@ -54,7 +55,6 @@ def expand_paths(self, source_data_spec: dict) -> List[DeepDict]: ... ) ... ) ... 
) - """ out = DeepDict() for interface, source_data in source_data_spec.items(): @@ -73,5 +73,5 @@ def expand_paths(self, source_data_spec: dict) -> List[DeepDict]: class LocalPathExpander(AbstractPathExpander): - def list_directory(self, folder: Union[FilePathType, FolderPathType]): + def list_directory(self, folder: Union[FilePath, DirectoryPath]) -> Iterable[str]: return (str(path.relative_to(folder)) for path in Path(folder).rglob("*")) diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index 6b2041426..dd16ae99d 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -4,6 +4,7 @@ dict_deep_update, exist_dict_in_list, load_dict_from_file, + DeepDict, ) from .json_schema import ( NWBMetaDataEncoder, @@ -14,7 +15,6 @@ get_schema_from_method_signature, unroot_schema, ) -from .path_expansion import LocalPathExpander from .types import ( ArrayType, FilePathType, diff --git a/tests/test_minimal/test_utils/test_expand_paths.py b/tests/test_minimal/test_tools/test_expand_paths.py similarity index 97% rename from tests/test_minimal/test_utils/test_expand_paths.py rename to tests/test_minimal/test_tools/test_expand_paths.py index d211c9803..1af85f92a 100644 --- a/tests/test_minimal/test_utils/test_expand_paths.py +++ b/tests/test_minimal/test_tools/test_expand_paths.py @@ -1,7 +1,7 @@ import os from pathlib import Path -from neuroconv.utils import LocalPathExpander +from neuroconv.tools import LocalPathExpander def test_expand_paths(tmpdir): From 8a8aac4b7e900b490e80438c431cc20ecc15ec99 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Apr 2023 14:20:51 +0000 Subject: [PATCH 37/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/path_expansion.py | 4 ++-- src/neuroconv/utils/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index d6728e61a..48888ce5f 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -1,10 +1,10 @@ import abc import os from pathlib import Path -from typing import List, Iterable, Union +from typing import Iterable, List, Union from parse import parse -from pydantic import FilePath, DirectoryPath +from pydantic import DirectoryPath, FilePath from ..utils import DeepDict diff --git a/src/neuroconv/utils/__init__.py b/src/neuroconv/utils/__init__.py index dd16ae99d..5b9aaddc5 100644 --- a/src/neuroconv/utils/__init__.py +++ b/src/neuroconv/utils/__init__.py @@ -1,10 +1,10 @@ from .checks import calculate_regular_series_rate from .dict import ( + DeepDict, append_replace_dict_in_list, dict_deep_update, exist_dict_in_list, load_dict_from_file, - DeepDict, ) from .json_schema import ( NWBMetaDataEncoder, From c5f1bb03656d00bed2ef59ee61917f1d3cbdd5af Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 20 Apr 2023 16:08:14 -0400 Subject: [PATCH 38/47] fix import testts --- src/neuroconv/tools/path_expansion.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index 48888ce5f..a356a6613 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -23,6 +23,11 @@ def list_directory(self, folder: Union[FilePath, DirectoryPath]) -> Iterable[str """ List all folders and files in a directory recursively. + Parameters + ---------- + folder : FilePath or DirectoryPath + The base folder whose contents will be iterated recursively. 
+ Yields ------- sub_paths : iterable of strings From ddccef5527d92048af44c437b50a352048ca3f74 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 24 Apr 2023 11:38:59 -0400 Subject: [PATCH 39/47] Update src/neuroconv/tools/path_expansion.py --- src/neuroconv/tools/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index a356a6613..deae282e3 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -19,7 +19,7 @@ def extract_metadata(self, folder, format_: str): yield filepath, result.named @abc.abstractmethod - def list_directory(self, folder: Union[FilePath, DirectoryPath]) -> Iterable[str]: + def list_directory(self, folder: DirectoryPath) -> Iterable[FilePath]: """ List all folders and files in a directory recursively. From e5a4a645ae1b2bc0eca3aa1d93e7223a82ad5194 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 24 Apr 2023 11:40:56 -0400 Subject: [PATCH 40/47] Update src/neuroconv/tools/path_expansion.py --- src/neuroconv/tools/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index deae282e3..f2d90fd81 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -19,7 +19,7 @@ def extract_metadata(self, folder, format_: str): yield filepath, result.named @abc.abstractmethod - def list_directory(self, folder: DirectoryPath) -> Iterable[FilePath]: + def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]: """ List all folders and files in a directory recursively. 
From 7352a3adda45fe54a7942df27553ecc45d85cb66 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 24 Apr 2023 11:42:00 -0400 Subject: [PATCH 41/47] Update src/neuroconv/tools/path_expansion.py --- src/neuroconv/tools/path_expansion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index f2d90fd81..ca8179744 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -10,10 +10,10 @@ class AbstractPathExpander(abc.ABC): - def extract_metadata(self, folder, format_: str): + def extract_metadata(self, base_directory: DirectoryPath, format_: str): format_ = format_.replace("\\", os.sep) # Actual character is a single back-slash; first is an escape for that format_ = format_.replace("/", os.sep) # our f-string uses '/' to communicate os-independent separators - for filepath in self.list_directory(folder): + for filepath in self.list_directory(base_directory): result = parse(format_, filepath) if result: yield filepath, result.named From ef4fffee3195c89a6b18722ea2907e8103212a6b Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 24 Apr 2023 11:42:41 -0400 Subject: [PATCH 42/47] Update src/neuroconv/tools/path_expansion.py --- src/neuroconv/tools/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index ca8179744..6e42ad40d 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -35,7 +35,7 @@ def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]: """ pass - def expand_paths(self, source_data_spec: dict) -> List[DeepDict]: + def expand_paths(self, source_data_spec: Dict[str,dict]) -> List[DeepDict]: """ Match paths in a directory to specs and extract metadata from the paths. 
From acc7c508542b45676b35e390300a00e9ccd40312 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 24 Apr 2023 11:42:57 -0400 Subject: [PATCH 43/47] Update path_expansion.py --- src/neuroconv/tools/path_expansion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index 6e42ad40d..9131e6bb7 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -1,7 +1,7 @@ import abc import os from pathlib import Path -from typing import Iterable, List, Union +from typing import Iterable, List, Union, Dict from parse import parse from pydantic import DirectoryPath, FilePath From fd03d19babaca88c17eb3bec880dafc187e8d80d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Apr 2023 15:45:16 +0000 Subject: [PATCH 44/47] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/neuroconv/tools/path_expansion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index 9131e6bb7..1a7c3155c 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -1,7 +1,7 @@ import abc import os from pathlib import Path -from typing import Iterable, List, Union, Dict +from typing import Dict, Iterable, List, Union from parse import parse from pydantic import DirectoryPath, FilePath @@ -35,7 +35,7 @@ def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]: """ pass - def expand_paths(self, source_data_spec: Dict[str,dict]) -> List[DeepDict]: + def expand_paths(self, source_data_spec: Dict[str, dict]) -> List[DeepDict]: """ Match paths in a directory to specs and extract metadata from the paths. 
From 46564d1008e590f1e0d7975259f43b1b7ca50bc3 Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 24 Apr 2023 11:48:22 -0400 Subject: [PATCH 45/47] Update src/neuroconv/tools/path_expansion.py Co-authored-by: Ben Dichter --- src/neuroconv/tools/path_expansion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index 1a7c3155c..5f607ca87 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -78,5 +78,5 @@ def expand_paths(self, source_data_spec: Dict[str, dict]) -> List[DeepDict]: class LocalPathExpander(AbstractPathExpander): - def list_directory(self, folder: Union[FilePath, DirectoryPath]) -> Iterable[str]: - return (str(path.relative_to(folder)) for path in Path(folder).rglob("*")) + def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]: + return (str(path.relative_to(base_directory)) for path in Path(folder).rglob("*")) From 5bb48196767ba435c1e86e3ddea99538cecd645b Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Mon, 24 Apr 2023 13:59:24 -0400 Subject: [PATCH 46/47] Update imports.py --- tests/imports.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/imports.py b/tests/imports.py index 4ff317287..8c7191390 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -52,10 +52,12 @@ def test_tools(self): # Sub-modules "importing", # Attached to namespace by importing get_package "nwb_helpers", # Attached to namespace by top __init__ call of NWBConverter + "path_expansion", # Functions and classes imported on the __init__ "get_package", "processes", "deploy_process", + "LocalPathExpander", ] self.assertCountEqual(first=current_structure, second=expected_structure) From c36e2293932b12b0fa7cfdc2fd1eadc18aca2a50 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Mon, 24 Apr 2023 14:21:42 -0400 
Subject: [PATCH 47/47] fix tests to new name --- src/neuroconv/tools/path_expansion.py | 20 ++++++------ .../test_tools/test_expand_paths.py | 31 +++++++++---------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/neuroconv/tools/path_expansion.py b/src/neuroconv/tools/path_expansion.py index 5f607ca87..9bb4d407a 100644 --- a/src/neuroconv/tools/path_expansion.py +++ b/src/neuroconv/tools/path_expansion.py @@ -1,7 +1,8 @@ +"""Helpful classes for expanding file or folder paths on a system given a f-string rule for matching patterns.""" import abc import os from pathlib import Path -from typing import Dict, Iterable, List, Union +from typing import Dict, Iterable, List from parse import parse from pydantic import DirectoryPath, FilePath @@ -25,13 +26,13 @@ def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]: Parameters ---------- - folder : FilePath or DirectoryPath - The base folder whose contents will be iterated recursively. + base_directory : DirectoryPath + The base directory whose contents will be iterated recursively. Yields - ------- + ------ sub_paths : iterable of strings - Generator that yields all sub-paths of file and folders from the common root `folder`. + Generator that yields all sub-paths of file and folders from the common root `base_directory`. """ pass @@ -53,7 +54,7 @@ def expand_paths(self, source_data_spec: Dict[str, dict]) -> List[DeepDict]: >>> path_expander.expand_paths( ... dict( ... spikeglx=dict( - ... folder="source_folder", + ... base_directory="source_folder", ... paths=dict( ... file_path="sub-{subject_id}/sub-{subject_id}_ses-{session_id}" ... 
) @@ -65,10 +66,10 @@ def expand_paths(self, source_data_spec: Dict[str, dict]) -> List[DeepDict]: for interface, source_data in source_data_spec.items(): for path_type in ("file_path", "folder_path"): if path_type in source_data: - for path, metadata in self.extract_metadata(source_data["folder"], source_data[path_type]): + for path, metadata in self.extract_metadata(source_data["base_directory"], source_data[path_type]): key = tuple(sorted(metadata.items())) out[key]["source_data"][interface][path_type] = os.path.join( - source_data["folder"], path + source_data["base_directory"], path ) # return the absolute path if "session_id" in metadata: out[key]["metadata"]["NWBFile"]["session_id"] = metadata["session_id"] @@ -79,4 +80,5 @@ def expand_paths(self, source_data_spec: Dict[str, dict]) -> List[DeepDict]: class LocalPathExpander(AbstractPathExpander): def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]: - return (str(path.relative_to(base_directory)) for path in Path(folder).rglob("*")) + assert base_directory.is_dir(), f"The specified 'base_directory' ({base_directory}) is not a directory!" 
+ return (str(path.relative_to(base_directory)) for path in Path(base_directory).rglob("*")) diff --git a/tests/test_minimal/test_tools/test_expand_paths.py b/tests/test_minimal/test_tools/test_expand_paths.py index 1af85f92a..1dd7ba798 100644 --- a/tests/test_minimal/test_tools/test_expand_paths.py +++ b/tests/test_minimal/test_tools/test_expand_paths.py @@ -1,4 +1,3 @@ -import os from pathlib import Path from neuroconv.tools import LocalPathExpander @@ -8,48 +7,48 @@ def test_expand_paths(tmpdir): expander = LocalPathExpander() # set up directory for parsing - base = Path(tmpdir) + base_directory = Path(tmpdir) for subject_id in ("001", "002"): - Path.mkdir(base / f"sub-{subject_id}") + Path.mkdir(base_directory / f"sub-{subject_id}") for session_id in ("101", "102"): - Path.mkdir(base / f"sub-{subject_id}" / f"session_{session_id}") - (base / f"sub-{subject_id}" / f"session_{session_id}" / "abc").touch() - (base / f"sub-{subject_id}" / f"session_{session_id}" / "xyz").touch() + Path.mkdir(base_directory / f"sub-{subject_id}" / f"session_{session_id}") + (base_directory / f"sub-{subject_id}" / f"session_{session_id}" / "abc").touch() + (base_directory / f"sub-{subject_id}" / f"session_{session_id}" / "xyz").touch() # run path parsing out = expander.expand_paths( dict( - aa=dict(folder=base, file_path="sub-{subject_id:3}/session_{session_id:3}/abc"), - bb=dict(folder=base, file_path="sub-{subject_id:3}/session_{session_id:3}/xyz"), + aa=dict(base_directory=base_directory, file_path="sub-{subject_id:3}/session_{session_id:3}/abc"), + bb=dict(base_directory=base_directory, file_path="sub-{subject_id:3}/session_{session_id:3}/xyz"), ), ) expected = [ { "source_data": { - "aa": {"file_path": str(base / "sub-002" / "session_101" / "abc")}, - "bb": {"file_path": str(base / "sub-002" / "session_101" / "xyz")}, + "aa": {"file_path": str(base_directory / "sub-002" / "session_101" / "abc")}, + "bb": {"file_path": str(base_directory / "sub-002" / "session_101" / "xyz")}, }, 
"metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "002"}}, }, { "source_data": { - "aa": {"file_path": str(base / "sub-002" / "session_102" / "abc")}, - "bb": {"file_path": str(base / "sub-002" / "session_102" / "xyz")}, + "aa": {"file_path": str(base_directory / "sub-002" / "session_102" / "abc")}, + "bb": {"file_path": str(base_directory / "sub-002" / "session_102" / "xyz")}, }, "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "002"}}, }, { "source_data": { - "aa": {"file_path": str(base / "sub-001" / "session_101" / "abc")}, - "bb": {"file_path": str(base / "sub-001" / "session_101" / "xyz")}, + "aa": {"file_path": str(base_directory / "sub-001" / "session_101" / "abc")}, + "bb": {"file_path": str(base_directory / "sub-001" / "session_101" / "xyz")}, }, "metadata": {"NWBFile": {"session_id": "101"}, "Subject": {"subject_id": "001"}}, }, { "source_data": { - "aa": {"file_path": str(base / "sub-001" / "session_102" / "abc")}, - "bb": {"file_path": str(base / "sub-001" / "session_102" / "xyz")}, + "aa": {"file_path": str(base_directory / "sub-001" / "session_102" / "abc")}, + "bb": {"file_path": str(base_directory / "sub-001" / "session_102" / "xyz")}, }, "metadata": {"NWBFile": {"session_id": "102"}, "Subject": {"subject_id": "001"}}, },