
Commit

Merge pull request #679 from catalystneuro/fix_path_expander_II
Fix path expander II
h-mayorquin authored Dec 19, 2023
2 parents 29f02ab + 46f3429 commit acd9fb3
Showing 3 changed files with 123 additions and 54 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -1,7 +1,7 @@
# Upcoming

### Bug fixes
* LocalPathExpander matches only `folder_paths` or `file_paths` if that is indicated in the passed specification. [PR #675](https://github.com/catalystneuro/neuroconv/pull/675)
* LocalPathExpander matches only `folder_paths` or `file_paths` if that is indicated in the passed specification. [PR #675](https://github.com/catalystneuro/neuroconv/pull/675) and [PR #679](https://github.com/catalystneuro/neuroconv/pull/679)
* Fixed depth consideration in partial chunking pattern for the ROI data buffer. [PR #677](https://github.com/catalystneuro/neuroconv/pull/677)
* Fix mapping between channel names and the electrode table when writing more than one `ElectricalSeries` to the NWBFile. This fixes an issue when the converter pipeline of `SpikeGLXConverterPipe` was writing the electrode table region of the NIDQ stream incorrectly [PR #678](https://github.com/catalystneuro/neuroconv/pull/678)

30 changes: 29 additions & 1 deletion src/neuroconv/tools/path_expansion.py
@@ -13,13 +13,41 @@

class AbstractPathExpander(abc.ABC):
def extract_metadata(self, base_directory: DirectoryPath, format_: str):
"""
Uses the parse library to extract metadata from file paths in the base_directory.
This method iterates over files in `base_directory`, parsing each file path according to `format_`.
The format string is adjusted to the current operating system's path separator. The method yields
each file path and its corresponding parsed metadata. To constrain metadata matches to only the
name of the file or folder/directory, the method checks that the metadata does not contain the
OS path separator (e.g., '/' or '\\').
Parameters
----------
base_directory : DirectoryPath
The base directory from which to list files for metadata extraction. It should be a path-like
object that is convertible to a `pathlib.Path`.
format_ : str
The format string used for parsing the file paths. This string can represent a path in any
OS format, and is adjusted internally to match the current OS's path separator.
Yields
------
Tuple[Path, Dict[str, Any]]
A tuple containing the file path as a `Path` object and a dictionary of the named metadata
extracted from the file path.
"""

format_ = format_.replace("\\", os.sep) # Actual character is a single back-slash; first is an escape for that
format_ = format_.replace("/", os.sep) # our f-string uses '/' to communicate os-independent separators

for filepath in self.list_directory(base_directory=Path(base_directory)):
result = parse(format_, filepath)
if result:
yield filepath, result.named
named_result = result.named
no_field_in_metadata_contains_os_sep = all(os.sep not in str(val) for val in named_result.values())
if no_field_in_metadata_contains_os_sep:
yield filepath, named_result

@abc.abstractmethod
def list_directory(self, base_directory: DirectoryPath) -> Iterable[FilePath]:
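The separator check added in extract_metadata is easiest to see with the parse library directly. The following is a minimal sketch, not part of the commit, assuming the parse package is installed and a POSIX separator ('/'); the pattern and paths are illustrative and mirror the tests below.

import os
from parse import parse

format_ = "{subject_id}/a_simple_pattern_{session_id}"

# A direct child matches and every captured field is free of separators.
good = parse(format_, "subject1/a_simple_pattern_1")
print(good.named)  # {'subject_id': 'subject1', 'session_id': '1'}

# A nested path also satisfies the pattern, but 'subject_id' swallows a separator.
bad = parse(format_, "subject1/nested/a_simple_pattern_1")
print(bad.named)  # {'subject_id': 'subject1/nested', 'session_id': '1'}

# The check in extract_metadata yields the first match and discards the second.
print(all(os.sep not in str(value) for value in good.named.values()))  # True
print(all(os.sep not in str(value) for value in bad.named.values()))   # False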
145 changes: 93 additions & 52 deletions tests/test_minimal/test_tools/test_expand_paths.py
@@ -2,95 +2,136 @@
import unittest
from datetime import datetime
from pathlib import Path

import pytest
from typing import List, Tuple

from neuroconv.tools import LocalPathExpander
from neuroconv.tools.testing import generate_path_expander_demo_ibl
from neuroconv.utils import NWBMetaDataEncoder


def test_only_folder_match(tmpdir):
base_directory = Path(tmpdir)

sub_directory1 = base_directory / "a_simple_pattern_1"
sub_directory2 = base_directory / "a_simple_pattern_2"
def create_test_directories_and_files(
base_directory: Path, directories_and_files: List[Tuple[List[str], List[str]]]
) -> None:
"""
Create test directories and files in a way that is compatible across different
operating systems.
Parameters
----------
base_directory : Path
The base directory under which all subdirectories and files will be created.
directories_and_files : List[Tuple[List[str], List[str]]]
A list where each element is a tuple. The first element of the tuple is a list
of directory components, and the second element is a list of file names to be
created in that directory.
"""
for directory_components, files in directories_and_files:
# Create directory using Path for OS compatibility
full_directory_path = base_directory.joinpath(*directory_components)
full_directory_path.mkdir(parents=True, exist_ok=True)

# Create files in the directory
for file in files:
(full_directory_path / file).touch()

sub_directory1.mkdir(exist_ok=True)
sub_directory2.mkdir(exist_ok=True)

# Add files with the same name to both folders
file1 = sub_directory1 / "a_simple_pattern_1.bin"
file2 = sub_directory2 / "a_simple_pattern_2.bin"
def test_only_folder_match(tmpdir):
base_directory = Path(tmpdir)

# Create files
file1.touch()
file2.touch()
# Define the directories and files to be created
directories_and_files = [
(["subject1", "a_simple_pattern_1"], ["a_simple_pattern_1.bin"]), # matches
(["subject1"], ["a_simple_pattern_file.bin"]), # matches query but is a file
(["subject2", "a_simple_pattern_2", "nested_directory"], []), # match should not contain nested folder
]

# Add another sub-nested folder with a folder
sub_directory3 = sub_directory1 / "a_simple_pattern_3"
sub_directory3.mkdir(exist_ok=True)
file3 = sub_directory3 / "a_simple_pattern_3.bin"
file3.touch()
# Create test directories and files
create_test_directories_and_files(base_directory, directories_and_files)

# Specify source data (note this assumes the files are arranged in the same way as in the example data)
source_data_spec = {
"a_source": {
"base_directory": base_directory,
"folder_path": "a_simple_pattern_{session_id}",
"folder_path": "{subject_id}/a_simple_pattern_{session_id}",
}
}

# Instantiate LocalPathExpander

path_expander = LocalPathExpander()
metadata_list = path_expander.expand_paths(source_data_spec)
folder_paths = [metadata_match["source_data"]["a_source"]["folder_path"] for metadata_match in metadata_list]

expected = {str(sub_directory1), str(sub_directory2), str(sub_directory3)}
matches_list = path_expander.expand_paths(source_data_spec)

folder_paths = [match["source_data"]["a_source"]["folder_path"] for match in matches_list]
# Note that the nested directory is not included because it does not conform to the pattern
expected = {
str(base_directory.joinpath("subject1", "a_simple_pattern_1")),
str(base_directory.joinpath("subject2", "a_simple_pattern_2")),
}
assert set(folder_paths) == expected

metadata_list = [match["metadata"].to_dict() for match in matches_list]
expected_metadata = [
{"Subject": {"subject_id": "subject1"}, "NWBFile": {"session_id": "1"}},
{"Subject": {"subject_id": "subject2"}, "NWBFile": {"session_id": "2"}},
]

def test_only_file_match(tmpdir):
base_directory = Path(tmpdir)

sub_directory1 = base_directory / "a_simple_pattern_1"
sub_directory2 = base_directory / "a_simple_pattern_2"
# Sort both lists by subject id to ensure order is the same
metadata_list = sorted(metadata_list, key=lambda x: x["Subject"]["subject_id"])
expected_metadata = sorted(expected_metadata, key=lambda x: x["Subject"]["subject_id"])
assert metadata_list == expected_metadata

sub_directory1.mkdir(exist_ok=True)
sub_directory2.mkdir(exist_ok=True)

# Add files with the same name to both folders
file1 = sub_directory1 / "a_simple_pattern_1.bin"
file2 = sub_directory2 / "a_simple_pattern_2.bin"
def test_only_file_match(tmpdir):
base_directory = Path(tmpdir)

# Create files
file1.touch()
file2.touch()
# Define the directories and files to be created
directories_and_files = [
(["subject1", "a_simple_pattern_1"], ["a_simple_pattern_1.bin"]), # matches
(["subject2", "a_simple_pattern_2"], ["a_simple_pattern_2.bin"]), # matches
( # intermediate nested folder breaks match
["subject1", "intermediate_nested", "a_simple_pattern_3"],
["a_simple_pattern_3.bin"],
),
]

# Add another sub-nested folder with a folder
sub_directory3 = sub_directory1 / "a_simple_pattern_3"
sub_directory3.mkdir(exist_ok=True)
file3 = sub_directory3 / "a_simple_pattern_3.bin"
file3.touch()
# Create test directories and files
create_test_directories_and_files(base_directory, directories_and_files)

# Specify source data (note this assumes the files are arranged in the same way as in the example data)
source_data_spec = {
"a_source": {
"base_directory": base_directory,
"file_path": "a_simple_pattern_{session_id}.bin",
"file_path": "{subject_id}/{a_parent_folder}/a_simple_pattern_{session_id}.bin",
}
}

# Instantiate LocalPathExpander

path_expander = LocalPathExpander()
metadata_list = path_expander.expand_paths(source_data_spec)
file_paths = [metadata_match["source_data"]["a_source"]["file_path"] for metadata_match in metadata_list]
matches_list = path_expander.expand_paths(source_data_spec)
file_paths = set(match["source_data"]["a_source"]["file_path"] for match in matches_list)

# Note that the file in the nested folder is not included because it does not conform to the pattern
expected = {
str(base_directory / "subject1" / "a_simple_pattern_1" / "a_simple_pattern_1.bin"),
str(base_directory / "subject2" / "a_simple_pattern_2" / "a_simple_pattern_2.bin"),
}
assert file_paths == expected

metadata_list = [match["metadata"].to_dict() for match in matches_list]
expected_metadata = [
{
"Subject": {"subject_id": "subject1"},
"NWBFile": {"session_id": "1"},
"extras": {"a_parent_folder": "a_simple_pattern_1"},
},
{
"Subject": {"subject_id": "subject2"},
"NWBFile": {"session_id": "2"},
"extras": {"a_parent_folder": "a_simple_pattern_2"},
},
]

expected = {str(file1), str(file2), str(file3)}
assert set(file_paths) == expected
# Sort both lists by subject id to ensure order is the same
metadata_list = sorted(metadata_list, key=lambda x: x["Subject"]["subject_id"])
expected_metadata = sorted(expected_metadata, key=lambda x: x["Subject"]["subject_id"])
assert metadata_list == expected_metadata


def test_expand_paths(tmpdir):
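For reference, the following is a minimal end-to-end sketch, not part of the commit, of how the expander is driven; it follows the tests above, and the temporary directory layout and field names are illustrative assumptions.

import tempfile
from pathlib import Path

from neuroconv.tools import LocalPathExpander

base_directory = Path(tempfile.mkdtemp())
(base_directory / "subject1" / "a_simple_pattern_1").mkdir(parents=True)
(base_directory / "subject1" / "a_simple_pattern_1" / "a_simple_pattern_1.bin").touch()

source_data_spec = {
    "a_source": {
        "base_directory": base_directory,
        "folder_path": "{subject_id}/a_simple_pattern_{session_id}",
    }
}

path_expander = LocalPathExpander()
matches_list = path_expander.expand_paths(source_data_spec)

for match in matches_list:
    print(match["source_data"]["a_source"]["folder_path"])
    # Expected to be roughly {'Subject': {'subject_id': 'subject1'}, 'NWBFile': {'session_id': '1'}}
    print(match["metadata"].to_dict())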
