-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
NF: tools/text2filetree.py helper to convert some indented text into our filetree example spec. See the tests included within the script.
- Loading branch information
1 parent
5b4b0fd
commit c8c5910
Showing
1 changed file
with
287 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,287 @@ | ||
#!/usr/bin/env python3 | ||
import argparse | ||
import json | ||
import sys | ||
|
||
|
||
def parse_file_tree(input_data, tab_width=4):
    """
    Parse an indentation-based file tree listing into a nested dictionary.

    Each non-blank line names one entry; its depth is given by its leading
    whitespace. An entry is a directory when its name ends with ``/`` or when
    the next non-blank line is indented more deeply; in the latter case a
    trailing ``/`` is appended to the key. Directories map to a nested dict,
    files map to an empty string. A repeated name within the same directory
    overwrites the earlier entry.

    :param input_data: Input string, or a file-like object providing ``readlines()``
    :param tab_width: Number of spaces each tab character is replaced with
    :return: A nested dictionary representing the file tree
    """
    if isinstance(input_data, str):
        raw_lines = input_data.splitlines()
    else:
        raw_lines = input_data.readlines()

    # Normalize every line once, up front: expand tabs, drop trailing
    # whitespace, and discard blank lines. The "does this entry have
    # children?" look-ahead below must see the same indentation the entry
    # itself was measured with, and must not be fooled by blank or
    # whitespace-only lines sitting between a directory and its children.
    entries = []
    for raw in raw_lines:
        line = raw.replace("\t", " " * tab_width).rstrip()
        if not line:
            continue
        name = line.lstrip()
        entries.append((len(line) - len(name), name))

    tree = {}
    # Stack of (directory dict, indentation of that directory's own line).
    # The root uses -1 so it is shallower than any real line and never popped.
    dict_stack = [(tree, -1)]

    for i, (indent_level, current_name) in enumerate(entries):
        # Unwind to the nearest enclosing directory that is strictly shallower.
        while dict_stack[-1][1] >= indent_level:
            dict_stack.pop()
        parent_dict = dict_stack[-1][0]

        has_children = i + 1 < len(entries) and entries[i + 1][0] > indent_level

        if current_name.endswith("/") or has_children:
            # Normalize implicit directories so every directory key ends in "/".
            if not current_name.endswith("/"):
                current_name += "/"
            new_dict = {}
            parent_dict[current_name] = new_dict
            dict_stack.append((new_dict, indent_level))
        else:
            parent_dict[current_name] = ""

    return tree
|
||
|
||
def decorate_output(output_str, decoration_type):
    """
    Wrap the output text according to the requested decoration.

    :param output_str: JSON string to be decorated
    :param decoration_type: Name of the decoration; only "bids-filetree" is
        recognized, any other value leaves the text untouched
    :return: The decorated string, or ``output_str`` unchanged
    """
    # Guard clause: anything other than the one known decoration passes through.
    if decoration_type != "bids-filetree":
        return output_str
    # Quadrupled braces in the f-string render as literal "{{" and "}}".
    return f"{{{{ MACROS___make_filetree_example(\n\n{output_str}\n\n) }}}}"
|
||
|
||
def main():
    """
    Command-line entry point.

    Reads an indented file tree listing from a file or stdin, converts it with
    ``parse_file_tree``, serializes the result as JSON, optionally wraps it via
    ``decorate_output``, and writes it to ``--output-file`` or stdout.
    """
    parser = argparse.ArgumentParser(
        description="Parse file tree hierarchy into a nested dictionary."
    )
    parser.add_argument(
        "input_file",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="Input file to parse (default: stdin)",
    )
    parser.add_argument(
        "--tab-width",
        type=int,
        default=4,
        help="Number of spaces to replace tabs with (default: 4)",
    )
    parser.add_argument(
        "--output-file",
        type=str,
        default=None,
        help="Output file to write the parsed dictionary (default: stdout)",
    )
    parser.add_argument(
        "--indent", type=int, default=2, help="Indentation for JSON output (default: 2)"
    )
    parser.add_argument(
        "-D",
        "--decorate",
        type=str,
        choices=["bids-filetree"],
        default=None,
        help="Decorate the output with a specific format",
    )

    args = parser.parse_args()

    # argparse.FileType opens the input but never closes it; release the
    # handle ourselves once parsing is done. stdin is left open.
    try:
        result = parse_file_tree(args.input_file, args.tab_width)
    finally:
        if args.input_file is not sys.stdin:
            args.input_file.close()

    output_str = json.dumps(result, indent=args.indent)

    if args.decorate:
        output_str = decorate_output(output_str, args.decorate)

    if args.output_file:
        # End the file with a newline so it matches what print() emits on stdout.
        with open(args.output_file, "w") as f:
            f.write(output_str + "\n")
    else:
        print(output_str)
|
||
|
||
def test_example1():
    """
    Test parsing a file tree that mixes top-level files with nested directories
    """
    # The nesting below is expressed purely through indentation; directories
    # are not marked with "/", so they must be inferred from their children.
    # NOTE(review): indent width reconstructed as 4 spaces from expected_output;
    # any consistent width yields the same tree.
    input_tree = """file1
a.dat
sub-1
    subsub
        file.dat
    filehere
anotherfile"""

    expected_output = {
        "file1": "",
        "a.dat": "",
        "sub-1/": {"subsub/": {"file.dat": ""}, "filehere": ""},
        "anotherfile": "",
    }

    result = parse_file_tree(input_tree)

    # Dict equality is a deep comparison of the whole tree
    assert result == expected_output, f"Expected {expected_output}, but got {result}"
|
||
|
||
def test_decorations():
    """
    Check both the decorated and the pass-through paths of decorate_output
    """
    payload = '{"test": "value"}'

    # "bids-filetree" wraps the payload in the macro call
    wrapped = decorate_output(payload, "bids-filetree")
    expected = '{{ MACROS___make_filetree_example(\n\n{"test": "value"}\n\n) }}'
    assert wrapped == expected

    # With no decoration type the payload comes back unchanged
    assert decorate_output(payload, None) == payload
|
||
|
||
def test_more_complex_tree():
    """
    Test a single-root tree with sibling directories and a deeper sub-level
    """
    # NOTE(review): indent width reconstructed as 4 spaces from expected_output;
    # any consistent width yields the same tree.
    input_tree = """root
    subdir1
        file1.txt
        subsubdir
            file2.txt
    subdir2
        file3.txt"""

    expected_output = {
        "root/": {
            "subdir1/": {"file1.txt": "", "subsubdir/": {"file2.txt": ""}},
            "subdir2/": {"file3.txt": ""},
        }
    }

    result = parse_file_tree(input_tree)

    # Dict equality is a deep comparison of the whole tree
    assert result == expected_output, f"Expected {expected_output}, but got {result}"
|
||
|
||
def test_neuroimaging_dataset():
    """
    Test parsing a dataset-style listing where directories are marked with "/"
    """
    # NOTE(review): indent width reconstructed as 4 spaces from expected_output;
    # any consistent width yields the same tree.
    input_tree = """dataset_description.json
tasks.tsv
tasks.json
participants.tsv
sub-A/
    ses-20220101/
        ephys/
            sub-A_ses-20220101_task-nosepoke_ephys.nix
            sub-A_ses-20220101_task-nosepoke_ephys.json
            sub-A_ses-20220101_task-nosepoke_events.tsv
            sub-A_ses-20220101_task-rest_ephys.nix
            sub-A_ses-20220101_task-rest_ephys.json
            sub-A_ses-20220101_channels.tsv
            sub-A_ses-20220101_electrodes.tsv
            sub-A_ses-20220101_probes.tsv
    ses-20220102/
        ephys/
            sub-A_ses-20220102_task-rest_ephys.nix
            sub-A_ses-20220102_task-rest_ephys.json
            sub-A_ses-20220102_channels.tsv
            sub-A_ses-20220102_electrodes.tsv
            sub-A_ses-20220102_probes.tsv"""

    expected_output = {
        "dataset_description.json": "",
        "tasks.tsv": "",
        "tasks.json": "",
        "participants.tsv": "",
        "sub-A/": {
            "ses-20220101/": {
                "ephys/": {
                    "sub-A_ses-20220101_task-nosepoke_ephys.nix": "",
                    "sub-A_ses-20220101_task-nosepoke_ephys.json": "",
                    "sub-A_ses-20220101_task-nosepoke_events.tsv": "",
                    "sub-A_ses-20220101_task-rest_ephys.nix": "",
                    "sub-A_ses-20220101_task-rest_ephys.json": "",
                    "sub-A_ses-20220101_channels.tsv": "",
                    "sub-A_ses-20220101_electrodes.tsv": "",
                    "sub-A_ses-20220101_probes.tsv": "",
                }
            },
            "ses-20220102/": {
                "ephys/": {
                    "sub-A_ses-20220102_task-rest_ephys.nix": "",
                    "sub-A_ses-20220102_task-rest_ephys.json": "",
                    "sub-A_ses-20220102_channels.tsv": "",
                    "sub-A_ses-20220102_electrodes.tsv": "",
                    "sub-A_ses-20220102_probes.tsv": "",
                }
            },
        },
    }

    result = parse_file_tree(input_tree)

    # Dict equality is a deep comparison of the whole tree
    assert result == expected_output, f"Expected {expected_output}, but got {result}"
|
||
|
||
if __name__ == "__main__":
    # Script invocation (not import): hand control to the CLI entry point
    main()