diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 3d685fbe76..7911ad5c82 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -103,9 +103,13 @@ jobs: working-directory: bids-examples - name: Show migrated datasets diff - run: git diff + run: git diff HEAD working-directory: bids-examples + # TODO: commit as a merge from current state of bids-examples + # and prior bids-2.0 branch there, but overloading with new updated + # state and recording commit hash of bids-specification used. + - name: Validate all BIDS datasets using bids-validator after migration run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh working-directory: bids-examples diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py index 684c5d2aa6..1cf0441cf1 100644 --- a/tools/schemacode/bidsschematools/migrations.py +++ b/tools/schemacode/bidsschematools/migrations.py @@ -1,7 +1,11 @@ import json import os import re +import subprocess +from functools import lru_cache +from itertools import chain from pathlib import Path +from typing import Optional import bidsschematools as bst import bidsschematools.utils @@ -11,10 +15,14 @@ TARGET_VERSION = "2.0.0" +class NotBIDSDatasetError(Exception): + pass + + def get_bids_version(dataset_path: Path) -> str: dataset_description = dataset_path / "dataset_description.json" if not dataset_description.exists(): - raise ValueError(f"dataset_description.json not found in {dataset_path}") + raise NotBIDSDatasetError(f"dataset_description.json not found in {dataset_path}") return json.loads(dataset_description.read_text())["BIDSVersion"] @@ -39,7 +47,7 @@ def migrate_participants(dataset_path: Path): old_file = dataset_path / f"participants{ext}" new_file = dataset_path / f"subjects{ext}" if old_file.exists(): - os.rename(old_file, new_file) + rename_path(old_file, new_file) lgr.info(f" - renamed {old_file} to {new_file}") if ext == ".tsv": # Do manual .decode() and .encode() to avoid changing line endings @@ -53,8 +61,12 @@ def migrate_participants(dataset_path: Path): def migrate_dataset(dataset_path): lgr.info(f"Migrating dataset at {dataset_path}") dataset_path = Path(dataset_path) - if get_bids_version(dataset_path) == TARGET_VERSION: - lgr.info(f"Dataset already at version {TARGET_VERSION}") + try: + if get_bids_version(dataset_path) == TARGET_VERSION: + lgr.info(f"Dataset already at version {TARGET_VERSION}") + return + except NotBIDSDatasetError: + lgr.warning("%s not a BIDS dataset, skipping", dataset_path) return # TODO: possibly add a check for BIDS version in dataset_description.json # and skip if already 2.0, although ideally transformations @@ -65,3 +77,32 @@ def migrate_dataset(dataset_path): ]: lgr.info(f" - applying migration {migration.__name__}") migration(dataset_path) + + +@lru_cache +def path_has_git(path: Path) -> bool: + return (path / ".git").exists() + + +def git_topdir(path: Path) -> Optional[Path]: + """Return top-level directory of a git repository containing path, + or None if not under git.""" + path = path.absolute() + for p in chain([path] if path.is_dir() else [], path.parents): + if path_has_git(p): + return p + return None + + +def rename_path(old_path: Path, new_path: Path): + """git aware rename. If under git, use git mv, otherwise just os.rename.""" + # if under git, use git mv but ensure that on border + # crossing (should just use DataLad and `mv` and it would do the right thing!) + if (old_git_top := git_topdir(old_path)) != (new_git_top := git_topdir(new_path)): + raise NotImplementedError( + f"Did not implement moving across git repo boundaries {old_git_top} -> {new_git_top}" + ) + if old_git_top: + subprocess.run(["git", "mv", str(old_path), str(new_path)], check=True, cwd=old_git_top) + else: + os.rename(old_path, new_path)