Skip to content

Commit

Permalink
make migrate continue on non-bids, use git mv under git
Browse files Browse the repository at this point in the history
  • Loading branch information
yarikoptic committed Apr 27, 2024
1 parent 6e0dc20 commit e5494b2
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/validate_bids-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,13 @@ jobs:
working-directory: bids-examples

- name: Show migrated datasets diff
run: git diff
run: git diff HEAD
working-directory: bids-examples

# TODO: commit the result as a merge of the current state of bids-examples
# with the prior bids-2.0 branch there, overriding the tree with the newly
# migrated state and recording the commit hash of bids-specification used.

- name: Validate all BIDS datasets using bids-validator after migration
run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh
working-directory: bids-examples
49 changes: 45 additions & 4 deletions tools/schemacode/bidsschematools/migrations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import json
import os
import re
import subprocess
from functools import lru_cache
from itertools import chain
from pathlib import Path
from typing import Optional

import bidsschematools as bst
import bidsschematools.utils
Expand All @@ -11,10 +15,14 @@
TARGET_VERSION = "2.0.0"


class NotBIDSDatasetError(Exception):
    """Signal that a directory is not a BIDS dataset.

    Raised by ``get_bids_version`` when ``dataset_description.json`` is not
    found in the given dataset path.
    """


def get_bids_version(dataset_path: Path) -> str:
    """Return the ``BIDSVersion`` recorded in ``dataset_description.json``.

    Args:
        dataset_path: Directory expected to contain ``dataset_description.json``.

    Returns:
        The value stored under the ``"BIDSVersion"`` key.

    Raises:
        NotBIDSDatasetError: If ``dataset_description.json`` does not exist
            in ``dataset_path``.
        KeyError: If the file has no ``"BIDSVersion"`` key.
    """
    dataset_description = dataset_path / "dataset_description.json"
    if not dataset_description.exists():
        # Dedicated exception type so callers can skip non-BIDS directories
        # without also swallowing unrelated ValueErrors.
        raise NotBIDSDatasetError(f"dataset_description.json not found in {dataset_path}")
    return json.loads(dataset_description.read_text())["BIDSVersion"]


Expand All @@ -39,7 +47,7 @@ def migrate_participants(dataset_path: Path):
old_file = dataset_path / f"participants{ext}"
new_file = dataset_path / f"subjects{ext}"
if old_file.exists():
os.rename(old_file, new_file)
rename_path(old_file, new_file)
lgr.info(f" - renamed {old_file} to {new_file}")
if ext == ".tsv":
# Do manual .decode() and .encode() to avoid changing line endings
Expand All @@ -53,8 +61,12 @@ def migrate_participants(dataset_path: Path):
def migrate_dataset(dataset_path):
lgr.info(f"Migrating dataset at {dataset_path}")
dataset_path = Path(dataset_path)
if get_bids_version(dataset_path) == TARGET_VERSION:
lgr.info(f"Dataset already at version {TARGET_VERSION}")
try:
if get_bids_version(dataset_path) == TARGET_VERSION:
lgr.info(f"Dataset already at version {TARGET_VERSION}")
return
except NotBIDSDatasetError:
lgr.warning("%s not a BIDS dataset, skipping", dataset_path)
return
# TODO: possibly add a check for BIDS version in dataset_description.json
# and skip if already 2.0, although ideally transformations
Expand All @@ -65,3 +77,32 @@ def migrate_dataset(dataset_path):
]:
lgr.info(f" - applying migration {migration.__name__}")
migration(dataset_path)


@lru_cache
def path_has_git(path: Path) -> bool:
    """Tell whether ``path`` directly contains a ``.git`` entry.

    The answer is memoized per ``path`` by ``lru_cache``, so a ``.git`` entry
    created or removed after the first query for that path goes unnoticed.
    """
    git_entry = path.joinpath(".git")
    return git_entry.exists()


def git_topdir(path: Path) -> Optional[Path]:
    """Find the closest directory at or above ``path`` that holds ``.git``.

    ``path`` is made absolute first. It is considered itself only when it is
    an existing directory; after that each parent is tried, nearest first.

    Returns:
        The first directory for which ``path_has_git`` is true, or ``None``
        when no such directory is found.
    """
    start = path.absolute()
    candidates = list(start.parents)
    if start.is_dir():
        candidates.insert(0, start)
    return next((candidate for candidate in candidates if path_has_git(candidate)), None)


def rename_path(old_path: Path, new_path: Path):
    """Rename ``old_path`` to ``new_path``, using ``git mv`` when under git.

    When both paths share a git top-level directory (as found by
    ``git_topdir``) the rename is done with ``git mv``; when neither is under
    git, ``os.rename`` is used.

    Args:
        old_path: Existing path to rename.
        new_path: Destination path.

    Raises:
        NotImplementedError: If the two paths do not share the same git
            top-level directory (including one under git and the other not).
        subprocess.CalledProcessError: If ``git mv`` exits with a non-zero
            status.
    """
    old_git_top = git_topdir(old_path)
    new_git_top = git_topdir(new_path)
    # Moving across a repository border is not supported here.
    if old_git_top != new_git_top:
        raise NotImplementedError(
            f"Did not implement moving across git repo boundaries {old_git_top} -> {new_git_top}"
        )
    if old_git_top is None:
        os.rename(old_path, new_path)
        return
    # git runs with cwd set to the repository top, so paths given relative to
    # the current working directory would be resolved against the wrong base;
    # hand git absolute paths instead.
    subprocess.run(
        ["git", "mv", str(old_path.absolute()), str(new_path.absolute())],
        check=True,
        cwd=old_git_top,
    )

0 comments on commit e5494b2

Please sign in to comment.