Skip to content

Commit

Permalink
Merge pull request #5 from Australian-Imaging-Service/github-download
Browse files Browse the repository at this point in the history
added retrieve from github function
  • Loading branch information
tclose authored May 29, 2024
2 parents 7d914c0 + 83f0dd9 commit b7ebd21
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 4 deletions.
21 changes: 21 additions & 0 deletions medimages4tests/mri/neuro/bold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path
from medimages4tests import base_cache_dir
from medimages4tests.utils import retrieve_from_openneuro, OpenneuroSpec


# Sub-directory of the package-wide cache that holds the BOLD samples
cache_dir = base_cache_dir.joinpath("mri", "neuro", "bold")


# Samples that can be requested from this module, keyed by an identifier
# built from the OpenNeuro dataset name and the subject label
SAMPLES = {
    "ds002014-01": OpenneuroSpec(
        path="sub-01/func/sub-01_task-languageproduction_run-01_bold",
        dataset="ds002014",
        tag="1.0.1",
    ),
}


def get_image(out_dir: Path = None, sample: str = "ds002014-01"):
    """Fetch one of the BOLD samples listed in ``SAMPLES``.

    Parameters
    ----------
    out_dir : Path, optional
        location handed to ``retrieve_from_openneuro`` as its cache path;
        when omitted, ``cache_dir / sample`` is used
    sample : str, optional
        key of the entry in ``SAMPLES`` to fetch

    Returns
    -------
    the value returned by ``retrieve_from_openneuro`` for the chosen sample
    """
    destination = cache_dir / sample if out_dir is None else out_dir
    return retrieve_from_openneuro(SAMPLES[sample], destination)
23 changes: 23 additions & 0 deletions medimages4tests/mri/neuro/dwi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from pathlib import Path
from medimages4tests import base_cache_dir
from medimages4tests.utils import retrieve_from_openneuro, OpenneuroSpec


# Sub-directory of the package-wide cache that holds the DWI samples. It must
# be distinct from the T1w cache: the same sample identifier (e.g.
# "ds004024-CON031") is used by both modalities, so sharing a directory would
# make the DWI and T1w files resolve to the same cached path.
cache_dir = base_cache_dir / "mri" / "neuro" / "dwi"


# Samples that can be requested from this module, keyed by an identifier
# built from the OpenNeuro dataset name and the subject label
SAMPLES = {
    "ds004024-CON031": OpenneuroSpec(
        dataset="ds004024",
        tag="1.0.1",
        path="sub-CON031/ses-mri/dwi/sub-CON031_ses-mri_dwi",
    ),
}


def get_image(out_dir: Path = None, sample: str = "ds004024-CON031"):
    """Fetch one of the DWI samples listed in ``SAMPLES``.

    Besides the image and its JSON side-car, the gradient table files
    (``.bvec`` and ``.bval``) are requested as well.

    Parameters
    ----------
    out_dir : Path, optional
        location handed to ``retrieve_from_openneuro`` as its cache path;
        when omitted, ``cache_dir / sample`` is used
    sample : str, optional
        key of the entry in ``SAMPLES`` to fetch

    Returns
    -------
    the value returned by ``retrieve_from_openneuro`` for the chosen sample
    """
    destination = cache_dir / sample if out_dir is None else out_dir
    dwi_suffixes = (".nii.gz", ".json", ".bvec", ".bval")
    return retrieve_from_openneuro(
        SAMPLES[sample], destination, suffixes=dwi_suffixes
    )
24 changes: 21 additions & 3 deletions medimages4tests/mri/neuro/t1w.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from medimages4tests import base_cache_dir
from medimages4tests.utils import retrieve_from_openneuro, OpenneuroSpec

Expand All @@ -10,9 +11,26 @@
dataset="ds004130",
tag="1.0.0",
path="sub-ON01016/anat/sub-ON01016_acq-fspgr_run-01_T1w",
)
),
"ds002014-01": OpenneuroSpec(
dataset="ds002014",
tag="1.0.1",
path="sub-01/anat/sub-01_T1w",
),
"ds001743-01": OpenneuroSpec(
dataset="ds001743",
tag="1.0.1",
path="sub-01/anat/sub-01_T1w",
),
"ds004024-CON031": OpenneuroSpec(
dataset="ds004024",
tag="1.0.1",
path="sub-CON031/ses-mri/dwi/sub-CON031_ses-mri_T1w",
),
}


def get_image(sample="ds004130-ON01016"):
    """Fetch one of the T1-weighted samples listed in ``SAMPLES``.

    The sample is cached at ``cache_dir / sample``.

    Parameters
    ----------
    sample : str, optional
        key of the entry in ``SAMPLES`` to fetch

    Returns
    -------
    the value returned by ``retrieve_from_openneuro`` for the chosen sample
    """
    spec = SAMPLES[sample]
    return retrieve_from_openneuro(spec, cache_dir / sample)
def get_image(out_dir: Path = None, sample: str = "ds004130-ON01016"):
    """Fetch one of the T1-weighted samples listed in ``SAMPLES``.

    Parameters
    ----------
    out_dir : Path, optional
        location handed to ``retrieve_from_openneuro`` as its cache path;
        when omitted, ``cache_dir / sample`` is used
    sample : str, optional
        key of the entry in ``SAMPLES`` to fetch

    Returns
    -------
    the value returned by ``retrieve_from_openneuro`` for the chosen sample
    """
    destination = cache_dir / sample if out_dir is None else out_dir
    return retrieve_from_openneuro(SAMPLES[sample], destination)
70 changes: 69 additions & 1 deletion medimages4tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from tempfile import mkdtemp
import requests
import tarfile
import shutil
import os
import typing as ty
from pathlib import Path
import openneuro
import attrs
from . import base_cache_dir


@attrs.define
class OpenneuroSpec:
    """Describes where a sample file lives within an OpenNeuro dataset."""

    # name of the OpenNeuro dataset, e.g. "ds002014"
    dataset: str
    # version tag of the dataset, e.g. "1.0.1"
    tag: str
    # path of the file relative to the dataset; any value passed in is run
    # through the ``Path`` converter. The sample tables give it without a
    # file extension, and the retrieval code appends each suffix in turn.
    path: Path = attrs.field(converter=Path)
Expand All @@ -16,6 +20,7 @@ class OpenneuroSpec:
def retrieve_from_openneuro(
sample, cache_path, suffixes=(".nii.gz", ".json"), force_download=False
):
"""Retrieves an image from the OpenNeuro repository"""
if not cache_path.parent.exists():
cache_path.parent.mkdir(parents=True)
out_path = cache_path.with_suffix(suffixes[0])
Expand All @@ -32,3 +37,66 @@ def retrieve_from_openneuro(
(tmpdir / sample.path).with_suffix(ext), cache_path.with_suffix(ext)
)
return out_path


def retrieve_from_github(
    org: str,
    repo: str,
    path: str,
    tag: str = "main",
    compressed: bool = True,
    cache_dir: ty.Union[Path, str, None] = None,
) -> Path:
    """Retrieves a sample file from a path within a GitHub repository

    The file is downloaded once and cached; subsequent calls with the same
    arguments return the cached copy without touching the network.

    Parameters
    ----------
    org: str
        the Github organisation
    repo : str
        the name of the git repository within the Github organisation
    path : str
        the path to the file relative to the repository
    tag : str, optional
        the git tag (version) to use, "main" by default
    compressed : bool, optional
        whether the file within the git repo has been archived with tar/gzip and
        needs to be uncompressed before use, True by default
    cache_dir : Path | str, optional
        the directory in which to download and cache the requested file, by default
        uses the "github" sub-directory of the package's ``base_cache_dir``

    Returns
    -------
    Path
        the path to the cached file (or directory, if the archive contained one)

    Raises
    ------
    ValueError
        if nothing could be downloaded from the constructed URL, or if the
        downloaded archive does not contain exactly one top-level entry
    """
    if cache_dir is None:
        cache_dir = base_cache_dir / "github"
    else:
        cache_dir = Path(cache_dir).expanduser()
    cache_path = (cache_dir / repo / tag).joinpath(*path.split("/"))
    if cache_path.exists():
        return cache_path
    # Raw file URLs are namespaced by organisation as well as repository
    url = f"https://raw.githubusercontent.com/{org}/{repo}/{tag}/{path}"
    if compressed:
        url += ".tar.gz"
    response = requests.get(url)
    # status_code is an integer, so it must be compared against 200, not "200"
    if response.status_code != 200:
        raise ValueError(f"Did not find a file to download at '{url}'")
    # Only create the cache directories once there is something to store
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    if not compressed:
        cache_path.write_bytes(response.content)
        return cache_path
    tmp_dir = Path(mkdtemp())
    try:
        download_path = tmp_dir / url.split("/")[-1]
        download_path.write_bytes(response.content)
        extract_dir = tmp_dir / "extracted"
        extract_dir.mkdir()
        # NOTE(review): extractall() trusts the member paths in the archive;
        # only point this function at repositories whose contents you trust
        with tarfile.open(download_path) as tfile:
            tfile.extractall(path=extract_dir)
        dir_contents = list(extract_dir.iterdir())
        if len(dir_contents) != 1:
            raise ValueError(
                f"Contents of tar file at {url} should contain exactly one "
                f"file/sub-dir, found {len(dir_contents)} ({dir_contents})"
            )
        # shutil.move also works when the temporary directory and the cache
        # live on different filesystems, where os.rename would fail
        shutil.move(str(dir_contents[0]), str(cache_path))
    finally:
        # Always discard the scratch directory, even if extraction failed
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return cache_path
15 changes: 15 additions & 0 deletions tests/test_github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest
import nibabel as nb
import numpy as np
from medimages4tests.utils import retrieve_from_github


@pytest.mark.xfail
def test_github_retrieve():
    """Download a sample from GitHub and check the dimensions in its header."""
    downloaded = retrieve_from_github(
        repo="pydra-fsl-testdata", org="nipype", path="melodic_ica"
    )
    image = nb.load(downloaded)

    # First four entries of the NIfTI "dim" header field
    expected_dims = [3, 204, 256, 256]
    assert np.array_equal(image.header["dim"][:4], expected_dims)

0 comments on commit b7ebd21

Please sign in to comment.