Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rf(resources): Use acres over vendored data loader #3323

Merged
merged 2 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 1 addition & 169 deletions fmriprep/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,176 +7,8 @@
.. automethod:: load.as_path

.. automethod:: load.cached

.. autoclass:: Loader
"""

from __future__ import annotations

import atexit
import os
from contextlib import AbstractContextManager, ExitStack
from functools import cached_property
from pathlib import Path
from types import ModuleType

try:
from functools import cache
except ImportError: # PY38
from functools import lru_cache as cache

try: # Prefer backport to leave consistency to dependency spec
from importlib_resources import as_file, files
except ImportError:
from importlib.resources import as_file, files # type: ignore

try: # Prefer stdlib so Sphinx can link to authoritative documentation
from importlib.resources.abc import Traversable
except ImportError:
from importlib_resources.abc import Traversable

# Public API: a star-import of this module exposes only ``load``.
__all__ = ['load']


class Loader:
"""A loader for package files relative to a module

This class wraps :mod:`importlib.resources` to provide a getter
function with an interpreter-lifetime scope. For typical packages
it simply passes through filesystem paths as :class:`~pathlib.Path`
objects. For zipped distributions, it will unpack the files into
a temporary directory that is cleaned up on interpreter exit.

This loader accepts a fully-qualified module name or a module
object.

Expected usage::

'''Data package

.. autofunction:: load_data

.. automethod:: load_data.readable

.. automethod:: load_data.as_path

.. automethod:: load_data.cached
'''

from fmriprep.data import Loader

load_data = Loader(__package__)

:class:`~Loader` objects implement the :func:`callable` interface
and generate a docstring, and are intended to be treated and documented
as functions.

For greater flexibility and improved readability over the ``importlib.resources``
interface, explicit methods are provided to access resources.

+---------------+----------------+------------------+
| On-filesystem | Lifetime | Method |
+---------------+----------------+------------------+
| `True` | Interpreter | :meth:`cached` |
+---------------+----------------+------------------+
| `True` | `with` context | :meth:`as_path` |
+---------------+----------------+------------------+
| `False` | n/a | :meth:`readable` |
+---------------+----------------+------------------+

It is also possible to use ``Loader`` directly::

from fmriprep.data import Loader

Loader(other_package).readable('data/resource.ext').read_text()

with Loader(other_package).as_path('data') as pkgdata:
# Call function that requires full Path implementation
func(pkgdata)

# contrast to

from importlib_resources import files, as_file

files(other_package).joinpath('data/resource.ext').read_text()

with as_file(files(other_package) / 'data') as pkgdata:
func(pkgdata)

.. automethod:: readable

.. automethod:: as_path

.. automethod:: cached
"""

def __init__(self, anchor: str | ModuleType):
self._anchor = anchor
self.files = files(anchor)
self.exit_stack = ExitStack()
atexit.register(self.exit_stack.close)
# Allow class to have a different docstring from instances
self.__doc__ = self._doc

@cached_property
def _doc(self):
"""Construct docstring for instances

Lists the public top-level paths inside the location, where
non-public means has a `.` or `_` prefix or is a 'tests'
directory.
"""
top_level = sorted(
os.path.relpath(p, self.files) + '/'[: p.is_dir()]
for p in self.files.iterdir()
if p.name[0] not in ('.', '_') and p.name != 'tests'
)
doclines = [
f'Load package files relative to ``{self._anchor}``.',
'',
'This package contains the following (top-level) files/directories:',
'',
*(f'* ``{path}``' for path in top_level),
]

return '\n'.join(doclines)

def readable(self, *segments) -> Traversable:
"""Provide read access to a resource through a Path-like interface.

This file may or may not exist on the filesystem, and may be
efficiently used for read operations, including directory traversal.

This result is not cached or copied to the filesystem in cases where
that would be necessary.
"""
return self.files.joinpath(*segments)

def as_path(self, *segments) -> AbstractContextManager[Path]:
"""Ensure data is available as a :class:`~pathlib.Path`.

This method generates a context manager that yields a Path when
entered.

This result is not cached, and any temporary files that are created
are deleted when the context is exited.
"""
return as_file(self.files.joinpath(*segments))

@cache # noqa: B019
def cached(self, *segments) -> Path:
"""Ensure data is available as a :class:`~pathlib.Path`.

Any temporary files that are created remain available throughout
the duration of the program, and are deleted when Python exits.

Results are cached so that multiple calls do not unpack the same
data multiple times, but the cache is sensitive to the specific
argument(s) passed.
"""
return self.exit_stack.enter_context(as_file(self.files.joinpath(*segments)))

__call__ = cached

# ``Loader`` is imported from the ``acres`` package.
from acres import Loader

# ``load`` is a ``Loader`` instance anchored at this package (``__package__``).
load = Loader(__package__)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ classifiers = [
license = {file = "LICENSE"}
requires-python = ">=3.10"
dependencies = [
"acres",
"looseversion",
"nibabel >= 4.0.1",
"nipype >= 1.8.5",
Expand Down
47 changes: 24 additions & 23 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This file was autogenerated by uv via the following command:
# uv pip compile --extra=container --strip-extras pyproject.toml -o requirements.txt -P 3.10
acres==0.1.0
annexremote==1.6.5
# via
# datalad
Expand All @@ -18,13 +19,13 @@ bids-validator==1.14.6
# via pybids
bokeh==3.4.1
# via tedana
boto3==1.34.127
boto3==1.34.144
# via datalad
botocore==1.34.127
botocore==1.34.144
# via
# boto3
# s3transfer
certifi==2024.6.2
certifi==2024.7.4
# via
# requests
# sentry-sdk
Expand All @@ -43,7 +44,7 @@ click==8.1.7
# codecarbon
# nipype
# pybids
codecarbon==2.4.2
codecarbon==2.5.0
contourpy==1.2.1
# via
# bokeh
Expand All @@ -52,7 +53,7 @@ cryptography==42.0.8
# via secretstorage
cycler==0.12.1
# via matplotlib
datalad==1.1.0
datalad==1.1.1
# via
# datalad-next
# datalad-osf
Expand All @@ -67,23 +68,23 @@ etelemetry==0.3.1
# via nipype
fasteners==0.19
# via datalad
filelock==3.15.1
filelock==3.15.4
# via nipype
fonttools==4.53.0
fonttools==4.53.1
# via matplotlib
formulaic==0.5.2
# via pybids
greenlet==3.0.3
# via sqlalchemy
h5py==3.11.0
# via nitransforms
humanize==4.9.0
humanize==4.10.0
# via
# datalad
# datalad-next
idna==3.7
# via requests
imageio==2.34.1
imageio==2.34.2
# via scikit-image
indexed-gzip==1.8.7
# via smriprep
Expand Down Expand Up @@ -146,7 +147,7 @@ mapca==0.0.5
# via tedana
markupsafe==2.1.5
# via jinja2
matplotlib==3.8.4
matplotlib==3.9.1
# via
# nireports
# nitime
Expand Down Expand Up @@ -193,7 +194,7 @@ nipype==1.8.6
# sdcflows
# smriprep
nireports==23.2.1
nitime==0.10.2
nitime==0.11
nitransforms==23.0.1
# via
# niworkflows
Expand Down Expand Up @@ -257,9 +258,9 @@ pandas==2.2.2
# pybids
# seaborn
# tedana
patool==2.2.0
patool==2.3.0
# via datalad
pillow==10.3.0
pillow==10.4.0
# via
# bokeh
# imageio
Expand All @@ -271,7 +272,7 @@ prometheus-client==0.20.0
# via codecarbon
prov==2.0.1
# via nipype
psutil==5.9.8
psutil==6.0.0
# via codecarbon
py-cpuinfo==9.0.0
# via codecarbon
Expand All @@ -288,9 +289,9 @@ pybtex-apa-style==1.3
# via tedana
pycparser==2.22
# via cffi
pydot==2.0.0
pydot==3.0.1
# via nipype
pynvml==11.5.0
pynvml==11.5.3
# via codecarbon
pyparsing==3.1.2
# via
Expand All @@ -305,7 +306,7 @@ python-dateutil==2.9.0.post0
# nipype
# pandas
# prov
python-gitlab==4.6.0
python-gitlab==4.8.0
# via datalad
pytz==2024.1
# via
Expand All @@ -318,7 +319,7 @@ pyyaml==6.0.1
# niworkflows
# pybtex
# smriprep
rapidfuzz==3.9.3
rapidfuzz==3.9.4
# via codecarbon
rdflib==6.3.2
# via
Expand All @@ -336,9 +337,9 @@ requests==2.32.3
# templateflow
requests-toolbelt==1.0.0
# via python-gitlab
s3transfer==0.10.1
s3transfer==0.10.2
# via boto3
scikit-image==0.23.2
scikit-image==0.24.0
# via
# niworkflows
# sdcflows
Expand Down Expand Up @@ -368,7 +369,7 @@ seaborn==0.13.2
# niworkflows
secretstorage==3.3.3
# via keyring
sentry-sdk==2.5.1
sentry-sdk==2.10.0
simplejson==3.19.2
# via nipype
six==1.16.0
Expand All @@ -379,7 +380,7 @@ six==1.16.0
# pybtex
# python-dateutil
smriprep==0.15.0
sqlalchemy==2.0.30
sqlalchemy==2.0.31
# via pybids
svgutils==0.3.4
# via
Expand All @@ -396,7 +397,7 @@ threadpoolctl==3.5.0
# via
# scikit-learn
# tedana
tifffile==2024.5.22
tifffile==2024.7.2
# via scikit-image
toml==0.10.2
# via sdcflows
Expand Down