Skip to content

Commit

Permalink
Merge pull request #72 from ArcanaFramework/mtime-caching
Browse files Browse the repository at this point in the history
Add mtime_cached_property for convenient caching of properties until/unless the mtimes of the files change
  • Loading branch information
tclose authored Sep 4, 2024
2 parents a9a3715 + 53e50d3 commit e55ea31
Show file tree
Hide file tree
Showing 12 changed files with 180 additions and 58 deletions.
Binary file modified docs/source/_static/images/logo_small.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion extras/fileformats/extras/application/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,4 @@ def relative_path(path: PathType, base_dir: PathType) -> str:
f"Cannot add {path} to archive as it is not a "
f"subdirectory of {base_dir}"
)
return relpath # type: ignore[no-any-return]
return relpath
2 changes: 1 addition & 1 deletion extras/fileformats/extras/application/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def convert_data_serialization(
output_path = out_dir / (
in_file.fspath.stem + (output_format.ext if output_format.ext else "")
)
return output_format.save_new(output_path, dct) # type: ignore[no-any-return]
return output_format.save_new(output_path, dct)


@extra_implementation(DataSerialization.load)
Expand Down
2 changes: 1 addition & 1 deletion extras/fileformats/extras/image/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ def convert_image(
output_path = out_dir / (
in_file.fspath.stem + (output_format.ext if output_format.ext else "")
)
return output_format.save_new(output_path, data_array) # type: ignore[no-any-return]
return output_format.save_new(output_path, data_array)
62 changes: 31 additions & 31 deletions fileformats/core/fileset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from fileformats.core.typing import Self
from .utils import (
classproperty,
mtime_cached_property,
fspaths_converter,
describe_task,
matching_source,
Expand Down Expand Up @@ -67,7 +68,10 @@ class FileSet(DataType):
a set of file-system paths pointing to all the resources in the file-set
metadata : dict[str, Any]
metadata associated with the file-set, typically lazily loaded via `read_metadata`
extra hook
extra hook but can be provided directly at the time of instantiation
metadata_keys : list[str]
the keys of the metadata to load when the `metadata` property is called. Provided
to allow for selective loading of metadata fields for performance reasons.
"""

# Class attributes
Expand All @@ -87,14 +91,17 @@ class FileSet(DataType):

# Member attributes
fspaths: ty.FrozenSet[Path]
_metadata: ty.Union[ty.Mapping[str, ty.Any], bool, None]
_explicit_metadata: ty.Optional[ty.Mapping[str, ty.Any]]
_metadata_keys: ty.Optional[ty.List[str]]

def __init__(
self,
fspaths: FspathsInputType,
metadata: ty.Union[ty.Dict[str, ty.Any], bool, None] = False,
metadata: ty.Optional[ty.Dict[str, ty.Any]] = None,
metadata_keys: ty.Optional[ty.List[str]] = None,
):
self._metadata = metadata
self._explicit_metadata = metadata
self._metadata_keys = metadata_keys
self._validate_class()
self.fspaths = fspaths_converter(fspaths)
self._validate_fspaths()
Expand Down Expand Up @@ -179,6 +186,18 @@ def relative_fspaths(self) -> ty.Iterator[Path]:
"Paths for all top-level paths in the file-set relative to the common parent directory"
return (p.relative_to(self.parent) for p in self.fspaths)

@property
def mtimes(self) -> ty.Tuple[ty.Tuple[str, float], ...]:
"""Modification times of all fspaths in the file-set
Returns
-------
tuple[tuple[str, float], ...]
a tuple of tuples containing the file paths and the modification time sorted
by the file path
"""
return tuple((str(p), p.stat().st_mtime) for p in sorted(self.fspaths))

@classproperty
def mime_type(cls) -> str:
"""Generates a MIME type (IANA) identifier from a format class. If an official
Expand Down Expand Up @@ -225,41 +244,22 @@ def possible_exts(cls) -> ty.List[ty.Optional[str]]:
pass
return possible

@property
@mtime_cached_property
def metadata(self) -> ty.Mapping[str, ty.Any]:
"""Lazily load metadata from `read_metadata` extra if implemented, returning an
empty metadata array if not"""
if self._metadata is not False:
assert self._metadata is not True
return self._metadata if self._metadata else {}
if self._explicit_metadata is not None:
return self._explicit_metadata
try:
self._metadata = self.read_metadata()
metadata = self.read_metadata(selected_keys=self._metadata_keys)
except FileFormatsExtrasPkgUninstalledError:
raise
except FileFormatsExtrasPkgNotCheckedError as e:
logger.warning(str(e))
self._metadata = None
metadata = {}
except FileFormatsExtrasError:
self._metadata = None
return self._metadata if self._metadata else {}

def select_metadata(
self, selected_keys: ty.Union[ty.Sequence[str], None] = None
) -> None:
"""Selects a subset of the metadata to be read and stored instead all available
(i.e for performance reasons).
Parameters
----------
selected_keys : Union[Sequence[str], None]
the keys of the values to load. If None, all values are loaded
"""
if not (
isinstance(self._metadata, dict)
and selected_keys is not None
and set(selected_keys).issubset(self._metadata)
):
self._metadata = dict(self.read_metadata(selected_keys))
metadata = {}
return metadata

@extra
def read_metadata(
Expand Down Expand Up @@ -672,7 +672,7 @@ def formats_by_iana_mime(cls) -> ty.Dict[str, ty.Type["FileSet"]]:
"""a dictionary containing all formats by their IANA MIME type (if applicable)"""
if cls._formats_by_iana_mime is None:
cls._formats_by_iana_mime = {
f.iana_mime: f # type: ignore
f.iana_mime: f # type: ignore[misc]
for f in FileSet.all_formats
if f.__dict__.get("iana_mime") is not None
}
Expand Down
40 changes: 30 additions & 10 deletions fileformats/core/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import typing as ty
import pytest
import time
from fileformats.core import FileSet, extra_implementation
from fileformats.generic import File

Expand All @@ -21,7 +22,7 @@ def aformat_read_metadata(


@pytest.fixture
def file_with_metadata(tmp_path):
def file_with_metadata_fspath(tmp_path):
metadata = {
"a": 1,
"b": 2,
Expand All @@ -32,27 +33,46 @@ def file_with_metadata(tmp_path):
fspath = tmp_path / "metadata-file.mf"
with open(fspath, "w") as f:
f.write("\n".join("{}:{}".format(*t) for t in metadata.items()))
return FileWithMetadata(fspath)
return fspath


def test_metadata(file_with_metadata):
def test_metadata(file_with_metadata_fspath):
file_with_metadata = FileWithMetadata(file_with_metadata_fspath)
assert file_with_metadata.metadata["a"] == "1"
assert sorted(file_with_metadata.metadata) == ["a", "b", "c", "d", "e"]


def test_select_metadata(file_with_metadata):
file_with_metadata.select_metadata(["a", "b", "c"])
def test_select_metadata(file_with_metadata_fspath):
file_with_metadata = FileWithMetadata(
file_with_metadata_fspath, metadata_keys=["a", "b", "c"]
)
assert file_with_metadata.metadata["a"] == "1"
assert sorted(file_with_metadata.metadata) == ["a", "b", "c"]


def test_select_metadata_reload(file_with_metadata):
file_with_metadata.select_metadata(["a", "b", "c"])
def test_explicit_metadata(file_with_metadata_fspath):
file_with_metadata = FileWithMetadata(
file_with_metadata_fspath,
metadata={
"a": 1,
"b": 2,
"c": 3,
},
)
# Check that we use the explicitly provided metadata and not one from the file
# contents
assert sorted(file_with_metadata.metadata) == ["a", "b", "c"]
# add new metadata line to check that it isn't loaded
# add new metadata line to check and check that it isn't reloaded
with open(file_with_metadata, "a") as f:
f.write("\nf:6")
file_with_metadata.select_metadata(["a", "b"])
assert sorted(file_with_metadata.metadata) == ["a", "b", "c"]
file_with_metadata.select_metadata(None)


def test_metadata_reload(file_with_metadata_fspath):
file_with_metadata = FileWithMetadata(file_with_metadata_fspath)
assert sorted(file_with_metadata.metadata) == ["a", "b", "c", "d", "e"]
# add new metadata line to check and check that it is reloaded
time.sleep(2)
with open(file_with_metadata, "a") as f:
f.write("\nf:6")
assert sorted(file_with_metadata.metadata) == ["a", "b", "c", "d", "e", "f"]
42 changes: 42 additions & 0 deletions fileformats/core/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from fileformats.generic import File, Directory, FsObject
from fileformats.core.mixin import WithSeparateHeader
from fileformats.core.exceptions import UnsatisfiableCopyModeError
from fileformats.core.utils import mtime_cached_property
from conftest import write_test_file


Expand Down Expand Up @@ -365,3 +366,44 @@ def test_hash_files(fsobject: FsObject, work_dir: Path, dest_dir: Path):
)
cpy = fsobject.copy(dest_dir)
assert cpy.hash_files() == fsobject.hash_files()


class MtimeTestFile(File):

flag: int

@mtime_cached_property
def cached_prop(self):
return self.flag


def test_mtime_cached_property(tmp_path: Path):
fspath = tmp_path / "file_1.txt"
fspath.write_text("hello")

file = MtimeTestFile(fspath)

file.flag = 0
assert file.cached_prop == 0
# Need a long delay to ensure the mtime changes on Ubuntu and particularly on Windows
# On MacOS, the mtime resolution is much higher so not usually an issue. Use
# explicitly cache clearing if needed
time.sleep(2)
file.flag = 1
assert file.cached_prop == 0
time.sleep(2)
fspath.write_text("world")
assert file.cached_prop == 1


def test_mtime_cached_property_force_clear(tmp_path: Path):
fspath = tmp_path / "file_1.txt"
fspath.write_text("hello")

file = MtimeTestFile(fspath)

file.flag = 0
assert file.cached_prop == 0
file.flag = 1
MtimeTestFile.cached_prop.clear(file)
assert file.cached_prop == 1
8 changes: 7 additions & 1 deletion fileformats/core/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,10 @@
PathType: TypeAlias = ty.Union[str, Path]


__all__ = ["CryptoMethod", "FspathsInputType", "PathType", "TypeAlias", "Self"]
__all__ = [
"CryptoMethod",
"FspathsInputType",
"PathType",
"TypeAlias",
"Self",
]
68 changes: 60 additions & 8 deletions fileformats/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from types import ModuleType
import urllib.request
import urllib.error
from threading import RLock
import os
import logging
import pkgutil
Expand Down Expand Up @@ -97,22 +98,22 @@ def fspaths_converter(fspaths: FspathsInputType) -> ty.FrozenSet[Path]:

PropReturn = ty.TypeVar("PropReturn")


def classproperty(meth: ty.Callable[..., PropReturn]) -> PropReturn:
"""Access a @classmethod like a @property."""
# mypy doesn't understand class properties yet: https://github.com/python/mypy/issues/2563
return classmethod(property(meth)) # type: ignore


if sys.version_info[:2] < (3, 9):

class classproperty(object):
class classproperty(object): # type: ignore[no-redef] # noqa
def __init__(self, f: ty.Callable[[ty.Type[ty.Any]], ty.Any]):
self.f = f

def __get__(self, obj: ty.Any, owner: ty.Any) -> ty.Any:
return self.f(owner)

else:

def classproperty(meth: ty.Callable[..., PropReturn]) -> PropReturn:
"""Access a @classmethod like a @property."""
# mypy doesn't understand class properties yet: https://github.com/python/mypy/issues/2563
return classmethod(property(meth)) # type: ignore


def add_exc_note(e: Exception, note: str) -> Exception:
"""Adds a note to an exception in a Python <3.11 compatible way
Expand Down Expand Up @@ -248,3 +249,54 @@ def import_extras_module(klass: ty.Type["fileformats.core.DataType"]) -> ExtrasM
else:
extras_imported = True
return ExtrasModule(extras_imported, extras_pkg, extras_pypi)


ReturnType = ty.TypeVar("ReturnType")


class mtime_cached_property:
"""A property that is cached until the mtimes of the files in the fileset are changed"""

def __init__(self, func: ty.Callable[..., ty.Any]):
self.func = func
self.__doc__ = func.__doc__
self.lock = RLock()

@property
def _cache_name(self) -> str:
return f"_{self.func.__name__}_mtime_cache"

def clear(self, instance: "fileformats.core.FileSet") -> None:
"""Forcibly clear the cache"""
del instance.__dict__[self._cache_name]

def __get__(
self,
instance: ty.Optional["fileformats.core.FileSet"],
owner: ty.Optional[ty.Type["fileformats.core.FileSet"]] = None,
) -> ty.Any:
if instance is None: # if accessing property from class not instance
return self
assert isinstance(instance, fileformats.core.FileSet), (
"Cannot use mtime_cached_property instance with "
f"{type(instance).__name__!r} object, only FileSet objects."
)
try:
mtimes, value = instance.__dict__[self._cache_name]
except KeyError:
pass
else:
if instance.mtimes == mtimes:
return value
with self.lock:
# check if another thread filled cache while we awaited lock
try:
mtimes, value = instance.__dict__[self._cache_name]
except KeyError:
pass
else:
if instance.mtimes == mtimes:
return value
value = self.func(instance)
instance.__dict__[self._cache_name] = (instance.mtimes, value)
return value
6 changes: 3 additions & 3 deletions fileformats/generic/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
FormatMismatchError,
UnconstrainedExtensionException,
)
from fileformats.core.utils import classproperty
from fileformats.core.utils import classproperty, mtime_cached_property
from .fsobject import FsObject


Expand Down Expand Up @@ -79,7 +79,7 @@ def copy_ext(
)
return Path(new_path).with_suffix(suffix)

@property
@mtime_cached_property
def contents(self) -> ty.Union[str, bytes]:
return self.read_contents()

Expand Down Expand Up @@ -125,7 +125,7 @@ def actual_ext(self) -> str:
"(i.e. matches the None extension)"
)
# Return the longest matching extension, useful for optional extensions
return sorted(matching, key=len)[-1] # type: ignore[no-any-return]
return sorted(matching, key=len)[-1]

@property
def stem(self) -> str:
Expand Down
Loading

0 comments on commit e55ea31

Please sign in to comment.