From 8c157741d65aa99ffda508767668cff4f4b09ff3 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sat, 11 Nov 2023 20:09:06 -0600 Subject: [PATCH 01/10] Add cache directory and cache geometry slices configuration options --- docs/source/howtos/configuration.rst | 50 ++++++++++++++++++++++++++++ pyresample/_config.py | 2 ++ versioneer.py | 2 +- 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/docs/source/howtos/configuration.rst b/docs/source/howtos/configuration.rst index b7aa6e3f1..28e075f85 100644 --- a/docs/source/howtos/configuration.rst +++ b/docs/source/howtos/configuration.rst @@ -76,6 +76,56 @@ Or for specific blocks of code: Similarly, if you need to access one of the values you can use the ``pyresample.config.get`` method. +Cache Directory +^^^^^^^^^^^^^^^ + +* **Environment variable**: ``PYRESAMPLE_CACHE_DIR`` +* **YAML/Config Key**: ``cache_dir`` +* **Default**: See below + +Directory where any files cached by Pyresample will be stored. This +directory is not necessarily cleared out by Pyresample, but is rarely used +without explicitly being enabled by the user. This +defaults to a different path depending on your operating system following +the `platformdirs `_ +"user cache dir". + +.. note:: + + Some resampling algorithms provide caching functionality when the user + provides a directory to cache to. These resamplers do not currently use this + configuration option. + +.. _config_cache_sensor_angles_setting: + +Cache Geometry Slices +^^^^^^^^^^^^^^^^^^^^^ + +* **Environment variable**: ``PYRESAMPLE_CACHE_GEOM_SLICES`` +* **YAML/Config Key**: ``cache_geom_slices`` +* **Default**: ``False`` + +Whether or not generated slices for geometry objects are cached to disk. +These slices are used in various parts of Pyresample like +cropping or overlap calculations including those performed in some resampling +algorithms. At the time of writing this is only performed on +``AreaDefinition`` objects through their +:meth:`~pyresample.geometry.AreaDefinition.get_area_slices` method. +Slices are stored in ``cache_dir`` (see above). +Unlike other caching performed in Pyresample where potentially large arrays +are cached, this option saves a pair of ``slice`` objects that consist of +only 3 integers each. This makes the amount of space used in the cache very +small for many cached results. + +When setting this as an environment variable, this should be set with the +string equivalent of the Python boolean values ``="True"`` or ``="False"``. + +.. warning:: + + This caching does not limit the number of entries nor does it expire old + entries. It is up to the user to manage the contents of the cache + directory. + Feature Flags ------------- diff --git a/pyresample/_config.py b/pyresample/_config.py index ede6b33db..f7297a243 100644 --- a/pyresample/_config.py +++ b/pyresample/_config.py @@ -36,6 +36,8 @@ config = Config( "pyresample", defaults=[{ + "cache_dir": platformdirs.user_cache_dir("pyresample", "pytroll"), + "cache_geom_slices": False, "features": { "future_geometries": False, }, diff --git a/versioneer.py b/versioneer.py index 8777802e2..05702ccf2 100644 --- a/versioneer.py +++ b/versioneer.py @@ -528,7 +528,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None - stdout = process.communicate()[0].strip().decode() + stdout = process.communicate()[0].strip()._object_hook() if process.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) From 6fd85a1e044720aacdbe0010e2538f868adb6c00 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 12 Nov 2023 06:26:00 -0600 Subject: [PATCH 02/10] Add initial proof of concept area slice caching --- pyresample/_caching.py | 70 +++++++++++ pyresample/geometry.py | 135 +++++++++++---------- pyresample/test/conftest.py | 1 + pyresample/test/test_geometry/test_area.py | 20 +++ 4 files changed, 164 insertions(+), 62 deletions(-) create mode 100644 pyresample/_caching.py diff --git a/pyresample/_caching.py b/pyresample/_caching.py new file mode 100644 index 000000000..65e37cfc2 --- /dev/null +++ b/pyresample/_caching.py @@ -0,0 +1,70 @@ +"""Various tools for caching. + +These tools are rarely needed by users and are used where they make sense +throughout pyresample. + +""" + +import functools +import hashlib +import json +from pathlib import Path +from typing import Any, Callable + +import pyresample + + +class JSONCache: + """Decorator class to cache results to a JSON file on-disk.""" + + def __init__(self, *args, **kwargs): + self._callable = None + if len(args) == 1 and not kwargs: + self._callable = args[0] + + def __call__(self, *args, **kwargs): + """Call decorated function and cache the result to JSON.""" + is_decorated = len(args) == 1 and isinstance(args[0], Callable) + if is_decorated: + self._callable = args[0] + + @functools.wraps(self._callable) + def _func(*args, **kwargs): + if not pyresample.config.get("cache_geom_slices", False): + return self._callable(*args, **kwargs) + + # TODO: kwargs + existing_hash = hashlib.sha1() + # hashable_args = [hash(arg) if isinstance(arg, AreaDefinition) else arg for arg in args] + hashable_args = [hash(arg) if arg.__class__.__name__ == "AreaDefinition" else arg for arg in args] + existing_hash.update(json.dumps(tuple(hashable_args)).encode("utf8")) + arg_hash = existing_hash.hexdigest() + print(arg_hash) + base_cache_dir = Path(pyresample.config.get("cache_dir")) / "geometry_slices" + json_path = base_cache_dir / f"{arg_hash}.json" + if not json_path.is_file(): + res = self._callable(*args, **kwargs) + json_path.parent.mkdir(exist_ok=True) + with open(json_path, "w") as json_cache: + json.dump(res, json_cache, cls=_ExtraJSONEncoder) + else: + with open(json_path, "r") as json_cache: + res = json.load(json_cache, object_hook=_object_hook) + return res + + if is_decorated: + return _func + return _func(*args, **kwargs) + + +class _ExtraJSONEncoder(json.JSONEncoder): + def default(self, obj: Any) -> Any: + if isinstance(obj, slice): + return {"__slice__": True, "start": obj.start, "stop": obj.stop, "step": obj.step} + return super().default(obj) + + +def _object_hook(obj: object) -> Any: + if isinstance(obj, dict) and obj.get("__slice__", False): + return slice(obj["start"], obj["stop"], obj["step"]) + return obj diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 7586a06d8..9ef0d9473 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -34,6 +34,7 @@ from pyproj.aoi import AreaOfUse from pyresample import CHUNK_SIZE +from pyresample._caching import JSONCache from pyresample._spatial_mp import Cartesian, Cartesian_MP, Proj_MP from pyresample.area_config import create_area_def from pyresample.boundary import Boundary, SimpleBoundary @@ -2576,70 +2577,9 @@ def proj4_string(self): "instead.", DeprecationWarning, stacklevel=2) return proj4_dict_to_str(self.proj_dict) - def _get_slice_starts_stops(self, area_to_cover): - """Get x and y start and stop points for slicing.""" - llx, lly, urx, ury = area_to_cover.area_extent - x, y = self.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury]) - - # we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent. - if (self.area_extent[0] > self.area_extent[2]) ^ (llx > urx): - xstart = max(0, round(x[1])) - xstop = min(self.width, round(x[0]) + 1) - else: - xstart = max(0, round(x[0])) - xstop = min(self.width, round(x[1]) + 1) - if (self.area_extent[1] > self.area_extent[3]) ^ (lly > ury): - ystart = max(0, round(y[0])) - ystop = min(self.height, round(y[1]) + 1) - else: - ystart = max(0, round(y[1])) - ystop = min(self.height, round(y[0]) + 1) - - return xstart, xstop, ystart, ystop - def get_area_slices(self, area_to_cover, shape_divisible_by=None): """Compute the slice to read based on an `area_to_cover`.""" - if not isinstance(area_to_cover, AreaDefinition): - raise NotImplementedError('Only AreaDefinitions can be used') - - # Intersection only required for two different projections - proj_def_to_cover = area_to_cover.crs - proj_def = self.crs - if proj_def_to_cover == proj_def: - logger.debug('Projections for data and slice areas are' - ' identical: %s', - proj_def_to_cover) - # Get slice parameters - xstart, xstop, ystart, ystop = self._get_slice_starts_stops(area_to_cover) - - x_slice = check_slice_orientation(slice(xstart, xstop)) - y_slice = check_slice_orientation(slice(ystart, ystop)) - x_slice = _ensure_integer_slice(x_slice) - y_slice = _ensure_integer_slice(y_slice) - return x_slice, y_slice - - data_boundary = _get_area_boundary(self) - area_boundary = _get_area_boundary(area_to_cover) - intersection = data_boundary.contour_poly.intersection( - area_boundary.contour_poly) - if intersection is None: - logger.debug('Cannot determine appropriate slicing. ' - "Data and projection area do not overlap.") - raise NotImplementedError - x, y = self.get_array_indices_from_lonlat( - np.rad2deg(intersection.lon), np.rad2deg(intersection.lat)) - x_slice = slice(np.ma.min(x), np.ma.max(x) + 1) - y_slice = slice(np.ma.min(y), np.ma.max(y) + 1) - x_slice = _ensure_integer_slice(x_slice) - y_slice = _ensure_integer_slice(y_slice) - if shape_divisible_by is not None: - x_slice = _make_slice_divisible(x_slice, self.width, - factor=shape_divisible_by) - y_slice = _make_slice_divisible(y_slice, self.height, - factor=shape_divisible_by) - - return (check_slice_orientation(x_slice), - check_slice_orientation(y_slice)) + return get_area_slices(self, area_to_cover, shape_divisible_by) def crop_around(self, other_area): """Crop this area around `other_area`.""" @@ -2740,6 +2680,77 @@ def geocentric_resolution(self, ellps='WGS84', radius=None): return res +@JSONCache() +def get_area_slices( + src_area: AreaDefinition, + area_to_cover: AreaDefinition, + shape_divisible_by: int | None = None +) -> tuple[slice, slice]: + """Compute the slice to read based on an `area_to_cover`.""" + if not isinstance(area_to_cover, AreaDefinition): + raise NotImplementedError('Only AreaDefinitions can be used') + + # Intersection only required for two different projections + proj_def_to_cover = area_to_cover.crs + proj_def = src_area.crs + if proj_def_to_cover == proj_def: + logger.debug('Projections for data and slice areas are identical: %s', + proj_def_to_cover) + # Get slice parameters + xstart, xstop, ystart, ystop = _get_slice_starts_stops(src_area, area_to_cover) + + x_slice = check_slice_orientation(slice(xstart, xstop)) + y_slice = check_slice_orientation(slice(ystart, ystop)) + x_slice = _ensure_integer_slice(x_slice) + y_slice = _ensure_integer_slice(y_slice) + return x_slice, y_slice + + data_boundary = _get_area_boundary(src_area) + area_boundary = _get_area_boundary(area_to_cover) + intersection = data_boundary.contour_poly.intersection( + area_boundary.contour_poly) + if intersection is None: + logger.debug('Cannot determine appropriate slicing. ' + "Data and projection area do not overlap.") + raise NotImplementedError + x, y = src_area.get_array_indices_from_lonlat( + np.rad2deg(intersection.lon), np.rad2deg(intersection.lat)) + x_slice = slice(np.ma.min(x), np.ma.max(x) + 1) + y_slice = slice(np.ma.min(y), np.ma.max(y) + 1) + x_slice = _ensure_integer_slice(x_slice) + y_slice = _ensure_integer_slice(y_slice) + if shape_divisible_by is not None: + x_slice = _make_slice_divisible(x_slice, src_area.width, + factor=shape_divisible_by) + y_slice = _make_slice_divisible(y_slice, src_area.height, + factor=shape_divisible_by) + + return (check_slice_orientation(x_slice), + check_slice_orientation(y_slice)) + + +def _get_slice_starts_stops(src_area, area_to_cover): + """Get x and y start and stop points for slicing.""" + llx, lly, urx, ury = area_to_cover.area_extent + x, y = src_area.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury]) + + # we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent. + if (src_area.area_extent[0] > src_area.area_extent[2]) ^ (llx > urx): + xstart = max(0, round(x[1])) + xstop = min(src_area.width, round(x[0]) + 1) + else: + xstart = max(0, round(x[0])) + xstop = min(src_area.width, round(x[1]) + 1) + if (src_area.area_extent[1] > src_area.area_extent[3]) ^ (lly > ury): + ystart = max(0, round(y[0])) + ystop = min(src_area.height, round(y[1]) + 1) + else: + ystart = max(0, round(y[1])) + ystop = min(src_area.height, round(y[0]) + 1) + + return xstart, xstop, ystart, ystop + + def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary: try: if area_to_cover.is_geostationary: diff --git a/pyresample/test/conftest.py b/pyresample/test/conftest.py index 992fa5a02..e69fcb5b9 100644 --- a/pyresample/test/conftest.py +++ b/pyresample/test/conftest.py @@ -42,6 +42,7 @@ def reset_pyresample_config(tmpdir): """Set pyresample config to logical defaults for tests.""" test_config = { + "cache_geom_slices": False, "features": { "future_geometries": False, }, diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index 4ed0b283c..6d298b561 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -15,6 +15,7 @@ """Test AreaDefinition objects.""" import io import sys +from glob import glob from unittest.mock import MagicMock, patch import dask.array as da @@ -1840,6 +1841,25 @@ def test_area_to_cover_all_nan_bounds(self, geos_src_area, create_test_area): with pytest.raises(NotImplementedError): area_def.get_area_slices(area_to_cover) + def test_area_slices_caching(self, create_test_area, tmp_path): + """Check that area slices can be cached.""" + src_area = create_test_area(dict(proj="utm", zone=33), + 10980, 10980, + (499980.0, 6490200.0, 609780.0, 6600000.0)) + crop_area = create_test_area({'proj': 'latlong'}, + 100, 100, + (15.9689, 58.5284, 16.4346, 58.6995)) + with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=False): + assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0 + slice_x, slice_y = src_area.get_area_slices(crop_area) + assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0 + with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True): + assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0 + slice_x, slice_y = src_area.get_area_slices(crop_area) + assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 1 + assert slice_x == slice(5630, 8339) + assert slice_y == slice(9261, 10980) + class TestBoundary: """Test 'boundary' method for AreaDefinition classes.""" From 94ca7a8c0c55600385f3c29df84b34a9e06e9f01 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Sun, 12 Nov 2023 21:25:51 -0600 Subject: [PATCH 03/10] Refactor JSON caching to resemble satpy caching decorators --- pyresample/_caching.py | 106 +++++++++++++-------- pyresample/geometry.py | 4 +- pyresample/test/test_geometry/test_area.py | 8 +- 3 files changed, 74 insertions(+), 44 deletions(-) diff --git a/pyresample/_caching.py b/pyresample/_caching.py index 65e37cfc2..137e50a2b 100644 --- a/pyresample/_caching.py +++ b/pyresample/_caching.py @@ -4,57 +4,69 @@ throughout pyresample. """ - -import functools import hashlib import json +import shutil +from functools import update_wrapper +from glob import glob from pathlib import Path from typing import Any, Callable import pyresample -class JSONCache: +class JSONCacheHelper: """Decorator class to cache results to a JSON file on-disk.""" - def __init__(self, *args, **kwargs): - self._callable = None - if len(args) == 1 and not kwargs: - self._callable = args[0] + def __init__( + self, + func: Callable, + cache_config_key: str, + cache_version: int = 1, + ): + self._callable = func + self._cache_config_key = cache_config_key + self._cache_version = cache_version + + def cache_clear(self, cache_dir: str | None = None): + """Remove all on-disk files associated with this function. + + Intended to mimic the :func:`functools.cache` behavior. + """ + cache_dir = self._get_cache_dir_from_config(cache_dir=cache_dir, cache_version="*") + for zarr_dir in glob(str(cache_dir / "*.json")): + shutil.rmtree(zarr_dir, ignore_errors=True) def __call__(self, *args, **kwargs): """Call decorated function and cache the result to JSON.""" - is_decorated = len(args) == 1 and isinstance(args[0], Callable) - if is_decorated: - self._callable = args[0] - - @functools.wraps(self._callable) - def _func(*args, **kwargs): - if not pyresample.config.get("cache_geom_slices", False): - return self._callable(*args, **kwargs) - - # TODO: kwargs - existing_hash = hashlib.sha1() - # hashable_args = [hash(arg) if isinstance(arg, AreaDefinition) else arg for arg in args] - hashable_args = [hash(arg) if arg.__class__.__name__ == "AreaDefinition" else arg for arg in args] - existing_hash.update(json.dumps(tuple(hashable_args)).encode("utf8")) - arg_hash = existing_hash.hexdigest() - print(arg_hash) - base_cache_dir = Path(pyresample.config.get("cache_dir")) / "geometry_slices" - json_path = base_cache_dir / f"{arg_hash}.json" - if not json_path.is_file(): - res = self._callable(*args, **kwargs) - json_path.parent.mkdir(exist_ok=True) - with open(json_path, "w") as json_cache: - json.dump(res, json_cache, cls=_ExtraJSONEncoder) - else: - with open(json_path, "r") as json_cache: - res = json.load(json_cache, object_hook=_object_hook) - return res - - if is_decorated: - return _func - return _func(*args, **kwargs) + if not pyresample.config.get(self._cache_config_key, False): + return self._callable(*args, **kwargs) + + existing_hash = hashlib.sha1() + # TODO: exclude SwathDefinition for hashing reasons + hashable_args = [hash(arg) if arg.__class__.__name__ in ("AreaDefinition",) else arg for arg in args] + hashable_args += sorted(kwargs.items()) + existing_hash.update(json.dumps(tuple(hashable_args)).encode("utf8")) + arg_hash = existing_hash.hexdigest() + base_cache_dir = self._get_cache_dir_from_config(cache_version=self._cache_version) + json_path = base_cache_dir / f"{arg_hash}.json" + if not json_path.is_file(): + res = self._callable(*args, **kwargs) + json_path.parent.mkdir(exist_ok=True) + with open(json_path, "w") as json_cache: + json.dump(res, json_cache, cls=_ExtraJSONEncoder) + else: + with open(json_path, "r") as json_cache: + res = json.load(json_cache, object_hook=_object_hook) + return res + + @staticmethod + def _get_cache_dir_from_config(cache_dir: str | None = None, cache_version: int | str = 1) -> Path: + cache_dir = cache_dir or pyresample.config.get("cache_dir") + if cache_dir is None: + raise RuntimeError("Can't use JSON caching. No 'cache_dir' configured.") + subdir = f"geometry_slices_v{cache_version}" + return Path(cache_dir) / subdir class _ExtraJSONEncoder(json.JSONEncoder): @@ -68,3 +80,21 @@ def _object_hook(obj: object) -> Any: if isinstance(obj, dict) and obj.get("__slice__", False): return slice(obj["start"], obj["stop"], obj["step"]) return obj + + +def cache_to_json_if(cache_config_key: str) -> Callable: + """Decorate a function and cache the results to a JSON file on disk. + + This caching only happens if the ``pyresample.config`` boolean value for + the provided key is ``True`` as well as some other conditions. See + :class:`JSONCacheHelper` for more information. Most importantly this + decorator does not limit how many items can be cached and does not clear + out old entries. It is up to the user to manage the size of the cache. + + """ + def _decorator(func: Callable) -> Callable: + zarr_cacher = JSONCacheHelper(func, cache_config_key) + wrapper = update_wrapper(zarr_cacher, func) + return wrapper + + return _decorator diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 9ef0d9473..23b407561 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -34,7 +34,7 @@ from pyproj.aoi import AreaOfUse from pyresample import CHUNK_SIZE -from pyresample._caching import JSONCache +from pyresample._caching import cache_to_json_if from pyresample._spatial_mp import Cartesian, Cartesian_MP, Proj_MP from pyresample.area_config import create_area_def from pyresample.boundary import Boundary, SimpleBoundary @@ -2680,7 +2680,7 @@ def geocentric_resolution(self, ellps='WGS84', radius=None): return res -@JSONCache() +@cache_to_json_if("cache_geom_slices") def get_area_slices( src_area: AreaDefinition, area_to_cover: AreaDefinition, diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index 6d298b561..c8b7f3bca 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -1850,13 +1850,13 @@ def test_area_slices_caching(self, create_test_area, tmp_path): 100, 100, (15.9689, 58.5284, 16.4346, 58.6995)) with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=False): - assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0 + assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 0 slice_x, slice_y = src_area.get_area_slices(crop_area) - assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0 + assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 0 with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True): - assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0 + assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 0 slice_x, slice_y = src_area.get_area_slices(crop_area) - assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 1 + assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 1 assert slice_x == slice(5630, 8339) assert slice_y == slice(9261, 10980) From 2c805433fd119600bc3ba10e3aca8311d0329253 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 15 Nov 2023 21:21:11 -0600 Subject: [PATCH 04/10] Drop musllinux builds (pyproj isn't providing them) --- pyresample/_caching.py | 80 +++++++++----- pyresample/future/geometry/_subset.py | 123 +++++++++++++++++++++ pyresample/geometry.py | 113 +------------------ pyresample/test/test_geometry/test_area.py | 14 ++- 4 files changed, 189 insertions(+), 141 deletions(-) create mode 100644 pyresample/future/geometry/_subset.py diff --git a/pyresample/_caching.py b/pyresample/_caching.py index 137e50a2b..80e5eb932 100644 --- a/pyresample/_caching.py +++ b/pyresample/_caching.py @@ -6,7 +6,8 @@ """ import hashlib import json -import shutil +import os +import warnings from functools import update_wrapper from glob import glob from pathlib import Path @@ -27,46 +28,71 @@ def __init__( self._callable = func self._cache_config_key = cache_config_key self._cache_version = cache_version + self._uncacheable_arg_type_names = ("",) - def cache_clear(self, cache_dir: str | None = None): + @staticmethod + def cache_clear(cache_dir: str | None = None): """Remove all on-disk files associated with this function. Intended to mimic the :func:`functools.cache` behavior. """ - cache_dir = self._get_cache_dir_from_config(cache_dir=cache_dir, cache_version="*") - for zarr_dir in glob(str(cache_dir / "*.json")): - shutil.rmtree(zarr_dir, ignore_errors=True) + cache_dir = _get_cache_dir_from_config(cache_dir=cache_dir, cache_version="*") + for json_file in glob(str(cache_dir / "*.json")): + os.remove(json_file) - def __call__(self, *args, **kwargs): + def __call__(self, *args): """Call decorated function and cache the result to JSON.""" - if not pyresample.config.get(self._cache_config_key, False): - return self._callable(*args, **kwargs) - - existing_hash = hashlib.sha1() - # TODO: exclude SwathDefinition for hashing reasons - hashable_args = [hash(arg) if arg.__class__.__name__ in ("AreaDefinition",) else arg for arg in args] - hashable_args += sorted(kwargs.items()) - existing_hash.update(json.dumps(tuple(hashable_args)).encode("utf8")) - arg_hash = existing_hash.hexdigest() - base_cache_dir = self._get_cache_dir_from_config(cache_version=self._cache_version) + should_cache = pyresample.config.get(self._cache_config_key, False) + if not should_cache: + return self._callable(*args) + + try: + arg_hash = _hash_args(args) + except TypeError as err: + warnings.warn("Cannot cache function due to unhashable argument: " + str(err), + stacklevel=2) + return self._callable(*args) + + return self._run_and_cache(arg_hash, args) + + def _run_and_cache(self, arg_hash: str, args: tuple[Any]) -> Any: + base_cache_dir = _get_cache_dir_from_config(cache_version=self._cache_version) json_path = base_cache_dir / f"{arg_hash}.json" if not json_path.is_file(): - res = self._callable(*args, **kwargs) + res = self._callable(*args) json_path.parent.mkdir(exist_ok=True) with open(json_path, "w") as json_cache: json.dump(res, json_cache, cls=_ExtraJSONEncoder) - else: - with open(json_path, "r") as json_cache: - res = json.load(json_cache, object_hook=_object_hook) + + # for consistency, always load the cached result + with open(json_path, "r") as json_cache: + res = json.load(json_cache, object_hook=_object_hook) return res - @staticmethod - def _get_cache_dir_from_config(cache_dir: str | None = None, cache_version: int | str = 1) -> Path: - cache_dir = cache_dir or pyresample.config.get("cache_dir") - if cache_dir is None: - raise RuntimeError("Can't use JSON caching. No 'cache_dir' configured.") - subdir = f"geometry_slices_v{cache_version}" - return Path(cache_dir) / subdir + +def _get_cache_dir_from_config(cache_dir: str | None = None, cache_version: int | str = 1) -> Path: + cache_dir = cache_dir or pyresample.config.get("cache_dir") + if cache_dir is None: + raise RuntimeError("Can't use JSON caching. No 'cache_dir' configured.") + subdir = f"geometry_slices_v{cache_version}" + return Path(cache_dir) / subdir + + +def _hash_args(args: tuple[Any]) -> str: + from pyresample.future.geometry import AreaDefinition, SwathDefinition + from pyresample.geometry import AreaDefinition as LegacyAreaDefinition + from pyresample.geometry import SwathDefinition as LegacySwathDefinition + + hashable_args = [] + for arg in args: + if isinstance(arg, (SwathDefinition, LegacySwathDefinition)): + raise TypeError(f"Unhashable type ({type(arg)})") + if isinstance(arg, (AreaDefinition, LegacyAreaDefinition)): + arg = hash(arg) + hashable_args.append(arg) + arg_hash = hashlib.sha1() # nosec + arg_hash.update(json.dumps(tuple(hashable_args)).encode("utf8")) + return arg_hash.hexdigest() class _ExtraJSONEncoder(json.JSONEncoder): diff --git a/pyresample/future/geometry/_subset.py b/pyresample/future/geometry/_subset.py new file mode 100644 index 000000000..3eb81d5dc --- /dev/null +++ b/pyresample/future/geometry/_subset.py @@ -0,0 +1,123 @@ +"""Functions and tools for subsetting a geometry object.""" +from __future__ import annotations + +import math + +import numpy as np + +from pyresample import AreaDefinition + +# this caching module imports the geometries so this subset module +# must be imported inside functions in the geometry modules if needed +# to avoid circular dependencies +from pyresample._caching import cache_to_json_if +from pyresample.boundary import Boundary +from pyresample.geometry import get_geostationary_bounding_box_in_lonlats, logger +from pyresample.utils import check_slice_orientation + + +@cache_to_json_if("cache_geom_slices") +def get_area_slices( + src_area: AreaDefinition, + area_to_cover: AreaDefinition, + shape_divisible_by: int | None, +) -> tuple[slice, slice]: + """Compute the slice to read based on an `area_to_cover`.""" + if not isinstance(area_to_cover, AreaDefinition): + raise NotImplementedError('Only AreaDefinitions can be used') + + # Intersection only required for two different projections + proj_def_to_cover = area_to_cover.crs + proj_def = src_area.crs + if proj_def_to_cover == proj_def: + logger.debug('Projections for data and slice areas are identical: %s', + proj_def_to_cover) + # Get slice parameters + xstart, xstop, ystart, ystop = _get_slice_starts_stops(src_area, area_to_cover) + + x_slice = check_slice_orientation(slice(xstart, xstop)) + y_slice = check_slice_orientation(slice(ystart, ystop)) + x_slice = _ensure_integer_slice(x_slice) + y_slice = _ensure_integer_slice(y_slice) + return x_slice, y_slice + + data_boundary = _get_area_boundary(src_area) + area_boundary = _get_area_boundary(area_to_cover) + intersection = data_boundary.contour_poly.intersection( + area_boundary.contour_poly) + if intersection is None: + logger.debug('Cannot determine appropriate slicing. ' + "Data and projection area do not overlap.") + raise NotImplementedError + x, y = src_area.get_array_indices_from_lonlat( + np.rad2deg(intersection.lon), np.rad2deg(intersection.lat)) + x_slice = slice(np.ma.min(x), np.ma.max(x) + 1) + y_slice = slice(np.ma.min(y), np.ma.max(y) + 1) + x_slice = _ensure_integer_slice(x_slice) + y_slice = _ensure_integer_slice(y_slice) + if shape_divisible_by is not None: + x_slice = _make_slice_divisible(x_slice, src_area.width, + factor=shape_divisible_by) + y_slice = _make_slice_divisible(y_slice, src_area.height, + factor=shape_divisible_by) + + return (check_slice_orientation(x_slice), + check_slice_orientation(y_slice)) + + +def _get_slice_starts_stops(src_area, area_to_cover): + """Get x and y start and stop points for slicing.""" + llx, lly, urx, ury = area_to_cover.area_extent + x, y = src_area.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury]) + + # we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent. + if (src_area.area_extent[0] > src_area.area_extent[2]) ^ (llx > urx): + xstart = max(0, round(x[1])) + xstop = min(src_area.width, round(x[0]) + 1) + else: + xstart = max(0, round(x[0])) + xstop = min(src_area.width, round(x[1]) + 1) + if (src_area.area_extent[1] > src_area.area_extent[3]) ^ (lly > ury): + ystart = max(0, round(y[0])) + ystop = min(src_area.height, round(y[1]) + 1) + else: + ystart = max(0, round(y[1])) + ystop = min(src_area.height, round(y[0]) + 1) + + return xstart, xstop, ystart, ystop + + +def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary: + try: + if area_to_cover.is_geostationary: + return Boundary(*get_geostationary_bounding_box_in_lonlats(area_to_cover)) + boundary_shape = max(max(*area_to_cover.shape) // 100 + 1, 3) + return area_to_cover.boundary(frequency=boundary_shape, force_clockwise=True) + except ValueError: + raise NotImplementedError("Can't determine boundary of area to cover") + + +def _make_slice_divisible(sli, max_size, factor=2): + """Make the given slice even in size.""" + rem = (sli.stop - sli.start) % factor + if rem != 0: + adj = factor - rem + if sli.stop + 1 + rem < max_size: + sli = slice(sli.start, sli.stop + adj) + elif sli.start > 0: + sli = slice(sli.start - adj, sli.stop) + else: + sli = slice(sli.start, sli.stop - rem) + + return sli + + +def _ensure_integer_slice(sli): + start = sli.start + stop = sli.stop + step = sli.step + return slice( + math.floor(start) if start is not None else None, + math.ceil(stop) if stop is not None else None, + math.floor(step) if step is not None else None + ) diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 23b407561..1ecf1d1d1 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -34,11 +34,10 @@ from pyproj.aoi import AreaOfUse from pyresample import CHUNK_SIZE -from pyresample._caching import cache_to_json_if from pyresample._spatial_mp import Cartesian, Cartesian_MP, Proj_MP from pyresample.area_config import create_area_def -from pyresample.boundary import Boundary, SimpleBoundary -from pyresample.utils import check_slice_orientation, load_cf_area +from pyresample.boundary import SimpleBoundary +from pyresample.utils import load_cf_area from pyresample.utils.proj4 import ( get_geodetic_crs_with_no_datum_shift, get_geostationary_height, @@ -2579,6 +2578,7 @@ def proj4_string(self): def get_area_slices(self, area_to_cover, shape_divisible_by=None): """Compute the slice to read based on an `area_to_cover`.""" + from .future.geometry._subset import get_area_slices return get_area_slices(self, area_to_cover, shape_divisible_by) def crop_around(self, other_area): @@ -2680,113 +2680,6 @@ def geocentric_resolution(self, ellps='WGS84', radius=None): return res -@cache_to_json_if("cache_geom_slices") -def get_area_slices( - src_area: AreaDefinition, - area_to_cover: AreaDefinition, - shape_divisible_by: int | None = None -) -> tuple[slice, slice]: - """Compute the slice to read based on an `area_to_cover`.""" - if not isinstance(area_to_cover, AreaDefinition): - raise NotImplementedError('Only AreaDefinitions can be used') - - # Intersection only required for two different projections - proj_def_to_cover = area_to_cover.crs - proj_def = src_area.crs - if proj_def_to_cover == proj_def: - logger.debug('Projections for data and slice areas are identical: %s', - proj_def_to_cover) - # Get slice parameters - xstart, xstop, ystart, ystop = _get_slice_starts_stops(src_area, area_to_cover) - - x_slice = check_slice_orientation(slice(xstart, xstop)) - y_slice = check_slice_orientation(slice(ystart, ystop)) - x_slice = _ensure_integer_slice(x_slice) - y_slice = _ensure_integer_slice(y_slice) - return x_slice, y_slice - - data_boundary = _get_area_boundary(src_area) - area_boundary = _get_area_boundary(area_to_cover) - intersection = data_boundary.contour_poly.intersection( - area_boundary.contour_poly) - if intersection is None: - logger.debug('Cannot determine appropriate slicing. ' - "Data and projection area do not overlap.") - raise NotImplementedError - x, y = src_area.get_array_indices_from_lonlat( - np.rad2deg(intersection.lon), np.rad2deg(intersection.lat)) - x_slice = slice(np.ma.min(x), np.ma.max(x) + 1) - y_slice = slice(np.ma.min(y), np.ma.max(y) + 1) - x_slice = _ensure_integer_slice(x_slice) - y_slice = _ensure_integer_slice(y_slice) - if shape_divisible_by is not None: - x_slice = _make_slice_divisible(x_slice, src_area.width, - factor=shape_divisible_by) - y_slice = _make_slice_divisible(y_slice, src_area.height, - factor=shape_divisible_by) - - return (check_slice_orientation(x_slice), - check_slice_orientation(y_slice)) - - -def _get_slice_starts_stops(src_area, area_to_cover): - """Get x and y start and stop points for slicing.""" - llx, lly, urx, ury = area_to_cover.area_extent - x, y = src_area.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury]) - - # we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent. - if (src_area.area_extent[0] > src_area.area_extent[2]) ^ (llx > urx): - xstart = max(0, round(x[1])) - xstop = min(src_area.width, round(x[0]) + 1) - else: - xstart = max(0, round(x[0])) - xstop = min(src_area.width, round(x[1]) + 1) - if (src_area.area_extent[1] > src_area.area_extent[3]) ^ (lly > ury): - ystart = max(0, round(y[0])) - ystop = min(src_area.height, round(y[1]) + 1) - else: - ystart = max(0, round(y[1])) - ystop = min(src_area.height, round(y[0]) + 1) - - return xstart, xstop, ystart, ystop - - -def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary: - try: - if area_to_cover.is_geostationary: - return Boundary(*get_geostationary_bounding_box_in_lonlats(area_to_cover)) - boundary_shape = max(max(*area_to_cover.shape) // 100 + 1, 3) - return area_to_cover.boundary(frequency=boundary_shape, force_clockwise=True) - except ValueError: - raise NotImplementedError("Can't determine boundary of area to cover") - - -def _make_slice_divisible(sli, max_size, factor=2): - """Make the given slice even in size.""" - rem = (sli.stop - sli.start) % factor - if rem != 0: - adj = factor - rem - if sli.stop + 1 + rem < max_size: - sli = slice(sli.start, sli.stop + adj) - elif sli.start > 0: - sli = slice(sli.start - adj, sli.stop) - else: - sli = slice(sli.start, sli.stop - rem) - - return sli - - -def _ensure_integer_slice(sli): - start = sli.start - stop = sli.stop - step = sli.step - return slice( - math.floor(start) if start is not None else None, - math.ceil(stop) if stop is not None else None, - math.floor(step) if step is not None else None - ) - - def get_geostationary_angle_extent(geos_area): """Get the max earth (vs space) viewing angles in x and y.""" # get some projection parameters diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index c8b7f3bca..895b8e1c5 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -1849,17 +1849,23 @@ def test_area_slices_caching(self, create_test_area, tmp_path): crop_area = create_test_area({'proj': 'latlong'}, 100, 100, (15.9689, 58.5284, 16.4346, 58.6995)) + cache_glob = str(tmp_path / "geometry_slices_v1" / "*.json") with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=False): - assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 0 + assert len(glob(cache_glob)) == 0 slice_x, slice_y = src_area.get_area_slices(crop_area) - assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 0 + assert len(glob(cache_glob)) == 0 with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True): - assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 0 + assert len(glob(cache_glob)) == 0 slice_x, slice_y = src_area.get_area_slices(crop_area) - assert len(glob(str(tmp_path / "geometry_slices_v1" / "*.json"))) == 1 + assert len(glob(cache_glob)) == 1 assert slice_x == slice(5630, 8339) assert slice_y == slice(9261, 10980) + from pyresample.future.geometry._subset import get_area_slices + with pyresample.config.set(cache_dir=tmp_path): + get_area_slices.cache_clear() + assert len(glob(cache_glob)) == 0 + class TestBoundary: """Test 'boundary' method for AreaDefinition classes.""" From e9f7fa0478c216f9cca63cfafa35536d98f07a16 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 11:08:12 -0600 Subject: [PATCH 05/10] Fix area slice tests after function refactor --- pyresample/test/test_geometry/test_area.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index 895b8e1c5..9d1e1ad63 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -882,6 +882,7 @@ def test_get_array_indices_from_lonlat(self, create_test_area): def test_get_slice_starts_stops(self, create_test_area): """Check area slice end-points.""" + from pyresample.future.geometry._subset import _get_slice_starts_stops x_size = 3712 y_size = 3712 area_extent = (-5570248.477339745, -5561247.267842293, 5567248.074173927, 5570248.477339745) @@ -895,25 +896,25 @@ def test_get_slice_starts_stops(self, create_test_area): # Source and target have the same orientation area_extent = (-5580248.477339745, -5571247.267842293, 5577248.074173927, 5580248.477339745) source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = source_area._get_slice_starts_stops(target_area) + res = _get_slice_starts_stops(source_area, target_area) assert res == expected # Source is flipped in X direction area_extent = (5577248.074173927, -5571247.267842293, -5580248.477339745, 5580248.477339745) source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = source_area._get_slice_starts_stops(target_area) + res = _get_slice_starts_stops(source_area, target_area) assert res == expected # Source is flipped in Y direction area_extent = (-5580248.477339745, 5580248.477339745, 5577248.074173927, -5571247.267842293) source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = source_area._get_slice_starts_stops(target_area) + res = _get_slice_starts_stops(source_area, target_area) assert res == expected # Source is flipped in both X and Y directions area_extent = (5577248.074173927, 5580248.477339745, -5580248.477339745, -5571247.267842293) source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = source_area._get_slice_starts_stops(target_area) + res = _get_slice_starts_stops(source_area, target_area) assert res == expected def test_proj_str(self, create_test_area): @@ -1251,7 +1252,7 @@ class TestMakeSliceDivisible: def test_make_slice_divisible(self): """Test that making area shape divisible by a given factor works.""" - from pyresample.geometry import _make_slice_divisible + from pyresample.future.geometry._subset import _make_slice_divisible # Divisible by 2 sli = slice(10, 21) From 2f9df8bf25954f4a45245a1af8e4f296a7da3858 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 11:13:24 -0600 Subject: [PATCH 06/10] Simplify area slice tests with parametrize --- pyresample/test/test_geometry/test_area.py | 69 ++++++++-------------- 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index 9d1e1ad63..2074548de 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -880,7 +880,20 @@ def test_get_array_indices_from_lonlat(self, create_test_area): assert (x__.data == x_expects).all() assert (y__.data == y_expects).all() - def test_get_slice_starts_stops(self, create_test_area): + @pytest.mark.parametrize( + "src_extent", + [ + # Source and target have the same orientation + (-5580248.477339745, -5571247.267842293, 5577248.074173927, 5580248.477339745), + # Source is flipped in X direction + (5577248.074173927, -5571247.267842293, -5580248.477339745, 5580248.477339745), + # Source is flipped in Y direction + (-5580248.477339745, 5580248.477339745, 5577248.074173927, -5571247.267842293), + # Source is flipped in both X and Y directions + (5577248.074173927, 5580248.477339745, -5580248.477339745, -5571247.267842293), + ] + ) + def test_get_slice_starts_stops(self, create_test_area, src_extent): """Check area slice end-points.""" from pyresample.future.geometry._subset import _get_slice_starts_stops x_size = 3712 @@ -889,33 +902,9 @@ def test_get_slice_starts_stops(self, create_test_area): proj_dict = {'a': 6378169.0, 'b': 6356583.8, 'h': 35785831.0, 'lon_0': 0.0, 'proj': 'geos', 'units': 'm'} target_area = create_test_area(proj_dict, x_size, y_size, area_extent) - - # Expected result is the same for all cases - expected = (3, 3709, 3, 3709) - - # Source and target have the same orientation - area_extent = (-5580248.477339745, -5571247.267842293, 5577248.074173927, 5580248.477339745) - source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = _get_slice_starts_stops(source_area, target_area) - assert res == expected - - # Source is flipped in X direction - area_extent = (5577248.074173927, -5571247.267842293, -5580248.477339745, 5580248.477339745) - source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = _get_slice_starts_stops(source_area, target_area) - assert res == expected - - # Source is flipped in Y direction - area_extent = (-5580248.477339745, 5580248.477339745, 5577248.074173927, -5571247.267842293) - source_area = create_test_area(proj_dict, x_size, y_size, area_extent) - res = _get_slice_starts_stops(source_area, target_area) - assert res == expected - - # Source is flipped in both X and Y directions - area_extent = (5577248.074173927, 5580248.477339745, -5580248.477339745, -5571247.267842293) - source_area = create_test_area(proj_dict, x_size, y_size, area_extent) + source_area = create_test_area(proj_dict, x_size, y_size, src_extent) res = _get_slice_starts_stops(source_area, target_area) - assert res == expected + assert res == (3, 3709, 3, 3709) def test_proj_str(self, create_test_area): """Test the 'proj_str' property of AreaDefinition.""" @@ -1250,27 +1239,19 @@ def test_area_def_metadata_equality(self): class TestMakeSliceDivisible: """Test the _make_slice_divisible.""" - def test_make_slice_divisible(self): + @pytest.mark.parametrize( + ("sli", "factor"), + [ + (slice(10, 21), 2), + (slice(10, 23), 3), + (slice(10, 23), 5), + ] + ) + def test_make_slice_divisible(self, sli, factor): """Test that making area shape divisible by a given factor works.""" from pyresample.future.geometry._subset import _make_slice_divisible # Divisible by 2 - sli = slice(10, 21) - factor = 2 - assert (sli.stop - sli.start) % factor != 0 - res = _make_slice_divisible(sli, 1000, factor=factor) - assert (res.stop - res.start) % factor == 0 - - # Divisible by 3 - sli = slice(10, 23) - factor = 3 - assert (sli.stop - sli.start) % factor != 0 - res = _make_slice_divisible(sli, 1000, factor=factor) - assert (res.stop - res.start) % factor == 0 - - # Divisible by 5 - sli = slice(10, 23) - factor = 5 assert (sli.stop - sli.start) % factor != 0 res = _make_slice_divisible(sli, 1000, factor=factor) assert (res.stop - res.start) % factor == 0 From 1de5f828c6bede88a28e4f86eef45f91c376640c Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 21:21:31 -0600 Subject: [PATCH 07/10] Add more area slice caching tests --- pyresample/future/geometry/_subset.py | 16 +++++-- pyresample/test/test_geometry/test_area.py | 49 +++++++++++++++++----- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/pyresample/future/geometry/_subset.py b/pyresample/future/geometry/_subset.py index 3eb81d5dc..2099f1d0b 100644 --- a/pyresample/future/geometry/_subset.py +++ b/pyresample/future/geometry/_subset.py @@ -2,11 +2,10 @@ from __future__ import annotations import math +from typing import TYPE_CHECKING, Any import numpy as np -from pyresample import AreaDefinition - # this caching module imports the geometries so this subset module # must be imported inside functions in the geometry modules if needed # to avoid circular dependencies @@ -15,6 +14,9 @@ from pyresample.geometry import get_geostationary_bounding_box_in_lonlats, logger from pyresample.utils import check_slice_orientation +if TYPE_CHECKING: + from pyresample import AreaDefinition + @cache_to_json_if("cache_geom_slices") def get_area_slices( @@ -23,8 +25,10 @@ def get_area_slices( shape_divisible_by: int | None, ) -> tuple[slice, slice]: """Compute the slice to read based on an `area_to_cover`.""" - if not isinstance(area_to_cover, AreaDefinition): - raise NotImplementedError('Only AreaDefinitions can be used') + if not _is_area_like(src_area): + raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(src_area)}") + if not _is_area_like(area_to_cover): + raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(area_to_cover)}") # Intersection only required for two different projections proj_def_to_cover = area_to_cover.crs @@ -65,6 +69,10 @@ def get_area_slices( check_slice_orientation(y_slice)) +def _is_area_like(area_obj: Any) -> bool: + return hasattr(area_obj, "crs") and hasattr(area_obj, "area_extent") + + def _get_slice_starts_stops(src_area, area_to_cover): """Get x and y start and stop points for slicing.""" llx, lly, urx, ury = area_to_cover.area_extent diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index 2074548de..f066f5657 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -1823,7 +1823,8 @@ def test_area_to_cover_all_nan_bounds(self, geos_src_area, create_test_area): with pytest.raises(NotImplementedError): area_def.get_area_slices(area_to_cover) - def test_area_slices_caching(self, create_test_area, tmp_path): + @pytest.mark.parametrize("cache_slices", [False, True]) + def test_area_slices_caching(self, create_test_area, tmp_path, cache_slices): """Check that area slices can be cached.""" src_area = create_test_area(dict(proj="utm", zone=33), 10980, 10980, @@ -1832,21 +1833,49 @@ def test_area_slices_caching(self, create_test_area, tmp_path): 100, 100, (15.9689, 58.5284, 16.4346, 58.6995)) cache_glob = str(tmp_path / "geometry_slices_v1" / "*.json") - with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=False): + with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=cache_slices): assert len(glob(cache_glob)) == 0 slice_x, slice_y = src_area.get_area_slices(crop_area) - assert len(glob(cache_glob)) == 0 - with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True): - assert len(glob(cache_glob)) == 0 - slice_x, slice_y = src_area.get_area_slices(crop_area) - assert len(glob(cache_glob)) == 1 + assert len(glob(cache_glob)) == int(cache_slices) assert slice_x == slice(5630, 8339) assert slice_y == slice(9261, 10980) + if cache_slices: + from pyresample.future.geometry._subset import get_area_slices + with pyresample.config.set(cache_dir=tmp_path): + get_area_slices.cache_clear() + assert len(glob(cache_glob)) == 0 + + def test_area_slices_caching_no_swaths(self, tmp_path, create_test_area, create_test_swath): + """Test that swath inputs produce a warning when tried to use in caching.""" + from pyresample.future.geometry._subset import get_area_slices + from pyresample.test.utils import create_test_latitude, create_test_longitude + area = create_test_area(dict(proj="utm", zone=33), + 10980, 10980, + (499980.0, 6490200.0, 609780.0, 6600000.0)) + lons = create_test_longitude(-95.0, -75.0, shape=(1000, 500)) + lats = create_test_latitude(25.0, 35.0, shape=(1000, 500)) + swath = create_test_swath(lons, lats) + + with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True), pytest.raises(NotImplementedError): + with pytest.warns(UserWarning, match="unhashable"): + get_area_slices(swath, area, None) + + @pytest.mark.parametrize("swath_as_src", [False, True]) + def test_unsupported_slice_inputs(self, create_test_area, create_test_swath, swath_as_src): + """Test that swath inputs produce an error.""" from pyresample.future.geometry._subset import get_area_slices - with pyresample.config.set(cache_dir=tmp_path): - get_area_slices.cache_clear() - assert len(glob(cache_glob)) == 0 + from pyresample.test.utils import create_test_latitude, create_test_longitude + area = create_test_area(dict(proj="utm", zone=33), + 10980, 10980, + (499980.0, 6490200.0, 609780.0, 6600000.0)) + lons = create_test_longitude(-95.0, -75.0, shape=(1000, 500)) + lats = create_test_latitude(25.0, 35.0, shape=(1000, 500)) + swath = create_test_swath(lons, lats) + + with pytest.raises(NotImplementedError): + args = (swath, area) if swath_as_src else (area, swath) + get_area_slices(*args, None) class TestBoundary: From dc68bfe86fc39bb2119d9ed93aa9a209e30252b8 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 16 Nov 2023 21:42:50 -0600 Subject: [PATCH 08/10] Fix missing future annotations import --- pyresample/_caching.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyresample/_caching.py b/pyresample/_caching.py index 80e5eb932..b44233e3b 100644 --- a/pyresample/_caching.py +++ b/pyresample/_caching.py @@ -4,6 +4,8 @@ throughout pyresample. """ +from __future__ import annotations + import hashlib import json import os From a224317b841c8cf63eecab778eaa69d6df335fdd Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 17 Nov 2023 09:34:31 -0600 Subject: [PATCH 09/10] Rename cache_geom_slices to cache_geometry_slices --- docs/source/howtos/configuration.rst | 4 ++-- pyresample/_caching.py | 4 ++-- pyresample/_config.py | 3 +-- pyresample/future/geometry/_subset.py | 2 +- pyresample/test/conftest.py | 2 +- pyresample/test/test_geometry/test_area.py | 4 ++-- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/source/howtos/configuration.rst b/docs/source/howtos/configuration.rst index 28e075f85..fe1606350 100644 --- a/docs/source/howtos/configuration.rst +++ b/docs/source/howtos/configuration.rst @@ -101,8 +101,8 @@ the `platformdirs ` Cache Geometry Slices ^^^^^^^^^^^^^^^^^^^^^ -* **Environment variable**: ``PYRESAMPLE_CACHE_GEOM_SLICES`` -* **YAML/Config Key**: ``cache_geom_slices`` +* **Environment variable**: ``PYRESAMPLE_CACHE_GEOMETRY_SLICES`` +* **YAML/Config Key**: ``cache_geometry_slices`` * **Default**: ``False`` Whether or not generated slices for geometry objects are cached to disk. diff --git a/pyresample/_caching.py b/pyresample/_caching.py index b44233e3b..9f6bdb065 100644 --- a/pyresample/_caching.py +++ b/pyresample/_caching.py @@ -64,7 +64,7 @@ def _run_and_cache(self, arg_hash: str, args: tuple[Any]) -> Any: res = self._callable(*args) json_path.parent.mkdir(exist_ok=True) with open(json_path, "w") as json_cache: - json.dump(res, json_cache, cls=_ExtraJSONEncoder) + json.dump(res, json_cache, cls=_JSONEncoderWithSlice) # for consistency, always load the cached result with open(json_path, "r") as json_cache: @@ -97,7 +97,7 @@ def _hash_args(args: tuple[Any]) -> str: return arg_hash.hexdigest() -class _ExtraJSONEncoder(json.JSONEncoder): +class _JSONEncoderWithSlice(json.JSONEncoder): def default(self, obj: Any) -> Any: if isinstance(obj, slice): return {"__slice__": True, "start": obj.start, "stop": obj.stop, "step": obj.step} diff --git a/pyresample/_config.py b/pyresample/_config.py index f7297a243..9a4a2b1ae 100644 --- a/pyresample/_config.py +++ b/pyresample/_config.py @@ -21,7 +21,6 @@ from donfig import Config BASE_PATH = os.path.dirname(os.path.realpath(__file__)) -# FIXME: Use package_resources? PACKAGE_CONFIG_PATH = os.path.join(BASE_PATH, 'etc') _user_config_dir = platformdirs.user_config_dir("pyresample", "pytroll") @@ -37,7 +36,7 @@ "pyresample", defaults=[{ "cache_dir": platformdirs.user_cache_dir("pyresample", "pytroll"), - "cache_geom_slices": False, + "cache_geometry_slices": False, "features": { "future_geometries": False, }, diff --git a/pyresample/future/geometry/_subset.py b/pyresample/future/geometry/_subset.py index 2099f1d0b..df348fca1 100644 --- a/pyresample/future/geometry/_subset.py +++ b/pyresample/future/geometry/_subset.py @@ -18,7 +18,7 @@ from pyresample import AreaDefinition -@cache_to_json_if("cache_geom_slices") +@cache_to_json_if("cache_geometry_slices") def get_area_slices( src_area: AreaDefinition, area_to_cover: AreaDefinition, diff --git a/pyresample/test/conftest.py b/pyresample/test/conftest.py index e69fcb5b9..2643caad7 100644 --- a/pyresample/test/conftest.py +++ b/pyresample/test/conftest.py @@ -42,7 +42,7 @@ def reset_pyresample_config(tmpdir): """Set pyresample config to logical defaults for tests.""" test_config = { - "cache_geom_slices": False, + "cache_geometry_slices": False, "features": { "future_geometries": False, }, diff --git a/pyresample/test/test_geometry/test_area.py b/pyresample/test/test_geometry/test_area.py index f066f5657..d1aeda7ee 100644 --- a/pyresample/test/test_geometry/test_area.py +++ b/pyresample/test/test_geometry/test_area.py @@ -1833,7 +1833,7 @@ def test_area_slices_caching(self, create_test_area, tmp_path, cache_slices): 100, 100, (15.9689, 58.5284, 16.4346, 58.6995)) cache_glob = str(tmp_path / "geometry_slices_v1" / "*.json") - with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=cache_slices): + with pyresample.config.set(cache_dir=tmp_path, cache_geometry_slices=cache_slices): assert len(glob(cache_glob)) == 0 slice_x, slice_y = src_area.get_area_slices(crop_area) assert len(glob(cache_glob)) == int(cache_slices) @@ -1857,7 +1857,7 @@ def test_area_slices_caching_no_swaths(self, tmp_path, create_test_area, create_ lats = create_test_latitude(25.0, 35.0, shape=(1000, 500)) swath = create_test_swath(lons, lats) - with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True), pytest.raises(NotImplementedError): + with pyresample.config.set(cache_dir=tmp_path, cache_geometry_slices=True), pytest.raises(NotImplementedError): with pytest.warns(UserWarning, match="unhashable"): get_area_slices(swath, area, None) From b0a2579c68604037988b8256e1577973610a3596 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 17 Nov 2023 09:56:46 -0600 Subject: [PATCH 10/10] Add more information to slice caching config documentation --- docs/source/howtos/configuration.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/howtos/configuration.rst b/docs/source/howtos/configuration.rst index fe1606350..6a79acf10 100644 --- a/docs/source/howtos/configuration.rst +++ b/docs/source/howtos/configuration.rst @@ -117,6 +117,17 @@ are cached, this option saves a pair of ``slice`` objects that consist of only 3 integers each. This makes the amount of space used in the cache very small for many cached results. +The slicing operations in Pyresample typically involve finding the intersection +between two geometries. This requires generating bounding polygons for the +geometries and doing polygon intersection calculations that can handle +projection anti-meridians. At the time of writing these calculations can take +as long as 15 seconds depending on number of vertices used in the bounding +polygons. One use case for these slices is reducing input data to only the +overlap of the target area. This can be done before or during resampling as +part of the algorithm or as part of a third-party resampling interface +(ex. Satpy). In the future as optimizations are made to the polygon +intersection logic this caching option should hopefully not be needed. + When setting this as an environment variable, this should be set with the string equivalent of the Python boolean values ``="True"`` or ``="False"``.