Skip to content

Commit

Permalink
Add initial proof of concept area slice caching
Browse files Browse the repository at this point in the history
  • Loading branch information
djhoese committed Nov 12, 2023
1 parent 8c15774 commit 6fd85a1
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 62 deletions.
70 changes: 70 additions & 0 deletions pyresample/_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Various tools for caching.
These tools are rarely needed by users and are used where they make sense
throughout pyresample.
"""

import functools
import hashlib
import json
from pathlib import Path
from typing import Any, Callable

import pyresample


class JSONCache:
"""Decorator class to cache results to a JSON file on-disk."""

def __init__(self, *args, **kwargs):
self._callable = None
if len(args) == 1 and not kwargs:
self._callable = args[0]

def __call__(self, *args, **kwargs):
"""Call decorated function and cache the result to JSON."""
is_decorated = len(args) == 1 and isinstance(args[0], Callable)
if is_decorated:
self._callable = args[0]

@functools.wraps(self._callable)
def _func(*args, **kwargs):
if not pyresample.config.get("cache_geom_slices", False):
return self._callable(*args, **kwargs)

# TODO: kwargs

Check notice on line 36 in pyresample/_caching.py

View check run for this annotation

codefactor.io / CodeFactor

pyresample/_caching.py#L36

unresolved comment '# TODO: kwargs' (C100)
existing_hash = hashlib.sha1()
# hashable_args = [hash(arg) if isinstance(arg, AreaDefinition) else arg for arg in args]
hashable_args = [hash(arg) if arg.__class__.__name__ == "AreaDefinition" else arg for arg in args]
existing_hash.update(json.dumps(tuple(hashable_args)).encode("utf8"))
arg_hash = existing_hash.hexdigest()
print(arg_hash)
base_cache_dir = Path(pyresample.config.get("cache_dir")) / "geometry_slices"
json_path = base_cache_dir / f"{arg_hash}.json"
if not json_path.is_file():
res = self._callable(*args, **kwargs)
json_path.parent.mkdir(exist_ok=True)
with open(json_path, "w") as json_cache:
json.dump(res, json_cache, cls=_ExtraJSONEncoder)
else:
with open(json_path, "r") as json_cache:
res = json.load(json_cache, object_hook=_object_hook)
return res

if is_decorated:
return _func
return _func(*args, **kwargs)


class _ExtraJSONEncoder(json.JSONEncoder):
def default(self, obj: Any) -> Any:
if isinstance(obj, slice):
return {"__slice__": True, "start": obj.start, "stop": obj.stop, "step": obj.step}
return super().default(obj)


def _object_hook(obj: object) -> Any:
if isinstance(obj, dict) and obj.get("__slice__", False):
return slice(obj["start"], obj["stop"], obj["step"])
return obj
135 changes: 73 additions & 62 deletions pyresample/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pyproj.aoi import AreaOfUse

from pyresample import CHUNK_SIZE
from pyresample._caching import JSONCache
from pyresample._spatial_mp import Cartesian, Cartesian_MP, Proj_MP
from pyresample.area_config import create_area_def
from pyresample.boundary import Boundary, SimpleBoundary
Expand Down Expand Up @@ -2576,70 +2577,9 @@ def proj4_string(self):
"instead.", DeprecationWarning, stacklevel=2)
return proj4_dict_to_str(self.proj_dict)

def _get_slice_starts_stops(self, area_to_cover):
"""Get x and y start and stop points for slicing."""
llx, lly, urx, ury = area_to_cover.area_extent
x, y = self.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury])

# we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent.
if (self.area_extent[0] > self.area_extent[2]) ^ (llx > urx):
xstart = max(0, round(x[1]))
xstop = min(self.width, round(x[0]) + 1)
else:
xstart = max(0, round(x[0]))
xstop = min(self.width, round(x[1]) + 1)
if (self.area_extent[1] > self.area_extent[3]) ^ (lly > ury):
ystart = max(0, round(y[0]))
ystop = min(self.height, round(y[1]) + 1)
else:
ystart = max(0, round(y[1]))
ystop = min(self.height, round(y[0]) + 1)

return xstart, xstop, ystart, ystop

def get_area_slices(self, area_to_cover, shape_divisible_by=None):
"""Compute the slice to read based on an `area_to_cover`."""
if not isinstance(area_to_cover, AreaDefinition):
raise NotImplementedError('Only AreaDefinitions can be used')

# Intersection only required for two different projections
proj_def_to_cover = area_to_cover.crs
proj_def = self.crs
if proj_def_to_cover == proj_def:
logger.debug('Projections for data and slice areas are'
' identical: %s',
proj_def_to_cover)
# Get slice parameters
xstart, xstop, ystart, ystop = self._get_slice_starts_stops(area_to_cover)

x_slice = check_slice_orientation(slice(xstart, xstop))
y_slice = check_slice_orientation(slice(ystart, ystop))
x_slice = _ensure_integer_slice(x_slice)
y_slice = _ensure_integer_slice(y_slice)
return x_slice, y_slice

data_boundary = _get_area_boundary(self)
area_boundary = _get_area_boundary(area_to_cover)
intersection = data_boundary.contour_poly.intersection(
area_boundary.contour_poly)
if intersection is None:
logger.debug('Cannot determine appropriate slicing. '
"Data and projection area do not overlap.")
raise NotImplementedError
x, y = self.get_array_indices_from_lonlat(
np.rad2deg(intersection.lon), np.rad2deg(intersection.lat))
x_slice = slice(np.ma.min(x), np.ma.max(x) + 1)
y_slice = slice(np.ma.min(y), np.ma.max(y) + 1)
x_slice = _ensure_integer_slice(x_slice)
y_slice = _ensure_integer_slice(y_slice)
if shape_divisible_by is not None:
x_slice = _make_slice_divisible(x_slice, self.width,
factor=shape_divisible_by)
y_slice = _make_slice_divisible(y_slice, self.height,
factor=shape_divisible_by)

return (check_slice_orientation(x_slice),
check_slice_orientation(y_slice))
return get_area_slices(self, area_to_cover, shape_divisible_by)

def crop_around(self, other_area):
"""Crop this area around `other_area`."""
Expand Down Expand Up @@ -2740,6 +2680,77 @@ def geocentric_resolution(self, ellps='WGS84', radius=None):
return res


@JSONCache()
def get_area_slices(
src_area: AreaDefinition,
area_to_cover: AreaDefinition,
shape_divisible_by: int | None = None
) -> tuple[slice, slice]:
"""Compute the slice to read based on an `area_to_cover`."""
if not isinstance(area_to_cover, AreaDefinition):
raise NotImplementedError('Only AreaDefinitions can be used')

# Intersection only required for two different projections
proj_def_to_cover = area_to_cover.crs
proj_def = src_area.crs
if proj_def_to_cover == proj_def:
logger.debug('Projections for data and slice areas are identical: %s',
proj_def_to_cover)
# Get slice parameters
xstart, xstop, ystart, ystop = _get_slice_starts_stops(src_area, area_to_cover)

x_slice = check_slice_orientation(slice(xstart, xstop))
y_slice = check_slice_orientation(slice(ystart, ystop))
x_slice = _ensure_integer_slice(x_slice)
y_slice = _ensure_integer_slice(y_slice)
return x_slice, y_slice

data_boundary = _get_area_boundary(src_area)
area_boundary = _get_area_boundary(area_to_cover)
intersection = data_boundary.contour_poly.intersection(
area_boundary.contour_poly)
if intersection is None:
logger.debug('Cannot determine appropriate slicing. '
"Data and projection area do not overlap.")
raise NotImplementedError
x, y = src_area.get_array_indices_from_lonlat(
np.rad2deg(intersection.lon), np.rad2deg(intersection.lat))
x_slice = slice(np.ma.min(x), np.ma.max(x) + 1)
y_slice = slice(np.ma.min(y), np.ma.max(y) + 1)
x_slice = _ensure_integer_slice(x_slice)
y_slice = _ensure_integer_slice(y_slice)
if shape_divisible_by is not None:
x_slice = _make_slice_divisible(x_slice, src_area.width,
factor=shape_divisible_by)
y_slice = _make_slice_divisible(y_slice, src_area.height,
factor=shape_divisible_by)

return (check_slice_orientation(x_slice),
check_slice_orientation(y_slice))


def _get_slice_starts_stops(src_area, area_to_cover):
"""Get x and y start and stop points for slicing."""
llx, lly, urx, ury = area_to_cover.area_extent
x, y = src_area.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury])

# we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent.
if (src_area.area_extent[0] > src_area.area_extent[2]) ^ (llx > urx):
xstart = max(0, round(x[1]))
xstop = min(src_area.width, round(x[0]) + 1)
else:
xstart = max(0, round(x[0]))
xstop = min(src_area.width, round(x[1]) + 1)
if (src_area.area_extent[1] > src_area.area_extent[3]) ^ (lly > ury):
ystart = max(0, round(y[0]))
ystop = min(src_area.height, round(y[1]) + 1)
else:
ystart = max(0, round(y[1]))
ystop = min(src_area.height, round(y[0]) + 1)

return xstart, xstop, ystart, ystop


def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary:
try:
if area_to_cover.is_geostationary:
Expand Down
1 change: 1 addition & 0 deletions pyresample/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
def reset_pyresample_config(tmpdir):
"""Set pyresample config to logical defaults for tests."""
test_config = {
"cache_geom_slices": False,
"features": {
"future_geometries": False,
},
Expand Down
20 changes: 20 additions & 0 deletions pyresample/test/test_geometry/test_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""Test AreaDefinition objects."""
import io
import sys
from glob import glob
from unittest.mock import MagicMock, patch

import dask.array as da
Expand Down Expand Up @@ -1840,6 +1841,25 @@ def test_area_to_cover_all_nan_bounds(self, geos_src_area, create_test_area):
with pytest.raises(NotImplementedError):
area_def.get_area_slices(area_to_cover)

def test_area_slices_caching(self, create_test_area, tmp_path):
"""Check that area slices can be cached."""
src_area = create_test_area(dict(proj="utm", zone=33),
10980, 10980,
(499980.0, 6490200.0, 609780.0, 6600000.0))
crop_area = create_test_area({'proj': 'latlong'},
100, 100,
(15.9689, 58.5284, 16.4346, 58.6995))
with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=False):
assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0
slice_x, slice_y = src_area.get_area_slices(crop_area)
assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0
with pyresample.config.set(cache_dir=tmp_path, cache_geom_slices=True):
assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 0
slice_x, slice_y = src_area.get_area_slices(crop_area)
assert len(glob(str(tmp_path / "geometry_slices" / "*.json"))) == 1
assert slice_x == slice(5630, 8339)
assert slice_y == slice(9261, 10980)


class TestBoundary:
"""Test 'boundary' method for AreaDefinition classes."""
Expand Down

0 comments on commit 6fd85a1

Please sign in to comment.