diff --git a/docs/examples/memory_cache.ipynb b/docs/examples/memory_cache.ipynb index 7b53979..8bf94e9 100644 --- a/docs/examples/memory_cache.ipynb +++ b/docs/examples/memory_cache.ipynb @@ -48,6 +48,7 @@ "# create input data array for an O1280 grid\n", "in_data = np.ones(6599680)\n", "\n", + "# helper method for interpolation\n", "def _run(n=10):\n", " for _ in range(n):\n", " res_array = ekr.interpolate(in_data, \n", @@ -116,8 +117,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.23 s, sys: 643 ms, total: 6.87 s\n", - "Wall time: 7.39 s\n" + "CPU times: user 6.26 s, sys: 712 ms, total: 6.97 s\n", + "Wall time: 7.47 s\n" ] } ], @@ -178,7 +179,7 @@ "tags": [] }, "source": [ - "In this example we will use the default memory cache policy, which is \"largest\". For details see: :func:`set_memory_cache`. Notice the x9 speed-up we achived with the caching." + "In this example we will use the default memory cache policy, which is \"largest\". For details see: :func:`set_memory_cache`. Notice the x9 speed-up we achieved with the caching." ] }, { @@ -194,7 +195,7 @@ }, "outputs": [], "source": [ - "ekr.set_memory_cache()" + "ekr.set_memory_cache(policy=\"largest\")" ] }, { @@ -213,8 +214,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 746 ms, sys: 86.7 ms, total: 833 ms\n", - "Wall time: 849 ms\n" + "CPU times: user 750 ms, sys: 91.6 ms, total: 842 ms\n", + "Wall time: 847 ms\n" ] } ], diff --git a/docs/memory_cache.rst b/docs/memory_cache.rst index 78f3792..d58d939 100644 --- a/docs/memory_cache.rst +++ b/docs/memory_cache.rst @@ -3,7 +3,7 @@ Memory cache ================== -.. py:function:: set_memory_cache(policy="largest", max_size=300 * 1024**2, ensure_capacity=False) +.. py:function:: set_memory_cache(policy="largest", max_size=300 * 1024**2, strict=False) *New in version 0.4.0.* @@ -11,14 +11,16 @@ Memory cache :param str policy: The matrix in-memory cache policy. 
The possible values are as follows: - - ``"off"``: no cache + - ``"off"``: no cache; the matrices are always loaded from disk - ``"unlimited"``: keep all matrices in memory - ``"largest"``: first evict the largest matrices from the cache (default) - ``"lru"``: first evict the least recently used matrices from the cache - :param int max_size: The maximum memory size of the in-memory cache in bytes. Only used when the policy is not ``off`` or ``"unlimited"``. - :param bool ensure_capacity: If True, estimate the matrix size and try to ensure it fits into the cache by evicting items according to the policy. If the cache capacity is not enough to hold the matrix raises ValueError. Only used when ``policy`` is no ``"off"`` or ``"unlimited"``. If False, the matrix is loaded into the cache without checking the size, then the cache is evicted according to the policy. - :raises ValueError: if the estimated size of the matrix to be loaded does not fit into the cache. Only raised when ``ensure_capacity=True`` and ``policy`` is not ``"off"`` or ``"unlimited"``. + :param int max_size: The maximum memory size of the in-memory cache in bytes. Only used when ``policy`` is ``"largest"`` or ``"lru"``. + :param bool strict: If ``True``, raises ``ValueError`` if the matrix cannot fit into the cache. If ``False``, the matrix is not loaded into the cache under the same conditions. Only used when ``policy`` is ``"largest"`` or ``"lru"``. + :raises ValueError: if the estimated size of the matrix to be loaded does not fit into the cache. Only raised when ``strict=True`` and ``policy`` is ``"largest"`` or ``"lru"``. + + When ``policy`` is ``"largest"`` or ``"lru"``, the cache eviction policy is applied before loading the matrix to ensure that it will fit into the cache. When this is not possible, the behaviour depends on the ``strict`` option. .. 
py:function:: clear_memory_cache() diff --git a/src/earthkit/regrid/utils/caching.py b/src/earthkit/regrid/utils/caching.py index 00a8141..32c0b60 100644 --- a/src/earthkit/regrid/utils/caching.py +++ b/src/earthkit/regrid/utils/caching.py @@ -51,7 +51,7 @@ "temporary-cache-directory-root": None, "maximum-matrix-memory-cache-size": 300 * 1024 * 1024, "matrix-memory-cache-policy": "largest", - "ensure-matrix-memory-cache-capacity": False, + "matrix-memory-cache-strict-mode": False, } diff --git a/src/earthkit/regrid/utils/memcache.py b/src/earthkit/regrid/utils/memcache.py index 77ccf8a..0959b5c 100644 --- a/src/earthkit/regrid/utils/memcache.py +++ b/src/earthkit/regrid/utils/memcache.py @@ -77,7 +77,7 @@ class NoPolicy(MemoryCachePolicy): def check(self): self.cache.max_mem = 0 - self.cache.ensure_capacity = False + self.cache.strict = False def reduce(self, *args, **kwargs): self.cache._clear() @@ -94,7 +94,7 @@ class UnlimitedPolicy(MemoryCachePolicy): def check(self): self.cache.max_mem = None - self.cache.ensure_capacity = False + self.cache.strict = False def reduce(self, *args, **kwargs): # must be called within a lock @@ -174,14 +174,14 @@ def has_limit(self): class MemoryCache: MAX_SIZE_KEY = "maximum-matrix-memory-cache-size" POLICY_KEY = "matrix-memory-cache-policy" - ENSURE_CAPACITY_KEY = "ensure-matrix-memory-cache-capacity" + STRICT_KEY = "matrix-memory-cache-strict-mode" def __init__( self, max_mem=300 * 1024 * 1024, size_fn=None, policy="largest", - ensure_capacity=False, + strict=False, ): """ Memory bound in-memory cache for interpolation matrices. 
@@ -206,7 +206,7 @@ def __init__( if size_fn is None: raise ValueError("size_fn must be provided") self.size_fn = size_fn - self.ensure_capacity = ensure_capacity + self.strict = strict self.policy = MemoryCachePolicy.make("largest" if policy is None else policy)( self @@ -267,7 +267,7 @@ def _create_with_pre_check(self, find_entry, create_from_entry, *args): assert target_size >= 0 self._reduce(target_size=target_size) - if self.ensure_capacity and self._capacity() < estimated_memory: + if self.strict and self._capacity() < estimated_memory: raise ValueError( ( "Matrix too large to fit in memory cache. " @@ -303,7 +303,7 @@ def _update_policy(): if any( [ _update("max_mem", self.MAX_SIZE_KEY), - _update("ensure_capacity", self.ENSURE_CAPACITY_KEY), + _update("strict", self.STRICT_KEY), _update_policy(), ] ): @@ -370,13 +370,13 @@ def __init__(self, *args, **kwargs): def set_memory_cache( policy="largest", max_size=300 * 1024 * 1024, - ensure_capacity=False, + strict=False, ): from earthkit.regrid.utils.caching import SETTINGS SETTINGS[MEMORY_CACHE.MAX_SIZE_KEY] = max_size SETTINGS[MEMORY_CACHE.POLICY_KEY] = policy - SETTINGS[MEMORY_CACHE.ENSURE_CAPACITY_KEY] = ensure_capacity + SETTINGS[MEMORY_CACHE.STRICT_KEY] = strict MEMORY_CACHE.update() diff --git a/tests/test_memcache.py b/tests/test_memcache.py index 373e199..999434e 100644 --- a/tests/test_memcache.py +++ b/tests/test_memcache.py @@ -206,7 +206,6 @@ def test_local_memcache_small(policy): max_mem = 1 SETTINGS["matrix-memory-cache-policy"] = policy SETTINGS["maximum-matrix-memory-cache-size"] = max_mem - SETTINGS["ensure-matrix-memory-cache-capacity"] = False MEMORY_CACHE.clear() MEMORY_CACHE.update() @@ -305,7 +304,7 @@ def test_local_memcache_unlimited(): assert MEMORY_CACHE.info() == (1, 1, None, MEMORY_CACHE.curr_mem, 1, policy) -def test_local_memcache_ensure_pre_check_1(monkeypatch): +def test_local_memcache_ensure_strict_1(monkeypatch): """Test the cache with a memory limit that is too small to hold 
any estimated matrix size""" from earthkit.regrid.utils.caching import SETTINGS from earthkit.regrid.utils.memcache import MEMORY_CACHE @@ -314,8 +313,8 @@ def test_local_memcache_ensure_pre_check_1(monkeypatch): max_mem = 300 * 1024 * 1024 SETTINGS["matrix-memory-cache-policy"] = policy SETTINGS["maximum-matrix-memory-cache-size"] = max_mem - SETTINGS["pre-check-matrix-size"] = True - SETTINGS["ensure-matrix-memory-cache-capacity"] = True + # SETTINGS["pre-check-matrix-size"] = True + SETTINGS["matrix-memory-cache-strict-mode"] = True MEMORY_CACHE.clear() MEMORY_CACHE.update() @@ -336,7 +335,7 @@ def _estimate_memory(entry): run_interpolate("linear") -def test_local_memcache_pre_check_2(monkeypatch): +def test_local_memcache_strict_2(monkeypatch): """Test the cache with a memory limit that can only hold one estimated matrix size""" from earthkit.regrid.utils.caching import SETTINGS from earthkit.regrid.utils.memcache import MEMORY_CACHE @@ -345,8 +344,8 @@ def test_local_memcache_pre_check_2(monkeypatch): max_mem = 300 * 1024 * 1024 SETTINGS["matrix-memory-cache-policy"] = policy SETTINGS["maximum-matrix-memory-cache-size"] = max_mem - SETTINGS["pre-check-matrix-size"] = True - SETTINGS["ensure-matrix-memory-cache-capacity"] = True + # SETTINGS["pre-check-matrix-size"] = True + SETTINGS["matrix-memory-cache-strict-mode"] = True MEMORY_CACHE.clear() MEMORY_CACHE.update()