Skip to content

Commit

Permalink
Memory cache
Browse files Browse the repository at this point in the history
  • Loading branch information
sandorkertesz committed Nov 24, 2024
1 parent 6681b1d commit 4d63c8e
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 28 deletions.
13 changes: 7 additions & 6 deletions docs/examples/memory_cache.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"# create input data array for an O1280 grid\n",
"in_data = np.ones(6599680)\n",
"\n",
"# helper method for interpolation\n",
"def _run(n=10):\n",
" for _ in range(n):\n",
" res_array = ekr.interpolate(in_data, \n",
Expand Down Expand Up @@ -116,8 +117,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6.23 s, sys: 643 ms, total: 6.87 s\n",
"Wall time: 7.39 s\n"
"CPU times: user 6.26 s, sys: 712 ms, total: 6.97 s\n",
"Wall time: 7.47 s\n"
]
}
],
Expand Down Expand Up @@ -178,7 +179,7 @@
"tags": []
},
"source": [
"In this example we will use the default memory cache policy, which is \"largest\". For details see: :func:`set_memory_cache`. Notice the x9 speed-up we achived with the caching."
"In this example we will use the default memory cache policy, which is \"largest\". For details see: :func:`set_memory_cache`. Notice the x9 speed-up we achieved with the caching."
]
},
{
Expand All @@ -194,7 +195,7 @@
},
"outputs": [],
"source": [
"ekr.set_memory_cache()"
"ekr.set_memory_cache(policy=\"largest\")"
]
},
{
Expand All @@ -213,8 +214,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 746 ms, sys: 86.7 ms, total: 833 ms\n",
"Wall time: 849 ms\n"
"CPU times: user 750 ms, sys: 91.6 ms, total: 842 ms\n",
"Wall time: 847 ms\n"
]
}
],
Expand Down
12 changes: 7 additions & 5 deletions docs/memory_cache.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,24 @@
Memory cache
==================

.. py:function:: set_memory_cache(policy="largest", max_size=300 * 1024**2, ensure_capacity=False)
.. py:function:: set_memory_cache(policy="largest", max_size=300 * 1024**2, strict=False)
*New in version 0.4.0.*

Control the in-memory cache used to store interpolation matrices.

:param str policy: The matrix in-memory cache policy. The possible values are as follows:

- ``"off"``: no cache
- ``"off"``: no cache, the matrices are always loaded from disk
- ``"unlimited"``: keep all matrices in memory
- ``"largest"``: first evict the largest matrices from the cache (default)
- ``"lru"``: first evict the least recently used matrices from the cache

:param int max_size: The maximum memory size of the in-memory cache in bytes. Only used when the policy is not ``off`` or ``"unlimited"``.
:param bool ensure_capacity: If True, estimate the matrix size and try to ensure it fits into the cache by evicting items according to the policy. If the cache capacity is not enough to hold the matrix raises ValueError. Only used when ``policy`` is no ``"off"`` or ``"unlimited"``. If False, the matrix is loaded into the cache without checking the size, then the cache is evicted according to the policy.
:raises ValueError: if the estimated size of the matrix to be loaded does not fit into the cache. Only raised when ``ensure_capacity=True`` and ``policy`` is not ``"off"`` or ``"unlimited"``.
:param int max_size: The maximum memory size of the in-memory cache in bytes. Only used when ``policy`` is ``"largest"`` or ``"lru"``.
:param bool strict: If ``True``, a ValueError is raised when the matrix does not fit into the cache. If ``False``, the matrix is not loaded into the cache in that case. Only used when ``policy`` is ``"largest"`` or ``"lru"``.
:raises ValueError: if the estimated size of the matrix to be loaded does not fit into the cache. Only raised when ``strict=True`` and ``policy`` is ``"largest"`` or ``"lru"``.

When ``policy`` is ``"largest"`` or ``"lru"``, the cache eviction policy is applied before loading the matrix to ensure that it will fit into the cache. When this is not possible, the behaviour depends on the ``strict`` option.


.. py:function:: clear_memory_cache()
Expand Down
2 changes: 1 addition & 1 deletion src/earthkit/regrid/utils/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"temporary-cache-directory-root": None,
"maximum-matrix-memory-cache-size": 300 * 1024 * 1024,
"matrix-memory-cache-policy": "largest",
"ensure-matrix-memory-cache-capacity": False,
"matrix-memory-cache-strict-mode": False,
}


Expand Down
18 changes: 9 additions & 9 deletions src/earthkit/regrid/utils/memcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class NoPolicy(MemoryCachePolicy):

def check(self):
self.cache.max_mem = 0
self.cache.ensure_capacity = False
self.cache.strict = False

def reduce(self, *args, **kwargs):
self.cache._clear()
Expand All @@ -94,7 +94,7 @@ class UnlimitedPolicy(MemoryCachePolicy):

def check(self):
self.cache.max_mem = None
self.cache.ensure_capacity = False
self.cache.strict = False

def reduce(self, *args, **kwargs):
# must be called within a lock
Expand Down Expand Up @@ -174,14 +174,14 @@ def has_limit(self):
class MemoryCache:
MAX_SIZE_KEY = "maximum-matrix-memory-cache-size"
POLICY_KEY = "matrix-memory-cache-policy"
ENSURE_CAPACITY_KEY = "ensure-matrix-memory-cache-capacity"
STRICT_KEY = "matrix-memory-cache-strict-mode"

def __init__(
self,
max_mem=300 * 1024 * 1024,
size_fn=None,
policy="largest",
ensure_capacity=False,
strict=False,
):
"""
Memory bound in-memory cache for interpolation matrices.
Expand All @@ -206,7 +206,7 @@ def __init__(
if size_fn is None:
raise ValueError("size_fn must be provided")
self.size_fn = size_fn
self.ensure_capacity = ensure_capacity
self.strict = strict

self.policy = MemoryCachePolicy.make("largest" if policy is None else policy)(
self
Expand Down Expand Up @@ -267,7 +267,7 @@ def _create_with_pre_check(self, find_entry, create_from_entry, *args):
assert target_size >= 0
self._reduce(target_size=target_size)

if self.ensure_capacity and self._capacity() < estimated_memory:
if self.strict and self._capacity() < estimated_memory:
raise ValueError(
(
"Matrix too large to fit in memory cache. "
Expand Down Expand Up @@ -303,7 +303,7 @@ def _update_policy():
if any(
[
_update("max_mem", self.MAX_SIZE_KEY),
_update("ensure_capacity", self.ENSURE_CAPACITY_KEY),
_update("strict", self.STRICT_KEY),
_update_policy(),
]
):
Expand Down Expand Up @@ -370,13 +370,13 @@ def __init__(self, *args, **kwargs):
def set_memory_cache(
policy="largest",
max_size=300 * 1024 * 1024,
ensure_capacity=False,
strict=False,
):
from earthkit.regrid.utils.caching import SETTINGS

SETTINGS[MEMORY_CACHE.MAX_SIZE_KEY] = max_size
SETTINGS[MEMORY_CACHE.POLICY_KEY] = policy
SETTINGS[MEMORY_CACHE.ENSURE_CAPACITY_KEY] = ensure_capacity
SETTINGS[MEMORY_CACHE.STRICT_KEY] = strict
MEMORY_CACHE.update()


Expand Down
13 changes: 6 additions & 7 deletions tests/test_memcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def test_local_memcache_small(policy):
max_mem = 1
SETTINGS["matrix-memory-cache-policy"] = policy
SETTINGS["maximum-matrix-memory-cache-size"] = max_mem
SETTINGS["ensure-matrix-memory-cache-capacity"] = False

MEMORY_CACHE.clear()
MEMORY_CACHE.update()
Expand Down Expand Up @@ -305,7 +304,7 @@ def test_local_memcache_unlimited():
assert MEMORY_CACHE.info() == (1, 1, None, MEMORY_CACHE.curr_mem, 1, policy)


def test_local_memcache_ensure_pre_check_1(monkeypatch):
def test_local_memcache_ensure_strict_1(monkeypatch):
"""Test the cache with a memory limit that is too small to hold any estimated matrix size"""
from earthkit.regrid.utils.caching import SETTINGS
from earthkit.regrid.utils.memcache import MEMORY_CACHE
Expand All @@ -314,8 +313,8 @@ def test_local_memcache_ensure_pre_check_1(monkeypatch):
max_mem = 300 * 1024 * 1024
SETTINGS["matrix-memory-cache-policy"] = policy
SETTINGS["maximum-matrix-memory-cache-size"] = max_mem
SETTINGS["pre-check-matrix-size"] = True
SETTINGS["ensure-matrix-memory-cache-capacity"] = True
# SETTINGS["pre-check-matrix-size"] = True
SETTINGS["matrix-memory-cache-strict-mode"] = True

MEMORY_CACHE.clear()
MEMORY_CACHE.update()
Expand All @@ -336,7 +335,7 @@ def _estimate_memory(entry):
run_interpolate("linear")


def test_local_memcache_pre_check_2(monkeypatch):
def test_local_memcache_strict_2(monkeypatch):
"""Test the cache with a memory limit that can only hold one estimated matrix size"""
from earthkit.regrid.utils.caching import SETTINGS
from earthkit.regrid.utils.memcache import MEMORY_CACHE
Expand All @@ -345,8 +344,8 @@ def test_local_memcache_pre_check_2(monkeypatch):
max_mem = 300 * 1024 * 1024
SETTINGS["matrix-memory-cache-policy"] = policy
SETTINGS["maximum-matrix-memory-cache-size"] = max_mem
SETTINGS["pre-check-matrix-size"] = True
SETTINGS["ensure-matrix-memory-cache-capacity"] = True
# SETTINGS["pre-check-matrix-size"] = True
SETTINGS["matrix-memory-cache-strict-mode"] = True

MEMORY_CACHE.clear()
MEMORY_CACHE.update()
Expand Down

0 comments on commit 4d63c8e

Please sign in to comment.