diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 6872650..6d09fbd 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -24,6 +24,9 @@ Changelog - New method ``File.link()``, which acquires a file-based key from another source (e.g. a different memory-mapped File object) (:pr:`80`) `Guido Imperiale`_ +- ``Buffer`` has gained the option to preserve keys in ``slow`` when they are + moved back to ``fast`` + (:pr:`80`) `Guido Imperiale`_ 2.2.0 - 2022-04-28 diff --git a/zict/buffer.py b/zict/buffer.py index 33c9afb..1dd183b 100644 --- a/zict/buffer.py +++ b/zict/buffer.py @@ -1,7 +1,6 @@ from __future__ import annotations from collections.abc import Callable, Iterator, MutableMapping -from itertools import chain from typing import ( # TODO import from collections.abc (needs Python >=3.9) ItemsView, ValuesView, @@ -34,6 +33,11 @@ class Buffer(ZictBase[KT, VT]): storing to disk and raised a disk full error) the key will remain in the LRU. slow_to_fast_callbacks: list of callables These functions run every time data moves form the slow to the fast mapping. + keep_slow: bool, optional + If False (default), delete key/value pairs in slow when they are moved back to + fast. + If True, keep them in slow until deleted; this will avoid repeating the fast to + slow transition when they are evicted again, but at the cost of duplication. 
Notes ----- @@ -58,6 +62,8 @@ class Buffer(ZictBase[KT, VT]): weight: Callable[[KT, VT], float] fast_to_slow_callbacks: list[Callable[[KT, VT], None]] slow_to_fast_callbacks: list[Callable[[KT, VT], None]] + keep_slow: bool + _len: int def __init__( self, @@ -71,6 +77,7 @@ def __init__( slow_to_fast_callbacks: Callable[[KT, VT], None] | list[Callable[[KT, VT], None]] | None = None, + keep_slow: bool = False, ): self.fast = LRU(n, fast, weight=weight, on_evict=[self.fast_to_slow]) self.slow = slow @@ -81,12 +88,17 @@ def __init__( slow_to_fast_callbacks = [slow_to_fast_callbacks] self.fast_to_slow_callbacks = fast_to_slow_callbacks or [] self.slow_to_fast_callbacks = slow_to_fast_callbacks or [] + self.keep_slow = keep_slow + self._len = 0 @property def n(self) -> float: return self.fast.n def fast_to_slow(self, key: KT, value: VT) -> None: + if self.keep_slow and key in self.slow: + return + self.slow[key] = value try: for cb in self.fast_to_slow_callbacks: @@ -102,8 +114,9 @@ def slow_to_fast(self, key: KT) -> VT: # Avoid useless movement for heavy values w = self.weight(key, value) if w <= self.n: - del self.slow[key] self.fast[key] = value + if not self.keep_slow: + del self.slow[key] for cb in self.slow_to_fast_callbacks: cb(key, value) return value @@ -116,13 +129,14 @@ def __getitem__(self, key: KT) -> VT: def __setitem__(self, key: KT, value: VT) -> None: try: - del self.slow[key] + del self[key] except KeyError: pass # This may trigger an eviction from fast to slow of older keys. # If the weight is individually greater than n, then key/value will be stored # into self.slow instead (see LRU.__setitem__). 
self.fast[key] = value + self._len += 1 def set_noevict(self, key: KT, value: VT) -> None: """Variant of ``__setitem__`` that does not move keys from fast to slow if the @@ -135,10 +149,18 @@ def set_noevict(self, key: KT, value: VT) -> None: self.fast.set_noevict(key, value) def __delitem__(self, key: KT) -> None: - try: - del self.fast[key] - except KeyError: - del self.slow[key] + has_key = False + for d in (self.fast, self.slow): + try: + del d[key] + has_key = True + except KeyError: + pass + + if has_key: + self._len -= 1 + else: + raise KeyError(key) def values(self) -> ValuesView[VT]: return BufferValuesView(self) @@ -147,7 +169,7 @@ def items(self) -> ItemsView[KT, VT]: return BufferItemsView(self) def __len__(self) -> int: - return len(self.fast) + len(self.slow) + return self._len def __iter__(self) -> Iterator[KT]: """Make sure that the iteration is not disrupted if you evict/restore a key in @@ -169,7 +191,11 @@ def __contains__(self, key: object) -> bool: return key in self.fast or key in self.slow def __str__(self) -> str: - return f"Buffer<{self.fast}, {self.slow}>" + s = f"Buffer<fast: {len(self.fast)}, slow: {len(self.slow)}" + if self.keep_slow: + ndup = len(self.fast) + len(self.slow) - len(self) + s += f", unique: {len(self)}, duplicates: {ndup}" + return s + ">" __repr__ = __str__ @@ -185,8 +211,13 @@ class BufferItemsView(ItemsView[KT, VT]): __slots__ = () def __iter__(self) -> Iterator[tuple[KT, VT]]: - # Avoid changing the LRU - return chain(self._mapping.fast.items(), self._mapping.slow.items()) + for k in self._mapping: + try: + # Avoid changing the LRU + v = self._mapping.fast.d[k] + except KeyError: + v = self._mapping.slow[k] + yield k, v class BufferValuesView(ValuesView[VT]): @@ -199,4 +230,4 @@ def __contains__(self, value: object) -> bool: def __iter__(self) -> Iterator[VT]: # Avoid changing the LRU - return chain(self._mapping.fast.values(), self._mapping.slow.values()) + return (v for _, v in self._mapping.items()) diff --git a/zict/tests/test_buffer.py b/zict/tests/test_buffer.py index 253658b..d71d1a0 100644 --- a/zict/tests/test_buffer.py +++ b/zict/tests/test_buffer.py @@ -242,3 +242,106 @@ def
test_evict_restore_during_iter(): assert next(it) == "z" with pytest.raises(StopIteration): next(it) + + +def test_flush_close(): + flushes = [] + closes = [] + + class D(dict): + def __init__(self, name): + self.name = name + + def flush(self): + flushes.append(self.name) + + def close(self): + closes.append(self.name) + + buff = Buffer(D("fast"), D("slow"), n=2) + buff.flush() + buff.close() + assert flushes == ["fast", "slow"] + assert closes == ["fast", "slow"] + + +def test_keep_slow(): + a = {} + b = {} + f2s = [] + s2f = [] + buff = Buffer( + a, + b, + n=10, + weight=lambda k, v: v, + keep_slow=True, + fast_to_slow_callbacks=lambda k, v: f2s.append(k), + slow_to_fast_callbacks=lambda k, v: s2f.append(k), + ) + + buff["x"] = 1 + buff["y"] = 2 + buff["z"] = 11 + buff.fast.evict() + assert a == {"y": 2} + assert b == {"x": 1, "z": 11} + assert f2s == ["z", "x"] + assert s2f == [] + assert buff.fast.total_weight == 2 + f2s.clear() + + assert buff["x"] == 1 # Get from slow + assert buff["x"] == 1 # It's in both + assert buff["z"] == 11 # Too large to stay in fast + assert a == {"x": 1, "y": 2} + assert b == {"x": 1, "z": 11} + assert f2s == [] + assert s2f == ["x", "z"] # x has been moved only once + assert buff.fast.total_weight == 3 + # Test no duplicates + assert len(buff) == 3 + assert list(buff) == list(buff.keys()) == ["y", "x", "z"] + assert list(buff.items()) == [("y", 2), ("x", 1), ("z", 11)] + assert list(buff.values()) == [2, 1, 11] + f2s.clear() + s2f.clear() + + assert ( + str(buff) + == repr(buff) + == ("Buffer<fast: 2, slow: 2, unique: 3, duplicates: 1>") + ) + + # Evict a key that is already in slow + _ = buff["y"] + buff.fast.evict() + assert a == {"y": 2} + assert b == {"x": 1, "z": 11} + assert f2s == [] # fast_to_slow_callback was not called + assert s2f == [] + assert buff.fast.total_weight == 2 + assert len(buff) == 3 + _ = buff["x"] + s2f.clear() + + # Overwrite + buff["x"] = 3 + buff["y"] = 4 + buff["z"] = 12 + assert a == {"x": 3, "y": 4} + assert b == {"z": 12} + assert f2s == 
["z"] # One more spill for z + assert s2f == [] + assert buff.fast.total_weight == 7 + assert len(buff) == 3 + f2s.clear() + + # Delete + del buff["x"] + del buff["y"] + del buff["z"] + assert a == b == {} + assert f2s == s2f == [] + assert buff.fast.total_weight == 0 + assert len(buff) == 0