Skip to content

Commit

Permalink
Buffer(keep_slow=True)
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky committed Mar 29, 2023
1 parent a9f4523 commit 26eefa0
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 12 deletions.
3 changes: 3 additions & 0 deletions doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ Changelog
- New method ``File.link()``, which acquires a file-based key from another source
(e.g. a different memory-mapped File object)
(:pr:`80`) `Guido Imperiale`_
- ``Buffer`` has gained the option to preserve keys in ``slow`` when they are
moved back to ``fast``
(:pr:`80`) `Guido Imperiale`_


2.2.0 - 2022-04-28
Expand Down
55 changes: 43 additions & 12 deletions zict/buffer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from collections.abc import Callable, Iterator, MutableMapping
from itertools import chain
from typing import ( # TODO import from collections.abc (needs Python >=3.9)
ItemsView,
ValuesView,
Expand Down Expand Up @@ -34,6 +33,11 @@ class Buffer(ZictBase[KT, VT]):
storing to disk and raised a disk full error) the key will remain in the LRU.
slow_to_fast_callbacks: list of callables
These functions run every time data moves from the slow to the fast mapping.
keep_slow: bool, optional
If False (default), delete key/value pairs in slow when they are moved back to
fast.
If True, keep them in slow until deleted; this will avoid repeating the fast to
slow transition when they are evicted again, but at the cost of duplication.
Notes
-----
Expand All @@ -58,6 +62,8 @@ class Buffer(ZictBase[KT, VT]):
weight: Callable[[KT, VT], float]
fast_to_slow_callbacks: list[Callable[[KT, VT], None]]
slow_to_fast_callbacks: list[Callable[[KT, VT], None]]
keep_slow: bool
_len: int

def __init__(
self,
Expand All @@ -71,6 +77,7 @@ def __init__(
slow_to_fast_callbacks: Callable[[KT, VT], None]
| list[Callable[[KT, VT], None]]
| None = None,
keep_slow: bool = False,
):
self.fast = LRU(n, fast, weight=weight, on_evict=[self.fast_to_slow])
self.slow = slow
Expand All @@ -81,12 +88,17 @@ def __init__(
slow_to_fast_callbacks = [slow_to_fast_callbacks]
self.fast_to_slow_callbacks = fast_to_slow_callbacks or []
self.slow_to_fast_callbacks = slow_to_fast_callbacks or []
self.keep_slow = keep_slow
self._len = 0

@property
def n(self) -> float:
    """Expose the ``n`` attribute of the underlying ``fast`` LRU mapping."""
    lru = self.fast
    return lru.n

def fast_to_slow(self, key: KT, value: VT) -> None:
if self.keep_slow and key in self.slow:
return

self.slow[key] = value
try:
for cb in self.fast_to_slow_callbacks:
Expand All @@ -102,8 +114,9 @@ def slow_to_fast(self, key: KT) -> VT:
# Avoid useless movement for heavy values
w = self.weight(key, value)
if w <= self.n:
del self.slow[key]
self.fast[key] = value
if not self.keep_slow:
del self.slow[key]
for cb in self.slow_to_fast_callbacks:
cb(key, value)
return value
Expand All @@ -116,13 +129,14 @@ def __getitem__(self, key: KT) -> VT:

def __setitem__(self, key: KT, value: VT) -> None:
    """Store ``value`` under ``key``, replacing any previous entry.

    A pre-existing entry is removed through ``del self[key]`` rather than by
    touching ``self.slow`` directly, so that the key is dropped from whichever
    mapping(s) hold it and the ``_len`` counter is decremented before it is
    incremented again below. Deleting from ``self.slow`` first would raise
    ``KeyError`` for keys that live only in fast, skip the real deletion and
    let ``_len`` grow on every overwrite.
    """
    try:
        del self[key]
    except KeyError:
        # Brand new key: nothing to replace
        pass
    # This may trigger an eviction from fast to slow of older keys.
    # If the weight is individually greater than n, then key/value will be stored
    # into self.slow instead (see LRU.__setitem__).
    self.fast[key] = value
    self._len += 1

def set_noevict(self, key: KT, value: VT) -> None:
"""Variant of ``__setitem__`` that does not move keys from fast to slow if the
Expand All @@ -135,10 +149,18 @@ def set_noevict(self, key: KT, value: VT) -> None:
self.fast.set_noevict(key, value)

def __delitem__(self, key: KT) -> None:
    """Remove ``key`` from both the fast and the slow mapping.

    With ``keep_slow=True`` a key may be present in both mappings at once, so
    both are always tried. The key is deleted exactly once from each mapping;
    deleting it beforehand would leave nothing for the loop to find and turn a
    successful removal into a spurious ``KeyError`` without updating ``_len``.

    Raises
    ------
    KeyError
        If ``key`` is in neither mapping.
    """
    has_key = False
    for d in (self.fast, self.slow):
        try:
            del d[key]
            has_key = True
        except KeyError:
            pass

    if not has_key:
        raise KeyError(key)
    # Count the key once, even if it was duplicated in fast and slow
    self._len -= 1

def values(self) -> ValuesView[VT]:
    """Return a ``BufferValuesView`` built on top of this buffer."""
    view = BufferValuesView(self)
    return view
Expand All @@ -147,7 +169,7 @@ def items(self) -> ItemsView[KT, VT]:
return BufferItemsView(self)

def __len__(self) -> int:
    """Return the number of unique keys in the buffer.

    The count comes from the ``_len`` counter instead of
    ``len(self.fast) + len(self.slow)``: when ``keep_slow`` is enabled a key
    can be stored in both mappings and summing their sizes would count it
    twice.
    """
    return self._len

def __iter__(self) -> Iterator[KT]:
"""Make sure that the iteration is not disrupted if you evict/restore a key in
Expand All @@ -169,7 +191,11 @@ def __contains__(self, key: object) -> bool:
return key in self.fast or key in self.slow

def __str__(self) -> str:
    """Return a short summary with the sizes of the fast and slow mappings.

    When ``keep_slow`` is enabled the summary also reports the number of
    unique keys and how many keys are duplicated across fast and slow.
    """
    n_fast = len(self.fast)
    n_slow = len(self.slow)
    s = f"Buffer<fast: {n_fast}, slow: {n_slow}"
    if self.keep_slow:
        # Keys held in both mappings are counted once in _len
        ndup = n_fast + n_slow - self._len
        s += f", unique: {self._len}, duplicates: {ndup}"
    return s + ">"

__repr__ = __str__

Expand All @@ -185,8 +211,13 @@ class BufferItemsView(ItemsView[KT, VT]):
__slots__ = ()

def __iter__(self) -> Iterator[tuple[KT, VT]]:
    """Yield each ``(key, value)`` pair of the buffer exactly once.

    Keys come from iterating the buffer itself. A value is read from the
    ``fast.d`` mapping when the key is there and from ``slow`` otherwise, so a
    key present in both mappings is reported a single time.
    """
    # No ``return`` with a value may precede the loop: this is a generator
    # function, and returning early would end the iteration with no items.
    for k in self._mapping:
        try:
            # Avoid changing the LRU
            v = self._mapping.fast.d[k]
        except KeyError:
            v = self._mapping.slow[k]
        yield k, v


class BufferValuesView(ValuesView[VT]):
Expand All @@ -199,4 +230,4 @@ def __contains__(self, value: object) -> bool:

def __iter__(self) -> Iterator[VT]:
    """Iterate over the buffer's values, one per key.

    Values are taken from the buffer's ``items()`` rather than by chaining the
    values of the fast and slow mappings, which would repeat the value of any
    key stored in both.
    """
    return (v for _, v in self._mapping.items())
103 changes: 103 additions & 0 deletions zict/tests/test_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,106 @@ def test_evict_restore_during_iter():
assert next(it) == "z"
with pytest.raises(StopIteration):
next(it)


def test_flush_close():
    """``Buffer.flush()`` and ``Buffer.close()`` reach fast first, then slow."""
    calls = {"flush": [], "close": []}

    class D(dict):
        # Minimal mapping that records which of its hooks were invoked
        def __init__(self, name):
            self.name = name

        def flush(self):
            calls["flush"].append(self.name)

        def close(self):
            calls["close"].append(self.name)

    fast, slow = D("fast"), D("slow")
    buff = Buffer(fast, slow, n=2)
    buff.flush()
    buff.close()

    expected = ["fast", "slow"]
    assert calls["flush"] == expected
    assert calls["close"] == expected


def test_keep_slow():
    """Exercise ``Buffer(keep_slow=True)``: keys restored to fast stay in slow,
    are not spilled a second time, and are counted and iterated only once."""
    fast_store = {}
    slow_store = {}
    spilled = []
    restored = []

    def weigh(k, v):
        return v

    def on_spill(k, v):
        spilled.append(k)

    def on_restore(k, v):
        restored.append(k)

    buff = Buffer(
        fast_store,
        slow_store,
        n=10,
        weight=weigh,
        keep_slow=True,
        fast_to_slow_callbacks=on_spill,
        slow_to_fast_callbacks=on_restore,
    )

    buff["x"] = 1
    buff["y"] = 2
    buff["z"] = 11
    buff.fast.evict()
    assert fast_store == {"y": 2}
    assert slow_store == {"x": 1, "z": 11}
    assert spilled == ["z", "x"]
    assert restored == []
    assert buff.fast.total_weight == 2
    spilled.clear()

    assert buff["x"] == 1  # Get from slow
    assert buff["x"] == 1  # It's in both
    assert buff["z"] == 11  # Too large to stay in fast
    assert fast_store == {"x": 1, "y": 2}
    assert slow_store == {"x": 1, "z": 11}
    assert spilled == []
    assert restored == ["x", "z"]  # x has been moved only once
    assert buff.fast.total_weight == 3
    # Test no duplicates
    assert len(buff) == 3
    expected_keys = ["y", "x", "z"]
    assert list(buff) == expected_keys
    assert list(buff.keys()) == expected_keys
    assert list(buff.items()) == [("y", 2), ("x", 1), ("z", 11)]
    assert list(buff.values()) == [2, 1, 11]
    spilled.clear()
    restored.clear()

    expected_repr = "Buffer<fast: 2, slow: 2, unique: 3, duplicates: 1>"
    assert str(buff) == expected_repr
    assert repr(buff) == expected_repr

    # Evict a key that is already in slow
    _ = buff["y"]
    buff.fast.evict()
    assert fast_store == {"y": 2}
    assert slow_store == {"x": 1, "z": 11}
    assert spilled == []  # fast_to_slow_callback was not called
    assert restored == []
    assert buff.fast.total_weight == 2
    assert len(buff) == 3
    _ = buff["x"]
    restored.clear()

    # Overwrite
    buff["x"] = 3
    buff["y"] = 4
    buff["z"] = 12
    assert fast_store == {"x": 3, "y": 4}
    assert slow_store == {"z": 12}
    assert spilled == ["z"]  # One more spill for z
    assert restored == []
    assert buff.fast.total_weight == 7
    assert len(buff) == 3
    spilled.clear()

    # Delete
    for key in ("x", "y", "z"):
        del buff[key]
    assert fast_store == {}
    assert slow_store == {}
    assert spilled == []
    assert restored == []
    assert buff.fast.total_weight == 0
    assert len(buff) == 0

0 comments on commit 26eefa0

Please sign in to comment.