Skip to content

Commit

Permalink
Shared memory IPC in File
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky committed Apr 20, 2023
1 parent 4466892 commit fed29a4
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 4 deletions.
7 changes: 7 additions & 0 deletions doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ Changelog
=========
.. currentmodule:: zict

3.1.0 - Unreleased
------------------
- New method ``File.link()``, which acquires a file-based key from another source
(e.g. a different memory-mapped File object)
(:pr:`80`) `Guido Imperiale`_


3.0.0 - 2023-04-17
------------------
- The library is now almost completely thread-safe
Expand Down
75 changes: 71 additions & 4 deletions zict/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class File(ZictBase[str, bytes]):
Keys must be strings, values must be buffers
Note this shouldn't be used for interprocess persistence, as keys
are cached in memory.
Keys are cached in memory; you shouldn't share the directory with other File
objects. However, see :meth:`link` for inter-process comunication.
Parameters
----------
Expand Down Expand Up @@ -76,8 +76,9 @@ def _safe_key(self, key: str) -> str:
"""Escape key so that it is usable on all filesystems.
Append to the filenames a unique suffix that changes every time this method is
called. This prevents race conditions when another thread accesses the same
key, e.g. ``__setitem__`` on one thread and ``__getitem__`` on another.
called. This prevents race conditions when another thread/process opens the
files for read (see :meth:`link` below), as it guarantees that a file is either
complete and coherent or it does not exist.
"""
# `#` is escaped by quote and is supported by most file systems
key = quote(key, safe="") + f"#{self._inc}"
Expand Down Expand Up @@ -156,3 +157,69 @@ def __delitem__(self, key: str) -> None:

def __len__(self) -> int:
return len(self.filenames)

def get_path(self, key: str) -> str:
"""Returns the full path on disk for a spilled key"""
return os.path.join(self.directory, self.filenames[key])

@locked
def link(self, key: str, path: str) -> None:
"""Hardlink an external file into self.directory.
The file must be on the same filesystem as self.directory. This is an atomic
operation which allows for data transfer between multiple File instances (or
from an external data creator to a File instance) running on different
processes, and is particularly useful in conjunction with memory mapping.
Raises
------
FileNotFoundError
The key has been deleted from the other File mapping
PermissionError
Can't access the target path for writing
OSError
- OS or filesystem doesn't support hardlinking
- The provided path and self.directory are on different mountpoints
Examples
--------
In process 1:
>>> z1 = File("/dev/shm/z1", memmap=True) # doctest: +SKIP
>>> z1["x"] = b"Hello world!" # doctest: +SKIP
>>> send_to_proc2("x", z1.get_path("x")) # doctest: +SKIP
In process 2:
>>> z2 = File("/dev/shm/z2", memmap=True) # doctest: +SKIP
>>> key, path = receive_from_proc1() # doctest: +SKIP
>>> z2.link(key, path) # doctest: +SKIP
Now ``z1["x"]`` and ``z2["x"]`` share the same memory. Updating the memoryview
contents on one (``z1["x"][:] = ...``) will immediately be reflected onto the
other. Setting a new value on either (``z1["x"] = ...``) will decouple them.
There are now two files on disk, ``/dev/shm/z1/x#0`` and ``/dev/shm/z2/x#0``,
which share the same inode. The memory is released when both z1 and z2 delete
the key.
.. note::
File names change every time you set a new value for a key; this prevents a
race condition when z1 is in the process of replacing ``x`` with an entirely
new value while z2 acquires it.
You may also use link() to create aliases to its own data.
This reads x back into memory and then writes a deep copy of it into y::
>>> z["y"] = z["x"] # doctest: +SKIP
This creates a second, shallow reference to x and is the same as writing
``z["y"] = z["x"]`` on a regular in-memory dict::
>>> z.link("y", z.get_path("x")) # doctest: +SKIP
"""
self.discard(key)
fn = self._safe_key(key)
os.link(path, os.path.join(self.directory, fn))
self.filenames[key] = fn
34 changes: 34 additions & 0 deletions zict/tests/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,37 @@ def test_stress_same_key_threadsafe(tmp_path):
z = File(tmp_path)
utils_test.check_same_key_threadsafe(z)
utils_test.check_mapping(z)


@pytest.mark.parametrize("memmap", [False, True])
def test_link(tmp_path, memmap):
z1 = File(tmp_path / "a", memmap=memmap)
z2 = File(tmp_path / "b", memmap=memmap)
z1["x"] = b"123"

z1.link("y", z1.get_path("x"))
z2.link("x", z1.get_path("x"))
assert z1["x"] == b"123"
assert z1["y"] == b"123"
assert z2["x"] == b"123"
assert sorted(os.listdir(tmp_path / "a")) == ["x#0", "y#1"]
assert os.listdir(tmp_path / "b") == ["x#0"]

if not memmap:
return

z1["x"].cast("c")[0] = b"4"
assert z1["y"] == b"423"
assert z2["x"] == b"423"

z1["x"] = b"567"
assert z1["x"] == b"567"
assert z1["y"] == b"423"
assert z2["x"] == b"423"
assert sorted(os.listdir(tmp_path / "a")) == ["x#2", "y#1"]
assert os.listdir(tmp_path / "b") == ["x#0"]

del z1["y"]
assert z2["x"] == b"423"
assert os.listdir(tmp_path / "a") == ["x#2"]
assert os.listdir(tmp_path / "b") == ["x#0"]

0 comments on commit fed29a4

Please sign in to comment.