Skip to content

Commit

Permalink
Raise exception for trying to set too large chunk in local cache
Browse files Browse the repository at this point in the history
  • Loading branch information
magland committed May 21, 2024
1 parent 4714aec commit 1535202
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 33 deletions.
8 changes: 4 additions & 4 deletions lindi/LindiH5ZarrStore/LindiH5ZarrStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ..conversion.h5_filters_to_codecs import h5_filters_to_codecs
from ..conversion.create_zarr_dataset_from_h5_data import create_zarr_dataset_from_h5_data
from ..LindiH5pyFile.LindiReferenceFileSystemStore import LindiReferenceFileSystemStore
from ..LocalCache.LocalCache import LocalCache
from ..LocalCache.LocalCache import ChunkTooLargeError, LocalCache
from ..LindiRemfile.LindiRemfile import LindiRemfile
from .LindiH5ZarrStoreOpts import LindiH5ZarrStoreOpts

Expand Down Expand Up @@ -346,15 +346,15 @@ def _get_chunk_file_bytes(self, key_parent: str, key_name: str):
buf = _read_bytes(self._file, byte_offset, byte_count)
if self._local_cache is not None:
assert self._url is not None, "Unexpected: url is None but local_cache is not None"
if byte_count < 1000 * 1000 * 900:
try:
self._local_cache.put_remote_chunk(
url=self._url,
offset=byte_offset,
size=byte_count,
data=buf
)
else:
print(f"Warning: Not storing chunk of size {byte_count} in local cache in LindiH5ZarrStore (key: {key_parent}/{key_name})")
except ChunkTooLargeError:
print(f"Warning: Unable to store chunk of size {byte_count} in local cache in LindiH5ZarrStore (key: {key_parent}/{key_name})")
return buf

def _get_chunk_file_bytes_data(self, key_parent: str, key_name: str):
Expand Down
8 changes: 4 additions & 4 deletions lindi/LindiH5pyFile/LindiReferenceFileSystemStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import requests
from zarr.storage import Store as ZarrStore

from ..LocalCache.LocalCache import LocalCache
from ..LocalCache.LocalCache import ChunkTooLargeError, LocalCache


class LindiReferenceFileSystemStore(ZarrStore):
Expand Down Expand Up @@ -141,10 +141,10 @@ def __getitem__(self, key: str):
return x
val = _read_bytes_from_url_or_path(url, offset, length)
if self.local_cache is not None:
if length < 1000 * 1000 * 900:
try:
self.local_cache.put_remote_chunk(url=url, offset=offset, size=length, data=val)
else:
print(f'Warning: not caching chunk of size {length} om LindiReferenceFileSystemStore (key: {key})')
except ChunkTooLargeError:
print(f'Warning: unable to cache chunk of size {length} on LocalCache (key: {key})')
return val
else:
# should not happen given checks in __init__, but self.rfs is mutable
Expand Down
30 changes: 12 additions & 18 deletions lindi/LindiRemfile/LindiRemfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,15 +218,12 @@ def _load_chunk(self, chunk_index: int) -> bytes:
self._memory_chunks[chunk_index] = x
if self._local_cache is not None:
size = min(self._min_chunk_size, self.length - chunk_index * self._min_chunk_size)
if size < 1000 * 1000 * 900:
self._local_cache.put_remote_chunk(
url=self._url,
offset=chunk_index * self._min_chunk_size,
size=size,
data=x
)
else:
raise Exception(f'Unexpected large size for chunk when caching: {size}')
self._local_cache.put_remote_chunk(
url=self._url,
offset=chunk_index * self._min_chunk_size,
size=size,
data=x
)
self._memory_chunk_indices.append(chunk_index)
else:
for i in range(self._smart_loader_chunk_sequence_length):
Expand All @@ -242,15 +239,12 @@ def _load_chunk(self, chunk_index: int) -> bytes:
data = x[i * self._min_chunk_size: (i + 1) * self._min_chunk_size]
if len(data) != size:
raise ValueError(f'Unexpected: len(data) != size: {len(data)} != {size}')
if size < 1000 * 1000 * 900:
self._local_cache.put_remote_chunk(
url=self._url,
offset=(chunk_index + i) * self._min_chunk_size,
size=size,
data=data
)
else:
raise Exception(f'Unexpected large size for chunk when caching: {size}')
self._local_cache.put_remote_chunk(
url=self._url,
offset=(chunk_index + i) * self._min_chunk_size,
size=size,
data=data
)
self._smart_loader_last_chunk_index_accessed = (
chunk_index + self._smart_loader_chunk_sequence_length - 1
)
Expand Down
14 changes: 9 additions & 5 deletions lindi/LocalCache/LocalCache.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@ def get_remote_chunk(self, *, url: str, offset: int, size: int):
def put_remote_chunk(self, *, url: str, offset: int, size: int, data: bytes):
if len(data) != size:
raise ValueError("data size does not match size")
if size >= 1000 * 1000 * 900:
# This is a sqlite limitation/configuration
# https://www.sqlite.org/limits.html
# For some reason 1000 * 1000 * 1000 seems to be too large, whereas 1000 * 1000 * 900 is fine
raise ValueError("Cannot store blobs larger than 900 MB in LocalCache")
self._sqlite_client.put_remote_chunk(url=url, offset=offset, size=size, data=data)


class ChunkTooLargeError(Exception):
    """Raised when a blob is too large to store in the local SQLite cache.

    put_remote_chunk raises this for data at or above the ~900 MB cap,
    which reflects a SQLite blob-size limitation (see sqlite.org/limits.html).
    Callers may catch it to skip caching and continue with the data in hand.
    """


class LocalCacheSQLiteClient:
def __init__(self, *, db_fname: str):
import sqlite3
Expand Down Expand Up @@ -63,6 +62,11 @@ def get_remote_chunk(self, *, url: str, offset: int, size: int):
return row[0]

def put_remote_chunk(self, *, url: str, offset: int, size: int, data: bytes):
if size >= 1000 * 1000 * 900:
# This is a sqlite limitation/configuration
# https://www.sqlite.org/limits.html
# For some reason 1000 * 1000 * 1000 seems to be too large, whereas 1000 * 1000 * 900 is fine
raise ChunkTooLargeError("Cannot store blobs larger than 900 MB in LocalCache")
self._cursor.execute(
"""
INSERT OR REPLACE INTO remote_chunks (url, offset, size, data) VALUES (?, ?, ?, ?)
Expand Down
2 changes: 1 addition & 1 deletion lindi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .LindiH5ZarrStore import LindiH5ZarrStore, LindiH5ZarrStoreOpts # noqa: F401
from .LindiH5pyFile import LindiH5pyFile, LindiH5pyGroup, LindiH5pyDataset, LindiH5pyHardLink, LindiH5pySoftLink # noqa: F401
from .LindiStagingStore import LindiStagingStore, StagingArea # noqa: F401
from .LocalCache.LocalCache import LocalCache # noqa: F401
from .LocalCache.LocalCache import LocalCache, ChunkTooLargeError # noqa: F401
from .File.File import File # noqa: F401
2 changes: 1 addition & 1 deletion tests/test_local_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_put_local_cache():
)
assert data == data2
data_too_large = b'x' * (1000 * 1000 * 900)
with pytest.raises(ValueError):
with pytest.raises(lindi.ChunkTooLargeError):
local_cache.put_remote_chunk(
url='dummy',
offset=0,
Expand Down

0 comments on commit 1535202

Please sign in to comment.