Skip to content

Commit

Permalink
improve get partial file handle read
Browse files Browse the repository at this point in the history
  • Loading branch information
mkrd committed Nov 10, 2022
1 parent f5303ff commit e33fe59
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 9 deletions.
2 changes: 2 additions & 0 deletions dictdatabase/io_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ def read(db_name: str, start=None, end=None) -> bytes:
if start is None:
return f.read()
f.seek(start)
if end is None:
return f.read()
return f.read(end - start)
if not ddb_exists:
raise FileNotFoundError(f"DB does not exist: \"{db_name}\"")
Expand Down
40 changes: 31 additions & 9 deletions dictdatabase/io_unsafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,35 @@ def write(db_name: str, data: dict):
################################################################################


def try_get_parial_file_handle_by_index(indexer: indexing.Indexer, db_name, key):
    """
    Try to build a PartialFileHandle for `key` using only the index entry.

    Looks `key` up in `indexer`. The index entry supplies the byte range of
    the value inside the db file, its indentation info and a SHA-256 hex
    digest of the value bytes.

    Returns None when the key has no index entry, or when the bytes found at
    the indexed range do not hash to the stored digest (presumably a stale
    index entry; the caller is expected to fall back to another lookup).
    Otherwise returns a PartialFileHandle wrapping the parsed value.
    """
    entry = indexer.get(key)
    if entry is None:
        return None
    value_start, value_end, indent_level, indent_with, value_hash = entry

    compressed = config.use_compression
    if compressed:
        # With compression enabled the whole file has to be read, so the
        # value sits at its absolute offsets inside the buffer.
        buffer = io_bytes.read(db_name)
        lo, hi = value_start, value_end
    else:
        # Without compression the read starts at the value itself, so the
        # value begins at offset 0 of the buffer and the rest is the suffix.
        buffer = io_bytes.read(db_name, value_start)
        lo, hi = 0, value_end - value_start

    raw_value = buffer[lo:hi]
    # Reject the index entry if the bytes on disk no longer match its hash.
    if hashlib.sha256(raw_value).hexdigest() != value_hash:
        return None

    # The prefix is only kept in the compressed case; otherwise it is None.
    prefix = buffer[:lo] if compressed else None
    suffix = buffer[hi:]
    partial_dict = PartialDict(prefix, key, orjson.loads(raw_value), value_start, suffix)
    return PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer)




def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
"""
Partially read a key from a db.
Expand All @@ -138,15 +167,8 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:

# Search for key in the index file
indexer = indexing.Indexer(db_name)
index = indexer.get(key)
if index is not None:
value_start, value_end, indent_level, indent_with, value_hash = index
partial_bytes = data[value_start:value_end]
if value_hash == hashlib.sha256(partial_bytes).hexdigest():
partial_value = orjson.loads(partial_bytes)
prefix = data[:value_start] if config.use_compression else None
partial_dict = PartialDict(prefix, key, partial_value, value_start, data[value_end:])
return PartialFileHandle(db_name, partial_dict, indent_level, indent_with, indexer)
if (partial_file_handle := try_get_parial_file_handle_by_index(indexer, db_name, key)) is not None:
return partial_file_handle

# Not found in index file, search for key in the entire file
key_start, key_end = utils.find_outermost_key_in_json_bytes(data, key)
Expand Down

0 comments on commit e33fe59

Please sign in to comment.