From e856f71f8d37fdf6ab14452b8fbad60b960a14b9 Mon Sep 17 00:00:00 2001
From: Danil Tolmachev
Date: Thu, 24 Nov 2022 00:08:18 +0300
Subject: [PATCH] add partial write

Replace the KeySearcher class with module level functions and let
get_partial_file_handle resolve dot-separated key paths via
searching.search_key, so partial writes work on nested keys.

search_key restricts every lookup to the value of the previous path
part, so an identically named key elsewhere in the file is never
matched.

---
 dictdatabase/io_unsafe.py  |   8 +--
 dictdatabase/searching.py  | 124 +++++++++++++++++++++++++------------
 tests/test_glom_writing.py |  23 +++++++
 3 files changed, 105 insertions(+), 50 deletions(-)
 create mode 100644 tests/test_glom_writing.py

diff --git a/dictdatabase/io_unsafe.py b/dictdatabase/io_unsafe.py
index d9ea9ab..83a85a9 100644
--- a/dictdatabase/io_unsafe.py
+++ b/dictdatabase/io_unsafe.py
@@ -11,9 +11,9 @@
 from . import config
 from . import indexing
 from . import io_bytes
+from . import searching
 from . import utils
 from .index_manager import IndexManager
-from .searching import KeySearcher
 
 
 @dataclass(frozen=True)  # slots=True not supported by python 3.8 and 3.9
@@ -88,7 +88,7 @@ def partial_read_only(db_name: str, key: str) -> dict | None:
 
 	# Not found in index file, search for key in the entire file
 	all_file_bytes = io_bytes.read(db_name)
-	start, end, found = KeySearcher().search(all_file_bytes, key)
+	start, end, found = searching.search_value_by_key(all_file_bytes, key)
 	if not found:
 		return None
 	value_bytes = all_file_bytes[start:end]
@@ -185,9 +185,9 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
 		return partial_handle
 
 	# Not found in index file, search for key in the entire file
-	key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
+	key_start, key_end, found = searching.search_key(all_file_bytes, key)
 
-	if key_end == -1:
+	if not found:
 		raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"")
 
 	# Key found, now determine the bounding byte indices of the value
diff --git a/dictdatabase/searching.py b/dictdatabase/searching.py
index cc6177d..f661bde 100644
--- a/dictdatabase/searching.py
+++ b/dictdatabase/searching.py
@@ -1,52 +1,84 @@
 from typing import Tuple
 
 from dictdatabase import byte_codes
 from dictdatabase import utils
 
 
-class KeySearcher:
-    @staticmethod
-    def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]:
-        """
-        It finds the start and end indices of the value of a key in a JSON file
-
-        Args:
-            file (bytes): bytes
-            key (str): The key to find in the JSON file.
-
-        Returns:
-            A tuple of the start and end index of the key, and a boolean value indicating whether the key was found.
-        """
-        key_start, key_end = utils.find_outermost_key_in_json_bytes(file, key)
-        if key_end == -1:
-            return -1, -1, False
-        start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0)
-        end = utils.seek_index_through_value_bytes(file, start)
-        return start, end, True
-
-    def search(
-        self, all_file_bytes: bytes, key: str, glom_searching=True
-    ) -> Tuple[int, int, bool]:
-        """
-        It takes a byte string, a key, and a boolean, and returns a tuple of three integers
-
-        Args:
-            all_file_bytes (bytes): The bytes of the file you're searching in.
-            key (str): The key to search for.
-            glom_searching: If True, then the key is a glom path, and we need to search for each part of the path. Defaults to
-            True
-
-        Returns:
-            The start and end of the key in the file.
-        """
-        original_start = 0
-        original_end = len(all_file_bytes)
-        for k in key.split(".") if glom_searching else [key]:
-            start, end, found = self.find_start_end_in_bytes(
-                all_file_bytes[original_start:original_end], k
-            )
-            if not found:
-                return -1, -1, False
-            original_end = original_start + end
-            original_start += start
-        return original_start, original_end, True
+def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]:
+    """
+    It finds the start and end indices of the value of a key in a JSON file
+
+    Args:
+        file (bytes): bytes
+        key (str): The key to find in the JSON file.
+
+    Returns:
+        A tuple of the start and end index of the value, and a boolean value indicating whether the key was found.
+    """
+    key_start, key_end = utils.find_outermost_key_in_json_bytes(file, key)
+    if key_end == -1:
+        return -1, -1, False
+    start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0)
+    end = utils.seek_index_through_value_bytes(file, start)
+    return start, end, True
+
+
+def search_key(file: bytes, key: str, glom_searching=True) -> Tuple[int, int, bool]:
+    """
+    It finds the start and end indices of a key in a JSON file, following a glom path if requested
+
+    Args:
+        file (bytes): The bytes of the file you're searching in.
+        key (str): The key to search for.
+        glom_searching: If True, then the key is a glom path, and each part of the path is searched inside
+        the value of the previous part. Defaults to True
+
+    Returns:
+        A tuple of the start and end index of the last key of the path, relative to the whole file, and a
+        boolean value indicating whether the key was found.
+    """
+    scope_start = 0
+    scope_end = len(file)
+    for k in key.split(".") if glom_searching else [key]:
+        # Only search inside the value of the previous path part, so that
+        # keys with the same name outside of it can never be matched.
+        scope = file[scope_start:scope_end]
+        key_start, key_end = utils.find_outermost_key_in_json_bytes(scope, k)
+        if key_end == -1:
+            return -1, -1, False
+        value_start = key_end + (1 if scope[key_end] == byte_codes.SPACE else 0)
+        value_end = utils.seek_index_through_value_bytes(scope, value_start)
+        # Indices are relative to the scope, translate them back to the file.
+        key_start += scope_start
+        key_end += scope_start
+        scope_end = scope_start + value_end
+        scope_start += value_start
+    return key_start, key_end, True
+
+
+def search_value_by_key(
+    all_file_bytes: bytes, key: str, glom_searching=True
+) -> Tuple[int, int, bool]:
+    """
+    It finds the start and end indices of the value of a key in a JSON file, following a glom path if requested
+
+    Args:
+        all_file_bytes (bytes): The bytes of the file you're searching in.
+        key (str): The key to search for.
+        glom_searching: If True, then the key is a glom path, and we need to search for each part of the path. Defaults to
+        True
+
+    Returns:
+        A tuple of the start and end index of the value in the file, and a boolean value indicating whether the key was found.
+    """
+    original_start = 0
+    original_end = len(all_file_bytes)
+    for k in key.split(".") if glom_searching else [key]:
+        start, end, found = find_start_end_in_bytes(
+            all_file_bytes[original_start:original_end], k
+        )
+        if not found:
+            return -1, -1, False
+        original_end = original_start + end
+        original_start += start
+    return original_start, original_end, True
diff --git a/tests/test_glom_writing.py b/tests/test_glom_writing.py
new file mode 100644
index 0000000..2702884
--- /dev/null
+++ b/tests/test_glom_writing.py
@@ -0,0 +1,23 @@
+import copy
+
+import dictdatabase as DDB
+
+data = {
+    "users": {
+        "Ben": {"age": 30, "job": "Software Engineer"},
+        "Bob": {"age": 30, "job": "Plumbers"},
+    },
+    "Ben": {"job": {"age": 30, "job": "Software Engineer"}},
+}
+
+
+def test_glom_writing():
+    DDB.at("users").create(data, force_overwrite=True)
+    with DDB.at("users", key="users.Ben").session() as (session, user):
+        user["status"] = "cancelled"
+        session.write()
+    assert DDB.at("users", key="users.Ben.status").read() == "cancelled"
+    # Only the nested entry may change, the top level "Ben" must stay untouched
+    expected = copy.deepcopy(data)
+    expected["users"]["Ben"]["status"] = "cancelled"
+    assert DDB.at("users").read() == expected