From 5d5a1034b139db04b9792e182d235339bdd5d901 Mon Sep 17 00:00:00 2001 From: Danil Tolmachev Date: Tue, 22 Nov 2022 11:20:54 +0300 Subject: [PATCH 1/5] add tests --- tests/test_glom_like_searching.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_glom_like_searching.py b/tests/test_glom_like_searching.py index cea6670..cc1ec05 100644 --- a/tests/test_glom_like_searching.py +++ b/tests/test_glom_like_searching.py @@ -11,7 +11,7 @@ def test_glom_searching(): DDB.at("users").create(data, force_overwrite=True) - assert DDB.at("users", key="users.Ben.job").read() == 'Software Engineer' + assert DDB.at("users", key="users.Ben.job").read() == "Software Engineer" def test_without_glom_searching(): @@ -19,3 +19,13 @@ def test_without_glom_searching(): assert DDB.at("users", key="Ben").read() == { "job": {"age": 30, "job": "Software Engineer"} } + + +def test_glom_searching_if_key_not_exists(): + DDB.at("users").create(data, force_overwrite=True) + assert DDB.at("users", key="users.Job.Ben").read() is None + + +def test_glom_searching_if_subkey_not_exists(): + DDB.at("users").create(data, force_overwrite=True) + assert DDB.at("users", key="users.Ben.SUBKEYNOTEXISTS").read() is None From 8940e493ef490062bd9a0f6c59854f395a0ec922 Mon Sep 17 00:00:00 2001 From: Danil Tolmachev Date: Tue, 22 Nov 2022 11:21:07 +0300 Subject: [PATCH 2/5] rename Searcher -> KeySearcher --- dictdatabase/io_unsafe.py | 4 ++-- dictdatabase/searching.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dictdatabase/io_unsafe.py b/dictdatabase/io_unsafe.py index 5a2b09a..d9ea9ab 100644 --- a/dictdatabase/io_unsafe.py +++ b/dictdatabase/io_unsafe.py @@ -13,7 +13,7 @@ from . import io_bytes from . 
import utils from .index_manager import IndexManager -from .searching import Searcher +from .searching import KeySearcher @dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9 @@ -88,7 +88,7 @@ def partial_read_only(db_name: str, key: str) -> dict | None: # Not found in index file, search for key in the entire file all_file_bytes = io_bytes.read(db_name) - start, end, found = Searcher().search(all_file_bytes, key) + start, end, found = KeySearcher().search(all_file_bytes, key) if not found: return None value_bytes = all_file_bytes[start:end] diff --git a/dictdatabase/searching.py b/dictdatabase/searching.py index 81c4e30..cc6177d 100644 --- a/dictdatabase/searching.py +++ b/dictdatabase/searching.py @@ -4,7 +4,7 @@ from dictdatabase import utils -class Searcher: +class KeySearcher: @staticmethod def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]: """ From f96351788c8c6f82257a770735829e01ddbd4ef9 Mon Sep 17 00:00:00 2001 From: Danil Tolmachev Date: Thu, 24 Nov 2022 00:08:18 +0300 Subject: [PATCH 3/5] add partial write --- dictdatabase/io_unsafe.py | 8 +-- dictdatabase/searching.py | 103 ++++++++++++++++++++++--------------- tests/test_glom_writing.py | 17 ++++++ 3 files changed, 82 insertions(+), 46 deletions(-) create mode 100644 tests/test_glom_writing.py diff --git a/dictdatabase/io_unsafe.py b/dictdatabase/io_unsafe.py index d9ea9ab..83a85a9 100644 --- a/dictdatabase/io_unsafe.py +++ b/dictdatabase/io_unsafe.py @@ -11,9 +11,9 @@ from . import config from . import indexing from . import io_bytes +from . import searching from . 
import utils from .index_manager import IndexManager -from .searching import KeySearcher @dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9 @@ -88,7 +88,7 @@ def partial_read_only(db_name: str, key: str) -> dict | None: # Not found in index file, search for key in the entire file all_file_bytes = io_bytes.read(db_name) - start, end, found = KeySearcher().search(all_file_bytes, key) + start, end, found = searching.search_value_by_key(all_file_bytes, key) if not found: return None value_bytes = all_file_bytes[start:end] @@ -185,9 +185,9 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle: return partial_handle # Not found in index file, search for key in the entire file - key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key) + key_start, key_end, found = searching.search_key(all_file_bytes, key) - if key_end == -1: + if not found: raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"") # Key found, now determine the bounding byte indices of the value diff --git a/dictdatabase/searching.py b/dictdatabase/searching.py index cc6177d..f661bde 100644 --- a/dictdatabase/searching.py +++ b/dictdatabase/searching.py @@ -1,52 +1,71 @@ from typing import Tuple +import orjson + from dictdatabase import byte_codes from dictdatabase import utils -class KeySearcher: - @staticmethod - def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]: - """ - It finds the start and end indices of the value of a key in a JSON file +def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]: + """ + It finds the start and end indices of the value of a key in a JSON file + + Args: + file (bytes): bytes + key (str): The key to find in the JSON file. - Args: - file (bytes): bytes - key (str): The key to find in the JSON file. + Returns: + A tuple of the start and end index of the key's value, and a boolean value indicating whether the key was found. 
+ """ + key_start, key_end = utils.find_outermost_key_in_json_bytes(file, key) + if key_end == -1: + return -1, -1, False + start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0) + end = utils.seek_index_through_value_bytes(file, start) + return start, end, True - Returns: - A tuple of the start and end index of the key, and a boolean value indicating whether the key was found. - """ - key_start, key_end = utils.find_outermost_key_in_json_bytes(file, key) + +def search_key(file: bytes, key: str, glom_searching=True) -> Tuple[int, int, bool]: + original_value_start = 0 + original_value_end = len(file) + original_key_start = 0 + original_key_end = len(file) + for k in key.split(".") if glom_searching else [key]: + key_start, key_end = utils.find_outermost_key_in_json_bytes(file, k) if key_end == -1: return -1, -1, False - start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0) - end = utils.seek_index_through_value_bytes(file, start) - return start, end, True - - def search( - self, all_file_bytes: bytes, key: str, glom_searching=True - ) -> Tuple[int, int, bool]: - """ - It takes a byte string, a key, and a boolean, and returns a tuple of three integers - - Args: - all_file_bytes (bytes): The bytes of the file you're searching in. - key (str): The key to search for. - glom_searching: If True, then the key is a glom path, and we need to search for each part of the path. Defaults to - True - - Returns: - The start and end of the key in the file. 
- """ - original_start = 0 - original_end = len(all_file_bytes) - for k in key.split(".") if glom_searching else [key]: - start, end, found = self.find_start_end_in_bytes( - all_file_bytes[original_start:original_end], k - ) - if not found: - return -1, -1, False - original_end = original_start + end - original_start += start - return original_start, original_end, True + original_key_end = original_value_start + key_end + original_key_start = original_value_start + key_start + value_start, value_end, found = find_start_end_in_bytes(file, k) + original_value_end = original_value_start + original_value_end + original_value_start += value_start + file = file[original_value_start:original_value_end] + return original_key_start, original_key_end, True + + +def search_value_by_key( + all_file_bytes: bytes, key: str, glom_searching=True +) -> Tuple[int, int, bool]: + """ + It takes a byte string, a key, and a boolean, and returns a tuple of three integers + + Args: + all_file_bytes (bytes): The bytes of the file you're searching in. + key (str): The key to search for. + glom_searching: If True, then the key is a glom path, and we need to search for each part of the path. Defaults to + True + + Returns: + The start and end of the key in the file. 
+ """ + original_start = 0 + original_end = len(all_file_bytes) + for k in key.split(".") if glom_searching else [key]: + start, end, found = find_start_end_in_bytes( + all_file_bytes[original_start:original_end], k + ) + if not found: + return -1, -1, False + original_end = original_start + end + original_start += start + return original_start, original_end, True diff --git a/tests/test_glom_writing.py b/tests/test_glom_writing.py new file mode 100644 index 0000000..2702884 --- /dev/null +++ b/tests/test_glom_writing.py @@ -0,0 +1,17 @@ +import dictdatabase as DDB + +data = { + "users": { + "Ben": {"age": 30, "job": "Software Engineer"}, + "Bob": {"age": 30, "job": "Plumbers"}, + }, + "Ben": {"job": {"age": 30, "job": "Software Engineer"}}, +} + + +def test_glom_writing(): + DDB.at("users").create(data, force_overwrite=True) + with DDB.at("users", key="users.Ben").session() as (session, purchase): + purchase["status"] = "cancelled" + session.write() + assert DDB.at("users", key="users.Ben.status").read() == "cancelled" From b1a2e8e27f6c0a99a11dd7610d5d2e449ff956da Mon Sep 17 00:00:00 2001 From: Danil Tolmachev Date: Thu, 24 Nov 2022 00:10:51 +0300 Subject: [PATCH 4/5] fix print compatibility --- tests/benchmark/run_parallel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/benchmark/run_parallel.py b/tests/benchmark/run_parallel.py index 6065f51..7f0799d 100644 --- a/tests/benchmark/run_parallel.py +++ b/tests/benchmark/run_parallel.py @@ -89,9 +89,9 @@ class Scenario: ops: int = 10 def print(self): - res = f"✨ Scenario: {'🔹' * self.readers}{'🔻' * self.writers} ({self.readers}r{self.writers}w)" - res += ", 🔸 compression" if self.use_compression else "" - res += ", 💎 big file" if self.big_file else "" + res = f"Scenario: {'*' * self.readers}{'#' * self.writers} ({self.readers}r{self.writers}w)" + res += ", [] compression" if self.use_compression else "" + res += ", {} big file" if self.big_file else "" print(res) From 
193e49ff90bcaf410317d8e516600131fc3f1094 Mon Sep 17 00:00:00 2001 From: Danil Tolmachev Date: Thu, 24 Nov 2022 00:16:08 +0300 Subject: [PATCH 5/5] renaming --- dictdatabase/io_unsafe.py | 4 ++-- dictdatabase/searching.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dictdatabase/io_unsafe.py b/dictdatabase/io_unsafe.py index 83a85a9..fbc405b 100644 --- a/dictdatabase/io_unsafe.py +++ b/dictdatabase/io_unsafe.py @@ -88,7 +88,7 @@ def partial_read_only(db_name: str, key: str) -> dict | None: # Not found in index file, search for key in the entire file all_file_bytes = io_bytes.read(db_name) - start, end, found = searching.search_value_by_key(all_file_bytes, key) + start, end, found = searching.search_value_position_in_db(all_file_bytes, key) if not found: return None value_bytes = all_file_bytes[start:end] @@ -185,7 +185,7 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle: return partial_handle # Not found in index file, search for key in the entire file - key_start, key_end, found = searching.search_key(all_file_bytes, key) + key_start, key_end, found = searching.search_key_position_in_db(all_file_bytes, key) if not found: raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"") diff --git a/dictdatabase/searching.py b/dictdatabase/searching.py index f661bde..62ed857 100644 --- a/dictdatabase/searching.py +++ b/dictdatabase/searching.py @@ -6,7 +6,7 @@ from dictdatabase import utils -def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]: +def find_key_position_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]: """ It finds the start and end indices of the value of a key in a JSON file @@ -25,7 +25,7 @@ def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]: return start, end, True -def search_key(file: bytes, key: str, glom_searching=True) -> Tuple[int, int, bool]: +def search_key_position_in_db(file: bytes, key: str, glom_searching=True) -> Tuple[int, 
int, bool]: original_value_start = 0 original_value_end = len(file) original_key_start = 0 @@ -36,14 +36,14 @@ def search_key(file: bytes, key: str, glom_searching=True) -> Tuple[int, int, bo return -1, -1, False original_key_end = original_value_start + key_end original_key_start = original_value_start + key_start - value_start, value_end, found = find_start_end_in_bytes(file, k) + value_start, value_end, found = find_key_position_in_bytes(file, k) original_value_end = original_value_start + original_value_end original_value_start += value_start file = file[original_value_start:original_value_end] return original_key_start, original_key_end, True -def search_value_by_key( +def search_value_position_in_db( all_file_bytes: bytes, key: str, glom_searching=True ) -> Tuple[int, int, bool]: """ @@ -61,7 +61,7 @@ def search_value_by_key( original_start = 0 original_end = len(all_file_bytes) for k in key.split(".") if glom_searching else [key]: - start, end, found = find_start_end_in_bytes( + start, end, found = find_key_position_in_bytes( all_file_bytes[original_start:original_end], k ) if not found: