Skip to content

Commit

Permalink
Merge pull request #1 from UmbrellaMalware/refactoring
Browse files Browse the repository at this point in the history
Refactoring
  • Loading branch information
UmbrellaMalware authored Nov 23, 2022
2 parents 4d80f32 + 193e49f commit 8512ce9
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 50 deletions.
8 changes: 4 additions & 4 deletions dictdatabase/io_unsafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from . import config
from . import indexing
from . import io_bytes
from . import searching
from . import utils
from .index_manager import IndexManager
from .searching import Searcher


@dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9
Expand Down Expand Up @@ -88,7 +88,7 @@ def partial_read_only(db_name: str, key: str) -> dict | None:

# Not found in index file, search for key in the entire file
all_file_bytes = io_bytes.read(db_name)
start, end, found = Searcher().search(all_file_bytes, key)
start, end, found = searching.search_value_position_in_db(all_file_bytes, key)
if not found:
return None
value_bytes = all_file_bytes[start:end]
Expand Down Expand Up @@ -185,9 +185,9 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
return partial_handle

# Not found in index file, search for key in the entire file
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
key_start, key_end, found = searching.search_key_position_in_db(all_file_bytes, key)

if key_end == -1:
if not found:
raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"")

# Key found, now determine the bounding byte indices of the value
Expand Down
103 changes: 61 additions & 42 deletions dictdatabase/searching.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,71 @@
from typing import Tuple

import orjson

from dictdatabase import byte_codes
from dictdatabase import utils


def find_key_position_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]:
    """
    Locate the value stored under ``key`` inside JSON bytes.

    Despite the function's name, the returned indices delimit the *value*
    that follows the key, not the key itself.

    Args:
        file (bytes): The JSON bytes to search in.
        key (str): The key whose value should be located.

    Returns:
        A tuple ``(start, end, found)``. When the key is found, ``start`` and
        ``end`` are the bounds of its value, usable as ``file[start:end]``,
        and ``found`` is True. Otherwise ``(-1, -1, False)`` is returned.
    """
    # Only the end of the key matters here: the value begins right after it.
    _, key_end = utils.find_outermost_key_in_json_bytes(file, key)
    if key_end == -1:
        # The lookup reports a missing key with an end index of -1.
        return -1, -1, False
    # Step over the single optional space between the key and its value.
    start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0)
    end = utils.seek_index_through_value_bytes(file, start)
    return start, end, True

def search_key_position_in_db(file: bytes, key: str, glom_searching=True) -> Tuple[int, int, bool]:
    """
    Find the position of a (possibly nested) key in the bytes of a db file.

    Args:
        file (bytes): The bytes of the file to search in.
        key (str): The key to search for.
        glom_searching: If True, ``key`` is split on "." and every part is
            searched inside the value of the previous part. If False, the
            key is searched as given. Defaults to True.

    Returns:
        A tuple ``(key_start, key_end, found)``. When every part of the path
        is found, the two indices are the position of the last part's key,
        expressed relative to the ``file`` bytes passed in, and ``found`` is
        True. Otherwise ``(-1, -1, False)`` is returned.
    """
    path = key.split(".") if glom_searching else [key]

    # ``scope`` is the slice currently being searched; ``offset`` is the
    # index of ``scope[0]`` within the bytes passed in by the caller. All
    # lookups return indices relative to ``scope``, so ``offset`` is what
    # turns them back into absolute positions.
    scope = file
    offset = 0
    key_start = key_end = -1

    for part in path:
        rel_key_start, rel_key_end = utils.find_outermost_key_in_json_bytes(scope, part)
        if rel_key_end == -1:
            return -1, -1, False
        key_start = offset + rel_key_start
        key_end = offset + rel_key_end

        # Bounds of this key's value, relative to ``scope``. Same rule as
        # find_key_position_in_bytes (skip one optional space after the
        # key), computed here so the key is not searched for a second time.
        value_start = rel_key_end + (1 if scope[rel_key_end] == byte_codes.SPACE else 0)
        value_end = utils.seek_index_through_value_bytes(scope, value_start)

        # Narrow the search to exactly this value. The slice indices must be
        # relative to ``scope``; only ``offset`` accumulates absolutely.
        scope = scope[value_start:value_end]
        offset += value_start

    return key_start, key_end, True


def search_value_position_in_db(
    all_file_bytes: bytes, key: str, glom_searching=True
) -> Tuple[int, int, bool]:
    """
    Find where the value of a (possibly nested) key lies in the file bytes.

    Args:
        all_file_bytes (bytes): The bytes of the file to search in.
        key (str): The key to search for.
        glom_searching: If True, ``key`` is treated as a dotted path and each
            part is searched inside the value found for the previous part.
            If False, the key is searched as given. Defaults to True.

    Returns:
        A tuple ``(start, end, found)``. When every part of the path is
        found, ``all_file_bytes[start:end]`` is the value of the last part
        and ``found`` is True. Otherwise ``(-1, -1, False)`` is returned.
    """
    path = key.split(".") if glom_searching else [key]

    # [lo, hi) is the window of the file that is still being searched; it
    # shrinks to the value of each path part in turn.
    lo, hi = 0, len(all_file_bytes)
    for part in path:
        window = all_file_bytes[lo:hi]
        rel_start, rel_end, found = find_key_position_in_bytes(window, part)
        if not found:
            return -1, -1, False
        # Both results are relative to the window, so shift them by the old lo.
        lo, hi = lo + rel_start, lo + rel_end
    return lo, hi, True
6 changes: 3 additions & 3 deletions tests/benchmark/run_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ class Scenario:
ops: int = 10

def print(self):
    """Print a one-line summary of this scenario to stdout."""
    # One '*' per reader and one '#' per writer, followed by the counts.
    res = f"Scenario: {'*' * self.readers}{'#' * self.writers} ({self.readers}r{self.writers}w)"
    if self.use_compression:
        res += ", [] compression"
    if self.big_file:
        res += ", {} big file"
    print(res)


Expand Down
12 changes: 11 additions & 1 deletion tests/test_glom_like_searching.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,21 @@

def test_glom_searching():
    """Reading with the dotted key "users.Ben.job" returns the expected string."""
    DDB.at("users").create(data, force_overwrite=True)
    assert DDB.at("users", key="users.Ben.job").read() == "Software Engineer"


def test_without_glom_searching():
    """Reading the plain (dot-free) key "Ben" returns the mapping asserted below."""
    expected = {"job": {"age": 30, "job": "Software Engineer"}}
    DDB.at("users").create(data, force_overwrite=True)
    stored = DDB.at("users", key="Ben").read()
    assert stored == expected


def test_glom_searching_if_key_not_exists():
    """Reading the dotted key "users.Job.Ben" yields None."""
    # NOTE(review): presumably "Job" is absent under "users" in the fixture;
    # the fixture for this test module is defined outside the visible hunk.
    DDB.at("users").create(data, force_overwrite=True)
    result = DDB.at("users", key="users.Job.Ben").read()
    assert result is None


def test_glom_searching_if_subkey_not_exists():
    """Reading a dotted key whose last part is "SUBKEYNOTEXISTS" yields None."""
    DDB.at("users").create(data, force_overwrite=True)
    result = DDB.at("users", key="users.Ben.SUBKEYNOTEXISTS").read()
    assert result is None
17 changes: 17 additions & 0 deletions tests/test_glom_writing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import dictdatabase as DDB

# Fixture written to the "users" db by the test below. "Ben" appears twice:
# nested under "users" and as a top-level key, so the dotted path "users.Ben"
# and the plain key "Ben" refer to different entries.
data = {
"users": {
"Ben": {"age": 30, "job": "Software Engineer"},
"Bob": {"age": 30, "job": "Plumbers"},
},
"Ben": {"job": {"age": 30, "job": "Software Engineer"}},
}


def test_glom_writing():
    """A write made in a session opened on "users.Ben" can be read back via "users.Ben.status"."""
    DDB.at("users").create(data, force_overwrite=True)

    ben_handle = DDB.at("users", key="users.Ben")
    with ben_handle.session() as (session, ben):
        ben["status"] = "cancelled"
        session.write()

    stored_status = DDB.at("users", key="users.Ben.status").read()
    assert stored_status == "cancelled"

0 comments on commit 8512ce9

Please sign in to comment.