Skip to content

Commit

Permalink
add partial write
Browse files Browse the repository at this point in the history
  • Loading branch information
Danil Tolmachev authored and mkrd committed Nov 27, 2022
1 parent 325d3c7 commit e856f71
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 46 deletions.
8 changes: 4 additions & 4 deletions dictdatabase/io_unsafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from . import config
from . import indexing
from . import io_bytes
from . import searching
from . import utils
from .index_manager import IndexManager
from .searching import KeySearcher


@dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9
Expand Down Expand Up @@ -88,7 +88,7 @@ def partial_read_only(db_name: str, key: str) -> dict | None:

# Not found in index file, search for key in the entire file
all_file_bytes = io_bytes.read(db_name)
start, end, found = KeySearcher().search(all_file_bytes, key)
start, end, found = searching.search_value_by_key(all_file_bytes, key)
if not found:
return None
value_bytes = all_file_bytes[start:end]
Expand Down Expand Up @@ -185,9 +185,9 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
return partial_handle

# Not found in index file, search for key in the entire file
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
key_start, key_end, found = searching.search_key(all_file_bytes, key)

if key_end == -1:
if not found:
raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"")

# Key found, now determine the bounding byte indices of the value
Expand Down
103 changes: 61 additions & 42 deletions dictdatabase/searching.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,71 @@
from typing import Tuple

import orjson

from dictdatabase import byte_codes
from dictdatabase import utils


class KeySearcher:
@staticmethod
def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]:
"""
It finds the start and end indices of the value of a key in a JSON file
def find_start_end_in_bytes(file: bytes, key: str) -> Tuple[int, int, bool]:
"""
It finds the start and end indices of the value of a key in a JSON file
Args:
file (bytes): bytes
key (str): The key to find in the JSON file.
Args:
file (bytes): bytes
key (str): The key to find in the JSON file.
Returns:
A tuple of the start and end index of the key, and a boolean value indicating whether the key was found.
"""
key_start, key_end = utils.find_outermost_key_in_json_bytes(file, key)
if key_end == -1:
return -1, -1, False
start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0)
end = utils.seek_index_through_value_bytes(file, start)
return start, end, True

Returns:
A tuple of the start and end index of the key, and a boolean value indicating whether the key was found.
"""
key_start, key_end = utils.find_outermost_key_in_json_bytes(file, key)

def search_key(file: bytes, key: str, glom_searching=True) -> Tuple[int, int, bool]:
original_value_start = 0
original_value_end = len(file)
original_key_start = 0
original_key_end = len(file)
for k in key.split(".") if glom_searching else [key]:
key_start, key_end = utils.find_outermost_key_in_json_bytes(file, k)
if key_end == -1:
return -1, -1, False
start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0)
end = utils.seek_index_through_value_bytes(file, start)
return start, end, True

def search(
self, all_file_bytes: bytes, key: str, glom_searching=True
) -> Tuple[int, int, bool]:
"""
It takes a byte string, a key, and a boolean, and returns a tuple of three integers
Args:
all_file_bytes (bytes): The bytes of the file you're searching in.
key (str): The key to search for.
glom_searching: If True, then the key is a glom path, and we need to search for each part of the path. Defaults to
True
Returns:
The start and end of the key in the file.
"""
original_start = 0
original_end = len(all_file_bytes)
for k in key.split(".") if glom_searching else [key]:
start, end, found = self.find_start_end_in_bytes(
all_file_bytes[original_start:original_end], k
)
if not found:
return -1, -1, False
original_end = original_start + end
original_start += start
return original_start, original_end, True
original_key_end = original_value_start + key_end
original_key_start = original_value_start + key_start
value_start, value_end, found = find_start_end_in_bytes(file, k)
original_value_end = original_value_start + original_value_end
original_value_start += value_start
file = file[original_value_start:original_value_end]
return original_key_start, original_key_end, True


def search_value_by_key(
    all_file_bytes: bytes, key: str, glom_searching=True
) -> Tuple[int, int, bool]:
    """
    Locate the byte range of the value stored under a (possibly nested) key.

    The key is resolved one segment at a time. Each segment is looked up with
    find_start_end_in_bytes, but only inside the byte window occupied by the
    value of the previous segment, so the window narrows with every step.

    Args:
        all_file_bytes (bytes): The bytes of the file to search in.
        key (str): The key to search for. With glom_searching enabled, every
            "." in the key separates one nesting level from the next.
        glom_searching: If True, treat the key as a dot-separated path and
            resolve each part in turn. If False, look the key up verbatim as
            a single segment. Defaults to True.

    Returns:
        A tuple (start, end, found). When found is True, start and end are
        offsets into all_file_bytes delimiting the value of the last segment.
        When any segment cannot be found the result is (-1, -1, False).
    """
    segments = key.split(".") if glom_searching else [key]

    # Current search window, expressed as absolute offsets into all_file_bytes.
    window_start, window_end = 0, len(all_file_bytes)

    for segment in segments:
        window = all_file_bytes[window_start:window_end]
        rel_start, rel_end, found = find_start_end_in_bytes(window, segment)
        if not found:
            return -1, -1, False
        # The helper reports offsets relative to the window it was handed;
        # shift both by the old window start to make them absolute again.
        window_start, window_end = window_start + rel_start, window_start + rel_end

    return window_start, window_end, True
17 changes: 17 additions & 0 deletions tests/test_glom_writing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import dictdatabase as DDB

# Fixture written to the database before the test runs.
# NOTE(review): the top-level "Ben" entry reuses key names that also appear
# under "users" ("Ben", "job", "age") — presumably as a decoy so that a path
# such as "users.Ben" must be resolved by nesting rather than by the first
# textual match; confirm intent with the author.
data = {
    "users": {
        "Ben": {
            "age": 30,
            "job": "Software Engineer",
        },
        "Bob": {
            "age": 30,
            "job": "Plumbers",
        },
    },
    "Ben": {
        "job": {
            "age": 30,
            "job": "Software Engineer",
        },
    },
}


def test_glom_writing():
    """
    Write through a session opened on a dotted key and read the change back.

    The fixture is stored as the "users" database, a session is opened on the
    nested key "users.Ben", a new "status" field is added and written, and the
    field is then read back through the path "users.Ben.status".
    """
    DDB.at("users").create(data, force_overwrite=True)

    ben_location = DDB.at("users", key="users.Ben")
    with ben_location.session() as (session, ben):
        ben["status"] = "cancelled"
        session.write()

    # Read back via a fresh lookup, one level deeper than the key written to.
    stored_status = DDB.at("users", key="users.Ben.status").read()
    assert stored_status == "cancelled"

0 comments on commit e856f71

Please sign in to comment.