-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #42 from UmbrellaMalware/main
Add glom-like searching for keys
- Loading branch information
Showing
8 changed files
with
195 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import dataclasses | ||
|
||
|
||
@dataclasses.dataclass(frozen=True)
class SearchResult:
    """Immutable outcome of a byte-offset lookup.

    Instances are frozen, so they are hashable and cannot be modified
    after construction.
    """

    start_byte: int  # offset at which the located span starts
    end_byte: int  # offset at which the located span ends
    found: bool  # whether the lookup located anything
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import hashlib | ||
|
||
from dictdatabase import utils | ||
|
||
|
||
class IndexManager:
    """Helpers for building index entries for keys of a JSON file."""

    @staticmethod
    def create_index(all_file_bytes: bytes, key: str, start: int, end: int):
        """Build the index entry for ``key`` whose value spans ``start:end``.

        Args:
            all_file_bytes (bytes): The entire file as a byte string.
            key (str): The key of the value being indexed.
            start: Offset in ``all_file_bytes`` where the value starts.
            end: Offset in ``all_file_bytes`` where the value ends; the
                value is read as ``all_file_bytes[start:end]``.

        Returns:
            The tuple ``(key, start, end, indent_level, indent_with,
            value_hash, end)``. ``value_hash`` is the SHA-256 hex digest of
            the value bytes. ``end`` is deliberately returned twice, exactly
            as before.
            NOTE(review): the trailing ``end`` presumably fills a separate
            slot of the index writer - confirm against the caller.
        """
        # Only the start of the key is needed: the indentation is detected
        # from the position where the key begins.
        key_start, _ = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)
        indent_level, indent_with = utils.detect_indentation_in_json_bytes(
            all_file_bytes, key_start
        )
        value_hash = hashlib.sha256(all_file_bytes[start:end]).hexdigest()
        return key, start, end, indent_level, indent_with, value_hash, end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from typing import Tuple | ||
|
||
import orjson | ||
|
||
from dictdatabase import byte_codes | ||
from dictdatabase import utils | ||
from dictdatabase.dataclasses import SearchResult | ||
|
||
|
||
def find_key_position_in_bytes(file: bytes, key: str) -> SearchResult:
    """Locate the value that belongs to ``key`` inside JSON bytes.

    Args:
        file (bytes): The JSON bytes to search in.
        key (str): The key whose value should be located.

    Returns:
        A ``SearchResult`` whose ``start_byte`` and ``end_byte`` delimit the
        value of ``key`` within ``file``. When the key is not found, both
        offsets are -1 and ``found`` is False.
    """
    _, key_end = utils.find_outermost_key_in_json_bytes(file, key)
    if key_end == -1:
        return SearchResult(start_byte=-1, end_byte=-1, found=False)
    # The value begins right after the key, skipping a single space byte
    # when one follows the key.
    start = key_end + (1 if file[key_end] == byte_codes.SPACE else 0)
    end = utils.seek_index_through_value_bytes(file, start)
    return SearchResult(start_byte=start, end_byte=end, found=True)
|
||
|
||
def search_key_position_in_db(
    file: bytes, key: str, glom_searching=True
) -> SearchResult:
    """Locate the position of a (possibly dotted) key inside JSON bytes.

    Args:
        file (bytes): The JSON bytes to search in.
        key (str): The key to search for.
        glom_searching: If True, ``key`` is split on "." and each part is
            looked up inside the value of the previous part. If False,
            ``key`` is looked up as a single key. Defaults to True.

    Returns:
        A ``SearchResult`` whose offsets are the start and end of the last
        path part's key, expressed relative to the original ``file``. When
        any part is missing, both offsets are -1 and ``found`` is False.
    """
    parts = key.split(".") if glom_searching else [key]
    # Absolute window of ``file`` that holds the value of the previously
    # matched part. Always slicing the original buffer with absolute offsets
    # keeps every level of the path aligned; re-slicing an already sliced
    # buffer with absolute offsets would shift deeper levels.
    window_start = 0
    window_end = len(file)
    key_start_abs = 0
    key_end_abs = len(file)
    for part in parts:
        window = file[window_start:window_end]
        key_start, key_end = utils.find_outermost_key_in_json_bytes(window, part)
        if key_end == -1:
            return SearchResult(start_byte=-1, end_byte=-1, found=False)
        # Offsets returned by the helpers are relative to ``window``.
        key_start_abs = window_start + key_start
        key_end_abs = window_start + key_end
        value = find_key_position_in_bytes(window, part)
        # Narrow the window to exactly this part's value. The end must be
        # computed before the start is advanced.
        window_end = window_start + value.end_byte
        window_start += value.start_byte
    return SearchResult(start_byte=key_start_abs, end_byte=key_end_abs, found=True)
|
||
|
||
def search_value_position_in_db(
    all_file_bytes: bytes, key: str, glom_searching=True
) -> Tuple[int, int, bool]:
    """Locate the value of a (possibly dotted) key inside JSON bytes.

    Args:
        all_file_bytes (bytes): The bytes of the file to search in.
        key (str): The key to search for.
        glom_searching: If True, ``key`` is split on "." and each part is
            looked up inside the value of the previous part. If False,
            ``key`` is looked up as a single key. Defaults to True.

    Returns:
        A tuple ``(start, end, found)``. ``start`` and ``end`` are the
        offsets of the value within ``all_file_bytes``. When any part of the
        path is missing the result is ``(-1, -1, False)``.
    """
    parts = key.split(".") if glom_searching else [key]
    value_start = 0
    value_end = len(all_file_bytes)
    for part in parts:
        # Search only inside the value found for the previous part.
        position = find_key_position_in_bytes(
            all_file_bytes[value_start:value_end], part
        )
        if not position.found:
            return -1, -1, False
        # ``position`` is relative to the slice; convert back to absolute
        # offsets. The end must be computed before the start is advanced.
        value_end = value_start + position.end_byte
        value_start += position.start_byte
    return value_start, value_end, True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import dictdatabase as DDB | ||
|
||
# Fixture shared by the read tests: "Ben" appears both as a nested key under
# "users" and as a top-level key, each with a different value.
data = {
    "users": {
        "Ben": {"age": 30, "job": "Software Engineer"},
        "Bob": {"age": 30, "job": "Plumbers"},
    },
    "Ben": {"job": {"age": 30, "job": "Software Engineer"}},
}
|
||
|
||
def test_glom_searching():
    """A dotted key reads the value nested at that path."""
    DDB.at("users").create(data, force_overwrite=True)
    assert DDB.at("users", key="users.Ben.job").read() == "Software Engineer"
|
||
|
||
def test_without_glom_searching():
    """A key without dots reads the matching top-level value."""
    DDB.at("users").create(data, force_overwrite=True)
    assert DDB.at("users", key="Ben").read() == {
        "job": {"age": 30, "job": "Software Engineer"}
    }
|
||
|
||
def test_glom_searching_if_key_not_exists():
    """Reading a dotted key with a missing middle part returns None."""
    DDB.at("users").create(data, force_overwrite=True)
    assert DDB.at("users", key="users.Job.Ben").read() is None
|
||
|
||
def test_glom_searching_if_subkey_not_exists():
    """Reading a dotted key with a missing last part returns None."""
    DDB.at("users").create(data, force_overwrite=True)
    assert DDB.at("users", key="users.Ben.SUBKEYNOTEXISTS").read() is None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import pytest | ||
|
||
import dictdatabase as DDB | ||
|
||
# Fixture shared by the write tests: "Ben" appears both as a nested key under
# "users" and as a top-level key, each with a different value.
data = {
    "users": {
        "Ben": {"age": 30, "job": "Software Engineer"},
        "Bob": {"age": 30, "job": "Plumbers"},
    },
    "Ben": {"job": {"age": 30, "job": "Software Engineer"}},
}
|
||
|
||
def test_glom_writing():
    """A session opened on a dotted key writes back into the nested value."""
    DDB.at("users").create(data, force_overwrite=True)
    with DDB.at("users", key="users.Ben").session() as (session, user):
        user["status"] = "cancelled"
        session.write()
    assert DDB.at("users", key="users.Ben.status").read() == "cancelled"
|
||
|
||
def test_glom_writing_sub_key_not_exists():
    """Opening a session on a dotted key with a missing part raises KeyError."""
    DDB.at("users").create(data, force_overwrite=True)
    with pytest.raises(KeyError):
        with DDB.at("users", key="users.SUBKEY").session() as (session, user):
            user["status"] = "cancelled"
            session.write()