use ruff as formatter
mkrd committed Oct 30, 2023
1 parent 196d175 commit 0bd8c3b
Showing 49 changed files with 605 additions and 633 deletions.
8 changes: 7 additions & 1 deletion DictDataBase.code-workspace
@@ -5,6 +5,12 @@
}
],
"settings": {
"python.pythonPath": ".venv/bin/python3"
"[python]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "charliermarsh.ruff"
},
"editor.codeActionsOnSave": {
"source.organizeImports": true
},
}
}
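
For reference, the formatting these editor settings trigger on save can also be run from the command line. A minimal sketch (an assumption for illustration, not part of this commit; it presumes a ruff version with the `format` subcommand on PATH and that import sorting is done via ruff's isort rules):

import subprocess

# Format the project and organize imports, mirroring the on-save
# behavior configured in the workspace settings above.
subprocess.run(["ruff", "format", "."], check=True)
subprocess.run(["ruff", "check", "--select", "I", "--fix", "."], check=True)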
1 change: 0 additions & 1 deletion dictdatabase/configuration.py
@@ -2,7 +2,6 @@


class Confuguration:

__slots__ = ("storage_directory", "indent", "use_compression", "use_orjson")

storage_directory: str
34 changes: 16 additions & 18 deletions dictdatabase/indexing.py
@@ -26,19 +26,19 @@

class Indexer:
"""
The Indexer takes the name of a database file, and tries to load the .index file
of the corresponding database file.
The name of the index file is the name of the database file, with the extension
.index and all "/" replaced with "___"
The content of the index file is a json object, where the keys are keys inside
the database json file, and the values are lists of 5 elements:
- start_index: The index of the first byte of the value of the key in the database file
- end_index: The index of the last byte of the value of the key in the database file
- indent_level: The indent level of the key in the database file
- indent_with: The indent string used.
- value_hash: The hash of the value bytes
"""

__slots__ = ("data", "path")
@@ -59,15 +59,13 @@ def __init__(self, db_name: str) -> None:
except orjson.JSONDecodeError:
self.data = {}


def get(self, key: str) -> Union[list, None]:
"""
Returns a list of 5 elements for a key if it exists, otherwise None.
Elements: [start_index, end_index, indent_level, indent_with, value_hash]
"""
return self.data.get(key, None)


def write(
self,
key: str,
@@ -79,7 +77,7 @@ def write(
old_value_end: int,
) -> None:
"""
Write index information for a key to the index file
"""

if self.data.get(key, None) is not None:
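
To make the index layout above concrete, here is a toy sketch (not the library's own code; the sample database bytes and offsets are hypothetical) that builds one entry of the form [start_index, end_index, indent_level, indent_with, value_hash]:

import hashlib

# A miniature database file: the value of "users" starts at its opening
# brace and ends one past its matching closing brace.
db_bytes = b'{\n\t"users": {"count": 2}\n}'
start = db_bytes.find(b'{"count"')      # first byte of the value
end = start + len(b'{"count": 2}')      # one past the last byte of the value
value_hash = hashlib.sha256(db_bytes[start:end]).hexdigest()
index = {"users": [start, end, 1, "\t", value_hash]}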
69 changes: 32 additions & 37 deletions dictdatabase/io_bytes.py
@@ -6,37 +6,34 @@

def read(db_name: str, *, start: int = None, end: int = None) -> bytes:
"""
Read the content of a file as bytes. Reading works even when the config
changes, so a compressed ddb file can also be read if compression is
disabled, and vice versa.
If no compression is used, efficient reading can be done by specifying a start
and end byte index, such that only the bytes in that range are read from the
file.
If compression is used, specifying a start and end byte index is still possible,
but the entire file has to be read and decompressed first, and then the bytes
in the range are returned. This is because the compressed file is not seekable.
Args:
- `db_name`: The name of the database file to read from.
- `start`: The start byte index to read from.
- `end`: The end byte index to read up to (not included).
Raises:
- `FileNotFoundError`: If the file does not exist as .json nor .ddb.
- `OSError`: If no compression is used and `start` is negative.
- `FileExistsError`: If the file exists as .json and .ddb.
"""

json_path, json_exists, ddb_path, ddb_exists = utils.file_info(db_name)

if json_exists:
if ddb_exists:
- raise FileExistsError(
-     f"Inconsistent: \"{db_name}\" exists as .json and .ddb."
-     "Please remove one of them."
- )
+ raise FileExistsError(f'Inconsistent: "{db_name}" exists as .json and .ddb.' "Please remove one of them.")
with open(json_path, "rb") as f:
if start is None and end is None:
return f.read()
@@ -46,7 +43,7 @@ def read(db_name: str, *, start: int = None, end: int = None) -> bytes:
return f.read()
return f.read(end - start)
if not ddb_exists:
- raise FileNotFoundError(f"No database file exists for \"{db_name}\"")
+ raise FileNotFoundError(f'No database file exists for "{db_name}"')
with open(ddb_path, "rb") as f:
json_bytes = zlib.decompress(f.read())
if start is None and end is None:
@@ -56,19 +53,17 @@
return json_bytes[start:end]




def write(db_name: str, dump: bytes, *, start: int = None) -> None:
"""
Write the bytes to the file of the db_path. If the db was compressed but no
compression is enabled, remove the compressed file, and vice versa.
Args:
- `db_name`: The name of the database to write to.
- `dump`: The bytes to write to the file, representing correct JSON when
decoded.
- `start`: The start byte index to write to. If None, the whole file is overwritten.
If the original content was longer, the rest is truncated.
"""

json_path, json_exists, ddb_path, ddb_exists = utils.file_info(db_name)
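
The two read paths described in the docstring above can be summarized in a short sketch (a hypothetical helper, not the library's implementation):

import zlib

def ranged_read(path: str, start: int, end: int, compressed: bool) -> bytes:
    with open(path, "rb") as f:
        if not compressed:
            # Uncompressed files are seekable, so only the requested range is read.
            f.seek(start)
            return f.read(end - start)
        # A zlib-compressed file is not seekable: decompress everything, then slice.
        return zlib.decompress(f.read())[start:end]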
31 changes: 14 additions & 17 deletions dictdatabase/io_safe.py
@@ -5,10 +5,10 @@

def read(file_name: str) -> dict:
"""
Read the content of a file as a dict.

Args:
- `file_name`: The name of the file to read from.
"""

_, json_exists, _, ddb_exists = utils.file_info(file_name)
@@ -20,14 +20,13 @@ def read(file_name: str) -> dict:
return io_unsafe.read(file_name)



def partial_read(file_name: str, key: str) -> dict:
"""
Read only the value of a key-value pair from a file.

Args:
- `file_name`: The name of the file to read from.
- `key`: The key to read the value of.
"""

_, json_exists, _, ddb_exists = utils.file_info(file_name)
@@ -39,14 +38,13 @@ def partial_read(file_name: str, key: str) -> dict:
return io_unsafe.partial_read(file_name, key)



def write(file_name: str, data: dict) -> None:
"""
Ensures that writing only starts if there is no reading or writing in progress.

Args:
- `file_name`: The name of the file to write to.
- `data`: The data to write to the file.
"""

dirname = os.path.dirname(f"{config.storage_directory}/{file_name}.any")
@@ -56,13 +54,12 @@ def write(file_name: str, data: dict) -> None:
io_unsafe.write(file_name, data)



def delete(file_name: str) -> None:
"""
Ensures that deleting only starts if there is no reading or writing in progress.

Args:
- `file_name`: The name of the file to delete.
"""

json_path, json_exists, ddb_path, ddb_exists = utils.file_info(file_name)
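
Taken together, these four functions form the lock-guarded I/O layer. A hypothetical usage sketch (io_safe is an internal module, and this assumes config.storage_directory points at a writable directory):

from dictdatabase import io_safe

io_safe.write("users", {"alice": {"age": 30}})   # waits until no other read/write is active
everything = io_safe.read("users")               # full parse of the file
alice = io_safe.partial_read("users", "alice")   # parses only the value of "alice"
io_safe.delete("users")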
60 changes: 30 additions & 30 deletions dictdatabase/io_unsafe.py
@@ -35,9 +35,9 @@ class PartialFileHandle:

def read(db_name: str) -> dict:
"""
Read the file at db_path from the configured storage directory.
Make sure the file exists. If it does not, a FileNotFoundError is
raised.
"""
# Always use orjson to read the file, because it is faster
return orjson.loads(io_bytes.read(db_name))
@@ -50,9 +50,9 @@ def read(db_name: str) -> dict:

def try_read_bytes_using_indexer(indexer: indexing.Indexer, db_name: str, key: str) -> bytes | None:
"""
Check if the key info is saved in the file's index file.
If it is and the value has not changed, return the value bytes.
Otherwise return None.
"""

if (index := indexer.get(key)) is None:
@@ -66,12 +66,12 @@

def partial_read(db_name: str, key: str) -> dict | None:
"""
Partially read a key from a db.
The key MUST be unique in the entire db, otherwise the behavior is undefined.
This is a lot faster than reading the entire db, because it does not parse
the entire file, but only the <value> part of the <key>: <value> pair.
If the key is not found, a `KeyError` is raised.
"""

# Search for key in the index file
@@ -90,7 +90,7 @@ def partial_read(db_name: str, key: str) -> dict | None:
start = key_end + (1 if all_file_bytes[key_end] == byte_codes.SPACE else 0)
end = utils.seek_index_through_value_bytes(all_file_bytes, start)

indent_level, indent_with = utils.detect_indentation_in_json_bytes(all_file_bytes, key_start)
value_bytes = all_file_bytes[start:end]
value_hash = hashlib.sha256(value_bytes).hexdigest()

@@ -106,9 +106,9 @@ def partial_read(db_name: str, key: str) -> dict | None:

def serialize_data_to_json_bytes(data: dict) -> bytes:
"""
Serialize the data as json bytes. Depending on the config,
this can be done with orjson or the standard json module.
Additionally config.indent is respected.
"""
if config.use_orjson:
option = (orjson.OPT_INDENT_2 if config.indent else 0) | orjson.OPT_SORT_KEYS
@@ -120,8 +120,8 @@ def serialize_data_to_json_bytes(data: dict) -> bytes:

def write(db_name: str, data: dict) -> None:
"""
Write the dict db dumped as a json string
to the file of the db_path.
"""
data_bytes = serialize_data_to_json_bytes(data)
io_bytes.write(db_name, data_bytes)
@@ -138,12 +138,12 @@ def try_get_partial_file_handle_by_index(
key: str,
) -> tuple[PartialFileHandle | None, bytes | None]:
"""
Try to get a partial file handle by using the key entry in the index file.
If the data could be read from the index file, a tuple of the partial file
handle and None is returned.
If the data could not be read from the index file, a tuple of None and the file
bytes is returned, so that the file bytes can be searched for the key.
"""

if (index := indexer.get(key)) is None:
@@ -176,12 +176,12 @@

def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
"""
Partially read a key from a db.
The key MUST be unique in the entire db, otherwise the behavior is undefined.
This is a lot faster than reading the entire db, because it does not parse
the entire file, but only the <value> part of the <key>: <value> pair.
If the key is not found, a `KeyError` is raised.
"""

# Search for key in the index file
@@ -194,13 +194,13 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:
key_start, key_end = utils.find_outermost_key_in_json_bytes(all_file_bytes, key)

if key_end == -1:
- raise KeyError(f"Key \"{key}\" not found in db \"{db_name}\"")
+ raise KeyError(f'Key "{key}" not found in db "{db_name}"')

# Key found, now determine the bounding byte indices of the value
start = key_end + (1 if all_file_bytes[key_end] == byte_codes.SPACE else 0)
end = utils.seek_index_through_value_bytes(all_file_bytes, start)

indent_level, indent_with = utils.detect_indentation_in_json_bytes(all_file_bytes, key_start)

partial_value = orjson.loads(all_file_bytes[start:end])
prefix_bytes = all_file_bytes[:start] if config.use_compression else None
@@ -210,7 +210,7 @@ def get_partial_file_handle(db_name: str, key: str) -> PartialFileHandle:

def partial_write(pf: PartialFileHandle) -> None:
"""
Write a partial file handle to the db.
"""

partial_bytes = serialize_data_to_json_bytes(pf.partial_dict.value)
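
The core idea behind these partial reads and writes is seeking through raw bytes instead of parsing the whole document. A toy illustration of that idea (it ignores braces inside strings, which the real seek_index_through_value_bytes handles):

import orjson

raw = b'{"a": {"x": 1}, "users": {"alice": 1, "bob": 2}, "z": 3}'
start = raw.index(b'"users":') + len(b'"users":') + 1  # first byte of the value
depth, end = 0, start
while True:
    # Walk forward until the value's opening brace is matched.
    depth += (raw[end] == ord("{")) - (raw[end] == ord("}"))
    end += 1
    if depth == 0:
        break
value = orjson.loads(raw[start:end])  # parses only this slice, not the whole file
print(value)  # {'alice': 1, 'bob': 2}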