Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Feature: Object Mapper #35

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,85 @@ assert ress == {"8": {"a": 8}, "9": {"a": 9}} # True
```


Object Mapper
========================================================================================

DictDataBase provides object mappers to model your data as classes and benefit from
type hints and type checking.


Mapping key-value items in one File
----------------------------------------------------------------------------------------

In this example, we will have a file called `users.json`, inside your storage directory
(`DDB.config.storage_directory`) which contains the following data:
```json
{
"u1": {
"first_name": "John",
"last_name": "Doe",
"age": 21
},
"u2": {
"first_name": "Jane",
"last_name": "Smith",
"age": 30,
"phone": "0123456"
}
}
```

We will now map the data to classes:

```python
from dictdatabase.object_mapper import FileDictModel, FileDictItemModel

class User(FileDictItemModel):
first_name: str
last_name: str
age: int
phone: str | None

def full_name(self):
return f"{self.first_name} {self.last_name}"


class Users(FileDictModel[User]):
__file__ = "users"
```

A few important things are happening here:
- `FileDictItemModel` models a key-value item inside the file.
- The attributes `first_name`, `last_name` and `age` of `User` are required. If one of them is missing in the file, a `KeyError` is raised.
- The attribute `phone` is optional, and will be `None` if it does not exist in the file.
- When defining `Users`, the `FileDictModel` must specify its item model type as a type argument (`FileDictModel[User]`).
- `Users` only has to specify the file it refers to by passing the file name without the ending (`__file__ = "users"`)


Now, the models can be used:

```python
# Get user by id
u1: User = Users.get_at_key("u1")
print(u1.full_name())
>>> "John Doe"

# Iterate all users:
for uid, user in Users.get_all().items():
print(user.last_name, user.age, user.phone)
>>> "Doe", 21, None
>>> "Smith", 30, "0123456"

# Filter
u_over_25: dict[str, User] = Users.get_where(lambda uid, user: user.age > 25)
```








Performance
========================================================================================
Expand Down
6 changes: 3 additions & 3 deletions assets/coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions dictdatabase/io_safe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@



def read(file_name: str) -> dict:
def read(file_name: str) -> dict | None:
"""
Read the content of a file as a dict.

Expand All @@ -21,7 +21,7 @@ def read(file_name: str) -> dict:



def partial_read(file_name: str, key: str) -> dict:
def partial_read(file_name: str, key: str) -> dict | None:
"""
Read only the value of a key-value pair from a file.

Expand Down
2 changes: 1 addition & 1 deletion dictdatabase/io_unsafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

@dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9
class PartialDict:
prefix: bytes
prefix: bytes | None
key: str
value: dict
value_start: int
Expand Down
175 changes: 175 additions & 0 deletions dictdatabase/object_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from __future__ import annotations
from abc import ABC
from types import NoneType, UnionType
from typing import Callable, Generic, Tuple, Type, TypeVar, Union, get_args, get_origin, get_type_hints
from . import io_safe
from .sessions import SessionFileFull, SessionFileKey, SessionFileWhere, SessionDirFull, SessionDirWhere



T = TypeVar("T")
T2 = TypeVar("T2")


def get_type_hints_excluding_internals(cls):
    """
    Yield (name, type) pairs for the type hints of cls.
    Names that start and end with a double underscore are left out.
    """
    def is_dunder(name):
        return name.startswith("__") and name.endswith("__")

    yield from (
        (name, hint)
        for name, hint in get_type_hints(cls).items()
        if not is_dunder(name)
    )



def fill_object_from_dict_using_type_hints(obj, cls, data: dict):
    """
    Set the attributes of obj using the data dict.
    The type hints of the class cls are used to determine which attributes to set
    (double dunder names are skipped).

    - When a hinted name is missing in data, the hint must be nullable
      (`X | None`, `Optional[X]` or `Union[X, None]`). The attribute is then
      set to None. Otherwise a KeyError is raised.
    - When the hint is `list[Item]` and Item provides `from_dict`, every item of
      the list is converted with `Item.from_dict`. Lists of other item types
      (e.g. `list[str]`) are assigned unchanged.
    - Every other value is assigned unchanged.

    Returns obj.
    """
    for var_name, var_type in get_type_hints_excluding_internals(cls):
        var_type_args = get_args(var_type)
        var_type_origin = get_origin(var_type)

        if var_name not in data:
            # `X | None` reports the origin UnionType, while `Optional[X]` and
            # `Union[X, None]` report typing.Union. Both spellings are nullable.
            is_union = var_type_origin is UnionType or var_type_origin is Union
            if not (is_union and NoneType in var_type_args):
                raise KeyError(f"Missing variable '{var_name}' in {cls.__name__}.")
            setattr(obj, var_name, None)
            continue

        value = data[var_name]
        if var_type_origin is list and len(var_type_args) == 1:
            item_type = var_type_args[0]
            # Only item types that provide from_dict are converted, plain item
            # types such as str have no from_dict and are kept as they are.
            if hasattr(item_type, "from_dict"):
                value = [item_type.from_dict(x) for x in value]
        setattr(obj, var_name, value)
    return obj



def fill_dict_from_object_using_type_hints(cls, obj):
    """
    Placeholder, not implemented yet.
    Always raises NotImplementedError.
    """
    raise NotImplementedError()





########################################################################################
# Scenario 1:
# Model a single file with FileDictModel, which is a dict at the top level.
# Each key-value item is modeled by a FileDictItemModel.



class FileDictModel(ABC, Generic[T]):
    """
    Models a file that is stored in the database.
    At the top level the file must contain a dictionary with strings as keys.
    Every value of that dictionary is modeled by the FileDictItemModel that is
    passed as the type argument, e.g. `class Users(FileDictModel[User])`.
    """

    # Name of the file this model refers to. Subclasses are expected to set it.
    __file__ = None

    @classmethod
    def _get_item_model(cls):
        """
        Return the FileDictItemModel subclass that was passed as type argument.
        Raises an AttributeError if no such type argument was specified.
        """
        for base in cls.__orig_bases__:
            for type_arg in get_args(base):
                # Type arguments that are not classes (e.g. the unbound TypeVar
                # of FileDictModel itself) would make issubclass raise a
                # TypeError, so they are skipped.
                if isinstance(type_arg, type) and issubclass(type_arg, FileDictItemModel):
                    return type_arg
        raise AttributeError(
            "FileDictModel must specify a FileDictItemModel "
            "(e.g. Users(FileDictModel[User]))"
        )

    @classmethod
    def get_at_key(cls, key) -> T | None:
        """
        Gets an item by key.
        The data is partially read from the __file__.
        Returns None when the partial read yields None.
        """
        data = io_safe.partial_read(cls.__file__, key)
        if data is None:
            return None
        return cls._get_item_model().from_key_value(key, data)

    @classmethod
    def session_at_key(cls, key):
        """
        Enter a session with the item at the given key.
        The session is created with the __file__ of this model, since the item
        model itself does not define a file.
        """
        item_model = cls._get_item_model()

        def make_item(value):
            return item_model.from_key_value(key, value)

        return SessionFileKey(cls.__file__, key, make_item)

    @classmethod
    def get_all(cls) -> dict[str, T]:
        """
        Read the whole __file__ and return a dict of
        <key>: <model object of value> pairs.
        Returns an empty dict when the read yields None.
        """
        data = io_safe.read(cls.__file__)
        if data is None:
            return {}
        item_model = cls._get_item_model()
        return {k: item_model.from_key_value(k, v) for k, v in data.items()}

    @classmethod
    def session(cls):
        """
        Enter a session with the file as (session, data) where data is a dict of
        <key>: <ORM model of value> pairs.
        """
        item_model = cls._get_item_model()

        def make_session_obj_from_dict(data):
            return {k: item_model.from_key_value(k, v) for k, v in data.items()}

        return SessionFileFull(cls.__file__, make_session_obj_from_dict)

    @classmethod
    def get_where(cls, where: Callable[[str, T], bool]) -> dict[str, T]:
        """
        Return a dictionary of all the items for which the where function returns True.
        Where takes the key and the value's model object as arguments.
        """
        return {k: v for k, v in cls.get_all().items() if where(k, v)}




class FileDictItemModel(ABC):
    """
    Models one key-value item inside a file.
    The type hints of a subclass describe the attributes of the value.
    """

    # Key under which the item is stored, set by from_key_value.
    __key__: str

    @classmethod
    def from_key_value(cls: Type[T2], key, value) -> T2:
        """
        Create an instance of cls, fill it from the value dict using the type
        hints of cls, and store the key in __key__.
        """
        obj = fill_object_from_dict_using_type_hints(cls(), cls, value)
        obj.__key__ = key
        return obj

    @classmethod
    def session(cls, key):
        """
        Enter a session with the item at the given key.
        The file is taken from the __file__ attribute of cls.
        Raises an AttributeError if cls does not define __file__.
        """
        # This class does not declare __file__ itself, so fail with a clear
        # message instead of an attribute lookup error deep inside the call.
        file_name = getattr(cls, "__file__", None)
        if file_name is None:
            raise AttributeError(
                f"{cls.__name__} does not define __file__, "
                "so no session can be opened on it."
            )

        def partial_func(x):
            return cls.from_key_value(key, x)

        return SessionFileKey(file_name, key, partial_func)



class DictModel(ABC):
    """
    Models a plain dictionary, e.g. an item of a list inside another model.
    The type hints of a subclass describe the keys of the dictionary.
    """

    @classmethod
    def from_dict(cls, data) -> DictModel:
        """
        Create an instance of cls and fill it from the data dict using the
        type hints of cls.
        """
        obj = cls()
        return fill_object_from_dict_using_type_hints(obj, cls, data)

    def to_dict(self) -> dict:
        """
        Return the hinted attributes as a dict.
        Attributes that are None are left out. Nested DictModel objects,
        directly or as items of a list, are converted with their own to_dict,
        so that the result mirrors the data that from_dict accepts.
        """
        def serialize(value):
            if isinstance(value, DictModel):
                return value.to_dict()
            if isinstance(value, list):
                return [serialize(item) for item in value]
            return value

        res = {}
        # The hints are resolved on the class, not on the instance, so that
        # string annotations are evaluated in the namespace of the defining module.
        for var_name in get_type_hints(type(self)):
            if (value := getattr(self, var_name)) is not None:
                res[var_name] = serialize(value)
        return res



########################################################################################
# Scenario 2:
# Add in a later version of DDB
# A folder containing multiple files, each containing json.

# class FolderBase(ABC):
# __folder__ = None
# __file_model__: FileInFolderModel = None


# class FileInFolderModel(ABC):

# @classmethod
# def get_by_name(cls, file_name: str) -> FileInFolderModel:
# data = io_safe.read(f"{cls.__folder__}/{file_name}")
# return cls(**data)
14 changes: 7 additions & 7 deletions dictdatabase/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,28 +126,28 @@ def find_outermost_key_in_json_bytes(json_bytes: bytes, key: str):
Returns:
- A tuple of the key start and end index, or `(-1, -1)` if the key is not found.
"""
key = f"\"{key}\":".encode()
key_bytes = f"\"{key}\":".encode()

if (curr_i := json_bytes.find(key, 0)) == -1:
if (curr_i := json_bytes.find(key_bytes, 0)) == -1:
return -1, -1

key_nest = [(curr_i, 0)] # (key, nesting)
key_nest: list[tuple[int, int]] = [(curr_i, 0)] # (key, nesting)

while (next_i := json_bytes.find(key, curr_i + len(key))) != -1:
nesting = count_nesting_in_bytes(json_bytes, curr_i + len(key), next_i)
while (next_i := json_bytes.find(key_bytes, curr_i + len(key_bytes))) != -1:
nesting = count_nesting_in_bytes(json_bytes, curr_i + len(key_bytes), next_i)
key_nest.append((next_i, nesting))
curr_i = next_i

# Early exit if there is only one key
if len(key_nest) == 1:
return key_nest[0][0], key_nest[0][0] + len(key)
return key_nest[0][0], key_nest[0][0] + len(key_bytes)

# Relative to total nesting
for i in range(1, len(key_nest)):
key_nest[i] = (key_nest[i][0], key_nest[i - 1][1] + key_nest[i][1])

start_index = min(key_nest, key=lambda x: x[1])[0]
end_index = start_index + len(key)
end_index = start_index + len(key_bytes)
return start_index, end_index


Expand Down
Loading