diff --git a/README.md b/README.md
index e23f5ed..ed568ab 100644
--- a/README.md
+++ b/README.md
@@ -212,6 +212,85 @@ assert ress == {"8": {"a": 8}, "9": {"a": 9}} # True
```
+Object Mapper
+========================================================================================
+
+DictDataBase provides object mappers to model your data as classes and benefit from
+type hints and type checking.
+
+
+Mapping key-value items in one File
+----------------------------------------------------------------------------------------
+
+In this example, we will have a file called `users.json`, inside your storage directory
+(`DDB.config.storage_directory`) which contains the following data:
+```json
+{
+ "u1": {
+ "first_name": "John",
+ "last_name": "Doe",
+ "age": 21
+ },
+ "u2": {
+ "first_name": "Jane",
+ "last_name": "Smith",
+ "age": 30,
+ "phone": "0123456"
+ }
+}
+```
+
+We will now map the data to classes:
+
+```python
+from dictdatabase.object_mapper import FileDictModel, FileDictItemModel
+
+class User(FileDictItemModel):
+ first_name: str
+ last_name: str
+ age: int
+ phone: str | None
+
+ def full_name(self):
+ return f"{self.first_name} {self.last_name}"
+
+
+class Users(FileDictModel[User]):
+ __file__ = "users"
+```
+
+A few important things are happening here:
+- `FileDictItemModel` models a key-value item inside the file.
+- The attributes `first_name`, `last_name`, and `age` of `User` are required. If any of them is missing from the file, a `KeyError` is raised.
+- The attribute `phone` is optional, and will be `None` if it does not exist in the file.
+- When defining `Users`, the `FileDictModel` must specify its item model type as a type argument (`FileDictModel[User]`).
+- `Users` only has to specify the file it refers to by setting `__file__` to the file name without its extension (`__file__ = "users"`).
+
+
+Now, the models can be used:
+
+```python
+# Get user by id
+u1: User = Users.get_at_key("u1")
+print(u1.full_name())
+# Prints: John Doe
+
+# Iterate all users:
+for uid, user in Users.get_all().items():
+    print(user.last_name, user.age, user.phone)
+# Prints: Doe 21 None
+# Prints: Smith 30 0123456
+
+# Filter
+u_over_25: dict[str, User] = Users.get_where(lambda uid, user: user.age > 25)
+```
+
+
+
+
+
+
+
Performance
========================================================================================
diff --git a/assets/coverage.svg b/assets/coverage.svg
index 6bfc8fa..b3e8ba0 100644
--- a/assets/coverage.svg
+++ b/assets/coverage.svg
@@ -9,13 +9,13 @@
-
+
coverage
coverage
- 99%
- 99%
+ 88%
+ 88%
diff --git a/dictdatabase/io_safe.py b/dictdatabase/io_safe.py
index 130f685..d852bc8 100644
--- a/dictdatabase/io_safe.py
+++ b/dictdatabase/io_safe.py
@@ -3,7 +3,7 @@
-def read(file_name: str) -> dict:
+def read(file_name: str) -> dict | None:
"""
Read the content of a file as a dict.
@@ -21,7 +21,7 @@ def read(file_name: str) -> dict:
-def partial_read(file_name: str, key: str) -> dict:
+def partial_read(file_name: str, key: str) -> dict | None:
"""
Read only the value of a key-value pair from a file.
diff --git a/dictdatabase/io_unsafe.py b/dictdatabase/io_unsafe.py
index 7204a30..a40432a 100644
--- a/dictdatabase/io_unsafe.py
+++ b/dictdatabase/io_unsafe.py
@@ -9,7 +9,7 @@
@dataclass(frozen=True) # slots=True not supported by python 3.8 and 3.9
class PartialDict:
- prefix: bytes
+ prefix: bytes | None
key: str
value: dict
value_start: int
diff --git a/dictdatabase/object_mapper.py b/dictdatabase/object_mapper.py
new file mode 100644
index 0000000..dd4e922
--- /dev/null
+++ b/dictdatabase/object_mapper.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+from abc import ABC
+from typing import get_type_hints, get_origin, get_args, Callable, TypeVar, Tuple, Type, Generic, Union
+from types import UnionType, NoneType
+from . import io_safe
+from .sessions import SessionFileFull, SessionFileKey, SessionFileWhere, SessionDirFull, SessionDirWhere
+
+
+
+# Type parameter of FileDictModel: the item model class its methods return.
+T = TypeVar("T")
+# Type of the class that FileDictItemModel.from_key_value is called on and returns.
+T2 = TypeVar("T2")
+
+
+def get_type_hints_excluding_internals(cls):
+    """
+    Yield (name, type) pairs for the resolved type hints of cls, leaving out
+    dunder names (names that both start and end with a double underscore).
+    """
+    hints = get_type_hints(cls)
+    for name in hints:
+        is_dunder = name.startswith("__") and name.endswith("__")
+        if not is_dunder:
+            yield name, hints[name]
+
+
+
+def fill_object_from_dict_using_type_hints(obj, cls, data: dict):
+    """
+    Set the attributes of obj from the data dict and return obj.
+
+    The type hints of cls (excluding dunder names) determine which attributes
+    are set. Keys of data that have no matching type hint are ignored.
+
+    - A hinted name that is missing from data is set to None if its hint is a
+      union containing None (`str | None` or `Optional[str]`). Otherwise a
+      KeyError is raised.
+    - A value hinted as `list[X]` (or `list[X] | None`), where X provides a
+      `from_dict` classmethod, is converted item by item with `X.from_dict`.
+      Lists of other item types (e.g. `list[str]`) are stored unchanged.
+    - Every other value is stored unchanged.
+    """
+    for var_name, var_type in get_type_hints_excluding_internals(cls):
+        var_type_args = get_args(var_type)
+        # `X | None` has origin types.UnionType, `Optional[X]` has origin typing.Union
+        is_union = get_origin(var_type) in (UnionType, Union)
+        nullable = is_union and NoneType in var_type_args
+        if var_name not in data:
+            if not nullable:
+                raise KeyError(f"Missing variable '{var_name}' in {cls.__name__}.")
+            setattr(obj, var_name, None)
+            continue
+        value = data[var_name]
+        # Look through `X | None` so that an optional list is treated like a plain list
+        if nullable:
+            non_none_args = [arg for arg in var_type_args if arg is not NoneType]
+            if len(non_none_args) == 1:
+                var_type = non_none_args[0]
+                var_type_args = get_args(var_type)
+        is_typed_list = get_origin(var_type) is list and len(var_type_args) == 1
+        # Only convert items whose type can build itself from a dict
+        if is_typed_list and value is not None and hasattr(var_type_args[0], "from_dict"):
+            item_type = var_type_args[0]
+            value = [item_type.from_dict(x) for x in value]
+        setattr(obj, var_name, value)
+    return obj
+
+
+
+def fill_dict_from_object_using_type_hints(cls, obj):
+ """
+ Not implemented yet: calling this function always raises NotImplementedError.
+ NOTE(review): presumably intended as the counterpart of
+ fill_object_from_dict_using_type_hints (object -> dict); confirm before use.
+ """
+ raise NotImplementedError
+
+
+
+
+
+########################################################################################
+# Scenario 1:
+# Model a single file with FileDictModel, which is a dict at the top level.
+# Each key-value item is modeled by a FileDictItemModel.
+
+
+
+class FileDictModel(ABC, Generic[T]):
+    """
+    Models a file that is stored in the database.
+    At the top level the file must contain a dictionary with strings as keys.
+
+    Subclasses pass their item model as the type argument and name the file
+    with __file__, e.g.:
+
+        class Users(FileDictModel[User]):
+            __file__ = "users"
+    """
+
+    __file__ = None
+
+    @classmethod
+    def _get_item_model(cls):
+        """
+        Return the FileDictItemModel subclass that was passed as type argument.
+        Raises an AttributeError if no type argument is a FileDictItemModel.
+        """
+        for base in getattr(cls, "__orig_bases__", ()):
+            for type_arg in get_args(base):
+                # Skip arguments that are not classes (e.g. the unbound TypeVar of
+                # FileDictModel itself), because issubclass would raise a TypeError.
+                if isinstance(type_arg, type) and issubclass(type_arg, FileDictItemModel):
+                    return type_arg
+        raise AttributeError(
+            "FileDictModel must specify a FileDictItemModel "
+            "(e.g. Users(FileDictModel[User]))"
+        )
+
+    @classmethod
+    def get_at_key(cls, key) -> T:
+        """
+        Gets an item by key.
+        The data is partially read from the __file__.
+        Raises a KeyError if the partial read yields no data for the key.
+        """
+        data = io_safe.partial_read(cls.__file__, key)
+        if data is None:
+            raise KeyError(f"No data found for key '{key}' in file '{cls.__file__}'.")
+        res: T = cls._get_item_model().from_key_value(key, data)
+        return res
+
+    @classmethod
+    def session_at_key(cls, key):
+        """
+        Open a SessionFileKey on __file__ for the item at key, passing a function
+        that converts the item's value dict into an item model object.
+        """
+        item_model = cls._get_item_model()
+
+        def make_item(value):
+            return item_model.from_key_value(key, value)
+
+        # The file name lives on this class, not on the item model, so the
+        # session is created here instead of delegating to the item model.
+        return SessionFileKey(cls.__file__, key, make_item)
+
+    @classmethod
+    def get_all(cls) -> dict[str, T]:
+        """
+        Read the whole __file__ and return a dict of <key>: <item model object> pairs.
+        Raises a FileNotFoundError if reading the file yields no data.
+        """
+        data = io_safe.read(cls.__file__)
+        if data is None:
+            raise FileNotFoundError(f"No data could be read from file '{cls.__file__}'.")
+        item_model = cls._get_item_model()
+        return {k: item_model.from_key_value(k, v) for k, v in data.items()}
+
+    @classmethod
+    def session(cls):
+        """
+        Enter a session with the file as (session, data) where data is a dict of
+        <key>: <item model object> pairs.
+        """
+        item_model = cls._get_item_model()
+
+        def make_session_obj_from_dict(data):
+            return {k: item_model.from_key_value(k, v) for k, v in data.items()}
+
+        return SessionFileFull(cls.__file__, make_session_obj_from_dict)
+
+    @classmethod
+    def get_where(cls, where: Callable[[str, T], bool]) -> dict[str, T]:
+        """
+        Return a dictionary of all the items for which the where function returns True.
+        Where takes the key and the value's model object as arguments.
+        """
+        return {k: v for k, v in cls.get_all().items() if where(k, v)}
+
+
+
+
+class FileDictItemModel(ABC):
+    """
+    Models one key-value item of a file.
+    The attributes of an item are declared as type hints on the subclass, and
+    the key of the item is stored in __key__.
+    """
+
+    __key__: str
+
+    @classmethod
+    def from_key_value(cls: Type[T2], key, value) -> T2:
+        """
+        Create an instance from an item's key and its value dict.
+        The class is instantiated without arguments, its attributes are filled
+        from value according to the type hints, and __key__ is set to key.
+        """
+        obj = fill_object_from_dict_using_type_hints(cls(), cls, value)
+        obj.__key__ = key
+        return obj
+
+    @classmethod
+    def session(cls, key):
+        """
+        Open a SessionFileKey for the item at key, passing a function that
+        converts the item's value dict into an instance of this class.
+
+        The class must define __file__ for this to work, otherwise an
+        AttributeError is raised.
+        """
+        # Plain classes have no __file__ attribute, so fail with a clear message
+        file_name = getattr(cls, "__file__", None)
+        if file_name is None:
+            raise AttributeError(
+                f"{cls.__name__} must define __file__ to open a session at a key."
+            )
+
+        def partial_func(x):
+            return cls.from_key_value(key, x)
+
+        return SessionFileKey(file_name, key, partial_func)
+
+
+
+class DictModel(ABC):
+    """
+    Models a plain dict whose keys are declared as type hints on the subclass.
+    """
+
+    @classmethod
+    def from_dict(cls, data) -> DictModel:
+        """
+        Create an instance without arguments and fill its attributes from data
+        according to the type hints of the class.
+        """
+        obj = cls()
+        return fill_object_from_dict_using_type_hints(obj, cls, data)
+
+    def to_dict(self) -> dict:
+        """
+        Return a dict with one entry per type-hinted attribute that is not None.
+        Nested DictModel objects, also inside lists, are converted with their
+        own to_dict. Attributes that were never set are treated like None.
+        """
+        res = {}
+        # Resolve the hints on the class: resolving them on the instance cannot
+        # evaluate string annotations that name classes of the defining module.
+        for var_name, _ in get_type_hints_excluding_internals(type(self)):
+            value = getattr(self, var_name, None)
+            if value is None:
+                continue
+            if isinstance(value, DictModel):
+                value = value.to_dict()
+            elif isinstance(value, list):
+                value = [x.to_dict() if isinstance(x, DictModel) else x for x in value]
+            res[var_name] = value
+        return res
+
+
+
+########################################################################################
+# Scenario 2:
+# Add in a later version of DDB
+# A folder containing multiple files, each containing json.
+
+# class FolderBase(ABC):
+# __folder__ = None
+# __file_model__: FileInFolderModel = None
+
+
+# class FileInFolderModel(ABC):
+
+# @classmethod
+# def get_by_name(cls, file_name: str) -> FileInFolderModel:
+# data = io_safe.read(f"{cls.__folder__}/{file_name}")
+# return cls(**data)
diff --git a/dictdatabase/utils.py b/dictdatabase/utils.py
index 55de637..1f7e7a3 100644
--- a/dictdatabase/utils.py
+++ b/dictdatabase/utils.py
@@ -126,28 +126,28 @@ def find_outermost_key_in_json_bytes(json_bytes: bytes, key: str):
Returns:
- A tuple of the key start and end index, or `(-1, -1)` if the key is not found.
"""
- key = f"\"{key}\":".encode()
+ key_bytes = f"\"{key}\":".encode()
- if (curr_i := json_bytes.find(key, 0)) == -1:
+ if (curr_i := json_bytes.find(key_bytes, 0)) == -1:
return -1, -1
- key_nest = [(curr_i, 0)] # (key, nesting)
+ key_nest: list[tuple[int, int]] = [(curr_i, 0)] # (key, nesting)
- while (next_i := json_bytes.find(key, curr_i + len(key))) != -1:
- nesting = count_nesting_in_bytes(json_bytes, curr_i + len(key), next_i)
+ while (next_i := json_bytes.find(key_bytes, curr_i + len(key_bytes))) != -1:
+ nesting = count_nesting_in_bytes(json_bytes, curr_i + len(key_bytes), next_i)
key_nest.append((next_i, nesting))
curr_i = next_i
# Early exit if there is only one key
if len(key_nest) == 1:
- return key_nest[0][0], key_nest[0][0] + len(key)
+ return key_nest[0][0], key_nest[0][0] + len(key_bytes)
# Relative to total nesting
for i in range(1, len(key_nest)):
key_nest[i] = (key_nest[i][0], key_nest[i - 1][1] + key_nest[i][1])
start_index = min(key_nest, key=lambda x: x[1])[0]
- end_index = start_index + len(key)
+ end_index = start_index + len(key_bytes)
return start_index, end_index
diff --git a/testing_orm.py b/testing_orm.py
new file mode 100644
index 0000000..237465d
--- /dev/null
+++ b/testing_orm.py
@@ -0,0 +1,120 @@
+from dictdatabase.object_mapper import DictModel, FileDictItemModel, FileDictModel
+import dictdatabase as DDB
+
+
+class WorkTime(DictModel):
+    """A single work period of a user."""
+    start: str
+    end: str
+
+
+class User(FileDictItemModel):
+    """One item of the users file. Only email is optional."""
+    first_name: str
+    last_name: str
+    email: str | None
+
+    work_times: list[WorkTime]
+
+    def full_name(self):
+        """Return first and last name separated by a space."""
+        return f"{self.first_name} {self.last_name}"
+
+
+class Users(FileDictModel[User]):
+    """The users file, containing User items by key."""
+    __file__ = "users"
+
+
+
+# Build a User directly from a key and a value dict, without touching any file.
+# The key "none" has no type hint on User and is therefore not set on the object.
+u = User.from_key_value("uid1", {
+    "first_name": "John",
+    "last_name": "Doe",
+    "none": "no",
+    "work_times": [
+        {"start": "08:00", "end": "12:00"},
+        {"start": "13:00", "end": "17:00"},
+    ]
+})
+
+
+assert u.first_name == "John"
+assert u.last_name == "Doe"
+assert u.full_name() == "John Doe"
+assert u.work_times[0].start == "08:00"
+assert u.work_times[0].end == "12:00"
+assert u.work_times[1].start == "13:00"
+assert u.work_times[1].end == "17:00"
+assert len(u.work_times) == 2
+
+
+print("u type:", type(u))
+
+
+# Create (or overwrite) the users file that the Users model refers to.
+DDB.at("users").create({
+    "uid1": {
+        "first_name": "John",
+        "last_name": "Doe",
+        "none": "no",
+        "work_times": [
+            {"start": "08:00", "end": "12:00"},
+            {"start": "13:00", "end": "17:00"},
+        ]
+    },
+    "uid2": {
+        "first_name": "Jane",
+        "last_name": "Smith",
+        "none": "no",
+        "work_times": [
+            {"start": "08:00", "end": "12:00"},
+            {"start": "13:00", "end": "17:00"},
+        ]
+    },
+    "uid3": {
+        "first_name": "Pete",
+        "last_name": "Griffin",
+        "none": "no",
+        "work_times": [
+            {"start": "08:00", "end": "12:00"},
+            {"start": "13:00", "end": "17:00"},
+        ]
+    }
+}, force_overwrite=True)
+
+
+# Read a single item through the file model.
+u1 = Users.get_at_key("uid1")
+assert u1.first_name == "John"
+assert u1.last_name == "Doe"
+assert u1.full_name() == "John Doe"
+assert u1.work_times[0].start == "08:00"
+assert u1.work_times[0].end == "12:00"
+assert u1.work_times[1].start == "13:00"
+assert u1.work_times[1].end == "17:00"
+assert len(u1.work_times) == 2
+
+
+u2 = Users.get_at_key("uid2")
+
+
+# FileDictModel has no items() method; iterate the dict returned by get_all().
+for uid, u in Users.get_all().items():
+    print(u.full_name())
+
+
+
+# # Iterate FileDictModel
+# for user_id, user in Users.get_all().items():
+# print(user_id, user.first_name, user.last_name, user.email)
+
+# # Get one item
+# user: User = Users.get_at_key("user_id")
+
+
+# # Get by lambda
+# users: dict[str, User] = Users.get_where(lambda user_id, user: user.first_name != "John")
+
+
+# with Users.session_at_key(user_id) as (session, user):
+# ...
+
+# with Users.session() as (session, users): Dict[str, User]
+# ...
diff --git a/tests/test_object_mapper.py b/tests/test_object_mapper.py
new file mode 100644
index 0000000..ccc5de3
--- /dev/null
+++ b/tests/test_object_mapper.py
@@ -0,0 +1,50 @@
+from dictdatabase.object_mapper import DictModel, FileDictItemModel, FileDictModel
+import dictdatabase as DDB
+import pytest
+
+
+def test_object_mapper_docs_example(use_test_dir):
+    """
+    Creates the users file of the documentation example and checks that the
+    mapped objects expose the stored values, with the optional phone as None
+    when it is absent.
+    """
+    file_name = "object_mapper_docs_example_users"
+    john = {"first_name": "John", "last_name": "Doe", "age": 21}
+    jane = {"first_name": "Jane", "last_name": "Smith", "age": 30, "phone": "0123456"}
+    DDB.at(file_name).create({"u1": john, "u2": jane}, force_overwrite=True)
+
+    class User(FileDictItemModel):
+        first_name: str
+        last_name: str
+        age: int
+        phone: str | None
+
+        def full_name(self):
+            return f"{self.first_name} {self.last_name}"
+
+    class Users(FileDictModel[User]):
+        __file__ = file_name
+
+    # u1 has no phone stored, so the optional attribute must be None
+    john_model: User = Users.get_at_key("u1")
+    assert john_model.full_name() == "John Doe"
+    assert john_model.age == 21
+    assert john_model.phone is None
+
+    # Attributes without a type hint do not exist on the object
+    with pytest.raises(AttributeError):
+        getattr(john_model, "no")
+
+    jane_model: User = Users.get_at_key("u2")
+    assert jane_model.full_name() == "Jane Smith"
+    assert jane_model.age == 30
+    assert jane_model.phone == "0123456"
+
+    for user in Users.get_all().values():
+        assert user.age in [21, 30]