initial design idea

mkrd · Nov 18, 2022 · 3f0e225 · 3f0e225
1 parent 1427574
commit 3f0e225
Show file tree

Hide file tree

Showing 5 changed files with 427 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -212,6 +212,85 @@ assert ress == {"8": {"a": 8}, "9": {"a": 9}} # True
 ```
 
 
+Object Mapper
+========================================================================================
+
+DictDataBase provides object mappers to model your data as classes and benefit from
+type hints and type checking.
+
+
+Mapping key-value items in one File
+----------------------------------------------------------------------------------------
+
+In this example, we will have a file called `users.json`, inside your storage directory
+(`DDB.config.storage_directory`) which contains the following data:
+```json
+{
+    "u1": {
+        "first_name": "John",
+        "last_name": "Doe",
+        "age": 21
+    },
+    "u2": {
+        "first_name": "Jane",
+        "last_name": "Smith",
+        "age": 30,
+        "phone": "0123456"
+    }
+}
+```
+
+We will now map the data to classes:
+
+```python
+from dictdatabase.object_mapper import FileDictModel, FileDictItemModel
+
+class User(FileDictItemModel):
+    first_name: str
+    last_name: str
+    age: int
+    phone: str | None
+
+    def full_name(self):
+        return f"{self.first_name} {self.last_name}"
+
+
+class Users(FileDictModel[User]):
+    __file__ = "users"
+```
+
+A few important things are happening here:
+- `FileDictItemModel` models a key-value item inside the file.
+- The attributes `first_name`, `last_name` and `age` of `User` are required. If any of them is missing from the file, a `KeyError` is raised.
+- The attribute `phone` is optional, and will be `None` if it does not exist in the file.
+- When defining `Users`, the `FileDictModel` must specify its item model type as a type argument (`FileDictModel[User]`).
+- `Users` only has to specify the file it refers to by setting `__file__` to the file name without the file extension (`__file__ = "users"`).
+
+
+Now, the models can be used:
+
+```python
+# Get user by id
+u1: User = Users.get_at_key("u1")
+print(u1.full_name())
+>>> "John Doe"
+
+# Iterate all users:
+for uid, user in Users.items():
+    print(user.last_name, user.age, user.phone)
+>>> "Doe", 21, None
+>>> "Smith", 30, "0123456"
+
+# Filter
+u_over_25: dict[str, User] = Users.get_where(lambda uid, user: user.age > 25)
+```
+
+
+
+
+
+
+
 
 Performance
 ========================================================================================

diff --git a/assets/coverage.svg b/assets/coverage.svg
diff --git a/dictdatabase/object_mapper.py b/dictdatabase/object_mapper.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+from abc import ABC
+from typing import get_type_hints, get_origin, get_args, TypeVar, Tuple, Type, Generic, Callable, Union
+from types import UnionType, NoneType
+from . import io_safe
+from .sessions import SessionFileFull, SessionFileKey, SessionFileWhere, SessionDirFull, SessionDirWhere
+
+
+
+T = TypeVar("T")
+T2 = TypeVar("T2")
+
+
+def get_type_hints_excluding_internals(cls):
+	"""
+		Yield a (name, type) pair for every type hinted attribute of cls,
+		skipping names that start and end with a double underscore.
+	"""
+	for attr_name, attr_type in get_type_hints(cls).items():
+		is_dunder = attr_name.startswith("__") and attr_name.endswith("__")
+		if not is_dunder:
+			yield attr_name, attr_type
+
+
+
+def fill_object_from_dict_using_type_hints(obj, cls, data: dict):
+	"""
+		Set the attributes of obj from the data dict and return obj.
+		The type hints of the class cls (excluding double dunder names) determine
+		which attributes are set. Keys of data without a type hint are ignored.
+
+		- A hint that is a union containing None (e.g. email: str | None or
+		  email: Optional[str]) is nullable: if its key is missing in data,
+		  the attribute is set to None.
+		- Every other hinted attribute is required: if its key is missing in
+		  data, a KeyError is raised.
+		- For a list[X] hint where X provides from_dict, each item is converted
+		  with X.from_dict. All other values are stored unchanged.
+	"""
+	for var_name, var_type in get_type_hints_excluding_internals(cls):
+		var_type_args = get_args(var_type)
+		var_type_origin = get_origin(var_type)
+		if var_name not in data:
+			# "X | None" has the origin UnionType, "Optional[X]" has the origin Union
+			is_union = var_type_origin is UnionType or var_type_origin is Union
+			if not (is_union and NoneType in var_type_args):
+				raise KeyError(f"Missing variable '{var_name}' in {cls.__name__}.")
+			setattr(obj, var_name, None)
+			continue
+		value = data[var_name]
+		# Only convert list items when the item type can build itself from a
+		# dict, so that hints like list[str] keep their plain items.
+		if var_type_origin is list and len(var_type_args) == 1:
+			item_type = var_type_args[0]
+			if hasattr(item_type, "from_dict"):
+				value = [item_type.from_dict(x) for x in value]
+		setattr(obj, var_name, value)
+	return obj
+
+
+
+def fill_dict_from_object_using_type_hints(cls, obj):
+	"""
+		Not implemented yet: calling this function always raises NotImplementedError.
+		Presumably intended as the counterpart of
+		fill_object_from_dict_using_type_hints (object to dict) - TODO confirm.
+	"""
+	raise NotImplementedError
+
+
+
+
+
+########################################################################################
+# Scenario 1:
+# Model a single file with FileDictModel, which is a dict at the top level.
+# Each key-value item is modeled by a FileDictItemModel.
+
+
+
+class FileDictModel(ABC, Generic[T]):
+	"""
+		Models a single file that is stored in the database.
+		At the top level the file must contain a dictionary with strings as keys.
+		A subclass sets __file__ to the file name and passes its item model as
+		the type argument, e.g. class Users(FileDictModel[User]).
+	"""
+
+	__file__ = None
+
+	@classmethod
+	def _get_item_model(cls):
+		"""
+			Return the FileDictItemModel subclass that was passed as type argument.
+			Raises an AttributeError if there is no such type argument.
+		"""
+		for base in getattr(cls, "__orig_bases__", ()):
+			for type_arg in get_args(base):
+				# Type arguments that are not classes (e.g. the TypeVar T) make
+				# issubclass raise a TypeError, they can never be the item model.
+				try:
+					is_item_model = issubclass(type_arg, FileDictItemModel)
+				except TypeError:
+					is_item_model = False
+				if is_item_model:
+					return type_arg
+		raise AttributeError(
+			"FileDictModel must specify a FileDictItemModel "
+			"(e.g. Users(FileDictModel[User]))"
+		)
+
+
+	@classmethod
+	def get_at_key(cls, key) -> T:
+		"""
+			Gets an item by key.
+			The data is partially read from the __file__.
+		"""
+		data = io_safe.partial_read(cls.__file__, key)
+		res: T = cls._get_item_model().from_key_value(key, data)
+		return res
+
+	@classmethod
+	def session_at_key(cls, key):
+		"""
+			Return a SessionFileKey for the key inside the __file__.
+			The session is given a function that converts the value at the key
+			into the item model.
+		"""
+		item_model = cls._get_item_model()
+		def make_session_obj_from_value(value):
+			return item_model.from_key_value(key, value)
+		# The file name is defined on this class, not on the item model
+		return SessionFileKey(cls.__file__, key, make_session_obj_from_value)
+
+	@classmethod
+	def get_all(cls) -> dict[str, T]:
+		"""
+			Read the whole __file__ and return a dict of
+			<key>: <ORM model of value> pairs.
+		"""
+		item_model = cls._get_item_model()
+		data = io_safe.read(cls.__file__)
+		return {k: item_model.from_key_value(k, v) for k, v in data.items()}
+
+	@classmethod
+	def items(cls):
+		"""
+			Return the (key, ORM model of value) pairs of the whole __file__,
+			so that the model can be iterated like a dict:
+			for key, item in Model.items(): ...
+		"""
+		return cls.get_all().items()
+
+	@classmethod
+	def session(cls):
+		"""
+		Enter a session with the file as (session, data) where data is a dict of
+		<key>: <ORM model of value> pairs.
+		"""
+		item_model = cls._get_item_model()
+		def make_session_obj_from_dict(data):
+			return {k: item_model.from_key_value(k, v) for k, v in data.items()}
+		return SessionFileFull(cls.__file__, make_session_obj_from_dict)
+
+
+	@classmethod
+	def get_where(cls, where: Callable[[str, T], bool]) -> dict[str, T]:
+		"""
+		Return a dictionary of all the items for which the where function returns True.
+		Where takes the key and the value's model object as arguments.
+		"""
+		return {k: v for k, v in cls.get_all().items() if where(k, v)}
+
+
+
+
+class FileDictItemModel(ABC):
+	"""
+		Models the value that is stored at one key of a file.
+		Attributes are declared as type hints on the subclass, the key of the
+		item is stored in __key__.
+	"""
+	__key__: str
+
+	@classmethod
+	def from_key_value(cls: Type[T2], key, value) -> T2:
+		"""
+			Create an instance from the value dict using the type hints of cls
+			and remember the key in __key__.
+		"""
+		obj = fill_object_from_dict_using_type_hints(cls(), cls, value)
+		obj.__key__ = key
+		return obj
+
+	@classmethod
+	def session(cls, key):
+		"""
+			Return a SessionFileKey for the key inside the file named by
+			cls.__file__. The session is given a function that converts the
+			value at the key into an instance of cls.
+			Raises an AttributeError if cls does not define __file__.
+		"""
+		# __file__ is not declared on this base class, so fail with a clear message
+		file_name = getattr(cls, "__file__", None)
+		if file_name is None:
+			raise AttributeError(f"{cls.__name__} must define __file__ to open a session.")
+		def partial_func(x):
+			return cls.from_key_value(key, x)
+		return SessionFileKey(file_name, key, partial_func)
+
+
+
+class DictModel(ABC):
+	"""
+		Models a plain dict, e.g. the items of a list attribute of another model.
+		Attributes are declared as type hints on the subclass.
+	"""
+
+	@classmethod
+	def from_dict(cls, data) -> DictModel:
+		"""
+			Create an instance from the data dict using the type hints of cls.
+		"""
+		return fill_object_from_dict_using_type_hints(cls(), cls, data)
+
+	def to_dict(self) -> dict:
+		"""
+			Return a dict of all type hinted attributes whose value is not None.
+			Attribute values are stored as they are, they are not converted.
+		"""
+		res = {}
+		# Resolve the hints on the class: on an instance, get_type_hints does not
+		# have the module namespace to resolve string annotations and does not
+		# include hints of base classes.
+		for var_name, _ in get_type_hints_excluding_internals(type(self)):
+			if (value := getattr(self, var_name, None)) is not None:
+				res[var_name] = value
+		return res
+
+
+
+########################################################################################
+# Scenario 2:
+# Add in a later version of DDB
+# A folder containing multiple files, each containing json.
+
+# class FolderBase(ABC):
+# 	__folder__ = None
+# 	__file_model__: FileInFolderModel = None
+
+
+# class FileInFolderModel(ABC):
+
+# 	@classmethod
+# 	def get_by_name(cls, file_name: str) -> FileInFolderModel:
+# 		data = io_safe.read(f"{cls.__folder__}/{file_name}")
+# 		return cls(**data)
diff --git a/testing_orm.py b/testing_orm.py
@@ -0,0 +1,120 @@
+
+
+
+class WorkTime(DictModel):
+	# Nested model, used as the item type of User.work_times.
+	# The example data below uses "HH:MM" strings for both attributes.
+	start: str
+	end: str
+
+
+class User(FileDictItemModel):
+	# Item model for one entry of the users file.
+	first_name: str
+	last_name: str
+	# Declared as a union with None, the example data below does not contain it
+	email: str | None
+
+	# Each list item is a dict with the keys "start" and "end" in the example data
+	work_times: list[WorkTime]
+
+
+	def full_name(self):
+		# First and last name joined by a single space
+		return f"{self.first_name} {self.last_name}"
+
+
+class Users(FileDictModel[User]):
+	# File model whose items are User objects, refers to the file named "users"
+	__file__ = "users"
+
+
+
+# Build a User directly from a key and a value dict.
+# The dict has no "email" key and contains the key "none", for which User
+# declares no type hint.
+u = User.from_key_value("uid1", {
+	"first_name": "John",
+	"last_name": "Doe",
+	"none": "no",
+	"work_times": [
+		{"start": "08:00", "end": "12:00"},
+		{"start": "13:00", "end": "17:00"},
+	]
+})
+
+
+# Plain attributes are taken over, work_times items are accessed as objects
+assert u.first_name == "John"
+assert u.last_name == "Doe"
+assert u.full_name() == "John Doe"
+assert u.work_times[0].start == "08:00"
+assert u.work_times[0].end == "12:00"
+assert u.work_times[1].start == "13:00"
+assert u.work_times[1].end == "17:00"
+assert len(u.work_times) == 2
+
+
+print("u type:", type(u))
+
+
+
+
+# Create the "users" file with three users, force_overwrite=True is passed so
+# that an existing file is presumably replaced - TODO confirm against DDB docs.
+# NOTE(review): the import of DDB is not visible in this hunk.
+DDB.at("users").create({
+	"uid1": {
+		"first_name": "John",
+		"last_name": "Doe",
+		"none": "no",
+		"work_times": [
+			{"start": "08:00", "end": "12:00"},
+			{"start": "13:00", "end": "17:00"},
+		]
+	},
+	"uid2": {
+		"first_name": "Jane",
+		"last_name": "Smith",
+		"none": "no",
+		"work_times": [
+			{"start": "08:00", "end": "12:00"},
+			{"start": "13:00", "end": "17:00"},
+		]
+	},
+	"uid3": {
+		"first_name": "Pete",
+		"last_name": "Griffin",
+		"none": "no",
+		"work_times": [
+			{"start": "08:00", "end": "12:00"},
+			{"start": "13:00", "end": "17:00"},
+		]
+	}
+}, force_overwrite=True)
+
+
+# Read a single user back through the file model and check the mapped attributes
+u1 = Users.get_at_key("uid1")
+assert u1.first_name == "John"
+assert u1.last_name == "Doe"
+assert u1.full_name() == "John Doe"
+assert u1.work_times[0].start == "08:00"
+assert u1.work_times[0].end == "12:00"
+assert u1.work_times[1].start == "13:00"
+assert u1.work_times[1].end == "17:00"
+assert len(u1.work_times) == 2
+
+
+
+# Only checks that reading another key does not raise, u2 is not used afterwards
+u2 = Users.get_at_key("uid2")
+
+
+
+# Print the full name of every user in the file
+for uid, u in Users.items():
+	print(u.full_name())
+
+
+
+# # Iterate FileDictModel
+# for user_id, user in Users.items():
+# 	print(user_id, user.first_name, user.last_name, user.email)
+
+# # Get one item
+# user: User = Users.get_at_key("user_id")
+
+
+# # Get by lambda
+# users: Users = Users.where(lambda user: user.first_name != "John")
+
+
+# with Users.session_at_key(user_id) as (session, user):
+# 	...
+
+# with Users.session() as (session, users): Dict[str, User]
+# 	...