Skip to content

Commit

Permalink
Merge pull request #199 from PainterQubits/#198-more-descriptive-json
Browse files Browse the repository at this point in the history
#198 More descriptive JSON
  • Loading branch information
alexhad6 authored Jun 22, 2024
2 parents 97d95ce + fd537d9 commit 45ca04e
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 74 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ project adheres to clauses 1–8 of [Semantic Versioning](https://semver.org/spe

## [Unreleased]

### Added

- Added `ParamDB.__repr__()` for better debugging.

### Changed

- `ParamDB.commit()` now has a `raw_json` option, allowing a raw JSON string to be
committed to the database.
- Updated the underlying JSON representation to be more self-describing; see
`ParamDB.load()` for the new format specification.

## [0.14.0] (Jun 18 2024)

### Changed
Expand Down
2 changes: 1 addition & 1 deletion docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ All of the following can be imported from `paramdb`.
.. autoclass:: ParamDB
.. autoclass:: CommitEntry
.. autoclass:: CommitEntryWithData
.. autoclass:: ParamDBKey
.. autoclass:: ParamDBType
```
4 changes: 2 additions & 2 deletions paramdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from paramdb._param_data._files import ParamFile
from paramdb._param_data._collections import ParamList, ParamDict
from paramdb._param_data._type_mixins import ParentType, RootType
from paramdb._database import ParamDB, CommitEntry, CommitEntryWithData, ParamDBKey
from paramdb._database import ParamDB, CommitEntry, CommitEntryWithData, ParamDBType

__all__ = [
"ParamData",
Expand All @@ -18,7 +18,7 @@
"ParamDB",
"CommitEntry",
"CommitEntryWithData",
"ParamDBKey",
"ParamDBType",
]

try:
Expand Down
158 changes: 105 additions & 53 deletions paramdb/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,23 @@
_SelectT = TypeVar("_SelectT", bound=Select[Any])


class ParamDBKey:
class ParamDBType:
"""
Keys corresponding to different object types in the JSON representation of the data
in a ParamDB commit.
Type strings corresponding to different object types in the JSON representation of
the data in a ParamDB commit.
"""

DATETIME = "t"
"""Key for ``datetime.datetime`` objects."""
QUANTITY = "q"
"""Key for ``astropy.units.quantity`` objects."""
LIST = "l"
"""Key for ordinary lists."""
DICT = "d"
"""Key for ordinary dictionaries."""
PARAM = "p"
DATETIME = "datetime"
"""Type string for ``datetime.datetime`` objects."""
QUANTITY = "Quantity"
"""Type string ``astropy.units.Quantity`` objects."""
LIST = "list"
"""Type string ordinary lists."""
DICT = "dict"
"""Type string ordinary dictionaries."""
PARAM_DATA = "ParamData"
"""
Key for :py:class:`ParamData` objects.
Type string for :py:class:`ParamData` objects.
The JSON object should either include a parameter class name, or be None if wrapping
a non-:py:class:`ParamData` with parameter metadata (e.g. a last updated time).
Expand All @@ -69,23 +69,25 @@ def _encode_json(obj: Any) -> Any:
if isinstance(obj, (int, float, bool, str)) or obj is None:
return obj
if isinstance(obj, datetime):
return [ParamDBKey.DATETIME, obj.timestamp()]
return {"type": ParamDBType.DATETIME, "timestamp": obj.timestamp()}
if _ASTROPY_INSTALLED and isinstance(obj, Quantity):
return [ParamDBKey.QUANTITY, obj.value, str(obj.unit)]
return {"type": ParamDBType.QUANTITY, "value": obj.value, "unit": str(obj.unit)}
if isinstance(obj, (list, tuple)):
return [ParamDBKey.LIST, [_encode_json(item) for item in obj]]
return {"type": ParamDBType.LIST, "data": [_encode_json(item) for item in obj]}
if isinstance(obj, dict):
return [
ParamDBKey.DICT,
{key: _encode_json(value) for key, value in obj.items()},
]
return {
"type": ParamDBType.DICT,
"data": {key: _encode_json(value) for key, value in obj.items()},
}
if isinstance(obj, ParamData):
return [
ParamDBKey.PARAM,
_encode_json(obj.to_json()),
None if isinstance(obj, _ParamWrapper) else type(obj).__name__,
obj.last_updated.timestamp(),
]
encoded_json = {"type": ParamDBType.PARAM_DATA}
if not isinstance(obj, _ParamWrapper):
encoded_json |= {"className": type(obj).__name__}
encoded_json |= {
"lastUpdated": obj.last_updated.timestamp(),
"data": _encode_json(obj.to_json()),
}
return encoded_json
raise TypeError(
f"'{type(obj).__name__}' object {repr(obj)} is not JSON serializable, so the"
" commit failed"
Expand All @@ -94,34 +96,47 @@ def _encode_json(obj: Any) -> Any:

def _decode_json(json_data: Any) -> Any:
"""Reconstruct an object encoded by ``_json_encode()``."""
if isinstance(json_data, list):
key, *data = json_data
if key == ParamDBKey.DATETIME:
return datetime.fromtimestamp(data[0], timezone.utc).astimezone()
if _ASTROPY_INSTALLED and key == ParamDBKey.QUANTITY:
return Quantity(*data)
if key == ParamDBKey.LIST:
return [_decode_json(item) for item in data[0]]
if key == ParamDBKey.DICT:
return {key: _decode_json(value) for key, value in data[0].items()}
if key == ParamDBKey.PARAM:
json_data, class_name, timestamp = data
if isinstance(json_data, dict):
param_db_type = json_data["type"]
if param_db_type == ParamDBType.DATETIME:
return datetime.fromtimestamp(
json_data["timestamp"], timezone.utc
).astimezone()
if _ASTROPY_INSTALLED and param_db_type == ParamDBType.QUANTITY:
return Quantity(value=json_data["value"], unit=json_data["unit"])
if param_db_type == ParamDBType.LIST:
return [_decode_json(item) for item in json_data["data"]]
if param_db_type == ParamDBType.DICT:
return {
key: _decode_json(value) for key, value in json_data["data"].items()
}
if param_db_type == ParamDBType.PARAM_DATA:
class_name = json_data.get("className", None)
param_class = (
_ParamWrapper if class_name is None else get_param_class(class_name)
)
if param_class is not None:
return param_class.from_json(_decode_json(json_data), timestamp)
return param_class.from_json(
_decode_json(json_data["data"]), json_data["lastUpdated"]
)
raise ValueError(
f"ParamData class '{class_name}' is not known to ParamDB, so the load"
" failed"
)
return json_data


def _encode(obj: Any) -> bytes:
"""Encode the given object into bytes that will be stored in the database."""
# pylint: disable=no-member
return _compress(json.dumps(_encode_json(obj), separators=(",", ":")))
def _encode(obj: Any, raw_json: bool) -> bytes:
"""
Encode the given object into bytes that will be stored in the database.
If ``raw_json`` is True, the object will be assumed to be a raw JSON string and will
only be compressed; otherwise, the given object will be first encoded as a JSON
string.
"""
return _compress(
obj if raw_json else json.dumps(_encode_json(obj), separators=(",", ":"))
)


def _decode(data: bytes, raw_json: bool) -> Any:
Expand Down Expand Up @@ -206,6 +221,9 @@ def __init__(self, path: str):
self._Session = sessionmaker(self._engine) # pylint: disable=invalid-name
_Base.metadata.create_all(self._engine)

def __repr__(self) -> str:
return f"{type(self).__name__}(path={self.path!r})"

def _index_error(self, commit_id: int | None) -> IndexError:
"""
Returns an ``IndexError`` to raise if the given commit ID was not found in the
Expand Down Expand Up @@ -250,18 +268,53 @@ def path(self) -> str:
"""Path of the database file."""
return self._path

@overload
def commit(
self,
message: str,
data: DataT,
timestamp: datetime | None = None,
*,
raw_json: Literal[False] = False,
) -> CommitEntry: ...

@overload
def commit(
self, message: str, data: DataT, timestamp: datetime | None = None
self,
message: str,
data: str,
timestamp: datetime | None = None,
*,
raw_json: Literal[True],
) -> CommitEntry: ...

def commit(
self,
message: str,
data: Any,
timestamp: datetime | None = None,
*,
raw_json: bool = False,
) -> CommitEntry:
"""
Commit the given data to the database with the given message and return a commit
entry for the new commit.
By default, the timestamp will be set to the current time. If a timestamp is
given, it is used instead. Naive datetimes will be assumed to be in UTC time.
By default, the given data will be converted into a JSON string, which is then
saved to the database; however, if ``raw_json`` is True, then the given data is
assumed to already be a JSON string in the format specified by
:py:meth:`ParamDB.load`. Note that any string will be accepted, so be careful
using this option. If the format is incorrect, loading this particular commit
may fail.
"""
with self._Session.begin() as session:
kwargs: dict[str, Any] = {"message": message, "data": _encode(data)}
kwargs: dict[str, Any] = {
"message": message,
"data": _encode(data, raw_json),
}
if timestamp is not None:
utc_offset = timestamp.utcoffset()
kwargs["timestamp"] = (
Expand Down Expand Up @@ -300,20 +353,19 @@ def load(self, commit_id: int | None = None, *, raw_json: bool = False) -> Any:
By default, objects are reconstructed, which requires the relevant parameter
data classes to be defined in the current program. However, if ``raw_json``
is True, the JSON data is returned directly from the database as a string.
The format of the JSON data is as follows (see :py:class:`ParamDBKey` for key
codes)::
The format of the JSON data is as follows::
json_data:
json_data =
| int
| float
| bool
| str
| None
| [ParamDBKey.DATETIME, float<timestamp>]
| [ParamDBKey.QUANTITY, float<value>, str<unit>]
| [ParamDBKey.LIST, [json_data<item>, ...]]
| [ParamDBKey.DICT, {str<key>: json_data<value>, ...}]
| [ParamDBKey.PARAM, json_data<data>, str<class_name> | None, float<timestamp>]
| {"type": "datetime", "timestamp": float}
| {"type": "Quantity", "value": float, "unit": str}
| {"type": "list", "data": [json_data, ...]}
| {"type": "dict", "data": {str: json_data, ...}}
| {"type": "ParamData", "className": str, "lastUpdated": float, "data": json_data}
""" # noqa: E501
select_stmt = self._select_commit(select(_Snapshot.data), commit_id)
with self._Session() as session:
Expand Down
37 changes: 19 additions & 18 deletions tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
ParamDB,
CommitEntry,
CommitEntryWithData,
ParamDBKey,
ParamDBType,
)
from paramdb._param_data._param_data import _param_classes

Expand Down Expand Up @@ -233,25 +233,26 @@ def test_raw_json_true(db_path: str, param_data: ParamData[Any]) -> None:

for data in data_loaded, data_from_history:
# Check that loaded dictionary has the correct type and keys
assert isinstance(data, list)
assert isinstance(data, dict)
assert len(data) == 4
key, json_data, class_name, timestamp = data
assert key == ParamDBKey.PARAM
assert class_name == type(param_data).__name__
assert timestamp == param_data.last_updated.timestamp()
assert data["type"] == ParamDBType.PARAM_DATA
assert data["className"] == type(param_data).__name__
assert data["lastUpdated"] == param_data.last_updated.timestamp()

# Check that the loaded JSON data has the correct data
assert isinstance(json_data, list)
json_data_key, json_data_data = json_data
contents = data["data"]
assert isinstance(contents, dict)
contents_type = contents["type"]
contents_data = contents["data"]
param_json_data = param_data.to_json()
if json_data_key == "l":
assert isinstance(json_data_data, list)
if contents_type == ParamDBType.LIST:
assert isinstance(contents_data, list)
assert isinstance(param_json_data, list)
assert len(json_data_data) == len(param_json_data)
elif json_data_key == "d":
assert isinstance(json_data_data, dict)
assert len(contents_data) == len(param_json_data)
elif contents_type == ParamDBType.DICT:
assert isinstance(contents_data, dict)
assert isinstance(param_json_data, dict)
assert json_data_data.keys() == param_json_data.keys()
assert contents_data.keys() == param_json_data.keys()
else:
assert False # Currently, all param_data have list or dict data

Expand All @@ -266,10 +267,10 @@ def test_load_classes_false_unknown_class(db_path: str) -> None:
data_from_history = json.loads(
param_db.commit_history_with_data(raw_json=True)[0].data
)
assert isinstance(data_loaded, list)
assert data_loaded[2] == Unknown.__name__
assert isinstance(data_from_history, list)
assert data_from_history[2] == Unknown.__name__
assert isinstance(data_loaded, dict)
assert data_loaded["className"] == Unknown.__name__
assert isinstance(data_from_history, dict)
assert data_from_history["className"] == Unknown.__name__


# pylint: disable-next=too-many-arguments,too-many-locals
Expand Down

0 comments on commit 45ca04e

Please sign in to comment.