Skip to content

Commit

Permalink
Merge pull request #38 from PainterQubits/#37-filtering-for-property-…
Browse files Browse the repository at this point in the history
…logs

#37 Filtering for property logs
  • Loading branch information
alexhad6 authored Nov 6, 2023
2 parents 8c9e594 + 36454ed commit b66d20a
Show file tree
Hide file tree
Showing 6 changed files with 381 additions and 91 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- `LoggedProp` type hint to indicate class properties that should be logged by
`Logger.log_props()`.

### Changed

- `Logger.log_props()` only logs properties marked by the `LoggedProp` type hint in the
object's class.
- `Logger.convert_to_json()` handles NumPy values and Pandas DataFrames, and takes in an
optional `convert()` function.
- `Logger.log_dict()` and `Logger.log_props()` take in an optional `convert()` function to
pass to `Logger.convert_to_json()`.

## [0.2.0] (Oct 4 2023)

### Added
Expand Down
3 changes: 2 additions & 1 deletion datalogger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from datalogger._variables import Coord, DataVar
from datalogger._logs import LogMetadata, DataLog, DictLog
from datalogger._logger import Logger
from datalogger._logger import LoggedProp, Logger
from datalogger._load_log import load_log

__all__ = [
Expand All @@ -11,6 +11,7 @@
"LogMetadata",
"DataLog",
"DictLog",
"LoggedProp",
"Logger",
"load_log",
]
137 changes: 103 additions & 34 deletions datalogger/_logger.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""Data logging classes."""
"""Data logging class."""

from __future__ import annotations
from typing import TypeVar, Any, overload
from typing import TypeVar, Generic, Any, overload, get_type_hints, get_origin
from collections.abc import Callable, Sequence, Collection, Mapping
from abc import ABC, abstractmethod
import os
import sys
from datetime import datetime, timezone
import numpy as np
import pandas as pd # type: ignore
from datalogger._variables import Coord, DataVar
from datalogger._logs import LogMetadata, DataLog, DictLog
from datalogger._get_filename import get_filename
Expand All @@ -17,15 +21,34 @@
PARAMDB_INSTALLED = False


# Log type variable
# NOTE(review): this definition is repeated two lines below; the pair looks like the
# before/after sides of the diff, so only one is expected in the real module — confirm.
_LT = TypeVar("_LT", DataLog, DictLog)
_T = TypeVar("_T") # Unconstrained type variable; parameterizes the generic LoggedProp class
_LT = TypeVar("_LT", DataLog, DictLog) # Log type variable, constrained to DataLog or DictLog


def _now() -> datetime:
    """Return the current time as a timezone-aware ``datetime`` in the local timezone."""
    # Start from an aware UTC timestamp, then let ``astimezone()`` (called with no
    # argument) convert it to the system's local timezone.
    utc_now = datetime.now(timezone.utc)
    return utc_now.astimezone()


class LoggedProp(Generic[_T], ABC):
    """
    Marker type hint for class properties that :py:meth:`Logger.log_props` should log.

    Annotate an attribute in the class body with ``LoggedProp`` or
    ``LoggedProp[SomeType]`` to mark it. This class is intended only for use in
    annotations; it is not meant to be instantiated.
    """

    # Both methods are abstract declarations with no behavior of their own.
    # NOTE(review): presumably they exist so static analysis tools treat a marked
    # attribute as readable and writable with type ``_T`` — confirm.

    @abstractmethod
    def __get__(
        self,
        instance: Any | None,
        owner: Any | None = None,
    ) -> _T:  # pragma: no cover
        """Declare the type produced when a marked attribute is read."""

    @abstractmethod
    def __set__(
        self,
        instance: Any,
        value: _T,
    ) -> None:  # pragma: no cover
        """Declare the type accepted when a marked attribute is assigned."""


class Logger:
"""
Logger corresponding to a directory that generates log files and
Expand Down Expand Up @@ -181,6 +204,7 @@ def log_data(
description: str,
coords: Coord | Sequence[Coord],
data_vars: DataVar | Sequence[DataVar],
*,
commit_id: int | None = None,
) -> DataLog:
"""
Expand All @@ -196,58 +220,103 @@ def make_log(log_metadata: LogMetadata) -> DataLog:

return self._log(make_log, description, commit_id)

@classmethod
def convert_to_json(
cls, obj: Any, convert: Callable[[Any], Any] | None = None
) -> Any:
"""
Return a JSON-serializable version of the given object. This function is used to
convert objects to JSON for :py:meth:`Logger.log_dict` and
:py:meth:`Logger.log_props`.
1. If provided, ``convert()`` will be used to convert the object.
2. Numpy scalars will be unpacked and Pandas DataFrames will be converted to
dictionaries.
3. ``Mapping`` and ``Collection`` objects will be converted to dictionaries and
lists, with keys converted to strings and values converted according to these
rules.
4. Other non-JSON-serializable values will be converted to ``repr()`` strings.
"""
if convert is not None:
obj = convert(obj)
if isinstance(obj, (np.generic, np.ndarray)) and obj.ndim == 0:
obj = obj.item() # Unpack NumPy scalars to simple Python values
if isinstance(obj, pd.DataFrame):
obj = obj.to_dict() # Convert DataFrames to dictionaries
if isinstance(obj, (str, int, float, bool)) or obj is None:
return obj
if isinstance(obj, Mapping):
return {str(k): cls.convert_to_json(v, convert) for k, v in obj.items()}
if isinstance(obj, Collection):
return [cls.convert_to_json(v, convert) for v in obj]
return repr(obj)

def log_dict(
# NOTE(review): the one-line parameter list directly below looks like the pre-change
# side of the diff; the keyword-only list that follows it appears to be the current
# signature — confirm which one applies.
self, description: str, dict_data: dict[str, Any], commit_id: int | None = None
self,
description: str,
dict_data: dict[str, Any],
*,
commit_id: int | None = None,
convert: Callable[[Any], Any] | None = None,
) -> DictLog:
"""
Save the given dictionary data and corresponding metadata in a JSON file, and
return a :py:class:`DictLog` with this data and metadata.

Objects will be converted according to :py:meth:`Logger.convert_to_json`, with
``convert()`` passed to that function.

The log will be tagged with the given commit ID, or the latest commit ID if none
is given (and if this Logger has a corresponding ParamDB).
"""

# Factory handed to self._log() below; it builds the DictLog from the LogMetadata
# it receives.
def make_log(log_metadata: LogMetadata) -> DictLog:
# NOTE(review): the first return below stores dict_data unconverted and looks like
# the pre-change line; the second runs it through convert_to_json() — confirm.
return DictLog(log_metadata, dict_data)
return DictLog(log_metadata, self.convert_to_json(dict_data, convert))

return self._log(make_log, description, commit_id)

@classmethod
def _convert_to_json(cls, obj: Any) -> Any:
"""
Return a JSON-serializable version of the given object by converting ``Mapping``
and ``Collection`` objects to dictionaries and lists, converting other
non-JSON-serializable values to ``repr`` strings, and converting all dictionary
keys to strings.
"""
if isinstance(obj, (str, int, float, bool)) or obj is None:
return obj
if isinstance(obj, Mapping):
return {str(k): cls._convert_to_json(v) for k, v in obj.items()}
if isinstance(obj, Collection):
return [cls._convert_to_json(v) for v in obj]
return repr(obj)

def log_props(
# NOTE(review): the one-line parameter list directly below looks like the pre-change
# side of the diff; the keyword-only list that follows it appears to be the current
# signature — confirm which one applies.
self, description: str, obj: Any, commit_id: int | None = None
self,
description: str,
obj: Any,
*,
commit_id: int | None = None,
convert: Callable[[Any], Any] | None = None,
) -> DictLog:
"""
Save a dictionary of the given object's properties and corresponding metadata in
a JSON file, and return a :py:class:`DictLog` with this data and metadata. The
object must be one with properties (i.e. one that has a ``__dict__`` property).
a JSON file, and return a :py:class:`DictLog` with this data and metadata.
Only properties that have been marked with a :py:class:`LoggedProp` type hint at
the top of the class definition will be saved. For example::
This function will attempt to convert values that are not JSON-serializable to
lists or dictionaries, and otherwise will convert them to string
representations. This is intended to save a snapshot of the current properties
of the given object, but makes no guarantees that all information is saved.
class Example:
value: LoggedProp
number: LoggedProp[float]
Objects will be converted according to :py:meth:`Logger.convert_to_json`, with
``convert()`` passed to that function.
The log will be tagged with the given commit ID, or the latest commit ID if none
is given (and if this Logger has a corresponding ParamDB).
"""
obj_class = type(obj)
logged_props: dict[str, Any] = {}
# NOTE(review): the vars()/TypeError lines inside this try block look like the
# pre-change implementation; the get_type_hints()/RuntimeError lines appear to be
# the LoggedProp-based replacement — confirm which side is live.
try:
obj_vars = vars(obj)
except TypeError as exc:
raise TypeError(
f"'{type(obj).__name__}' object is not supported by log_props"
type_hints = get_type_hints(obj_class)
except Exception as exc:
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
raise RuntimeError(
f"cannot log properties of '{obj_class.__name__}' object because its"
f" class type hints are invalid in Python {python_version}"
) from exc
# NOTE(review): this return goes with the vars() path above (pre-change) — confirm.
return self.log_dict(description, self._convert_to_json(obj_vars), commit_id)
# Keep only names whose resolved hint is bare LoggedProp or a parameterized
# LoggedProp[...], and only when the attribute is actually present on obj.
for name, type_hint in type_hints.items():
if type_hint is LoggedProp or get_origin(type_hint) is LoggedProp:
if hasattr(obj, name):
logged_props[name] = getattr(obj, name)
# The collected values are handed to log_dict(), forwarding commit_id and convert.
return self.log_dict(
description, logged_props, commit_id=commit_id, convert=convert
)
1 change: 1 addition & 0 deletions docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ All of the following can be imported from `datalogger`.

```{eval-rst}
.. autoclass:: Logger
.. autoclass:: LoggedProp
```

## Load Log
Expand Down
36 changes: 18 additions & 18 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,10 @@ display_tree("data_logs")

### Dictionary Logs

Dictionary logs store `dict` data in JSON files. The data contained in the dictionary must
be compatible with Python's [`JSONEncoder`], and keys should be strings. We can create a
dictionary log using {py:meth}`Logger.log_dict`.
Dictionary logs store `dict` data in JSON files. The data stored in the dictionary log
will be converted to JSON-serializable types according to
{py:meth}`Logger.convert_to_json`. We can create a dictionary log using
{py:meth}`Logger.log_dict`.

```{jupyter-execute}
node_logger.log_dict(
Expand All @@ -156,29 +157,28 @@ display_tree("data_logs")

### Property Logs

Property logs store the properties of an arbitrary object (which must have a `__dict__`
attribute, see documentation for [`vars()`] for more information).
Property logs automatically store the properties of an object within a dictionary log.
Only properties marked with the type hint {py:class}`LoggedProp` will be saved. We can
create a property log using {py:meth}`Logger.log_props`.

```{warning}
Property logs are built on top of dictionary logs, so they can only store JSON-compatible
data. For this log type, non-string keys are converted to strings and values that are not
JSON-compatible are converted to lists or dictionaries if possible, and if not are
converted to strings.
This means that a property log is not guaranteed to store all information contained
within a given object. Instead, it is meant to function as a quick way to create a general
snapshot of the object. For data that must be stored and recovered exactly, use data or
dictionary logs.
```{note}
{py:class}`LoggedProp` can optionally take in a type parameter representing the type of
the variable, which is only used by code analysis tools.
```

We can create a property log using {py:meth}`Logger.log_props`.

```{jupyter-execute}
from typing import Optional
from datalogger import LoggedProp
class SpecNode:
_element: LoggedProp
xy_f_rf: LoggedProp[int]
xy_f_if: LoggedProp[Optional[int]]
def __init__(self, element: str) -> None:
self._element = element
self.xy_f_rf = 379500822
self.xy_f_if = 95008227
self.xy_f_if = None
self.xy_f_lo = 3700000000
q1_spec_node = SpecNode("q1")
Expand Down
Loading

0 comments on commit b66d20a

Please sign in to comment.