diff --git a/CHANGELOG.md b/CHANGELOG.md index b6c08a4..cdff196 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + +- `LoggedProp` type hint to indicate class properties that should be logged by + `Logger.log_props()`. + +### Changed + +- `Logger.log_props()` only logs properties marked by the `LoggedProp` type hint in the + object's class. +- `Logger.convert_to_json()` handles Numpy values and Pandas DataFrames, and takes in an + optional `convert()` function. +- `Logger.log_dict()` and `Logger.log_props()` take in an optional `convert()` function to + pass to `Logger.convert_to_json()`. + ## [0.2.0] (Oct 4 2023) ### Added diff --git a/datalogger/__init__.py b/datalogger/__init__.py index dd3e35c..20cb8be 100644 --- a/datalogger/__init__.py +++ b/datalogger/__init__.py @@ -2,7 +2,7 @@ from datalogger._variables import Coord, DataVar from datalogger._logs import LogMetadata, DataLog, DictLog -from datalogger._logger import Logger +from datalogger._logger import LoggedProp, Logger from datalogger._load_log import load_log __all__ = [ @@ -11,6 +11,7 @@ "LogMetadata", "DataLog", "DictLog", + "LoggedProp", "Logger", "load_log", ] diff --git a/datalogger/_logger.py b/datalogger/_logger.py index 025adb3..cb861d7 100644 --- a/datalogger/_logger.py +++ b/datalogger/_logger.py @@ -1,10 +1,14 @@ -"""Data logging classes.""" +"""Data logging class.""" from __future__ import annotations -from typing import TypeVar, Any, overload +from typing import TypeVar, Generic, Any, overload, get_type_hints, get_origin from collections.abc import Callable, Sequence, Collection, Mapping +from abc import ABC, abstractmethod import os +import sys from datetime import datetime, timezone +import numpy as np +import pandas as pd # type: ignore from datalogger._variables import Coord, DataVar from datalogger._logs import LogMetadata, DataLog, DictLog from datalogger._get_filename import get_filename @@ -17,8 +21,8 @@ PARAMDB_INSTALLED = False -# Log type variable -_LT = TypeVar("_LT", DataLog, DictLog) +_T = TypeVar("_T") # Any type variable +_LT = TypeVar("_LT", DataLog, DictLog) # Log type variable def _now() -> datetime: @@ -26,6 +30,25 @@ def _now() -> datetime: return datetime.now(timezone.utc).astimezone() +class LoggedProp(Generic[_T], ABC): + """ + Used as a type hint to indicate that properties of a class should be logged by + :py:meth:`Logger.log_props`. + + Note that this class is only meant to be used as a type hint, not instantiated. + """ + + @abstractmethod + def __get__( + self, instance: Any | None, owner: Any | None = None + ) -> _T: # pragma: no cover + ... + + @abstractmethod + def __set__(self, instance: Any, value: _T) -> None: # pragma: no cover + ... + + class Logger: """ Logger corresponding to a directory that generates log files and @@ -181,6 +204,7 @@ def log_data( description: str, coords: Coord | Sequence[Coord], data_vars: DataVar | Sequence[DataVar], + *, commit_id: int | None = None, ) -> DataLog: """ @@ -196,58 +220,103 @@ def make_log(log_metadata: LogMetadata) -> DataLog: return self._log(make_log, description, commit_id) + @classmethod + def convert_to_json( + cls, obj: Any, convert: Callable[[Any], Any] | None = None + ) -> Any: + """ + Return a JSON-serializable version of the given object. This function is used to + convert objects to JSON for :py:meth:`Logger.log_dict` and + :py:meth:`Logger.log_props`. + + 1. If provided, ``convert()`` will be used to convert the object. + + 2. Numpy scalars will be unpacked and Pandas DataFrames will be converted to + dictionaries. + + 3. ``Mapping`` and ``Collection`` objects will be converted to dictionaries and + lists, with keys converted to strings and values converted according to these + rules. + + 4. Other non-JSON-serializable values will be converted to ``repr()`` strings. + """ + if convert is not None: + obj = convert(obj) + if isinstance(obj, (np.generic, np.ndarray)) and obj.ndim == 0: + obj = obj.item() # Unpack NumPy scalars to simple Python values + if isinstance(obj, pd.DataFrame): + obj = obj.to_dict() # Convert DataFrames to dictionaries + if isinstance(obj, (str, int, float, bool)) or obj is None: + return obj + if isinstance(obj, Mapping): + return {str(k): cls.convert_to_json(v, convert) for k, v in obj.items()} + if isinstance(obj, Collection): + return [cls.convert_to_json(v, convert) for v in obj] + return repr(obj) + def log_dict( - self, description: str, dict_data: dict[str, Any], commit_id: int | None = None + self, + description: str, + dict_data: dict[str, Any], + *, + commit_id: int | None = None, + convert: Callable[[Any], Any] | None = None, ) -> DictLog: """ Save the given dictionary data and corresponding metadata in a JSON file, and return a :py:class:`DictLog` with this data and metadata. + Objects will be converted according to :py:meth:`Logger.convert_to_json`, with + ``convert()`` passed to that function. + The log will be tagged with the given commit ID, or the latest commit ID if none is given (and if this Logger has a corresponding ParamDB). """ def make_log(log_metadata: LogMetadata) -> DictLog: - return DictLog(log_metadata, dict_data) + return DictLog(log_metadata, self.convert_to_json(dict_data, convert)) return self._log(make_log, description, commit_id) - @classmethod - def _convert_to_json(cls, obj: Any) -> Any: - """ - Return a JSON-serializable version of the given object by converting ``Mapping`` - and ``Collection`` objects to dictionaries and lists, converting other - non-JSON-serializable values to ``repr`` strings, and converting all dictionary - keys to strings. - """ - if isinstance(obj, (str, int, float, bool)) or obj is None: - return obj - if isinstance(obj, Mapping): - return {str(k): cls._convert_to_json(v) for k, v in obj.items()} - if isinstance(obj, Collection): - return [cls._convert_to_json(v) for v in obj] - return repr(obj) - def log_props( - self, description: str, obj: Any, commit_id: int | None = None + self, + description: str, + obj: Any, + *, + commit_id: int | None = None, + convert: Callable[[Any], Any] | None = None, ) -> DictLog: """ Save a dictionary of the given object's properties and corresponding metadata in - a JSON file, and return a :py:class:`DictLog` with this data and metadata. The - object must be one with properties (i.e. one that has a ``__dict__`` property). + a JSON file, and return a :py:class:`DictLog` with this data and metadata. + + Only properties that have been marked with a :py:class:`LoggedProp` type hint at + the top of the class definition will be saved. For example:: - This function will attempt to convert values that are not JSON-serializable to - lists or dictionaries, and otherwise will convert them to string - representations. This is intended to save a snapshot of the current properties - of the given object, but makes no guarentees that all information is saved. + class Example: + value: LoggedProp + number: LoggedProp[float] + + Objects will be converted according to :py:meth:`Logger.convert_to_json`, with + ``convert()`` passed to that function. The log will be tagged with the given commit ID, or the latest commit ID if none is given (and if this Logger has a corresponding ParamDB). """ + obj_class = type(obj) + logged_props: dict[str, Any] = {} try: - obj_vars = vars(obj) - except TypeError as exc: - raise TypeError( - f"'{type(obj).__name__}' object is not supported by log_props" + type_hints = get_type_hints(obj_class) + except Exception as exc: + python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + raise RuntimeError( + f"cannot log properties of '{obj_class.__name__}' object because its" + f" class type hints are invalid in Python {python_version}" ) from exc - return self.log_dict(description, self._convert_to_json(obj_vars), commit_id) + for name, type_hint in type_hints.items(): + if type_hint is LoggedProp or get_origin(type_hint) is LoggedProp: + if hasattr(obj, name): + logged_props[name] = getattr(obj, name) + return self.log_dict( + description, logged_props, commit_id=commit_id, convert=convert + ) diff --git a/docs/api-reference.md b/docs/api-reference.md index 568ea74..f4d8f64 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -10,6 +10,7 @@ All of the following can be imported from `datalogger`. ```{eval-rst} .. autoclass:: Logger +.. autoclass:: LoggedProp ``` ## Load Log diff --git a/docs/usage.md b/docs/usage.md index 9ecf4c0..435c58f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -135,9 +135,10 @@ display_tree("data_logs") ### Dictionary Logs -Dictionary logs store `dict` data in JSON files. The data contained in the dictionary must -be compatible with Python's [`JSONEncoder`], and keys should be strings. We can create a -dictionary log using {py:meth}`Logger.log_dict`. +Dictionary logs store `dict` data in JSON files. The data stored in the dictionary log +will be converted to JSON-serializable types according to +{py:meth}`Logger.convert_to_json`. We can create a dictionary log using +{py:meth}`Logger.log_dict`. ```{jupyter-execute} node_logger.log_dict( @@ -156,29 +157,28 @@ display_tree("data_logs") ### Property Logs -Property logs store the properties of an arbitrary object (which must have a `__dict__` -attribute, see documentation for [`vars()`] for more information). +Property logs automatically store the properties of an object within a dictionary log. +Only properties marked with the type hint {py:class}`LoggedProp` will be saved. We can +create a property log using {py:meth}`Logger.log_props`. -```{warning} -Property logs are built on top of dictionary logs, so they can only store JSON-compatible -data. For this log type, non-string keys are converted to strings and values that are not -JSON-compatible are converted to lists or dictionaries if possible, and if not are -converted to strings. - -This means that a property log does not guarentee to store all information contained -within a given object. Instead, it is meant to function as a quick way to create a general -snapshot of the object. For data that must be stored and recovered exactly, use data or -dictionary logs. +```{note} +{py:class}`LoggedProp` can optionally take in a type parameter representing the type of +the variable, which is only used by code analysis tools. ``` -We can create a property log using {py:meth}`Logger.log_props`. - ```{jupyter-execute} +from typing import Optional +from datalogger import LoggedProp + class SpecNode: + _element: LoggedProp + xy_f_rf: LoggedProp[int] + xy_f_if: LoggedProp[Optional[int]] + def __init__(self, element: str) -> None: self._element = element self.xy_f_rf = 379500822 - self.xy_f_if = 95008227 + self.xy_f_if = None self.xy_f_lo = 3700000000 q1_spec_node = SpecNode("q1") diff --git a/tests/test_logger.py b/tests/test_logger.py index 7103b3c..526a922 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -1,22 +1,29 @@ """Tests for datalogger._logger.""" +# pylint: disable=too-few-public-methods +# pylint: disable=missing-class-docstring +# pylint: disable=attribute-defined-outside-init + from __future__ import annotations -from typing import Any +from typing import Any, Union, Optional +from collections.abc import Callable import os +import sys from datetime import datetime +import numpy as np +import pandas as pd # type: ignore import xarray as xr import xarray.testing import pytest from freezegun import freeze_time from paramdb import ParamDB from datalogger._get_filename import get_filename -from datalogger import Coord, DataVar, Logger +from datalogger import Coord, DataVar, LoggedProp, Logger +PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}" -# pylint: disable-next=too-few-public-methods -class Obj: - """Class for creating objects in the following tests.""" +class Obj: def __repr__(self) -> str: return "" @@ -155,10 +162,69 @@ def test_log_data( xarray.testing.assert_identical(data_log.data, xarray_data) -def test_log_dict_not_dict_fails(logger: Logger) -> None: +@pytest.mark.parametrize( + "obj,expected_converted", + [ + ("test", "test"), + (123, 123), + (1.23, 1.23), + (True, True), + ([1, 2, 3], [1, 2, 3]), + ((1, 2, 3), [1, 2, 3]), + ({1, 2, 3}, [1, 2, 3]), + ({"p1": 123, "p2": 456}, {"p1": 123, "p2": 456}), + ({"p1": [(1, Obj()), 2]}, {"p1": [[1, ""], 2]}), + ( + {(1, 2): 1, 12: 2, 1.2: 3, None: 4, False: 5}, + {"(1, 2)": 1, "12": 2, "1.2": 3, "None": 4, "False": 5}, + ), + (Obj(), ""), + (np.int32(123), 123), + (np.float64(1.23), 1.23), + (np.array(123), 123), + (np.array(1.23), 1.23), + ( + pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}), + {"col1": {"0": 1, "1": 2}, "col2": {"0": 3, "1": 4}}, + ), + ], +) +def test_convert_to_json(obj: Any, expected_converted: Any) -> None: + """A logger can convert a given object to a JSON-serializable object.""" + converted = Logger.convert_to_json(obj) + assert converted == expected_converted + assert type(converted) is type(expected_converted) + + +@pytest.mark.parametrize( + "obj,convert,expected_converted", + [ + ("test", lambda obj: 123, 123), + (123, lambda obj: obj * 2, 246), + ((1, 2, 3), lambda obj: obj + 1 if isinstance(obj, int) else obj, [2, 3, 4]), + ( + {"p1": 1, "p2": Obj()}, + lambda obj: "Obj()" if isinstance(obj, Obj) else obj, + {"p1": 1, "p2": "Obj()"}, + ), + ], +) +def test_convert_to_json_convert( + obj: Any, convert: Callable[[Any], Any] | None, expected_converted: Any +) -> None: + """ + A logger can use a convert function to convert a given object to a JSON-serializable + object. + """ + converted = Logger.convert_to_json(obj, convert) + assert converted == expected_converted + assert type(converted) is type(expected_converted) + + +def test_log_dict_not_dict_fails(root_logger: Logger) -> None: """A logger fails to save a dict log when a non-dict object is passed.""" with pytest.raises(TypeError) as exc_info: - logger.log_dict("test_dict", 123) # type: ignore + root_logger.log_dict("test_dict", 123) # type: ignore assert str(exc_info.value) == "'int' data given for dict log 'test_dict'" @@ -178,21 +244,86 @@ def test_log_dict( assert dict_log.data == dict_data -def test_log_props_unsupported_fails(logger: Logger) -> None: +@pytest.mark.parametrize( + "dict_data,expected_converted", + [ + ({"p1": 123, "p2": 456}, {"p1": 123, "p2": 456}), + ({"p1": [(1, Obj()), 2]}, {"p1": [[1, ""], 2]}), + ( + {(1, 2): 1, 12: 2, 1.2: 3, None: 4, False: 5}, + {"(1, 2)": 1, "12": 2, "1.2": 3, "None": 4, "False": 5}, + ), + ], +) +def test_log_dict_convert_to_json( + root_logger: Logger, dict_data: dict[Any, Any], expected_converted: Any +) -> None: + """A logger converts data to JSON before storing in a dict_log.""" + dict_log = root_logger.log_dict("test_dict", dict_data) + assert dict_log.data == expected_converted + + +@pytest.mark.parametrize( + "dict_data,convert,expected_converted", + [ + ( + {"p1": 123, "p2": 456}, + lambda obj: obj + 1 if isinstance(obj, int) else obj, + {"p1": 124, "p2": 457}, + ), + ( + {"p1": [(1, Obj()), 2]}, + lambda obj: "Obj()" if isinstance(obj, Obj) else obj, + {"p1": [[1, "Obj()"], 2]}, + ), + ], +) +def test_log_dict_convert_to_json_convert( + root_logger: Logger, + dict_data: dict[Any, Any], + convert: Callable[[Any], Any] | None, + expected_converted: Any, +) -> None: """ - A logger fails to save an object property log when an unsupported object is passed - (one without a ``__dict__`` property). + A logger can use a convert function to convert data to JSON before storing in a + dict_log. """ - with pytest.raises(TypeError) as exc_info: - logger.log_props("test_props", 123) - assert str(exc_info.value) == "'int' object is not supported by log_props" + dict_log = root_logger.log_dict("test_dict", dict_data, convert=convert) + assert dict_log.data == expected_converted -def test_log_props_basic(logger: Logger, timestamp: datetime) -> None: +if PYTHON_VERSION == "3.9": + + def test_python39_log_props_invalid_type_hints_fails(root_logger: Logger) -> None: + """ + A logger fails to log the properties of an object whose class type hints are + invalid for Python 3.9. + """ + + class LogPropsObj: + p1: LoggedProp[int | float] + + with pytest.raises(RuntimeError) as exc_info: + root_logger.log_props("test_props", LogPropsObj()) + assert ( + str(exc_info.value) + == "cannot log properties of 'LogPropsObj' object because its class type" + f" hints are invalid in Python {PYTHON_VERSION}" + ) + + +def test_log_props(logger: Logger, timestamp: datetime) -> None: """A logger can create and save an object property log for a basic object.""" - obj = Obj() - setattr(obj, "p1", 1) - setattr(obj, "p2", 2) + + class LogPropsObj: + p1: LoggedProp[int] + p2: bool + p3: LoggedProp[str] + + obj = LogPropsObj() + obj.p1 = 123 + obj.p2 = False + obj.p3 = "test" with freeze_time(timestamp): props_log = logger.log_props("test_props", obj) assert os.path.exists(props_log.path) @@ -202,41 +333,115 @@ def test_log_props_basic(logger: Logger, timestamp: datetime) -> None: assert log_metadata.description == "test_props" assert log_metadata.commit_id is None assert log_metadata.param_db_path is None - assert props_log.data == {"p1": 1, "p2": 2} + assert props_log.data == {"p1": 123, "p3": "test"} @pytest.mark.parametrize( - "props,props_log_data", + "annotations,props,expected_logged", + [ + ( + {"p1": LoggedProp[int], "p2": bool, "p3": LoggedProp[str]}, + {"p1": 123, "p2": False, "p3": "test"}, + {"p1": 123, "p3": "test"}, + ), + ( + {"p1": "LoggedProp[int]", "p2": "bool", "p3": "LoggedProp[str]"}, + {"p1": 123, "p2": False, "p3": "test"}, + {"p1": 123, "p3": "test"}, + ), + ( + { + "p1": LoggedProp[Union[int, str]], + "p2": Optional[LoggedProp], + "p3": LoggedProp[Optional[str]], + }, + {"p1": 123, "p2": False, "p3": None}, + {"p1": 123, "p3": None}, + ), + ], +) +def test_log_props_type_hints( + root_logger: Logger, + annotations: dict[str, Any], + props: dict[str, Any], + expected_logged: dict[str, Any], +) -> None: + """ + A logger only logs the properties of objects that are marked with a LoggedProp + annotation. + """ + + class LogPropsObj: + __annotations__ = annotations + + obj = LogPropsObj() + for k, v in props.items(): + setattr(obj, k, v) + + props_log = root_logger.log_props("test_props", obj) + assert props_log.data == expected_logged + + +@pytest.mark.parametrize( + "props,expected_converted", [ ({"p1": 123, "p2": 456}, {"p1": 123, "p2": 456}), - ({"str": "test"}, {"str": "test"}), - ({"int": 123}, {"int": 123}), - ({"float": 1.23}, {"float": 1.23}), - ({"bool": True}, {"bool": True}), - ({"list": [1, 2, 3]}, {"list": [1, 2, 3]}), - ({"tuple": (1, 2, 3)}, {"tuple": [1, 2, 3]}), - ({"dict": {"p1": 1, "p2": 2}}, {"dict": {"p1": 1, "p2": 2}}), - ({"nested": {"p1": [(1, 2), 3]}}, {"nested": {"p1": [[1, 2], 3]}}), + ({"p1": [(1, Obj()), 2]}, {"p1": [[1, ""], 2]}), + ], +) +def test_log_props_convert_to_json( + root_logger: Logger, props: dict[str, Any], expected_converted: Any +) -> None: + """A logger converts properties to JSON before logging them.""" + + class LogPropsObj: + __annotations__: dict[str, Any] + + obj = LogPropsObj() + for k, v in props.items(): + LogPropsObj.__annotations__[k] = LoggedProp + setattr(obj, k, v) + + props_log = root_logger.log_props("test_props", obj) + assert props_log.data == expected_converted + + +@pytest.mark.parametrize( + "props,convert,expected_converted", + [ ( - {"nonstring_keys": {(1, 2): 1, 12: 2, 1.2: 3, None: 4, False: 5}}, - {"nonstring_keys": {"(1, 2)": 1, "12": 2, "1.2": 3, "None": 4, "False": 5}}, + {"p1": 123, "p2": 456}, + lambda obj: obj + 1 if isinstance(obj, int) else obj, + {"p1": 124, "p2": 457}, + ), + ( + {"p1": [(1, Obj()), 2]}, + lambda obj: "Obj()" if isinstance(obj, Obj) else obj, + {"p1": [[1, "Obj()"], 2]}, ), - ({"obj": Obj()}, {"obj": ""}), ], ) -def test_log_props( - root_logger: Logger, props: dict[Any, Any], props_log_data: dict[str, Any] +def test_log_props_convert_to_json_convert( + root_logger: Logger, + props: dict[str, Any], + convert: Callable[[Any], Any] | None, + expected_converted: Any, ) -> None: """ - A logger can create and save object property logs for a range of object property - values. + A logger can use a convert function to convert properties to JSON before logging + them. """ - obj = Obj() + + class LogPropsObj: + __annotations__: dict[str, Any] + + obj = LogPropsObj() for k, v in props.items(): + LogPropsObj.__annotations__[k] = LoggedProp setattr(obj, k, v) - props_log = root_logger.log_props("test", obj) - assert os.path.exists(props_log.path) - assert props_log.data == props_log_data + + props_log = root_logger.log_props("test_props", obj, convert=convert) + assert props_log.data == expected_converted @pytest.mark.usefixtures("cd_tempdir")