Skip to content

Commit

Permalink
Merge pull request #35 from PainterQubits/#34-option-for-sublogger-to…
Browse files Browse the repository at this point in the history
…-not-add-timestamp

#34 Option for sub-Logger to not add timestamp
  • Loading branch information
alexhad6 authored Oct 4, 2023
2 parents fd80067 + c13e065 commit c21108a
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 57 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- Ability to create sub-`Logger`s with no timestamp using the `timestamp` option.

### Changed

- Sub-`Logger`s with timestamps create their directory as soon as `Logger.directory` is
first accessed, rather than when a log is created.

## [0.1.1] (Aug 30 2023)

### Added
Expand Down
67 changes: 45 additions & 22 deletions datalogger/_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ class Logger:
Otherwise, ``parent`` and ``description`` must be given, and this will be a
sub-:py:class:`Logger` object that corresponds to a subdirectory within its parent's
directory (and uses its parent's ParamDB, if given).
directory (and uses its parent's ParamDB, if given). See
:py:meth:`Logger.sub_logger` for an explanation of the ``timestamp`` option.
"""

@overload
Expand All @@ -47,16 +48,20 @@ def __init__(
...

@overload
def __init__(self, *, parent: Logger, description: str) -> None: # pragma: no cover
def __init__(
self, *, parent: Logger, description: str, timestamp: bool = True
) -> None: # pragma: no cover
...

# pylint: disable-next=too-many-arguments
def __init__(
self,
root_directory: str | None = None,
param_db: ParamDB[Any] | None = None,
*,
parent: Logger | None = None,
description: str | None = None,
timestamp: bool = True,
) -> None:
if root_directory is None:
if parent is None:
Expand All @@ -73,35 +78,61 @@ def __init__(
self._name = root_directory
self._parent = parent
self._description = description
self._timestamp = timestamp
self._param_db: ParamDB[Any] | None = (
parent._param_db if parent is not None else param_db
)
if root_directory is not None:
self._create_directory()
if root_directory is not None or not timestamp:
# Generate this logger's directory, if it is a root Logger or a sub-Logger
# with no timestamp.
self.directory # pylint: disable=pointless-statement

def sub_logger(self, description: str) -> Logger:
def sub_logger(self, description: str, timestamp: bool = True) -> Logger:
"""
Create a new sub-:py:class:`Logger` with the given description corresponding to
a subdirectory within the parent :py:class:`Logger`.
By default, ``timestamp`` is True, meaning that the directory name will include
a timestamp corresponding to when it was created. (Note that the directory will
be created when first needed so that the timestamp more accurately reflects when
its content was created.)
If ``timestamp`` is False, the directory name will not include a timestamp. If
there is an existing directory, it will be used. If not, a new directory will be
created immediately.
"""
return Logger(parent=self, description=description)
return Logger(parent=self, description=description, timestamp=timestamp)

@property
def directory(self) -> str:
"""Directory where this logger saves subdirectories or files."""
"""
Directory where this logger saves subdirectories or files.
If the directory does not yet exist (i.e. if this is a sub-:py:class:`Logger`
with a timestamp), it is created.
"""
if self._name is None:
# If self._name is None, both self._parent and self._description should have
# been defined in self.__init__().
assert self._parent is not None, "sub-Logger must have a parent"
assert self._description is not None, "sub-Logger must have a description"
self._name = get_filename(
self._parent.directory,
self._description,
timestamp=_now(),
self._name = (
get_filename(
self._parent.directory,
self._description,
timestamp=_now(),
)
if self._timestamp
else self._description
)
if self._parent is None:
return self._name
return os.path.join(self._parent.directory, self._name)
directory = (
self._name
if self._parent is None
else os.path.join(self._parent.directory, self._name)
)
if not os.path.exists(directory):
os.mkdir(directory)
return directory

def file_path(self, filename: str) -> str:
"""
Expand All @@ -113,13 +144,6 @@ def file_path(self, filename: str) -> str:
"""
return os.path.join(self.directory, filename)

def _create_directory(self) -> None:
"""Create the directory for this logger and its parents if they do not exist."""
if self._parent is not None:
self._parent._create_directory() # pylint: disable=protected-access
if not os.path.exists(self.directory):
os.mkdir(self.directory)

def _log(
self,
make_log: Callable[[LogMetadata], _LT],
Expand All @@ -138,7 +162,6 @@ def _log(
f" ParamDB '{self._param_db.path}' is empty"
)
commit_id = latest_commit.id
self._create_directory()
log = make_log(
LogMetadata(
directory=self.directory,
Expand Down
73 changes: 52 additions & 21 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,22 @@ os.chdir(tmp_dir.name)
## Background

DataLogger is a Python package to log array and dictionary data from scientific
experiments. These logs are stored in files (NetCDF for array data and JSON for
experiments. These logs are stored in files (netCDF for array data and JSON for
dictionary data). The log files are organized within a nested directory structure and
tagged with metadata, such as timestamp or optionally a commit ID from a [ParamDB]
database.

The original purpose of DataLogger was to store logs from graph calibrations, where
directories correspond to nodes in a graph, so the examples are based on this
The original purpose of DataLogger was to store logs from graph calibration experiments,
where directories correspond to nodes in a graph, so the examples below are based on this
application. However, the core functionality is very general.

## Logger Setup

### Root Logger

To log data, we first have to create a root {py:class}`Logger` object, passing the name
of the root directory.
To log data, we first have to create a root {py:class}`Logger` object, passing the path
(either relative or absolute) to the root directory. This directory will be created if it
does not exist.

```{jupyter-execute}
from datalogger import Logger
Expand All @@ -57,22 +58,37 @@ imported to parts of the code that use it.

### Sub-Loggers

We can then create a sub-{py:class}`Logger` objects, which will correspond to a
particular calibration graph and node.
We can also create sub-{py:class}`Logger` objects, which will correspond to subdirectories
within the root directory. By default, a sub-{py:class}`Logger` creates a new directory
with a timestamp. However, using the `timestamp` argument, it is possible to create
sub-{py:class}`Logger`s that, just like root loggers, contain no timestamp and immediately
create their directory if it does not exist.

For example, here we create a sub-{py:class}`Logger` with no timestamp to contain all
calibration experiments, and then timestamped sub-{py:class}`Logger`s to run a particular
experiment graph containing one node.

```{jupyter-execute}
graph_logger = root_logger.sub_logger("calibration_graph")
calibration_logger = root_logger.sub_logger("calibrations", timestamp=False)
graph_logger = calibration_logger.sub_logger("calibration_graph")
node_logger = graph_logger.sub_logger("q1_spec_node")
```

These will correspond to subdirectories within the root directory.
We can see that the directory `calibrations` is created immediately, while the timestamped
directories are not created yet.

```{jupyter-execute}
:hide-code:
display_tree("data_logs")
```

```{important}
Unlike the root {py:class}`Logger`, which creates its directory right away if it does not
exist, sub-{py:class}`Logger`s wait to create their directories until the first log file
is created using them.
Sub-{py:class}`Logger`s with timestamps wait to create their directories until their
directory path is accessed, either explicitly via {py:attr}`Logger.directory` or
internally, e.g. to create a log file.
This is done so that timestamps in directory names can reflect when the first log within
This is done so that timestamps in directory names can reflect when the first file within
them was created (often when that part of the experiment is being run), not when the
{py:class}`Logger` object was created (often when the entire experiment is being set up).
```
Expand All @@ -83,7 +99,8 @@ them was created (often when that part of the experiment is being run), not when

The first type of log that can be created is a data log, which contains multidimensional
array data. This type of log stores data in an [`xarray.Dataset`], which contains data
variables, coordinates, and attributes. The log is saved to a NetCDF
variables, coordinates, and attributes. The log is saved to a [netCDF] file via
[`xarray.Dataset.to_netcdf()`].

```{seealso}
To learn more about Xarray data, see [Data Structures] in the Xarray user guide.
Expand All @@ -107,7 +124,7 @@ node_logger.log_data(
)
```

The directories for the graph and node have now been created, along with the NetCDF log
The directories for the graph and node have now been created, along with the netCDF log
file.

```{jupyter-execute}
Expand Down Expand Up @@ -179,7 +196,7 @@ display_tree("data_logs")

## Loading

Logs can be loaded passing a file path to {py:func}`load_log`. We can also use
Logs can be loaded by passing a file path to {py:func}`load_log`. We can also use
{py:meth}`Logger.file_path` to aid in creating the file paths to logs. (The full path can
also be passed in directly if known.)

Expand All @@ -191,6 +208,19 @@ q1_spec_frequency_log = load_log(node_logger.file_path("q1_spec_frequency.json")
q1_spec_node_props_log = load_log(node_logger.file_path("q1_spec_node_props.json"))
```

Alternatively, logs can be loaded using {py:class}`DataLog` for data logs or
{py:class}`DictLog` for dictionary logs. This is not necessary since {py:func}`load_log`
already infers the log type from the file extension, but is useful for static type
checking when the log type is known.

```{jupyter-execute}
from datalogger import DataLog, DictLog
q1_spec_signal_log = DataLog.load(node_logger.file_path("q1_spec_signal.nc"))
q1_spec_frequency_log = DictLog.load(node_logger.file_path("q1_spec_frequency.json"))
q1_spec_node_props_log = DictLog.load(node_logger.file_path("q1_spec_node_props.json"))
```

### Accessing Data

Logs are represented as objects ({py:class}`DataLog` or {py:class}`DictLog` depending on
Expand Down Expand Up @@ -218,17 +248,17 @@ Metadata is also loaded in and can be accessed using {py:attr}`DataLog.metadata`
q1_spec_signal_log.metadata
```

Any property can be accessed as a property of this object. For example, we can get the
timestamp using {py:attr}`LogMetadata.timestamp`.
Metadata properties can be accessed as properties of this object. For example, we can get
the timestamp using {py:attr}`LogMetadata.timestamp`.

```{jupyter-execute}
q1_spec_signal_log.metadata.timestamp
```

## ParamDB Integration

Optionally, a [`ParamDB`] can be passed to a {py:class}`Logger`, in which case it will be
used to automatically tag logs with the latest commit ID.
Optionally, a [`ParamDB`] can be passed to a root {py:class}`Logger`, in which case it
will be used to automatically tag logs with the latest commit ID.

```{jupyter-execute}
from paramdb import ParamDB
Expand Down Expand Up @@ -256,7 +286,8 @@ param_db.dispose() # Fixes PermissionError on Windows

[ParamDB]: https://paramdb.readthedocs.io/en/stable/
[`xarray.Dataset`]: https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html
[NetCDF]: https://www.unidata.ucar.edu/software/netcdf/
[netCDF]: https://www.unidata.ucar.edu/software/netcdf/
[`xarray.Dataset.to_netcdf()`]: https://docs.xarray.dev/en/latest/generated/xarray.Dataset.to_netcdf.html
[Data Structures]: https://docs.xarray.dev/en/stable/user-guide/data-structures.html
[`JSONEncoder`]: https://docs.python.org/3/library/json.html#json.JSONEncoder
[`vars()`]: https://docs.python.org/3/library/functions.html#vars
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def fixture_root_logger(cd_tempdir: None) -> Logger:
@pytest.fixture(name="sub_logger")
def fixture_sub_logger(root_logger: Logger) -> Logger:
"""Sub-logger object."""
return root_logger.sub_logger("sub_logger")
return root_logger.sub_logger("sub_logger", timestamp=False)


@pytest.fixture(name="sub_sub_logger")
Expand Down
56 changes: 43 additions & 13 deletions tests/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,47 +57,77 @@ def test_sub_logger_no_description_fails() -> None:

@pytest.mark.usefixtures("cd_tempdir")
def test_root_logger_creates_directory() -> None:
"""A root Logger creates its directory when it is defined."""
"""
A root Logger creates its directory when it is defined, and another root logger will
reuse the same directory.
"""
assert not os.path.exists("dir")
Logger("dir")
root_logger_1 = Logger("dir")
assert os.path.exists("dir")
root_logger_2 = Logger("dir")
assert root_logger_1.directory == root_logger_2.directory


def test_sub_logger_creates_directory(logger: Logger, timestamp: datetime) -> None:
"""A sub-Logger creates its directory when a log is created."""
def test_sub_logger_no_timestamp_creates_directory(logger: Logger) -> None:
"""
A sub-Logger with no timestamp creates its directory when it is defined, and another
sub-Logger with no timestamp will reuse the same directory.
"""
sub_logger_dir = os.path.join(logger.directory, "sub_logger")
sub_logger_1 = logger.sub_logger("sub_logger", timestamp=False)
assert os.path.exists(sub_logger_dir)
sub_logger_2 = logger.sub_logger("sub_logger", timestamp=False)
assert sub_logger_1.directory == sub_logger_2.directory


def test_sub_logger_timestamp_creates_directory(
logger: Logger, timestamp: datetime
) -> None:
"""
A sub-Logger with a timestamp creates its directory when the directory is accessed,
and another sub-Logger with a timestamp will always create a new directory.
"""
parent_dir = logger.directory
sub_logger = logger.sub_logger("sub_logger")
sub_logger_dir = os.path.join(
parent_dir,
get_filename(parent_dir, "sub_logger", timestamp=timestamp.astimezone()),
)
sub_logger_1 = logger.sub_logger("sub_logger")
assert not os.path.exists(sub_logger_dir)
with freeze_time(timestamp):
sub_logger.log_dict("dict", {})
sub_logger_1.directory # pylint: disable=pointless-statement
assert os.path.exists(sub_logger_dir)
sub_logger_2 = logger.sub_logger("sub_logger")
with freeze_time(timestamp):
sub_logger_2.directory # pylint: disable=pointless-statement
assert sub_logger_1.directory != sub_logger_2.directory


def test_root_logger_directory(root_logger: Logger) -> None:
"""A root logger can return its directory."""
assert root_logger.directory == "dir"


def test_sub_logger_directory(logger: Logger, timestamp: datetime) -> None:
def test_sub_logger_no_timestamp_directory(logger: Logger) -> None:
"""A sub-Logger with no timestamp can return its directory."""
sub_logger = logger.sub_logger("sub_logger", timestamp=False)
assert sub_logger.directory == os.path.join(logger.directory, "sub_logger")


def test_sub_logger_timestamp_directory(logger: Logger, timestamp: datetime) -> None:
"""
A sub-Logger can return its directory and continues to use that same directory name
to generate future logs.
A sub-Logger with a timestamp can return its directory and continues to use that
same directory name.
"""
parent_dir = logger.directory
sub_logger = logger.sub_logger("sub_logger")
sub_logger_dir = os.path.join(
parent_dir,
get_filename(parent_dir, "sub_logger", timestamp=timestamp.astimezone()),
)
sub_logger = logger.sub_logger("sub_logger")
with freeze_time(timestamp):
assert sub_logger.directory == sub_logger_dir
assert not os.path.exists(sub_logger_dir)
sub_logger.log_dict("dict", {})
assert os.path.exists(sub_logger_dir)
assert sub_logger.directory == sub_logger_dir # Continues to use the same directory


def test_file_path(logger: Logger) -> None:
Expand Down

0 comments on commit c21108a

Please sign in to comment.