Skip to content

Commit

Permalink
Merge pull request #583 from mdekstrand/feature/quiet-default-logger
Browse files Browse the repository at this point in the history
Implement a proxy logger that suppresses debug & info messages by default
  • Loading branch information
mdekstrand authored Dec 31, 2024
2 parents 48eb645 + 80fb41b commit 0b55981
Show file tree
Hide file tree
Showing 16 changed files with 108 additions and 64 deletions.
67 changes: 28 additions & 39 deletions docs/guide/GettingStarted.ipynb

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions docs/guide/logging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ LensKit provides support code for logging and progress reporting. This code
lives in the :py:mod:`lenskit.logging` package and provides several
capabilities:

- Logger acquisition functions with useful defaults.
- Backend-independent progress reporting, with colorful progress bars (via
Rich_) on terminals.
- Easy logging configuration for recommender scripts, supporting log files and
Expand Down Expand Up @@ -74,6 +75,21 @@ are run by other tools like DVC.
level; this allows you to send ``DEBUG`` messages to the file while only
``INFO`` messages go to the console.

Emitting Log Messages
~~~~~~~~~~~~~~~~~~~~~

When writing LensKit code that needs to emit log messages, use LensKit's
:func:`~lenskit.logging.get_logger` function. This wraps Structopt's
``get_logger`` in a proxy that has more useful LensKit defaults (only emitting
warnings and errors when logging has not been configured). The resulting logger
can be used like any other Structlog or standard library logger.

Structlog loggers are *lazy*, resolving their configurations when they are
*bound* with variables. When emitting many log messages in a loop or function,
we recommend calling :meth:`structlog.typing.BindableLogger.bind` to get a bound
logger with the configuration resolved, which will be much faster for repeated
fine-grained logging messages.

Progress Reporting
~~~~~~~~~~~~~~~~~~

Expand Down
5 changes: 3 additions & 2 deletions lenskit/lenskit/data/movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@

import numpy as np
import pandas as pd
import structlog

from lenskit.logging import get_logger

from .convert import from_interactions_df
from .dataset import Dataset

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)

LOC: TypeAlias = Path | tuple[ZipFile, str]

Expand Down
5 changes: 2 additions & 3 deletions lenskit/lenskit/knn/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,21 @@
import warnings

import numpy as np
import structlog
import torch
from scipy.sparse import csr_array
from typing_extensions import Optional, override

from lenskit import util
from lenskit.data import Dataset, FeedbackType, ItemList, QueryInput, RecQuery, Vocabulary
from lenskit.diagnostics import DataWarning
from lenskit.logging import trace
from lenskit.logging import get_logger, trace
from lenskit.logging.progress import item_progress_handle, pbh_update
from lenskit.math.sparse import normalize_sparse_rows, safe_spmv
from lenskit.parallel import ensure_parallel_init
from lenskit.pipeline import Component, Trainable
from lenskit.util.torch import inference_mode

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)
MAX_BLOCKS = 1024


Expand Down
3 changes: 2 additions & 1 deletion lenskit/lenskit/knn/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@
from lenskit.data import Dataset, FeedbackType, ItemList, QueryInput, RecQuery
from lenskit.data.vocab import Vocabulary
from lenskit.diagnostics import DataWarning
from lenskit.logging import get_logger
from lenskit.math.sparse import normalize_sparse_rows, safe_spmv, torch_sparse_to_scipy
from lenskit.parallel.config import ensure_parallel_init
from lenskit.pipeline import Component, Trainable

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)


class UserKNNScorer(Component, Trainable):
Expand Down
5 changes: 4 additions & 1 deletion lenskit/lenskit/logging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import structlog

from ._proxy import get_logger
from .config import LoggingConfig, basic_logging
from .progress import Progress, item_progress, set_progress_impl
from .tasks import Task
Expand All @@ -22,7 +23,6 @@
"trace",
]

get_logger = structlog.stdlib.get_logger
_trace_debug = os.environ.get("LK_TRACE", "no").lower() == "debug"


Expand All @@ -32,6 +32,9 @@ def trace(logger: structlog.stdlib.BoundLogger, *args: Any, **kwargs: Any):
messages are more fine-grained than debug-level messages, and you usually
don't want them.
This function does not work on the lazy proxies returned by
:func:`get_logger` and similar — it only works on bound loggers.
Stability:
Caller
"""
Expand Down
32 changes: 32 additions & 0 deletions lenskit/lenskit/logging/_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import logging
from typing import Any

import structlog
from structlog._config import BoundLoggerLazyProxy

_fallback_wrapper = structlog.make_filtering_bound_logger(logging.WARNING)


def get_logger(name: str) -> structlog.stdlib.BoundLogger:
"""
Get a logger. This works like :func:`structlog.stdlib.get_logger`, except
the returned proxy logger is quiet (only WARN and higher messages) if
structlog has not been configured. LensKit code should use this instead of
obtaining loggers from Structlog directly.
"""
return LenskitProxyLogger(None, logger_factory_args=[name]) # type: ignore


class LenskitProxyLogger(BoundLoggerLazyProxy):
"""
Lazy proxy logger for LensKit. This is based on Structlog's lazy proxy,
with using a filtering logger by default when structlog is not configured.
"""

def bind(self, **new_values: Any):
if structlog.is_configured():
self._wrapper_class = None
else:
self._wrapper_class = _fallback_wrapper

return super().bind(**new_values)
6 changes: 3 additions & 3 deletions lenskit/lenskit/logging/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
from typing import Protocol, runtime_checkable
from uuid import UUID, uuid4

import structlog
import zmq

from ._proxy import get_logger
from .tasks import Task

SIGNAL_ADDR = "inproc://lenskit-monitor-signal"
REFRESH_INTERVAL = 5

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)
_monitor_lock = threading.Lock()
_monitor_instance: Monitor | None = None

Expand Down Expand Up @@ -258,7 +258,7 @@ def _handle_log_message(self):
logger = logging.getLogger(name)
logger.handle(rec)
elif engine == "structlog":
logger = structlog.get_logger(name)
logger = get_logger(name)
data = json.loads(data)
method = getattr(logger, data["method"])
method(**data["event"])
Expand Down
3 changes: 2 additions & 1 deletion lenskit/lenskit/logging/progress/_rich.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from typing_extensions import override

from .._console import console, get_live
from .._proxy import get_logger
from ._base import Progress

_log = structlog.stdlib.get_logger("lenskit.logging.progress")
_log = get_logger("lenskit.logging.progress")
_pb_lock = Lock()
_progress: ProgressImpl | None = None
_active_bars: dict[UUID, RichProgress] = {}
Expand Down
5 changes: 3 additions & 2 deletions lenskit/lenskit/logging/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from dataclasses import dataclass
from pathlib import Path

import structlog
import torch

_log = structlog.get_logger(__name__)
from ._proxy import get_logger

_log = get_logger(__name__)


@dataclass
Expand Down
4 changes: 2 additions & 2 deletions lenskit/lenskit/logging/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
from typing import Annotated, Any
from uuid import UUID, uuid4

import structlog
from pydantic import BaseModel, BeforeValidator, Field, SerializeAsAny

from ._proxy import get_logger
from .resource import ResourceMeasurement, reset_linux_hwm

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)
_active_tasks: list[Task] = []


Expand Down
3 changes: 2 additions & 1 deletion lenskit/lenskit/logging/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@
import zmq
from structlog.typing import EventDict

from ._proxy import get_logger
from .config import CORE_PROCESSORS, active_logging_config, log_warning
from .monitor import get_monitor
from .processors import add_process_info
from .tasks import Task
from .tracing import lenskit_filtering_logger

_active_context: WorkerContext | None = None
_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)


@dataclass
Expand Down
5 changes: 2 additions & 3 deletions lenskit/lenskit/parallel/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,17 @@
from multiprocessing.context import SpawnContext, SpawnProcess
from multiprocessing.managers import SharedMemoryManager

import structlog
from typing_extensions import Any, Generic, Iterable, Iterator, override

from lenskit.logging.tasks import Task
from lenskit.logging import Task, get_logger
from lenskit.logging.worker import WorkerContext, WorkerLogConfig

from . import worker
from .config import ParallelConfig, ensure_parallel_init, get_parallel_config, initialize
from .invoker import A, InvokeOp, M, ModelOpInvoker, R
from .serialize import shm_serialize

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)


def multiprocess_executor(
Expand Down
5 changes: 3 additions & 2 deletions lenskit/lenskit/parallel/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
from dataclasses import dataclass
from typing import Any

import structlog
from typing_extensions import Generic

from lenskit.logging import get_logger

from .invoker import A, InvokeOp, M, R
from .serialize import SHMData, shm_deserialize

_log = structlog.get_logger(__name__)
_log = get_logger(__name__)

__work_context: WorkerData

Expand Down
4 changes: 2 additions & 2 deletions lenskit/lenskit/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
from types import FunctionType, UnionType
from uuid import NAMESPACE_URL, uuid4, uuid5

import structlog
from typing_extensions import Any, Literal, Self, TypeAlias, TypeVar, cast, overload

from lenskit.data import Dataset
from lenskit.logging import get_logger

from . import config
from .components import ( # type: ignore # noqa: F401
Expand Down Expand Up @@ -51,7 +51,7 @@
"topn_pipeline",
]

_log = structlog.stdlib.get_logger(__name__)
_log = get_logger(__name__)

# common type var for quick use
T = TypeVar("T")
Expand Down
4 changes: 2 additions & 2 deletions lenskit/lenskit/testing/_movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import numpy as np
import pandas as pd
import structlog
from pyprojroot import here

import pytest
Expand All @@ -17,10 +16,11 @@
from lenskit.data import Dataset, ItemListCollection, UserIDKey, from_interactions_df
from lenskit.data.lazy import LazyDataset
from lenskit.data.movielens import load_movielens, load_movielens_df
from lenskit.logging import get_logger
from lenskit.pipeline import RecPipelineBuilder
from lenskit.splitting import TTSplit, simple_test_pair

_log = structlog.stdlib.get_logger("lenskit.testing")
_log = get_logger("lenskit.testing")

ml_test_dir = here("data/ml-latest-small")
ml_100k_zip = here("data/ml-100k.zip")
Expand Down

0 comments on commit 0b55981

Please sign in to comment.