From 1160ae8e9d59b53ef213c546f9702dd7d7bc0a7b Mon Sep 17 00:00:00 2001 From: Nicholas Hairs Date: Sat, 18 May 2024 16:26:49 +1000 Subject: [PATCH] Support missing rename fields and preserve order (#13) Fixes #6 Fixes #7 Changes also prevent double renaming of fields. ## Test plan Run tests --- CHANGELOG.md | 3 +- pyproject.toml | 2 +- src/pythonjsonlogger/core.py | 54 ++++++++++++--------- tests/test_formatters.py | 91 +++++++++++++++++++++++++++++++----- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e1ea14..6103af2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [3.1.0.rc2](https://github.com/nhairs/python-json-logger/compare/v3.0.1...v3.1.0.rc2) - 2023-05-03 +## [3.1.0.rc3](https://github.com/nhairs/python-json-logger/compare/v3.0.1...v3.1.0.rc3) - 2023-05-03 This splits common funcitonality out to allow supporting other JSON encoders. Although this is a large refactor, backwards compatibility has been maintained. @@ -40,6 +40,7 @@ This splits common funcitonality out to allow supporting other JSON encoders. Al - Dataclasses are now supported - Enum values now use their value, Enum classes now return all values as a list. - Will fallback on `__str__` if available, else `__repr__` if available, else will use `__could_not_encode__` +- Renaming fields now preserves order (#7) and ignores missing fields (#6). ### Deprecated - `.jsonlogger` is now `.json` diff --git a/pyproject.toml b/pyproject.toml index 8264289..7ce8173 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python-json-logger" -version = "3.1.0.rc2" +version = "3.1.0.rc3" description = "JSON Log Formatter for the Python Logging Package" authors = [ {name = "Zakaria Zajac", email = "zak@madzak.com"}, diff --git a/src/pythonjsonlogger/core.py b/src/pythonjsonlogger/core.py index b38c27f..cb5fec4 100644 --- a/src/pythonjsonlogger/core.py +++ b/src/pythonjsonlogger/core.py @@ -138,6 +138,7 @@ def __init__( *, prefix: str = "", rename_fields: Optional[Dict[str, str]] = None, + rename_fields_keep_missing: bool = False, static_fields: Optional[Dict[str, Any]] = None, reserved_attrs: Optional[Sequence[str]] = None, timestamp: Union[bool, str] = False, @@ -154,7 +155,8 @@ def __init__( prefix: an optional string prefix added at the beginning of the formatted string rename_fields: an optional dict, used to rename field names in the output. - Rename message to @message: {'message': '@message'} + Rename `message` to `@message`: `{'message': '@message'}` + rename_fields_keep_missing: When renaming fields, include missing fields in the output. static_fields: an optional dict, used to add fields with static values to all logs reserved_attrs: an optional list of fields that will be skipped when outputting json log record. Defaults to all log record attributes: @@ -164,14 +166,18 @@ def __init__( to log record using string as key. If True boolean is passed, timestamp key will be "timestamp". Defaults to False/off. - *Changed in 3.1*: you can now use custom values for style by setting validate to `False`. - The value is stored in `self._style` as a string. The `parse` method will need to be - overridden in order to support the new style. + *Changed in 3.1*: + + - you can now use custom values for style by setting validate to `False`. + The value is stored in `self._style` as a string. The `parse` method will need to be + overridden in order to support the new style. + - Renaming fields now preserves the order that fields were added in and avoids adding + missing fields. The original behaviour, missing fields have a value of `None`, is still + available by setting `rename_fields_keep_missing` to `True`. """ ## logging.Formatter compatibility ## --------------------------------------------------------------------- - # Note: validate added in 3.8 - # Note: defaults added in 3.10 + # Note: validate added in 3.8, defaults added in 3.10 if style in logging._STYLES: _style = logging._STYLES[style][0](fmt) # type: ignore[operator] if validate: @@ -192,6 +198,7 @@ def __init__( ## --------------------------------------------------------------------- self.prefix = prefix self.rename_fields = rename_fields if rename_fields is not None else {} + self.rename_fields_keep_missing = rename_fields_keep_missing self.static_fields = static_fields if static_fields is not None else {} self.reserved_attrs = set(reserved_attrs if reserved_attrs is not None else RESERVED_ATTRS) self.timestamp = timestamp @@ -215,6 +222,7 @@ def format(self, record: logging.LogRecord) -> str: record.message = "" else: record.message = record.getMessage() + # only format time if needed if "asctime" in self._required_fields: record.asctime = self.formatTime(record, self.datefmt) @@ -225,6 +233,7 @@ def format(self, record: logging.LogRecord) -> str: message_dict["exc_info"] = self.formatException(record.exc_info) if not message_dict.get("exc_info") and record.exc_text: message_dict["exc_info"] = record.exc_text + # Display formatted record of stack frames # default format is a string returned from :func:`traceback.print_stack` if record.stack_info and not message_dict.get("stack_info"): @@ -289,13 +298,16 @@ def add_fields( Args: log_record: data that will be logged record: the record to extract data from - message_dict: ??? + message_dict: dictionary that was logged instead of a message. e.g + `logger.info({"is_this_message_dict": True})` """ for field in self._required_fields: - log_record[field] = record.__dict__.get(field) + log_record[self._get_rename(field)] = record.__dict__.get(field) + + for data_dict in [self.static_fields, message_dict]: + for key, value in data_dict.items(): + log_record[self._get_rename(key)] = value - log_record.update(self.static_fields) - log_record.update(message_dict) merge_record_extra( record, log_record, @@ -304,19 +316,19 @@ def add_fields( ) if self.timestamp: - # TODO: Can this use isinstance instead? - # pylint: disable=unidiomatic-typecheck - key = self.timestamp if type(self.timestamp) == str else "timestamp" - log_record[key] = datetime.fromtimestamp(record.created, tz=timezone.utc) - - self._perform_rename_log_fields(log_record) + key = self.timestamp if isinstance(self.timestamp, str) else "timestamp" + log_record[self._get_rename(key)] = datetime.fromtimestamp( + record.created, tz=timezone.utc + ) + + if self.rename_fields_keep_missing: + for field in self.rename_fields.values(): + if field not in log_record: + log_record[field] = None return - def _perform_rename_log_fields(self, log_record: Dict[str, Any]) -> None: - for old_field_name, new_field_name in self.rename_fields.items(): - log_record[new_field_name] = log_record[old_field_name] - del log_record[old_field_name] - return + def _get_rename(self, key: str) -> str: + return self.rename_fields.get(key, key) # Child Methods # .......................................................................... diff --git a/tests/test_formatters.py b/tests/test_formatters.py index 1a6702c..cbaf886 100644 --- a/tests/test_formatters.py +++ b/tests/test_formatters.py @@ -187,15 +187,64 @@ def test_rename_base_field(env: LoggingEnvironment, class_: type[BaseJsonFormatt @pytest.mark.parametrize("class_", ALL_FORMATTERS) -def test_rename_nonexistent_field(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): - env.set_formatter(class_(rename_fields={"nonexistent_key": "new_name"})) +def test_rename_missing(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): + env.set_formatter(class_(rename_fields={"missing_field": "new_field"})) - stderr_watcher = io.StringIO() - sys.stderr = stderr_watcher - env.logger.info("testing logging rename") - sys.stderr == sys.__stderr__ + msg = "test rename missing field" + env.logger.info(msg) + log_json = env.load_json() + + assert log_json["message"] == msg + assert "missing_field" not in log_json + assert "new_field" not in log_json + return + + +@pytest.mark.parametrize("class_", ALL_FORMATTERS) +def test_rename_keep_missing(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): + env.set_formatter( + class_(rename_fields={"missing_field": "new_field"}, rename_fields_keep_missing=True) + ) + + msg = "test keep rename missing field" + env.logger.info(msg) + log_json = env.load_json() + + assert log_json["message"] == msg + assert "missing_field" not in log_json + assert log_json["new_field"] is None + return + + +@pytest.mark.parametrize("class_", ALL_FORMATTERS) +def test_rename_preserve_order(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): + env.set_formatter( + class_("{levelname}{message}{asctime}", style="{", rename_fields={"levelname": "LEVEL"}) + ) + + env.logger.info("testing logging rename order") + log_json = env.load_json() + + assert list(log_json.keys())[0] == "LEVEL" + return + + +@pytest.mark.parametrize("class_", ALL_FORMATTERS) +def test_rename_once(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): + env.set_formatter( + class_( + "{levelname}{message}{asctime}", + style="{", + rename_fields={"levelname": "LEVEL", "message": "levelname"}, + ) + ) + + msg = "something" + env.logger.info(msg) + log_json = env.load_json() - assert "KeyError: 'nonexistent_key'" in stderr_watcher.getvalue() + assert log_json["LEVEL"] == "INFO" + assert log_json["levelname"] == msg return @@ -327,6 +376,30 @@ def test_exc_info_renamed(env: LoggingEnvironment, class_: type[BaseJsonFormatte return +@pytest.mark.parametrize("class_", ALL_FORMATTERS) +def test_exc_info_renamed_not_required(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): + env.set_formatter(class_(rename_fields={"exc_info": "stack_trace"})) + + expected_value = get_traceback_from_exception_followed_by_log_call(env) + log_json = env.load_json() + + assert log_json["stack_trace"] == expected_value + assert "exc_info" not in log_json + return + + +@pytest.mark.parametrize("class_", ALL_FORMATTERS) +def test_exc_info_renamed_no_error(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): + env.set_formatter(class_(rename_fields={"exc_info": "stack_trace"})) + + env.logger.info("message") + log_json = env.load_json() + + assert "stack_trace" not in log_json + assert "exc_info" not in log_json + return + + @pytest.mark.parametrize("class_", ALL_FORMATTERS) def test_custom_object_serialization(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): def encode_complex(z): @@ -368,10 +441,6 @@ def test_rename_reserved_attrs(env: LoggingEnvironment, class_: type[BaseJsonFor env.logger.info("message") log_json = env.load_json() - # Note: this check is fragile if we make the following changes in the future (we might): - # - renaming fields no longer requires the field to be present (#6) - # - we add the ability (and data above) to rename a field to an existing field name - # e.g. {"exc_info": "trace_original", "@custom_trace": "exc_info"} for old_name, new_name in reserved_attrs_map.items(): assert new_name in log_json assert old_name not in log_json