From f7e9e166b0dce9fd3dbb3f96dacdd83a627a5f5a Mon Sep 17 00:00:00 2001 From: benoit74 Date: Mon, 22 Jul 2024 09:18:01 +0000 Subject: [PATCH] Remove null characters in any string passed to the container / debug task --- dispatcher/backend/src/common/utils.py | 5 +++-- .../integration/routes/tasks/test_task.py | 22 +++++++++++++++++++ .../src/tests/unit/utils/test_check.py | 15 +++++++++++++ dispatcher/backend/src/utils/check.py | 7 ++++++ 4 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 dispatcher/backend/src/tests/unit/utils/test_check.py diff --git a/dispatcher/backend/src/common/utils.py b/dispatcher/backend/src/common/utils.py index 567833fda..53efbd46b 100644 --- a/dispatcher/backend/src/common/utils.py +++ b/dispatcher/backend/src/common/utils.py @@ -16,6 +16,7 @@ from common.external import advertise_book_to_cms from common.notifications import handle_notification from errors.http import TaskNotFound, WorkerNotFound +from utils.check import cleanup_value from utils.scheduling import update_schedule_duration logger = logging.getLogger(__name__) @@ -87,13 +88,13 @@ def add_to_container_if_present( task: dbm.Task, kwargs_key: str, container_key: str ) -> None: if kwargs_key in kwargs: - task.container[container_key] = kwargs[kwargs_key] + task.container[container_key] = cleanup_value(kwargs[kwargs_key]) def add_to_debug_if_present( task: dbm.Task, kwargs_key: str, debug_key: str ) -> None: if kwargs_key in kwargs: - task.debug[debug_key] = kwargs[kwargs_key] + task.debug[debug_key] = cleanup_value(kwargs[kwargs_key]) if "worker" in kwargs: task.worker = dbm.Worker.get(session, kwargs["worker"], WorkerNotFound) diff --git a/dispatcher/backend/src/tests/integration/routes/tasks/test_task.py b/dispatcher/backend/src/tests/integration/routes/tasks/test_task.py index d5406e93f..08ee37d3c 100644 --- a/dispatcher/backend/src/tests/integration/routes/tasks/test_task.py +++ b/dispatcher/backend/src/tests/integration/routes/tasks/test_task.py @@ -184,3 +184,25 @@ def test_cancel_task(self, client, access_token, tasks): } response = client.post(url, headers=headers) assert response.status_code == 204 + + +class TestTaskPatch: + + def test_patch_task(self, client, access_token, tasks): + for task in filter(lambda x: x["status"] in [TaskStatus.started], tasks): + url = "/tasks/{}".format(task["_id"]) + headers = { + "Authorization": access_token, + "Content-Type": "application/json", + } + response = client.patch( + url, + headers=headers, + json={ + "event": "scraper_running", + "payload": { # control character below must be ignored + "stdout": "some string with ignore bad \u0000character" + }, + }, + ) + assert response.status_code == 204 diff --git a/dispatcher/backend/src/tests/unit/utils/test_check.py b/dispatcher/backend/src/tests/unit/utils/test_check.py new file mode 100644 index 000000000..61d89bc23 --- /dev/null +++ b/dispatcher/backend/src/tests/unit/utils/test_check.py @@ -0,0 +1,15 @@ +import pytest + +from utils.check import cleanup_value + + +@pytest.mark.parametrize( + "value, expected", + [ + pytest.param("ok", "ok", id="str_simple_ok"), + pytest.param("o\u0000k", "ok", id="str_null_character"), + pytest.param(123, 123, id="int_simple"), + ], +) +def test_cleanup_value(value: str, expected: str): + assert cleanup_value(value) == expected diff --git a/dispatcher/backend/src/utils/check.py b/dispatcher/backend/src/utils/check.py index 90f686e48..acc36974e 100644 --- a/dispatcher/backend/src/utils/check.py +++ b/dispatcher/backend/src/utils/check.py @@ -29,3 +29,10 @@ def raise_if( """ if condition: raise exception_class(*exception_args) + + +def cleanup_value(value: Any) -> Any: + """Remove unwanted characters before inserting / updating in DB""" + if isinstance(value, str): + return value.replace("\u0000", "") + return value