From 64867e21888a4d95146223d094a3bab914c58ba8 Mon Sep 17 00:00:00 2001
From: benoit74
Date: Wed, 17 Jan 2024 14:23:03 +0100
Subject: [PATCH] Publisher flag might be set automatically when not provided in configuration

---
Reviewer notes (free-form text after the "---" marker; not part of the commit
message and not part of the diff):

- constants.DEFAULT_PUBLISHER is read with os.getenv() and no fallback, so it
  is None when the environment variable is unset. The new helper tests it for
  truthiness, so nothing is injected when it is unset or empty.
- OfflinerDef gains a fourth field, publisher_flag. In this patch every
  offliner sets it to True except gutenberg (False). The helper also accepts a
  string there and then uses that string as the flag name instead of
  "publisher"; no offliner uses that form yet.
- _command_for_set_default_publisher() writes into the `flags` dict it
  receives, the same way command_for() already fills in the zimit adminEmail
  default, so the caller's dict is modified.
- A publisher already present in `flags` is never overwritten.
- The set_default_publisher fixture assigns None to
  constants.DEFAULT_PUBLISHER on teardown; it does not restore the value the
  constant had before the test.
- test_command_for compares the executable by position, the remaining flags
  as a set, and the total length, so flag order is ignored while duplicated
  flags are still detected.

 dispatcher/backend/src/common/constants.py    |  3 +
 dispatcher/backend/src/tests/conftest.py      | 12 +++-
 .../src/tests/unit/utils/test_offliners.py    | 56 +++++++++++++++++++
 dispatcher/backend/src/utils/offliners.py     | 55 +++++++++++++-----
 4 files changed, 112 insertions(+), 14 deletions(-)
 create mode 100644 dispatcher/backend/src/tests/unit/utils/test_offliners.py

diff --git a/dispatcher/backend/src/common/constants.py b/dispatcher/backend/src/common/constants.py
index 5f4ee187..918b6355 100644
--- a/dispatcher/backend/src/common/constants.py
+++ b/dispatcher/backend/src/common/constants.py
@@ -37,6 +37,9 @@
 # empty ZIMCHECK_OPTION means no zimcheck
 ZIMCHECK_OPTION = os.getenv("ZIMCHECK_OPTION", "")
 
+# Publisher value to "force" in all scrapers if not set in the recipe
+DEFAULT_PUBLISHER = os.getenv("DEFAULT_PUBLISHER")
+
 # NOTIFICATIONS
 
 # in-notification URLs
diff --git a/dispatcher/backend/src/tests/conftest.py b/dispatcher/backend/src/tests/conftest.py
index 230be5da..051f26e5 100644
--- a/dispatcher/backend/src/tests/conftest.py
+++ b/dispatcher/backend/src/tests/conftest.py
@@ -1,8 +1,9 @@
-from typing import Generator
+from typing import Callable, Generator
 
 import pytest
 from sqlalchemy.orm import Session as OrmSession
 
+from common import constants
 from db import Session
 
 
@@ -10,3 +11,12 @@
 def dbsession() -> Generator[OrmSession, None, None]:
     with Session.begin() as session:
         yield session
+
+
+@pytest.fixture
+def set_default_publisher() -> Generator[Callable, None, None]:
+    def _set_default_publisher(publisher: str):
+        constants.DEFAULT_PUBLISHER = publisher
+
+    yield _set_default_publisher
+    constants.DEFAULT_PUBLISHER = None  # Reset to default after test
diff --git a/dispatcher/backend/src/tests/unit/utils/test_offliners.py b/dispatcher/backend/src/tests/unit/utils/test_offliners.py
new file mode 100644
index 00000000..8708ab71
--- /dev/null
+++ b/dispatcher/backend/src/tests/unit/utils/test_offliners.py
@@ -0,0 +1,56 @@
+import pytest
+
+from common.enum import Offliner
+from utils.offliners import command_for
+
+
+@pytest.mark.parametrize(
+    "offliner, flags, default_publisher, expected_result",
+    [
+        (
+            Offliner.freecodecamp,
+            {},
+            None,
+            ["fcc2zim", '--output="/"'],
+        ),  # no default publisher
+        (
+            Offliner.freecodecamp,
+            {},
+            "openZIM",
+            ["fcc2zim", '--output="/"', '--publisher="openZIM"'],
+        ),  # default publisher is "openZIM"
+        (
+            Offliner.freecodecamp,
+            {},
+            "Kiwix",
+            ["fcc2zim", '--output="/"', '--publisher="Kiwix"'],
+        ),  # default publisher is "Kiwix"
+        (
+            Offliner.freecodecamp,
+            {"publisher": "Kiwix"},
+            "openZIM",
+            ["fcc2zim", '--output="/"', '--publisher="Kiwix"'],
+        ),  # publisher is already set "manually" in the configuration
+        (Offliner.gutenberg, {}, None, ["gutenberg2zim"]),
+        (
+            Offliner.gutenberg,
+            {},
+            "openZIM",
+            ["gutenberg2zim"],
+        ),  # offliner does not support the publisher flag
+    ],
+)
+def test_command_for(
+    offliner, flags, default_publisher, expected_result, set_default_publisher
+):
+    set_default_publisher(default_publisher)
+    command = command_for(offliner=offliner, flags=flags, mount_point="/")
+    assert (
+        command[0] == expected_result[0]
+    )  # first item is the executable, it must match
+    assert set(command[1:]) == set(
+        expected_result[1:]
+    )  # other flags order does not matter
+    assert len(command) == len(
+        expected_result
+    )  # but we must not have duplicate flags, so length must match
diff --git a/dispatcher/backend/src/utils/offliners.py b/dispatcher/backend/src/utils/offliners.py
index 20ad3984..abaa2c3c 100644
--- a/dispatcher/backend/src/utils/offliners.py
+++ b/dispatcher/backend/src/utils/offliners.py
@@ -8,22 +8,25 @@
 # from common.constants import DISALLOW_CAPABILITIES
 from typing import List
 
+from common import constants
 from common.enum import Offliner
 
-od = collections.namedtuple("OfflinerDef", ["cmd", "std_output", "std_stats"])
+od = collections.namedtuple(
+    "OfflinerDef", ["cmd", "std_output", "std_stats", "publisher_flag"]
+)
 OFFLINER_DEFS = {
-    Offliner.freecodecamp: od("fcc2zim", True, False),
-    Offliner.gutenberg: od("gutenberg2zim", False, False),
-    Offliner.sotoki: od("sotoki", True, True),
-    Offliner.wikihow: od("wikihow2zim", True, True),
-    Offliner.ifixit: od("ifixit2zim", True, True),
-    Offliner.mwoffliner: od("mwoffliner", "outputDirectory", False),
-    Offliner.youtube: od("youtube2zim-playlists", True, False),
-    Offliner.ted: od("ted2zim-multi", True, False),
-    Offliner.openedx: od("openedx2zim", True, False),
-    Offliner.nautilus: od("nautiluszim", True, False),
-    Offliner.zimit: od("zimit", True, "statsFilename"),
-    Offliner.kolibri: od("kolibri2zim", True, False),
+    Offliner.freecodecamp: od("fcc2zim", True, False, True),
+    Offliner.gutenberg: od("gutenberg2zim", False, False, False),
+    Offliner.sotoki: od("sotoki", True, True, True),
+    Offliner.wikihow: od("wikihow2zim", True, True, True),
+    Offliner.ifixit: od("ifixit2zim", True, True, True),
+    Offliner.mwoffliner: od("mwoffliner", "outputDirectory", False, True),
+    Offliner.youtube: od("youtube2zim-playlists", True, False, True),
+    Offliner.ted: od("ted2zim-multi", True, False, True),
+    Offliner.openedx: od("openedx2zim", True, False, True),
+    Offliner.nautilus: od("nautiluszim", True, False, True),
+    Offliner.zimit: od("zimit", True, "statsFilename", True),
+    Offliner.kolibri: od("kolibri2zim", True, False, True),
 }
 
 
@@ -73,9 +76,35 @@ def command_for(offliner, flags, mount_point):
     if offliner == Offliner.zimit:
         if "adminEmail" not in flags:
             flags["adminEmail"] = "contact+zimfarm@kiwix.org"
+
+    _command_for_set_default_publisher(flags, offliner_def)
+
     return [cmd] + compute_flags(flags)
 
 
+def _command_for_set_default_publisher(flags, offliner_def):
+    """Set a default publisher in the command
+
+    The "publisher" flag is set if a default is provided in the local environment, if
+    the scraper supports it, and if it is not already set manually.
+
+    The "publisher" flag might have a different name, configured in the offliner
+    definition.
+    """
+
+    flag_name = (
+        offliner_def.publisher_flag
+        if isinstance(offliner_def.publisher_flag, str)
+        else "publisher"
+    )
+    if (
+        constants.DEFAULT_PUBLISHER
+        and offliner_def.publisher_flag
+        and flag_name not in flags
+    ):
+        flags[flag_name] = constants.DEFAULT_PUBLISHER
+
+
 def docker_config_for(offliner):
     # Note: in docker, --shm-size sets the size of /dev/shm
     # it is taken out of --memory (if set)