Skip to content

Commit

Permalink
Merge pull request #901 from openzim/harmonize_publisher
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 authored Jan 17, 2024
2 parents 90a5f14 + 64867e2 commit 7cc2df1
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 18 deletions.
3 changes: 3 additions & 0 deletions dispatcher/backend/src/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
# empty ZIMCHECK_OPTION means no zimcheck
ZIMCHECK_OPTION = os.getenv("ZIMCHECK_OPTION", "")

# Publisher value to "force" in all scrapers if not set in the recipe
DEFAULT_PUBLISHER = os.getenv("DEFAULT_PUBLISHER")

# NOTIFICATIONS

# in-notification URLs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Meta:
publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “OpenZIM” otherwise",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

Expand Down
2 changes: 1 addition & 1 deletion dispatcher/backend/src/common/schemas/offliners/kolibri.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class Meta:
publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “OpenZIM” otherwise",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class Meta:
publisher = String(
metadata={
"label": "Publisher",
"description": "ZIM publisher metadata. `Kiwix` otherwise.",
"description": "ZIM publisher metadata. `openZIM` otherwise.",
}
)
filenamePrefix = String(
Expand Down
8 changes: 8 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/nautilus.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ class Meta:
"description": "Name of content creator.",
}
)

publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

tags = String(
metadata={
"label": "ZIM Tags",
Expand Down
7 changes: 7 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/openedx.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,13 @@ class Meta:
data_key="creator",
)

publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

tags = String(
metadata={
"label": "ZIM Tags",
Expand Down
2 changes: 1 addition & 1 deletion dispatcher/backend/src/common/schemas/offliners/sotoki.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Meta:
publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “OpenZIM” otherwise",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
},
)

Expand Down
7 changes: 7 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/ted.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ class Meta:
}
)

publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

tags = String(
metadata={
"label": "ZIM Tags",
Expand Down
7 changes: 7 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ class Meta:
}
)

publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

tags = String(
metadata={
"label": "ZIM Tags",
Expand Down
7 changes: 7 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/zimit.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ class Meta:
}
)

publisher = String(
metadata={
"label": "Publisher",
"description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
}
)

source = String(
metadata={
"label": "Content Source",
Expand Down
12 changes: 11 additions & 1 deletion dispatcher/backend/src/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
from typing import Generator
from typing import Callable, Generator

import pytest
from sqlalchemy.orm import Session as OrmSession

from common import constants
from db import Session


@pytest.fixture
def dbsession() -> Generator[OrmSession, None, None]:
    """Yield a database session to the requesting test.

    The session is obtained by entering ``Session.begin()`` as a context
    manager, so the context is exited once the test using the fixture is done.
    """
    with Session.begin() as orm_session:
        yield orm_session


@pytest.fixture
def set_default_publisher() -> Generator[Callable, None, None]:
    """Yield a setter overriding ``constants.DEFAULT_PUBLISHER`` for one test.

    The yielded callable takes the publisher value to install (``None`` is
    accepted and means "no default publisher").

    On teardown the value that was in place *before* the test is restored,
    rather than a hard-coded ``None``: the constant may be populated from the
    ``DEFAULT_PUBLISHER`` environment variable, and that configuration must
    not be lost for the tests that run afterwards.
    """
    # snapshot taken before the test gets a chance to change anything
    previous_publisher = constants.DEFAULT_PUBLISHER

    def _set_default_publisher(publisher: str):
        constants.DEFAULT_PUBLISHER = publisher

    try:
        yield _set_default_publisher
    finally:
        # always undo the override, whatever happened while the test ran
        constants.DEFAULT_PUBLISHER = previous_publisher
56 changes: 56 additions & 0 deletions dispatcher/backend/src/tests/unit/utils/test_offliners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pytest

from common.enum import Offliner
from utils.offliners import command_for


@pytest.mark.parametrize(
    "offliner, flags, default_publisher, expected_result",
    [
        # no default publisher
        (
            Offliner.freecodecamp,
            {},
            None,
            ["fcc2zim", '--output="/"'],
        ),
        # default publisher is "openZIM"
        (
            Offliner.freecodecamp,
            {},
            "openZIM",
            ["fcc2zim", '--output="/"', '--publisher="openZIM"'],
        ),
        # default publisher is "Kiwix"
        (
            Offliner.freecodecamp,
            {},
            "Kiwix",
            ["fcc2zim", '--output="/"', '--publisher="Kiwix"'],
        ),
        # publisher is already set "manually" in the configuration
        (
            Offliner.freecodecamp,
            {"publisher": "Kiwix"},
            "openZIM",
            ["fcc2zim", '--output="/"', '--publisher="Kiwix"'],
        ),
        # offliner without publisher flag, no default publisher
        (Offliner.gutenberg, {}, None, ["gutenberg2zim"]),
        # offliner does not support the publisher flag
        (
            Offliner.gutenberg,
            {},
            "openZIM",
            ["gutenberg2zim"],
        ),
    ],
)
def test_command_for(
    offliner, flags, default_publisher, expected_result, set_default_publisher
):
    """Check the command built by ``command_for`` for a given default publisher.

    The default publisher is installed through the ``set_default_publisher``
    fixture, then the generated command is compared with the expected one:
    same executable, same flags in any order, and no duplicated flag.
    """
    set_default_publisher(default_publisher)
    actual = command_for(offliner=offliner, flags=flags, mount_point="/")

    actual_executable, actual_args = actual[0], actual[1:]
    expected_executable, expected_args = expected_result[0], expected_result[1:]

    # the executable always comes first and must match exactly
    assert actual_executable == expected_executable
    # remaining flags are compared as sets: their order does not matter
    assert set(actual_args) == set(expected_args)
    # equal lengths on top of equal sets rules out duplicated flags
    assert len(actual) == len(expected_result)
55 changes: 42 additions & 13 deletions dispatcher/backend/src/utils/offliners.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,25 @@
# from common.constants import DISALLOW_CAPABILITIES
from typing import List

from common import constants
from common.enum import Offliner

od = collections.namedtuple("OfflinerDef", ["cmd", "std_output", "std_stats"])
od = collections.namedtuple(
"OfflinerDef", ["cmd", "std_output", "std_stats", "publisher_flag"]
)
OFFLINER_DEFS = {
Offliner.freecodecamp: od("fcc2zim", True, False),
Offliner.gutenberg: od("gutenberg2zim", False, False),
Offliner.sotoki: od("sotoki", True, True),
Offliner.wikihow: od("wikihow2zim", True, True),
Offliner.ifixit: od("ifixit2zim", True, True),
Offliner.mwoffliner: od("mwoffliner", "outputDirectory", False),
Offliner.youtube: od("youtube2zim-playlists", True, False),
Offliner.ted: od("ted2zim-multi", True, False),
Offliner.openedx: od("openedx2zim", True, False),
Offliner.nautilus: od("nautiluszim", True, False),
Offliner.zimit: od("zimit", True, "statsFilename"),
Offliner.kolibri: od("kolibri2zim", True, False),
Offliner.freecodecamp: od("fcc2zim", True, False, True),
Offliner.gutenberg: od("gutenberg2zim", False, False, False),
Offliner.sotoki: od("sotoki", True, True, True),
Offliner.wikihow: od("wikihow2zim", True, True, True),
Offliner.ifixit: od("ifixit2zim", True, True, True),
Offliner.mwoffliner: od("mwoffliner", "outputDirectory", False, True),
Offliner.youtube: od("youtube2zim-playlists", True, False, True),
Offliner.ted: od("ted2zim-multi", True, False, True),
Offliner.openedx: od("openedx2zim", True, False, True),
Offliner.nautilus: od("nautiluszim", True, False, True),
Offliner.zimit: od("zimit", True, "statsFilename", True),
Offliner.kolibri: od("kolibri2zim", True, False, True),
}


Expand Down Expand Up @@ -73,9 +76,35 @@ def command_for(offliner, flags, mount_point):
if offliner == Offliner.zimit:
if "adminEmail" not in flags:
flags["adminEmail"] = "[email protected]"

_command_for_set_default_publisher(flags, offliner_def)

return [cmd] + compute_flags(flags)


def _command_for_set_default_publisher(flags, offliner_def):
    """Add the default publisher to ``flags`` (in place) when applicable.

    The flag is added only when all of the following hold:

    - ``constants.DEFAULT_PUBLISHER`` holds a non-empty value,
    - the offliner definition has a truthy ``publisher_flag``,
    - the flag is not already present in ``flags`` (a value set manually in
      the recipe is never overridden).

    When ``publisher_flag`` is a string, it is used as the flag name;
    otherwise the flag is named ``"publisher"``.
    """
    default_publisher = constants.DEFAULT_PUBLISHER
    publisher_flag = offliner_def.publisher_flag

    # no default configured, or scraper without any publisher flag
    if not (default_publisher and publisher_flag):
        return

    if isinstance(publisher_flag, str):
        flag_name = publisher_flag
    else:
        flag_name = "publisher"

    # setdefault leaves an already present (manually set) value untouched
    flags.setdefault(flag_name, default_publisher)


def docker_config_for(offliner):
# Note: in docker, --shm-size sets the size of /dev/shm
# it is taken out of --memory (if set)
Expand Down

0 comments on commit 7cc2df1

Please sign in to comment.