From 63a5880a6895c23dae737829d698f03fe44e82fa Mon Sep 17 00:00:00 2001
From: benoit74
Date: Tue, 29 Oct 2024 14:11:12 +0000
Subject: [PATCH] Add mindtouch offliner for libretexts.org

---
Review notes (not part of the commit message; `git am` ignores any text
between the separator line above and the first `diff --git` line):

* This patch registers a `mindtouch` offliner: new enum members in
  common/enum.py, a new `MindtouchFlagsSchema` flags schema, its entry in
  the `get_offliner_schema` lookup, and an `od("mindtouch2zim", True, True)`
  entry in utils/offliners.py.
* NOTE(review): `ScheduleCategory.mindtouch`, `Offliner.mindtouch` and
  `Platform.libretexts` each get a matching entry in their `all()` list,
  but no hunk touches `all()` of `WarehousePath` or `DockerImageName` for
  the new `libretexts` / `mindtouch` members. Those method bodies are
  outside this diff; confirm whether they need the new members too.
* NOTE(review): the `library-url` description states the URL must NOT end
  with a slash, yet the field has no `validate=` argument; confirm that
  such input is rejected or normalised somewhere downstream.

 dispatcher/backend/src/common/enum.py         |   9 +
 .../backend/src/common/schemas/models.py      |   2 +
 .../src/common/schemas/offliners/__init__.py  |   2 +
 .../src/common/schemas/offliners/mindtouch.py | 178 ++++++++++++++++++
 dispatcher/backend/src/utils/offliners.py     |   1 +
 5 files changed, 192 insertions(+)
 create mode 100644 dispatcher/backend/src/common/schemas/offliners/mindtouch.py

diff --git a/dispatcher/backend/src/common/enum.py b/dispatcher/backend/src/common/enum.py
index c447dc6a..4213646a 100644
--- a/dispatcher/backend/src/common/enum.py
+++ b/dispatcher/backend/src/common/enum.py
@@ -78,6 +78,7 @@ class WarehousePath:
     hidden_custom_apps = "/.hidden/custom_apps"
     videos = "/videos"
     zimit = "/zimit"
+    libretexts = "/libretexts"
 
     @classmethod
     def all(cls):
@@ -116,6 +117,7 @@ class ScheduleCategory:
     ifixit = "ifixit"
     freecodecamp = "freecodecamp"
     devdocs = "devdocs"
+    mindtouch = "mindtouch"
 
     @classmethod
     def all(cls):
@@ -141,6 +143,7 @@ def all(cls):
             cls.ifixit,
             cls.freecodecamp,
             cls.devdocs,
+            cls.mindtouch,
         ]
 
     @classmethod
@@ -173,6 +176,7 @@ class DockerImageName:
     ifixit = "openzim/ifixit"
     freecodecamp = "openzim/freecodecamp"
     devdocs = "openzim/devdocs"
+    mindtouch = "openzim/mindtouch"
 
     @classmethod
     def all(cls) -> set:
@@ -209,6 +213,7 @@ class Offliner:
     ifixit = "ifixit"
     freecodecamp = "freecodecamp"
     devdocs = "devdocs"
+    mindtouch = "mindtouch"
 
     @classmethod
     def all(cls):
@@ -227,6 +232,7 @@ def all(cls):
             cls.ifixit,
             cls.freecodecamp,
             cls.devdocs,
+            cls.mindtouch,
         ]
 
     @classmethod
@@ -252,6 +258,7 @@ def get_image_name(cls, offliner):
             cls.ifixit: DockerImageName.ifixit,
             cls.freecodecamp: DockerImageName.freecodecamp,
             cls.devdocs: DockerImageName.devdocs,
+            cls.mindtouch: DockerImageName.mindtouch,
         }.get(offliner, "-")
 
 
@@ -275,6 +282,7 @@ class Platform:
     ted = "ted"
     devdocs = "devdocs"
     shamela = "shamela"
+    libretexts = "libretexts"
 
     @classmethod
     def all(cls) -> str:
@@ -286,6 +294,7 @@ def all(cls) -> str:
             cls.ted,
             cls.devdocs,
             cls.shamela,
+            cls.libretexts,
         ]
 
     @classmethod
diff --git a/dispatcher/backend/src/common/schemas/models.py b/dispatcher/backend/src/common/schemas/models.py
index 8ff3eab8..e69bb49d 100644
--- a/dispatcher/backend/src/common/schemas/models.py
+++ b/dispatcher/backend/src/common/schemas/models.py
@@ -26,6 +26,7 @@
     GutenbergFlagsSchema,
     IFixitFlagsSchema,
     KolibriFlagsSchema,
+    MindtouchFlagsSchema,
     MWOfflinerFlagsSchema,
     NautilusFlagsSchema,
     NautilusFlagsSchemaRelaxed,
@@ -103,6 +104,7 @@ def get_offliner_schema(offliner):
             Offliner.ifixit: IFixitFlagsSchema,
             Offliner.freecodecamp: FreeCodeCampFlagsSchema,
             Offliner.devdocs: DevDocsFlagsSchema,
+            Offliner.mindtouch: MindtouchFlagsSchema,
         }.get(offliner, Schema)
 
     @validates_schema
diff --git a/dispatcher/backend/src/common/schemas/offliners/__init__.py b/dispatcher/backend/src/common/schemas/offliners/__init__.py
index 07dab227..33225f7c 100644
--- a/dispatcher/backend/src/common/schemas/offliners/__init__.py
+++ b/dispatcher/backend/src/common/schemas/offliners/__init__.py
@@ -4,6 +4,7 @@
 from common.schemas.offliners.gutenberg import GutenbergFlagsSchema
 from common.schemas.offliners.ifixit import IFixitFlagsSchema
 from common.schemas.offliners.kolibri import KolibriFlagsSchema
+from common.schemas.offliners.mindtouch import MindtouchFlagsSchema
 from common.schemas.offliners.mwoffliner import MWOfflinerFlagsSchema
 from common.schemas.offliners.nautilus import (
     NautilusFlagsSchema,
@@ -22,6 +23,7 @@
     "GutenbergFlagsSchema",
     "IFixitFlagsSchema",
     "KolibriFlagsSchema",
+    "MindtouchFlagsSchema",
     "MWOfflinerFlagsSchema",
     "NautilusFlagsSchema",
     "NautilusFlagsSchemaRelaxed",
diff --git a/dispatcher/backend/src/common/schemas/offliners/mindtouch.py b/dispatcher/backend/src/common/schemas/offliners/mindtouch.py
new file mode 100644
index 00000000..ea44f742
--- /dev/null
+++ b/dispatcher/backend/src/common/schemas/offliners/mindtouch.py
@@ -0,0 +1,178 @@
+from marshmallow import fields, validate
+
+from common.schemas import SerializableSchema, String
+from common.schemas.fields import (
+    validate_output,
+    validate_zim_description,
+    validate_zim_longdescription,
+    validate_zim_title,
+)
+
+
+class MindtouchFlagsSchema(SerializableSchema):
+    class Meta:
+        ordered = True
+
+    library_url = String(
+        metadata={
+            "label": "Library URL",
+            "description": "URL of the Mindtouch / Nice CXone Expert instance (must NOT"
+            " contain trailing slash), e.g. for LibreTexts Geosciences it is "
+            "https://geo.libretexts.org",
+        },
+        data_key="library-url",
+        required=True,
+    )
+
+    creator = String(
+        metadata={
+            "label": "Creator",
+            "description": "Name of content creator",
+        },
+        required=True,
+    )
+
+    publisher = String(
+        metadata={
+            "label": "Publisher",
+            "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
+        },
+    )
+
+    file_name = String(
+        metadata={
+            "label": "ZIM filename",
+            "description": "ZIM filename. Do not input trailing `.zim`, it "
+            "will be automatically added. Defaults to {name}_{period}",
+        },
+        data_key="file-name",
+    )
+
+    name = String(
+        metadata={
+            "label": "ZIM name",
+            "description": "Name of the ZIM.",
+        },
+        required=True,
+    )
+
+    title = String(
+        metadata={
+            "label": "ZIM title",
+            "description": "Title of the ZIM.",
+        },
+        validate=validate_zim_title,
+        required=True,
+    )
+
+    description = String(
+        metadata={
+            "label": "ZIM description",
+            "description": "Description of the ZIM.",
+        },
+        validate=validate_zim_description,
+        required=True,
+    )
+
+    long_description = String(
+        metadata={
+            "label": "ZIM long description",
+            "description": "Long description of the ZIM.",
+        },
+        data_key="long-description",
+        validate=validate_zim_longdescription,
+    )
+
+    tags = String(
+        metadata={
+            "label": "ZIM Tags",
+            "description": "A semicolon (;) delimited list of tags to add to the ZIM.",
+        }
+    )
+
+    secondary_color = String(
+        metadata={
+            "label": "Secondary color",
+            "description": "Secondary (background) color of ZIM UI. Default: '#FFFFFF'",
+        },
+        data_key="secondary-color",
+    )
+
+    page_id_include = String(
+        metadata={
+            "label": "Page ID include",
+            "description": "CSV of page ids to include. Parent pages will be included "
+            "as well for proper navigation, up to root (or subroot if --root-page-id is"
+            " set). Can be combined with --page-title-include (pages with matching "
+            "title or id will be included)",
+        },
+        data_key="page-id-include",
+    )
+
+    page_title_include = String(
+        metadata={
+            "label": "Page title include regex",
+            "description": "Includes only pages with title matching the given regular "
+            "expression, and their parent pages for proper navigation, up to root (or "
+            "subroot if --root-page-id is set). Can be combined with --page-id-include "
+            "(pages with matching title or id will be included)",
+        },
+        data_key="page-title-include",
+    )
+
+    page_title_exclude = String(
+        metadata={
+            "label": "Page title exclude regex",
+            "description": "Excludes pages with title matching the given regular "
+            "expression",
+        },
+        data_key="page-title-exclude",
+    )
+
+    root_page_id = String(
+        metadata={
+            "label": "Root page ID",
+            "description": "ID of the root page to include in ZIM. Only this page and "
+            "its subpages will be included in the ZIM",
+        },
+        data_key="root-page-id",
+    )
+
+    illustration_url = String(
+        metadata={
+            "label": "Illustration URL",
+            "description": "URL to illustration to use for ZIM illustration and "
+            "favicon",
+        },
+        data_key="illustration-url",
+    )
+
+    debug = fields.Boolean(
+        truthy=[True],
+        falsy=[False],
+        metadata={"label": "Debug", "description": "Enable verbose output"},
+    )
+
+    stats_filename = String(
+        metadata={
+            "label": "Stats filename",
+            "placeholder": "/output/task_progress.json",
+            "description": "Scraping progress file. "
+            "Leave it as `/output/task_progress.json`",
+        },
+        data_key="stats-filename",
+        load_default="/output/task_progress.json",
+        dump_default="/output/task_progress.json",
+        validate=validate.Equal("/output/task_progress.json"),
+    )
+
+    output = String(
+        metadata={
+            "label": "Output folder",
+            "placeholder": "/output",
+            "description": "Output folder for ZIM file(s). Leave it as `/output`",
+        },
+        load_default="/output",
+        dump_default="/output",
+        validate=validate_output,
+    )
diff --git a/dispatcher/backend/src/utils/offliners.py b/dispatcher/backend/src/utils/offliners.py
index 52470975..c98eaa34 100644
--- a/dispatcher/backend/src/utils/offliners.py
+++ b/dispatcher/backend/src/utils/offliners.py
@@ -26,6 +26,7 @@
     Offliner.zimit: od("zimit", True, "statsFilename"),
     Offliner.kolibri: od("kolibri2zim", True, False),
     Offliner.devdocs: od("devdocs2zim", True, False),
+    Offliner.mindtouch: od("mindtouch2zim", True, True),
 }
 
 