Skip to content

Commit

Permalink
Merge pull request #1038 from openzim/add_mindtouch
Browse files Browse the repository at this point in the history
Add mindtouch offliner for libretexts.org
  • Loading branch information
benoit74 authored Oct 31, 2024
2 parents fa98354 + 63a5880 commit 9d43704
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 0 deletions.
9 changes: 9 additions & 0 deletions dispatcher/backend/src/common/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class WarehousePath:
hidden_custom_apps = "/.hidden/custom_apps"
videos = "/videos"
zimit = "/zimit"
libretexts = "/libretexts"

@classmethod
def all(cls):
Expand Down Expand Up @@ -116,6 +117,7 @@ class ScheduleCategory:
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"
mindtouch = "mindtouch"

@classmethod
def all(cls):
Expand All @@ -141,6 +143,7 @@ def all(cls):
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
cls.mindtouch,
]

@classmethod
Expand Down Expand Up @@ -173,6 +176,7 @@ class DockerImageName:
ifixit = "openzim/ifixit"
freecodecamp = "openzim/freecodecamp"
devdocs = "openzim/devdocs"
mindtouch = "openzim/mindtouch"

@classmethod
def all(cls) -> set:
Expand Down Expand Up @@ -209,6 +213,7 @@ class Offliner:
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"
mindtouch = "mindtouch"

@classmethod
def all(cls):
Expand All @@ -227,6 +232,7 @@ def all(cls):
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
cls.mindtouch,
]

@classmethod
Expand All @@ -252,6 +258,7 @@ def get_image_name(cls, offliner):
cls.ifixit: DockerImageName.ifixit,
cls.freecodecamp: DockerImageName.freecodecamp,
cls.devdocs: DockerImageName.devdocs,
cls.mindtouch: DockerImageName.mindtouch,
}.get(offliner, "-")


Expand All @@ -275,6 +282,7 @@ class Platform:
ted = "ted"
devdocs = "devdocs"
shamela = "shamela"
libretexts = "libretexts"

@classmethod
def all(cls) -> str:
Expand All @@ -286,6 +294,7 @@ def all(cls) -> str:
cls.ted,
cls.devdocs,
cls.shamela,
cls.libretexts,
]

@classmethod
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
GutenbergFlagsSchema,
IFixitFlagsSchema,
KolibriFlagsSchema,
MindtouchFlagsSchema,
MWOfflinerFlagsSchema,
NautilusFlagsSchema,
NautilusFlagsSchemaRelaxed,
Expand Down Expand Up @@ -103,6 +104,7 @@ def get_offliner_schema(offliner):
Offliner.ifixit: IFixitFlagsSchema,
Offliner.freecodecamp: FreeCodeCampFlagsSchema,
Offliner.devdocs: DevDocsFlagsSchema,
Offliner.mindtouch: MindtouchFlagsSchema,
}.get(offliner, Schema)

@validates_schema
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from common.schemas.offliners.gutenberg import GutenbergFlagsSchema
from common.schemas.offliners.ifixit import IFixitFlagsSchema
from common.schemas.offliners.kolibri import KolibriFlagsSchema
from common.schemas.offliners.mindtouch import MindtouchFlagsSchema
from common.schemas.offliners.mwoffliner import MWOfflinerFlagsSchema
from common.schemas.offliners.nautilus import (
NautilusFlagsSchema,
Expand All @@ -22,6 +23,7 @@
"GutenbergFlagsSchema",
"IFixitFlagsSchema",
"KolibriFlagsSchema",
"MindtouchFlagsSchema",
"MWOfflinerFlagsSchema",
"NautilusFlagsSchema",
"NautilusFlagsSchemaRelaxed",
Expand Down
178 changes: 178 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/mindtouch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
from marshmallow import fields, validate

from common.schemas import SerializableSchema, String
from common.schemas.fields import (
validate_output,
validate_zim_description,
validate_zim_longdescription,
validate_zim_title,
)


class MindtouchFlagsSchema(SerializableSchema):
    """Schema declaring the flags accepted for the mindtouch offliner.

    Each attribute is one flag. Flags whose external name contains a hyphen
    are mapped through ``data_key``; the ``metadata`` dict carries the label,
    description and (where given) placeholder attached to the field.
    """

    class Meta:
        # Keep fields in declaration order.
        ordered = True

    # ------------------------------------------------------------------
    # Source instance
    # ------------------------------------------------------------------
    library_url = String(
        required=True,
        data_key="library-url",
        metadata={
            "label": "Library URL",
            "description": "URL of the Mindtouch / Nice CXone Expert instance (must NOT"
            " contain trailing slash), e.g. for LibreTexts Geosciences it is "
            "https://geo.libretexts.org",
        },
    )

    # ------------------------------------------------------------------
    # ZIM metadata
    # ------------------------------------------------------------------
    creator = String(
        required=True,
        metadata={
            "label": "Creator",
            "description": "Name of content creator",
        },
    )

    publisher = String(
        metadata={
            "label": "Publisher",
            "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
        },
    )

    file_name = String(
        data_key="file-name",
        metadata={
            "label": "ZIM filename",
            "description": "ZIM filename. Do not input trailing `.zim`, it "
            "will be automatically added. Defaults to {name}_{period}",
        },
    )

    name = String(
        required=True,
        metadata={
            "label": "ZIM name",
            "description": "Name of the ZIM.",
        },
    )

    title = String(
        required=True,
        validate=validate_zim_title,
        metadata={
            "label": "ZIM title",
            "description": "Title of the ZIM.",
        },
    )

    description = String(
        required=True,
        validate=validate_zim_description,
        metadata={
            "label": "ZIM description",
            "description": "Description of the ZIM.",
        },
    )

    long_description = String(
        data_key="long-description",
        validate=validate_zim_longdescription,
        metadata={
            "label": "ZIM long description",
            "description": "Long description of the ZIM.",
        },
    )

    tags = String(
        metadata={
            "label": "ZIM Tags",
            "description": "A semicolon (;) delimited list of tags to add to the ZIM.",
        }
    )

    secondary_color = String(
        data_key="secondary-color",
        metadata={
            "label": "Secondary color",
            "description": "Secondary (background) color of ZIM UI. Default: '#FFFFFF'",
        },
    )

    # ------------------------------------------------------------------
    # Content selection
    # ------------------------------------------------------------------
    page_id_include = String(
        data_key="page-id-include",
        metadata={
            "label": "Page ID include",
            "description": "CSV of page ids to include. Parent pages will be included "
            "as well for proper navigation, up to root (or subroot if --root-page-id is"
            " set). Can be combined with --page-title-include (pages with matching "
            "title or id will be included)",
        },
    )

    page_title_include = String(
        data_key="page-title-include",
        metadata={
            "label": "Page title include regex",
            "description": "Includes only pages with title matching the given regular "
            "expression, and their parent pages for proper navigation, up to root (or "
            "subroot if --root-page-id is set). Can be combined with --page-id-include "
            "(pages with matching title or id will be included)",
        },
    )

    page_title_exclude = String(
        data_key="page-title-exclude",
        metadata={
            "label": "Page title exclude regex",
            "description": "Excludes pages with title matching the given regular "
            "expression",
        },
    )

    root_page_id = String(
        data_key="root-page-id",
        metadata={
            "label": "Root page ID",
            "description": "ID of the root page to include in ZIM. Only this page and "
            "its subpages will be included in the ZIM",
        },
    )

    illustration_url = String(
        data_key="illustration-url",
        metadata={
            "label": "Illustration URL",
            "description": "URL to illustration to use for ZIM illustration and "
            "favicon",
        },
    )

    # ------------------------------------------------------------------
    # Runtime and output
    # ------------------------------------------------------------------
    # Only the literal booleans True / False are accepted for this flag.
    debug = fields.Boolean(
        metadata={"label": "Debug", "description": "Enable verbose output"},
        truthy=[True],
        falsy=[False],
    )

    # The default value and the only value accepted by the validator are the
    # same path, so this flag is effectively fixed.
    stats_filename = String(
        data_key="stats-filename",
        validate=validate.Equal("/output/task_progress.json"),
        load_default="/output/task_progress.json",
        dump_default="/output/task_progress.json",
        metadata={
            "label": "Stats filename",
            "placeholder": "/output/task_progress.json",
            "description": "Scraping progress file. "
            "Leave it as `/output/task_progress.json`",
        },
    )

    # Defaults to `/output`; the supplied value is checked by validate_output.
    output = String(
        validate=validate_output,
        load_default="/output",
        dump_default="/output",
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
    )
1 change: 1 addition & 0 deletions dispatcher/backend/src/utils/offliners.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
Offliner.zimit: od("zimit", True, "statsFilename"),
Offliner.kolibri: od("kolibri2zim", True, False),
Offliner.devdocs: od("devdocs2zim", True, False),
Offliner.mindtouch: od("mindtouch2zim", True, True),
}


Expand Down

0 comments on commit 9d43704

Please sign in to comment.