Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mindtouch offliner for libretexts.org #1038

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions dispatcher/backend/src/common/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class WarehousePath:
hidden_custom_apps = "/.hidden/custom_apps"
videos = "/videos"
zimit = "/zimit"
libretexts = "/libretexts"

@classmethod
def all(cls):
Expand Down Expand Up @@ -116,6 +117,7 @@ class ScheduleCategory:
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"
mindtouch = "mindtouch"

@classmethod
def all(cls):
Expand All @@ -141,6 +143,7 @@ def all(cls):
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
cls.mindtouch,
]

@classmethod
Expand Down Expand Up @@ -173,6 +176,7 @@ class DockerImageName:
ifixit = "openzim/ifixit"
freecodecamp = "openzim/freecodecamp"
devdocs = "openzim/devdocs"
mindtouch = "openzim/mindtouch"

@classmethod
def all(cls) -> set:
Expand Down Expand Up @@ -209,6 +213,7 @@ class Offliner:
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"
mindtouch = "mindtouch"

@classmethod
def all(cls):
Expand All @@ -227,6 +232,7 @@ def all(cls):
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
cls.mindtouch,
]

@classmethod
Expand All @@ -252,6 +258,7 @@ def get_image_name(cls, offliner):
cls.ifixit: DockerImageName.ifixit,
cls.freecodecamp: DockerImageName.freecodecamp,
cls.devdocs: DockerImageName.devdocs,
cls.mindtouch: DockerImageName.mindtouch,
}.get(offliner, "-")


Expand All @@ -275,6 +282,7 @@ class Platform:
ted = "ted"
devdocs = "devdocs"
shamela = "shamela"
libretexts = "libretexts"

@classmethod
def all(cls) -> str:
Expand All @@ -286,6 +294,7 @@ def all(cls) -> str:
cls.ted,
cls.devdocs,
cls.shamela,
cls.libretexts,
]

@classmethod
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
GutenbergFlagsSchema,
IFixitFlagsSchema,
KolibriFlagsSchema,
MindtouchFlagsSchema,
MWOfflinerFlagsSchema,
NautilusFlagsSchema,
NautilusFlagsSchemaRelaxed,
Expand Down Expand Up @@ -103,6 +104,7 @@ def get_offliner_schema(offliner):
Offliner.ifixit: IFixitFlagsSchema,
Offliner.freecodecamp: FreeCodeCampFlagsSchema,
Offliner.devdocs: DevDocsFlagsSchema,
Offliner.mindtouch: MindtouchFlagsSchema,
}.get(offliner, Schema)

@validates_schema
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from common.schemas.offliners.gutenberg import GutenbergFlagsSchema
from common.schemas.offliners.ifixit import IFixitFlagsSchema
from common.schemas.offliners.kolibri import KolibriFlagsSchema
from common.schemas.offliners.mindtouch import MindtouchFlagsSchema
from common.schemas.offliners.mwoffliner import MWOfflinerFlagsSchema
from common.schemas.offliners.nautilus import (
NautilusFlagsSchema,
Expand All @@ -22,6 +23,7 @@
"GutenbergFlagsSchema",
"IFixitFlagsSchema",
"KolibriFlagsSchema",
"MindtouchFlagsSchema",
"MWOfflinerFlagsSchema",
"NautilusFlagsSchema",
"NautilusFlagsSchemaRelaxed",
Expand Down
178 changes: 178 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/mindtouch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
from marshmallow import fields, validate

from common.schemas import SerializableSchema, String
from common.schemas.fields import (
validate_output,
validate_zim_description,
validate_zim_longdescription,
validate_zim_title,
)


class MindtouchFlagsSchema(SerializableSchema):
    """Flags schema for the mindtouch offliner.

    Describes the options that can be set on a recipe targeting a
    Mindtouch / Nice CXone Expert instance (e.g. a LibreTexts library).
    Each field's ``metadata`` carries the label and help text attached to
    the flag; ``data_key`` gives the dashed flag name when it differs from
    the Python attribute name.
    """

    class Meta:
        # Field declaration order below is significant (marshmallow `ordered`).
        ordered = True

    # --- Source instance -------------------------------------------------

    library_url = String(
        data_key="library-url",
        required=True,
        metadata={
            "label": "Library URL",
            "description": "URL of the Mindtouch / Nice CXone Expert instance "
            "(must NOT contain trailing slash), e.g. for LibreTexts Geosciences "
            "it is https://geo.libretexts.org",
        },
    )

    # --- ZIM metadata ----------------------------------------------------

    creator = String(
        required=True,
        metadata={
            "label": "Creator",
            "description": "Name of content creator",
        },
    )

    publisher = String(
        metadata={
            "label": "Publisher",
            "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
        },
    )

    file_name = String(
        data_key="file-name",
        metadata={
            "label": "ZIM filename",
            "description": "ZIM filename. Do not input trailing `.zim`, "
            "it will be automatically added. Defaults to {name}_{period}",
        },
    )

    name = String(
        required=True,
        metadata={
            "label": "ZIM name",
            "description": "Name of the ZIM.",
        },
    )

    title = String(
        required=True,
        validate=validate_zim_title,
        metadata={
            "label": "ZIM title",
            "description": "Title of the ZIM.",
        },
    )

    description = String(
        required=True,
        validate=validate_zim_description,
        metadata={
            "label": "ZIM description",
            "description": "Description of the ZIM.",
        },
    )

    long_description = String(
        data_key="long-description",
        validate=validate_zim_longdescription,
        metadata={
            "label": "ZIM long description",
            "description": "Long description of the ZIM.",
        },
    )

    tags = String(
        metadata={
            "label": "ZIM Tags",
            "description": "A semicolon (;) delimited list of tags to add to the ZIM.",
        },
    )

    secondary_color = String(
        data_key="secondary-color",
        metadata={
            "label": "Secondary color",
            "description": "Secondary (background) color of ZIM UI. Default: '#FFFFFF'",
        },
    )

    # --- Content selection -----------------------------------------------

    page_id_include = String(
        data_key="page-id-include",
        metadata={
            "label": "Page ID include",
            "description": "CSV of page ids to include. Parent pages will be "
            "included as well for proper navigation, up to root (or subroot if "
            "--root-page-id is set). Can be combined with --page-title-include "
            "(pages with matching title or id will be included)",
        },
    )

    page_title_include = String(
        data_key="page-title-include",
        metadata={
            "label": "Page title include regex",
            "description": "Includes only pages with title matching the given "
            "regular expression, and their parent pages for proper navigation, up "
            "to root (or subroot if --root-page-id is set). Can be combined with "
            "--page-id-include (pages with matching title or id will be included)",
        },
    )

    page_title_exclude = String(
        data_key="page-title-exclude",
        metadata={
            "label": "Page title exclude regex",
            "description": "Excludes pages with title matching the given "
            "regular expression",
        },
    )

    root_page_id = String(
        data_key="root-page-id",
        metadata={
            "label": "Root page ID",
            "description": "ID of the root page to include in ZIM. Only this page "
            "and its subpages will be included in the ZIM",
        },
    )

    illustration_url = String(
        data_key="illustration-url",
        metadata={
            "label": "Illustration URL",
            "description": "URL to illustration to use for ZIM illustration "
            "and favicon",
        },
    )

    # --- Runtime / output ------------------------------------------------

    # Only real booleans are accepted: truthy/falsy are narrowed to True/False.
    debug = fields.Boolean(
        metadata={"label": "Debug", "description": "Enable verbose output"},
        truthy=[True],
        falsy=[False],
    )

    # The progress file location is fixed: the validator only accepts the
    # same path that is used as load/dump default.
    stats_filename = String(
        data_key="stats-filename",
        load_default="/output/task_progress.json",
        dump_default="/output/task_progress.json",
        validate=validate.Equal("/output/task_progress.json"),
        metadata={
            "label": "Stats filename",
            "placeholder": "/output/task_progress.json",
            "description": "Scraping progress file. Leave it as "
            "`/output/task_progress.json`",
        },
    )

    # NOTE(review): validate_output is defined in common.schemas.fields;
    # presumably it restricts the value to `/output` — confirm there.
    output = String(
        load_default="/output",
        dump_default="/output",
        validate=validate_output,
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
    )
1 change: 1 addition & 0 deletions dispatcher/backend/src/utils/offliners.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
Offliner.zimit: od("zimit", True, "statsFilename"),
Offliner.kolibri: od("kolibri2zim", True, False),
Offliner.devdocs: od("devdocs2zim", True, False),
Offliner.mindtouch: od("mindtouch2zim", True, True),
}


Expand Down