diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e095743f98..4608b6e4cd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -949,6 +949,12 @@ Consider all sites to be NSFW unless otherwise known. Comics, Episodes + + Webtoon.xyz + https://www.webtoon.xyz/ + Chapters, Comics + + Weibo https://www.weibo.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fa56bfb45d..9c1d530b3e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -167,6 +167,7 @@ "weasyl", "webmshare", "webtoons", + "webtoonxyz", "weibo", "wikiart", "wikifeet", diff --git a/gallery_dl/extractor/webtoonxyz.py b/gallery_dl/extractor/webtoonxyz.py new file mode 100644 index 0000000000..c822540333 --- /dev/null +++ b/gallery_dl/extractor/webtoonxyz.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Ion Chary + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.webtoon.xyz/""" + +from .. import text, util +from .common import Extractor, GalleryExtractor, Message + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoon\.xyz/read/([^/?#]+)" + + +class WebtoonxyzBase: + category = "webtoonxyz" + root = "https://www.webtoon.xyz" + + def setup_agegate_cookies(self): + self._update_cookies( + { + "wpmanga-adault": "1", + } + ) + + +class WebtoonxyzChapterExtractor(WebtoonxyzBase, GalleryExtractor): + """Extractor for a chapter on webtoon.xyz""" + + subcategory = "chapter" + directory_fmt = ("{category}", "{comic}") + filename_fmt = "{chapter_no:>02}-{num:>02}.{extension}" + archive_fmt = "{comic}_{chapter_no:>02}_{num}" + pattern = BASE_PATTERN + r"/([^/?#]+)/?" + test = ( + ( + "https://www.webtoon.xyz/read/learning-the-hard-way/chapter-1", + { + "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38", + "content": ( + "1748c7e82b6db910fa179f6dc7c4281b0f680fa7", + "42055e44659f6ffc410b3fb6557346dfbb993df3", + "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9", + ), + "count": 5, + "keyword": { + "comic": "learning-the-hard-way", + "description": r"re:^Bullied ruthlessly by girls .+", + "chapter_no": "1", + "title": "Learning The Hard Way", + }, + }, + ), + ) + + def __init__(self, match): + self.comic, self.chapter = match.groups() + + url = "{}/read/{}/{}/".format(self.root, self.comic, self.chapter) + GalleryExtractor.__init__(self, match, url) + self.session.headers["Referer"] = self.root + "/" + self.setup_agegate_cookies() + + def metadata(self, page): + locale, pos = text.extract( + page, '") + chapter_urls = [ + match.group(0) + for match in WebtoonxyzChapterExtractor.pattern.finditer(page) + ] + + for chapter_url in chapter_urls: + yield Message.Queue, chapter_url, data diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 5415276625..45a8266f9c 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -126,6 +126,7 @@ "wallpapercave" : "Wallpaper Cave", "webmshare" : "webmshare", "webtoons" : "Webtoon", + "webtoonxyz" : "Webtoon.xyz", "wikiart" : "WikiArt.org", "xbunkr" : "xBunkr", "xhamster" : "xHamster",