feat(New Site Supported): ✨ Mangakakalot
Showing 2 changed files with 189 additions and 1 deletion.
""" | ||
This module defines the Mangakakalot class for interacting with the website mangakakalot.com. | ||
The Mangakakalot class provides methods to fetch manga IDs, chapters, images, | ||
and metadata from mangakakalot.com, and to download manga images and save them as .cbz files. | ||
Classes: | ||
Mangakakalot: A class to interact with the website mangakakalot.com. | ||
""" | ||
import re | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
|
||
class Mangakakalot:
    """
    A class to interact with the website mangakakalot.com.

    This class provides methods to fetch manga IDs, chapters, images,
    and metadata from mangakakalot.com, and to download manga images and save them as .cbz files.

    Attributes:
        logger: A log.Logger instance used for logging.
    """

    base_headers = {
        "authority": "mangakakalot.com",
        "accept-language": "en-US,en;q=0.9,es-US;q=0.8,es;q=0.7,en-GB-oxendict;q=0.6",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-site": "none",
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
        ),
    }

    headers_image = base_headers.copy()
    headers_image.update(
        {
            "accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
            "referer": "https://mangakakalot.com/",
            "sec-fetch-dest": "image",
            "sec-fetch-mode": "no-cors",
            "sec-fetch-site": "same-site",
        }
    )

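    # Hedged note: headers_image is presumably what the image downloader sends,
    # e.g. requests.get(image_url, headers=Mangakakalot.headers_image, timeout=30);
    # the referer and sec-fetch-* fields mimic a browser so the image host does
    # not reject the request. The download step itself is not part of this diff.
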
    def __init__(self, logger):
        self.logger = logger

    def get_manga_id(self, manga_name: str):
        """
        Get the manga ID (its URL) and title for a given manga URL.
        """

        url = manga_name

        result = requests.get(url, headers=self.base_headers, timeout=30)

        if "mangakakalot" in manga_name:
            if result.status_code == 200:
                soup = BeautifulSoup(result.text, "html.parser")
                node = soup.find("ul", {"class": "manga-info-text"})
                if node is not None and node.h1 is not None:
                    return url, node.h1.text.strip()
        elif "chapmanganato" in manga_name:
            if result.status_code == 200:
                soup = BeautifulSoup(result.text, "html.parser")
                node = soup.find("div", {"class": "story-info-right"})
                if node is not None and node.h1 is not None:
                    return url, node.h1.text.strip()
        self.logger.error("unable to find the manga id needed")
        return None

    def get_manga_chapters(self, manga_id: str):
        """
        Get the manga chapters for a given manga ID (URL).

        Returns a list of (chapter_number, chapter_url) tuples sorted by
        chapter number, or None on failure.
        """
        result = requests.get(
            url=f"{manga_id}",
            headers=self.base_headers,
            timeout=30,
        )
        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")
            # The two supported hosts use different markup for the chapter list.
            if "mangakakalot" in manga_id:
                chapter_list = soup.find("div", {"class": "chapter-list"})
                rows = chapter_list.find_all("div", {"class": "row"}) if chapter_list else None
                split_token = "/chapter_"
            elif "chapmanganato" in manga_id:
                chapter_list = soup.find("div", {"class": "panel-story-chapter-list"})
                rows = chapter_list.find_all("li", {"class": "a-h"}) if chapter_list else None
                split_token = "/chapter-"
            else:
                rows = None
            if rows is not None:
                chapters = []
                for row in rows:
                    try:
                        url = row.find("a")["href"]  # each row's 'a' tag holds the chapter URL
                        chapter_number_raw = url.split(split_token)[-1]
                        # Capture a float ("7.6") or integer ("7") chapter number.
                        number_parts = re.findall(r"\d+\.\d+|\d+", chapter_number_raw)
                        if "." in number_parts[0]:  # float chapter number, e.g. 7.6
                            chapter_number = float(number_parts[0])
                        else:  # integer chapter number, e.g. 7
                            chapter_number = int(number_parts[0])
                        chapters.append((chapter_number, url))
                    except (TypeError, IndexError):  # missing link or no parsable number
                        continue
                chapters.sort(key=lambda x: x[0])  # sort by chapter number
                return chapters
        self.logger.error("unable to find the manga chapters needed")
        return None

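    # Illustrative return shape for get_manga_chapters (hypothetical URLs):
    # [(1, "https://mangakakalot.com/chapter/.../chapter_1"),
    #  (1.1, "https://mangakakalot.com/chapter/.../chapter_1.1"), ...]
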
    def get_chapter_images(self, url: str):
        """
        Get the manga chapter image URLs for a given chapter URL.
        """
        result = requests.get(
            url=url,
            headers=self.base_headers,
            timeout=30,
        )

        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")
            node = soup.find("div", {"class": "container-chapter-reader"})
            if node is not None:
                images = []
                for image_node in node.find_all("img"):
                    images.append(image_node["src"].strip())

                return images
        self.logger.error("unable to find the chapter images needed")
        return None

    def get_manga_metadata(self, manga_name: str):
        """
        Get the manga metadata for a given manga name.

        Currently a stub: metadata scraping is not implemented for this site,
        so it returns empty genres and summary lists.
        """
        genres = []
        summary = []

        return genres, summary
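
For context, a minimal usage sketch of the new class. Assumptions are flagged inline: a standard logging.Logger stands in for the project's log.Logger, the manga URL is a placeholder, and the import path is hypothetical since the file name is not shown in this view.

import logging

from mangakakalot import Mangakakalot  # hypothetical import path for this new file

logging.basicConfig(level=logging.INFO)
scraper = Mangakakalot(logging.getLogger("mangakakalot"))

result = scraper.get_manga_id("https://mangakakalot.com/manga/example")  # placeholder URL
if result is not None:
    url, title = result
    for chapter_number, chapter_url in scraper.get_manga_chapters(url) or []:
        images = scraper.get_chapter_images(chapter_url) or []
        print(title, chapter_number, len(images))

The docstring also mentions saving chapters as .cbz files; that step is not part of this diff. A .cbz is simply a ZIP archive of page images, so the packaging could be as small as the sketch below (write_cbz is a hypothetical helper, not this project's API).

import zipfile

def write_cbz(path, pages):
    # pages: (filename, raw image bytes) pairs, already downloaded
    with zipfile.ZipFile(path, "w") as archive:
        for name, data in pages:
            archive.writestr(name, data)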