feat(New Site Supported): ✨ Kaiscans.com
thezak48 committed Nov 13, 2023
1 parent ae0ec3c commit 0974bf6
Showing 7 changed files with 148 additions and 3 deletions.
3 changes: 2 additions & 1 deletion data/config.example.ini
@@ -2,4 +2,5 @@
 mangas = ./data/manga.txt
 multi_threaded = True
 num_threads = 10
-save_location = ./data/manga
\ No newline at end of file
+save_location = ./data/manga
+driver_path = /usr/bin/chromedriver
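
The new key is read once at startup in manga_dl.py via config.get("General", "driver_path") (see below). A minimal sketch of how the value resolves, using stdlib configparser and assuming the project's Config wrapper exposes the same get semantics; the fallback shown is the default that _generate_default_config writes:

from configparser import ConfigParser

config = ConfigParser()
config.read("./data/config.ini")

# Fall back to the same default that _generate_default_config writes.
driver_path = config.get("General", "driver_path", fallback="/usr/bin/chromedriver")
print(driver_path)  # /usr/bin/chromedriver on a typical Linux install
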
7 changes: 6 additions & 1 deletion manga_dl.py
@@ -22,6 +22,7 @@
 from manga_dl.utilities.sites.manhuaus import Manhuaus
 from manga_dl.utilities.sites.mangaread import Mangaread
 from manga_dl.utilities.sites.webtoons import Webtoons
+from manga_dl.utilities.sites.kaiscans import Kaiscans
 
 
 class GracefulThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
@@ -48,6 +49,8 @@ def submit(self, fn, *args, **kwargs):
     log, os.path.join(os.path.dirname(__file__), "data", "config.ini")
 )
 
+driver_path = config.get("General", "driver_path")
+
 parser = argparse.ArgumentParser(
     description="Download manga, manhua, or manhwa",
     usage="%(prog)s manga [options] save_location",
@@ -96,6 +99,8 @@ def get_website_class(url: str):
         return Mangaread(log)
     elif "webtoons.com" in url:
         return Webtoons(log)
+    elif "kaiscans.com" in url:
+        return Kaiscans(log, driver_path)
     else:
         raise ValueError(f"Unsupported website: {url}")

@@ -116,7 +121,7 @@ def get_website_class(url: str):

 for manga_url in manga_urls:
     manga = get_website_class(manga_url)
-    if isinstance(manga, Webtoons):
+    if isinstance(manga, (Webtoons, Kaiscans)):
         manga_name = manga_url
     else:
         manga_name = unquote(urlparse(manga_url).path.split("/")[-1])
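
The branch above exists because most adapters derive the on-disk manga name from the final URL path segment, while Webtoons and Kaiscans need the full URL preserved. A quick illustration of the slug extraction (hypothetical URL, for demonstration only):

from urllib.parse import unquote, urlparse

url = "https://manhuaus.com/manga/solo-leveling%20spinoff"  # hypothetical
manga_name = unquote(urlparse(url).path.split("/")[-1])
print(manga_name)  # solo-leveling spinoff

# A trailing slash would leave the last segment empty, so entries in
# manga.txt presumably omit trailing slashes.
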
1 change: 1 addition & 0 deletions manga_dl/utilities/config.py
@@ -31,6 +31,7 @@ def _generate_default_config(self):
"multi_threaded": "True",
"num_threads": "10",
"save_location": "./data/manga",
"driver_path": "/usr/bin/chromedriver",
}

with open(self.path, "w") as configfile:
4 changes: 4 additions & 0 deletions manga_dl/utilities/file_handler.py
@@ -37,6 +37,10 @@ def make_cbz(self, directory_path, complete_dir, output_path):
"""
Create a .cbz file from a directory.
"""
if not os.listdir(directory_path):
self.logger.warning("No files found in %s", directory_path)
return

output_path = os.path.join(
compelte_dir, f"{os.path.basename(directory_path)}.cbz"
)
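
The guard keeps make_cbz from emitting empty archives when a chapter download produced no files. A .cbz is an ordinary zip of page images under another extension; a hedged sketch of the archiving step the rest of the method presumably performs (the remainder falls outside this hunk, so the zipfile body below is an assumption, not the project's code):

import os
import zipfile

def make_cbz_sketch(directory_path, complete_dir):
    """Sketch only: zip a chapter directory into a .cbz archive."""
    if not os.listdir(directory_path):
        return None  # mirrors the guard added above
    output_path = os.path.join(complete_dir, f"{os.path.basename(directory_path)}.cbz")
    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as cbz:
        for name in sorted(os.listdir(directory_path)):  # keep page order stable
            cbz.write(os.path.join(directory_path, name), arcname=name)
    return output_path
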
2 changes: 1 addition & 1 deletion manga_dl/utilities/logging.py
@@ -24,7 +24,7 @@ def setup_logging():

     current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     log_filename = f"manga_dl_{current_time}.log"
-    log_dir = "./logs"
+    log_dir = "./data/logs"
     os.makedirs(log_dir, exist_ok=True)
     log_filepath = os.path.join(log_dir, log_filename)
     file_handler = RotatingFileHandler(
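
Moving the log directory under ./data/ keeps logs on the same volume as the config and downloads, which helps containerized setups that only mount ./data. For context, a hedged sketch of the handler wiring around the changed line (maxBytes and backupCount are illustrative; the real values fall outside this hunk):

import os
from datetime import datetime
from logging.handlers import RotatingFileHandler

current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_dir = "./data/logs"
os.makedirs(log_dir, exist_ok=True)  # created on first run
log_filepath = os.path.join(log_dir, f"manga_dl_{current_time}.log")

# Illustrative rotation limits, not the project's actual values.
file_handler = RotatingFileHandler(log_filepath, maxBytes=5 * 1024 * 1024, backupCount=3)
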
133 changes: 133 additions & 0 deletions manga_dl/utilities/sites/kaiscans.py
@@ -0,0 +1,133 @@
"""
This module defines the Kaiscans class for interacting with the website kaiscans.com.
The Kaiscans class provides methods to fetch manga IDs, chapters, images,
and metadata from kaiscans.com, and to download manga images and save them as .cbz files.
Classes:
    Kaiscans: A class to interact with the website kaiscans.com.
"""

from time import sleep

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service


class Kaiscans:
    """
    A class to interact with the website kaiscans.com.
    This class provides methods to fetch manga IDs, chapters, images,
    and metadata from kaiscans.com, and to download manga images and save them as .cbz files.
    Attributes:
        logger: An instance of logging.Logger used for log output.
    """

    base_headers = {
        "authority": "kaiscans.com",
        "accept-language": "en-US,en;q=0.9,es-US;q=0.8,es;q=0.7,en-GB-oxendict;q=0.6",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-site": "none",
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
        ),
    }
    headers_image = base_headers

    def __init__(self, logger, driver_path):
        self.logger = logger
        self.driver_path = driver_path

    def get_manga_id(self, manga_name: str):
        """Get the series URL and title for a given URL."""
        response = requests.get(manga_name, headers=self.base_headers, timeout=30)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            node = soup.find("div", {"id": "titlemove"})
            title = node.h1
            return manga_name, title.text.strip()
        self.logger.error("Status code: %s", response.status_code)
        return None

    def get_manga_chapters(self, manga_id: str):
        """
        Get the manga chapters for a given manga ID.
        """
        result = requests.get(
            manga_id,
            headers=self.base_headers,
            timeout=30,
        )

        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")
            chapters = []

            for li in soup.find("div", class_="eplister").find_all("li"):
                chapter_number = li.get("data-num")
                url = li.find("a").get("href")
                chapters.append((chapter_number, url))

            chapters = sorted(chapters, key=lambda x: float(x[0]))

            return chapters

        return None

    def get_chapter_images(self, url: str):
        """
        Get the manga chapter images for a given chapter URL.
        """
        options = Options()
        options.add_argument("--headless")
        # Selenium 4 no longer accepts the driver path as a positional
        # argument; it must be wrapped in a Service object.
        driver = webdriver.Chrome(service=Service(self.driver_path), options=options)
        try:
            driver.get(url)

            # Give the reader's lazy-loaded images time to populate the DOM.
            sleep(5)

            soup = BeautifulSoup(driver.page_source, "html.parser")

            image_nodes = soup.find("div", id="readerarea").find_all("img")
            images = []
            for image_node in image_nodes:
                # Lazy-loaded images keep the real URL in data-src.
                data_src = image_node.get("data-src")
                if data_src:
                    images.append(data_src.strip())
                else:
                    images.append(image_node["src"].strip())
        finally:
            driver.quit()

        return images

    def get_manga_metadata(self, manga_url: str):
        """
        Get the manga metadata for a given manga URL.
        """
        result = requests.get(
            manga_url,
            headers=self.base_headers,
            timeout=30,
        )

        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")

            genres_content = soup.find("div", {"class": "wd-full"})
            genres = [a.text for a in genres_content.find_all("a")]

            summary_content = soup.find("div", {"itemprop": "description"})
            summary = summary_content.p.text

            return genres, summary

        self.logger.error("Unable to fetch the manga metadata")
        return None
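
Taken together, the adapter follows the same fetch flow the get_website_class dispatcher expects. A hedged usage sketch (the series URL is hypothetical, and the None returns the methods can produce are not handled here):

import logging

from manga_dl.utilities.sites.kaiscans import Kaiscans

log = logging.getLogger("manga_dl")
site = Kaiscans(log, driver_path="/usr/bin/chromedriver")

series_url = "https://kaiscans.com/manga/example-series"  # hypothetical
manga_id, title = site.get_manga_id(series_url)
chapters = site.get_manga_chapters(manga_id)   # [(chapter_number, url), ...]
genres, summary = site.get_manga_metadata(manga_id)

number, chapter_url = chapters[0]
images = site.get_chapter_images(chapter_url)  # launches headless Chrome
print(f"{title} chapter {number}: {len(images)} pages")
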
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
 beautifulsoup4==4.12.2
 Requests==2.31.0
 rich==13.6.0
+selenium==4.15.2
