From 0974bf608dc61b4d40bccbb5266064f8ce3bc083 Mon Sep 17 00:00:00 2001
From: thezak48
Date: Mon, 13 Nov 2023 23:06:33 +0000
Subject: [PATCH] feat(New Site Supported): :sparkles: Kaiscans.com

---
 data/config.example.ini              |   3 +-
 manga_dl.py                          |   7 +-
 manga_dl/utilities/config.py         |   1 +
 manga_dl/utilities/file_handler.py   |   4 +
 manga_dl/utilities/logging.py        |   2 +-
 manga_dl/utilities/sites/kaiscans.py | 164 +++++++++++++++++++++++++++
 requirements.txt                     |   1 +
 7 files changed, 179 insertions(+), 3 deletions(-)
 create mode 100644 manga_dl/utilities/sites/kaiscans.py

diff --git a/data/config.example.ini b/data/config.example.ini
index 3192019..17dd7a5 100644
--- a/data/config.example.ini
+++ b/data/config.example.ini
@@ -2,4 +2,5 @@
 mangas = ./data/manga.txt
 multi_threaded = True
 num_threads = 10
-save_location = ./data/manga
\ No newline at end of file
+save_location = ./data/manga
+driver_path = /usr/bin/chromedriver
\ No newline at end of file
diff --git a/manga_dl.py b/manga_dl.py
index 13274d7..4728271 100644
--- a/manga_dl.py
+++ b/manga_dl.py
@@ -22,6 +22,7 @@
 from manga_dl.utilities.sites.manhuaus import Manhuaus
 from manga_dl.utilities.sites.mangaread import Mangaread
 from manga_dl.utilities.sites.webtoons import Webtoons
+from manga_dl.utilities.sites.kaiscans import Kaiscans


 class GracefulThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
@@ -48,6 +49,8 @@ def submit(self, fn, *args, **kwargs):
     log, os.path.join(os.path.dirname(__file__), "data", "config.ini")
 )

+driver_path = config.get("General", "driver_path")
+
 parser = argparse.ArgumentParser(
     description="Download download manga's, manhua's or manhwa's",
     usage="%(prog)s manga [options] save_location",
@@ -96,6 +99,8 @@ def get_website_class(url: str):
         return Mangaread(log)
     elif "webtoons.com" in url:
         return Webtoons(log)
+    elif "kaiscans.com" in url:
+        return Kaiscans(log, driver_path)
     else:
         raise ValueError(f"Unsupported website: {url}")

@@ -116,7 +121,7 @@ def get_website_class(url: str):
     for manga_url in manga_urls:
         manga = get_website_class(manga_url)

-        if isinstance(manga, Webtoons):
+        if isinstance(manga, (Webtoons, Kaiscans)):
             manga_name = manga_url
         else:
             manga_name = unquote(urlparse(manga_url).path.split("/")[-1])
diff --git a/manga_dl/utilities/config.py b/manga_dl/utilities/config.py
index 50286c7..26902c6 100644
--- a/manga_dl/utilities/config.py
+++ b/manga_dl/utilities/config.py
@@ -31,6 +31,7 @@ def _generate_default_config(self):
             "multi_threaded": "True",
             "num_threads": "10",
             "save_location": "./data/manga",
+            "driver_path": "/usr/bin/chromedriver",
         }

         with open(self.path, "w") as configfile:
diff --git a/manga_dl/utilities/file_handler.py b/manga_dl/utilities/file_handler.py
index e5a2f56..99f0124 100644
--- a/manga_dl/utilities/file_handler.py
+++ b/manga_dl/utilities/file_handler.py
@@ -37,6 +37,10 @@ def make_cbz(self, directory_path, compelte_dir, output_path):
         """
         Create a .cbz file from a directory.
         """
+        if not os.listdir(directory_path):
+            self.logger.warning("No files found in %s", directory_path)
+            return
+
         output_path = os.path.join(
             compelte_dir, f"{os.path.basename(directory_path)}.cbz"
         )
diff --git a/manga_dl/utilities/logging.py b/manga_dl/utilities/logging.py
index 8ba9eed..4dc9037 100644
--- a/manga_dl/utilities/logging.py
+++ b/manga_dl/utilities/logging.py
@@ -24,7 +24,7 @@ def setup_logging():
     current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     log_filename = f"manga_dl_{current_time}.log"

-    log_dir = "./logs"
+    log_dir = "./data/logs"
     os.makedirs(log_dir, exist_ok=True)
     log_filepath = os.path.join(log_dir, log_filename)
     file_handler = RotatingFileHandler(
diff --git a/manga_dl/utilities/sites/kaiscans.py b/manga_dl/utilities/sites/kaiscans.py
new file mode 100644
index 0000000..132cb8d
--- /dev/null
+++ b/manga_dl/utilities/sites/kaiscans.py
@@ -0,0 +1,164 @@
+"""
+This module defines the Kaiscans class for interacting with the website kaiscans.com.
+
+The Kaiscans class provides methods to fetch manga IDs, chapters, images,
+and metadata from kaiscans.com, and to download manga images and save them as .cbz files.
+
+Classes:
+    Kaiscans: A class to interact with the website kaiscans.com.
+"""
+
+from time import sleep
+
+import requests
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+
+
+class Kaiscans:
+    """
+    A class to interact with the website kaiscans.com.
+
+    Series pages are fetched with requests and parsed with BeautifulSoup;
+    chapter pages are loaded in headless Chrome through Selenium first.
+
+    Attributes:
+        logger: Logger used to report failed requests.
+        driver_path: Path to the chromedriver executable.
+    """
+
+    base_headers = {
+        "authority": "kaiscans.com",
+        "accept-language": "en-US,en;q=0.9,es-US;q=0.8,es;q=0.7,en-GB-oxendict;q=0.6",
+        "cache-control": "no-cache",
+        "pragma": "no-cache",
+        "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
+        "sec-ch-ua-mobile": "?0",
+        "sec-ch-ua-platform": '"Windows"',
+        "sec-fetch-site": "none",
+        "user-agent": (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
+        ),
+    }
+    headers_image = base_headers
+
+    def __init__(self, logger, driver_path):
+        self.logger = logger
+        self.driver_path = driver_path
+
+    def get_manga_id(self, manga_name: str):
+        """
+        Get the series title for a given series URL.
+
+        Returns a (url, title) tuple, or None when the page cannot be
+        fetched or has no title heading.
+        """
+        response = requests.get(manga_name, headers=self.base_headers, timeout=30)
+        if response.status_code != 200:
+            self.logger.error(f"Status code: {response.status_code}")
+            return None
+
+        soup = BeautifulSoup(response.text, "html.parser")
+        node = soup.find("div", {"id": "titlemove"})
+        if node is None or node.h1 is None:
+            self.logger.error("unable to find the manga title")
+            return None
+
+        return manga_name, node.h1.text.strip()
+
+    def get_manga_chapters(self, manga_id: str):
+        """
+        Get the manga chapters for a given series URL.
+
+        Returns a list of (chapter_number, url) tuples sorted by chapter
+        number, or None when the chapter list cannot be fetched.
+        """
+        result = requests.get(
+            manga_id,
+            headers=self.base_headers,
+            timeout=30,
+        )
+        if result.status_code != 200:
+            self.logger.error(f"Status code: {result.status_code}")
+            return None
+
+        soup = BeautifulSoup(result.text, "html.parser")
+        chapter_list = soup.find("div", class_="eplister")
+        if chapter_list is None:
+            self.logger.error("unable to find the chapter list")
+            return None
+
+        chapters = [
+            (li.get("data-num"), li.find("a").get("href"))
+            for li in chapter_list.find_all("li")
+        ]
+
+        # data-num is a string; sort numerically so "10" follows "9".
+        return sorted(chapters, key=lambda chapter: float(chapter[0]))
+
+    def get_chapter_images(self, url: str):
+        """
+        Get the manga chapter image URLs for a given chapter URL.
+
+        The page is loaded in headless Chrome before it is parsed. Returns a
+        list of image URLs, empty when the reader area is missing.
+        """
+        options = Options()
+        options.add_argument("--headless")
+        # The Selenium release pinned in requirements.txt (4.15.2) takes the
+        # driver path through a Service object, not as a positional argument.
+        service = Service(executable_path=self.driver_path)
+        driver = webdriver.Chrome(service=service, options=options)
+        try:
+            driver.get(url)
+            # Fixed wait kept from the original flow, presumably to let the
+            # reader finish inserting its images.
+            sleep(5)
+            page_source = driver.page_source
+        finally:
+            # Always shut the browser down, even when loading the page fails.
+            driver.quit()
+
+        soup = BeautifulSoup(page_source, "html.parser")
+        reader = soup.find("div", id="readerarea")
+        if reader is None:
+            self.logger.error("unable to find the chapter images")
+            return []
+
+        images = []
+        for image_node in reader.find_all("img"):
+            # Prefer data-src when present, otherwise fall back to src.
+            source = image_node.get("data-src") or image_node.get("src")
+            if source:
+                images.append(source.strip())
+
+        return images
+
+    def get_manga_metadata(self, manga_url: str):
+        """
+        Get the manga metadata for a given series URL.
+
+        Returns a (genres, summary) tuple, or None when the page cannot be
+        fetched.
+        """
+        result = requests.get(
+            manga_url,
+            headers=self.base_headers,
+            timeout=30,
+        )
+        if result.status_code != 200:
+            self.logger.error("unable to fetch the manga metadata")
+            return None
+
+        soup = BeautifulSoup(result.text, "html.parser")
+
+        genres_content = soup.find("div", {"class": "wd-full"})
+        genres = [a.text for a in genres_content.find_all("a")]
+
+        summary_content = soup.find("div", {"itemprop": "description"})
+        summary = summary_content.p.text
+
+        return genres, summary
diff --git a/requirements.txt b/requirements.txt
index aa464ea..514796c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 beautifulsoup4==4.12.2
 Requests==2.31.0
 rich==13.6.0
+selenium==4.15.2