feat(New Site Supported): ✨ Kaiscans.com
thezak48 committed Nov 13, 2023
1 parent ae0ec3c commit 0974bf6
Showing 7 changed files with 148 additions and 3 deletions.
3 changes: 2 additions & 1 deletion data/config.example.ini
@@ -2,4 +2,5 @@
 mangas = ./data/manga.txt
 multi_threaded = True
 num_threads = 10
-save_location = ./data/manga
\ No newline at end of file
+save_location = ./data/manga
+driver_path = /usr/bin/chromedriver
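
The new key is read once at startup in manga_dl.py via config.get("General", "driver_path") (see below). A minimal sketch of how the value resolves, using stdlib configparser and assuming the project's Config wrapper exposes the same get semantics; the fallback shown is the default that _generate_default_config writes:

from configparser import ConfigParser

config = ConfigParser()
config.read("./data/config.ini")

# Fall back to the same default that _generate_default_config writes.
driver_path = config.get("General", "driver_path", fallback="/usr/bin/chromedriver")
print(driver_path)  # /usr/bin/chromedriver on a typical Linux install
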
7 changes: 6 additions & 1 deletion manga_dl.py
@@ -22,6 +22,7 @@
 from manga_dl.utilities.sites.manhuaus import Manhuaus
 from manga_dl.utilities.sites.mangaread import Mangaread
 from manga_dl.utilities.sites.webtoons import Webtoons
+from manga_dl.utilities.sites.kaiscans import Kaiscans
 
 
 class GracefulThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
@@ -48,6 +49,8 @@ def submit(self, fn, *args, **kwargs):
     log, os.path.join(os.path.dirname(__file__), "data", "config.ini")
 )
 
+driver_path = config.get("General", "driver_path")
+
 parser = argparse.ArgumentParser(
     description="Download manga, manhua, or manhwa",
     usage="%(prog)s manga [options] save_location",
@@ -96,6 +99,8 @@ def get_website_class(url: str):
         return Mangaread(log)
     elif "webtoons.com" in url:
         return Webtoons(log)
+    elif "kaiscans.com" in url:
+        return Kaiscans(log, driver_path)
     else:
         raise ValueError(f"Unsupported website: {url}")

@@ -116,7 +121,7 @@ def get_website_class(url: str):

 for manga_url in manga_urls:
     manga = get_website_class(manga_url)
-    if isinstance(manga, Webtoons):
+    if isinstance(manga, (Webtoons, Kaiscans)):
         manga_name = manga_url
     else:
         manga_name = unquote(urlparse(manga_url).path.split("/")[-1])
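
The branch above exists because most adapters derive the on-disk manga name from the final URL path segment, while Webtoons and Kaiscans need the full URL preserved. A quick illustration of the slug extraction (hypothetical URL, for demonstration only):

from urllib.parse import unquote, urlparse

url = "https://manhuaus.com/manga/solo-leveling%20spinoff"  # hypothetical
manga_name = unquote(urlparse(url).path.split("/")[-1])
print(manga_name)  # solo-leveling spinoff

# A trailing slash would leave the last segment empty, so entries in
# manga.txt presumably omit trailing slashes.
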
1 change: 1 addition & 0 deletions manga_dl/utilities/config.py
@@ -31,6 +31,7 @@ def _generate_default_config(self):
"multi_threaded": "True",
"num_threads": "10",
"save_location": "./data/manga",
"driver_path": "/usr/bin/chromedriver",
}

with open(self.path, "w") as configfile:
4 changes: 4 additions & 0 deletions manga_dl/utilities/file_handler.py
@@ -37,6 +37,10 @@ def make_cbz(self, directory_path, complete_dir, output_path):
"""
Create a .cbz file from a directory.
"""
if not os.listdir(directory_path):
self.logger.warning("No files found in %s", directory_path)
return

output_path = os.path.join(
compelte_dir, f"{os.path.basename(directory_path)}.cbz"
)
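
The guard keeps make_cbz from emitting empty archives when a chapter download produced no files. A .cbz is an ordinary zip of page images under another extension; a hedged sketch of the archiving step the rest of the method presumably performs (the remainder falls outside this hunk, so the zipfile body below is an assumption, not the project's code):

import os
import zipfile

def make_cbz_sketch(directory_path, complete_dir):
    """Sketch only: zip a chapter directory into a .cbz archive."""
    if not os.listdir(directory_path):
        return None  # mirrors the guard added above
    output_path = os.path.join(complete_dir, f"{os.path.basename(directory_path)}.cbz")
    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as cbz:
        for name in sorted(os.listdir(directory_path)):  # keep page order stable
            cbz.write(os.path.join(directory_path, name), arcname=name)
    return output_path
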
2 changes: 1 addition & 1 deletion manga_dl/utilities/logging.py
@@ -24,7 +24,7 @@ def setup_logging():

     current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     log_filename = f"manga_dl_{current_time}.log"
-    log_dir = "./logs"
+    log_dir = "./data/logs"
     os.makedirs(log_dir, exist_ok=True)
     log_filepath = os.path.join(log_dir, log_filename)
     file_handler = RotatingFileHandler(
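
Moving the log directory under ./data/ keeps logs on the same volume as the config and downloads, which helps containerized setups that only mount ./data. For context, a hedged sketch of the handler wiring around the changed line (maxBytes and backupCount are illustrative; the real values fall outside this hunk):

import os
from datetime import datetime
from logging.handlers import RotatingFileHandler

current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_dir = "./data/logs"
os.makedirs(log_dir, exist_ok=True)  # created on first run
log_filepath = os.path.join(log_dir, f"manga_dl_{current_time}.log")

# Illustrative rotation limits, not the project's actual values.
file_handler = RotatingFileHandler(log_filepath, maxBytes=5 * 1024 * 1024, backupCount=3)
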
133 changes: 133 additions & 0 deletions manga_dl/utilities/sites/kaiscans.py
@@ -0,0 +1,133 @@
"""
This module defines the Kaiscans class for interacting with the website kaiscans.com.
The Kaiscans class provides methods to fetch manga IDs, chapters, images,
and metadata from kaiscans.com, and to download manga images and save them as .cbz files.
Classes:
    Kaiscans: A class to interact with the website kaiscans.com.
"""

from time import sleep

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service


class Kaiscans:
    """
    A class to interact with the website kaiscans.com.
    This class provides methods to fetch manga IDs, chapters, images,
    and metadata from kaiscans.com, and to download manga images and save them as .cbz files.
    Attributes:
        logger: An instance of logging.Logger used for log output.
    """

    base_headers = {
        "authority": "kaiscans.com",
        "accept-language": "en-US,en;q=0.9,es-US;q=0.8,es;q=0.7,en-GB-oxendict;q=0.6",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-site": "none",
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
        ),
    }
    headers_image = base_headers

    def __init__(self, logger, driver_path):
        self.logger = logger
        self.driver_path = driver_path

    def get_manga_id(self, manga_name: str):
        """Get the series URL and title for a given URL."""
        response = requests.get(manga_name, headers=self.base_headers, timeout=30)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            node = soup.find("div", {"id": "titlemove"})
            title = node.h1
            return manga_name, title.text.strip()
        self.logger.error("Status code: %s", response.status_code)
        return None

    def get_manga_chapters(self, manga_id: str):
        """
        Get the manga chapters for a given manga ID.
        """
        result = requests.get(
            manga_id,
            headers=self.base_headers,
            timeout=30,
        )

        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")
            chapters = []

            for li in soup.find("div", class_="eplister").find_all("li"):
                chapter_number = li.get("data-num")
                url = li.find("a").get("href")
                chapters.append((chapter_number, url))

            chapters = sorted(chapters, key=lambda x: float(x[0]))

            return chapters

        return None

    def get_chapter_images(self, url: str):
        """
        Get the manga chapter images for a given chapter URL.
        """
        options = Options()
        options.add_argument("--headless")
        # Selenium 4 no longer accepts the driver path as a positional
        # argument; it must be wrapped in a Service object.
        driver = webdriver.Chrome(service=Service(self.driver_path), options=options)
        try:
            driver.get(url)

            # Give the reader's lazy-loaded images time to populate the DOM.
            sleep(5)

            soup = BeautifulSoup(driver.page_source, "html.parser")

            image_nodes = soup.find("div", id="readerarea").find_all("img")
            images = []
            for image_node in image_nodes:
                # Lazy-loaded images keep the real URL in data-src.
                data_src = image_node.get("data-src")
                if data_src:
                    images.append(data_src.strip())
                else:
                    images.append(image_node["src"].strip())
        finally:
            driver.quit()

        return images

    def get_manga_metadata(self, manga_url: str):
        """
        Get the manga metadata for a given manga URL.
        """
        result = requests.get(
            manga_url,
            headers=self.base_headers,
            timeout=30,
        )

        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")

            genres_content = soup.find("div", {"class": "wd-full"})
            genres = [a.text for a in genres_content.find_all("a")]

            summary_content = soup.find("div", {"itemprop": "description"})
            summary = summary_content.p.text

            return genres, summary

        self.logger.error("Unable to fetch the manga metadata")
        return None
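
Taken together, the adapter follows the same fetch flow the get_website_class dispatcher expects. A hedged usage sketch (the series URL is hypothetical, and the None returns the methods can produce are not handled here):

import logging

from manga_dl.utilities.sites.kaiscans import Kaiscans

log = logging.getLogger("manga_dl")
site = Kaiscans(log, driver_path="/usr/bin/chromedriver")

series_url = "https://kaiscans.com/manga/example-series"  # hypothetical
manga_id, title = site.get_manga_id(series_url)
chapters = site.get_manga_chapters(manga_id)   # [(chapter_number, url), ...]
genres, summary = site.get_manga_metadata(manga_id)

number, chapter_url = chapters[0]
images = site.get_chapter_images(chapter_url)  # launches headless Chrome
print(f"{title} chapter {number}: {len(images)} pages")
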
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
 beautifulsoup4==4.12.2
 Requests==2.31.0
 rich==13.6.0
+selenium==4.15.2
