Skip to content

Commit

Permalink
feat(New Site Supported): ✨ Mangakakalot
Browse files Browse the repository at this point in the history
  • Loading branch information
thezak48 committed Nov 14, 2023
1 parent 0e26115 commit 09ecffe
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 1 deletion.
9 changes: 8 additions & 1 deletion manga_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from manga_dl.utilities.sites.mangaread import Mangaread
from manga_dl.utilities.sites.webtoons import Webtoons
from manga_dl.utilities.sites.kaiscans import Kaiscans
from manga_dl.utilities.sites.mangakakalot import Mangakakalot


class GracefulThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
Expand Down Expand Up @@ -122,6 +123,12 @@ def get_website_class(url: str):
return Webtoons(log)
elif "kaiscans.com" in url:
return Kaiscans(log, driver_path)
elif (
"mangakakalot.com" in url
or "chapmanganato.com" in url
or "readmanganato.com" in url
):
return Mangakakalot(log)
else:
raise ValueError(f"Unsupported website: {url}")

Expand All @@ -147,7 +154,7 @@ def download_manga():

for manga_url in manga_urls:
manga = get_website_class(manga_url)
if isinstance(manga, (Webtoons, Kaiscans)):
if isinstance(manga, (Webtoons, Kaiscans, Mangakakalot)):
manga_name = manga_url
else:
manga_name = unquote(urlparse(manga_url).path.split("/")[-1])
Expand Down
181 changes: 181 additions & 0 deletions manga_dl/utilities/sites/mangakakalot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""
Scraper for the website mangakakalot.com and the related chapmanganato.com site.

The Mangakakalot class provides methods to fetch a manga's title, its chapter
list and the image URLs of a chapter. It only scrapes pages: it does not
download images or build .cbz archives itself, and its metadata lookup is
currently a placeholder that returns empty results.

Classes:
    Mangakakalot: A class to interact with the website mangakakalot.com.
"""
import re

import requests
from bs4 import BeautifulSoup


class Mangakakalot:
    """
    Scraper for mangakakalot.com and the manganato sites.

    The class fetches a manga's title, its chapter list and the image URLs of
    a chapter. It does not download or package anything itself.

    Attributes:
        logger: Logger-like object used for logging; only ``error`` is called.
        base_headers: Browser-like HTTP headers sent with every page request.
        headers_image: ``base_headers`` plus the headers needed when fetching
            image files (not used by the methods of this class).
    """

    base_headers = {
        "authority": "mangakakalot.com",
        "accept-language": "en-US,en;q=0.9,es-US;q=0.8,es;q=0.7,en-GB-oxendict;q=0.6",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-site": "none",
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
        ),
    }

    # Same as base_headers, with "sec-fetch-site" overridden and the
    # image-specific headers appended.
    headers_image = {
        **base_headers,
        "accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "referer": "https://mangakakalot.com/",
        "sec-fetch-dest": "image",
        "sec-fetch-mode": "no-cors",
        "sec-fetch-site": "same-site",
    }

    # Matches the first decimal ("7.6") or integer ("7") in a string.
    _NUMBER_RE = re.compile(r"\d+\.\d+|\d+")

    # (URL marker, page selectors), checked in order; the first marker found in
    # the URL wins. "manganato" covers both chapmanganato.com and
    # readmanganato.com.
    # NOTE(review): assumes readmanganato.com serves the same markup as
    # chapmanganato.com -- confirm against a live page.
    _LAYOUTS = (
        (
            "mangakakalot",
            {
                "title_tag": "ul",
                "title_class": "manga-info-text",
                "list_class": "chapter-list",
                "row_tag": "div",
                "row_class": "row",
                "separator": "/chapter_",
            },
        ),
        (
            "manganato",
            {
                "title_tag": "div",
                "title_class": "story-info-right",
                "list_class": "panel-story-chapter-list",
                "row_tag": "li",
                "row_class": "a-h",
                "separator": "/chapter-",
            },
        ),
    )

    def __init__(self, logger):
        self.logger = logger

    @classmethod
    def _layout_for(cls, url: str):
        """Return the selector table matching ``url``, or None if unsupported."""
        for marker, layout in cls._LAYOUTS:
            if marker in url:
                return layout
        return None

    @classmethod
    def _chapter_number(cls, url: str, separator: str):
        """
        Extract the chapter number from a chapter URL.

        Only the text after the last ``separator`` is inspected. Returns a
        float for decimal chapters (e.g. 7.6), an int for whole chapters, and
        None when that text contains no digits.
        """
        match = cls._NUMBER_RE.search(url.split(separator)[-1])
        if match is None:
            return None
        text = match.group(0)
        return float(text) if "." in text else int(text)

    def get_manga_id(self, manga_name: str):
        """
        Get the manga ID for a given manga name.

        Args:
            manga_name: Full URL of the manga's main page.

        Returns:
            A ``(url, title)`` tuple, where ``url`` is ``manga_name`` unchanged
            and ``title`` is the stripped text of the page's ``<h1>``; None
            (after logging an error) if the site is unsupported, the request
            does not return 200, or the title cannot be located.
        """
        url = manga_name
        layout = self._layout_for(url)

        if layout is not None:
            result = requests.get(url, headers=self.base_headers, timeout=30)
            if result.status_code == 200:
                soup = BeautifulSoup(result.text, "html.parser")
                node = soup.find(
                    layout["title_tag"], {"class": layout["title_class"]}
                )
                # Either lookup may miss if the page layout changed.
                title = node.h1 if node is not None else None
                if title is not None:
                    return url, title.text.strip()

        self.logger.error("unable to find the manga id needed")
        return None

    def get_manga_chapters(self, manga_id: str):
        """
        Get the manga chapters for a given manga ID.

        Args:
            manga_id: Full URL of the manga's main page.

        Returns:
            A list of ``(chapter_number, chapter_url)`` tuples sorted by
            chapter number (possibly empty); None (after logging an error) if
            the site is unsupported, the request does not return 200, or the
            chapter list container is missing.
        """
        layout = self._layout_for(manga_id)

        if layout is not None:
            result = requests.get(
                url=manga_id,
                headers=self.base_headers,
                timeout=30,
            )
            if result.status_code == 200:
                soup = BeautifulSoup(result.text, "html.parser")
                chapter_list = soup.find("div", {"class": layout["list_class"]})
                if chapter_list is not None:
                    rows = chapter_list.find_all(
                        layout["row_tag"], {"class": layout["row_class"]}
                    )
                    chapters = []
                    for row in rows:
                        link = row.find("a")
                        href = link.get("href") if link is not None else None
                        if not href:
                            continue  # row without a usable link
                        number = self._chapter_number(href, layout["separator"])
                        if number is None:
                            continue  # link without a chapter number
                        chapters.append((number, href))
                    chapters.sort(key=lambda chapter: chapter[0])
                    return chapters

        self.logger.error("unable to find the manga chapters needed")
        return None

    def get_chapter_images(self, url: str):
        """
        Get the manga chapter images for a given chapter URL.

        Args:
            url: Full URL of the chapter page.

        Returns:
            A list of stripped image ``src`` URLs in page order; None (after
            logging an error) if the request does not return 200 or the
            reader container is missing.
        """
        result = requests.get(
            url=url,
            headers=self.base_headers,
            timeout=30,
        )

        if result.status_code == 200:
            soup = BeautifulSoup(result.text, "html.parser")
            node = soup.find("div", {"class": "container-chapter-reader"})
            if node is not None:
                # Skip <img> tags that carry no src attribute.
                return [
                    image_node["src"].strip()
                    for image_node in node.find_all("img")
                    if image_node.get("src")
                ]

        self.logger.error("unable to find the chapter images needed")
        return None

    def get_manga_metadata(self, manga_name: str):
        """
        Get the manga metadata for a given manga name.

        Placeholder: no request is made and ``manga_name`` is ignored.

        Returns:
            A ``(genres, summary)`` tuple of two empty lists.
        """
        genres = []
        summary = []

        return genres, summary

0 comments on commit 09ecffe

Please sign in to comment.