Skip to content

Commit

Permalink
Merge pull request #2168 from dipu-bd/dev
Browse the repository at this point in the history
Version 3.3.1
  • Loading branch information
dipu-bd authored Oct 17, 2023
2 parents 27c2719 + 44626fb commit 035c5a6
Show file tree
Hide file tree
Showing 6 changed files with 439 additions and 371 deletions.
744 changes: 375 additions & 369 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lncrawl/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.3.0
3.3.1
5 changes: 5 additions & 0 deletions lncrawl/core/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ def get_args(self):
action="store_true",
help="Suppress all input prompts and use defaults.",
),
Args(
"--ignore-images",
action="store_true",
help="Ignore images in chapters when downloading.",
),
Args(
"--close-directly",
action="store_true",
Expand Down
5 changes: 5 additions & 0 deletions lncrawl/core/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from abc import abstractmethod
from typing import Generator, List, Optional

from .arguments import get_args
from ..models import Chapter, SearchResult, Volume
from .cleaner import TextCleaner
from .scraper import Scraper
Expand Down Expand Up @@ -114,6 +115,10 @@ def index_of_chapter(self, url: str) -> int:
return 0

def extract_chapter_images(self, chapter: Chapter) -> None:
ignore_images = get_args().ignore_images
if ignore_images:
return

if not chapter.body:
return

Expand Down
2 changes: 1 addition & 1 deletion sources/_index.json

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions sources/en/s/shanghaifantasy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
import html
import logging

from lncrawl.core.crawler import Crawler

logger = logging.getLogger(__name__)


class Shanghaifantasy(Crawler):
    """Crawler for novels hosted on shanghaifantasy.com.

    Novel metadata comes from the novel's HTML page plus the site's
    ``wp-json`` endpoints; chapter bodies are scraped from chapter pages.
    """

    base_url = ["https://shanghaifantasy.com/"]
    # Formatted with the novel id read from the novel page.
    wp_json_novel = "https://shanghaifantasy.com/wp-json/wp/v2/novel/%s"
    # Formatted twice: first with the category id, then (through the escaped
    # "%%s") with the number of chapters to request in a single page.
    wp_json_chapters = "https://shanghaifantasy.com/wp-json/fiction/v1/chapters?category=%s&order=asc&per_page=%%s"

    def read_novel_info(self):
        """Fill in title, author, synopsis, cover and the chapter list.

        Chapters that the chapter endpoint flags as ``locked`` are skipped,
        so only unlocked chapters end up in ``self.chapters``.
        """
        soup = self.get_soup(self.novel_url)

        novel_id = soup.select_one("div#likebox").attrs["data-novel"]

        # The info rows are addressed by position: row 1 holds the chapter
        # count and row 2 (when present) the author.
        info_rows = soup.select("div.grid p.text-sm")
        # Strip so stray whitespace from the page never reaches the URL.
        total_chapters = info_rows[1].text.split(": ")[1].strip()

        novel_json = self.get_response(self.wp_json_novel % novel_id).json()

        # WordPress serves the "rendered" title as HTML, so entities such as
        # "&#8217;" or "&amp;" must be decoded to get the plain-text title.
        self.novel_title = html.unescape(novel_json["title"]["rendered"]).strip()

        if len(info_rows) > 2:
            author_text = info_rows[2].text
            if "Author" in author_text:
                # partition keeps everything after the first ": ", so names
                # that themselves contain ": " are not truncated.
                _, separator, author_name = author_text.partition(": ")
                if separator:
                    self.novel_author = author_name.strip()

        self.novel_synopsis = soup.select_one("div[x-show*='Synopsis']").get_text().strip()

        self.novel_cover = soup.select_one("img[class*='object-cover']")["src"]

        novel_chap_id = novel_json["categories"][0]
        # Request every chapter at once by using the total as the page size.
        chapters_url = self.wp_json_chapters % novel_chap_id % total_chapters
        for chapter in self.get_response(chapters_url).json():
            if chapter["locked"]:
                continue

            self.chapters.append(
                {
                    # Ids are sequential over the chapters actually kept.
                    "id": 1 + len(self.chapters),
                    "title": chapter["title"],
                    "url": chapter["permalink"],
                }
            )

    def download_chapter_body(self, chapter):
        """Fetch the chapter page and return the cleaned ``div.contenta`` content."""
        soup = self.get_soup(chapter["url"])
        content = soup.select_one("div.contenta")
        return self.cleaner.extract_contents(content)

0 comments on commit 035c5a6

Please sign in to comment.