Fix: sankaku (#385)
* WIP: www.sanka

* Fix: don't throw when there is no accessToken

* Fix: bad-indentation
eight04 authored Oct 2, 2024
1 parent 1021b2a commit 05972c0
Showing 6 changed files with 97 additions and 77 deletions.
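In brief, the accessToken fix wraps the cookie lookup in load_config (comiccrawler/mods/sankaku_beta.py) so that a missing token degrades to anonymous access instead of raising. A minimal sketch of the pattern, assuming (per the except clause in the diff below) that comiccrawler's get_cookie helper raises ValueError when the cookie is absent:

	# Sketch of the fallback in load_config; get_cookie raising ValueError
	# on a missing cookie is an assumption read off the diff below.
	# s is the module's requests session (from comiccrawler's session_manager).
	try:
		access_token = get_cookie(s.cookies, "accessToken", domain="www.sankakucomplex.com")
	except ValueError:
		access_token = ""  # no token yet: proceed anonymously
	s.headers.update({"Authorization": f"Bearer {access_token}"})
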
4 changes: 1 addition & 3 deletions .pylintrc
@@ -12,6 +12,7 @@ disable=
 	design,
 
 	anomalous-backslash-in-string,
+	bad-indentation,
 	consider-using-f-string,
 	duplicate-code,
 	global-statement,
@@ -42,9 +43,6 @@ const-rgx=^(([A-Z_][A-Z0-9_]*)|(__.*__)|[a-z_][a-z0-9_]*)$
 method-rgx=(([a-z][a-z0-9_]{1,30})|(_[a-z0-9_]*))$
 variable-rgx=(([a-z][a-z0-9_]{,30})|(_[a-z0-9_]*))$
 
-[FORMAT]
-indent-string="\t"
-
 [VARIABLES]
 callbacks=_
 init-import=yes
10 changes: 5 additions & 5 deletions comiccrawler/mods/sankaku.py
@@ -15,7 +15,7 @@
 	"curl": "",
 	# curl for v.sankakucomplex.com. Note that you should leave this empty.
 	"curl_v": ""
-}
+}
 no_referer = True
 autocurl = True
 
@@ -32,7 +32,7 @@ def is_redirected(err):
 
 def errorhandler(err, crawler):
 	pass
-	# this shouldn't happen without referer
+	# this shouldn't happen without referer
 	# if is_redirected(err):
 	# 	crawler.init_images()
 
@@ -49,7 +49,7 @@ def login_check(html):
 def get_title(html, url):
 	title = re.search(r"<title>/?(.+?) \|", html).group(1)
 	return "[sankaku] " + unescape(title)
-
+
 def get_episodes(html, url):
 	login_check(html)
 	s = []
@@ -65,7 +65,7 @@ def get_episodes(html, url):
 			continue
 		e = Episode(pid, urljoin(url, ep_url))
 		s.append(e)
-
+
 	return s[::-1]
 
 def get_images(html, url):
@@ -87,4 +87,4 @@ def get_next_page(html, url):
 
 def get_next_image_page(html, url):
 	pass
-
+
106 changes: 64 additions & 42 deletions comiccrawler/mods/sankaku_beta.py
@@ -8,64 +8,86 @@
 from ..error import SkipPageError
 from ..url import update_qs
 from ..grabber import grabhtml
+from ..session_manager import session_manager
+from ..util import get_cookie
 
-domain = ["beta.sankakucomplex.com"]
+domain = ["beta.sankakucomplex.com", "www.sankakucomplex.com"]
 name = "Sankaku Beta"
 noepfolder = True
 
 class ExpireError(Exception):
-	pass
+	pass
 
+def session_key(url):
+	r = urlparse(url)
+	if r.path.startswith("/post/keyset"):
+		return (r.scheme, r.netloc, "/post/keyset")
+
+def load_config():
+	s = session_manager.get("https://www.sankakucomplex.com/posts/keyset")
+	try:
+		access_token = get_cookie(s.cookies, "accessToken", domain="www.sankakucomplex.com")
+	except ValueError:
+		access_token = ""
+	s.headers.update({
+		"Accept": "application/vnd.sankaku.api+json;v=2",
+		"Client-Type": "non-premium",
+		"Platform": "web-app",
+		"Api-Version": "2",
+		"Enable-New-Tag-Type": "true",
+		"Authorization": f'Bearer {access_token}'
+	})
+
 def get_query(url, name):
-	query = urlparse(url).query
-	return parse_qs(query)[name][0]
+	query = urlparse(url).query
+	return parse_qs(query)[name][0]
 
 def get_title(html, url):
-	return "[sankaku] {}".format(get_query(url, "tags"))
+	return "[sankaku] {}".format(get_query(url, "tags"))
 
 next_page_cache = {}
 
 def get_episodes(html, url):
-	if re.match(r"https://beta\.sankakucomplex\.com/\?", url):
-		next_page_cache[url] = update_qs("https://capi-v2.sankakucomplex.com/posts/keyset?lang=en&default_threshold=1&hide_posts_in_books=never&limit=40", {
-			"tags": get_query(url, "tags")
-		})
-		raise SkipPageError
-	data = json.loads(html)
-	next = data["meta"]["next"]
-	# data_len_cache[url] = len(data)
-	eps = [
-		Episode(
-			str(e["id"]),
-			"https://beta.sankakucomplex.com/post/show/{}".format(e["id"]),
-			image=e["file_url"]
-		) for e in data["data"]
-	]
-	if next:
-		next_page_cache[url] = update_qs(url, {
-			"next": next
-		})
-	return eps[::-1]
+	if re.match(r"https://(beta|www)\.sankakucomplex\.com/", url):
+		next_page_cache[url] = update_qs("https://sankakuapi.com/posts/keyset?default_threshold=0&hide_posts_in_books=never&limit=40", {
+			"tags": get_query(url, "tags")
+		})
+		raise SkipPageError
+
+	data = json.loads(html)
+	next = data["meta"]["next"]
+	# data_len_cache[url] = len(data)
+	eps = [
+		Episode(
+			str(e["id"]),
+			"https://www.sankakucomplex.com/post/show/{}".format(e["id"]),
+			image=e["file_url"]
+		) for e in data["data"]
+	]
+
+	if next:
+		next_page_cache[url] = update_qs(url, {
+			"next": next
+		})
+
+	return eps[::-1]
 
 def get_images(html, url):
-	id = re.search("post/show/(\d+)", url).group(1)
-	data = grabhtml("https://capi-v2.sankakucomplex.com/posts?lang=english&page=1&limit=1&tags=id_range:{}".format(id))
-	data = json.loads(data)
-	return data[0]["file_url"]
+	id = re.search("post/show/(\d+)", url).group(1)
+	data = grabhtml("https://capi-v2.sankakucomplex.com/posts?lang=english&page=1&limit=1&tags=id_range:{}".format(id))
+	data = json.loads(data)
+	return data[0]["file_url"]
 
 def get_next_page(html, url):
-	if url in next_page_cache:
-		return next_page_cache.pop(url)
+	if url in next_page_cache:
+		return next_page_cache.pop(url)
 
 def redirecthandler(response, crawler):
-	if re.match(r"https://chan\.sankakucomplex\.com/.+\.(png|jpg)", response.url):
-		raise ExpireError
+	if re.match(r"https://chan\.sankakucomplex\.com/.+\.(png|jpg)", response.url):
+		raise ExpireError
 
 def errorhandler(err, crawler):
-	if isinstance(err, ExpireError):
-		crawler.ep.image = None
-		crawler.html = None
+	if isinstance(err, ExpireError):
+		crawler.ep.image = None
+		crawler.html = None
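
The pagination in this module is a handshake between get_episodes and get_next_page: the keyset response carries meta["next"], get_episodes writes it into the URL's next query parameter via update_qs and caches the result, and the crawler retrieves it through get_next_page. A self-contained sketch of the same loop, with requests standing in for comiccrawler's grabber and auth headers omitted (fetch_all_ids is an illustrative name; the endpoint and parameters are copied from the diff above):

	import requests

	API = "https://sankakuapi.com/posts/keyset"

	def fetch_all_ids(tags, limit=40):
		ids = []
		params = {
			"default_threshold": 0,
			"hide_posts_in_books": "never",
			"limit": limit,
			"tags": tags,
		}
		while True:
			data = requests.get(API, params=params).json()
			ids += [e["id"] for e in data["data"]]
			next_key = data["meta"]["next"]  # keyset cursor; falsy on the last page
			if not next_key:
				return ids
			params["next"] = next_key  # same role as update_qs(url, {"next": next})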

44 changes: 22 additions & 22 deletions requirements-lock.txt
@@ -1,65 +1,65 @@
 ansicon==1.89.0
-astroid==3.2.4
+astroid==3.3.4
 belfrywidgets==1.0.3
 bidict==0.23.1
 blessed==1.20.0
 Brotli==1.1.0
-certifi==2024.7.4
+certifi==2024.8.30
 charset-normalizer==3.3.2
 colorama==0.4.6
 deno_vm==0.6.0
 desktop3==0.5.3
-dill==0.3.8
+dill==0.3.9
 docopt==0.6.2
 docutils==0.20.1
 enlighten==1.12.4
-idna==3.7
-importlib_metadata==8.2.0
+idna==3.10
+importlib_metadata==8.5.0
 isort==5.13.2
 jaraco.classes==3.4.0
-jaraco.context==5.3.0
-jaraco.functools==4.0.2
+jaraco.context==6.0.1
+jaraco.functools==4.1.0
 jinxed==1.3.0
-keyring==25.3.0
+keyring==25.4.1
 livereload==2.7.0
 markdown-it-py==3.0.0
 mccabe==0.7.0
 mdurl==0.1.2
-more-itertools==10.4.0
+more-itertools==10.5.0
 mutagen==1.47.0
 natsort==6.2.1
 nh3==0.2.18
 ordered-set==3.1.1
 pkginfo==1.10.0
-platformdirs==4.2.2
-prefixed==0.7.1
-puremagic==1.26
-pycryptodomex==3.20.0
+platformdirs==4.3.6
+prefixed==0.9.0
+puremagic==1.28
+pycryptodomex==3.21.0
 Pygments==2.18.0
-pylint==3.2.6
+pylint==3.3.1
 pyperclip==1.9.0
 pythreadworker==0.10.0
-pywin32-ctypes==0.2.2
+pywin32-ctypes==0.2.3
 pyxcute==0.8.1
 readme_renderer==43.0
 requests==2.32.3
 requests-toolbelt==1.0.0
 rfc3986==2.0.0
-rich==13.7.1
+rich==13.9.1
 safeprint==0.2.0
 semver==2.13.0
 Send2Trash==1.8.3
-setuptools==72.1.0
+setuptools==75.1.0
 six==1.16.0
 tomli==2.0.1
-tomlkit==0.13.0
+tomlkit==0.13.2
 tornado==6.4.1
 twine==5.1.1
 typing_extensions==4.8.0
 uncurl==0.0.11
-urllib3==2.2.2
+urllib3==2.2.3
 wcwidth==0.2.13
-websockets==12.0
+websockets==13.1
 win_unicode_console==0.5
-yt-dlp==2024.8.6
-zipp==3.19.2
+yt-dlp==2024.9.27
+zipp==3.20.2
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,6 +1,6 @@
-certifi==2024.7.4
+certifi==2024.8.30
 docutils==0.20.1
 pygments==2.18.0
-pylint==3.2.6
+pylint==3.3.1
 pyxcute==0.8.1
 twine==5.1.1
6 changes: 3 additions & 3 deletions setup.cfg
@@ -35,14 +35,14 @@ install_requires =
 	desktop3~=0.5.3
 	docopt~=0.6.2
 	enlighten~=1.12
-	puremagic~=1.26
-	pycryptodomex~=3.20
+	puremagic~=1.28
+	pycryptodomex~=3.21
 	pythreadworker~=0.10.0
 	requests~=2.32
 	safeprint~=0.2.0
 	uncurl~=0.0.11
 	urllib3~=2.2
-	yt-dlp~=2024.8
+	yt-dlp~=2024.9
 
 python_requires = >=3.10
 
