Skip to content

Commit

Permalink
fix: always unescape html content
Browse files: browse the repository at this point in the history
  • Loading branch information
NobeKanai committed Apr 29, 2024
1 parent 2ee8bac commit e09bae7
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions dvtag/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def scrape(workno: str) -> DoujinVoice:
html = _get_200(url).text

if m := re.search(r'data-product-name="(.+)"\s*data-maker-name="(.+)"', html):
- name = m.group(1)
- circle = m.group(2)
+ name = unescape(m.group(1))
+ circle = unescape(m.group(2))
else:
raise ParsingError(f"no work name found", workno)

if m := re.search(r"\"og:image\"[\s\S]*?content=\"(.+?)\"", html):
- image_url = urljoin("https://www.dlsite.com", m.group(1))
+ image_url = urljoin("https://www.dlsite.com", unescape(m.group(1)))
else:
raise ParsingError(f"no cover image url found", workno)

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = dvtag
- version = 0.7.2
+ version = 0.7.3
author = Nobe Kanai
author_email = [email protected]
description = A tool for tagging your doujin voice library.
Expand Down

0 comments on commit e09bae7

Please sign in to comment.