fix: always unescape HTML content

NobeKanai · Apr 29, 2024 · e09bae7
1 parent 2ee8bac
commit e09bae7
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/dvtag/scrape.py b/dvtag/scrape.py
@@ -31,13 +31,13 @@ def scrape(workno: str) -> DoujinVoice:
     html = _get_200(url).text
 
     if m := re.search(r'data-product-name="(.+)"\s*data-maker-name="(.+)"', html):
-        name = m.group(1)
-        circle = m.group(2)
+        name = unescape(m.group(1))
+        circle = unescape(m.group(2))
     else:
         raise ParsingError(f"no work name found", workno)
 
     if m := re.search(r"\"og:image\"[\s\S]*?content=\"(.+?)\"", html):
-        image_url = urljoin("https://www.dlsite.com", m.group(1))
+        image_url = urljoin("https://www.dlsite.com", unescape(m.group(1)))
     else:
         raise ParsingError(f"no cover image url found", workno)
 

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = dvtag
-version = 0.7.2
+version = 0.7.3
 author = Nobe Kanai
 author_email = [email protected]
 description = A tool for tagging your doujin voice library.