🚧 Fix OurBits

1. HTTP Error 404 when downloading a torrent 2. Torrent info page requires high permissions to visit 3. Quote blocks can't be removed 4. Multiple line breaks - "\n" 5. Torrent base info lost
Rhilip · Aug 2, 2018 · c0dde84 · c0dde84
1 parent 3e905e8
commit c0dde84
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 5 deletions.
diff --git a/extractors/ourbits.py b/extractors/ourbits.py
@@ -5,6 +5,8 @@
 import re
 from html import unescape
 
+import requests
+
 from extractors.base.nexusphp import NexusPHP
 from utils.constants import ubb_clean, episode_eng2chs, html2ubb, title_clean
 from utils.load.handler import rootLogger as Logger
@@ -49,27 +51,47 @@ class OurBits(NexusPHP):
     url_host = "https://ourbits.club"
     db_column = "ourbits.club"
 
+    def torrent_link(self, tid):
+        torrent_link = self.url_host + "/download.php?id={tid}&passkey={pk}".format(tid=tid, pk=self.passkey)
+        tmp_file = "/tmp/[TJUPT].{}.torrent".format(tid)
+        with open(tmp_file, "wb") as torrent:
+            r = requests.get(torrent_link)
+            torrent.write(r.content)
+        return tmp_file
+
+    def exist_torrent_title(self, tag):
+        torrent_page = self.page_torrent_detail(tid=tag, bs=True)
+        torrent_title = re.search("\[OurBits\]\.(?P<name>.+?)\.torrent", torrent_page.text).group("name")
+        Logger.info("The torrent name for id({id}) is \"{name}\"".format(id=tag, name=torrent_title))
+        return torrent_title
+
     def torrent_clone(self, tid) -> dict:
         return_dict = {}
         details_bs = self.page_torrent_detail(tid=tid, bs=True)
         title_search = re.search("种子详情 \"(?P<title>.*)\" - Powered", str(details_bs.title))
         if title_search:
-            body = details_bs.body
+            return_dict["clone_id"] = tid
             return_dict["name"] = unescape(title_search.group("title")) or ""
 
+            body = details_bs.body
             for pat, type_ in [("://movie.douban.com/subject", "dburl"), ("://www.imdb.com/title/tt", "url")]:
                 a_another = body.find("a", href=re.compile(pat))
                 return_dict[type_] = a_another.get_text() if a_another else ""
 
-            descr_html = str(details_bs.find("div", id="kdescr"))
-            return_dict["descr"] = ubb_clean(html2ubb(descr_html)) or ""
+            # Remove Quota First
+            kdescr = details_bs.find("div", id="kdescr")
+            kdescr_quota = kdescr.findAll("fieldset")
+            for tag in kdescr_quota:
+                tag.extract()
+
+            return_dict["descr"] = ubb_clean(html2ubb(str(kdescr))) or ""
 
             def detail_fetch(text):
                 return details_bs.find("td", text=text).next_sibling.get_text(" ", strip=True)
 
             return_dict["small_descr"] = detail_fetch("副标题") or ""
 
-            info_gp = re.findall("([^：]+?[：:].+?) ", re.sub("大小.+?([TGMk]?B) ", "", detail_fetch("基本信息")))
+            info_gp = re.findall("([^：]+?[：:].+?) ", re.sub("大小.+?([TGMk]?B) ", "", detail_fetch("基本信息") + " "))
             for info in info_gp:
                 info_pat = re.search("([^：:]+?)[：: ]+(.+)", info)
                 if info_pat:

diff --git a/utils/constants.py b/utils/constants.py
@@ -65,4 +65,6 @@ def episode_eng2chs(ep: str) -> str:
 
 
 def html2ubb(html: str) -> str:
-    return str(HTML2BBCode().feed(html))
+    ret = str(HTML2BBCode().feed(html))
+    ret = re.sub("\n\n", "\n", ret)
+    return ret
diff --git a/utils/pattern.py b/utils/pattern.py
@@ -15,6 +15,11 @@
 
 # Search_pattern
 pattern_group = [
+    re.compile(  # Series (Which name match with 0day Source,see https://scenerules.org/t.html?id=tvx2642k16.nfo 16.4)
+        "\.?(?P<full_name>(?P<search_name>[\w\-. ]+?)[. ]"
+        "(?P<episode>([Ss]\d+)?[Ee][Pp]?\d+(-[Ee]?[Pp]?\d+)?|[Ss]\d+|Complete).+?WEB-DL.+?(-(?P<group>.+?))?)"
+        "(\.(?P<filetype>\w+)$|$)"
+    ),
     re.compile(  # Series (Which name match with 0day Source,see https://scenerules.org/t.html?id=tvx2642k16.nfo 16.4)
         "\.?(?P<full_name>(?P<search_name>[\w\-. ]+?)[. ]"
         "(?P<episode>([Ss]\d+)?[Ee][Pp]?\d+(-[Ee]?[Pp]?\d+)?|[Ss]\d+|Complete).+?(-(?P<group>.+?))?)"