Skip to content

Commit

Permalink
🚧 Start fixing the search mistake in HUDBT
Browse files Browse the repository at this point in the history
  • Loading branch information
Rhilip committed Jun 9, 2018
1 parent 71bb846 commit 68de146
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 56 deletions.
17 changes: 3 additions & 14 deletions extractors/base/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import requests
from bs4 import BeautifulSoup
from html2bbcode.parser import HTML2BBCode

import utils.descr as descr
from utils.constants import Video_Containers
Expand Down Expand Up @@ -81,7 +80,7 @@ def online_check(self) -> bool:
else:
if self.suspended != 0:
Logger.info("The Site: {si} is Online now,after {count} times tries."
"Will check the session soon.".format(si=self.url_host, count=self.suspended))
"Will check the session soon.".format(si=self.url_host, count=self.suspended))
self.suspended = 0 # Set self.suspended as 0 first, then session_check()
self.session_check()
return True if self.suspended == 0 else False
Expand Down Expand Up @@ -109,21 +108,11 @@ def _get_torrent(torrent):
torrent = tc.get_torrent(torrent)
return torrent

@staticmethod
def _descr_html2ubb(string: str) -> str:
    """
    Convert an HTML fragment into its BBCode (UBB) equivalent.

    :param string: HTML source text to convert
    :return: the converted BBCode text as a plain str
    """
    converter = HTML2BBCode()
    return str(converter.feed(string))

def _assist_delay(self):
if self._ASSIST_ONLY:
Logger.info("Autoseed-{mo} only allowed to assist."
"it will sleep {sl} Seconds to wait the reseed site "
"to have this torrent".format(mo=self.name, sl=self._ASSIST_DELAY_TIME))
"it will sleep {sl} Seconds to wait the reseed site "
"to have this torrent".format(mo=self.name, sl=self._ASSIST_DELAY_TIME))
time.sleep(self._ASSIST_DELAY_TIME)

def _get_torrent_ptn(self, torrent):
Expand Down
2 changes: 1 addition & 1 deletion extractors/byrbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def sort_title_info(raw_title, raw_type, raw_sec_type) -> dict:
len_split = len(type_dict[raw_type]["split"])
if len_split != len(raw_title_group):
Logger.warning("The raw title \"{raw}\" may lack of tag (now: {no},ask: {co}),"
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
while len_split > len(raw_title_group):
raw_title_group.append("")
raw_title_group.reverse()
Expand Down
41 changes: 21 additions & 20 deletions extractors/hudbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from bs4 import BeautifulSoup

from extractors.base.nexusphp import NexusPHP
from utils.constants import ubb_clean, episode_eng2chs
from utils.constants import ubb_clean, episode_eng2chs, html2ubb
from utils.load.handler import rootLogger as Logger


def title_clean(noext: str) -> str:
Expand Down Expand Up @@ -41,25 +42,25 @@ def torrent_clone(self, tid) -> dict:
return_dict = {}
details_bs = self.page_torrent_detail(tid=tid, bs=True)

return_dict["clone_id"] = tid

# 解析原种页面
return_dict["name"] = details_bs.find("h1", id="page-title").text # 标题
return_dict["small_descr"] = details_bs.find("dt", text="副标题").next_sibling.text # 副标题

# IMDb
imdb_another = details_bs.find("a", href=re.compile("http://www.imdb.com/title/tt"))
return_dict["url"] = imdb_another.text if imdb_another else ""

for key_dict, key_search in [("type", "cat"), ("standard_sel", "standard")]: # 类型, 质量
temp_reg = re.compile("torrents.php\?{}=(\d+)".format(key_search))
temp_tag = details_bs.find("a", href=temp_reg)
return_dict[key_dict] = re.search(temp_reg, temp_tag["href"]).group(1)

# 简介
descr_html = str((details_bs.select("div#kdescr > div.bbcode") or "")[0])
descr_ubb = self._descr_html2ubb(descr_html)
return_dict["descr"] = ubb_clean(descr_ubb)
if re.search("没有该ID的种子", str(details_bs)):
Logger.error("Error,this torrent may not exist or ConnectError")
else: # 解析原种页面
return_dict["clone_id"] = tid # 传入引用种子号
return_dict["name"] = details_bs.find("h1", id="page-title").text # 标题
return_dict["small_descr"] = details_bs.find("dt", text="副标题").next_sibling.text # 副标题

imdb_another = details_bs.find("a", href=re.compile("http://www.imdb.com/title/tt"))
return_dict["url"] = imdb_another.text if imdb_another else "" # IMDb

for key_dict, key_search in [("type", "cat"), ("standard_sel", "standard")]: # 类型, 质量
temp_reg = re.compile("torrents.php\?{}=(\d+)".format(key_search))
temp_tag = details_bs.find("a", href=temp_reg)
return_dict[key_dict] = re.search(temp_reg, temp_tag["href"]).group(1)

# 简介
descr_html = str((details_bs.select("div#kdescr > div.bbcode") or "")[0])
descr_ubb = html2ubb(descr_html)
return_dict["descr"] = ubb_clean(descr_ubb)

return return_dict

Expand Down
2 changes: 1 addition & 1 deletion extractors/nwsuaf6.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def date_raw_update(self, torrent_name_search, raw_info: dict) -> dict:
# TODO if len_split == 0:
if len_split != len(raw_title_group):
Logger.warning("The raw title \"{raw}\" may lack of tag (now: {no},ask: {co}),"
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
while len_split > len(raw_title_group):
raw_title_group.append("")
raw_title_group.reverse()
Expand Down
25 changes: 5 additions & 20 deletions extractors/tjupt.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,30 +102,15 @@ def torrent_clone(self, tid):

def date_raw_update(self, torrent_name_search, raw_info: dict) -> dict:
# TODO Change info due to reseed torrent's name information
if int(raw_info["type"]) == 401: # 电影
type_ = int(raw_info["type"])
if type_ == 401: # 电影
pass
elif int(raw_info["type"]) == 402: # 剧集
elif type_ == 402: # 剧集
raw_info["ename"] = torrent_name_search.group("full_name") # 英文名
raw_info["tvseasoninfo"] = torrent_name_search.group("episode") # 集数
raw_info["subsinfo"] = "1" # 强制更新字幕情况为"暂无字幕"
elif int(raw_info["type"]) == 403: # 综艺
pass
elif int(raw_info["type"]) == 404: # 资料
pass
elif int(raw_info["type"]) == 405: # 动漫
raw_info["subsinfo"] = 1 # 强制更新字幕情况为"暂无字幕"
elif type_ == 405: # 动漫
raw_info["animenum"] = torrent_name_search.group("episode") # 动漫集数
elif int(raw_info["type"]) == 407: # 体育
pass
elif int(raw_info["type"]) == 408: # 软件
pass
elif int(raw_info["type"]) == 409: # 游戏
pass
elif int(raw_info["type"]) == 410: # 其他
pass
elif int(raw_info["type"]) == 411: # 纪录片
pass
elif int(raw_info["type"]) == 412: # 移动视频
pass

return raw_info

Expand Down
6 changes: 6 additions & 0 deletions utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
import time

from html2bbcode.parser import HTML2BBCode

Support_Site = [
# The tuple is like (config_dict_name in setting, Package name, Class name)
("site_byrbt", "extractors.byrbt", "Byrbt"),
Expand Down Expand Up @@ -48,3 +50,7 @@ def episode_eng2chs(ep: str) -> str:
if season_episode_info_search.group("episode"):
season_episode_info += " 第{e}集".format(e=season_episode_info_search.group("episode"))
return season_episode_info


def html2ubb(html: str) -> str:
    """
    Convert an HTML string into its BBCode (UBB) representation.

    :param html: HTML source text to convert
    :return: the converted BBCode text as a plain str
    """
    converter = HTML2BBCode()
    return str(converter.feed(html))

0 comments on commit 68de146

Please sign in to comment.