Skip to content

Commit

Permalink
🚧 Start fixing the search mistake in HUDBT
Browse files Browse the repository at this point in the history
  • Loading branch information
Rhilip committed Jun 9, 2018
1 parent 71bb846 commit 68de146
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 56 deletions.
17 changes: 3 additions & 14 deletions extractors/base/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

import requests
from bs4 import BeautifulSoup
from html2bbcode.parser import HTML2BBCode

import utils.descr as descr
from utils.constants import Video_Containers
Expand Down Expand Up @@ -81,7 +80,7 @@ def online_check(self) -> bool:
else:
if self.suspended != 0:
Logger.info("The Site: {si} is Online now,after {count} times tries."
"Will check the session soon.".format(si=self.url_host, count=self.suspended))
"Will check the session soon.".format(si=self.url_host, count=self.suspended))
self.suspended = 0 # Set self.suspended as 0 first, then session_check()
self.session_check()
return True if self.suspended == 0 else False
Expand Down Expand Up @@ -109,21 +108,11 @@ def _get_torrent(torrent):
torrent = tc.get_torrent(torrent)
return torrent

@staticmethod
def _descr_html2ubb(string: str) -> str:
    """
    Convert an HTML fragment into its BBCode (UBB) equivalent.

    :param string: HTML source text to convert
    :return: the converted BBCode text as a plain str
    """
    converter = HTML2BBCode()
    return str(converter.feed(string))

def _assist_delay(self):
if self._ASSIST_ONLY:
Logger.info("Autoseed-{mo} only allowed to assist."
"it will sleep {sl} Seconds to wait the reseed site "
"to have this torrent".format(mo=self.name, sl=self._ASSIST_DELAY_TIME))
"it will sleep {sl} Seconds to wait the reseed site "
"to have this torrent".format(mo=self.name, sl=self._ASSIST_DELAY_TIME))
time.sleep(self._ASSIST_DELAY_TIME)

def _get_torrent_ptn(self, torrent):
Expand Down
2 changes: 1 addition & 1 deletion extractors/byrbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def sort_title_info(raw_title, raw_type, raw_sec_type) -> dict:
len_split = len(type_dict[raw_type]["split"])
if len_split != len(raw_title_group):
Logger.warning("The raw title \"{raw}\" may lack of tag (now: {no},ask: {co}),"
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
while len_split > len(raw_title_group):
raw_title_group.append("")
raw_title_group.reverse()
Expand Down
41 changes: 21 additions & 20 deletions extractors/hudbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from bs4 import BeautifulSoup

from extractors.base.nexusphp import NexusPHP
from utils.constants import ubb_clean, episode_eng2chs
from utils.constants import ubb_clean, episode_eng2chs, html2ubb
from utils.load.handler import rootLogger as Logger


def title_clean(noext: str) -> str:
Expand Down Expand Up @@ -41,25 +42,25 @@ def torrent_clone(self, tid) -> dict:
return_dict = {}
details_bs = self.page_torrent_detail(tid=tid, bs=True)

return_dict["clone_id"] = tid

# 解析原种页面
return_dict["name"] = details_bs.find("h1", id="page-title").text # 标题
return_dict["small_descr"] = details_bs.find("dt", text="副标题").next_sibling.text # 副标题

# IMDb
imdb_another = details_bs.find("a", href=re.compile("http://www.imdb.com/title/tt"))
return_dict["url"] = imdb_another.text if imdb_another else ""

for key_dict, key_search in [("type", "cat"), ("standard_sel", "standard")]: # 类型, 质量
temp_reg = re.compile("torrents.php\?{}=(\d+)".format(key_search))
temp_tag = details_bs.find("a", href=temp_reg)
return_dict[key_dict] = re.search(temp_reg, temp_tag["href"]).group(1)

# 简介
descr_html = str((details_bs.select("div#kdescr > div.bbcode") or "")[0])
descr_ubb = self._descr_html2ubb(descr_html)
return_dict["descr"] = ubb_clean(descr_ubb)
if re.search("没有该ID的种子", str(details_bs)):
Logger.error("Error,this torrent may not exist or ConnectError")
else: # 解析原种页面
return_dict["clone_id"] = tid # 传入引用种子号
return_dict["name"] = details_bs.find("h1", id="page-title").text # 标题
return_dict["small_descr"] = details_bs.find("dt", text="副标题").next_sibling.text # 副标题

imdb_another = details_bs.find("a", href=re.compile("http://www.imdb.com/title/tt"))
return_dict["url"] = imdb_another.text if imdb_another else "" # IMDb

for key_dict, key_search in [("type", "cat"), ("standard_sel", "standard")]: # 类型, 质量
temp_reg = re.compile("torrents.php\?{}=(\d+)".format(key_search))
temp_tag = details_bs.find("a", href=temp_reg)
return_dict[key_dict] = re.search(temp_reg, temp_tag["href"]).group(1)

# 简介
descr_html = str((details_bs.select("div#kdescr > div.bbcode") or "")[0])
descr_ubb = html2ubb(descr_html)
return_dict["descr"] = ubb_clean(descr_ubb)

return return_dict

Expand Down
2 changes: 1 addition & 1 deletion extractors/nwsuaf6.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def date_raw_update(self, torrent_name_search, raw_info: dict) -> dict:
# TODO if len_split == 0:
if len_split != len(raw_title_group):
Logger.warning("The raw title \"{raw}\" may lack of tag (now: {no},ask: {co}),"
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
"The split may wrong.".format(raw=raw_title, no=len(raw_title_group), co=len_split))
while len_split > len(raw_title_group):
raw_title_group.append("")
raw_title_group.reverse()
Expand Down
25 changes: 5 additions & 20 deletions extractors/tjupt.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,30 +102,15 @@ def torrent_clone(self, tid):

def date_raw_update(self, torrent_name_search, raw_info: dict) -> dict:
# TODO Change info due to reseed torrent's name information
if int(raw_info["type"]) == 401: # 电影
type_ = int(raw_info["type"])
if type_ == 401: # 电影
pass
elif int(raw_info["type"]) == 402: # 剧集
elif type_ == 402: # 剧集
raw_info["ename"] = torrent_name_search.group("full_name") # 英文名
raw_info["tvseasoninfo"] = torrent_name_search.group("episode") # 集数
raw_info["subsinfo"] = "1" # 强制更新字幕情况为"暂无字幕"
elif int(raw_info["type"]) == 403: # 综艺
pass
elif int(raw_info["type"]) == 404: # 资料
pass
elif int(raw_info["type"]) == 405: # 动漫
raw_info["subsinfo"] = 1 # 强制更新字幕情况为"暂无字幕"
elif type_ == 405: # 动漫
raw_info["animenum"] = torrent_name_search.group("episode") # 动漫集数
elif int(raw_info["type"]) == 407: # 体育
pass
elif int(raw_info["type"]) == 408: # 软件
pass
elif int(raw_info["type"]) == 409: # 游戏
pass
elif int(raw_info["type"]) == 410: # 其他
pass
elif int(raw_info["type"]) == 411: # 纪录片
pass
elif int(raw_info["type"]) == 412: # 移动视频
pass

return raw_info

Expand Down
6 changes: 6 additions & 0 deletions utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
import time

from html2bbcode.parser import HTML2BBCode

Support_Site = [
# The tuple is like (config_dict_name in setting, Package name, Class name)
("site_byrbt", "extractors.byrbt", "Byrbt"),
Expand Down Expand Up @@ -48,3 +50,7 @@ def episode_eng2chs(ep: str) -> str:
if season_episode_info_search.group("episode"):
season_episode_info += " 第{e}集".format(e=season_episode_info_search.group("episode"))
return season_episode_info


def html2ubb(html: str) -> str:
    """
    Convert an HTML string into its BBCode (UBB) representation.

    :param html: HTML source text to convert
    :return: the converted BBCode text as a plain str
    """
    converter = HTML2BBCode()
    return str(converter.feed(html))

0 comments on commit 68de146

Please sign in to comment.