Skip to content

Commit

Permalink
🚧 Fix OurBits
Browse files Browse the repository at this point in the history
1. HTTP Error 404 when downloading a torrent
2. High permissions are required to visit the Torrent info page
3. Quotes can't be removed
4. Multi Line Break - "\n"
5. Torrent base info is lost
  • Loading branch information
Rhilip committed Aug 2, 2018
1 parent 3e905e8 commit c0dde84
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
30 changes: 26 additions & 4 deletions extractors/ourbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
from html import unescape

import requests

from extractors.base.nexusphp import NexusPHP
from utils.constants import ubb_clean, episode_eng2chs, html2ubb, title_clean
from utils.load.handler import rootLogger as Logger
Expand Down Expand Up @@ -49,27 +51,47 @@ class OurBits(NexusPHP):
url_host = "https://ourbits.club"
db_column = "ourbits.club"

def torrent_link(self, tid):
    """Download the torrent file for ``tid`` and return the path it was saved to.

    The file is requested from ``download.php`` on ``self.url_host`` using
    ``self.passkey`` and written to ``/tmp/[TJUPT].<tid>.torrent``.

    :param tid: torrent id on the site.
    :return: path of the saved ``.torrent`` file.
    :raises requests.RequestException: if the request fails, times out, or
        the site answers with an HTTP error status.
    """
    torrent_link = self.url_host + "/download.php?id={tid}&passkey={pk}".format(tid=tid, pk=self.passkey)
    tmp_file = "/tmp/[TJUPT].{}.torrent".format(tid)

    # Fetch first: opening the file before the request would leave an empty
    # or truncated .torrent behind whenever the download fails.
    r = requests.get(torrent_link, timeout=30)
    # Do not save an HTML error page (404, 5xx, ...) as torrent data.
    r.raise_for_status()

    with open(tmp_file, "wb") as torrent:
        torrent.write(r.content)
    return tmp_file

def exist_torrent_title(self, tag):
    """Return the torrent name found on the detail page of torrent ``tag``.

    The name is taken from the first ``[OurBits].<name>.torrent`` occurrence
    in the text of the detail page.

    :param tag: torrent id, passed to ``page_torrent_detail`` as ``tid``.
    :return: the ``<name>`` part of the matched file name.
    :raises AttributeError: if the page text contains no such file name
        (``re.search`` returns ``None``).
    """
    torrent_page = self.page_torrent_detail(tid=tag, bs=True)
    # Raw string: "\[" and "\." are regex escapes, not valid Python string
    # escapes, and trigger warnings in a plain string literal.
    torrent_title = re.search(r"\[OurBits\]\.(?P<name>.+?)\.torrent", torrent_page.text).group("name")
    Logger.info("The torrent name for id({id}) is \"{name}\"".format(id=tag, name=torrent_title))
    return torrent_title

def torrent_clone(self, tid) -> dict:
return_dict = {}
details_bs = self.page_torrent_detail(tid=tid, bs=True)
title_search = re.search("种子详情 \"(?P<title>.*)\" - Powered", str(details_bs.title))
if title_search:
body = details_bs.body
return_dict["clone_id"] = tid
return_dict["name"] = unescape(title_search.group("title")) or ""

body = details_bs.body
for pat, type_ in [("://movie.douban.com/subject", "dburl"), ("://www.imdb.com/title/tt", "url")]:
a_another = body.find("a", href=re.compile(pat))
return_dict[type_] = a_another.get_text() if a_another else ""

descr_html = str(details_bs.find("div", id="kdescr"))
return_dict["descr"] = ubb_clean(html2ubb(descr_html)) or ""
# Remove Quota First
kdescr = details_bs.find("div", id="kdescr")
kdescr_quota = kdescr.findAll("fieldset")
for tag in kdescr_quota:
tag.extract()

return_dict["descr"] = ubb_clean(html2ubb(str(kdescr))) or ""

def detail_fetch(text):
return details_bs.find("td", text=text).next_sibling.get_text(" ", strip=True)

return_dict["small_descr"] = detail_fetch("副标题") or ""

info_gp = re.findall("([^:]+?[::].+?) ", re.sub("大小.+?([TGMk]?B) ", "", detail_fetch("基本信息")))
info_gp = re.findall("([^:]+?[::].+?) ", re.sub("大小.+?([TGMk]?B) ", "", detail_fetch("基本信息") + " "))
for info in info_gp:
info_pat = re.search("([^::]+?)[:: ]+(.+)", info)
if info_pat:
Expand Down
4 changes: 3 additions & 1 deletion utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,6 @@ def episode_eng2chs(ep: str) -> str:


def html2ubb(html: str) -> str:
    """Convert an HTML fragment to UBB text and collapse doubled line breaks.

    :param html: HTML source to feed to ``HTML2BBCode``.
    :return: the converted text with every ``"\\n\\n"`` pair replaced by a
        single ``"\\n"``.
    """
    ret = str(HTML2BBCode().feed(html))
    # Literal, left-to-right, non-overlapping replacement; same result as
    # re.sub("\n\n", "\n", ret) without needing the regex engine.
    return ret.replace("\n\n", "\n")
5 changes: 5 additions & 0 deletions utils/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@

# Search_pattern
pattern_group = [
re.compile( # Series (Which name match with 0day Source,see https://scenerules.org/t.html?id=tvx2642k16.nfo 16.4)
"\.?(?P<full_name>(?P<search_name>[\w\-. ]+?)[. ]"
"(?P<episode>([Ss]\d+)?[Ee][Pp]?\d+(-[Ee]?[Pp]?\d+)?|[Ss]\d+|Complete).+?WEB-DL.+?(-(?P<group>.+?))?)"
"(\.(?P<filetype>\w+)$|$)"
),
re.compile( # Series (Which name match with 0day Source,see https://scenerules.org/t.html?id=tvx2642k16.nfo 16.4)
"\.?(?P<full_name>(?P<search_name>[\w\-. ]+?)[. ]"
"(?P<episode>([Ss]\d+)?[Ee][Pp]?\d+(-[Ee]?[Pp]?\d+)?|[Ss]\d+|Complete).+?(-(?P<group>.+?))?)"
Expand Down

0 comments on commit c0dde84

Please sign in to comment.