From 632a6a13ee572d9e07b7317edd485927dd09a668 Mon Sep 17 00:00:00 2001 From: Lim Nicolas Date: Tue, 22 Jan 2019 21:20:13 +0100 Subject: [PATCH] Crunchyroll fix #74 --- anime_dl/sites/crunchyroll.py | 47 ++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/anime_dl/sites/crunchyroll.py b/anime_dl/sites/crunchyroll.py index 93211bb..96c1902 100644 --- a/anime_dl/sites/crunchyroll.py +++ b/anime_dl/sites/crunchyroll.py @@ -23,7 +23,9 @@ '''This code Stinx. I'll write a better, faster and compact code when I get time after my exams or in mid. I literally have NO idea what I was thinking when I wrote this piece of code. -THIS REALLY STINX! Read the code at your own risk. +THIS REALLY STINX! +Also, some strangers went here and wrote some more code that REALLY REALLY STINX. +Read the code at your own risk. ''' @@ -37,24 +39,34 @@ def __init__(self, url, password, username, resolution, language, skipper, logge logging.basicConfig(format='%(levelname)s: %(message)s', filename="Error Log.log", level=logging.DEBUG, encoding="utf-8") - Crunchy_Show_regex = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P[\w\-]+))/?(?:\?|$)' - Crunchy_Video_regex = r'https?:\/\/(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' + # Extract the language from the input URL + Crunchy_Language = re.search(r'.+/([a-z]{2})/.+', url) + if not Crunchy_Language: + print("Could not extract the language from the URL") + return + + Crunchy_Language = Crunchy_Language.group(1) + + + Crunchy_Show_regex = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/[a-z]{2}/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P[\w\-]+))/?(?:\?|$)' + Crunchy_Video_regex = r'https?:\/\/(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/[a-z]{2}/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' Crunchy_Show = re.match(Crunchy_Show_regex, url) Crunchy_Video = re.match(Crunchy_Video_regex, url) if Crunchy_Video: - cookies, Token = self.webpagedownloader(url=url, username=username[0], password=password[0]) + cookies, Token = self.webpagedownloader(url=url, username=username[0], password=password[0], country=Crunchy_Language) if skipper == "yes": self.onlySubs(url=url, cookies=cookies) else: self.singleEpisode( url=url, cookies=cookies, token=Token, resolution=resolution) elif Crunchy_Show: - - cookies, Token = self.webpagedownloader(url=url, username=username[0], password=password[0]) + cookies, Token = self.webpagedownloader(url=url, username=username[0], password=password[0], country=Crunchy_Language) self.wholeShow(url=url, cookie=cookies, token=Token, language=language, resolution=resolution, skipper=skipper, episode_range=episode_range) + else: + print("URL does not look like a show or a video, stopping.") def login_check(self, htmlsource): # Open the page and check the title. CrunchyRoll redirects the user and the title has the text "Redirecting...". @@ -68,20 +80,20 @@ def login_check(self, htmlsource): # return False return True - def webpagedownloader(self, url, username, password): + def webpagedownloader(self, url, username, password, country='fr'): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36', 'Referer': - 'https://www.crunchyroll.com/login' + 'https://www.crunchyroll.com/' + country + '/login' } sess = requests.session() sess = cfscrape.create_scraper(sess) print("Trying to login...") - initial_page_fetch = sess.get(url='https://www.crunchyroll.com/login', headers=headers) + initial_page_fetch = sess.get(url='https://www.crunchyroll.com/' + country + '/login', headers=headers) if initial_page_fetch.status_code == 200: initial_page_source = initial_page_fetch.text.encode("utf-8") @@ -97,7 +109,7 @@ def webpagedownloader(self, url, username, password): } login_post = sess.post( - url='https://www.crunchyroll.com/login', + url='https://www.crunchyroll.com/' + country + '/login', data=payload, headers=headers, cookies=initial_cookies) @@ -184,13 +196,13 @@ def singleEpisode(self, url, cookies, token, resolution): xml_page = xml_page_connect.text.encode("utf-8") try: - m3u8_file_link = str(re.search(r'(.*?)', xml_page).group(1)).replace("&", "&") + m3u8_file_link = str(re.search(r'(.*?)', xml_page.decode("utf-8")).group(1)).replace("&", "&") logging.debug("m3u8_file_link : %s", m3u8_file_link) if not m3u8_file_link: # If no m3u8 found, try the rtmpdump... try: - host_link = re.search(r'(.*?)', xml_page).group(1) + host_link = re.search(r'(.*?)', xml_page.decode("utf-8")).group(1) logging.debug("Found RTMP DUMP!") print("RTMP streams not supported currently...") except Exception as NoRtmpDump: @@ -198,7 +210,7 @@ def singleEpisode(self, url, cookies, token, resolution): print(NoRtmpDump) else: anime_name = re.sub(r'[^A-Za-z0-9\ \-\' \\]+', '', str( - re.search(r'(.*?)', xml_page).group(1))).title().strip() + re.search(r'(.*?)', xml_page.decode("utf-8")).group(1))).title().strip() episode_number = re.search(r'(.*?)', xml_page.decode("utf-8")).group(1) video_width = re.search(r'(.*?)', xml_page.decode("utf-8")).group(1) @@ -229,7 +241,7 @@ def singleEpisode(self, url, cookies, token, resolution): pass else: self.subFetcher( - xml=str(xml_page), + xml=str(xml_page.decode("utf-8")), episode_number=episode_number, file_name=file_name) @@ -454,6 +466,7 @@ def subFetcher(self, xml, episode_number, file_name): 'https://www.crunchyroll.com' } + sess = requests.session() sess = cfscrape.create_scraper(sess) for sub_id, sub_lang, sub_lang2 in re.findall( @@ -514,9 +527,9 @@ def onlySubs(self, url, cookies): video_id, url) xml_page = sess.get(url=infoURL, headers=headers, cookies=cookies).text.encode("utf-8") - # anime_name = re.search(r'(.*?)', xml_page).group(1) + # anime_name = re.search(r'(.*?)', xml_page.decode("utf-8")).group(1) anime_name = re.sub(r'[^A-Za-z0-9\ \-\' \\]+', '', - str(re.search(r'(.*?)', xml_page).group(1))).title().strip() + str(re.search(r'(.*?)', xml_page.decode("utf-8")).group(1))).title().strip() episode_number = re.search(r'(.*?)', xml_page.decode("utf-8")).group(1) video_width = re.search(r'(.*?)', xml_page.decode("utf-8")).group(1) @@ -535,7 +548,7 @@ def onlySubs(self, url, cookies): if not os.path.exists(output_directory): os.makedirs(output_directory) - self.subFetcher(xml=xml_page, episode_number=episode_number, file_name=file_name) + self.subFetcher(xml=xml_page.decode('utf-8'), episode_number=episode_number, file_name=file_name) for sub_file in glob("*.ass"): try: