diff --git a/src/bbcode.py b/src/bbcode.py index 2821a7a03..2a512fe13 100644 --- a/src/bbcode.py +++ b/src/bbcode.py @@ -1,6 +1,7 @@ import re import html import urllib.parse +from src.console import console # Bold - KEEP # Italic - KEEP @@ -36,18 +37,22 @@ def __init__(self): pass def clean_ptp_description(self, desc, is_disc): + console.print(f"[yellow]Cleaning PTP description...") + # Convert Bullet Points to - desc = desc.replace("•", "-") # Unescape html desc = html.unescape(desc) - # End my suffering desc = desc.replace('\r\n', '\n') + # Debugging print + console.print(f"[yellow]Description after unescaping HTML:\n{desc[:500]}...") + # Remove url tags with PTP/HDB links url_tags = re.findall(r"(\[url[\=\]]https?:\/\/passthepopcorn\.m[^\]]+)([^\[]+)(\[\/url\])?", desc, flags=re.IGNORECASE) - url_tags = url_tags + re.findall(r"(\[url[\=\]]https?:\/\/hdbits\.o[^\]]+)([^\[]+)(\[\/url\])?", desc, flags=re.IGNORECASE) - if url_tags != []: + url_tags += re.findall(r"(\[url[\=\]]https?:\/\/hdbits\.o[^\]]+)([^\[]+)(\[\/url\])?", desc, flags=re.IGNORECASE) + if url_tags: for url_tag in url_tags: url_tag = ''.join(url_tag) url_tag_removed = re.sub(r"(\[url[\=\]]https?:\/\/passthepopcorn\.m[^\]]+])", "", url_tag, flags=re.IGNORECASE) @@ -55,13 +60,16 @@ def clean_ptp_description(self, desc, is_disc): url_tag_removed = url_tag_removed.replace("[/url]", "") desc = desc.replace(url_tag, url_tag_removed) - # Remove links to PTP + # Debugging print + console.print(f"[yellow]Description after removing URL tags:\n{desc[:500]}...") + + # Remove links to PTP/HDB desc = desc.replace('http://passthepopcorn.me', 'PTP').replace('https://passthepopcorn.me', 'PTP') desc = desc.replace('http://hdbits.org', 'HDB').replace('https://hdbits.org', 'HDB') # Remove Mediainfo Tags / Attempt to regex out mediainfo - mediainfo_tags = re.findall(r"\[mediainfo\][\s\S]*?\[\/mediainfo\]", desc) - if len(mediainfo_tags) >= 1: + mediainfo_tags = re.findall(r"\[mediainfo\][\s\S]*?\[\/mediainfo\]", desc) + if mediainfo_tags: desc = re.sub(r"\[mediainfo\][\s\S]*?\[\/mediainfo\]", "", desc) elif is_disc != "BDMV": desc = re.sub(r"(^general\nunique)(.*?)^$", "", desc, flags=re.MULTILINE | re.IGNORECASE | re.DOTALL) @@ -70,7 +78,10 @@ def clean_ptp_description(self, desc, is_disc): desc = re.sub(r"(^(video|audio|text)( #\d+)?\nid)(.*?)^$", "", desc, flags=re.MULTILINE | re.IGNORECASE | re.DOTALL) desc = re.sub(r"(^(menu)( #\d+)?\n)(.*?)^$", "", f"{desc}\n\n", flags=re.MULTILINE | re.IGNORECASE | re.DOTALL) elif any(x in is_disc for x in ["BDMV", "DVD"]): - return "" + return "", [] + + # Debugging print + console.print(f"[yellow]Description after removing mediainfo tags:\n{desc[:500]}...") # Convert Quote tags: desc = re.sub(r"\[quote.*?\]", "[code]", desc) @@ -102,7 +113,8 @@ def clean_ptp_description(self, desc, is_disc): for each in remove_list: desc = desc.replace(each, '') - # Catch Stray Images + # Catch Stray Images and Prepare Image List + imagelist = [] comps = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc) hides = re.findall(r"\[hide[\s\S]*?\[\/hide\]", desc) comps.extend(hides) @@ -110,24 +122,33 @@ def clean_ptp_description(self, desc, is_disc): comp_placeholders = [] # Replace comparison/hide tags with placeholder because sometimes uploaders use comp images as loose images - for i in range(len(comps)): - nocomp = nocomp.replace(comps[i], '') - desc = desc.replace(comps[i], f"COMPARISON_PLACEHOLDER-{i} ") - comp_placeholders.append(comps[i]) + for i, comp in enumerate(comps): + nocomp = nocomp.replace(comp, '') + desc = desc.replace(comp, f"COMPARISON_PLACEHOLDER-{i} ") + comp_placeholders.append(comp) + + # Debugging print + console.print(f"[yellow]Description after processing comparisons and hides:\n{desc[:500]}...") # Remove Images in IMG tags: desc = re.sub(r"\[img\][\s\S]*?\[\/img\]", "", desc, flags=re.IGNORECASE) desc = re.sub(r"\[img=[\s\S]*?\]", "", desc, flags=re.IGNORECASE) - # Replace Images + + # Extract loose images and add to imagelist loose_images = re.findall(r"(https?:\/\/.*\.(?:png|jpg))", nocomp, flags=re.IGNORECASE) - if len(loose_images) >= 1: + if loose_images: + imagelist.extend(loose_images) + console.print(f"[yellow]Loose images found: {len(loose_images)}") for image in loose_images: desc = desc.replace(image, '') + + # Debugging print + console.print(f"[yellow]Final description after removing loose images:\n{desc[:500]}...") + # Re-place comparisons - if comp_placeholders != []: - for i, comp in enumerate(comp_placeholders): - comp = re.sub(r"\[\/?img[\s\S]*?\]", "", comp, flags=re.IGNORECASE) - desc = desc.replace(f"COMPARISON_PLACEHOLDER-{i} ", comp) + for i, comp in enumerate(comp_placeholders): + comp = re.sub(r"\[\/?img[\s\S]*?\]", "", comp, flags=re.IGNORECASE) + desc = desc.replace(f"COMPARISON_PLACEHOLDER-{i} ", comp) # Convert hides with multiple images to comparison desc = self.convert_collapse_to_comparison(desc, "hide", hides) @@ -139,9 +160,12 @@ def clean_ptp_description(self, desc, is_disc): desc = desc.replace('\n', '', 1) desc = desc.strip('\n') - if desc.replace('\n', '') == '': - return "" - return desc + if desc.replace('\n', '').strip() == '': + console.print(f"[yellow]Description is empty after cleaning.") + return "", imagelist + + console.print(f"[green]Returning cleaned description and {len(imagelist)} images.") + return desc, imagelist def clean_unit3d_description(self, desc, site): # Unescape html diff --git a/src/prep.py b/src/prep.py index 8c22bcce0..fbb177a9b 100644 --- a/src/prep.py +++ b/src/prep.py @@ -68,15 +68,21 @@ def __init__(self, screens, img_host, config): async def prompt_user_for_id_selection(self, blu_tmdb=None, blu_imdb=None, blu_tvdb=None, blu_filename=None, imdb=None): if imdb: imdb = str(imdb).zfill(7) # Convert to string and ensure IMDb ID is 7 characters long by adding leading zeros - console.print(f"[cyan]Found IMDb ID: https://www.imdb.com/title/tt{imdb}[/cyan]") + console.print(f"[cyan]Found IMDb ID: https://www.imdb.com/title/tt{imdb}") if blu_tmdb or blu_imdb or blu_tvdb: if blu_imdb: blu_imdb = str(blu_imdb).zfill(7) # Convert to string and ensure IMDb ID is 7 characters long by adding leading zeros - console.print("[cyan]Found the following IDs on BLU:[/cyan]") + console.print("[cyan]Found the following IDs on BLU:") console.print(f"TMDb ID: {blu_tmdb}") console.print(f"IMDb ID: https://www.imdb.com/title/tt{blu_imdb}") console.print(f"TVDb ID: {blu_tvdb}") console.print(f"Filename: {blu_filename}") + + if blu_imdb: # Assuming blu_imagelist would be linked with this function + if meta.get('image_list'): + console.print("[cyan]Found the following images:") + for img in meta['image_list']: + console.print(f"[blue]{img}") selection = input("Do you want to use this ID? (y/n): ").strip().lower() return selection == 'y' @@ -92,13 +98,12 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met if meta.get(tracker_key) is not None: meta[manual_key] = meta[tracker_key] - console.print(f"[cyan]{tracker_name} ID found in meta, reusing existing ID: {meta[tracker_key]}[/cyan]") + console.print(f"[cyan]{tracker_name} ID found in meta, reusing existing ID: {meta[tracker_key]}") if tracker_name == "BLU": blu_tmdb, blu_imdb, blu_tvdb, blu_mal, blu_desc, blu_category, meta['ext_torrenthash'], blu_imagelist, blu_filename = await COMMON(self.config).unit3d_torrent_info("BLU", tracker_instance.torrent_url, tracker_instance.search_url, id=meta[tracker_key]) if blu_tmdb not in [None, '0'] or blu_imdb not in [None, '0'] or blu_tvdb not in [None, '0']: - console.print(f"[green]Valid data found on {tracker_name}, setting meta values[/green]") + console.print(f"[green]Valid data found on {tracker_name}, setting meta values") if await self.prompt_user_for_id_selection(blu_tmdb, blu_imdb, blu_tvdb, blu_filename): - # Setting metadata based on found IDs if blu_tmdb not in [None, '0']: meta['tmdb_manual'] = blu_tmdb if blu_imdb not in [None, '0']: @@ -115,27 +120,79 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met meta['image_list'] = blu_imagelist if blu_filename: meta['blu_filename'] = blu_filename # Store the filename in meta for later use - found_match = True # Set flag if any relevant data is found + found_match = True else: - console.print(f"[yellow]User skipped the found ID on {tracker_name}, moving to the next site.[/yellow]") + console.print(f"[yellow]User skipped the found ID on {tracker_name}, moving to the next site.") await self.handle_image_list(meta, tracker_name) - return meta, found_match # Return immediately to skip the current site + return meta, found_match else: - console.print(f"[yellow]No valid data found on {tracker_name}[/yellow]") + console.print(f"[yellow]No valid data found on {tracker_name}") + elif tracker_name == "PTP": + ptp_desc, ptp_imagelist = await tracker_instance.get_ptp_description(meta['ptp'], meta['is_disc']) + if ptp_desc.replace('\r\n', '').replace('\n', '').strip() != "": + meta['description'] = ptp_desc + meta['image_list'] = ptp_imagelist + console.print(f"[green]PTP description and images added to metadata.") + + # Print images before asking for confirmation + if ptp_imagelist: + console.print("[cyan]Found the following images:") + for img in ptp_imagelist: + console.print(f"[blue]{img}") + + if await self.prompt_user_for_confirmation("Do you want to keep the description and images from PTP?"): + found_match = True + else: + console.print(f"[yellow]Description and images discarded from PTP") + meta['description'] = None + meta['image_list'] = [] + return meta, found_match + else: + console.print(f"[yellow]No valid data found on {tracker_name}") else: meta['imdb'], meta['ext_torrenthash'] = await tracker_instance.get_imdb_from_torrent_id(meta[tracker_key]) if meta['imdb']: - meta['imdb'] = str(meta['imdb']).zfill(7) # Pad IMDb ID with leading zeros + meta['imdb'] = str(meta['imdb']).zfill(7) if await self.prompt_user_for_id_selection(imdb=meta['imdb']): - console.print(f"[green]{tracker_name} IMDb ID found: {meta['imdb']}[/green]") + console.print(f"[green]{tracker_name} IMDb ID found: {meta['imdb']}") found_match = True + + if tracker_name == "PTP": + imdb, ptp_torrent_id, meta['ext_torrenthash'] = await tracker_instance.get_ptp_id_imdb(search_term, search_file_folder) + if ptp_torrent_id: + meta['ptp'] = ptp_torrent_id # Store ptp_torrent_id in meta + if imdb: + imdb = str(imdb).zfill(7) + if await self.prompt_user_for_id_selection(imdb=imdb): + console.print(f"[green]{tracker_name} IMDb ID found: {imdb}") + meta['imdb'] = imdb + found_match = True + + ptp_desc, ptp_imagelist = await tracker_instance.get_ptp_description(meta['ptp'], meta['is_disc']) + if ptp_desc.replace('\r\n', '').replace('\n', '').strip() != "": + meta['description'] = ptp_desc + meta['image_list'] = ptp_imagelist + console.print(f"[green]PTP description and images added to metadata.") + + if ptp_imagelist: + console.print("[cyan]Found the following images:") + for img in ptp_imagelist: + console.print(f"[blue]{img}") + + if await self.prompt_user_for_confirmation("Do you want to keep the description and images from PTP?"): + found_match = True + else: + console.print(f"[yellow]Description and images discarded from PTP") + meta['description'] = None + meta['image_list'] = [] + return meta, found_match else: - console.print(f"[yellow]User skipped the found IMDb ID on {tracker_name}, moving to the next site.[/yellow]") - return meta, found_match # Return immediately to skip the current site + console.print(f"[yellow]User skipped the found IMDb ID on {tracker_name}, moving to the next site.") + return meta, found_match else: - console.print(f"[yellow]No IMDb ID found on {tracker_name}[/yellow]") + console.print(f"[yellow]No IMDb ID found on {tracker_name}") else: - imdb, tracker_id = None, None # Initialize variables + imdb, tracker_id = None, None if tracker_name == "PTP": imdb, tracker_id, meta['ext_torrenthash'] = await tracker_instance.get_ptp_id_imdb(search_term, search_file_folder) elif tracker_name == "HDB": @@ -145,13 +202,12 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met elif tracker_name == "BLU": blu_tmdb, blu_imdb, blu_tvdb, blu_mal, blu_desc, blu_category, meta['ext_torrenthash'], blu_imagelist, blu_filename = await COMMON(self.config).unit3d_torrent_info("BLU", tracker_instance.torrent_url, tracker_instance.search_url, file_name=search_term) if blu_tmdb not in [None, '0'] or blu_imdb not in [None, '0'] or blu_tvdb not in [None, '0']: - console.print(f"[green]Valid data found on {tracker_name} using file name, setting meta values[/green]") + console.print(f"[green]Valid data found on {tracker_name} using file name, setting meta values") if await self.prompt_user_for_id_selection(blu_tmdb, blu_imdb, blu_tvdb, blu_filename): - # Setting metadata based on found IDs if blu_tmdb not in [None, '0']: meta['tmdb_manual'] = blu_tmdb if blu_imdb not in [None, '0']: - meta['imdb'] = str(blu_imdb).zfill(7) # Pad IMDb ID with leading zeros + meta['imdb'] = str(blu_imdb).zfill(7) if blu_tvdb not in [None, '0']: meta['tvdb_id'] = blu_tvdb if blu_mal not in [None, '0']: @@ -163,26 +219,56 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met if meta.get('image_list', []) == []: meta['image_list'] = blu_imagelist if blu_filename: - meta['blu_filename'] = blu_filename # Store the filename in meta for later use - found_match = True # Set flag if any relevant data is found + meta['blu_filename'] = blu_filename + found_match = True else: - console.print(f"[yellow]User skipped the found ID on {tracker_name}, moving to the next site.[/yellow]") + console.print(f"[yellow]User skipped the found ID on {tracker_name}, moving to the next site.") await self.handle_image_list(meta, tracker_name) - return meta, found_match # Return immediately to skip the current site + return meta, found_match else: - console.print(f"[yellow]No valid data found on {tracker_name}[/yellow]") + console.print(f"[yellow]No valid data found on {tracker_name}") else: imdb = tracker_id = None if imdb: - imdb = str(imdb).zfill(7) # Pad IMDb ID with leading zeros + imdb = str(imdb).zfill(7) if await self.prompt_user_for_id_selection(imdb=imdb): - console.print(f"[green]{tracker_name} IMDb ID found: {imdb}[/green]") + console.print(f"[green]{tracker_name} IMDb ID found: {imdb}") meta['imdb'] = imdb found_match = True + + if tracker_name == "PTP": + imdb, ptp_torrent_id, meta['ext_torrenthash'] = await tracker_instance.get_ptp_id_imdb(search_term, search_file_folder) + if ptp_torrent_id: + meta['ptp'] = ptp_torrent_id # Store ptp_torrent_id in meta + if imdb: + imdb = str(imdb).zfill(7) + if await self.prompt_user_for_id_selection(imdb=imdb): + console.print(f"[green]{tracker_name} IMDb ID found: {imdb}") + meta['imdb'] = imdb + found_match = True + + ptp_desc, ptp_imagelist = await tracker_instance.get_ptp_description(meta['ptp'], meta['is_disc']) + if ptp_desc.replace('\r\n', '').replace('\n', '').strip() != "": + meta['description'] = ptp_desc + meta['image_list'] = ptp_imagelist + console.print(f"[green]PTP description and images added to metadata.") + + if ptp_imagelist: + console.print("[cyan]Found the following images:") + for img in ptp_imagelist: + console.print(f"[blue]{img}") + + if await self.prompt_user_for_confirmation("Do you want to keep the description and images from PTP?"): + found_match = True + else: + console.print(f"[yellow]Description and images discarded from PTP") + meta['description'] = None + meta['image_list'] = [] + return meta, found_match else: - console.print(f"[yellow]User skipped the found IMDb ID on {tracker_name}, moving to the next site.[/yellow]") - return meta, found_match # Return immediately to skip the current site + console.print(f"[yellow]User skipped the found IMDb ID on {tracker_name}, moving to the next site.") + return meta, found_match if tracker_id: meta[tracker_key] = tracker_id @@ -191,13 +277,15 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met async def handle_image_list(self, meta, tracker_name): if meta.get('image_list'): + console.print("[cyan]Found the following images:") + for img in meta['image_list']: + console.print(f"[blue]{img}[/blue]") keep_images = await self.prompt_user_for_confirmation(f"Do you want to keep the images found on {tracker_name}?") if not keep_images: meta['image_list'] = [] - console.print(f"[yellow]Images discarded from {tracker_name}[/yellow]") + console.print(f"[yellow]Images discarded from {tracker_name}") else: - console.print(f"[green]Images retained from {tracker_name}[/green]") - + console.print(f"[green]Images retained from {tracker_name}") async def gather_prep(self, meta, mode): meta['mode'] = mode diff --git a/src/trackers/PTP.py b/src/trackers/PTP.py index 4e924cdaa..eae72bacb 100644 --- a/src/trackers/PTP.py +++ b/src/trackers/PTP.py @@ -175,13 +175,24 @@ async def get_ptp_description(self, ptp_torrent_id, is_disc): 'User-Agent': self.user_agent } url = 'https://passthepopcorn.me/torrents.php' + console.print(f"[yellow]Requesting description from {url} with ID {ptp_torrent_id}") response = requests.get(url, params=params, headers=headers) await asyncio.sleep(1) + ptp_desc = response.text + console.print(f"[yellow]Raw description received:\n{ptp_desc[:500]}...") # Show first 500 characters for brevity + bbcode = BBCODE() - desc = bbcode.clean_ptp_description(ptp_desc, is_disc) - console.print("[bold green]Successfully grabbed description from PTP") - return desc + desc, imagelist = bbcode.clean_ptp_description(ptp_desc, is_disc) + + console.print(f"[bold green]Successfully grabbed description from PTP") + console.print(f"[cyan]Description after cleaning:\n{desc[:500]}...") # Show first 500 characters for brevity + console.print(f"[cyan]Images found: {len(imagelist)}") + if imagelist: + for img in imagelist: + console.print(f"[blue]Image: {img}") + + return desc, imagelist async def get_group_by_imdb(self, imdb): params = {