From e42c87ed96e51f5009c1e94c2820497a9704611b Mon Sep 17 00:00:00 2001 From: Audionut Date: Sun, 1 Sep 2024 20:29:22 +1000 Subject: [PATCH] Improve HDB searching --- src/bbcode.py | 10 ++++++-- src/prep.py | 22 ++++++++-------- src/trackers/HDB.py | 61 +++++++++++++++++++++++++++++++-------------- 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/src/bbcode.py b/src/bbcode.py index 1afd03bb4..6ae031d1a 100644 --- a/src/bbcode.py +++ b/src/bbcode.py @@ -224,8 +224,14 @@ def clean_unit3d_description(self, desc, site): desc = desc.replace(center, cleaned_center.strip()) # Remove bot signatures - bot_signature_regex = r"\[center\]\s*\[img=\d+\]https:\/\/blutopia\.xyz\/favicon\.ico\[\/img\]\s*\[b\]Uploaded Using \[url=https:\/\/github\.com\/HDInnovations\/UNIT3D\]UNIT3D\[\/url\] Auto Uploader\[\/b\]\s*\[img=\d+\]https:\/\/blutopia\.xyz\/favicon\.ico\[\/img\]\s*\[\/center\]" - desc = re.sub(bot_signature_regex, "", desc, flags=re.IGNORECASE) + bot_signature_regex = r""" + \[center\]\s*\[img=\d+\]https:\/\/blutopia\.xyz\/favicon\.ico\[\/img\]\s*\[b\] + Uploaded\sUsing\s\[url=https:\/\/github\.com\/HDInnovations\/UNIT3D\]UNIT3D\[\/url\]\s + Auto\sUploader\[\/b\]\s*\[img=\d+\]https:\/\/blutopia\.xyz\/favicon\.ico\[\/img\]\s*\[\/center\]| + \[center\]\s*\[b\]Uploaded\sUsing\s\[url=https:\/\/github\.com\/HDInnovations\/UNIT3D\]UNIT3D\[\/url\] + \sAuto\sUploader\[\/b\]\s*\[\/center\] + """ + desc = re.sub(bot_signature_regex, "", desc, flags=re.IGNORECASE | re.VERBOSE) desc = re.sub(r"\[center\].*Created by L4G's Upload Assistant.*\[\/center\]", "", desc, flags=re.IGNORECASE) # Ensure no dangling tags and remove extra blank lines diff --git a/src/prep.py b/src/prep.py index ecf2d5806..3f8a63cab 100644 --- a/src/prep.py +++ b/src/prep.py @@ -230,7 +230,7 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met console.print("[yellow]No ID found in meta for HDB, searching by file name[/yellow]") # Use search_filename function if ID is not found in meta - imdb, tvdb_id, hdb_name, meta['ext_torrenthash'], tracker_id = await tracker_instance.search_filename(search_term, search_file_folder) + imdb, tvdb_id, hdb_name, meta['ext_torrenthash'], tracker_id = await tracker_instance.search_filename(search_term, search_file_folder, meta) meta['tvdb_id'] = str(tvdb_id) if tvdb_id else meta.get('tvdb_id') meta['hdb_name'] = hdb_name @@ -241,7 +241,7 @@ async def update_metadata_from_tracker(self, tracker_name, tracker_instance, met if found_match: if imdb or tvdb_id or hdb_name: console.print(f"[green]{tracker_name} data found: IMDb ID: {imdb}, TVDb ID: {meta['tvdb_id']}, HDB Name: {meta['hdb_name']}[/green]") - if await self.prompt_user_for_confirmation(f"Do you want to keep the data found on {tracker_name}?"): + if await self.prompt_user_for_confirmation(f"Do you want to use the ID's found on {tracker_name}?"): console.print(f"[green]{tracker_name} data retained.[/green]") else: console.print(f"[yellow]{tracker_name} data discarded.[/yellow]") @@ -407,15 +407,6 @@ async def gather_prep(self, meta, mode): found_match = True # console.print(f"[blue]PTP search complete, found_match: {found_match}[/blue]") - if "HDB" in default_trackers and not found_match: - if str(self.config['TRACKERS'].get('HDB', {}).get('useAPI')).lower() == "true": - # console.print(f"[blue]Searching HDB for: {search_term}[/blue]") - hdb = HDB(config=self.config) - meta, match = await self.update_metadata_from_tracker('HDB', hdb, meta, search_term, search_file_folder) - if match: - found_match = True - # console.print(f"[blue]HDB search complete, found_match: {found_match}[/blue]") - if "BLU" in default_trackers and not found_match: if str(self.config['TRACKERS'].get('BLU', {}).get('useAPI')).lower() == "true": # console.print(f"[blue]Searching BLU for: {search_term}[/blue]") @@ -425,6 +416,15 @@ async def gather_prep(self, meta, mode): found_match = True # console.print(f"[blue]BLU search complete, found_match: {found_match}[/blue]") + if "HDB" in default_trackers and not found_match: + if str(self.config['TRACKERS'].get('HDB', {}).get('useAPI')).lower() == "true": + # console.print(f"[blue]Searching HDB for: {search_term}[/blue]") + hdb = HDB(config=self.config) + meta, match = await self.update_metadata_from_tracker('HDB', hdb, meta, search_term, search_file_folder) + if match: + found_match = True + # console.print(f"[blue]HDB search complete, found_match: {found_match}[/blue]") + if not found_match: console.print("[yellow]No matches found on any trackers.[/yellow]") else: diff --git a/src/trackers/HDB.py b/src/trackers/HDB.py index 97e8710dd..921a64d96 100644 --- a/src/trackers/HDB.py +++ b/src/trackers/HDB.py @@ -509,26 +509,49 @@ async def get_info_from_torrent_id(self, hdb_id): console.print("Failed to get info from HDB ID. Either the site is down or your credentials are invalid") return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash - async def search_filename(self, search_term, search_file_folder): + async def search_filename(self, search_term, search_file_folder, meta): hdb_imdb = hdb_tvdb = hdb_name = hdb_torrenthash = hdb_id = None url = "https://hdbits.org/api/torrents" - if search_file_folder == 'folder': # Handling disc case - data = { - "username": self.username, - "passkey": self.passkey, - "limit": 100, - "folder_in_torrent": os.path.basename(search_term) # Using folder name for search - } - console.print(f"[green]Searching HDB for folder: [bold yellow]{os.path.basename(search_term)}[/bold yellow]") + # Handle disc case + if search_file_folder == 'folder' and meta.get('is_disc'): + bd_summary_path = os.path.join(meta['base_dir'], 'tmp', meta['uuid'], 'BD_SUMMARY_00.txt') + bd_summary = None + + # Parse the BD_SUMMARY_00.txt file to extract the Disc Title + try: + with open(bd_summary_path, 'r', encoding='utf-8') as file: + for line in file: + if "Disc Title:" in line: + bd_summary = line.split("Disc Title:")[1].strip() + break + + if bd_summary: + data = { + "username": self.username, + "passkey": self.passkey, + "limit": 100, + "search": bd_summary # Using the Disc Title for search + } + console.print(f"[green]Searching HDB for disc title: [bold yellow]{bd_summary}[/bold yellow]") + console.print(f"[yellow]Using this data: {data}") + else: + console.print(f"[red]Error: 'Disc Title' not found in {bd_summary_path}[/red]") + return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_id + + except FileNotFoundError: + console.print(f"[red]Error: File not found at {bd_summary_path}[/red]") + return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_id + else: # Handling non-disc case data = { "username": self.username, "passkey": self.passkey, "limit": 100, - "file_in_torrent": os.path.basename(search_term) # Using filename for search + "file_in_torrent": os.path.basename(search_term) } console.print(f"[green]Searching HDB for file: [bold yellow]{os.path.basename(search_term)}[/bold yellow]") + console.print(f"[yellow]Using this data: {data}") response = requests.get(url, json=data) @@ -537,21 +560,21 @@ async def search_filename(self, search_term, search_file_folder): response_json = response.json() # console.print(f"[green]HDB API response: {response_json}[/green]") # Log the entire response for debugging - # Check if 'data' key is present if 'data' not in response_json: console.print(f"[red]Error: 'data' key not found in HDB API response. Full response: {response_json}[/red]") return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_id if response_json['data'] != []: for each in response_json['data']: - if search_file_folder == 'folder' or each['numfiles'] == len(search_term): # Handle folder or filelist match - hdb_imdb = each.get('imdb', {'id': None}).get('id') - hdb_tvdb = each.get('tvdb', {'id': None}).get('id') - hdb_name = each['name'] - hdb_torrenthash = each['hash'] - hdb_id = each['id'] - console.print(f'[bold green]Matched release with HDB ID: [yellow]https://hdbits.org/details.php?id={hdb_id}[/yellow][/bold green]') - return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_id + hdb_imdb = each.get('imdb', {'id': None}).get('id') + hdb_tvdb = each.get('tvdb', {'id': None}).get('id') + hdb_name = each['name'] + hdb_torrenthash = each['hash'] + hdb_id = each['id'] + console.print(f'[bold green]Matched release with HDB ID: [yellow]https://hdbits.org/details.php?id={hdb_id}[/yellow][/bold green]') + return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_id + else: + console.print('[yellow]No data found in the HDB API response[/yellow]') except Exception as e: console.print_exception() console.print(f"[red]Failed to parse HDB API response. Error: {str(e)}[/red]")