# Upper bound, in seconds, on every outbound page request so that a stalled
# server cannot block the scraper indefinitely.
_ICON_REQUEST_TIMEOUT = 10


# Function to get the icon url from the webpage
def fetch_icon_url(page_url: str) -> str | None:
    """Return the absolute icon URL declared by the page at *page_url*.

    The page is downloaded and parsed, and the first ``<link>`` element
    found with ``rel="icon"`` (or, failing that, ``rel="shortcut icon"``)
    supplies the icon location.

    Args:
        page_url: URL of the page to inspect.

    Returns:
        The icon URL resolved against ``page_url``, or ``None`` when the
        request fails, the response status is not 200, or the page has no
        such ``<link>`` with a non-empty ``href``.
    """
    # Imported locally so the function is self-contained and does not rely
    # on the module's import block being changed.
    from urllib.parse import urljoin

    try:
        response = requests.get(page_url, timeout=_ICON_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        print(f"Failed to fetch icon for {page_url}: {e}")
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    icon_tag = (
        soup.find("link", rel="icon")
        or soup.find("link", rel="shortcut icon")
    )
    href = (icon_tag.get("href") or "").strip() if icon_tag else ""
    if not href:
        return None

    # urljoin handles root-relative ("/favicon.ico"), document-relative
    # ("favicon.ico") and protocol-relative ("//cdn.example/x.ico") hrefs,
    # and returns absolute URLs unchanged. Plain string concatenation gave
    # wrong results whenever page_url carried a path or query string.
    return urljoin(page_url, href)


# products icon
def fetch_product_icon_url(page_url: str) -> str | None:
    """Return the product image URL found on the page at *page_url*.

    Searches the page for an ``<img>`` element using
    ``class_="relative z-0 rounded"`` and reads its image location,
    preferring ``srcset`` over ``src``.

    Args:
        page_url: URL of the product page to inspect.

    Returns:
        The first URL listed in ``srcset`` (without its descriptor), else
        the ``src`` value, exactly as written in the page (it is not
        resolved against ``page_url``). ``None`` when the request fails,
        the response status is not 200, no matching ``<img>`` exists, or
        the tag carries no usable URL.
    """
    try:
        response = requests.get(page_url, timeout=_ICON_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"Failed to fetch icon for {page_url}: {e}")
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Look for the product <img> tag first.
    img_tag = soup.find("img", class_="relative z-0 rounded")
    if img_tag is None:
        # No matching <img> tag. Falling back to the site's own icon is
        # not implemented here, so the caller gets None.
        return None

    # srcset is a list of "URL descriptor" candidates; the URL is the first
    # whitespace-delimited token. A candidate written without a descriptor
    # keeps the separating comma attached, so strip it. Tokenising with
    # split() also means a blank srcset yields no tokens instead of raising
    # IndexError.
    srcset_tokens = (img_tag.get("srcset") or "").split()
    if srcset_tokens:
        return srcset_tokens[0].rstrip(",")

    # Fall back to src when srcset is missing or blank.
    src_tokens = (img_tag.get("src") or "").split()
    return src_tokens[0] if src_tokens else None