Skip to content

Commit

Permalink
fix: improve URL parsing and catch error for og-tag
Browse files Browse the repository at this point in the history
  • Loading branch information
FAUSheppy committed Jul 22, 2024
1 parent 379652a commit efca10c
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import datetime
import yaml
import urllib
import urllib.parse
from bs4 import BeautifulSoup

import sqlalchemy
Expand Down Expand Up @@ -154,12 +155,20 @@ def cache_og_meta_icons(tiles):
try:

og_image_href = og_image_tag.get("content")
if(not (og_image_href.startswith("https://")
or og_image_href.startswith("http://"))):
og_image_href = "https://{}".format(og_image_href)
urllib_image_request = urllib.request.Request(og_image_href)
urllib_image_request.add_header(USER_AGENT_HEADER, USER_AGENT_CONTENT)
image = urllib.request.urlopen(urllib_image_request).read()

parsed_tag_url = urllib.parse.urlparse(og_image_href)
original_request_url = urllib.parse.urlparse(href)

if not parsed_tag_url.netloc or not parsed_tag_url.scheme:
og_image_href = "{}://{}{}".format(original_request_url.scheme,
original_request_url.netloc, parsed_tag_url.path)

try:
urllib_image_request = urllib.request.Request(og_image_href)
urllib_image_request.add_header(USER_AGENT_HEADER, USER_AGENT_CONTENT)
image = urllib.request.urlopen(urllib_image_request).read()
except urllib.error.URLError as e:
print("Failed to query og-image-tag [{}]:".format(og_image_href), e)

with open(cache_path, "wb") as f:
f.write(image)
Expand Down

0 comments on commit efca10c

Please sign in to comment.