Skip to content

Commit

Permalink
Add async tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexMili committed Dec 13, 2024
1 parent 1121224 commit e3867b2
Show file tree
Hide file tree
Showing 7 changed files with 220 additions and 195 deletions.
3 changes: 3 additions & 0 deletions requiremen-tests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest
pytest-asyncio
pytest-cov
1 change: 1 addition & 0 deletions scripts/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pytest --cov=extract_favicon --cov-report=xml
169 changes: 91 additions & 78 deletions src/extract_favicon/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,91 @@ def _load_image(bytes_content: bytes) -> Tuple[Optional[Image.Image], bool]:
return img, is_valid


def _get_meta_image(img: Optional[Image.Image]) -> Tuple[int, int, Optional[str]]:
    """Return ``(width, height, format)`` for an optionally loaded image.

    Args:
        img: The loaded image, or ``None`` when loading failed.

    Returns:
        A tuple ``(width, height, img_format)``. Width and height come from
        ``img.size``; the format is ``img.format`` lower-cased. When ``img``
        is ``None`` the result is ``(0, 0, None)``; when the image reports no
        format, the format slot is ``None``.
    """
    # Nothing to inspect: fall back to the neutral defaults.
    if img is None:
        return 0, 0, None

    width, height = img.size
    fmt = img.format
    return width, height, (fmt.lower() if fmt is not None else None)


def _load_base64_img(favicon: Favicon) -> RealFavicon:
    """Build a ``RealFavicon`` from a favicon whose URL is a base64 ``data:`` URI.

    The payload after the first comma of ``favicon.url`` is base64-decoded and
    handed to ``_load_image``; width, height and format are then read through
    ``_get_meta_image``.

    Args:
        favicon: Favicon whose ``url`` attribute holds the ``data:`` URI.

    Returns:
        A ``RealFavicon`` whose URL information mirrors the data URI itself
        (``final_url`` equal to the URI, ``redirected=False``,
        ``status_code=200``). If the URI has no comma-separated payload, or
        the payload is not decodable base64, the result carries ``image=None``,
        ``valid=False``, a zero size and no format instead of raising.
    """
    # Only the first comma separates the header from the payload.
    _header, separator, payload = favicon.url.partition(",")

    img = None
    is_valid = False
    if separator:
        try:
            bytes_content = base64.b64decode(payload)
        except ValueError:
            # binascii.Error (e.g. incorrect padding) is a ValueError subclass;
            # an undecodable payload is reported as an invalid favicon.
            img, is_valid = None, False
        else:
            img, is_valid = _load_image(bytes_content)

    # NOTE(review): SVG payloads (data:image/svg+xml) are passed to
    # _load_image like raster images -- confirm whether that is intended.
    fav_url = FaviconURL(
        favicon.url, final_url=favicon.url, redirected=False, status_code=200
    )

    width, height, img_format = _get_meta_image(img)

    return RealFavicon(
        fav_url,
        img_format,
        width=width,
        height=height,
        valid=is_valid,
        image=img,
        original=favicon,
    )


def _parse_svg_length(raw_value: str) -> int:
    """Convert an SVG ``width``/``height`` attribute value to a whole number.

    Integer strings are returned unchanged. Decimal values (``"24.5"``) and
    values with an explicit ``px`` unit (``"32px"``) are accepted and
    truncated to an integer. Any other value (percentages, ``em``, empty or
    malformed text) yields ``0``.
    """
    text = raw_value.strip()
    if text.lower().endswith("px"):
        text = text[:-2]

    # Try ``int`` first so plain integer strings keep their exact value,
    # then fall back to ``float`` for decimal lengths.
    for convert in (int, float):
        try:
            return int(convert(text))
        except (ValueError, OverflowError):
            # OverflowError covers int(float("inf")).
            continue
    return 0


def _load_svg_img(favicon: Favicon, bytes_content: bytes) -> RealFavicon:
    """Build a ``RealFavicon`` from the raw bytes of an SVG document.

    Args:
        favicon: The favicon the bytes were obtained for; stored as
            ``original`` on the result.
        bytes_content: Raw XML content of the SVG file.

    Returns:
        A ``RealFavicon`` with format ``"svg"``, the re-serialized XML (UTF-8)
        as ``image``, and ``valid`` set when the root tag ends with ``svg``.
        Width and height come from the root element's ``width``/``height``
        attributes and default to ``0`` when missing or not parseable. The
        URL slot is a placeholder ``FaviconURL("", "", False, -1)``; callers
        are expected to substitute the real one (e.g. via ``_replace``).

    Raises:
        Any parsing error raised by ``ETree.fromstring`` for malformed XML
        propagates to the caller.
    """
    root = ETree.fromstring(bytes_content)

    # The tag may be namespace-qualified, hence a suffix check on the root tag.
    is_valid = root.tag.lower().endswith("svg")

    width = _parse_svg_length(root.attrib.get("width", ""))
    height = _parse_svg_length(root.attrib.get("height", ""))

    return RealFavicon(
        FaviconURL("", "", False, -1),
        "svg",
        width=width,
        height=height,
        valid=is_valid,
        image=ETree.tostring(root, encoding="utf-8"),
        original=favicon,
    )


def download(
favicons: Union[list[Favicon], set[Favicon]],
mode: str = "all",
Expand Down Expand Up @@ -401,49 +486,13 @@ def download(

filename = os.path.basename(urlparse(fav.url).path)
if filename.lower().endswith(".svg") is True:
root = ETree.fromstring(result["response"].content)

# Check if the root tag is SVG
if root.tag.lower().endswith("svg"):
is_valid = True
else:
is_valid = False

width = 0
height = 0

if "width" in root.attrib:
try:
width = int(root.attrib["width"])
except ValueError:
pass

if "height" in root.attrib:
try:
height = int(root.attrib["height"])
except ValueError:
pass

real_favicons.append(
RealFavicon(
fav_url,
"svg",
width=width,
height=height,
valid=is_valid,
image=ETree.tostring(root, encoding="utf-8"),
original=fav,
)
)
new_fav = _load_svg_img(fav, result["response"].content)
new_fav = new_fav._replace(url=fav_url)
real_favicons.append(new_fav)
else:
img, is_valid = _load_image(result["response"].content)

width = height = 0
img_format = None
if img is not None:
width, height = img.size
if img.format is not None:
img_format = img.format.lower()
width, height, img_format = _get_meta_image(img)

real_favicons.append(
RealFavicon(
Expand All @@ -457,44 +506,8 @@ def download(
)
)
else:
data_img = fav.url.split(",")
suffix = (
data_img[0]
.replace("data:", "")
.replace(";base64", "")
.replace("image", "")
.replace("/", "")
.lower()
)

if suffix == "svg+xml":
suffix = "svg"

bytes_content = base64.b64decode(data_img[1])
img, is_valid = _load_image(bytes_content)

fav_url = FaviconURL(
fav.url, final_url=fav.url, redirected=False, status_code=200
)

width = height = 0
img_format = None
if img is not None:
width, height = img.size
if img.format is not None:
img_format = img.format.lower()

real_favicons.append(
RealFavicon(
fav_url,
img_format,
width=width,
height=height,
valid=is_valid,
image=img,
original=fav,
)
)
new_fav = _load_base64_img(fav)
real_favicons.append(new_fav)

# If we are in these modes, we need to exit the for loop
if mode in ["biggest", "smallest"]:
Expand Down
97 changes: 14 additions & 83 deletions src/extract_favicon/main_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Optional, Tuple, Union
from urllib.parse import urlparse

import defusedxml.ElementTree as ETree
import httpx
from PIL import ImageFile
from reachable import is_reachable_async
Expand All @@ -14,8 +13,11 @@
Favicon,
FaviconURL,
RealFavicon,
_get_meta_image,
_get_root_url,
_load_base64_img,
_load_image,
_load_svg_img,
from_html,
)

Expand Down Expand Up @@ -76,7 +78,8 @@ async def download(
else:
to_process = list(favicons)

for fav in to_process:
len_process = len(to_process)
for idx, fav in enumerate(to_process):
if fav.url[:5] != "data:":
result = await is_reachable_async(
fav.url, head_optim=False, include_response=True, client=client
Expand Down Expand Up @@ -105,50 +108,13 @@ async def download(

filename = os.path.basename(urlparse(fav.url).path)
if filename.lower().endswith(".svg") is True:
root = ETree.fromstring(result["response"].content)

# Check if the root tag is SVG
if root.tag.lower().endswith("svg"):
is_valid = True
else:
is_valid = False

width = 0
height = 0

if "width" in root.attrib:
try:
width = int(root.attrib["width"])
except ValueError:
pass

if "height" in root.attrib:
try:
height = int(root.attrib["height"])
except ValueError:
pass

real_favicons.append(
RealFavicon(
fav_url,
"svg",
width=width,
height=height,
valid=is_valid,
image=ETree.tostring(root, encoding="utf-8"),
original=fav,
)
)
new_fav = _load_svg_img(fav, result["response"].content)
new_fav = new_fav._replace(url=fav_url)
real_favicons.append(new_fav)
else:
img, is_valid = _load_image(result["response"].content)

width = height = 0
img_format = None
if img is not None:
width, height = img.size
img_format = img.format
if img_format is not None:
img_format = img_format.lower()
width, height, img_format = _get_meta_image(img)

real_favicons.append(
RealFavicon(
Expand All @@ -162,51 +128,16 @@ async def download(
)
)
else:
data_img = fav.url.split(",")
suffix = (
data_img[0]
.replace("data:", "")
.replace(";base64", "")
.replace("image", "")
.replace("/", "")
.lower()
)

if suffix == "svg+xml":
suffix = "svg"

bytes_content = base64.b64decode(data_img[1])
img, is_valid = _load_image(bytes_content)

fav_url = FaviconURL(
fav.url, final_url=fav.url, redirected=False, status_code=200
)

width = height = 0
img_format = None
if img is not None:
width, height = img.size
if img_format is not None:
img_format = img_format.lower()

real_favicons.append(
RealFavicon(
fav_url,
img_format,
width=width,
height=height,
valid=is_valid,
image=img,
original=fav,
)
)
new_fav = _load_base64_img(fav)
real_favicons.append(new_fav)

# If we are in these modes, we need to exit the for loop
if mode in ["biggest", "smallest"]:
break

# Wait before next request to avoid detection
time.sleep(sleep_time)
# Wait before next request to avoid detection but skip it for the last item
if idx < len_process - 1:
time.sleep(sleep_time)

real_favicons = sorted(
real_favicons, key=lambda x: x.width * x.height, reverse=sort.lower() == "desc"
Expand Down
21 changes: 21 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest


@pytest.fixture(scope="function")
def base64_img():
    """Provide the base64 image sample for tests (an empty string here)."""
    sample = ""
    return sample


@pytest.fixture(scope="function")
def svg_url():
    """Provide the URL of a remote SVG file used by the tests."""
    url = "https://upload.wikimedia.org/wikipedia/commons/c/c3/Flag_of_France.svg"
    return url


@pytest.fixture(scope="function")
def gif_url():
    """Provide the URL of a remote GIF file used by the tests."""
    url = "https://www.google.com/logos/doodles/2024/seasonal-holidays-2024-6753651837110333.2-la202124.gif"
    return url


@pytest.fixture(scope="function")
def python_url():
    """Provide the URL of the website used by the tests."""
    url = "https://www.python.org"
    return url
Loading

0 comments on commit e3867b2

Please sign in to comment.