diff --git a/scripts/generate_parser_test_files.py b/scripts/generate_parser_test_files.py
index 8d276bd6..55129bdf 100644
--- a/scripts/generate_parser_test_files.py
+++ b/scripts/generate_parser_test_files.py
@@ -76,7 +76,7 @@ def get_test_article(enum: PublisherEnum) -> Optional[Article]:
             basic_logger.warn(f"Couldn't get article for {publisher.name}. Skipping")
             continue
         html = HTMLTestFile(
-            url=article.html.url,
+            url=article.html.responded_url,
             content=article.html.content,
             crawl_date=article.html.crawl_date,
             publisher=publisher,
diff --git a/setup.cfg b/setup.cfg
index 7093734d..0b889fd1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,6 +27,7 @@ install_requires =
     typing-extensions >= 4.0, < 5.0
     langdetect~=1.0.9
     aiohttp~=3.8.4
+    validators~=0.20.0
 
 python_requires = >=3.8
 zip_safe = no
diff --git a/src/fundus/scraping/article.py b/src/fundus/scraping/article.py
index 88f2099d..989f4253 100644
--- a/src/fundus/scraping/article.py
+++ b/src/fundus/scraping/article.py
@@ -54,7 +54,7 @@ def lang(self) -> Optional[str]:
         try:
             language = langdetect.detect(self.plaintext)
         except langdetect.LangDetectException:
-            basic_logger.debug(f"Unable to detect language for article '{self.html.url}'")
+            basic_logger.debug(f"Unable to detect language for article '{self.html.responded_url}'")
 
         # use @lang attribute of <html> tag as fallback
         if not language or language == langdetect.detector_factory.Detector.UNKNOWN_LANG:
diff --git a/src/fundus/scraping/html.py b/src/fundus/scraping/html.py
index 7a247174..775848fc 100644
--- a/src/fundus/scraping/html.py
+++ b/src/fundus/scraping/html.py
@@ -19,6 +19,7 @@
 import aiohttp
 import feedparser
 import lxml.html
+import validators
 from aiohttp.client_exceptions import ClientError
 from aiohttp.http_exceptions import HttpProcessingError
 from aiohttp.web_exceptions import HTTPError
@@ -74,10 +75,6 @@ def supported_file_formats(self) -> List[str]:
         return list(self.archive_mapping.keys())
 
 
-def validate_url(url: str) -> bool:
-    return bool(re.match(r"https?://(?:[a-zA-Z]|\d|[$-_@.&+]|[!*(),]|%[\da-fA-F][\da-fA-F])+", url))
-
-
 @dataclass
 class URLSource(AsyncIterable[str], ABC):
     url: str
@@ -87,7 +84,7 @@ class URLSource(AsyncIterable[str], ABC):
     def __post_init__(self):
         if not self._request_header:
             self._request_header = _default_header
-        if not validate_url(self.url):
+        if not validators.url(self.url):
             raise ValueError(f"Invalid url '{self.url}'")
 
     def set_header(self, request_header: Dict[str, str]) -> None:
@@ -130,7 +127,7 @@ class Sitemap(URLSource):
     async def _get_pre_filtered_urls(self) -> AsyncIterator[str]:
         async def yield_recursive(sitemap_url: str) -> AsyncIterator[str]:
             session = await session_handler.get_session()
-            if not validate_url(sitemap_url):
+            if not validators.url(sitemap_url):
                 basic_logger.info(f"Skipped sitemap '{sitemap_url}' because the URL is malformed")
             async with session.get(url=sitemap_url, headers=self._request_header) as response:
                 try:
@@ -200,7 +197,7 @@ def _filter(self, url: str) -> bool:
 
     async def fetch(self) -> AsyncIterator[HTML]:
         async for url in self.url_source:
-            if not validate_url(url):
+            if not validators.url(url):
                 basic_logger.debug(f"Skipped requested URL '{url}' because the URL is malformed")
                 continue
 
@@ -211,7 +208,6 @@ async def fetch(self) -> AsyncIterator[HTML]:
 
             session = await session_handler.get_session()
             async with session.get(url, headers=self.request_header) as response:
-
                 if self._filter(str(response.url)):
                     basic_logger.debug(f"Skipped responded URL '{url}' because of URL filter")
                     continue
diff --git a/src/fundus/scraping/pipeline.py b/src/fundus/scraping/pipeline.py
index 79edbd69..784d78d9 100644
--- a/src/fundus/scraping/pipeline.py
+++ b/src/fundus/scraping/pipeline.py
@@ -111,7 +111,9 @@ def run(
         event_loop = asyncio.get_event_loop()
 
         def article_gen() -> Iterator[Article]:
-            interleave: AsyncIterator[Iterable[Optional[Article]]] = batched_interleave_longest(*async_article_iterators)
+            interleave: AsyncIterator[Iterable[Optional[Article]]] = batched_interleave_longest(
+                *async_article_iterators
+            )
             while True:
                 start_time = time.time()
                 batch: Optional[Iterable[Optional[Article]]] = event_loop.run_until_complete(
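
Note on the URL-validation change: the diff drops the hand-rolled `validate_url` regex in `html.py` in favor of the `validators` package (pinned as `validators~=0.20.0` in setup.cfg). A minimal sketch of the behavior the new guard relies on, assuming validators 0.20.x (the snippet is illustrative, not part of the diff): `validators.url()` returns `True` for a well-formed URL and a falsy `ValidationFailure` object otherwise, so `if not validators.url(url)` works as a drop-in replacement without a try/except.

```python
# Illustrative only; assumes validators ~= 0.20.0 as pinned in setup.cfg.
import validators

for candidate in ("https://example.com/news", "not-a-url"):
    result = validators.url(candidate)
    if not result:
        # Invalid URLs yield a falsy ValidationFailure instead of raising,
        # which is why `if not validators.url(url)` suffices as a guard.
        print(f"Skipped '{candidate}' because the URL is malformed")
    else:
        print(f"'{candidate}' is well-formed")
```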