From 80022ed1b2c9d1de725da4ba504a1c4a38b14762 Mon Sep 17 00:00:00 2001 From: Adrien Barbaresi Date: Mon, 21 Oct 2024 15:24:55 +0200 Subject: [PATCH] deprecate base_url parameter in extract_links() --- courlan/core.py | 7 ++----- tests/unit_tests.py | 6 ++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/courlan/core.py b/courlan/core.py index 2e7b254..37e24b5 100644 --- a/courlan/core.py +++ b/courlan/core.py @@ -5,7 +5,6 @@ # import locale import logging import re -import warnings from typing import List, Optional, Set, Tuple from urllib.robotparser import RobotFileParser @@ -136,7 +135,6 @@ def check_url( def extract_links( pagecontent: str, url: Optional[str] = None, - base_url: Optional[str] = None, external_bool: bool = False, no_filter: bool = False, language: Optional[str] = None, @@ -145,6 +143,7 @@ def extract_links( with_nav: bool = False, redirects: bool = False, reference: Optional[str] = None, + base_url: Optional[str] = None, ) -> Set[str]: """Filter links in a HTML document using a series of heuristics Args: @@ -167,9 +166,7 @@ def extract_links( Nothing. """ if base_url: - warnings.warn( - "'base_url' will soon be deprecated, use 'url'.", PendingDeprecationWarning - ) + raise ValueError("'base_url' is deprecated, use 'url' instead.") base_url = base_url or get_base_url(url) url = url or base_url diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 1df56ee..3f0d0da 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -862,7 +862,9 @@ def test_external(): def test_extraction(): """test link comparison in HTML""" - assert len(extract_links(None, "https://test.com/", False)) == 0 + with pytest.raises(ValueError): + extract_links(None, base_url="https://test.com/", external_bool=False) + assert len(extract_links(None, url="https://test.com/", external_bool=False)) == 0 assert len(extract_links("", "https://test.com/", False)) == 0 # link known under another form pagecontent = '' @@ -933,7 +935,7 @@ def test_extraction(): "https://httpbin.org/links/2/1", ] links = extract_links( - pagecontent, base_url="https://httpbin.org", external_bool=False, with_nav=True + pagecontent, url="https://httpbin.org", external_bool=False, with_nav=True ) assert sorted(links) == [ "https://httpbin.org/links/2/0",