Skip to content

Commit

Permalink
deprecate base_url parameter in extract_links()
Browse files: browse the repository at this point in the history
  • Loading branch information
adbar committed Oct 21, 2024
1 parent ea02ae9 commit 80022ed
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
7 changes: 2 additions & 5 deletions courlan/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# import locale
import logging
import re
import warnings

from typing import List, Optional, Set, Tuple
from urllib.robotparser import RobotFileParser
Expand Down Expand Up @@ -136,7 +135,6 @@ def check_url(
def extract_links(
pagecontent: str,
url: Optional[str] = None,
base_url: Optional[str] = None,
external_bool: bool = False,
no_filter: bool = False,
language: Optional[str] = None,
Expand All @@ -145,6 +143,7 @@ def extract_links(
with_nav: bool = False,
redirects: bool = False,
reference: Optional[str] = None,
base_url: Optional[str] = None,
) -> Set[str]:
"""Filter links in a HTML document using a series of heuristics
Args:
Expand All @@ -167,9 +166,7 @@ def extract_links(
Nothing.
"""
if base_url:
warnings.warn(
"'base_url' will soon be deprecated, use 'url'.", PendingDeprecationWarning
)
raise ValueError("'base_url' is deprecated, use 'url' instead.")

base_url = base_url or get_base_url(url)
url = url or base_url
Expand Down
6 changes: 4 additions & 2 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,9 @@ def test_external():

def test_extraction():
"""test link comparison in HTML"""
assert len(extract_links(None, "https://test.com/", False)) == 0
with pytest.raises(ValueError):
extract_links(None, base_url="https://test.com/", external_bool=False)
assert len(extract_links(None, url="https://test.com/", external_bool=False)) == 0
assert len(extract_links("", "https://test.com/", False)) == 0
# link known under another form
pagecontent = '<html><a href="https://test.org/example"/><a href="https://test.org/example/&"/></html>'
Expand Down Expand Up @@ -933,7 +935,7 @@ def test_extraction():
"https://httpbin.org/links/2/1",
]
links = extract_links(
pagecontent, base_url="https://httpbin.org", external_bool=False, with_nav=True
pagecontent, url="https://httpbin.org", external_bool=False, with_nav=True
)
assert sorted(links) == [
"https://httpbin.org/links/2/0",
Expand Down

0 comments on commit 80022ed

Please sign in to comment.