Skip to content

Commit

Permalink
fix: restore url arg in find_date() as optional (#110)
Browse files Browse the repository at this point in the history
  • Loading branch information
adbar authored Nov 7, 2023
1 parent aa86e54 commit af69eca
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
4 changes: 2 additions & 2 deletions htmldate/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ def find_date(
     extensive_search: bool = True,
     original_date: bool = False,
     outputformat: str = "%Y-%m-%d",
-    url: str = "",
+    url: Optional[str] = None,
     verbose: bool = False,
     min_date: Optional[Union[datetime, str]] = None,
     max_date: Optional[Union[datetime, str]] = None,
Expand Down Expand Up @@ -883,7 +883,7 @@ def find_date(

     # URL
     url_result = None
-    if not url:
+    if url is None:
         # probe for canonical links
         urlelem = tree.find('.//link[@rel="canonical"]')
         if urlelem is not None:
Expand Down
25 changes: 13 additions & 12 deletions htmldate/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,21 +227,22 @@ def discard_unwanted(tree: HtmlElement) -> Tuple[HtmlElement, List[HtmlElement]]


 def extract_url_date(
-    testurl: str,
+    testurl: Optional[str],
     options: Extractor,
 ) -> Optional[str]:
     """Extract the date out of an URL string complying with the Y-M-D format"""
-    match = COMPLETE_URL.search(testurl)
-    if match:
-        LOGGER.debug("found date in URL: %s", match[0])
-        try:
-            dateobject = datetime(int(match[1]), int(match[2]), int(match[3]))
-            if is_valid_date(
-                dateobject, options.format, earliest=options.min, latest=options.max
-            ):
-                return dateobject.strftime(options.format)
-        except ValueError as err:  # pragma: no cover
-            LOGGER.debug("conversion error: %s %s", match[0], err)
+    if testurl is not None:
+        match = COMPLETE_URL.search(testurl)
+        if match:
+            LOGGER.debug("found date in URL: %s", match[0])
+            try:
+                dateobject = datetime(int(match[1]), int(match[2]), int(match[3]))
+                if is_valid_date(
+                    dateobject, options.format, earliest=options.min, latest=options.max
+                ):
+                    return dateobject.strftime(options.format)
+            except ValueError as err:  # pragma: no cover
+                LOGGER.debug("conversion error: %s %s", match[0], err)
     return None


Expand Down

0 comments on commit af69eca

Please sign in to comment.