From eb152c4428acaa5f768614f552edc16465d5797c Mon Sep 17 00:00:00 2001 From: Kelly Christensen <63346582+kat-kel@users.noreply.github.com> Date: Thu, 29 Feb 2024 12:32:06 +0100 Subject: [PATCH] Adding YoutubeShort result to youtube url parsing --- test/youtube_test.py | 28 ++++++++++++++++++++++------ ural/youtube.py | 24 +++++++++++++++++++++++- ural/youtube.pyi | 5 ++++- 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/test/youtube_test.py b/test/youtube_test.py index b604fc8..35b99c1 100644 --- a/test/youtube_test.py +++ b/test/youtube_test.py @@ -2,15 +2,16 @@ # Ural Youtube Unit Tests # ============================================================================= from ural.youtube import ( + YoutubeChannel, + YoutubeShort, + YoutubeUser, + YoutubeVideo, + extract_video_id_from_youtube_url, + is_youtube_channel_id, is_youtube_url, is_youtube_video_id, - is_youtube_channel_id, - parse_youtube_url, - extract_video_id_from_youtube_url, normalize_youtube_url, - YoutubeVideo, - YoutubeUser, - YoutubeChannel, + parse_youtube_url, ) IS_TESTS = [ @@ -232,6 +233,21 @@ YoutubeChannel(id=None, name="28minutesARTE"), "https://www.youtube.com/28minutesARTE", ), + ( + "https://www.youtube.com/shorts/xnh-JKqktAU", + YoutubeShort(id="xnh-JKqktAU"), + "https://www.youtube.com/shorts/xnh-JKqktAU", + ), + ( + "https://www.youtube.com/shorts/U5Bn8mMxj4o/nonsense?whatever", + YoutubeShort(id="U5Bn8mMxj4o"), + "https://www.youtube.com/shorts/U5Bn8mMxj4o", + ), + ( + "https://www.youtube.com/shorts/", + None, + "https://www.youtube.com/shorts/", + ), ] diff --git a/ural/youtube.py b/ural/youtube.py index 96064a4..04fbf66 100644 --- a/ural/youtube.py +++ b/ural/youtube.py @@ -191,6 +191,8 @@ # but there is no way to infer this... YOUTUBE_CHANNEL_NAME_URL_TEMPLATE = "https://www.youtube.com/%s" +YOUTUBE_SHORT_URL_TEMPLATE = "https://www.youtube.com/shorts/%s" + YOUTUBE_CHANNEL_NAME_BLACKLIST = { "about", "account", @@ -209,7 +211,7 @@ YoutubeVideo = namedtuple("YoutubeVideo", ["id", "playlist"]) YoutubeUser = namedtuple("YoutubeUser", ["id", "name"]) YoutubeChannel = namedtuple("YoutubeChannel", ["id", "name"]) - +YoutubeShort = namedtuple("YoutubeShort", ["id"]) # NOTE: we use a trie to perform efficient queries and so we don't # need to test every domain/subdomain linearly @@ -369,6 +371,22 @@ def parse_youtube_url(url, fix_common_mistakes=True): return YoutubeChannel(id=cid, name=None) + elif path.startswith("/shorts/"): + splitted_path = pathsplit(path) + + if len(splitted_path) < 2: + return None + + v = splitted_path[1] + + if fix_common_mistakes: + v = v[:11] + + if not is_youtube_video_id(v): + return + + return YoutubeShort(id=v) + else: path = path.rstrip("/") if path.count("/") == 1: @@ -414,4 +432,8 @@ def normalize_youtube_url(url): return YOUTUBE_CHANNEL_NAME_URL_TEMPLATE % parsed.name + if isinstance(parsed, YoutubeShort): + if parsed.id is not None: + return YOUTUBE_SHORT_URL_TEMPLATE % parsed.id + raise TypeError("normalize_youtube_url: impossible path reached") diff --git a/ural/youtube.pyi b/ural/youtube.pyi index 6fac1ce..c2783d9 100644 --- a/ural/youtube.pyi +++ b/ural/youtube.pyi @@ -12,11 +12,14 @@ class YoutubeChannel(NamedTuple): id: str name: str +class YoutubeShort(NamedTuple): + id: str + def is_youtube_url(url: AnyUrlTarget) -> bool: ... def is_youtube_video_id(value: str) -> bool: ... def is_youtube_channel_id(value: str) -> bool: ... def parse_youtube_url( url: AnyUrlTarget, fix_common_mistakes: bool = ... -) -> Optional[Union[YoutubeVideo, YoutubeUser, YoutubeChannel]]: ... +) -> Optional[Union[YoutubeVideo, YoutubeUser, YoutubeChannel, YoutubeShort]]: ... def extract_video_id_from_youtube_url(url: AnyUrlTarget) -> Optional[str]: ... def normalize_youtube_url(url: AnyUrlTarget) -> str: ...