Skip to content

Commit

Permalink
fix/channel_playlist_parsing (#2)
Browse files Browse the repository at this point in the history
* fix/channel_playlist_parsing

* fix/add_pagination + parse @channel_name new type of urls

* fix/handle 'lockupViewModel'

* port pytube/pytube#1424
  • Loading branch information
JarbasAl authored Sep 4, 2024
1 parent 30829c0 commit 1c0cd26
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 21 deletions.
1 change: 1 addition & 0 deletions examples/ch_playlists.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
c = Channel(url)
print(c.videos_url)
print(c.vanity_url)
print(c.playlist_urls)
print(c.video_urls)
for v in c.videos:
print(v)
Expand Down
96 changes: 75 additions & 21 deletions tutubo/models.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,51 @@
import json
from typing import List, Tuple, Optional, Iterable
import re

from pytube import YouTube as _Yt, Channel as _Ch, Playlist as _Pl
from pytube import extract
from pytube.exceptions import VideoUnavailable
from pytube.helpers import uniqueify, cache, DeferredGeneratorList


# Optional scheme and "www." prefix, the youtube.com host, an optional
# URL-style segment ("user", "channel", "c" or "u"), an optional "@" marker
# and finally the channel identifier itself.
_CHANNEL_URL_RE = re.compile(
    r"(?:https?://)?(?:www\.)?youtube\.com/"
    r"(?:(user|channel|c|u)/)?@?([%\w\-]+)"
)


def extract_channel_name(url: str) -> str:
    """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/{channel_name}/*`
    - :samp:`https://youtube.com/@{channel_name}/*`
    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*`
    - :samp:`https://youtube.com/c/@{channel_name}/*`
    - :samp:`https://youtube.com/channel/@{channel_id}/*`
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*`

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        A path fragment of the form ``/{style}/{identifier}``, where
        ``style`` is the URL segment that preceded the identifier
        (``user``, ``channel``, ``c`` or ``u``). When the url carries no
        such segment (bare name or ``@`` handle), ``c`` is used.
    :raises extract.RegexMatchError:
        If the url does not look like a YouTube channel url.
    """
    match = _CHANNEL_URL_RE.search(url)
    if match is None:
        raise extract.RegexMatchError(
            caller="channel_name", pattern="patterns"
        )

    uri_style, uri_identifier = match.groups()
    # Bare names and "@handle" urls have no style segment; default to "c".
    return f"/{uri_style or 'c'}/{uri_identifier}"


# Monkey-patch: rebind the `channel_name` attribute of pytube's `extract`
# module to the local `extract_channel_name`, so lookups made through
# `extract.channel_name` use the parser defined above (which also accepts
# "@name" style urls).
# NOTE(review): presumably pytube's Channel resolves this at call time via
# the `extract` module — confirm against the pytube version in use.
extract.channel_name = extract_channel_name


class YoutubePreview:
def __init__(self, renderer_data):
self._raw_data = renderer_data
Expand Down Expand Up @@ -196,6 +235,10 @@ def length(self):
60 * 60 * int(h)
return 0

@property
def is_live(self) -> bool:
    """Report whether this entry is treated as a live stream.

    An entry counts as live when its ``length`` property equals ``0``.
    """
    duration = self.length
    return duration == 0

@property
def thumbnail_url(self):
    """Build the URL of the default thumbnail image for this video.

    The URL is formatted from ``self.video_id``.
    """
    url = f"https://img.youtube.com/vi/{self.video_id}/default.jpg"
    return url
Expand Down Expand Up @@ -371,32 +414,43 @@ def _extract_playlists(raw_json: str) -> Tuple[List[str], Optional[str]]:

# this is the json tree structure, if the json was extracted from
# html
playlists = []
try:
playlists = initial_data["contents"][
"twoColumnBrowseResultsRenderer"][
"tabs"][2]["tabRenderer"]["content"][
"sectionListRenderer"]["contents"][0][
"itemSectionRenderer"]["contents"][0][
'shelfRenderer']["content"][
'horizontalListRenderer']["items"]
tabs = initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
for t in tabs:
if "content" not in t["tabRenderer"]:
continue
data = t["tabRenderer"]["content"]
if "sectionListRenderer" not in t["tabRenderer"]["content"]:
continue
for c in data["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"]:
if 'shelfRenderer' in c:
playlists = c['shelfRenderer']["content"]['horizontalListRenderer']["items"]
break
elif 'gridRenderer' in c:
playlists = c['gridRenderer']["items"]
break
if playlists:
break
except (KeyError, IndexError, TypeError):
playlists = []
pass

continuation = None
try:
continuation = playlists[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']
playlists = playlists[:-1]
except (KeyError, IndexError):
# if there is an error, no continuation is available
continuation = None

for p in playlists:
if 'gridPlaylistRenderer' in p:
p['playlistId'] = p['gridPlaylistRenderer']['playlistId']
elif 'lockupViewModel' in p:
#p["title"] = p['lockupViewModel']['metadata']['lockupMetadataViewModel']['title']['content']
p['playlistId'] = p['lockupViewModel']['contentId']
# remove duplicates
return (
uniqueify(
list(
# only extract the video ids from the video data
map(
lambda x: (
f"/playlist?list="
f"{x['gridPlaylistRenderer']['playlistId']}"
),
playlists
)
),
),
uniqueify(list(map(lambda x: f"/playlist?list={x['playlistId']}", playlists))),
continuation,
)

Expand Down

0 comments on commit 1c0cd26

Please sign in to comment.