-
Notifications
You must be signed in to change notification settings - Fork 0
/
dm_youtools.py
77 lines (70 loc) · 2.38 KB
/
dm_youtools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
dm_youtools module.
This module contains the functions required for retrieving data from YouTube.
"""
import gdata.youtube.service
import time
import urllib, json
import socket
import lib_speed as ls
@ls.speed_calculate
def retrieve_youtube_comments(video_id):
    """
    Retrieve youtube comments for the given video.

    The comment feed is requested 50 entries at a time, starting at index 1.
    Each following page is taken from the link returned by the feed's
    GetNextLink(). Fetching is best-effort: when a request fails with
    socket.gaierror or gdata.service.RequestError the loop stops and the
    comments collected so far are returned.

    Keyword arguments:
    video_id -- the video for which we want to retrieve the comments

    Returns a list holding the text of every comment that was retrieved.
    """
    service = gdata.youtube.service.YouTubeService()
    # video_id is passed as a format argument instead of being concatenated
    # into the pattern, so a '%' inside it is never read as a directive.
    url = ('http://gdata.youtube.com/feeds/api/videos/%s'
           '/comments?start-index=%d&max-results=50') % (video_id, 1)
    comments = []
    while url:
        try:
            feed = service.GetYouTubeVideoCommentFeed(uri=url)
        except (socket.gaierror, gdata.service.RequestError):
            # Keep whatever was gathered before the failure.
            break
        comments.extend(entry.content.text for entry in feed.entry)
        # Look the link up once; a missing link (or a link without an href)
        # yields None, which ends the loop.
        url = getattr(feed.GetNextLink(), 'href', None)
    return comments
@ls.speed_calculate
def get_channel_videos(author):
    """
    Retrieve channel's videos basic information.

    The channel feed is requested as JSON, 50 videos per page ordered by
    publication date, until a page with fewer than 50 entries comes back.
    A feed without an 'entry' key is treated as an empty page; this is the
    response seen when the channel's video count is a multiple of 50.
    The running total of collected videos is printed after every page.
    If a response cannot be read or parsed, "error" is printed and the videos
    gathered so far are still returned. Failures while opening the URL are
    not caught here.

    Keyword arguments:
    author -- the name of the channel

    Returns a list of dicts, one per video, with the keys "id", "title",
    "url", "image" and "time" (a time.struct_time parsed from the published
    timestamp with its fractional part dropped).
    """
    page_size = 50
    # NOTE(review): author is placed in the query string verbatim, so callers
    # are expected to pass a URL-safe channel name.
    url_template = ('http://gdata.youtube.com/feeds/api/videos?'
                    'start-index={0}&max-results={1}&alt=json&'
                    'orderby=published&author={2}')
    videos = []
    start_index = 1
    while True:
        response = urllib.urlopen(
            url_template.format(start_index, page_size, author))
        try:
            try:
                payload = json.load(response)
            finally:
                # Release the connection even when the body is not valid JSON.
                response.close()
            entries = payload['feed'].get('entry', [])
        except (IOError, ValueError, KeyError, TypeError, AttributeError):
            # Unreadable or malformed response: stop paging, keep what we have.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print("error")
            break
        videos.extend(entries)
        print(len(videos))
        # A short or empty page means the end of the channel was reached.
        if len(entries) < page_size:
            break
        start_index += page_size

    summaries = []
    for video in videos:
        media = video['media$group']
        # Drop the fractional seconds and anything after them before parsing.
        published = video['published']['$t'].split('.')[0]
        summaries.append({
            "id": video['id']['$t'].split('videos/')[1],
            "title": video['title']['$t'],
            "url": media['media$player'][0]['url'],
            "image": media['media$thumbnail'][0]['url'],
            "time": time.strptime(published, "%Y-%m-%dT%H:%M:%S"),
        })
    return summaries