retrieve-scrapers.py

# retrieve-scrapers.py - ScraperWiki view that bundles the scrapers owned by a
# given user into a tar archive and serves it as a download.
import scraperwiki
import simplejson
import tarfile
import time
import StringIO
import os
import cgi
import random
import string

chars = string.letters + string.digits
randomstring = ''.join([random.choice(chars) for i in xrange(4)])  # random suffix for the temp filename, to avoid cached/colliding files

qsenv = dict(cgi.parse_qsl(os.getenv("QUERY_STRING", "")))
if not qsenv:
    # No query string supplied: print usage instructions and stop.
    print 'Please add a username to the url in this format:<br>'
    print 'user=USERNAME<br>'
    print 'Example: https://views.scraperwiki.com/run/retrieve-scrapers/?user=pallih'
    exit()

mtime = time.time()
tarfilename = '/tmp/' + qsenv['user'] + '_' + randomstring + '-scrapers.tar'
sendtarfilename = qsenv['user'] + '-scrapers.tar'
tar = tarfile.open(tarfilename, 'w')  # write with no compression - for some reason compressed archives turn up damaged

# Fetch the user's profile, which lists the scrapers they own.
json_url = 'https://api.scraperwiki.com/api/1.0/scraper/getuserinfo?format=jsondict&username=' + qsenv['user']
userinfo = simplejson.loads(scraperwiki.scrape(json_url))

# The getuserinfo response is a list with one entry per user; the scrapers the
# user owns are listed under coderoles -> owner.
scrapers = []
for d in userinfo:
    for scraper_name in d['coderoles']['owner']:
        scrapers.append(scraper_name)

for scraper_name in scrapers[50:]:  # note: the slice skips the first 50 scrapers
    # Fetch the scraper's code and metadata, suppressing the bulky fields we don't need.
    scraper_url = 'https://api.scraperwiki.com/api/1.0/scraper/getinfo?format=jsondict&name=' + scraper_name + '&version=-1&quietfields=runevents%7Cdatasummary%7Cuserroles%7Chistory'
    scraper_json = simplejson.loads(scraperwiki.scrape(scraper_url))
    for info in scraper_json:
        code = info['code'].encode('utf-8')
        language = info['language']
        # Pick a file extension matching the scraper's language.
        if language == 'python':
            ending = '.py'
        elif language == 'php':
            ending = '.php'
        elif language == 'ruby':
            ending = '.rb'
        elif language == 'html':
            ending = '.html'
        else:
            ending = '.txt'
        # Add the code to the tar archive as an in-memory file.
        tarinfo = tarfile.TarInfo(scraper_name + ending)
        tarinfo.size = len(code)
        tarinfo.mtime = mtime
        tar.addfile(tarinfo, StringIO.StringIO(code))

tar.close()

# Serve the archive back to the browser as a download.
scraperwiki.utils.httpresponseheader("Content-Type", "application/x-tar")
scraperwiki.utils.httpresponseheader("Content-Disposition", "attachment; filename=" + sendtarfilename)
f = open(tarfilename, "rb")
data = f.read()
print data
f.close()
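
For reference, a client can save the generated archive straight to disk. A minimal sketch in Python 2 (matching the script), assuming the view is published at the URL shown in the usage message above and using "pallih" as an example username:

import urllib

# Example only: substitute your own ScraperWiki username and output filename.
url = "https://views.scraperwiki.com/run/retrieve-scrapers/?user=pallih"
urllib.urlretrieve(url, "pallih-scrapers.tar")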