-
Notifications
You must be signed in to change notification settings - Fork 6
/
world-referees.py
36 lines (27 loc) · 1.05 KB
/
world-referees.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import scraperwiki
import lxml.html
# Scrapes referee bios from worldreferee.com and stores them with scraperwiki.sqlite.
# Example bio URL:
# 'http://worldreferee.com/site/copy.php?linkID=837&linkType=referee&contextType=bio'
# URL template for a single referee's bio page; ``%s`` receives the referee's
# linkID value.
baseurl = "http://worldreferee.com/site/copy.php?linkType=referee&contextType=bio&linkID=%s"

# Fetch the referee listing. The XPath below expects the response to contain
# <option> elements nested directly under a <result> element.
html = scraperwiki.scrape('http://worldreferee.com/site/ajax.php?request=getRefs&page=http://worldreferee.com/site/home.php')
root = lxml.html.fromstring(html)
options = root.xpath('//result/option')

# Collect the ``value`` attribute of every option. Options with a missing or
# empty ``value`` are skipped: a missing attribute used to raise KeyError, and
# an empty one would have produced a bio URL with no linkID.
ids = [option.get('value') for option in options if option.get('value')]
def process(id):
    """Scrape one referee's bio page and save it as a row keyed on ``id``.

    Builds the bio URL from the module-level ``baseurl`` template, downloads
    and parses the page, then collects the referee's name plus every
    label/value pair found in the ``div.spec`` rows under each ``div.specs``
    container. The resulting dict is written with ``scraperwiki.sqlite.save``
    using ``id`` as the unique key.

    Args:
        id: Referee identifier substituted into ``baseurl``. The name shadows
            the builtin but is kept so existing callers are unaffected.

    Returns:
        None. The record is persisted as a side effect.

    Raises:
        IndexError: If the page has no ``<h1>`` text node to take the name
            from.
    """
    url = baseurl % id
    record = {'url': url, 'id': id}

    page = lxml.html.fromstring(scraperwiki.scrape(url))

    # The heading text is wrapped as "Referee <name> bio"; strip the wrapper.
    heading = page.xpath('//h1/text()')[0]
    record['name'] = heading.replace('Referee ', '').replace(' bio', '').strip()

    for specs in page.xpath('//div[@class="specs"]'):
        for spec in specs.xpath('div[@class="spec"]'):
            # A usable row has a label child followed by a value child.
            # Skip anything else instead of aborting the whole scrape.
            if len(spec) < 2 or spec[0].text is None:
                continue
            # Dots are removed from the label before it is used as a key
            # (presumably so it is usable as a column name - TODO confirm).
            label = spec[0].text.replace('.', '')
            if not label:
                continue
            record[label] = spec[1].text

    scraperwiki.sqlite.save(['id'], record, verbose=0)
# Scrape and store every referee id collected in ``ids``. The loop variable is
# deliberately not named ``id`` so the builtin is not rebound at module scope.
for referee_id in ids:
    process(referee_id)