yahoo_parser (1).py
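# yahoo_parser: for each ticker listed in tickers.txt, fetch the Yahoo Finance
# "up/down" page (YAHOO_UPDOWN_URL), append the table rows (ticker + 5 data
# columns) to stocks.csv, and finally convert the accumulated CSV into a
# MATLAB-readable stocks.mat file.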
import csv

import requests
import scipy.io as sio
from bs4 import BeautifulSoup as bs

YAHOO_UPDOWN_URL = 'http://finance.yahoo.com/q/ud?s='

# Single-ticker dry run of the scrape used in the loop below.
tick = 'ZQK'
page = requests.get(YAHOO_UPDOWN_URL + tick, allow_redirects=False)
soup = bs(page.text, features='lxml')
table = soup.find('table')  # find_all() returns a ResultSet, which has no .find()
headers = table.find_all('th')
#headers = table.find('th', {'class': 'yfnc_tablehead1'})
data = table.find_all('td')
#data = table.find_all('td', {'class': 'yfnc_tabledata1'})
it = iter([d.text.strip() for d in data])  # iterator over the textual data
# Zipping the same iterator five times groups the cells into 5-column rows,
# prefixed with the ticker; integer division keeps the repetition count an int.
csvrows = list(zip([tick] * (1 + len(data) // 5), it, it, it, it, it))
print(len(data))
# tickers.txt holds one symbol per line; blank lines are skipped.
with open('tickers.txt', 'r') as f:
    lines = f.readlines()
    tickers = [l.strip() for l in lines if l.strip() != '']

out = open('stocks.csv', 'a')
csvout = csv.writer(out)
headers = None
for tick in tickers:
    print(tick)
    try:
        page = requests.get(YAHOO_UPDOWN_URL + tick)
    except Exception as inst:
        print(inst)
        continue
    soup = bs(page.text, features='lxml')
    # In bs4, tag['class'] is a list, so match on the class attribute directly
    # rather than comparing it to a string.
    table = soup.find('table', {'class': 'yfnc_datamodoutline1'})
    if table is None:
        print("No data found")
        continue
    if headers is None:
        headers = table.find_all('th', {'class': 'yfnc_tablehead1'})
        #csvout.writerow(['Ticker'] + [h.text for h in headers])  # 'Ticker' as the first column
    data = table.find_all('td', {'class': 'yfnc_tabledata1'})
    print(len(data))
    it = iter([d.text.strip() for d in data])  # iterator over the textual data
    # As above: zip the same iterator five times to group the cells into
    # 5-column rows, prefixed with the ticker.
    csvrows = list(zip([tick] * (1 + len(data) // 5), it, it, it, it, it))
    print(len(csvrows))
    #for row in csvrows:
    #    csvout.writerow(row)
    csvout.writerows(csvrows)
    out.flush()
out.close()
# Convert the accumulated CSV into a MATLAB-readable .mat file.
with open('stocks.csv', 'r') as infile:
    csvin = csv.reader(infile)
    data = list(map(tuple, csvin))
sio.savemat('stocks.mat', {'data': data}, do_compression=True)
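# Optional follow-up (an addition, not part of the original script): report how
# many rows were exported, as a quick sanity check on the scrape.
print('rows exported to stocks.mat:', len(data))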