-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl_data.py
27 lines (23 loc) · 1.08 KB
/
crawl_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from bs4 import BeautifulSoup
import urllib.request
import datetime
import csv
# Scrape CoinMarketCap's Bitcoin daily historical table (Date, OHLC, Volume,
# Market Cap) and write it to btc.csv.
#
# NOTE(review): these two dates are never used anywhere below — presumably
# they were meant to filter the crawled date range; confirm intent before
# removing them.
ngay_out_2017 = datetime.datetime(year=2017, month=2, day=15)
ngay_out_2018 = datetime.datetime(year=2018, month=2, day=15)

# Bug fix: the header VALUE previously began with a redundant literal
# "User-Agent: " prefix, so the server received a malformed UA string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063"
    )
}
webpage = "https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20130428&end=20190520"
request = urllib.request.Request(url=webpage, headers=headers)

# Context manager guarantees the HTTP response/socket is closed even on
# error; timeout prevents a stalled server from hanging the script forever.
with urllib.request.urlopen(request, timeout=30) as websource:
    soup = BeautifulSoup(websource.read(), "html.parser")

# The first <tbody> holds the price table; skip its first <tr>
# (presumably a header row — confirm against the live page markup).
tbody = soup.find_all("tbody")[0]
tr_list = tbody.find_all("tr")[1:]

# newline='' is required by the csv module to prevent the text layer from
# translating row terminators (blank-line bug on Windows); keeping
# lineterminator='\n' preserves the original '\n'-terminated output.
# An explicit encoding keeps the file stable across platforms.
with open('btc.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file, lineterminator='\n')
    writer.writerow(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'])
    for tr in tr_list:
        td = tr.find_all("td")
        # Strip thousands separators from Volume and Market Cap so those
        # columns parse as plain numbers downstream.
        writer.writerow([
            td[0].text,
            td[1].text,
            td[2].text,
            td[3].text,
            td[4].text,
            td[5].text.replace(",", ""),
            td[6].text.replace(",", ""),
        ])