-
Notifications
You must be signed in to change notification settings - Fork 181
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix For Obtaining Yahoo Historical Data #52
base: master
Are you sure you want to change the base?
Changes from 15 commits
aebae1f
c904b9c
160178a
f0d861a
f22703d
09aedad
8038441
863addb
4b3f809
563373c
f36ebdc
1ce6ce2
74eba18
5cc70d4
275c875
84fdbbd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,15 +15,21 @@ | |
|
||
__version__ = '0.2.6dev' # NOQA | ||
|
||
import time | ||
import re | ||
|
||
try: | ||
# py3 | ||
from urllib.request import Request, urlopen | ||
from urllib.parse import urlencode | ||
from imp import reload | ||
except ImportError: | ||
# py2 | ||
from urllib2 import Request, urlopen | ||
from urllib import urlencode | ||
import sys | ||
reload(sys) | ||
sys.setdefaultencoding('utf8') | ||
|
||
|
||
def _request(symbol, stat): | ||
|
@@ -462,29 +468,78 @@ def get_short_ratio(symbol): | |
return _request(symbol, 's7') | ||
|
||
|
||
def get_historical_prices(symbol, start_date, end_date): | ||
def _get_headers(): | ||
headers = { | ||
'Connection': 'keep-alive', | ||
'Expires': str(-1), | ||
'Upgrade-Insecure-Requests': str(1), | ||
# Google Chrome: | ||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) \ | ||
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' # noqa | ||
} | ||
|
||
return headers | ||
|
||
|
||
def _get_crumb_cookie(symbol):
    """
    Get a valid crumb id by scraping the quote history page.

    Requests https://finance.yahoo.com/quote/<symbol>/history, then reads
    the "CrumbStore" crumb embedded in the page body and the Set-Cookie
    header of the response.

    Returns a dictionary with two keys:
        'crumb'  - the crumb, with backslash escape sequences decoded
        'cookie' - the value of the response's Set-Cookie header

    Raises IndexError if no crumb can be found in the page.
    """
    tu = "https://finance.yahoo.com/quote/{}/history".format(symbol)
    req = Request(tu, headers=_get_headers())
    resp = urlopen(req)
    try:
        cookies = resp.info()['Set-Cookie']
        content = str(resp.read().decode('utf-8').strip())
    finally:
        # Always release the connection. try/finally is used instead of
        # `with` so this works with both the py2 and py3 response objects.
        resp.close()

    # Matches: "CrumbStore":{"crumb":"<value>"}
    rpat = '"CrumbStore":{"crumb":"([^"]+)"}'
    match = re.search(rpat, content)
    if match is None:
        # IndexError is kept for backward compatibility: the previous
        # implementation indexed an empty findall() result in this case.
        raise IndexError(
            'no CrumbStore crumb found in history page for {}'.format(symbol))

    # Decode backslash escapes (e.g. \u002F) present in the scraped value.
    crumb = match.group(1).encode('ascii').decode('unicode-escape')
    return {'crumb': crumb,
            'cookie': cookies}
|
||
|
||
def get_historical_prices(symbol, start_date, end_date, interval='1d'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might be using this wrong, but can 'interval' only be a number of days? If so, maybe rename the variable to interval_days and have the input be an int? That will make it more clear how it's used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interval variable can take the following: This determines if you would like to obtain data rolled up daily, weekly or monthly. Changing it to interval days might be misleading. However this library was used to obtain only daily data before so we could remove it as an argument. What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh I see. Hmm... Either way seems fine. Maybe keep it because being able to request at a specific interval would be sweet :D although could you add a small docstring explaining the options? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No problem. Will add it and push the update. Any idea why Travis returns wrong assert? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done with the requested changes |
||
""" | ||
Get historical prices for the given ticker symbol. | ||
Date format is 'YYYY-MM-DD' | ||
|
||
interval determines the rollup or aggregate of the | ||
time series to return. interval can take the following values: | ||
|
||
1d - data returned on a daily basis. This is the default | ||
1wk - data rolled up and returned on a weekly basis | ||
1mo - data rolled up and returned on a monthly basis | ||
|
||
Returns a nested dictionary (dict of dicts). | ||
outer dict keys are dates ('YYYY-MM-DD') | ||
""" | ||
|
||
pattern = '%Y-%m-%d' | ||
|
||
unix_start = int(time.mktime(time.strptime(start_date, pattern))) | ||
unix_end = int(time.mktime(time.strptime(end_date, pattern))) | ||
|
||
crumb_cookie = _get_crumb_cookie(symbol) | ||
|
||
params = urlencode({ | ||
's': symbol, | ||
'a': int(start_date[5:7]) - 1, | ||
'b': int(start_date[8:10]), | ||
'c': int(start_date[0:4]), | ||
'd': int(end_date[5:7]) - 1, | ||
'e': int(end_date[8:10]), | ||
'f': int(end_date[0:4]), | ||
'g': 'd', | ||
'ignore': '.csv', | ||
}) | ||
url = 'http://real-chart.finance.yahoo.com/table.csv?%s' % params | ||
req = Request(url) | ||
'period1': unix_start, | ||
'period2': unix_end, | ||
'interval': interval, | ||
'events': 'history', | ||
'crumb': crumb_cookie['crumb'] | ||
}) | ||
|
||
url = 'https://query1.finance.yahoo.com/v7/finance/download/{}?{}'.\ | ||
format(symbol, params) | ||
|
||
req = Request(url, headers=_get_headers()) | ||
req.add_header("Cookie", crumb_cookie['cookie']) | ||
resp = urlopen(req) | ||
content = str(resp.read().decode('utf-8').strip()) | ||
|
||
daily_data = content.splitlines() | ||
hist_dict = dict() | ||
keys = daily_data[0].split(',') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please don't reload sys to change the default encoding