feat: add delay to avoid rate limiting (#22)
Adds a 1.8 sec delay between requests to avoid rate limiting.
wenboyu2 authored Apr 7, 2019
1 parent 39bd9e5 commit b56f248
Showing 3 changed files with 21 additions and 1 deletion.
13 changes: 13 additions & 0 deletions README.md
@@ -50,3 +50,16 @@ yec = YahooEarningsCalendar()
print(yec.get_next_earnings_date('box'))
# 1508716800
```

### Set delay between requests

By default, requests are delayed by 1.8 seconds to stay under the 2,000-requests-per-hour rate limit (60 × 60 s / 2000 = 1.8 s). You can override this default by passing a delay, in seconds, to the `YahooEarningsCalendar` constructor.

```py
import datetime
from yahoo_earnings_calendar import YahooEarningsCalendar

my_custom_delay_s = 0.5

yec = YahooEarningsCalendar(my_custom_delay_s)
```
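A further usage note: because the constructor passes the delay straight to `time.sleep` before each request, a delay of `0` effectively turns throttling off, at the risk of hitting the rate limit:

```py
from yahoo_earnings_calendar import YahooEarningsCalendar

# A zero delay makes time.sleep(0) a no-op, so requests are not throttled.
yec_no_delay = YahooEarningsCalendar(0)
```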
2 changes: 1 addition & 1 deletion setup.py
@@ -12,7 +12,7 @@
install_requires=[
'requests'
],
version='0.4.0',
version='0.5.0',
description='Scrapes data from Yahoo! Finance earnings calendar',
long_description=long_description,
long_description_content_type="text/markdown",
7 changes: 7 additions & 0 deletions yahoo_earnings_calendar/scraper.py
@@ -5,9 +5,12 @@
import json
import logging
import requests
import time

BASE_URL = 'https://finance.yahoo.com/calendar/earnings'
BASE_STOCK_URL = 'https://finance.yahoo.com/quote'
RATE_LIMIT = 2000.0
SLEEP_BETWEEN_REQUESTS_S = 60 * 60 / RATE_LIMIT

# Logging config
logger = logging.getLogger()
@@ -24,7 +27,11 @@ class YahooEarningsCalendar(object):
This is the class for fetching earnings data from Yahoo! Finance
"""

def __init__(self, delay=SLEEP_BETWEEN_REQUESTS_S):
self.delay = delay

def _get_data_dict(self, url):
time.sleep(self.delay)
page = requests.get(url)
page_content = page.content.decode(encoding='utf-8', errors='strict')
page_data_string = [row for row in page_content.split(
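
Outside the diff, a minimal self-contained sketch of the throttling pattern this commit adopts — a fixed sleep per request sized from an hourly budget. The `fetch_with_throttle` helper and its constant names are illustrative, not part of the library:

```py
import time

import requests

# Budget-derived delay: 3600 seconds / 2000 requests = 1.8 seconds per request,
# mirroring RATE_LIMIT and SLEEP_BETWEEN_REQUESTS_S in scraper.py.
MAX_REQUESTS_PER_HOUR = 2000.0
DELAY_S = 60 * 60 / MAX_REQUESTS_PER_HOUR


def fetch_with_throttle(url, delay=DELAY_S):
    """Sleep before each request so sustained usage stays under the hourly limit."""
    time.sleep(delay)
    return requests.get(url)
```

Sleeping unconditionally before every request is the simplest way to cap the average request rate, at the cost of also delaying the first call.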
