Skip to content

Commit

Permalink
scraper: Increase the read Timeout to 30 seconds
Browse files (browse the repository at this point in the history)
This should reduce retrying by a good margin.
  • Loading branch information
pmav99 committed Nov 15, 2023
1 parent f1ec7f6 commit 48b07b8
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions observer/ioc/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def scrape_ioc_station(
urls = generate_urls(ioc_code, start_date, end_date)
logger.debug("%s: There are %d urls", ioc_code, len(urls))
logger.debug("%s:\n%s", ioc_code, "\n".join(urls))
- with httpx.Client() as client:
+ timeout = httpx.Timeout(timeout=10, read=30)
+ with httpx.Client(timeout=timeout) as client:
func_kwargs = [dict(url=url, client=client, rate_limit=rate_limit) for url in urls]
logger.debug("%s: Starting data retrieval", ioc_code)
results = multifutures.multithread(fetch_url, func_kwargs, check=True)
Expand Down Expand Up @@ -151,5 +152,4 @@ def scrape_ioc_station(
logger.info("%s: Finished scraping: %s - %s", ioc_code, start_date, end_date)
return df


return df

0 comments on commit 48b07b8

Please sign in to comment.