From 48b07b8eeac8a2d2ec4ecb8858f8abc7502a500f Mon Sep 17 00:00:00 2001 From: Panos Mavrogiorgos Date: Wed, 15 Nov 2023 13:30:12 +0200 Subject: [PATCH] scraper: Increase the read timeout to 30 seconds This should reduce retrying by a good margin. It also removes an unreachable duplicate `return df` at the end of the function. --- observer/ioc/scraper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/observer/ioc/scraper.py b/observer/ioc/scraper.py index a24080e..9624604 100644 --- a/observer/ioc/scraper.py +++ b/observer/ioc/scraper.py @@ -110,7 +110,8 @@ def scrape_ioc_station( urls = generate_urls(ioc_code, start_date, end_date) logger.debug("%s: There are %d urls", ioc_code, len(urls)) logger.debug("%s:\n%s", ioc_code, "\n".join(urls)) - with httpx.Client() as client: + timeout = httpx.Timeout(timeout=10, read=30) + with httpx.Client(timeout=timeout) as client: func_kwargs = [dict(url=url, client=client, rate_limit=rate_limit) for url in urls] logger.debug("%s: Starting data retrieval", ioc_code) results = multifutures.multithread(fetch_url, func_kwargs, check=True) @@ -151,5 +152,4 @@ def scrape_ioc_station( logger.info("%s: Finished scraping: %s - %s", ioc_code, start_date, end_date) return df - return df