From b1c7dc934959afa7ca45b9426e19195903982494 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 27 Jun 2024 10:00:38 +0200 Subject: [PATCH] If Retry has a message, use it as retry reason (#202) --- scrapy_poet/spidermiddlewares.py | 2 +- tests/test_retries.py | 34 ++++++++++++++++++++++++++++++++ tox.ini | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/scrapy_poet/spidermiddlewares.py b/scrapy_poet/spidermiddlewares.py index c1bdacd6..f4900154 100644 --- a/scrapy_poet/spidermiddlewares.py +++ b/scrapy_poet/spidermiddlewares.py @@ -24,7 +24,7 @@ def process_spider_exception( new_request_or_none = get_retry_request( response.request, spider=spider, - reason="page_object_retry", + reason=str(exception) or "page_object_retry", ) if not new_request_or_none: return [] diff --git a/tests/test_retries.py b/tests/test_retries.py index 8d3f7408..b37acbab 100644 --- a/tests/test_retries.py +++ b/tests/test_retries.py @@ -53,6 +53,40 @@ def parse(self, response, page: SamplePage): _assert_all_unique_instances(page_response_instances) +@inlineCallbacks +def test_retry_reason(): + retries = deque([True, False]) + items, page_instances, page_response_instances = [], [], [] + + with MockServer(EchoResource) as server: + + class SamplePage(WebPage): + def to_item(self): + page_instances.append(self) + page_response_instances.append(self.response) + if retries.popleft(): + raise Retry("foo") + return {"foo": "bar"} + + class TestSpider(BaseSpider): + def start_requests(self): + yield Request(server.root_url, callback=self.parse) + + def parse(self, response, page: SamplePage): + items.append(page.to_item()) + + crawler = make_crawler(TestSpider) + yield crawler.crawl() + + assert items == [{"foo": "bar"}] + assert crawler.stats.get_value("downloader/request_count") == 2 + assert crawler.stats.get_value("retry/count") == 1 + assert crawler.stats.get_value("retry/reason_count/foo") == 1 + assert crawler.stats.get_value("retry/max_reached") is None + _assert_all_unique_instances(page_instances) + _assert_all_unique_instances(page_response_instances) + + @inlineCallbacks def test_retry_max(): # The default value of the RETRY_TIMES Scrapy setting is 2. diff --git a/tox.ini b/tox.ini index 372a17ed..7ac907ed 100644 --- a/tox.ini +++ b/tox.ini @@ -94,7 +94,7 @@ commands = pre-commit run --all-files --show-diff-on-failure [testenv:twinecheck] basepython = python3 deps = - twine==4.0.2 + twine==5.0.0 build==0.10.0 commands = python -m build --sdist