From 39772438b8a4c13e78cee7564c9065944b79455b Mon Sep 17 00:00:00 2001 From: Max Varlamov Date: Wed, 26 Jul 2023 14:54:57 +0300 Subject: [PATCH] fix: determine response type only by action type --- scrapypuppeteer/middleware.py | 14 +++++++------- setup.py | 6 +++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/scrapypuppeteer/middleware.py b/scrapypuppeteer/middleware.py index c8adc27..9235f09 100644 --- a/scrapypuppeteer/middleware.py +++ b/scrapypuppeteer/middleware.py @@ -2,15 +2,15 @@ import logging from collections import defaultdict from typing import List, Union -from urllib.parse import urljoin, urlencode +from urllib.parse import urlencode, urljoin from scrapy import Request, signals from scrapy.crawler import Crawler from scrapy.exceptions import IgnoreRequest, NotConfigured from scrapy.http import Headers, TextResponse -from scrapypuppeteer import PuppeteerRequest, PuppeteerHtmlResponse, PuppeteerResponse -from scrapypuppeteer.actions import Screenshot, RecaptchaSolver, Click +from scrapypuppeteer import PuppeteerHtmlResponse, PuppeteerRequest, PuppeteerResponse +from scrapypuppeteer.actions import Click, GoBack, GoForward, GoTo, RecaptchaSolver, Screenshot, Scroll from scrapypuppeteer.response import PuppeteerJsonResponse, PuppeteerScreenshotResponse @@ -136,7 +136,7 @@ def process_response(self, request, response, spider): context_id = response_data.pop('contextId', None) page_id = response_data.pop('pageId', None) - response_cls = self._get_response_class(puppeteer_request.action, response_data) + response_cls = self._get_response_class(puppeteer_request.action) response = response_cls( url=puppeteer_request.url, puppeteer_request=puppeteer_request, @@ -150,10 +150,10 @@ def process_response(self, request, response, spider): return response @staticmethod - def _get_response_class(request_action, response_data): - if 'html' in response_data and 'recaptcha_data' not in response_data: + def _get_response_class(request_action): + if isinstance(request_action, (GoTo, GoForward, GoBack, Click, Scroll)): return PuppeteerHtmlResponse - if 'screenshot' in response_data and isinstance(request_action, Screenshot): + if isinstance(request_action, Screenshot): return PuppeteerScreenshotResponse return PuppeteerJsonResponse diff --git a/setup.py b/setup.py index e62f38e..2870f7b 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='scrapy-puppeteer-client', - version='0.1.0', + version='0.1.1', description='A library to use Puppeteer-managed browser in Scrapy spiders', long_description=long_description, long_description_content_type="text/markdown", @@ -25,6 +25,10 @@ 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Framework :: Scrapy', 'Intended Audience :: Developers', 'Operating System :: OS Independent',