Skip to content

Commit

Permalink
fix: determine response type only by action type
Browse files (browse the repository at this point in the history)
  • Loading branch information
mxsnq committed Jul 26, 2023
1 parent 995e960 commit 3977243
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
14 changes: 7 additions & 7 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
import logging
from collections import defaultdict
from typing import List, Union
from urllib.parse import urljoin, urlencode
from urllib.parse import urlencode, urljoin

from scrapy import Request, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import Headers, TextResponse

from scrapypuppeteer import PuppeteerRequest, PuppeteerHtmlResponse, PuppeteerResponse
from scrapypuppeteer.actions import Screenshot, RecaptchaSolver, Click
from scrapypuppeteer import PuppeteerHtmlResponse, PuppeteerRequest, PuppeteerResponse
from scrapypuppeteer.actions import Click, GoBack, GoForward, GoTo, RecaptchaSolver, Screenshot, Scroll
from scrapypuppeteer.response import PuppeteerJsonResponse, PuppeteerScreenshotResponse


Expand Down Expand Up @@ -136,7 +136,7 @@ def process_response(self, request, response, spider):
context_id = response_data.pop('contextId', None)
page_id = response_data.pop('pageId', None)

response_cls = self._get_response_class(puppeteer_request.action, response_data)
response_cls = self._get_response_class(puppeteer_request.action)
response = response_cls(
url=puppeteer_request.url,
puppeteer_request=puppeteer_request,
Expand All @@ -150,10 +150,10 @@ def process_response(self, request, response, spider):
return response

@staticmethod
def _get_response_class(request_action, response_data):
if 'html' in response_data and 'recaptcha_data' not in response_data:
def _get_response_class(request_action):
if isinstance(request_action, (GoTo, GoForward, GoBack, Click, Scroll)):
return PuppeteerHtmlResponse
if 'screenshot' in response_data and isinstance(request_action, Screenshot):
if isinstance(request_action, Screenshot):
return PuppeteerScreenshotResponse
return PuppeteerJsonResponse

Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='scrapy-puppeteer-client',
version='0.1.0',
version='0.1.1',
description='A library to use Puppeteer-managed browser in Scrapy spiders',
long_description=long_description,
long_description_content_type="text/markdown",
Expand All @@ -25,6 +25,10 @@
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Framework :: Scrapy',
'Intended Audience :: Developers',
'Operating System :: OS Independent',
Expand Down

0 comments on commit 3977243

Please sign in to comment.