Skip to content

Commit

Permalink
Merge pull request #9 from ispras/follow
Browse files (browse the repository at this point in the history)
Move response out of request's init
  • Loading branch information
mxsnq authored Nov 13, 2020
2 parents a14aac1 + 9f268b6 commit 8a5a1b8
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 17 deletions.
17 changes: 5 additions & 12 deletions scrapypuppeteer/request.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Union

from scrapy.http import Request, Response
from scrapy.http import Request

from scrapypuppeteer.actions import GoTo, PuppeteerServiceAction

Expand All @@ -15,7 +15,6 @@ def __init__(self,
context_id: str = None,
page_id: str = None,
close_page: bool = True,
response: Response = None,
**kwargs):
"""
Expand All @@ -27,25 +26,19 @@ def __init__(self,
:param close_page: whether to close page after request completion;
set to False, if you want to continue interacting
with the page
:param response: a response which this request follows; if target page URL
can't be inferred from action, it is set to response.url
:param kwargs:
"""
if not action and 'url' in kwargs:
action = kwargs.pop('url')
elif 'url' in kwargs:
kwargs.pop('url')
url = kwargs.pop('url', None)
if isinstance(action, str):
url = action
navigation_options = kwargs.pop('navigation_options', None)
wait_options = kwargs.pop('wait_options', None)
action = GoTo(url, navigation_options=navigation_options, wait_options=wait_options)
elif isinstance(action, GoTo):
url = action.url
elif response is not None:
url = response.url
kwargs['dont_filter'] = True
else:
elif not isinstance(action, PuppeteerServiceAction):
raise ValueError('Undefined browser action')
if url is None:
raise ValueError('Request is not a goto-request and does not follow a response')
super().__init__(url, **kwargs)
self.action = action
Expand Down
13 changes: 9 additions & 4 deletions scrapypuppeteer/response.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from typing import Union
from urllib.parse import urljoin

from scrapy.http import Response, TextResponse

Expand Down Expand Up @@ -33,18 +32,22 @@ def follow(self,
"""
page_id = None if self.puppeteer_request.close_page else self.page_id
if isinstance(action, str):
action = urljoin(self.url, action)
action = self.urljoin(action)
elif isinstance(action, GoTo):
action.url = urljoin(self.url, action.url)
action.url = self.urljoin(action.url)
else:
kwargs['url'] = self.url
kwargs['dont_filter'] = True
return PuppeteerRequest(action, context_id=self.context_id, page_id=page_id,
close_page=close_page, response=self, **kwargs)
close_page=close_page, **kwargs)


class PuppeteerHtmlResponse(PuppeteerResponse, TextResponse):
"""
scrapy.TextResponse capturing state of a page in browser.
Additionally exposes received html and cookies via corresponding attributes.
"""

def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
self.html = kwargs.pop('html')
self.cookies = kwargs.pop('cookies')
Expand All @@ -58,6 +61,7 @@ class PuppeteerJsonResponse(PuppeteerResponse):
Response for CustomJsAction.
Result is available via self.data object.
"""

def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
self.data = kwargs
super().__init__(url, puppeteer_request, context_id, page_id)
Expand All @@ -68,6 +72,7 @@ class PuppeteerScreenshotResponse(PuppeteerResponse):
Response for Screenshot action.
Screenshot is available via self.screenshot as base64 encoded string.
"""

def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs):
self.screenshot = kwargs.get('screenshot')
super().__init__(url, puppeteer_request, context_id, page_id, **kwargs)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='scrapy-puppeteer-client',
version='0.0.4',
version='0.0.5',
description='A library to use Puppeteer-managed browser in Scrapy spiders',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 8a5a1b8

Please sign in to comment.