diff --git a/scrapypuppeteer/request.py b/scrapypuppeteer/request.py index 6c14739..95ae735 100644 --- a/scrapypuppeteer/request.py +++ b/scrapypuppeteer/request.py @@ -1,6 +1,6 @@ from typing import Union -from scrapy.http import Request, Response +from scrapy.http import Request from scrapypuppeteer.actions import GoTo, PuppeteerServiceAction @@ -15,7 +15,6 @@ def __init__(self, context_id: str = None, page_id: str = None, close_page: bool = True, - response: Response = None, **kwargs): """ @@ -27,14 +26,9 @@ def __init__(self, :param close_page: whether to close page after request completion; set to False, if you want to continue interacting with the page - :param response: a response which this request follows; if target page URL - can't be inferred from action, it is set to response.url :param kwargs: """ - if not action and 'url' in kwargs: - action = kwargs.pop('url') - elif 'url' in kwargs: - kwargs.pop('url') + url = kwargs.pop('url', None) if isinstance(action, str): url = action navigation_options = kwargs.pop('navigation_options', None) @@ -42,10 +36,9 @@ def __init__(self, action = GoTo(url, navigation_options=navigation_options, wait_options=wait_options) elif isinstance(action, GoTo): url = action.url - elif response is not None: - url = response.url - kwargs['dont_filter'] = True - else: + elif not isinstance(action, PuppeteerServiceAction): + raise ValueError('Undefined browser action') + if url is None: raise ValueError('Request is not a goto-request and does not follow a response') super().__init__(url, **kwargs) self.action = action diff --git a/scrapypuppeteer/response.py b/scrapypuppeteer/response.py index e20b743..942b2d0 100644 --- a/scrapypuppeteer/response.py +++ b/scrapypuppeteer/response.py @@ -1,5 +1,4 @@ from typing import Union -from urllib.parse import urljoin from scrapy.http import Response, TextResponse @@ -33,11 +32,14 @@ def follow(self, """ page_id = None if self.puppeteer_request.close_page else self.page_id if isinstance(action, str): - action = urljoin(self.url, action) + action = self.urljoin(action) elif isinstance(action, GoTo): - action.url = urljoin(self.url, action.url) + action.url = self.urljoin(action.url) + else: + kwargs['url'] = self.url + kwargs['dont_filter'] = True return PuppeteerRequest(action, context_id=self.context_id, page_id=page_id, - close_page=close_page, response=self, **kwargs) + close_page=close_page, **kwargs) class PuppeteerHtmlResponse(PuppeteerResponse, TextResponse): @@ -45,6 +47,7 @@ class PuppeteerHtmlResponse(PuppeteerResponse, TextResponse): scrapy.TextResponse capturing state of a page in browser. Additionally exposes received html and cookies via corresponding attributes. """ + def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs): self.html = kwargs.pop('html') self.cookies = kwargs.pop('cookies') @@ -58,6 +61,7 @@ class PuppeteerJsonResponse(PuppeteerResponse): Response for CustomJsAction. Result is available via self.data object. """ + def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs): self.data = kwargs super().__init__(url, puppeteer_request, context_id, page_id) @@ -68,6 +72,7 @@ class PuppeteerScreenshotResponse(PuppeteerResponse): Response for Screenshot action. Screenshot is available via self.screenshot as base64 encoded string. """ + def __init__(self, url, puppeteer_request, context_id, page_id, **kwargs): self.screenshot = kwargs.get('screenshot') super().__init__(url, puppeteer_request, context_id, page_id, **kwargs) diff --git a/setup.py b/setup.py index e892cb2..8830d67 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='scrapy-puppeteer-client', - version='0.0.4', + version='0.0.5', description='A library to use Puppeteer-managed browser in Scrapy spiders', long_description=long_description, long_description_content_type="text/markdown",