Skip to content

Commit

Permalink
Logging errors from service (#29)
Browse files Browse the repository at this point in the history
* Added the failure reason to logging in the service middleware

* Formatting and import fix

* Print -> self.log

* Logging level is now WARNING
  • Loading branch information
MatthewZMSU authored Jul 1, 2024
1 parent ec807dc commit 9bd72a8
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 14 deletions.
14 changes: 7 additions & 7 deletions examples/spiders/auto_recaptcha.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import base64

import logging
import scrapy
import base64
from twisted.python.failure import Failure

from scrapypuppeteer import PuppeteerRequest
Expand Down Expand Up @@ -37,7 +37,7 @@ def start_requests(self):
)

def parse_html(self, response: PuppeteerResponse, **kwargs):
with open(f"recaptcha_page.html", "wb") as f:
with open("recaptcha_page.html", "wb") as f:
f.write(response.body)
action = Screenshot(
options={
Expand All @@ -48,13 +48,13 @@ def parse_html(self, response: PuppeteerResponse, **kwargs):
action, callback=self.make_screenshot, errback=self.error, close_page=True
)

def make_screenshot(self, response: PuppeteerScreenshotResponse, **kwargs):
@staticmethod
def make_screenshot(response: PuppeteerScreenshotResponse, **kwargs):
data = (
response.screenshot
) # Note that data is string containing bytes, don't forget to decode them!
with open("imageToSave.png", "wb") as fh:
fh.write(base64.b64decode(data))

@staticmethod
def error(failure: Failure):
print(f"We are in error function!")
def error(self, failure: Failure):
self.log("We are in error function!", level=logging.WARNING)
14 changes: 7 additions & 7 deletions examples/spiders/manual_recaptcha.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import logging
import scrapy
import base64
from twisted.python.failure import Failure

from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import GoTo, RecaptchaSolver, Click, Screenshot
from scrapypuppeteer.response import PuppeteerResponse, PuppeteerScreenshotResponse

import base64


class ManualRecaptchaSpider(scrapy.Spider):
name = "manual_recaptcha"
Expand Down Expand Up @@ -42,7 +42,7 @@ def submit_recaptcha(self, response, **kwargs):
)

def parse_html(self, response: PuppeteerResponse, **kwargs):
with open(f"recaptcha_page.html", "wb") as f:
with open("recaptcha_page.html", "wb") as f:
f.write(response.body)
action = Screenshot(
options={
Expand All @@ -53,13 +53,13 @@ def parse_html(self, response: PuppeteerResponse, **kwargs):
action, callback=self.make_screenshot, errback=self.error, close_page=True
)

def make_screenshot(self, response: PuppeteerScreenshotResponse, **kwargs):
@staticmethod
def make_screenshot(response: PuppeteerScreenshotResponse, **kwargs):
data = (
response.screenshot
) # Note that data is string containing bytes, don't forget to decode them!
with open("imageToSave.png", "wb") as fh:
fh.write(base64.b64decode(data))

@staticmethod
def error(failure: Failure):
print(f"We are in error function!")
def error(self, failure: Failure):
self.log("We are in error function!", level=logging.WARNING)
6 changes: 6 additions & 0 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ class PuppeteerServiceDownloaderMiddleware:
SERVICE_META_SETTING = "PUPPETEER_INCLUDE_META"
DEFAULT_INCLUDE_HEADERS = ["Cookie"] # TODO send them separately

service_logger = logging.getLogger(__name__)

def __init__(
self,
crawler: Crawler,
Expand Down Expand Up @@ -177,6 +179,10 @@ def process_response(self, request, response, spider):
response_cls = self._get_response_class(puppeteer_request.action)

if response.status != 200:
reason = response_data.pop("error", f"undefined, status {response.status}")
self.service_logger.warning(
f"Request {request} is not succeeded. Reason: {reason}"
)
context_id = response_data.get("contextId")
if context_id:
self.used_contexts[id(spider)].add(context_id)
Expand Down

0 comments on commit 9bd72a8

Please sign in to comment.