-
Notifications
You must be signed in to change notification settings - Fork 0
/
5_actions.py
68 lines (64 loc) · 2.68 KB
/
5_actions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from scrapy.http import HtmlResponse
from scrapy.loader import ItemLoader
import json, scrapy, custom_settings_config, os
from base64 import b64decode, decodebytes
import datetime
class AmazonbooksSpider(scrapy.Spider):
    """Scrape one Amazon UK book page through Zyte API browser actions.

    A single request asks Zyte API for a screenshot and the browser-rendered
    HTML after running three actions (click, type a postcode, click).
    ``parse`` saves the screenshot to disk and yields one item holding the
    book URL, author(s), title, price, cover image URL and delivery text.
    """

    name = 'amazonbooks'
    allowed_domains = ['amazon.co.uk']
    # Settings come from the project-local ``custom_settings_config`` module.
    custom_settings = custom_settings_config.custom_settings

    def start_requests(self):
        """Yield the single product-page request with its Zyte API parameters."""
        yield scrapy.Request(
            "https://www.amazon.co.uk/dp/178685807X",
            meta={
                "zyte_api": {
                    "screenshot": True,
                    "browserHtml": True,
                    # Actions are listed in the order they should run.
                    # NOTE(review): the selectors presumably target Amazon's
                    # delivery-location widget (open it, enter the postcode,
                    # submit) -- confirm against the live page markup.
                    "actions": [
                        {
                            "action": "click",
                            "selector": {
                                "type": "css",
                                "value": ".cip-a-size-small",
                            },
                            "delay": 0,
                            "button": "left",
                            "onError": "return",
                        },
                        {
                            "action": "type",
                            "selector": {
                                "type": "css",
                                "value": "#GLUXZipUpdateInput",
                            },
                            "delay": 0,
                            "onError": "return",
                            # Alternative postcode kept from the original
                            # source comment: BT23 4AA
                            "text": "NW1 5LJ",
                        },
                        {
                            "action": "click",
                            "selector": {
                                "type": "css",
                                "value": ".a-button-input",
                            },
                            "delay": 0,
                            "button": "left",
                            "onError": "return",
                        },
                    ],
                }
            },
        )

    def parse(self, response):
        """Save the page screenshot and yield the scraped book fields.

        Args:
            response: The response for the product page. Its
                ``raw_api_response`` mapping is expected to carry a
                base64-encoded ``"screenshot"`` entry.

        Yields:
            dict: One item with the keys ``book_url``, ``author``,
            ``book_title``, ``price``, ``cover`` and ``delivery``.
        """
        # Tolerate a missing attribute/key so a screenshot problem does not
        # discard the scraped item.
        api_response = getattr(response, "raw_api_response", None) or {}
        encoded_screenshot = api_response.get("screenshot")
        if encoded_screenshot:
            screenshot: bytes = b64decode(encoded_screenshot)  # base64 -> raw image bytes
            # The path is relative to the process's current working directory.
            output_path = "../amazon_in/output_5.jpg"
            # Create the target directory first; open() would otherwise raise
            # FileNotFoundError when it does not exist yet.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            with open(output_path, "wb") as fh:
                fh.write(screenshot)
        else:
            self.logger.warning(
                "No screenshot in Zyte API response for %s", response.url
            )

        yield {
            "book_url": response.url,
            # The first two matched byline link texts are dropped.
            # NOTE(review): presumably they are not author names -- confirm.
            'author': response.css('#bylineInfo a.a-link-normal::text').getall()[2:],
            'book_title': response.css('#productTitle::text').get(),
            'price': response.css('.a-color-price::text').get(),
            'cover': response.css('#main-image::attr(src)').get(),
            'delivery': response.css(
                '#mir-layout-DELIVERY_BLOCK #mir-layout-DELIVERY_BLOCK-slot-SECONDARY_DELIVERY_MESSAGE_LARGE .a-text-bold::text').get(),
        }