-
Notifications
You must be signed in to change notification settings - Fork 1
/
Scraper.py
executable file
·144 lines (126 loc) · 4.87 KB
/
Scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import html
import json
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import requests

from Utilities import BannedSellers, email_settings
from Book import Book
from Token import Token
from app import Search
class Scraper:
    """Poll the eBay Browse API for saved searches and email new matches.

    Each row returned by ``Search.Search.query.all()`` supplies a ``book_id``
    and a ``max_price``.  Matching listings are wrapped in ``Book`` objects,
    queued in ``self.books`` and periodically mailed in one batch.

    Attributes:
        urls_sent: URLs of every book queued so far; used to skip duplicates.
        Token: ``Token`` instance whose ``get_token()`` supplies the bearer
            token sent with each API request.
        banned_sellers: ``BannedSellers`` joined with ``|`` for the
            ``excludeSellers`` filter.
        books: ``Book`` objects queued since the last successful email.
        time_emailed: ``time.time()`` of construction or of the last
            successful email, whichever is later.
    """

    # Minimum number of seconds between two batch emails.
    EMAIL_INTERVAL_SECONDS = 180
    # Back-off after a failed API request.
    API_RETRY_PAUSE_SECONDS = 60
    # Back-off after a failed SMTP connection or send.
    EMAIL_RETRY_PAUSE_SECONDS = 600
    # Upper bound for one HTTP request so a stalled connection cannot hang
    # the polling loop indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    SMTP_HOST = 'smtp.gmail.com'
    SMTP_PORT = 587

    SEARCH_URL_TEMPLATE = (
        'https://api.ebay.com/buy/browse/v1/item_summary/search'
        '?q={book_id}&filter=price:[..{max_price}],'
        'priceCurrency:GBP,itemLocationCountry:GB,'
        'excludeSellers:{{ {banned_sellers} }}'
    )

    # One-row HTML table describing a single book.  Literal CSS braces are
    # doubled because the template is rendered with ``str.format``.
    _EMAIL_HTML_TEMPLATE = """
<html>
<head>
<style>
table,
th,
td {{
padding: 10px;
border: 1px solid black;
border-collapse: collapse;
}}
</style>
</head>
<body>
<table>
<tr>
<th>Book ID</th>
<th>Title</th>
<th>Max Price (GBP)</th>
<th>Price</th>
<th>Shipping Information</th>
<th>URL</th>
</tr>
<tr>
<th>{book_id}</th>
<th>{title}</th>
<th>{max_price}</th>
<th>{price}</th>
<th>{shipping_information}</th>
<th>{url}</th>
</tr>
</table>
</body>
</html>
"""

    def __init__(self):
        """Create an empty queue and start the email interval timer."""
        self.urls_sent = set()
        self.Token = Token()
        self.banned_sellers = '|'.join(BannedSellers)
        self.books = []
        self.time_emailed = time.time()

    def run(self):
        """Poll every saved search forever, emailing queued books in batches.

        This method never returns.  After each pass over the saved searches
        an email is sent if books are queued and more than
        ``EMAIL_INTERVAL_SECONDS`` have passed since ``time_emailed``.
        """
        # NOTE(review): there is no delay between passes; pacing relies on
        # the pauses taken inside check_books() and send_email().
        while True:
            for row in Search.Search.query.all():
                self.check_books(row.book_id, row.max_price)
            interval_elapsed = (
                time.time() - self.time_emailed > self.EMAIL_INTERVAL_SECONDS
            )
            if interval_elapsed and self.books:
                self.send_email()

    def check_books(self, book_id, max_price):
        """Search eBay for ``book_id`` up to ``max_price`` and queue new hits.

        Args:
            book_id: Search term placed in the ``q`` query parameter.
            max_price: Upper price bound placed in the ``price`` filter.

        On a transport error, a non-2xx status or an unparsable body the
        method prints a notice, sleeps ``API_RETRY_PAUSE_SECONDS`` and
        returns without queuing anything.
        """
        request_url = self.SEARCH_URL_TEMPLATE.format(
            book_id=book_id,
            max_price=max_price,
            banned_sellers=self.banned_sellers,
        )
        headers = {'Authorization': 'Bearer ' + self.Token.get_token()}

        try:
            response = requests.get(
                url=request_url,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            # An error reply is still valid JSON but has no 'total' key, so
            # it must be routed to the back-off path explicitly.
            response.raise_for_status()
            response_json = response.json()
        except (requests.RequestException, ValueError):
            print('Out of API calls: ' + str(time.time()))
            print('Pausing {} seconds'.format(self.API_RETRY_PAUSE_SECONDS))
            time.sleep(self.API_RETRY_PAUSE_SECONDS)
            return

        print("request_url: " + request_url)

        if response_json.get('total', 0) <= 0:
            return
        for item in response_json.get('itemSummaries', []):
            self._queue_item(book_id, max_price, item)

    def _queue_item(self, book_id, max_price, item):
        """Queue one item summary unless its URL has already been queued."""
        try:
            book_url = item['itemWebUrl']
            if book_url in self.urls_sent:
                return
            if 'shippingOptions' in item:
                shipping_information = json.dumps(
                    item['shippingOptions'], indent=4)
            else:
                shipping_information = 'NOT FOUND'
            book = Book(
                book_id,
                max_price,
                json.dumps(item['price'], indent=4),
                shipping_information,
                item['title'],
                book_url,
                json.dumps(item, indent=4),
            )
            self.urls_sent.add(book.url)
            self.books.append(book)
        except Exception:
            # Best effort: one malformed item must not abort the whole batch.
            print('error with book: ' + str(item))

    def send_email(self):
        """Mail every queued book in one message, then clear the queue.

        On a connection/login failure or a send failure the method prints a
        notice, sleeps ``EMAIL_RETRY_PAUSE_SECONDS`` and leaves the queue
        untouched so the batch is retried later.  The SMTP connection is
        closed on every path.

        Raises:
            KeyError: if ``email_settings`` lacks ``from_mail``,
                ``password_mail`` or ``to_mail``; a configuration error
                cannot be fixed by retrying.
        """
        sender = email_settings['from_mail']
        recipient = email_settings['to_mail']

        server = self._open_smtp_session(
            sender, email_settings['password_mail'])
        if server is None:
            print('Email connection failed')
            print('Pausing {} seconds'.format(self.EMAIL_RETRY_PAUSE_SECONDS))
            time.sleep(self.EMAIL_RETRY_PAUSE_SECONDS)
            return

        try:
            payload = self._build_message(sender, recipient).as_string()
            server.sendmail(sender, recipient, payload)
        except (smtplib.SMTPException, OSError):
            sent = False
        else:
            sent = True
        finally:
            self._close_smtp_session(server)

        if not sent:
            print('Email Failed to send')
            print('Pausing {} seconds'.format(self.EMAIL_RETRY_PAUSE_SECONDS))
            time.sleep(self.EMAIL_RETRY_PAUSE_SECONDS)
            return

        for book in self.books:
            print('Emailed Book: ' + book.url)
        self.books = []
        self.time_emailed = time.time()

    def _open_smtp_session(self, sender, password):
        """Return a logged-in STARTTLS session, or None if it cannot be set up."""
        try:
            server = smtplib.SMTP(self.SMTP_HOST, self.SMTP_PORT)
        except (smtplib.SMTPException, OSError):
            return None
        try:
            server.starttls()
            server.login(sender, password)
        except (smtplib.SMTPException, OSError):
            server.close()
            return None
        return server

    @staticmethod
    def _close_smtp_session(server):
        """Close the session politely, falling back to dropping the socket."""
        try:
            server.quit()
        except (smtplib.SMTPException, OSError):
            server.close()

    def _build_message(self, sender, recipient):
        """Build the multipart message holding an HTML and a JSON part per book."""
        msg = MIMEMultipart('mixed')
        msg['Subject'] = 'Books found: ' + str(len(self.books))
        msg['From'] = sender
        # The header must name the same mailbox the envelope is addressed to.
        msg['To'] = recipient
        for book in self.books:
            msg.attach(MIMEText(self.email_html(book), 'html'))
            msg.attach(MIMEText(book.book_json, 'plain'))
        return msg

    def email_html(self, book):
        """Return an HTML page with a one-row table describing ``book``.

        Args:
            book: Object exposing ``book_id``, ``title``, ``max_price``,
                ``price``, ``shipping_information`` and ``url``.

        Returns:
            The rendered HTML string.  Every value is HTML-escaped because
            the title, URL and JSON snippets come from the remote API.
        """
        cells = {
            'book_id': book.book_id,
            'title': book.title,
            'max_price': book.max_price,
            'price': book.price,
            'shipping_information': book.shipping_information,
            'url': book.url,
        }
        escaped = dict(
            (name, html.escape(str(value))) for name, value in cells.items()
        )
        return self._EMAIL_HTML_TEMPLATE.format(**escaped)

    def reset_urls_sent(self):
        """Forget every URL seen so far so those books can be queued again."""
        self.urls_sent = set()