This repository has been archived by the owner on Apr 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearchDCCon.py
60 lines (56 loc) · 1.84 KB
/
searchDCCon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import json
from bs4 import BeautifulSoup
import requests
import re
import base64
from multiprocessing.dummy import Pool as ThreadPool
def get_base64_thumbnail(url, timeout=10):
    """Download the image at ``url`` and return its bytes as base64 text.

    Args:
        url: Address of the thumbnail image to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits forever, which would pin a worker
            thread (and the whole invocation) on a stalled connection.

    Returns:
        The response body, base64-encoded and decoded to a ``str``.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # The request carries a dccon referer header.
    # NOTE(review): presumably the image host rejects requests without
    # it -- confirm before removing.
    headers = {'referer': 'https://dccon.dcinside.com/'}
    img_data = requests.get(url, headers=headers, timeout=timeout)
    # The HTTP status is intentionally not checked: whatever body comes
    # back is encoded, as before.
    return base64.b64encode(img_data.content).decode('utf-8')
def get_search_result(query_text, offset=0):
    """Fetch one page of DCCon search results for ``query_text``.

    Requests ``https://dccon.dcinside.com/hot/<page>/title/<query_text>``
    where ``page`` is ``1 + offset``, parses every ``li.div_package``
    element on the page, and downloads the thumbnails concurrently.

    Args:
        query_text: Search term interpolated into the URL path.
        offset: Zero-based page offset; ``0`` requests page 1.

    Returns:
        A list of dicts with the keys ``num``, ``thumbnail``, ``name``
        and ``seller``; an empty list when the page has no packages.

    Raises:
        requests.exceptions.RequestException: If a request fails or
            times out.
        AttributeError: If a package element lacks one of the expected
            child tags.
    """
    def get_package(dccon):
        """Convert one ``li.div_package`` element into a result dict."""
        thumbnail_url = dccon.find('img', 'thumb_img').get('src')
        return {
            'num': dccon.get('package_idx'),
            'thumbnail': get_base64_thumbnail(thumbnail_url),
            'name': dccon.find('strong', 'dcon_name').get_text(),
            'seller': dccon.find('span', 'dcon_seller').get_text(),
        }

    page = 1 + offset
    # NOTE(review): query_text goes into the URL path unescaped; confirm
    # whether callers pre-encode it before adding quoting here.
    res = requests.get(
        'https://dccon.dcinside.com/hot/%d/title/%s' % (page, query_text),
        timeout=10,  # never hang forever on a stalled connection
    )
    bs = BeautifulSoup(res.text, 'html.parser')
    dccons = bs.find_all('li', 'div_package')
    if not dccons:
        # Nothing to download, so skip spinning up worker threads.
        return []

    # The context manager shuts the pool down once map() has returned,
    # so repeated calls no longer leave idle threads behind.
    with ThreadPool(4) as pool:
        return pool.map(get_package, dccons)
def get_all_search_result(query_text):
    """Gather the search results of every page for ``query_text``.

    Pages are requested through ``get_search_result`` starting at offset
    0 and advancing by one, until a page compares equal to an empty
    list. The results of all earlier pages are returned as one flat
    list, in page order.
    """
    collected = []
    page_offset = 0
    page_items = get_search_result(query_text, page_offset)
    while page_items != []:
        collected.extend(page_items)
        page_offset += 1
        page_items = get_search_result(query_text, page_offset)
    return collected
def lambda_handler(event, context):
    """Entry point: run a DCCon search described by ``event``.

    Args:
        event: Mapping with a required ``query_text`` key and an optional
            ``offset`` key. When ``offset`` is present only that page is
            fetched; otherwise every page is collected.
        context: Unused.

    Returns:
        A dict with ``statusCode`` and ``body``. On success the status is
        200 and the body is the JSON-encoded result list. When the
        parameters are missing or malformed, or a ``ValueError`` is
        raised while fetching, the status is 400 and the body echoes the
        event.
    """
    def bad_request():
        return {
            'statusCode': 400,
            'body': f'Bad parameters: {json.dumps(event)}'
        }

    # Validate the parameters up front so that a missing query_text
    # (KeyError) or a non-numeric offset such as None (TypeError) yields
    # a 400 response instead of an unhandled exception.
    try:
        query_text = event['query_text']
        offset = int(event['offset']) if 'offset' in event else None
        if offset is not None and offset < 0:
            # A negative offset would request a page number below 1.
            raise ValueError('offset must not be negative')
    except (KeyError, TypeError, ValueError):
        return bad_request()

    try:
        if offset is None:
            data = get_all_search_result(query_text)
        else:
            data = get_search_result(query_text, offset)
    except ValueError:
        # Kept from the previous behaviour: a ValueError raised during
        # the fetch is also reported as a bad request.
        return bad_request()

    return {
        'statusCode': 200,
        'body': json.dumps(data, ensure_ascii=False)
    }