forked from DesertBot/DesertBot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSlurp.py
88 lines (68 loc) · 2.84 KB
/
Slurp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Created on Aug 31, 2015
@author: StarlitGhost
"""
from twisted.plugin import IPlugin
from desertbot.moduleinterface import IModule
from desertbot.modules.commandinterface import BotCommand
from zope.interface import implementer
from html import unescape
import re
from desertbot.message import IRCMessage
from desertbot.response import IRCResponse, ResponseType
from bs4 import BeautifulSoup
@implementer(IPlugin, IModule)
class Slurp(BotCommand):
    """Bot command that scrapes a single value from a tag on a web page.

    Usage: ``slurp <attribute> <url> <css selector>``. The attribute may be
    a real attribute of the selected tag, or one of the pseudo-attributes
    ``tagname`` (the tag's name) and ``text`` (the tag's text content).
    """

    def triggers(self):
        """Return the list of command words handled by this module."""
        return ['slurp']

    def help(self, query):
        """Return the usage string for the command; ``query`` is not used."""
        return ("slurp <attribute> <url> <css selector>"
                " - scrapes the given attribute from the tag selected "
                "at the given url")

    def execute(self, message: IRCMessage):
        """Fetch a page, select a tag and reply with the requested value.

        Reads ``message.parameterList`` as ``[attribute, url, *selector]``;
        everything from the third parameter on is joined with spaces to form
        the CSS selector. A URL without a scheme gets ``http://`` prepended.

        If ``message.metadata['slurp']`` already holds a parsed document for
        the URL it is reused; otherwise the page is obtained through the
        ``fetch-url`` action and parsed with BeautifulSoup.

        Returns an ``IRCResponse`` of type ``Say`` addressed to
        ``message.replyTo``. On success it carries the sanitized value, the
        URL in ``extraVars['slurpURL']`` and the parsed document in
        ``metadata['slurp'][url]``. On failure (too few parameters, fetch
        problem, selector matching nothing, missing attribute) it carries an
        explanatory message instead.
        """
        if len(message.parameterList) < 3:
            return IRCResponse(ResponseType.Say,
                               "Not enough parameters, usage: {}".format(self.help(None)),
                               message.replyTo)

        prop, url, selector = (message.parameterList[0],
                               message.parameterList[1],
                               " ".join(message.parameterList[2:]))

        # Default to http:// when the user left the scheme off.
        if not re.match(r'^\w+://', url):
            url = "http://{}".format(url)

        if 'slurp' in message.metadata and url in message.metadata['slurp']:
            # Reuse the document parsed for this URL earlier in the chain.
            soup = message.metadata['slurp'][url]
        else:
            response = self.bot.moduleHandler.runActionUntilValue('fetch-url', url)
            if not response:
                return IRCResponse(ResponseType.Say,
                                   "Problem fetching {}".format(url),
                                   message.replyTo)
            soup = BeautifulSoup(response.content, 'lxml')

        tag = soup.select_one(selector)
        if tag is None:
            return IRCResponse(ResponseType.Say,
                               "'{}' does not select a tag at {}".format(selector, url),
                               message.replyTo)

        # Pseudo-attributes take precedence over real attributes of the
        # same name.
        specials = {
            'tagname': tag.name,
            'text': tag.text
        }
        if prop in specials:
            value = specials[prop]
        elif prop in tag.attrs:
            value = tag[prop]
        else:
            attrMissing = ("The tag selected by '{}' ({}) does not have attribute '{}'"
                           .format(selector, tag.name, prop))
            return IRCResponse(ResponseType.Say, attrMissing, message.replyTo)

        # Non-string values are flattened into one space-separated string
        # (BeautifulSoup documents list values for multi-valued attributes
        # such as ``class``).
        if not isinstance(value, str):
            value = " ".join(value)

        # Sanitize the value. Entities are decoded FIRST so that anything
        # they expand to (e.g. "&#10;" -> newline, "&nbsp;" -> U+00A0) is
        # still subject to the whitespace clean-up below; decoding last
        # would let line breaks slip into the single-line reply.
        value = unescape(value)
        # \s already matches \r and \n, so one pass collapses every run of
        # whitespace, line breaks included, into a single space.
        value = re.sub(r'\s+', ' ', value).strip()

        return IRCResponse(ResponseType.Say, value, message.replyTo,
                           extraVars={'slurpURL': url},
                           metadata={'slurp': {url: soup}})
# Module-level instance of the command. Presumably this is the object the
# twisted plugin machinery picks up when the module is loaded — TODO confirm.
slurp = Slurp()