-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions.py
150 lines (121 loc) · 4.39 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import re
import requests
from bs4 import BeautifulSoup
def firstColumn():
    """Build an extractor that yields a scraped page's URL.

    Returns:
        A one-argument callable. Given an object exposing a ``url``
        attribute, it returns that attribute unchanged.
    """
    # Plain attribute read; no normalisation is applied to the URL.
    return lambda ScrappedPageStruct: ScrappedPageStruct.url
def calc(column):
    """Build a counter for occurrences of ``column`` in a page's markup.

    Args:
        column: Substring to look for in the prettified markup.

    Returns:
        A one-argument callable. Given an object with a ``content``
        attribute, it parses the content with BeautifulSoup's
        ``html.parser`` backend, re-serialises it with ``prettify()`` and
        returns how many times ``column`` occurs in that text
        (``str.count`` semantics, i.e. non-overlapping matches).
    """
    def count_in_markup(ScrappedPageStruct):
        soup = BeautifulSoup(ScrappedPageStruct.content, 'html.parser')
        # Counting is done on the prettified text, not on the raw content.
        pretty_markup = soup.prettify()
        return pretty_markup.count(column)

    return count_in_markup
def getUrlLenght():
    """Build a function that measures the length of a page's URL.

    Returns:
        A one-argument callable. Given an object with a ``url``
        attribute, it returns ``len()`` of that attribute.
    """
    def url_length(ScrappedPageStruct):
        address = ScrappedPageStruct.url
        return len(address)

    return url_length
def urlLenghtStatus():
    """Build a classifier that labels a page by the length of its URL.

    Returns:
        A one-argument callable returning ``'benign'`` for lengths below
        54, ``'suspicious'`` for lengths from 54 to 75 inclusive, and
        ``'malicious'`` for anything longer. The length is obtained via
        ``getUrlLenght()``.
    """
    def classify(ScrappedPageStruct):
        size = getUrlLenght()(ScrappedPageStruct)
        if size < 54:
            return 'benign'
        # Reaching this point already implies size >= 54.
        if size <= 75:
            return 'suspicious'
        return 'malicious'

    return classify
def ipAddress():
    """Build a detector for domains that are a dotted-quad IPv4 address.

    Returns:
        A one-argument callable. Given an object with a ``domain`` string
        attribute, it returns ``"found. Url is malicious"`` when the
        domain consists of exactly four dot-separated decimal numbers,
        each in the range 0..255, and ``"not found. Url is benign"``
        otherwise.
    """
    def is_octet(label):
        # Restricting to ASCII digits means int() cannot raise, and signs
        # or surrounding whitespace are not mistaken for a number.
        return label.isascii() and label.isdigit() and int(label) <= 255

    def nested(ScrappedPageStruct):
        labels = ScrappedPageStruct.domain.split(".")
        # Three dots means four labels; every one must be a valid octet.
        # Valid octets are 0..255, so "10.0.0.1" is an address and
        # "256.1.1.1" is not.
        if len(labels) == 4 and all(is_octet(label) for label in labels):
            found, verdict = "found", "malicious"
        else:
            found, verdict = "not found", "benign"
        return "%s. Url is %s" % (found, verdict)

    return nested
def specialCharacterExist(characters):
    """Build a check for whether any of ``characters`` occurs in a page's URL.

    Args:
        characters: Iterable of substrings, forwarded unchanged to
            ``countSpecialCharacterInUrl``.

    Returns:
        A one-argument callable returning ``'exist'`` when the combined
        occurrence count is greater than zero, otherwise ``'not exist'``.
    """
    def presence_label(ScrappedPageStruct):
        occurrences = countSpecialCharacterInUrl(characters)(ScrappedPageStruct)
        if occurrences > 0:
            return 'exist'
        return 'not exist'

    return presence_label
def countSpecialCharacterInUrl(characters):
    """Build a counter of how often the given substrings occur in a page's URL.

    Args:
        characters: Iterable of substrings to count.

    Returns:
        A one-argument callable. Given an object with a ``url``
        attribute, it returns the sum of ``url.count(item)`` over every
        item in ``characters`` (0 when ``characters`` is empty).
    """
    def tally(ScrappedPageStruct):
        return sum(
            ScrappedPageStruct.url.count(symbol) for symbol in characters
        )

    return tally
def commentStyle():
    """Build a classifier based on the ``//-->`` comment terminator in a page.

    Returns:
        A one-argument callable. Given an object with ``status_code`` and
        ``content`` attributes, it returns:

        * ``None`` when ``status_code`` is not 200 (no verdict is formed),
        * ``"malicious"`` when the prettified markup contains ``//-->``,
        * ``"benign"`` otherwise.
    """
    def nested(ScrappedPageStruct):
        # A verdict is only assigned for a 200 response. Leaving early
        # avoids returning a name that was never bound and skips parsing
        # a body that would not be inspected anyway.
        if ScrappedPageStruct.status_code != 200:
            return None
        content_bs = BeautifulSoup(ScrappedPageStruct.content, 'html.parser').prettify()
        # The marker is a fixed string, so a substring test is sufficient.
        return "malicious" if '//-->' in content_bs else "benign"

    return nested
def IPExistInUrl():
    """Build a classifier flagging URLs whose host is a raw IPv4 address.

    Returns:
        A one-argument callable. Given an object with a ``url`` string
        attribute, it returns ``'malicious'`` when the URL contains
        ``http://`` or ``https://`` followed by four dot-separated groups
        of 1-3 digits used as the host, and ``'benign'`` otherwise.
    """
    # After the address an optional ":port" is allowed, and the host must
    # then end: at a path, query or fragment delimiter, or at the end of
    # the string. That keeps "1.2.3.4.example.com" from matching while
    # still catching "https://1.2.3.4", "http://1.2.3.4:8080/x", etc.
    ip_host = r'https?://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?::\d+)?(?:[/?#]|$)'

    def nested(ScrappedPageStruct):
        if re.search(ip_host, ScrappedPageStruct.url):
            return 'malicious'
        return 'benign'

    return nested
def existenceSubDomains():
    """Build a classifier based on the number of dots in a page's URL.

    Returns:
        A one-argument callable. Given an object with a ``url``
        attribute, it returns ``'malicious'`` when the URL contains three
        or more ``"."`` characters and ``'benign'`` otherwise.
    """
    def verdict(ScrappedPageStruct):
        dot_total = ScrappedPageStruct.url.count(".")
        return 'malicious' if dot_total >= 3 else 'benign'

    return verdict
def AliasSymbolExistInUrl():
    """Build a classifier flagging URLs that contain the ``@`` alias symbol.

    Returns:
        A one-argument callable. Given an object with a ``url`` string
        attribute, it returns ``'phishing'`` when the URL contains at
        least one ``"@"`` and ``'legitimate'`` otherwise.
    """
    def nested(ScrappedPageStruct):
        # Test for "@" rather than ".": a dot appears in practically
        # every URL, so checking for it would label everything phishing.
        if "@" in ScrappedPageStruct.url:
            return 'phishing'
        return 'legitimate'

    return nested
def getDomainAge(url):
    """Look up a domain's age through the PayApi fraud endpoint.

    Args:
        url: Domain string appended verbatim to the endpoint path.

    Returns:
        The numeric ``result`` field of the JSON response, or ``None``
        when the request does not return HTTP 200, the body is not a JSON
        object, or ``result`` is missing or not a number.

    Raises:
        requests.RequestException: On connection failures or when the
            10-second timeout expires.
    """
    # Without a timeout, requests can wait indefinitely on a stalled server.
    response = requests.get(
        "https://input.payapi.io/v1/api/fraud/domain/age/" + url,
        timeout=10,
    )
    if response.status_code != 200:
        return None
    # A Response object never carries a ``result`` attribute; the value
    # has to be read from the decoded body.
    # NOTE(review): assumes the service answers with a JSON object whose
    # "result" key holds the age (presumably in days) - confirm against
    # the API.
    try:
        payload = response.json()
    except ValueError:
        return None
    if not isinstance(payload, dict):
        return None
    age = payload.get('result')
    # Only hand back something callers can compare numerically.
    if isinstance(age, bool) or not isinstance(age, (int, float)):
        return None
    return age
def AgeOfDomainStatus():
    """Build a classifier based on the age reported for a page's domain.

    Returns:
        A one-argument callable. Given an object with a ``domain``
        attribute, it calls ``getDomainAge`` on that domain and returns
        ``'legitimate'`` when an age is available and greater than 365,
        otherwise ``'phishing'``.
    """
    def verdict(ScrappedPageStruct):
        domain_age = getDomainAge(ScrappedPageStruct.domain)
        # An unknown age is treated the same as a young domain.
        if domain_age is None:
            return 'phishing'
        return 'legitimate' if domain_age > 365 else 'phishing'

    return verdict
def urlRedirectedUsing(char):
    """Build a classifier based on a second occurrence of ``char`` in a URL.

    Args:
        char: Substring to search for in the URL.

    Returns:
        A one-argument callable. Given an object with a ``url`` string
        attribute, it locates the first occurrence of ``char``, then the
        next occurrence after it, and returns ``'Phishing'`` when that
        second index is greater than 7, otherwise ``'Legitimate'``.
    """
    def classify(ScrappedPageStruct):
        address = ScrappedPageStruct.url
        first_hit = address.find(char)
        # When ``char`` is absent, first_hit is -1 and the search below
        # restarts from index 0, yielding -1 again.
        second_hit = address.find(char, first_hit + 1)
        if second_hit > 7:
            return 'Phishing'
        return 'Legitimate'

    return classify