This repository has been archived by the owner on Jan 22, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
magic.py
93 lines (76 loc) · 4.53 KB
/
magic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python
from lxml import html
import mysql.connector as mariadb
import sys
# --- Queries against the full `dns` table -----------------------------------

# Every IP together with its row count, least active first.
ListIP = "select ip, count(1) from `dns` group by 1 order by 2"

# Per-IP visit list; the SUBSTRING/LOCATE expression keeps the part of
# `domain` after its first '.'.  The '%s' placeholder is filled in with an IP
# address by the caller using the % operator.
DataByIP = (
    "SELECT hour, min, "
    "SUBSTRING(domain, LOCATE('.', domain) + 1, LENGTH(domain) - LOCATE('.', domain)) "
    "from `dns` where ip='%s' "
    "group by hour, min, "
    "SUBSTRING(domain, LOCATE('.', domain) + 1, LENGTH(domain) - LOCATE('.', domain)) "
    "order by hour, min, sec, msec"
)

# --- Distinct IP addresses seen in each time-slot table ---------------------
# Each query skips rows whose `new_domain` ends in 'edu' and rows whose
# `domain` matches montana|spam|aka|akamai, and looks at a single hour.

DataMorning = (
    "select distinct ip from `new_data_5am_to_8am` "
    "where SUBSTRING_INDEX(new_domain, '.', -1) not in ('edu') "
    "and domain not regexp 'montana|spam|aka|akamai' "
    "and hour = 6"
)
DataEvening = (
    "select distinct ip from `new_data_5pm_to_5am` "
    "where SUBSTRING_INDEX(new_domain, '.', -1) not in ('edu') "
    "and domain not regexp 'montana|spam|aka|akamai' "
    "and hour = 21"
)
DataWorking = (
    "select distinct ip from `new_data_8am_to_5pm` "
    "where SUBSTRING_INDEX(new_domain, '.', -1) not in ('edu') "
    "and domain not regexp 'montana|spam|aka|akamai' "
    "and hour = 12"
)

# --- Domains visited by one IP address in each time-slot table --------------
# Same filters as above; '%s' is replaced with the IP address by the caller.

DataMorningByIP = (
    "SELECT hour, min, new_domain from `new_data_5am_to_8am` "
    "where ip='%s' "
    "and SUBSTRING_INDEX(new_domain, '.', -1) not in ('edu') "
    "and domain not regexp 'montana|spam|aka|akamai' "
    "and hour = 6 "
    "group by min, new_domain "
    "order by min, sec, msec"
)
DataEveningByIP = (
    "SELECT hour, min, new_domain from `new_data_5pm_to_5am` "
    "where ip='%s' "
    "and SUBSTRING_INDEX(new_domain, '.', -1) not in ('edu') "
    "and domain not regexp 'montana|spam|aka|akamai' "
    "and hour = 21 "
    "group by min, new_domain "
    "order by min, sec, msec"
)
DataWorkingByIP = (
    "SELECT hour, min, new_domain from `new_data_8am_to_5pm` "
    "where ip='%s' "
    "and SUBSTRING_INDEX(new_domain, '.', -1) not in ('edu') "
    "and domain not regexp 'montana|spam|aka|akamai' "
    "and hour = 12 "
    "group by min, new_domain "
    "order by min, sec, msec"
)
def RunQuery(cnx, query):
    """Execute *query* on the connection *cnx* and return all result rows.

    cnx: an open database connection exposing ``cursor()``.
    query: the complete SQL statement to run.

    Returns a list holding every row the cursor yields when iterated.

    Any exception raised while executing or fetching propagates to the
    caller; the cursor is closed in either case.
    """
    cursor = cnx.cursor()
    try:
        cursor.execute(query)
        return list(cursor)
    finally:
        # Release the cursor even when execute()/iteration raises.
        cursor.close()
def _count_domain_sequences(visits_per_ip, length=3):
    """Count runs of *length* consecutive visits whose neighbours all differ.

    visits_per_ip: iterable of per-IP lists of (hour, min, domain) tuples, in
        the order the per-IP query returned them.
    length: how many consecutive visits make up one sequence.

    Returns a dict mapping each tuple of *length* domain strings to the number
    of times that sequence occurred, summed over all IPs.
    """
    counts = {}
    for visits in visits_per_ip:
        domains = [visit[2] for visit in visits]
        # The last full window starts at len(domains) - length.  The previous
        # bound (len - 3 for 3-item windows) was left over from a 4-item
        # variant and silently skipped the final sequence of every IP.
        for start in range(len(domains) - length + 1):
            window = tuple(domains[start:start + length])
            # Keep only sequences in which no domain is immediately repeated.
            if all(a != b for a, b in zip(window, window[1:])):
                counts[window] = counts.get(window, 0) + 1
    return counts


def main(daterbase):
    """Print domain sequences that occur more than once across IP addresses.

    daterbase: name of the data set to analyse.  'DataMorning', 'DataEvening'
        and 'DataWorking' select the corresponding time-slot table; any other
        value falls back to the full `dns` table.

    For every IP address returned by the list query, the visited domains are
    fetched, and each sequence of three consecutive, pairwise-adjacent-distinct
    domains is counted.  Sequences seen more than once are printed as
    ``domain1,domain2,domain3,count`` lines on stdout.

    If the database connection cannot be established the error is printed and
    the function returns without doing any work.
    """
    try:
        cnx = mariadb.connect(
            user='',      # username to connect with the SQL database
            password='',  # password to connect with the SQL database
            host='',      # hostname to connect with the SQL database
            database='')  # database name to connect with the SQL database
    except mariadb.Error as error:
        print("Error: {}".format(error))
        return  # nothing can be queried without a connection

    print(daterbase)

    # (IP-list query, per-IP query) for each named time slot.
    slot_queries = {
        # morning table: 5AM to 8AM
        'DataMorning': (DataMorning, DataMorningByIP),
        # evening table: 5PM to 5AM
        'DataEvening': (DataEvening, DataEveningByIP),
        # working-hours table: 8AM to 5PM
        'DataWorking': (DataWorking, DataWorkingByIP),
    }
    list_query, by_ip_query = slot_queries.get(daterbase, (ListIP, DataByIP))

    FinalList = []
    try:
        ans = RunQuery(cnx, list_query)
        # First column of every row is the IP address.
        IPlist = [str(row[0]) for row in ans]
        print("Got IP List")

        for position, ip in enumerate(IPlist):
            if position % 100 == 0:
                print(position)  # progress indicator
            # NOTE(review): the IP is interpolated straight into the SQL text.
            # The values come from the database itself, but a parameterised
            # query would be safer if the query constants are ever reworked.
            res = RunQuery(cnx, by_ip_query % ip)
            fin = [(x[0], x[1],
                    x[2].encode('raw_unicode_escape', 'ignore').decode("cp1252"))
                   for x in res]
            FinalList.append(fin)
    finally:
        cnx.close()
    print("Got Final List\n")

    ResultDict = _count_domain_sequences(FinalList, length=3)

    print('')
    for key, value in ResultDict.items():
        if value > 1:
            print("%s,%d" % (",".join(part.strip() for part in key), value))
if __name__ == '__main__':
    # Fail with a readable message instead of an IndexError traceback when the
    # data-set argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <DataMorning|DataEvening|DataWorking|other>".format(sys.argv[0]))
    main(sys.argv[1])