-
Notifications
You must be signed in to change notification settings - Fork 9
/
gwordlist.py
executable file
·84 lines (76 loc) · 3.41 KB
/
gwordlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/python
def freq_an(input, fx=None):
    """Count occurrences of each element of *input* into the dict *fx*.

    Args:
        input: any iterable (here, a list of words).
        fx: optional existing frequency dict to update in place.

    Returns:
        The updated (or newly created) dict mapping element -> count.
    """
    # A mutable default argument ({}) is shared across calls and would
    # silently accumulate counts from unrelated invocations; use a None
    # sentinel and create a fresh dict per call instead.
    if fx is None:
        fx = {}
    for elem in input:
        # dict.get replaces dict.has_key, which was removed in Python 3.
        fx[elem] = fx.get(elem, 0) + 1
    return fx
def scrape_links_and_wordlistify(links,lower=False,verbose=1):
import nltk
import requests
import string
raw = ''
wordlist = {}
for site in links:
try:
if verbose == 1:
print '[+] fetching data from: ',site
if site.find('http://pastebin.com/') == 0:
raw = requests.get(site.replace('http://pastebin.com/','http://pastebin.com/raw.php?i=')).content
else:
raw = requests.get(site).content
if lower == False:
#wordlist += list(set(nltk.clean_html(raw).replace('\r','').split()))
freq_an(list(set(nltk.clean_html(raw).replace('\r','').split())),wordlist)
else:
#wordlist += map(lambda x: string.lower(x),list(set(nltk.clean_html(raw).replace('\r','').split())))
freq_an(map(lambda x: string.lower(x),list(set(nltk.clean_html(raw).replace('\r','').split()))),wordlist)
except:
if verbose == 1:
print '[-] Skipping url: ',site
return wordlist
def google_wordlist(queries, results_per_query=5, lower=False, verbose=1):
from google import search
links = []
num = 0
for q in queries:
try:
links += [x for x in search(q,stop=results_per_query)][:results_per_query] #quick and dirty, i'd hit that :)
# for link in llist:
# links.append(link)
# num += 1
except:
if verbose == 1:
print '[-] google fails'
links = list(set(links))
return scrape_links_and_wordlistify(links,lower)
if __name__ == '__main__':
__program__ = 'mkwordlist'
__url__ = 'https://github.com/tkisason/gcrack'
__author__ = 'Tonimir Kisasondi <[email protected]>'
__copyright__ = 'Copyright (c) 2012-'
__license__ = 'GPLv3'
__version__ = '0.6'
import argparse
import operator
desc = 'Generate custom wordlists based on google queries and screen scrapes of N top links returned by google queries for each keyword'
print '\n'+__program__+' '+__version__+' by '+__author__+'\n'+__copyright__+' Distributed under '+ __license__+''
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('KEYWORDS_FILE', help='Load keywords from KEYWORDS_FILE, one line == one search query')
parser.add_argument('OUTPUT_FILE', help='Your wordlist will be saved/appended to OUTPUT_FILE')
parser.add_argument('-l','--lowercase', help='Make sure all capitals are LOWERCASE, useful if you will use rules to mutate your wordlist',action='store_true')
parser.add_argument('-n','--number', help='Use NUMBER of top google links for scraping instead of default 5 ',type=int, default=5)
args=parser.parse_args()
keywords = open(args.KEYWORDS_FILE,'r').read().strip().split("\n")
print '[+] Googling for keywords'
owl = google_wordlist(keywords,int(args.number),args.lowercase)
print '[+] Sorting wordlist according to word probability'
sorted_wl = sorted(owl.iteritems(), key=operator.itemgetter(1),reverse=True)
wl = open(args.OUTPUT_FILE,'a+')
print '[+] Writing wordlist to :',args.OUTPUT_FILE
for line in sorted_wl:
wl.write(line[0]+'\n')
wl.flush()
wl.close()
print '[+] Done!'