#!/usr/bin/env python
"""
# Google Around From Within Vim
`g my search keywords [@site]`
# @site Shortcuts
{known_sites}
The @site keyword may appear anywhere in the line.
# Examples
g cnn news germany
g @gh vpe
Will search Google and display all URLs from the first result page, excluding the
uninteresting Google-internal ones.
💡 Select all links and open them in the browser using `,g`, smart open (see below).
# CLI Example
```
[gk@axgk ~]$ vpe g vim python evaluation
https://vim.fandom.com/wiki/Evaluate_current_line_using_Python
https://vim.fandom.com
(...)
```
"""
from share import notify, vim, wrap_text_result
import sys
import base64
import uuid
import requests
import urllib.parse
import re
# Pretend to be a desktop Chrome so Google serves the plain HTML results page:
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
# Drop result URLs that contain / end with any of these (Google-internal pages and assets):
rm_from_res = ['.google', '.w3.', '.sec.', '.gstatic', 'schema.org']
rm_if_endswith = ['.svg', '.png', '&', '.xml', '.jpg', 'jpeg', '/stack_overflow_']
# @site shortcuts understood in the search line:
known_sites = {'@gh': 'github.com', '@so': 'stackoverflow.com'}
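# Adding another shortcut is just another entry here, e.g. (illustrative, not one
# of the defaults):
# known_sites['@py'] = 'docs.python.org'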
def try_help():
    """Return the module help text with the known @site shortcuts filled in."""
    return __doc__.format(known_sites=known_sites)
def metas(line):
    """Split @site shortcuts out of the search line -> (clean line, sites, exclude)."""
    line = line + ' '
    s, e = [], []  # sites to restrict to; the exclude list is returned for the caller but never filled here
    for k, v in known_sites.items():
        if k in line:
            s.append(v)
            line = line.replace(k, '')
    return line.strip(), s, e
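# A quick sanity sketch of metas() with the default known_sites (illustrative,
# not a test suite):
#   metas('vpe @gh')     -> ('vpe', ['github.com'], [])
#   metas('vim python')  -> ('vim python', [], [])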
def links(txt):
    """Pull all URLs out of the result HTML, dropping assets and Google-internal links."""
    urls = r'(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])'
    a = re.findall(urls, txt)

    def filt(i):
        url = f'{i[0]}://{"".join(i[1:])}'
        ul = url.lower()
        for k in rm_from_res:
            if k in ul:
                return None
        for k in rm_if_endswith:
            if ul.endswith(k):
                return None
        return url

    # keep order of first appearance while deduplicating
    r = []
    for url in (filt(i) for i in a):
        if url and url not in r:
            r.append(url)
    return r
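# Illustrative behavior on a hypothetical HTML snippet: ordinary result URLs
# survive, anything matching rm_from_res / rm_if_endswith is dropped:
#   links('href="https://vim.fandom.com/wiki/X" src="https://www.gstatic.com/x.png"')
#   -> ['https://vim.fandom.com/wiki/X']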
def from_picker(word):
    return try_load(word)
def try_load(line='vpe', **kw):
    """Run the Google query for `line` and return the extracted result URLs."""
    line, sites, exclude = metas(line)
    q = urllib.parse.quote_plus(line)
    q += '+OR'.join('+site:' + urllib.parse.quote_plus(site) for site in sites)
    q += ''.join('+-site:' + urllib.parse.quote_plus(e) for e in exclude)
    # fresh pseudo session id per request, URL-safe base64 of a random uuid
    sei = base64.encodebytes(uuid.uuid4().bytes)
    sei = sei.decode('ascii').rstrip('=\n').replace('/', '_')
    # 'gbv': '1',  # 1: js / 2: no javascript
    url = f'https://www.google.com/search?ie=UTF-8&oe=UTF-8&q={q}&sei={sei}'
    headers = {
        'Accept': 'text/html',
        'Accept-Encoding': 'gzip',
        'User-Agent': USER_AGENT,
        'Cookie': '',
        'Connection': 'keep-alive',
        'DNT': '1',
    }
    t = requests.get(url, headers=headers).text
    l = links(t)
    s = '\n'.join(l).strip()
    s = wrap_text_result(s, 'google', line)
    return {'lines': s, ':here': True, 'list': l}
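# Shape of the returned dict (the 'lines' value comes from share.wrap_text_result,
# whose exact wrapping format is assumed, not shown in this file):
#   {'lines': <wrapped text block>, ':here': True, 'list': ['https://...', ...]}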
if __name__ == '__main__':
    res = try_load(line=' '.join(sys.argv[1:]))
    for url in res['list']:
        print(url)