-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.rb
119 lines (101 loc) · 3.11 KB
/
parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env ruby
require 'nokogiri'
require 'uri'
# Input: the XML dump that the script parses.
f = File.new("dump.xml")

# Output: receives the parsed document written back out as text.
provider_all = File.new("dump-all.provider", "w")

# Outputs kept in instance variables of the top-level object so that the
# helper methods defined below can reach them (top-level locals are not
# visible inside `def` bodies).
@provider_urls    = File.new("urls.provider", "w")    # entries with a path after the host
@provider_domains = File.new("domains.provider", "w") # bare host entries
@provider_ip      = File.new("ip.provider", "w")      # distinct IP entries
@provider_http    = File.new("http.provider", "w")    # entries that carried "http://"
@provider_https   = File.new("https.provider", "w")   # entries that carried "https://"
# Appends one entry to the URL list file.
#
# Despite the name, the text is written exactly as received: no URI
# encoding or unescaping is applied.
#
# @param u [String] text to write as a single line
# @return [nil]
def encode_urls(u)
  @provider_urls.puts(u)
end
# Appends one entry to the domain list file, dropping the first "/" it
# contains (if any).
#
# The slash is removed from the passed-in string itself, so the caller's
# string is changed as well.
#
# @param d [String] domain text, possibly containing a slash
# @return [nil]
def line_domain(d)
  d["/"] = "" if d.include?("/")
  @provider_domains.puts(d)
end
# Appends one entry to the domain list file, dropping a leading "www."
# label if present.
#
# Only a prefix is removed: a "www." that appears later in the name (for
# example inside "mywww.example.com") is part of a different host name and
# is left alone. The prefix is removed from the passed-in string itself, so
# the caller's string is changed as well.
#
# Not called anywhere in this file at present.
#
# @param d [String] domain text
# @return [nil]
def www_domain(d)
  # start_with? guarantees the first occurrence of "www." is the prefix.
  d["www."] = "" if d.start_with?("www.")
  @provider_domains.puts(d)
end
# Routes one entry to either the URL list or the domain list.
#
# An entry counts as a URL when it has a "/" somewhere after its first
# character and at least one more character after that slash. Everything
# else (no slash, a slash in first position, or a slash as the very last
# character) is handed to line_domain.
#
# @param urls [String] a single entry; line_domain may modify it in place
# @return [nil]
def provider(urls)
  slash_at = urls.index("/")
  # A slash at index 0 is deliberately treated like "no slash".
  has_path = slash_at && slash_at > 0 && urls[slash_at + 1]

  if has_path
    encode_urls(urls)
  else
    line_domain(urls)
  end
end
# Writes +url+ with an "http://" prefix to the URL list when it has a path
# after the host, otherwise to the domain list.
#
# @param url [String] entry without a scheme
# @return [nil]
def provider_http(url)
  write_with_scheme("http", url)
end

# Writes +url+ with an "https://" prefix to the URL list when it has a path
# after the host, otherwise to the domain list.
#
# @param url [String] entry without a scheme
# @return [nil]
def provider_https(url)
  write_with_scheme("https", url)
end

# Shared implementation for provider_http and provider_https.
#
# An entry counts as having a path when it has a "/" somewhere after its
# first character and at least one more character after that slash.
#
# @param scheme [String] scheme name placed before "://"
# @param url [String] entry without a scheme; not modified
# @return [nil]
def write_with_scheme(scheme, url)
  full = "#{scheme}://#{url}"
  slash_at = url.index("/")
  # A slash at index 0 is deliberately treated like "no slash".
  has_path = slash_at && slash_at > 0 && url[slash_at + 1]

  if has_path
    @provider_urls.puts(full)
  else
    @provider_domains.puts(full)
  end
end
# Parse the dump in strict mode with network access disabled.
doc = Nokogiri::XML(f) do |config|
  config.strict.nonet
end

# Each <ip> element may hold a comma-separated list; write every distinct
# value once, in first-seen order.
ips = doc.search('ip').flat_map { |node| node.content.split(",") }.uniq
ips.each { |ip| @provider_ip.puts ip }

doc.search('url').each do |node|
  # One element can list several entries. Only a comma directly followed by
  # "http" is treated as a separator, so other commas inside a URL survive.
  entries = node.content.gsub(",http", "\nhttp").split("\n")

  entries.each do |entry|
    # The scheme must be the first thing in the entry (leading whitespace is
    # tolerated). A plain substring test would also fire on a scheme embedded
    # later in the text, e.g. "https://a/?u=http://b", and then delete it.
    leading = entry.lstrip

    if leading.start_with?("http://")
      entry["http://"] = ""
      # provider may modify entry in place; the modified text is what is logged.
      provider(entry)
      @provider_http.puts entry
    elsif leading.start_with?("https://")
      entry["https://"] = ""
      provider(entry)
      @provider_https.puts entry
    else
      provider(entry)
    end
  end
end

provider_all.puts doc

# Close every handle explicitly instead of leaving the output files to be
# closed at interpreter exit.
[
  f, provider_all, @provider_urls, @provider_domains,
  @provider_ip, @provider_http, @provider_https
].each(&:close)