-
Notifications
You must be signed in to change notification settings - Fork 9
/
autoparser.rb
60 lines (47 loc) · 1.38 KB
/
autoparser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
require 'open-uri'
require 'nokogiri'
require 'json'
# Scrapes a two-level car catalogue: the start page lists makes ("marks"),
# and each make's page lists its models. The outcome is written to a JSON
# file shaped as { make_name => [model_name, ...] }.
#
# Progress messages are printed to standard output while the scrape runs.
class AutoParser
  # CSS selector matching one catalogue entry; the same selector is used on
  # the start page (makes) and on every make page (models).
  ITEM_SELECTOR = '.mmm__item'.freeze

  # NOTE: :models is kept for backward compatibility only; nothing in this
  # class assigns @models, so the reader always returns nil.
  attr_reader :filename, :url, :results, :start_time,
              :marks_data, :models

  # @param args [Hash] options hash
  # @option args [String] :filename path of the JSON file to write
  # @option args [String] :url address of the catalogue start page
  def initialize(args = {})
    @filename = args[:filename]
    @url = args[:url]
    @results = {}
  end

  # Runs the full pipeline: collect makes, collect the models of every make,
  # then dump the results to #filename.
  #
  # @return [nil]
  def do_parse
    parse_marks
    parse_models
    write_to_file
  end

  private

  # Reads the start page and fills @marks_data with { link text => href },
  # one entry per catalogue item. Each item's first child is treated as the
  # link. The timer (@start_time) starts once the start page has been fetched.
  def parse_marks
    doc = nokogiri_object
    @start_time = Time.now
    @marks_data = doc.css(ITEM_SELECTOR).each_with_object({}) do |item, marks|
      link = item.children.first
      marks[link.children.first.text] = link['href']
    end
    puts "Count of model = #{marks_data.count}"
  end

  # Visits every make page and stores the list of model names in @results
  # under the make's name, then prints the elapsed time.
  def parse_models
    total = marks_data.count
    marks_data.each.with_index(1) do |(name, href), index|
      # The href is prefixed with "https:" as-is, which presumes the site
      # emits protocol-relative links ("//host/path") - verify if it changes.
      doc = nokogiri_object("https:#{href}")
      puts "Parsed #{index} model, called #{name.capitalize}, remaining #{total - index} models"
      @results[name] = doc.css(ITEM_SELECTOR).map { |item| item.children.first.text }
    end
    puts "Parsed for #{(Time.now - start_time).round} seconds"
  end

  # Serialises @results as a single line of JSON into #filename. The block
  # form of File.open closes the file even if serialisation raises.
  def write_to_file
    File.open(filename, 'w') { |file| file.puts(results.to_json) }
  end

  # Downloads +url+ and parses it into a Nokogiri HTML document.
  #
  # URI.open is used instead of a bare `open`: since Ruby 3.0 open-uri no
  # longer makes Kernel#open accept URLs. The block form closes the
  # downloaded stream as soon as the document has been parsed.
  #
  # @param url [String] address to fetch; defaults to the start page
  # @return [Object] whatever Nokogiri::HTML returns for the fetched stream
  def nokogiri_object(url = @url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end
end
# Run the scrape only when this file is executed directly, so that loading it
# just to use AutoParser does not fire network requests or write result.json.
if __FILE__ == $PROGRAM_NAME
  AutoParser.new(filename: 'result.json', url: 'https://auto.ru').do_parse
end