diff --git a/Vladimir_Kovalev/2/checker.rb b/Vladimir_Kovalev/2/checker.rb
new file mode 100644
index 00000000..8a487ded
--- /dev/null
+++ b/Vladimir_Kovalev/2/checker.rb
@@ -0,0 +1,145 @@
+require "csv"
+require "benchmark"
+require "net/http"
+require "digest"
+require "yaml"
+require "optparse"
+require "singleton"
+require "fileutils"
+require "domainatrix"
+
+Dir["./lib/*.rb"].each { |file| require_relative file }
+
+# Process-wide holder for the parsed command-line switches.
+#
+# Calls made on the class itself are forwarded to the singleton instance, so
+# callers can write `Options.verbose` instead of `Options.instance.verbose`.
+class Options
+  include Singleton
+  attr_accessor :verbose, :nosubdomains, :filter, :solutions, :parallel, :cache
+
+  # Forwards accessor calls to the singleton instance; anything the instance
+  # does not respond to still raises NoMethodError through `super`.
+  def self.method_missing(name, *args, &block)
+    return super unless instance.respond_to?(name)
+
+    instance.public_send(name, *args, &block)
+  end
+
+  # Keeps `respond_to?` consistent with the forwarding in `method_missing`.
+  def self.respond_to_missing?(name, include_private = false)
+    instance.respond_to?(name) || super
+  end
+end
+
+parser = OptionParser.new do |opts|
+  opts.banner = "Usage: checker.rb [options] file.csv"
+
+  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
+    Options.verbose = v
+  end
+  # OptionParser yields `false` for a switch spelled with a `--no-` prefix,
+  # so the flag is set explicitly; storing the yielded value would leave the
+  # option permanently off.
+  opts.on("--no-subdomains", "Exclude subdomains") do
+    Options.nosubdomains = true
+  end
+  opts.on("--filter=sales", "Body filter") do |v|
+    Options.filter = v
+  end
+  opts.on("--exclude-solutions", "Exclude OpenSource projects") do |v|
+    Options.solutions = v
+  end
+  opts.on("--parallel=N", "Using threads") do |v|
+    Options.parallel = v
+  end
+  # Stays nil (cache enabled) unless `--no-cache` is given, which yields false.
+  opts.on("--no-cache", "Without cache") do |v|
+    Options.cache = v
+  end
+end
+
+begin
+  parser.parse!
+rescue OptionParser::ParseError => e
+  # ParseError is the common base class, so a missing or invalid argument is
+  # reported the same way as an unknown switch.
+  puts e
+  puts parser.help
+  exit 1
+end
+
+if Options.verbose
+  p Options.instance
+  p ARGV
+end
+
+if ARGV.count != 1
+  puts parser.help
+  exit 1
+end
+
+unless File.exist?(ARGV[0])
+  puts "File not found"
+  puts parser.help
+  exit 1
+end
+
+Db.load_file ARGV[0]
+Db.subdomains_clear if Options.nosubdomains
+Db.populate
+
+# Prints the status code and timing of one result row, without a newline.
+def info(row)
+  print " #{row[:code]} (#{row[:time]}) "
+end
+
+# Prints how far the run has progressed after `step` of `Db.total` hosts.
+def print_percent(step)
+  # Multiply before dividing: integer `100 / total` is 0 once total > 100.
+  print "#{100 * step / Db.total}% - "
+end
+
+# Formats a duration in seconds as whole seconds ("2s") or, below one second,
+# as whole milliseconds ("137ms").
+def format_elapsed(seconds)
+  whole = seconds.truncate
+  whole.positive? ? "#{whole}s" : "#{(seconds * 1000).truncate}ms"
+end
+
+# `--no-cache` disables both reading and writing cache entries.
+use_cache = Options.cache != false
+
+1.upto(Db.total) do |i|
+  row = Db.result[i]
+  host = row[:host]
+  print_percent(i) unless Options.filter
+  print "#{host} - "
+
+  cached = use_cache && UrlCache.fetch(host)
+  if cached
+    Db.result[i] = cached
+    # `info` prints and returns nil, so it cannot be interpolated into a string.
+    print " cached"
+    info(cached)
+    puts
+    next
+  end
+
+  begin
+    # Built inside the rescue scope so a malformed host is recorded as an
+    # error for this row instead of aborting the whole run.
+    uri = URI::HTTP.build(host: host)
+    res = nil
+    time = Benchmark.measure { res = Net::HTTP.get_response(uri) }
+    row.merge!(code: res.code, time: format_elapsed(time.real), body: Body.new(res.body))
+  rescue StandardError => e
+    row.merge!(code: e.message)
+  ensure
+    UrlCache.push(host, row) if use_cache
+  end
+
+  info(row)
+  puts "done"
+end
+puts Db.out
diff --git a/Vladimir_Kovalev/2/lib/body.class.rb b/Vladimir_Kovalev/2/lib/body.class.rb
new file mode 100644
index 00000000..80c29c54
--- /dev/null
+++ b/Vladimir_Kovalev/2/lib/body.class.rb
@@ -0,0 +1,18 @@
+# Wraps an HTTP response body so it can be searched for a keyword.
+class Body
+  attr_reader :body
+
+  # @param body [String, nil] raw response body
+  def initialize(body)
+    @body = body
+  end
+
+  # Reports whether the body matches `keyword`, which is interpolated into a
+  # regular expression (so regexp metacharacters keep their meaning).
+  #
+  # @param keyword [String, Regexp] pattern to look for
+  # @return [Boolean] false for a nil body instead of raising
+  def match_keyword?(keyword)
+    @body.to_s.match?(/#{keyword}/)
+  end
+end
diff --git a/Vladimir_Kovalev/2/lib/db.class.rb b/Vladimir_Kovalev/2/lib/db.class.rb
new file mode 100644
index 00000000..d4a8ce6e
--- /dev/null
+++ b/Vladimir_Kovalev/2/lib/db.class.rb
@@ -0,0 +1,64 @@
+# In-memory store for the hosts read from the CSV file and their check results.
+#
+# State lives in class-level instance variables rather than `@@` class
+# variables, which would be shared with any subclass.
+class Db
+  class << self
+    # Reads the CSV file and keeps its distinct rows (each row is an Array).
+    def load_file(csv_file)
+      @all_data = CSV.read(csv_file).uniq
+    end
+
+    # @return [Array<Array>] the rows loaded by `load_file`
+    def csv
+      @all_data
+    end
+
+    # Drops rows whose host has a subdomain other than "www".
+    def subdomains_clear
+      @all_data.reject! do |row|
+        # The host is the first CSV column, the same cell `populate` reads.
+        subdomain = Domainatrix.parse(row[0]).subdomain.to_s
+        !subdomain.empty? && subdomain != "www"
+      end
+    end
+
+    # @return [Integer] number of loaded rows
+    def total
+      @all_data.count
+    end
+
+    # Builds the result table, keyed by 1-based row position.
+    def populate
+      @result = {}
+      @all_data.each.with_index(1) do |row, i|
+        @result[i] = { host: row[0] }
+      end
+    end
+
+    # @return [Hash{Integer => Hash}] result rows keyed by 1-based position
+    def result
+      @result
+    end
+
+    # Counts results by outcome: 2xx/3xx are successes, 4xx/5xx are failures
+    # and everything else (error messages, missing codes) is an error.
+    #
+    # @return [Hash{Symbol => Integer}] :Total, :Success, :Failed, :Errored
+    def out
+      summary = { Total: 0, Success: 0, Failed: 0, Errored: 0 }
+      @result.each_value do |row|
+        bucket = case row[:code].to_s
+                 # Whole-string anchors: `^` would also match the start of a
+                 # later line inside a multi-line error message.
+                 when /\A[23]\d\d\z/ then :Success
+                 when /\A[45]\d\d\z/ then :Failed
+                 else :Errored
+                 end
+        summary[bucket] += 1
+        summary[:Total] += 1
+      end
+      summary
+    end
+  end
+end
diff --git a/Vladimir_Kovalev/2/lib/urlcache.class.rb b/Vladimir_Kovalev/2/lib/urlcache.class.rb
new file mode 100644
index 00000000..6d97ffb2
--- /dev/null
+++ b/Vladimir_Kovalev/2/lib/urlcache.class.rb
@@ -0,0 +1,56 @@
+# File-based cache of check results: one YAML file per URL, stored in a
+# ".cache" directory under the current working directory.
+class UrlCache
+  CACHE_DIR_NAME = ".cache".freeze
+  CACHE_DIR = File.join(Dir.pwd, CACHE_DIR_NAME).freeze
+  # Entries older than this many seconds are treated as missing.
+  MAX_AGE = 60 * 60
+
+  class << self
+    # Loads the cached result for `url`.
+    #
+    # @return [Object, false] the stored data, or false when there is no
+    #   entry, the entry is older than MAX_AGE, or it cannot be deserialized
+    def fetch(url)
+      path = filename(url)
+      # Check existence first: asking for the timestamp of a missing file raises.
+      return false unless exist?(path)
+
+      # Age is measured from the last write (mtime); atime can be refreshed by
+      # reads, which would keep a stale entry alive.
+      age = Time.now.to_i - File.mtime(path).to_i
+      puts "#{age} < #{MAX_AGE}" if Options.verbose
+      return false unless age < MAX_AGE
+
+      # Rows are written with symbol keys and a Body object, both of which
+      # `safe_load` rejects unless they are explicitly permitted.
+      YAML.safe_load(File.read(path), permitted_classes: [Symbol, Body])
+    rescue Psych::Exception
+      # A corrupt or incompatible entry counts as a cache miss.
+      false
+    end
+
+    # @return [String] MD5 hex digest used as the cache file name
+    def hash_of_file(url)
+      Digest::MD5.hexdigest(url.to_s)
+    end
+
+    # @return [String] absolute path of the cache file for `url`
+    def filename(url)
+      File.join(CACHE_DIR, hash_of_file(url))
+    end
+
+    # Stores `data` as YAML, replacing any previous entry for `url`.
+    def push(url, data)
+      FileUtils.mkdir_p(CACHE_DIR)
+      # File.write truncates an existing file, so no separate delete is needed.
+      File.write(filename(url), data.to_yaml)
+    end
+
+    # Ensures the cache directory exists, then reports whether `file` exists.
+    def exist?(file)
+      FileUtils.mkdir_p(CACHE_DIR)
+      File.exist?(file)
+    end
+  end
+end