-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.rb
executable file
·88 lines (70 loc) · 2.08 KB
/
scrape.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env ruby
require "rubygems"
require "bundler/setup"
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'open_uri_redirections'
require 'mechanize'
require 'uri'
# Search-results URL the scraper starts from. The query string asks for the
# toplist sorting in descending order, a topRange of 1M, and the resolutions
# 2560x1440 and 3840x2160.
# Frozen so the shared constant cannot be mutated in place by accident.
START_LINK = "https://wallhaven.cc/search?categories=111&purity=100&resolutions=2560x1440%2C3840x2160&topRange=1M&sorting=toplist&order=desc".freeze
# Folder that downloaded images are saved into and that is emptied before a
# new run. The trailing slash matters: file names are appended to it directly.
IMAGE_FOLDER = "./images/".freeze
# Print
# Prints a one-line progress indicator for a running task.
#
# Every call starts with a carriage return so the new text is written over
# the previous indicator on the same terminal line.
#
# proces_name - label printed at the start of the line
# index       - number of items handled so far
# total       - number of items overall
#
# When index equals total, a full arrow followed by "DONE" and a newline is
# printed; otherwise the partial arrow and the percentage are printed.
def print_progres(proces_name, index, total)
  bar, tail =
    if index == total
      [arrow(1, 1), "DONE \n"]
    else
      [arrow(index, total), percentage(index, total)]
    end
  print "\r#{proces_name} #{bar} #{tail}"
end
# Formats index/total as a whole-number percentage string such as "50%".
#
# index - number of items handled so far
# total - number of items overall; expected to be non-zero, because the
#         ratio is not a finite number otherwise and cannot be rounded
#
# Returns a String ending in "%".
def percentage(index, total)
  ratio = index.to_f / total.to_f
  "#{(ratio * 100).round(0)}%"
end
# Builds the textual progress arrow, e.g. "=====>".
#
# The shaft is scaled to 50 characters for a completed task; the "=>" head
# is always appended, so even zero progress yields "=>".
#
# index - number of items handled so far
# total - number of items overall
#
# Returns the arrow as a String.
def arrow(index, total)
  shaft_length = (index.to_f / total.to_f * 50).round(0)
  "#{'=' * shaft_length}=>"
end
# Download
# Downloads every image in the given list, reporting progress after each one.
#
# images - collection of image URLs; each is handed to download_image
#
# Progress is reported with a 1-based position against the size of the list.
# Returns the collection that was passed in.
def download_images_from_array(images)
  images.each_with_index do |image, position|
    download_image(image)
    print_progres("Downloading", position + 1, images.count)
  end
end
# Fetches a single image and stores it inside IMAGE_FOLDER.
#
# The file name is the last path segment of the URL, so
# ".../full/ab/picture.jpg" is stored as IMAGE_FOLDER + "picture.jpg".
#
# image_link - URL of the image to download
#
# Returns whatever the fetched object's #save returns.
def download_image(image_link)
  file_name = URI(image_link).path.split('/').last
  fetched = Mechanize.new.get(image_link)
  fetched.save(IMAGE_FOLDER + file_name)
end
# Empties IMAGE_FOLDER before a new run.
#
# Entries whose name starts with a dot are kept (this also covers the "."
# and ".." entries yielded by Dir.foreach). A missing folder is treated as
# already empty instead of raising, and real subdirectories are left alone
# because File.delete cannot remove them.
#
# Prints "Deleting" before and " - DONE" after the work.
def clear_images_folder
  print "Deleting"
  # On a first run the folder may not exist yet; Dir.foreach would raise.
  if Dir.exist?(IMAGE_FOLDER)
    Dir.foreach(IMAGE_FOLDER) do |file|
      next if file.start_with?('.')
      file_path = File.join(IMAGE_FOLDER, file)
      # Symlinks (even ones pointing at directories) are plain unlinkable
      # entries, so only genuine directories are skipped.
      next if File.directory?(file_path) && !File.symlink?(file_path)
      File.delete(file_path)
    end
  end
  print " - DONE\n"
end
# Scrape
# Collects the full-size image URLs reachable from START_LINK.
#
# Loads the START_LINK page, picks every <a> whose class attribute is exactly
# "preview", opens each linked page and reads the src of its img#wallpaper
# element. Progress is reported after every page and the loop pauses one
# second between requests.
#
# Pages without an img#wallpaper element (or without a src on it) are
# skipped instead of aborting the whole scrape.
#
# Returns an Array of image URL strings (possibly empty).
def get_link_array
  # URI.open is used rather than Kernel#open: Kernel#open no longer handles
  # URLs on Ruby 3.0+, and it would execute a scraped href that starts with
  # "|" as a shell command.
  main_page = Nokogiri::HTML(URI.open(START_LINK, :allow_redirections => :safe))
  page_links = main_page.css("a").select { |link| link['class'] == "preview" }
  total = page_links.count
  links = []
  page_links.each_with_index do |thumb_link, position|
    image_page = Nokogiri::HTML(URI.open(thumb_link["href"], :allow_redirections => :safe))
    wallpaper = image_page.at_css("img#wallpaper")
    image_link = wallpaper && wallpaper["src"]
    links.push(image_link) if image_link
    print_progres("Scraping", position + 1, total)
    # Pause between requests to the site.
    sleep(1)
  end
  links
end
# Main
# Entry point: empty the image folder, scrape the image URLs, then download
# each of them.
clear_images_folder
image_links = get_link_array
download_images_from_array(image_links)