-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile
58 lines (44 loc) · 1.18 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Normalizes every file directly under dataset/: first converts line endings
# with dos2unix (-f forces conversion), then re-encodes each file from
# ISO-8859-1 to UTF-8 in place, going through a temporary "<path>.tmp" copy.
task :convert do
  require "shellwords"

  sh "dos2unix -f dataset/*"

  Dir["dataset/*"].each do |path|
    # Escape both paths so names containing spaces or shell metacharacters
    # are handed to cp/iconv/rm as a single literal argument.
    target = Shellwords.escape(path)
    backup = Shellwords.escape("#{path}.tmp")

    sh "cp #{target} #{backup}"
    # iconv reads the copy because the redirect truncates the target first.
    sh "iconv -f iso-8859-1 -t utf-8 #{backup} > #{target}"
    sh "rm #{backup}"
  end
end
# Loads name frequencies from names.json into Redis.
#
# names.json is read line by line and each line is parsed as its own JSON
# document. For every entry, "str" is added to the sorted set "names" scored
# by "cnt_name" and to the sorted set "last_names" scored by "cnt_last_name";
# a missing count is stored as 0. Entries whose "str" is a single alphabetic
# character are skipped. One "." is printed per stored entry, and a
# synchronous SAVE is issued at the end.
task :names do
  require "redis"
  require "json"

  # Redis.new replaces Redis.connect, a deprecated alias that newer redis-rb
  # releases no longer provide.
  redis = Redis.new

  File.foreach("names.json") do |raw_line|
    entry = JSON.parse(raw_line)
    next if entry["str"] =~ /^[[:alpha:]]$/

    # Commands are issued on the yielded pipeline object; calling them on the
    # outer client inside the block is deprecated in current redis-rb.
    redis.pipelined do |pipeline|
      pipeline.zadd("names", entry["cnt_name"] || 0, entry["str"])
      pipeline.zadd("last_names", entry["cnt_last_name"] || 0, entry["str"])
    end

    print "."
  end

  redis.save
end
# Parses every "*-02.pdf" found anywhere below the current directory and
# stores the records the parser yields in the BoletinDB located in the "db"
# directory next to this Rakefile.
#
# The publication date is taken from the first run of eight digits in the
# file name, read as YYYYMMDD, and handed to the parser as "YYYY-MM-DD".
# NOTE(review): a matching file without eight consecutive digits in its name
# makes Time.strptime raise, exactly as before; confirm whether such files
# can exist.
task :load_boletines do
  require_relative 'lib/parser'
  require_relative 'lib/model'
  require "time"
  require "batch"

  bdb = BoletinDB.new(File.join(File.dirname(__FILE__), "db"))

  # Batch comes from the "batch" gem; presumably it iterates with progress
  # reporting -- verify against the gem's documentation.
  Batch.each(Dir["**/*-02.pdf"]) do |path|
    published_on = Time.strptime(File.basename(path)[/(\d{8})/, 1], "%Y%m%d")

    # The argv-array form runs pdftotext without a shell, so paths containing
    # spaces or shell metacharacters are passed through untouched.
    IO.popen(["pdftotext", "-raw", path, "-"]) do |text|
      Parser.parse(text, fecha_aparicion: published_on.strftime("%Y-%m-%d")) do |sociedad|
        bdb.store_sociedad(sociedad)
      end
    end
  end
end
# Loads every Ruby file found under test/ (at any depth) via require_relative.
# Presumably the test files run themselves when loaded -- verify.
task :test do
  test_files = Dir.glob("test/**/*.rb")
  test_files.each do |test_file|
    require_relative test_file
  end
end
# Running `rake` with no arguments runs the :test task.
task default: [:test]