forked from twitter/twitter-text
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile
93 lines (77 loc) · 2.14 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
require 'open-uri'
require 'nokogiri'
require 'yaml'
namespace :tlds do
# Scrapes the rows of table#tld-table from the IANA root zone database page,
# buckets each entry as 'country' or 'generic' by matching the text of its
# second cell, and regenerates tld_lib.yml and TldLists.java next to this
# Rakefile.
desc 'Grab tlds from iana and save to tld_lib.yml'
task :iana_update do
  # Use URI.open explicitly: open-uri stopped patching Kernel#open for URLs
  # in Ruby 3.0, so a bare open('http://...') looks for a local file there.
  doc = Nokogiri::HTML(URI.open('http://www.iana.org/domains/root/db'))

  # Output key => pattern applied to the type cell of each row.
  types = {
    'country' => /country-code/,
    'generic' => /generic|sponsored|infrastructure|generic-restricted/,
  }

  tlds = []
  doc.css('table#tld-table tr').each do |tr|
    info = tr.css('td')
    # Rows without <td> cells yield an empty set and are skipped.
    tlds << parse_node(info) unless info.empty?
  end

  yml = {}
  types.each do |name, regex|
    yml[name] = select_tld(tlds, regex)
  end
  # "onion" is added by hand on top of the scraped entries; it is appended
  # after sorting, so it always ends up last in the generic list.
  yml["generic"] << "onion"

  File.open(repo_path('tld_lib.yml'), 'w') do |file|
    file.write(yml.to_yaml)
  end

  # The squiggly heredoc strips the common leading indentation, so the
  # generated Java source starts every literal line at column 0.
  java_source = <<~JAVA
    // Auto-generated by conformance/Rakefile
    package com.twitter;
    import java.util.Arrays;
    import java.util.List;
    public class TldLists {
    public static final List<String> GTLDS = Arrays.asList(
    #{yml["generic"].map {|el| " \"#{el}\""}.join(",\n")}
    );
    public static final List<String> CTLDS = Arrays.asList(
    #{yml["country"].map {|el| " \"#{el}\""}.join(",\n")}
    );
    }
  JAVA

  File.open(repo_path("TldLists.java"), 'w') do |file|
    file.write(java_source)
  end
end
# Rebuilds tlds.yml from tld_lib.yml: for every TLD listed under each type it
# emits one test case whose text is "https://twitter.<tld>" and whose expected
# result is that same URL.
desc 'Update tests from tld_lib.yml'
task :generate_tests do
  tld_groups = YAML.load_file(repo_path('tld_lib.yml'))

  test_yml = { 'tests' => {} }
  tld_groups.each do |type, tlds|
    # The URL is interpolated twice on purpose: reusing one String object in
    # both places could make the YAML emitter write an anchor/alias pair.
    test_yml['tests'][type] = tlds.map do |tld|
      {
        'description' => "#{tld} is a valid #{type} tld",
        'text' => "https://twitter.#{tld}",
        'expected' => ["https://twitter.#{tld}"],
      }
    end
  end

  # Write next to this Rakefile (like tld_lib.yml is read) instead of into
  # whatever directory rake happened to be started from.
  File.open(repo_path('tlds.yml'), 'w') do |file|
    file.write(test_yml.to_yaml)
  end
end
end
# Turns one table row into a { domain:, type: } hash.
#
# node - the row's cells; only the first two are used and each must respond
#        to #text.
#
# The domain is the first cell's text with dots, whitespace and the Unicode
# directional marks U+200F / U+200E removed; the type is the second cell's
# text, unmodified.
def parse_node(node)
  raw_label = node[0].text
  without_dots = raw_label.gsub(/[\.\s]+/, '')
  cleaned = without_dots.delete("\u200f\u200e")
  { domain: cleaned, type: node[1].text }
end
# Returns the domains of all entries whose :type matches +type+, sorted in
# descending order.
#
# tlds - array of hashes with :domain and :type keys.
# type - pattern matched against each entry's :type with =~.
def select_tld(tlds, type)
  domains = tlds.each_with_object([]) do |entry, picked|
    picked << entry[:domain] if entry[:type] =~ type
  end
  # Descending order puts a longer tld ahead of any shorter tld it starts
  # with, so the tld regex can match the longer one when a subset exists.
  domains.sort.reverse
end
# Joins the given path segments onto the directory that contains this file.
#
# path - zero or more path components.
#
# Returns the joined path as a String.
def repo_path(*path)
  base_dir = File.dirname(__FILE__)
  File.join(base_dir, *path)
end