-
Notifications
You must be signed in to change notification settings - Fork 0
/
DTDBuilder.rb
120 lines (109 loc) · 3.41 KB
/
DTDBuilder.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
require 'nokogiri'
NX = Nokogiri::XML
# Class definitions ############################################################
# Everything observed so far about one element name.
#
# tag_rules  - Hash of child element name => TagRule
# attr_rules - Hash of attribute name => AttrRule
# cdata_always / cdata_sometimes - whether every / at least one observed
#   occurrence of this tag had a text child.
class Tag
  attr_accessor :name
  attr_accessor :tag_rules, :attr_rules
  attr_accessor :cdata_always, :cdata_sometimes

  # name - the element name; DTDBuilder passes nil for its document-level tag.
  def initialize(name)
    @name = name
    @tag_rules, @attr_rules = {}, {}
    # Seeds for the boolean folds in DTDBuilder#add_document: "always" is
    # and-ed with each observation, "sometimes" is or-ed.
    @cdata_always = true
    @cdata_sometimes = false
  end
end
# How many times a given child element occurs inside a given parent element.
class TagRule
  attr_accessor :parent, :child
  attr_accessor :min_arity, :max_arity

  # parent - name of the containing element
  # child  - name of the contained element
  def initialize(parent, child)
    @parent, @child = parent, child
    # Neutral starting points for a running min / running max: the first
    # observed count replaces both.
    @min_arity = Float::INFINITY
    @max_arity = 0
  end
end
# Records that an attribute was seen on an element, and whether it may be absent.
class AttrRule
  attr_accessor :tag
  attr_accessor :attr
  attr_accessor :optional

  # tag  - name of the element carrying the attribute
  # attr - name of the attribute
  def initialize(tag, attr)
    @tag, @attr = tag, attr
    # Required until an occurrence of the element without it is observed.
    @optional = false
  end
end
# Core #########################################################################
# Infers DTD-like structure rules from example XML documents.
#
# For every element name it accumulates a Tag holding:
#   * which child elements occur and their min/max count per occurrence,
#   * which attributes occur and whether they are optional,
#   * whether text children were seen always / sometimes / never.
#
# tags     - Hash of element name => Tag
# root_tag - Tag (with nil name) describing the document node itself, i.e.
#            which root elements were seen.
class DTDBuilder
  attr_accessor :tags, :root_tag

  def initialize
    @tags = {}
    @root_tag = Tag.new nil
    # The root tag exists before any document is read, so "has a document been
    # recorded yet" needs its own flag (element tags are created on first use).
    @root_seen = false
  end

  # Feeds several documents into the builder.
  #
  # documents - either a splat of documents or a single Array of documents;
  #             each item is anything #add_document accepts.
  #
  # Returns self so calls can be chained.
  def add_documents(*documents)
    documents = documents.first if documents.first.is_a?(Array)
    documents.each { |doc| add_document doc }
    self
  end

  # Feeds one document into the builder.
  #
  # document - an XML String (parsed with NX.parse) or an already parsed
  #            document object responding to #css.
  #
  # The document node itself is visited first (its observations go to
  # root_tag), followed by every element matched by the "*" selector.
  def add_document(document)
    document = NX.parse(document) if document.is_a?(String)
    [document, *document.css('*')].each { |node| record_node(node) }
  end

  private

  # Folds a single node (document or element) into the accumulated rules.
  def record_node(node)
    tag, first_sighting = tag_for(node)

    has_text = node.children.any?(&:text?)
    tag.cdata_sometimes ||= has_text
    tag.cdata_always &&= has_text

    record_children(tag, node, first_sighting)
    record_attributes(tag, node, first_sighting)
  end

  # Returns [tag, first_sighting] for node, creating the Tag when needed.
  # first_sighting is true when no node of this kind was recorded before.
  def tag_for(node)
    if node.is_a?(NX::Document)
      first_sighting = !@root_seen
      @root_seen = true
      [@root_tag, first_sighting]
    else
      first_sighting = !@tags.key?(node.name)
      [(@tags[node.name] ||= Tag.new(node.name)), first_sighting]
    end
  end

  # Updates the child-count rules of tag from the element children of node.
  def record_children(tag, node, first_sighting)
    children_by_name = node.element_children.group_by(&:name)

    # Known children missing from this occurrence can occur zero times.
    tag.tag_rules.each_value do |rule|
      rule.min_arity = 0 unless children_by_name.key?(rule.child)
    end

    children_by_name.each do |name, children|
      rule = tag.tag_rules[name]
      unless rule
        rule = tag.tag_rules[name] = TagRule.new(node.name, name)
        # A child that shows up for the first time on a tag we have already
        # seen was absent from every earlier occurrence, so its minimum is 0.
        rule.min_arity = 0 unless first_sighting
      end
      rule.max_arity = [rule.max_arity, children.count].max
      rule.min_arity = [rule.min_arity, children.count].min
    end
  end

  # Updates the attribute rules of tag from the attributes of node.
  def record_attributes(tag, node, first_sighting)
    # Known attributes missing from this occurrence are optional.
    tag.attr_rules.each_value do |rule|
      rule.optional = true unless node.has_attribute?(rule.attr)
    end

    node.attribute_nodes.each do |attr|
      next if tag.attr_rules.key?(attr.name)

      rule = tag.attr_rules[attr.name] = AttrRule.new(node.name, attr.name)
      # Same reasoning as for children: earlier occurrences lacked it.
      rule.optional = true unless first_sighting
    end
  end
end
# Front-end ####################################################################
# Prints a plain-text report of everything a DTDBuilder has collected.
#
# result_set - object exposing #root_tag (a Tag) and #tags (Hash of name => Tag).
#
# For the root tag and then each named tag, writes to standard output: a blank
# line, the tag name ("<root>" when the name is nil), its attributes (marked
# "optional" where applicable), its children with "(min:max)" counts, and a
# one-line summary of whether text content was seen.
def list_results(result_set)
  all_tags = [result_set.root_tag] + result_set.tags.values

  all_tags.each do |entry|
    puts
    puts((entry.name || "<root>").to_s)

    attr_rules = entry.attr_rules
    puts " attributes:" unless attr_rules.empty?
    attr_rules.each_value do |attr_rule|
      suffix = attr_rule.optional ? " optional" : ""
      puts " #{attr_rule.attr}#{suffix}"
    end

    child_rules = entry.tag_rules
    puts " children:" unless child_rules.empty?
    child_rules.each_value do |child_rule|
      arity = "#{child_rule.min_arity}:#{child_rule.max_arity}"
      puts " #{child_rule.child} (#{arity})"
    end

    cdata_note =
      if entry.cdata_always
        "cdata required"
      elsif entry.cdata_sometimes
        "cdata allowed"
      else
        "cdata not present"
      end
    puts " #{cdata_note}"
  end
end
# Interactive driver: repeatedly asks for a file glob, feeds every matching
# file into one shared DTDBuilder and prints the rules accumulated so far.
# Stops on an empty line or at end of input.
results = DTDBuilder.new
loop do
  puts "what document to parse? Empty line to stop:"
  line = gets
  # gets returns nil at end of input (e.g. Ctrl-D or an exhausted pipe);
  # treat that like the empty line instead of crashing on nil.chomp.
  break if line.nil?

  filemask = line.chomp
  break if filemask.empty?

  filenames = Dir[filemask]
  puts "No such file was found" if filenames.empty?
  filenames.each do |filename|
    begin
      puts "parsing #{filename}"
      # File.read, not IO.read: IO.read treats a name starting with "|" as a
      # command to execute, and these names come from a user-supplied glob.
      results.add_document File.read(filename)
    rescue => e
      # Keep going with the remaining files; just report what went wrong.
      puts e.inspect
      puts e.backtrace
    end
  end
  puts "partial results:"
  list_results results
end