Skip to content

Commit

Permalink
Implement nokogiri-html5-inference
Browse files Browse the repository at this point in the history
  • Loading branch information
marcoroth committed Apr 24, 2024
1 parent bbb0868 commit e6e8b73
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 48 deletions.
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ PATH
activesupport (>= 5.2, < 8)
cable_ready (~> 5.0)
nokogiri (~> 1.0)
nokogiri-html5-inference
rack (>= 2, < 4)
railties (>= 5.2, < 8)
redis (>= 4.0, < 6.0)
Expand Down Expand Up @@ -127,6 +128,8 @@ GEM
racc (~> 1.4)
nokogiri (1.16.2-x86_64-linux)
racc (~> 1.4)
nokogiri-html5-inference (0.1.1)
nokogiri (~> 1.14)
parallel (1.22.1)
parser (3.2.1.0)
ast (~> 2.4.1)
Expand Down
1 change: 1 addition & 0 deletions lib/stimulus_reflex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
require "action_cable"
require "action_view"
require "nokogiri"
require "nokogiri/html5/inference"
require "cable_ready"
require "stimulus_reflex/version"
require "stimulus_reflex/open_struct_fix"
Expand Down
6 changes: 5 additions & 1 deletion lib/stimulus_reflex/html/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ module StimulusReflex
module HTML
# Fragment flavour of Document.
# NOTE(review): parsing_class and document_element presumably override hooks
# defined on Document — confirm against that class.
class DocumentFragment < Document
# Returns Nokogiri::HTML5::Inference (the last expression evaluated).
# NOTE(review): this listing is a commit diff rendered without +/- markers.
# The file header reports one deletion, so the bare `Nokogiri` line appears to
# be the removed pre-change return value and is not part of the new method.
def parsing_class
Nokogiri
Nokogiri::HTML5::Inference
end

# Returns the @document instance variable.
def document_element
@document
end
end
end
Expand Down
1 change: 1 addition & 0 deletions stimulus_reflex.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Gem::Specification.new do |gem|
gem.add_dependency "nokogiri", "~> 1.0"
gem.add_dependency "rack", ">= 2", "< 4"
gem.add_dependency "redis", ">= 4.0", "< 6.0"
gem.add_dependency "nokogiri-html5-inference"

gem.add_development_dependency "bundler", "~> 2.0"
gem.add_development_dependency "magic_frozen_string_literal", "~> 1.2"
Expand Down
4 changes: 1 addition & 3 deletions test/broadcasters/selector_broadcaster_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ class SelectorBroadcasterTest < StimulusReflex::BroadcasterTestCase
"operations" => [
{
"selector" => "html",
# Nokogiri automatically adds a `<meta>` tag for the encoding
# See. https://github.com/sparklemotion/nokogiri/blob/6ea1449926ce97648bb2f7401c9e4fdcb0e261ba/lib/nokogiri/html4/document.rb#L34-L35
"html" => "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"><title>Test</title></head><body><div><div>bar</div><div>baz</div></div></body>",
"html" => "<head><title>Test</title></head><body><div><div>bar</div><div>baz</div></div></body>",
"payload" => {},
"childrenOnly" => true,
"permanentAttributeName" => nil,
Expand Down
125 changes: 85 additions & 40 deletions test/html/document_fragment_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,68 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
assert_equal "1", fragment.inner_html.squish
end

# An empty <table> must come back unchanged from both serializers
# (compared after whitespace squishing).
test "should properly parse <table>" do
  markup = "<table></table>"
  parsed = StimulusReflex::HTML::DocumentFragment.new(markup)
  expected = "<table></table>"

  assert_equal expected, parsed.to_html.squish
  assert_equal expected, parsed.outer_html.squish
end

# A <table> that only holds a <caption> must be serialized as it was given
# (compared after whitespace squishing).
test "should properly parse <table> with <caption>" do
  markup = "<table><caption>Caption</caption></table>"
  parsed = StimulusReflex::HTML::DocumentFragment.new(markup)
  expected = "<table><caption>Caption</caption></table>"

  assert_equal expected, parsed.to_html.squish
  assert_equal expected, parsed.outer_html.squish
end

# Empty <thead> and <tbody> sections inside a <table> must both be kept by
# to_html and outer_html (compared after whitespace squishing).
test "should properly parse <table> with <thead> and <tbody>" do
  markup = "<table><thead></thead><tbody></tbody></table>"
  parsed = StimulusReflex::HTML::DocumentFragment.new(markup)
  expected = "<table><thead></thead><tbody></tbody></table>"

  assert_equal expected, parsed.to_html.squish
  assert_equal expected, parsed.outer_html.squish
end

# A full table (header row in <thead>, data row in <tbody>) must be emitted
# unchanged by to_html and outer_html (compared after whitespace squishing).
test "should properly parse <table> with <thead> and <tbody> and <tr>s" do
  markup = "<table><thead><tr><th>1</th></tr></thead><tbody><tr><td>1</td></tr></tbody></table>"
  parsed = StimulusReflex::HTML::DocumentFragment.new(markup)
  expected = "<table><thead><tr><th>1</th></tr></thead><tbody><tr><td>1</td></tr></tbody></table>"

  assert_equal expected, parsed.to_html.squish
  assert_equal expected, parsed.outer_html.squish
end

# Sibling <thead> and <tbody> sections given without an enclosing <table>
# must be serialized without anything added or dropped
# (compared after whitespace squishing).
test "should properly parse <thead> and <tbody> with <tr>" do
  markup = "<thead><tr><th>1</th></tr></thead><tbody><tr><td>1</td></tr></tbody>"
  parsed = StimulusReflex::HTML::DocumentFragment.new(markup)
  expected = "<thead><tr><th>1</th></tr></thead><tbody><tr><td>1</td></tr></tbody>"

  assert_equal expected, parsed.to_html.squish
  assert_equal expected, parsed.outer_html.squish
end

# A <table> whose single row holds two <th> header cells must be serialized
# with every <th> closed by a matching </th>.
# NOTE(review): an HTML5 tree builder normally wraps a bare <tr> in an implicit
# <tbody>; confirm whether the expected markup here (and in the sibling <td>
# test) should include it.
test "should properly parse <table> with <th>" do
  html = "<table><tr><th>1</th><th>2</th></tr></table>"
  fragment = StimulusReflex::HTML::DocumentFragment.new(html)

  # Expected output mirrors the input: a <th> element is never closed by </td>.
  expected = "<table><tr><th>1</th><th>2</th></tr></table>"

  assert_equal expected, fragment.to_html.squish
  assert_equal expected, fragment.outer_html.squish
end

# A <table> with one row of two <td> cells must be emitted unchanged by
# to_html and outer_html (compared after whitespace squishing).
test "should properly parse <table> with <tr>" do
  markup = "<table><tr><td>1</td><td>2</td></tr></table>"
  parsed = StimulusReflex::HTML::DocumentFragment.new(markup)
  expected = "<table><tr><td>1</td><td>2</td></tr></table>"

  assert_equal expected, parsed.to_html.squish
  assert_equal expected, parsed.outer_html.squish
end

# A standalone <thead> (no enclosing <table>) must be serialized unchanged by
# to_html and outer_html (compared after whitespace squishing).
test "should properly parse <thead>" do
html = "<thead><tr><th>1</th><th>2</th></tr></thead>"
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal "<thead><tr><th>1</th><th>2</th></tr></thead>", fragment.to_html.squish
assert_equal "<thead><tr><th>1</th><th>2</th></tr></thead>", fragment.outer_html.squish
# NOTE(review): this listing is a commit diff rendered without +/- markers.
# By the hunk's line counts, the inner_html assertion below appears to be the
# single line this commit removes — confirm against the repository.
assert_equal "<tr><th>1</th><th>2</th></tr>", fragment.inner_html.squish
end

test "should properly parse <tbody>" do
Expand All @@ -124,7 +179,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal "<tbody><tr><th>1</th><th>2</th></tr></tbody>", fragment.to_html.squish
assert_equal "<tbody><tr><th>1</th><th>2</th></tr></tbody>", fragment.outer_html.squish
assert_equal "<tr><th>1</th><th>2</th></tr>", fragment.inner_html.squish
end

test "should properly parse <tfoot>" do
Expand All @@ -133,7 +187,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal "<tfoot><tr><th>1</th><th>2</th></tr></tfoot>", fragment.to_html.squish
assert_equal "<tfoot><tr><th>1</th><th>2</th></tr></tfoot>", fragment.outer_html.squish
assert_equal "<tr><th>1</th><th>2</th></tr>", fragment.inner_html.squish
end

test "should properly parse <caption>" do
Expand All @@ -142,7 +195,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal "<caption>Caption</caption>", fragment.to_html.squish
assert_equal "<caption>Caption</caption>", fragment.outer_html.squish
assert_equal "Caption", fragment.inner_html.squish
end

test "should properly parse <colgroup> and <col>" do
Expand All @@ -157,7 +209,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal %(<colgroup> <col> <col span="1" class="one"> <col span="2" class="two"> </colgroup>), fragment.to_html.squish
assert_equal %(<colgroup> <col> <col span="1" class="one"> <col span="2" class="two"> </colgroup>), fragment.outer_html.squish
assert_equal %(<col> <col span="1" class="one"> <col span="2" class="two">), fragment.inner_html.squish
end

test "should properly parse <col>" do
Expand All @@ -176,7 +227,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal "<ul><li>1</li></ul>", fragment.to_html.squish
assert_equal "<ul><li>1</li></ul>", fragment.outer_html.squish
assert_equal "<li>1</li>", fragment.inner_html.squish
end

test "should properly parse <li>" do
Expand All @@ -185,34 +235,32 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal "<li>1</li>", fragment.to_html.squish
assert_equal "<li>1</li>", fragment.outer_html.squish
assert_equal "1", fragment.inner_html.squish
end

# Two sibling <input> elements inside a <label> must both be kept, and the
# following sibling <div> must stay outside the label's container.
# NOTE(review): this listing is a commit diff rendered without +/- markers, so
# removed and added lines are interleaved. Inside the fixture string, the two
# <input> lines carrying an HTML comment appear to be the removed versions and
# the two plain <input> lines the added ones. Likewise the first assertion pair
# looks like the removed expectation and the second pair the added one; both
# pairs cannot hold at once. Confirm against the repository.
test "should properly parse two siblings input" do
html = %(
<div>
<div id="label-container">
<label>
<input type="file" accept="image/*"> <!-- this <input> isn't self-closed -->
<input type="hidden" value=""> <!-- this <input> isn't self-closed -->
<input type="file" accept="image/*">
<input type="hidden" value="">
</label>
</div>
<div id="after-label"></div>
</div>
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

# assert_equal 2, fragment.document_element.at_css("input").children.length
# Apparent pre-change expectation: whitespace before </label> dropped, a space
# inside the trailing <div>.
assert_equal %(<div> <div id="label-container"> <label> <input type="file" accept="image/*"> <input type="hidden" value=""></label> </div> <div id="after-label"> </div></div>), fragment.to_html.squish
assert_equal %(<div> <div id="label-container"> <label> <input type="file" accept="image/*"> <input type="hidden" value=""></label> </div> <div id="after-label"> </div></div>), fragment.outer_html.squish
# Apparent post-change expectation: the squished output matches the squished
# fixture markup.
assert_equal %(<div> <div id="label-container"> <label> <input type="file" accept="image/*"> <input type="hidden" value=""> </label> </div> <div id="after-label"></div> </div>), fragment.to_html.squish
assert_equal %(<div> <div id="label-container"> <label> <input type="file" accept="image/*"> <input type="hidden" value=""> </label> </div> <div id="after-label"></div> </div>), fragment.outer_html.squish
end

test "should properly parse <span> after non-closed <input>" do
html = %(
<div>
<label>X</label>
<div>
<input type="text"> <!-- this <input> isn't self-closed -->
<input type="text">
<span>After Input</span>
</div>
</div>
Expand All @@ -230,8 +278,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<input data-action="input-&gt;autocomplete#search"> <span>some text</span>), fragment.to_html.squish
assert_equal %(<input data-action="input-&gt;autocomplete#search"> <span>some text</span>), fragment.outer_html.squish
assert_equal %(<input data-action="input->autocomplete#search"> <span>some text</span>), fragment.to_html.squish
assert_equal %(<input data-action="input->autocomplete#search"> <span>some text</span>), fragment.outer_html.squish
end

test "non-closing <img> tag" do
Expand Down Expand Up @@ -327,8 +375,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<html> <head></head> <body></body> </html>), fragment.to_html.squish
assert_equal %(<html> <head></head> <body></body> </html>), fragment.outer_html.squish
assert_equal %(<html><head></head> <body> </body></html>), fragment.to_html.squish
assert_equal %(<html><head></head> <body> </body></html>), fragment.outer_html.squish
end

test "<head> alongside <body> inside <html> with doctype and content" do
Expand All @@ -345,8 +393,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<!DOCTYPE html> <html> <head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body> </html>), fragment.to_html.squish
assert_equal %(<!DOCTYPE html> <html> <head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body> </html>), fragment.outer_html.squish
assert_equal %(<!DOCTYPE html><html><head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body></html>), fragment.to_html.squish
assert_equal %(<!DOCTYPE html><html><head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body></html>), fragment.outer_html.squish
end

test "<head> alongside <body> inside <html> with content" do
Expand All @@ -363,8 +411,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<html> <head> <meta name="attribute" content="value"> </head> <body class="bg-green-200"> <h1>Hello World</h1> </body> </html>), fragment.to_html.squish
assert_equal %(<html> <head> <meta name="attribute" content="value"> </head> <body class="bg-green-200"> <h1>Hello World</h1> </body> </html>), fragment.outer_html.squish
assert_equal %(<html><head> <meta name="attribute" content="value"> </head> <body class="bg-green-200"> <h1>Hello World</h1> </body></html>), fragment.to_html.squish
assert_equal %(<html><head> <meta name="attribute" content="value"> </head> <body class="bg-green-200"> <h1>Hello World</h1> </body></html>), fragment.outer_html.squish
end

test "<title>" do
Expand Down Expand Up @@ -452,8 +500,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<div data-reflex-permanent></div>), fragment.to_html.squish
assert_equal %(<div data-reflex-permanent></div>), fragment.outer_html.squish
assert_equal %(<div data-reflex-permanent=""></div>), fragment.to_html.squish
assert_equal %(<div data-reflex-permanent=""></div>), fragment.outer_html.squish
end

test "boolean attribute with attribute name as value on <div> tag" do
Expand All @@ -472,8 +520,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<input required>), fragment.to_html.squish
assert_equal %(<input required>), fragment.outer_html.squish
assert_equal %(<input required="">), fragment.to_html.squish
assert_equal %(<input required="">), fragment.outer_html.squish
end

test "boolean attribute on closed <input> tag" do
Expand All @@ -482,8 +530,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<input required>), fragment.to_html.squish
assert_equal %(<input required>), fragment.outer_html.squish
assert_equal %(<input required="">), fragment.to_html.squish
assert_equal %(<input required="">), fragment.outer_html.squish
end

test "boolean attribute with attribute name as value on non-closed <input> tag" do
Expand Down Expand Up @@ -514,8 +562,8 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<!-- Hello Comment --> <div data-attribute="present" some-attribute checked>Content</div>), fragment.to_html.squish
assert_equal %(<!-- Hello Comment --> <div data-attribute="present" some-attribute checked>Content</div>), fragment.outer_html.squish
assert_equal %(<!-- Hello Comment --> <div data-attribute="present" some-attribute="" checked="">Content</div>), fragment.to_html.squish
assert_equal %(<!-- Hello Comment --> <div data-attribute="present" some-attribute="" checked="">Content</div>), fragment.outer_html.squish
end

test "should parse comments with quotes" do
Expand All @@ -526,17 +574,17 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<!-- Hello "Comment" --> <div data-attribute="present" some-attribute checked>Content</div>), fragment.to_html.squish
assert_equal %(<!-- Hello "Comment" --> <div data-attribute="present" some-attribute checked>Content</div>), fragment.outer_html.squish
assert_equal %(<!-- Hello "Comment" --> <div data-attribute="present" some-attribute="" checked="">Content</div>), fragment.to_html.squish
assert_equal %(<!-- Hello "Comment" --> <div data-attribute="present" some-attribute="" checked="">Content</div>), fragment.outer_html.squish
end

# Checks how a mixed-case attribute name (data-someThing) is serialized by
# to_html and outer_html.
# NOTE(review): this listing is a commit diff rendered without +/- markers.
# The first assertion pair (case preserved) appears to be the removed
# expectation and the second pair (lower-cased name) the added one; both pairs
# cannot hold at once. Confirm against the repository.
test "case-sensitive attributes" do
html = %(<div data-someThing="value">1</div>)

fragment = StimulusReflex::HTML::DocumentFragment.new(html)

# Apparent pre-change expectation: attribute name keeps its original casing.
assert_equal %(<div data-someThing="value">1</div>), fragment.to_html.squish
assert_equal %(<div data-someThing="value">1</div>), fragment.outer_html.squish
# Apparent post-change expectation: attribute name is lower-cased.
assert_equal %(<div data-something="value">1</div>), fragment.to_html.squish
assert_equal %(<div data-something="value">1</div>), fragment.outer_html.squish
end

test "case-sensitive <svg> tags" do
Expand All @@ -546,7 +594,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase

assert_equal %(<svg><feSpecularLighting><fePointLight></fePointLight></feSpecularLighting></svg>), fragment.to_html.squish
assert_equal %(<svg><feSpecularLighting><fePointLight></fePointLight></feSpecularLighting></svg>), fragment.outer_html.squish
assert_equal %(<feSpecularLighting><fePointLight></fePointLight></feSpecularLighting>), fragment.inner_html.squish
end

test "non-standard HTML attributes (Alpine.js-like)" do
Expand All @@ -573,7 +620,6 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
outer_container = %(<div id="container"> #{inner_container} </div>)

assert_equal raw_html.squish, fragment.to_html.squish
assert_equal outer_title.squish, fragment.inner_html.squish
assert_equal raw_html.squish, fragment.outer_html.squish

refute fragment.match("body").present?
Expand Down Expand Up @@ -644,12 +690,11 @@ class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
outer_p = %(<p>#{inner_p}</p>)
inner_body = %(<h1>Home#index</h1> #{outer_p})
outer_body = %(<body id="body"> #{inner_body} </body>)
inner_html = %(<head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <title>StimulusReflex Test</title> <meta name="viewport" content="width=device-width,initial-scale=1"> <meta name="csrf-param" content="authenticity_token"> <meta name="csrf-token" content="token"> <link rel="stylesheet" href="/assets/application.css" data-turbo-track="reload"> <script src="/assets/application.js" data-turbo-track="reload" defer></script> </head> #{outer_body})
outer_html = %(<html> #{inner_html} </html>)
inner_html = %(<head> <title>StimulusReflex Test</title> <meta name="viewport" content="width=device-width,initial-scale=1"> <meta name="csrf-param" content="authenticity_token"> <meta name="csrf-token" content="token"> <link rel="stylesheet" href="/assets/application.css" data-turbo-track="reload"> <script src="/assets/application.js" data-turbo-track="reload" defer="defer"></script> </head> #{outer_body})
outer_html = %(<html>#{inner_html}</html>)

assert_equal outer_html, fragment.to_html.squish
assert_equal outer_html, fragment.outer_html.squish
assert_equal inner_html, fragment.inner_html.squish
assert_equal "<!DOCTYPE html>#{outer_html}", fragment.to_html.squish
assert_equal "<!DOCTYPE html>#{outer_html}", fragment.outer_html.squish

assert_equal outer_html, fragment.match("html").to_html.squish
assert_equal outer_html, fragment.match("html").outer_html.squish
Expand Down
8 changes: 4 additions & 4 deletions test/html/document_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ class StimulusReflex::HTML::DocumentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::DocumentFragment.new(html)

assert_equal %(<html> <head></head> <body></body> </html>), fragment.to_html.squish
assert_equal %(<html> <head></head> <body></body> </html>), fragment.outer_html.squish
assert_equal %(<html><head></head> <body> </body></html>), fragment.to_html.squish
assert_equal %(<html><head></head> <body> </body></html>), fragment.outer_html.squish
end

test "<head> alongside <body> inside <html> with doctype and content" do
Expand All @@ -70,8 +70,8 @@ class StimulusReflex::HTML::DocumentTest < ActiveSupport::TestCase
)
fragment = StimulusReflex::HTML::Document.new(html)

assert_equal %(<!DOCTYPE html> <html> <head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body> </html>), fragment.to_html.squish
assert_equal %(<!DOCTYPE html> <html> <head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body> </html>), fragment.outer_html.squish
assert_equal %(<html><head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body></html>), fragment.to_html.squish
assert_equal %(<html><head> <title>Title</title> </head> <body id="body"> <h1>Header</h1> </body></html>), fragment.outer_html.squish
end

test "should extract a document of the HTML" do
Expand Down

0 comments on commit e6e8b73

Please sign in to comment.