-
-
Notifications
You must be signed in to change notification settings - Fork 153
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize selectors matching #510
Changes from 6 commits
341ed1d
e5858ff
d03143d
9ae289b
99e6a32
75f9507
fa64b01
e4a8a8a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,12 +78,27 @@ defmodule Floki.Selector do | |
def match?(%Comment{}, _selector, _tree), do: false | ||
|
||
def match?(html_node, selector, tree) do | ||
id_match?(html_node, selector.id) && namespace_match?(html_node, selector.namespace) && | ||
type_match?(html_node, selector.type) && classes_matches?(html_node, selector.classes) && | ||
can_match_combinator?(html_node, selector.combinator) && | ||
id_match?(html_node, selector.id) && | ||
namespace_match?(html_node, selector.namespace) && | ||
type_match?(html_node, selector.type) && | ||
classes_matches?(html_node, selector.classes) && | ||
attributes_matches?(html_node, selector.attributes) && | ||
pseudo_classes_match?(html_node, selector.pseudo_classes, tree) | ||
end | ||
|
||
defp can_match_combinator?(_node, nil), do: true | ||
|
||
defp can_match_combinator?( | ||
%HTMLNode{children_nodes_ids: []}, | ||
%Selector.Combinator{match_type: match_type} | ||
) | ||
when match_type in [:child, :descendant] do | ||
false | ||
end | ||
|
||
defp can_match_combinator?(_node, _combinator), do: true | ||
|
||
defp id_match?(_node, nil), do: true | ||
defp id_match?(%HTMLNode{attributes: []}, _), do: false | ||
defp id_match?(%HTMLNode{type: :pi}, _), do: false | ||
|
@@ -143,8 +158,24 @@ defmodule Floki.Selector do | |
|
||
defp do_classes_matches?(nil, _classes), do: false | ||
|
||
defp do_classes_matches?(class_attr_value, [class | _]) | ||
when bit_size(class_attr_value) < bit_size(class) do | ||
false | ||
end | ||
|
||
defp do_classes_matches?(class_attr_value, [class]) | ||
when bit_size(class_attr_value) == bit_size(class) do | ||
class == class_attr_value | ||
end | ||
|
||
defp do_classes_matches?(class_attr_value, [class]) do | ||
Enum.member?(String.split(class_attr_value, [" ", "\t", "\n"]), class) | ||
end | ||
|
||
defp do_classes_matches?(class_attr_value, classes) do | ||
classes -- String.split(class_attr_value, ~r/\s+/) == [] | ||
min_size = Enum.reduce(classes, -1, fn item, acc -> acc + 1 + bit_size(item) end) | ||
can_match? = bit_size(class_attr_value) >= min_size | ||
can_match? && classes -- String.split(class_attr_value, [" ", "\t", "\n"]) == [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing here :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
end | ||
|
||
defp attributes_matches?(_node, []), do: true | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,7 +63,7 @@ defmodule Floki.Selector.AttributeSelector do | |
s.attribute | ||
|> get_value(attributes) | ||
# Splits by whitespaces ("a b c" -> ["a", "b", "c"]) | ||
|> String.split(~r/\s+/) | ||
|> String.split([" ", "\t", "\n"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here as well. I think we need to consider the same problems here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|> Enum.any?(fn v -> String.downcase(v) == selector_value end) | ||
end | ||
|
||
|
@@ -103,7 +103,7 @@ defmodule Floki.Selector.AttributeSelector do | |
|
||
def match?(attributes, s = %AttributeSelector{match_type: :includes, value: value}) do | ||
get_value(s.attribute, attributes) | ||
|> String.split(~r/\s+/) | ||
|> String.split([" ", "\t", "\n"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And here :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|> Enum.any?(fn v -> v == value end) | ||
end | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is not exactly the same as the regex version, because occurrences of multiple whitespace characters in sequence are treated differently.
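For example, if we have a class attribute that contains multiple spaces between class values, the former version would capture that correctly, while the new version does not: `String.split("a  b", ~r/\s+/)` returns `["a", "b"]`, whereas `String.split("a  b", [" ", "\t", "\n"])` returns `["a", "", "b"]`.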
We can, however, reject the empty binaries from the match. Can you try this and see if there is an impact on performance?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, fa64b01.
Adding
trim: true
didn't have any performance impact in the benchmark, but it did reduce the memory usage, since we won't have these empty strings in the list.