diff --git a/lib/floki/selector.ex b/lib/floki/selector.ex index 706e1659..b40a96ca 100644 --- a/lib/floki/selector.ex +++ b/lib/floki/selector.ex @@ -78,12 +78,27 @@ defmodule Floki.Selector do def match?(%Comment{}, _selector, _tree), do: false def match?(html_node, selector, tree) do - id_match?(html_node, selector.id) && namespace_match?(html_node, selector.namespace) && - type_match?(html_node, selector.type) && classes_matches?(html_node, selector.classes) && + can_match_combinator?(html_node, selector.combinator) && + id_match?(html_node, selector.id) && + namespace_match?(html_node, selector.namespace) && + type_match?(html_node, selector.type) && + classes_matches?(html_node, selector.classes) && attributes_matches?(html_node, selector.attributes) && pseudo_classes_match?(html_node, selector.pseudo_classes, tree) end + defp can_match_combinator?(_node, nil), do: true + + defp can_match_combinator?( + %HTMLNode{children_nodes_ids: []}, + %Selector.Combinator{match_type: match_type} + ) + when match_type in [:child, :descendant] do + false + end + + defp can_match_combinator?(_node, _combinator), do: true + defp id_match?(_node, nil), do: true defp id_match?(%HTMLNode{attributes: []}, _), do: false defp id_match?(%HTMLNode{type: :pi}, _), do: false @@ -143,8 +158,26 @@ defmodule Floki.Selector do defp do_classes_matches?(nil, _classes), do: false + defp do_classes_matches?(class_attr_value, [class | _]) + when bit_size(class_attr_value) < bit_size(class) do + false + end + + defp do_classes_matches?(class_attr_value, [class]) + when bit_size(class_attr_value) == bit_size(class) do + class == class_attr_value + end + + defp do_classes_matches?(class_attr_value, [class]) do + class_attr_value + |> String.split([" ", "\t", "\n"], trim: true) + |> Enum.member?(class) + end + defp do_classes_matches?(class_attr_value, classes) do - classes -- String.split(class_attr_value, ~r/\s+/) == [] + min_size = Enum.reduce(classes, -1, fn item, acc -> acc + 1 + bit_size(item) end) + can_match? = bit_size(class_attr_value) >= min_size + can_match? && classes -- String.split(class_attr_value, [" ", "\t", "\n"], trim: true) == [] end defp attributes_matches?(_node, []), do: true diff --git a/lib/floki/selector/attribute_selector.ex b/lib/floki/selector/attribute_selector.ex index 5e8da68c..3ef3c4d2 100644 --- a/lib/floki/selector/attribute_selector.ex +++ b/lib/floki/selector/attribute_selector.ex @@ -63,7 +63,7 @@ defmodule Floki.Selector.AttributeSelector do s.attribute |> get_value(attributes) # Splits by whitespaces ("a b c" -> ["a", "b", "c"]) - |> String.split(~r/\s+/) + |> String.split([" ", "\t", "\n"], trim: true) |> Enum.any?(fn v -> String.downcase(v) == selector_value end) end @@ -103,8 +103,8 @@ defmodule Floki.Selector.AttributeSelector do def match?(attributes, s = %AttributeSelector{match_type: :includes, value: value}) do get_value(s.attribute, attributes) - |> String.split(~r/\s+/) - |> Enum.any?(fn v -> v == value end) + |> String.split([" ", "\t", "\n"], trim: true) + |> Enum.member?(value) end def match?(attributes, s = %AttributeSelector{match_type: :dash_match}) do diff --git a/lib/floki/selector/parser.ex b/lib/floki/selector/parser.ex index 8ef96d2b..dd32ca5d 100644 --- a/lib/floki/selector/parser.ex +++ b/lib/floki/selector/parser.ex @@ -37,9 +37,9 @@ defmodule Floki.Selector.Parser do do_parse_all(remaining_tokens, [selector | selectors]) end - defp do_parse([], selector), do: {selector, []} - defp do_parse([{:close_parentesis, _} | t], selector), do: {selector, t} - defp do_parse([{:comma, _} | t], selector), do: {selector, t} + defp do_parse([], selector), do: {optimize_selector(selector), []} + defp do_parse([{:close_parentesis, _} | t], selector), do: {optimize_selector(selector), t} + defp do_parse([{:comma, _} | t], selector), do: {optimize_selector(selector), t} defp do_parse([{:identifier, _, namespace}, {:namespace_pipe, _} | t], selector) do do_parse(t, %{selector | namespace: to_string(namespace)}) @@ -267,4 +267,9 @@ defmodule Floki.Selector.Parser do Logger.debug("Only simple selectors are allowed in :not() pseudo-class. Ignoring.") nil end + + # Reorders classes in selector to improve matching performance. + defp optimize_selector(selector) do + %{selector | classes: Enum.sort(selector.classes, &(bit_size(&1) >= bit_size(&2)))} + end end diff --git a/test/floki/selector/parser_test.exs b/test/floki/selector/parser_test.exs index 23cbca8f..68847c55 100644 --- a/test/floki/selector/parser_test.exs +++ b/test/floki/selector/parser_test.exs @@ -31,6 +31,14 @@ defmodule Floki.Selector.ParserTest do ] end + test "reorders classes in selector to improve matching performance" do + tokens = tokenize(".small.longer.even-longer") + + assert Parser.parse(tokens) == [ + %Selector{classes: ["even-longer", "longer", "small"]} + ] + end + test "multiple selectors" do tokens = tokenize("ol, ul")