From 25c0dbda98254b0fa6d5d05b85fb9be89b5852a9 Mon Sep 17 00:00:00 2001 From: Yuri Pereira Constante Date: Sun, 24 Dec 2023 19:00:02 -0300 Subject: [PATCH] Always use optimal encoding function --- lib/floki/entities.ex | 54 ++++++++++++++++++++++++++++++------ lib/floki/raw_html.ex | 53 +---------------------------------- test/floki/entities_test.exs | 10 +++---- 3 files changed, 51 insertions(+), 66 deletions(-) diff --git a/lib/floki/entities.ex b/lib/floki/entities.ex index 0831b602..688f9cd2 100644 --- a/lib/floki/entities.ex +++ b/lib/floki/entities.ex @@ -62,15 +62,51 @@ defmodule Floki.Entities do * greater-than sign - > - is replaced by ">". All other simbols are going to remain the same. + + Optimized IO data implementation from Plug.HTML """ - @spec encode(String.t()) :: String.t() - def encode(string) when is_binary(string) do - String.replace(string, ["'", "\"", "&", "<", ">"], fn - "'" -> "'" - "\"" -> """ - "&" -> "&" - "<" -> "<" - ">" -> ">" - end) + @spec encode(iodata()) :: iodata() + def encode(string) when is_binary(string), do: encode(string, 0, string, []) + def encode(data), do: encode(IO.iodata_to_binary(data)) + + escapes = [ + {?<, "<"}, + {?>, ">"}, + {?&, "&"}, + {?", """}, + {?', "'"} + ] + + for {match, insert} <- escapes do + defp encode(<>, skip, original, acc) do + encode(rest, skip + 1, original, [acc | unquote(insert)]) + end + end + + defp encode(<<_char, rest::bits>>, skip, original, acc) do + encode(rest, skip, original, acc, 1) + end + + defp encode(<<>>, _skip, _original, acc) do + acc + end + + for {match, insert} <- escapes do + defp encode(<>, skip, original, acc, len) do + part = binary_part(original, skip, len) + encode(rest, skip + len + 1, original, [acc, part | unquote(insert)]) + end + end + + defp encode(<<_char, rest::bits>>, skip, original, acc, len) do + encode(rest, skip, original, acc, len + 1) + end + + defp encode(<<>>, 0, original, _acc, _len) do + original + end + + defp encode(<<>>, skip, original, acc, len) do + [acc | binary_part(original, skip, len)] end end diff --git a/lib/floki/raw_html.ex b/lib/floki/raw_html.ex index 899c47aa..e40007a5 100644 --- a/lib/floki/raw_html.ex +++ b/lib/floki/raw_html.ex @@ -131,11 +131,7 @@ defmodule Floki.RawHTML do end defp build_attrs({attr, value}, encoder) do - if encoder == @encoder do - [attr, "=\"", html_escape(value) | "\""] - else - [attr, "=\"", value | "\""] - end + [attr, "=\"", encoder.(value) | "\""] end defp build_attrs(attr, _encoder), do: attr @@ -164,53 +160,6 @@ defmodule Floki.RawHTML do end end - # html_escape - # Optimized IO data implementation from Plug.HTML - - defp html_escape(data) when is_binary(data), do: html_escape(data, 0, data, []) - defp html_escape(data), do: html_escape(IO.iodata_to_binary(data)) - - escapes = [ - {?<, "<"}, - {?>, ">"}, - {?&, "&"}, - {?", """}, - {?', "'"} - ] - - for {match, insert} <- escapes do - defp html_escape(<>, skip, original, acc) do - html_escape(rest, skip + 1, original, [acc | unquote(insert)]) - end - end - - defp html_escape(<<_char, rest::bits>>, skip, original, acc) do - html_escape(rest, skip, original, acc, 1) - end - - defp html_escape(<<>>, _skip, _original, acc) do - acc - end - - for {match, insert} <- escapes do - defp html_escape(<>, skip, original, acc, len) do - part = binary_part(original, skip, len) - html_escape(rest, skip + len + 1, original, [acc, part | unquote(insert)]) - end - end - - defp html_escape(<<_char, rest::bits>>, skip, original, acc, len) do - html_escape(rest, skip, original, acc, len + 1) - end - - defp html_escape(<<>>, 0, original, _acc, _len) do - original - end - - defp html_escape(<<>>, skip, original, acc, len) do - [acc | binary_part(original, skip, len)] - end - # helpers # TODO: Use Enum.map_intersperse/3 when we require Elixir v1.10+ diff --git a/test/floki/entities_test.exs b/test/floki/entities_test.exs index 33f2bfcb..c421f7fa 100644 --- a/test/floki/entities_test.exs +++ b/test/floki/entities_test.exs @@ -5,23 +5,23 @@ defmodule Floki.EntitiesTest do describe "encode/1" do test "encode single-quote" do - assert Entities.encode("'") == "'" + assert IO.iodata_to_binary(Entities.encode("'")) == "'" end test "encode double-quote" do - assert Entities.encode("\"") == """ + assert IO.iodata_to_binary(Entities.encode("\"")) == """ end test "ampersand" do - assert Entities.encode("&") == "&" + assert IO.iodata_to_binary(Entities.encode("&")) == "&" end test "encode less-than sign" do - assert Entities.encode("<") == "<" + assert IO.iodata_to_binary(Entities.encode("<")) == "<" end test "encode greater-than sign" do - assert Entities.encode(">") == ">" + assert IO.iodata_to_binary(Entities.encode(">")) == ">" end test "does not encode others" do