From 75b830a78917262b1e136d22e5259ffdc63decd8 Mon Sep 17 00:00:00 2001 From: Brian Cardarella Date: Wed, 23 Oct 2024 02:10:33 -0400 Subject: [PATCH] First implementation of LVN template parser --- lib/live_view_native/template/parser.ex | 302 ++++++++++++++++++ .../live_view_native/template/parser_test.exs | 196 ++++++++++++ 2 files changed, 498 insertions(+) create mode 100644 lib/live_view_native/template/parser.ex create mode 100644 test/live_view_native/template/parser_test.exs diff --git a/lib/live_view_native/template/parser.ex b/lib/live_view_native/template/parser.ex new file mode 100644 index 0000000..87d9011 --- /dev/null +++ b/lib/live_view_native/template/parser.ex @@ -0,0 +1,302 @@ +defmodule LiveViewNative.Template.Parser do + @first_chars ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + @chars ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-" + @whitespace ~c"\s\t\n\r" + + def parse_document(document) do + parse(document, [line: 1, column: 1], []) + |> case do + {:ok, {nodes, _cursor}} -> {:ok, nodes} + error -> error + end + end + + def parse_document!(document) do + case parse_document(document) do + {:ok, {nodes, _cursor}} -> nodes + {:error, message, _range} -> raise message + end + end + + defp parse(<<>>, cursor, nodes), + do: {:ok, {Enum.reverse(nodes), cursor}} + + # this next two functions are special escape function that are only used to detect + # the start of an end tag and eject from parsing children + # they does not conform to the same return type as other functions of this name + defp parse(<<"> = document, cursor, nodes) do + {:ok, {document, nodes, cursor}} + end + + defp parse(<<"/>", _document::binary>> = document, cursor, nodes) do + {:ok, {document, nodes, cursor}} + end + + defp parse(<<"", document::binary>>, cursor, buffer) do + cursor = incr_column(cursor, 3) + + comment = + buffer + |> Enum.reverse() + |> List.to_string() + + {:ok, {document, [comment: comment], cursor}} + end + + defp parse_comment_node(<>, cursor, buffer) do + parse_comment_node(document, cursor, [char | buffer]) + end + + defp parse_node(document, cursor = start_cursor) do + with {:ok, {document, tag_name, cursor}} <- parse_tag_name(document, cursor, []), + {:ok, {document, attributes, cursor}} <- parse_attributes(document, cursor, []), + {:ok, {document, cursor}} <- parse_tag_close(document, cursor, start_cursor), + {:ok, {document, children, cursor}} <- parse_children(document, cursor), + {:ok, {document, cursor}} <- parse_end_tag(document, cursor, [], tag_name, start_cursor) do + {:ok, {document, {tag_name, attributes, children}, cursor}} + else + {:error, message, range} -> {:error, message, range} + end + end + + defp parse_tag_name(<<>>, cursor, _buffer) do + {:error, "unexpected end of file while parsing attribute key", [start: cursor, end: cursor]} + end + + defp parse_tag_name(<>, cursor, []) when char in @first_chars do + cursor = incr_column(cursor) + parse_tag_name(document, cursor, [char]) + end + + defp parse_tag_name(<>, cursor, buffer) when char in @chars do + cursor = incr_column(cursor) + parse_tag_name(document, cursor, [char | buffer]) + end + + defp parse_tag_name(<<"/>", _document::binary>> = document, cursor, buffer), + do: return_tag_name(document, buffer, cursor) + defp parse_tag_name(<<">", _document::binary>> = document, cursor, buffer), + do: return_tag_name(document, buffer, cursor) + defp parse_tag_name(<> = document, cursor, buffer) when char in @whitespace do + return_tag_name(document, buffer, cursor) + end + + defp parse_tag_name(<>, cursor, _buffer) do + {:error, "invalid character in tag name: #{[char]}", [start: cursor, end: cursor]} + end + + defp return_tag_name(document, buffer, cursor) do + tag_name = + buffer + |> Enum.reverse() + |> List.to_string() + + {:ok, {document, tag_name, cursor}} + end + + defp parse_attributes(<>, cursor, buffer) when char in @whitespace do + cursor = move_cursor(cursor, char) + parse_attributes(document, cursor, buffer) + end + + defp parse_attributes(<<"/>", _document::binary>> = document, cursor, buffer) do + attributes = Enum.reverse(buffer) + {:ok, {document, attributes, cursor}} + end + + defp parse_attributes(<<">", _document::binary>> = document, cursor, buffer) do + attributes = Enum.reverse(buffer) + {:ok, {document, attributes, cursor}} + end + + defp parse_attributes(document, cursor, buffer) do + case parse_attribute(document, cursor) do + {:ok, {document, attribute, cursor}} -> parse_attributes(document, cursor, [attribute | buffer]) + error -> error + end + end + + defp parse_attribute(document, cursor) do + with {:ok, {document, key, cursor}} <- parse_attribute_key(document, cursor, []), + {:ok, {document, value, cursor}} <- parse_attribute_value(document, cursor, []) do + {:ok, {document, {key, value}, cursor}} + else + error -> error + end + end + + defp parse_attribute_key(<>, cursor, buffer) when char in @whitespace do + cursor = move_cursor(cursor, char) + parse_attribute_key(document, cursor, buffer) + end + + defp parse_attribute_key(<>, cursor, []) when char in @first_chars do + parse_attribute_key(document, incr_column(cursor), [char]) + end + + defp parse_attribute_key(<>, cursor, key_buffer) when char in @chars do + parse_attribute_key(document, incr_column(cursor), [char | key_buffer]) + end + + defp parse_attribute_key(<<"=", document::binary>>, cursor, key_buffer) do + key = + key_buffer + |> Enum.reverse() + |> List.to_string() + + {document, cursor} = drain_whitespace(document, incr_column(cursor)) + + {:ok, {document, key, cursor}} + end + + defp parse_attribute_key(<<>>, cursor, _buffer) do + {:error, "unexpected end of file while parsing attribute key", [start: cursor, end: cursor]} + end + + defp parse_attribute_key(<>, cursor, _buffer) do + {:error, "invalid character in attribute key: #{[char]}", [start: cursor, end: cursor]} + end + + defp parse_attribute_value(<<>>, cursor, _buffer) do + {:error, "unexpected end of file while parsing attribute value", [start: cursor, end: cursor]} + end + + defp parse_attribute_value(<<"\"\"", document::binary>>, cursor, []) do + cursor = incr_column(cursor, 2) + {:ok, {document, "", cursor}} + end + + defp parse_attribute_value(<<"\"", char, document::binary>>, cursor, []) do + cursor = + cursor + |> incr_column() + |> move_cursor(char) + + parse_attribute_value(document, cursor, [char]) + end + + defp parse_attribute_value(<<"\"", document::binary>>, cursor, buffer) do + value = + buffer + |> Enum.reverse() + |> List.to_string() + + {:ok, {document, value, incr_column(cursor)}} + end + + defp parse_attribute_value(_document, cursor, []) do + {:error, "invalid value format for attribute", [start: cursor, end: cursor]} + end + + defp parse_attribute_value(<>, cursor, buffer) do + cursor = move_cursor(cursor, char) + parse_attribute_value(document, cursor, [char | buffer]) + end + + defp parse_tag_close(<<">", document::binary>>, cursor, _start_cursor) do + {:ok, drain_whitespace(document, incr_column(cursor))} + end + + defp parse_tag_close(<<"/>", _document::binary>> = document, cursor, _start_cursor) do + {:ok, {document, cursor}} + end + + defp parse_tag_close(_document, cursor, start_cursor) do + {:error, "tag entity not closed", [start: start_cursor, end: cursor]} + end + + defp parse_children(document, cursor) do + case parse(document, cursor, []) do + {:ok, {"", _nodes, cursor}} -> {:error, "unexpected end of file", [start: cursor, end: cursor]} + result -> result + end + end + + defp parse_end_tag(<<">, cursor, buffer, tag_name, start_cursor) do + cursor = incr_column(cursor, 2) + {document, cursor} = drain_whitespace(document, cursor) + parse_end_tag(document, cursor, buffer, tag_name, start_cursor) + end + + defp parse_end_tag(<>, cursor, [], tag_name, start_cursor) when char in @first_chars do + cursor = incr_column(cursor) + parse_end_tag(document, cursor, [char], tag_name, start_cursor) + end + + defp parse_end_tag(<>, cursor, buffer, tag_name, start_cursor) when char in @chars do + cursor = incr_column(cursor) + parse_end_tag(document, cursor, [char | buffer], tag_name, start_cursor) + end + + defp parse_end_tag(document, cursor, [], _tag_name, _start_cursor) do + case document do + <<">", document::binary>> -> {:ok, {document, incr_column(cursor)}} + <<"/>", document::binary>> -> {:ok, {document, incr_column(cursor, 2)}} + _document -> {:error, "invalid character for end tag", [start: cursor, end: cursor]} + end + end + + defp parse_end_tag(document, end_cursor, buffer, tag_name, start_cursor) do + {document, cursor} = drain_whitespace(document, end_cursor) + + closing_tag_name = + buffer + |> Enum.reverse() + |> List.to_string() + + if tag_name != closing_tag_name do + {:error, "starting tagname does not match closing tagname", [start: start_cursor, end: end_cursor]} + else + parse_end_tag(document, cursor, [], tag_name, start_cursor) + end + end + + defp drain_whitespace(<>, cursor) when char in @whitespace do + drain_whitespace(document, move_cursor(cursor, char)) + end + + defp drain_whitespace(document, cursor), + do: {document, cursor} + + defp move_cursor(cursor, char) when char in [?\n] do + incr_line(cursor) + end + defp move_cursor(cursor, _char), + do: incr_column(cursor) + + defp incr_column([line: line, column: column], count \\ 1), + do: [line: line, column: column + count] + + defp incr_line([line: line, column: _column], count \\ 1) do + [line: line + count, column: 1] + end +end diff --git a/test/live_view_native/template/parser_test.exs b/test/live_view_native/template/parser_test.exs new file mode 100644 index 0000000..2f7cc8e --- /dev/null +++ b/test/live_view_native/template/parser_test.exs @@ -0,0 +1,196 @@ +defmodule LiveViewNative.Template.ParserTest do + use ExUnit.Case, async: false + import LiveViewNative.Template.Parser + + test "will parse a tag" do + {:ok, nodes} = """ + + + + + """ + |> parse_document() + + assert nodes == [ + {"FooBar", [], []}, + {"FooBar", [], []}, + {"FooBar", [], []} + ] + end + + test "will parse a self-closing tags" do + {:ok, nodes} = """ + + + """ + |> parse_document() + + assert nodes == [ + {"FooBar", [], []}, + {"FooBar", [], []} + ] + end + + test "will parse attributes" do + {:ok, nodes} = """ + + + + """ + |> parse_document() + + assert nodes == [ + {"FooBar", [{"a", "123"}, {"b", "321"}, {"c", "789"}], []}, + {"FooBar", [{"a-b", "456"}], []}, + {"FooBar", [{"a", "987"}, {"b-c", "654"}], []} + ] + end + + test "will parse children" do + {:ok, nodes} = """ + + + """ + |> parse_document() + + assert nodes == [ + {"Foo", [], [ + {"Bar", [], [ + {"Baz", [], []} + ]} + ]}, + {"Foo", [], [ + {"Bar", [], []} + ]} + ] + + end + + test "can parse comments" do + {:ok, nodes} = """ + + + + + + + """ + |> parse_document() + + assert nodes == [ + {"FooBar", [], []}, + [comment: " \n\n"], + {"FooBar", [], [ + [comment: " "] + ]} + ] + end + + test "empty" do + {:ok, nodes} = parse_document("") + + assert nodes == [] + end + + describe "parsing errors" do + test "eof within a comment" do + {:error, _message, [start: start_pos, end: end_pos]} = "