diff --git a/README.md b/README.md
index 8bd489f..f6deb9b 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,9 @@ are ) in the codebase, as of the last time that I updated this list.
 - IN case statements, the following patterns are not supported:
   - Concatenate patterns
   - Bitstring patterns (bytes)
+  - The problem with both is python match has no way to match on "parts" of bytes or strings. Possible solutions:
+    - convert the entity to a python list and match on that
+    - construct a potentially massive match guard ('case ... if') to compare elements.
 - Destructuring in assignments is not supported yet
   - (EASY) tuple destructuring can map straight to python destructuring
   - other structures will maybe need a match statement?
@@ -97,7 +100,6 @@ are ) in the codebase, as of the last time that I updated this list.
 
 - glance doesn't have (much of) a typechecker
 - not currently generating python type hints (e.g. function arguments and return types), but gleam gives us that info so may as well use it
-- need to print out nice errors when glance fails to parse
 - no concept of a "project", gleam.toml, downloading dependencies
 - only compiles one module at a time, doesn't follow imports
 - copies the prelude module blindly into the directory that contains that one module instead of a top level
diff --git a/gleam.toml b/gleam.toml
index 538269d..0d10db8 100644
--- a/gleam.toml
+++ b/gleam.toml
@@ -18,6 +18,7 @@ glance = ">= 0.11.0 and < 1.0.0"
 argv = ">= 1.0.2 and < 2.0.0"
 simplifile = ">= 2.0.1 and < 3.0.0"
 filepath = ">= 1.0.0 and < 2.0.0"
+glexer = ">= 1.0.1 and < 2.0.0"
 
 [dev-dependencies]
 gleescript = ">= 1.4.0 and < 2.0.0"
diff --git a/manifest.toml b/manifest.toml
index f96d3e5..9835223 100644
--- a/manifest.toml
+++ b/manifest.toml
@@ -24,5 +24,6 @@ glance = { version = ">= 0.11.0 and < 1.0.0" }
 gleam_stdlib = { version = ">= 0.34.0 and < 2.0.0" }
 gleescript = { version = ">= 1.4.0 and < 2.0.0" }
 gleeunit = { version = ">= 1.2.0 and < 2.0.0" }
+glexer = { version = ">= 1.0.1 and < 2.0.0" }
 pprint = { version = ">= 1.0.3 and < 2.0.0" }
 simplifile = { version = ">= 2.0.1 and < 3.0.0" }
diff --git a/src/compiler.gleam b/src/compiler.gleam
index 746dada..948705a 100644
--- a/src/compiler.gleam
+++ b/src/compiler.gleam
@@ -2,20 +2,10 @@ import compiler/generator
 import compiler/transformer
 import glance
 import gleam/result
-import pprint
 
-pub fn parse(contents: String) -> Result(glance.Module, String) {
-  contents
-  |> glance.module
-  |> result.map_error(fn(x) {
-    pprint.debug(x)
-    "Unable to parse"
-  })
-}
-
-pub fn compile(module_contents: String) -> Result(String, String) {
+pub fn compile(module_contents: String) -> Result(String, glance.Error) {
   module_contents
-  |> parse
+  |> glance.module
   |> result.map(transformer.transform)
   |> result.map(generator.generate)
 }
diff --git a/src/compiler/internal/transformer/statements.gleam b/src/compiler/internal/transformer/statements.gleam
index bbc82c0..f561e0e 100644
--- a/src/compiler/internal/transformer/statements.gleam
+++ b/src/compiler/internal/transformer/statements.gleam
@@ -80,7 +80,6 @@ fn transform_statement(
       )
     }
     glance.Assignment(..) as expr -> {
-      pprint.debug(expr)
       todo as "Non-trivial assignments are not supported yet"
     }
 
diff --git a/src/internal/bytes.gleam b/src/internal/bytes.gleam
new file mode 100644
index 0000000..5f59ad2
--- /dev/null
+++ b/src/internal/bytes.gleam
@@ -0,0 +1,13 @@
+import gleam/iterator
+
+/// Iterate over the UTF-8 encoded bytes of `string`, yielding each byte as an
+/// `Int`.
+pub fn iterate(string: String) -> iterator.Iterator(Int) {
+  iterator.unfold(<<string:utf8>>, fn(remaining) {
+    case remaining {
+      <<>> -> iterator.Done
+      <<byte, rest:bytes>> -> iterator.Next(byte, rest)
+      _ -> panic as "string should always return a byte-aligned bitarray"
+    }
+  })
+}
diff --git a/src/internal/errors.gleam b/src/internal/errors.gleam
new file mode 100644
index 0000000..562a0f7
--- /dev/null
+++ b/src/internal/errors.gleam
@@ -0,0 +1,135 @@
+import glance
+import gleam/bit_array
+import gleam/bytes_builder.{type BytesBuilder}
+import gleam/int
+import gleam/iterator
+import gleam/list
+import gleam/result
+import gleam/string
+import glexer
+import glexer/token
+import internal/bytes
+
+/// Render a glance parse error for `filename` as a human-readable message.
+/// `contents` is the source text that was parsed; it is used to quote the
+/// offending line.
+pub fn format_glance_error(
+  error: glance.Error,
+  filename: String,
+  contents: String,
+) -> String {
+  let error_message = case error {
+    glance.UnexpectedEndOfInput -> "Unexpected EOF"
+    glance.UnexpectedToken(token, position) ->
+      format_unexpected_token(token, position, contents)
+  }
+  "Unable to compile " <> filename <> ":\n" <> error_message
+}
+
+// Accumulator threaded through `fold_position_to_lines`.
+type PositionState {
+  PositionState(
+    current_line_number: Int,
+    current_line_bytes: BytesBuilder,
+    current_line_first_byte_position: Int,
+    current_position: Int,
+    target_position: Int,
+  )
+}
+
+/// Describe an unexpected token: its 1-indexed line and column, the source
+/// line it sits on, and a caret under the offending column. Columns are
+/// counted in bytes.
+pub fn format_unexpected_token(
+  token: token.Token,
+  position: glexer.Position,
+  contents: String,
+) -> String {
+  let initial =
+    PositionState(
+      current_line_number: 1,
+      current_line_bytes: bytes_builder.new(),
+      current_line_first_byte_position: 0,
+      current_position: 0,
+      // glexer positions start at byte 0, which is character 1 on a line based system
+      target_position: position.byte_offset + 1,
+    )
+
+  let position_state =
+    contents
+    |> bytes.iterate
+    |> iterator.fold_until(initial, fold_position_to_lines)
+
+  case position_state.current_position {
+    // The scan ran out of bytes before reaching the target position.
+    pos if pos < position_state.target_position ->
+      "\nUnexpected EOF looking for "
+      <> format_token(token)
+      <> " at position "
+      <> int.to_string(position_state.target_position)
+    _ -> {
+      let column =
+        position_state.target_position
+        - position_state.current_line_first_byte_position
+      "Unexpected Token "
+      <> format_token(token)
+      <> "\nAt line "
+      <> int.to_string(position_state.current_line_number)
+      <> " column "
+      <> int.to_string(column)
+      <> "\n\n"
+      <> {
+        position_state.current_line_bytes
+        |> bytes_builder.to_bit_array
+        |> bit_array.to_string
+        |> result.unwrap("Unexpected unicode")
+      }
+      <> "\n"
+      <> string.repeat(" ", column - 1)
+      <> "^\n"
+    }
+  }
+}
+
+// Fold step that walks the source one byte at a time, tracking the current
+// line. A newline seen before the target byte has been consumed starts a new
+// line; the first newline after that stops the fold, so `current_line_bytes`
+// ends up holding the whole line that contains the target.
+fn fold_position_to_lines(
+  state: PositionState,
+  byte: Int,
+) -> list.ContinueOrStop(PositionState) {
+  case byte, state.current_position, state.target_position {
+    10, curr, target if curr < target ->
+      list.Continue(
+        PositionState(
+          ..state,
+          current_line_first_byte_position: state.current_position + 1,
+          current_line_number: state.current_line_number + 1,
+          current_line_bytes: bytes_builder.new(),
+          current_position: state.current_position + 1,
+        ),
+      )
+    10, _, _ -> list.Stop(state)
+    byte, _, _ -> {
+      list.Continue(
+        PositionState(
+          ..state,
+          current_line_bytes: bytes_builder.append(state.current_line_bytes, <<
+            byte,
+          >>),
+          current_position: state.current_position + 1,
+        ),
+      )
+    }
+  }
+}
+
+// Render a token for inclusion in an error message.
+fn format_token(token: token.Token) -> String {
+  case token {
+    token.Int(num_str) -> num_str
+    // Other tokens fall back to their debug representation.
+    _ -> string.inspect(token)
+  }
+}
diff --git a/src/macabre.gleam b/src/macabre.gleam
index 3280481..8ca190e 100644
--- a/src/macabre.gleam
+++ b/src/macabre.gleam
@@ -3,6 +3,7 @@ import compiler
 import gleam/io
 import gleam/result
 import gleam/string
+import internal/errors
 import output
 import simplifile
 
@@ -16,9 +17,7 @@ pub fn compile_module(filename: String) -> Result(Nil, String) {
   |> result.try(fn(content) {
     content
     |> compiler.compile
-    |> result.map_error(fn(error) {
-      "Unable to compile " <> filename <> ":\n " <> error
-    })
+    |> result.map_error(errors.format_glance_error(_, filename, content))
   })
   |> result.try(output.write(_, output.replace_extension(filename)))
   |> result.try(fn(_) {
diff --git a/test/assignment_test.gleam b/test/compiler/assignment_test.gleam
similarity index 100%
rename from test/assignment_test.gleam
rename to test/compiler/assignment_test.gleam
diff --git a/test/bitstring_test.gleam b/test/compiler/bitstring_test.gleam
similarity index 100%
rename from test/bitstring_test.gleam
rename to test/compiler/bitstring_test.gleam
diff --git a/test/case_test.gleam b/test/compiler/case_test.gleam
similarity index 100%
rename from test/case_test.gleam
rename to test/compiler/case_test.gleam
diff --git a/test/expression_test.gleam b/test/compiler/expression_test.gleam
similarity index 100%
rename from test/expression_test.gleam
rename to test/compiler/expression_test.gleam
diff --git a/test/function_test.gleam b/test/compiler/function_test.gleam
similarity index 100%
rename from test/function_test.gleam
rename to test/compiler/function_test.gleam
diff --git a/test/imports_test.gleam b/test/compiler/imports_test.gleam
similarity index 100%
rename from test/imports_test.gleam
rename to test/compiler/imports_test.gleam
diff --git a/test/transformer_test.gleam b/test/compiler/transformer_test.gleam
similarity index 100%
rename from test/transformer_test.gleam
rename to test/compiler/transformer_test.gleam
diff --git a/test/types_test.gleam b/test/compiler/types_test.gleam
similarity index 100%
rename from test/types_test.gleam
rename to test/compiler/types_test.gleam
diff --git a/test/internal/bytes_test.gleam b/test/internal/bytes_test.gleam
new file mode 100644
index 0000000..e5beacb
--- /dev/null
+++ b/test/internal/bytes_test.gleam
@@ -0,0 +1,18 @@
+import gleam/iterator
+import gleeunit/should
+import internal/bytes
+
+pub fn iterate_ascii_bytes_test() {
+  bytes.iterate("hello")
+  |> iterator.to_list
+  |> should.equal([104, 101, 108, 108, 111])
+}
+
+pub fn iterate_utf8_bytes_test() {
+  "🏳️‍🌈"
+  |> bytes.iterate
+  |> iterator.to_list
+  |> should.equal([
+    240, 159, 143, 179, 239, 184, 143, 226, 128, 141, 240, 159, 140, 136,
+  ])
+}
diff --git a/test/internal/errors_test.gleam b/test/internal/errors_test.gleam
new file mode 100644
index 0000000..5bd8c5f
--- /dev/null
+++ b/test/internal/errors_test.gleam
@@ -0,0 +1,33 @@
+import gleeunit/should
+import glexer
+import glexer/token
+import internal/errors
+
+// Reminder: glexer.Position is 0-indexed, but output columns are 1-indexed
+pub fn position_at_first_byte_test() {
+  errors.format_unexpected_token(token.Int("5"), glexer.Position(0), "5bcdefg")
+  |> should.equal("Unexpected Token 5\nAt line 1 column 1\n\n5bcdefg\n^\n")
+}
+
+pub fn position_in_first_line_test() {
+  errors.format_unexpected_token(token.Int("5"), glexer.Position(4), "abcd5fg")
+  |> should.equal("Unexpected Token 5\nAt line 1 column 5\n\nabcd5fg\n    ^\n")
+}
+
+pub fn position_in_second_line_test() {
+  errors.format_unexpected_token(
+    token.Int("5"),
+    glexer.Position(5),
+    "abc\nd5fg",
+  )
+  |> should.equal("Unexpected Token 5\nAt line 2 column 2\n\nd5fg\n ^\n")
+}
+
+pub fn position_after_newline_test() {
+  errors.format_unexpected_token(
+    token.Int("5"),
+    glexer.Position(6),
+    "abc\n\nd5fg",
+  )
+  |> should.equal("Unexpected Token 5\nAt line 3 column 2\n\nd5fg\n ^\n")
+}