Refactor and add better support for error reporting

dusty-phillips · Aug 25, 2024 · c024af4 · c024af4
1 parent 8c48928
commit c024af4
Show file tree

Hide file tree

Showing 18 changed files with 208 additions and 17 deletions.
diff --git a/README.md b/README.md
@@ -79,6 +79,9 @@ are ) in the codebase, as of the last time that I updated this list.
 - IN case statements, the following patterns are not supported:
   - Concatenate patterns
   - Bitstring patterns (bytes)
+  - The problem with both is that Python's `match` statement has no way to match on "parts" of bytes or strings. Possible solutions:
+    - convert the entity to a Python list and match on that
+    - construct a potentially massive match guard ('case ... if') to compare elements.
 - Destructuring in assignments is not supported yet
   - (EASY) tuple destructuring can map straight to python destructuring
   - other structures will maybe need a match statement?
@@ -97,7 +100,6 @@ are ) in the codebase, as of the last time that I updated this list.
 - glance doesn't have (much of) a typechecker
 - not currently generating python type hints (e.g. function arguments and
   return types), but gleam gives us that info so may as well use it
-- need to print out nice errors when glance fails to parse
 - no concept of a "project", gleam.toml, downloading dependencies
 - only compiles one module at a time, doesn't follow imports
 - copies the prelude module blindly into the directory that contains that one module instead of a top level

diff --git a/gleam.toml b/gleam.toml
@@ -18,6 +18,7 @@ glance = ">= 0.11.0 and < 1.0.0"
 argv = ">= 1.0.2 and < 2.0.0"
 simplifile = ">= 2.0.1 and < 3.0.0"
 filepath = ">= 1.0.0 and < 2.0.0"
+glexer = ">= 1.0.1 and < 2.0.0"
 
 [dev-dependencies]
 gleescript = ">= 1.4.0 and < 2.0.0"

diff --git a/manifest.toml b/manifest.toml
@@ -24,5 +24,6 @@ glance = { version = ">= 0.11.0 and < 1.0.0" }
 gleam_stdlib = { version = ">= 0.34.0 and < 2.0.0" }
 gleescript = { version = ">= 1.4.0 and < 2.0.0" }
 gleeunit = { version = ">= 1.2.0 and < 2.0.0" }
+glexer = { version = ">= 1.0.1 and < 2.0.0" }
 pprint = { version = ">= 1.0.3 and < 2.0.0" }
 simplifile = { version = ">= 2.0.1 and < 3.0.0" }
diff --git a/src/compiler.gleam b/src/compiler.gleam
@@ -2,20 +2,10 @@ import compiler/generator
 import compiler/transformer
 import glance
 import gleam/result
-import pprint
 
-pub fn parse(contents: String) -> Result(glance.Module, String) {
-  contents
-  |> glance.module
-  |> result.map_error(fn(x) {
-    pprint.debug(x)
-    "Unable to parse"
-  })
-}
-
-pub fn compile(module_contents: String) -> Result(String, String) {
+pub fn compile(module_contents: String) -> Result(String, glance.Error) {
   module_contents
-  |> parse
+  |> glance.module
   |> result.map(transformer.transform)
   |> result.map(generator.generate)
 }
diff --git a/src/compiler/internal/transformer/statements.gleam b/src/compiler/internal/transformer/statements.gleam
@@ -80,7 +80,6 @@ fn transform_statement(
       )
     }
     glance.Assignment(..) as expr -> {
-      pprint.debug(expr)
       todo as "Non-trivial assignments are not supported yet"
     }
 

diff --git a/src/internal/bytes.gleam b/src/internal/bytes.gleam
@@ -0,0 +1,11 @@
+import gleam/iterator
+
+/// Lazily yield every byte of the UTF-8 encoding of `string`, in order, as
+/// integers in the range 0-255. An empty string yields nothing.
+pub fn iterate(string: String) -> iterator.Iterator(Int) {
+  let encoded = <<string:utf8>>
+  iterator.unfold(encoded, next_byte)
+}
+
+// Unfold step: split the first byte off `pending` and carry the remainder
+// forward as the next accumulator, finishing once nothing is left.
+fn next_byte(pending: BitArray) -> iterator.Step(Int, BitArray) {
+  case pending {
+    <<head:8, tail:bytes>> -> iterator.Next(head, tail)
+    <<>> -> iterator.Done
+    // A bit array built from a String is always a whole number of bytes.
+    _ -> panic as "string should always return a byte-aligned bitarray"
+  }
+}
diff --git a/src/internal/errors.gleam b/src/internal/errors.gleam
@@ -0,0 +1,135 @@
+import glance
+import gleam/bit_array
+import gleam/bytes_builder.{type BytesBuilder}
+import gleam/int
+import gleam/iterator
+import gleam/list
+import gleam/result
+import gleam/string
+import glexer
+import glexer/token
+import internal/bytes
+import pprint
+
+/// Turn a `glance.Error` into a human-readable message.
+///
+/// - `error`: the parse error returned by glance.
+/// - `filename`: name of the file being compiled; only used in the heading.
+/// - `contents`: the source text that was parsed; used to show the offending
+///   line for unexpected-token errors.
+///
+/// Returns "Unable to compile <filename>:\n" followed by the error details.
+pub fn format_glance_error(
+  error: glance.Error,
+  filename: String,
+  contents: String,
+) -> String {
+  // The raw error is deliberately not debug-printed here: the caller is
+  // responsible for reporting the formatted message.
+  let error_message = case error {
+    glance.UnexpectedEndOfInput -> "Unexpected EOF"
+    glance.UnexpectedToken(token, position) ->
+      format_unexpected_token(token, position, contents)
+  }
+  "Unable to compile " <> filename <> ":\n" <> error_message
+}
+
+// Accumulator threaded through `fold_position_to_lines` while the source
+// bytes are scanned for the line that contains a target position.
+type PositionState {
+  PositionState(
+    // Number of the line currently being scanned (initialised to 1 by
+    // `format_unexpected_token`, incremented at each newline before the target).
+    current_line_number: Int,
+    // Bytes collected so far for the current line; reset at each newline
+    // before the target. The newline byte itself is never stored.
+    current_line_bytes: BytesBuilder,
+    // Value of `current_position` at which the current line began.
+    current_line_first_byte_position: Int,
+    // Count of bytes consumed so far.
+    current_position: Int,
+    // Position being searched for: the glexer byte offset plus one.
+    target_position: Int,
+  )
+}
+
+/// Describe an unexpected `token` found at `position` within `contents`.
+///
+/// The source bytes are scanned to find the line holding the position. The
+/// result names the token, gives the 1-indexed line and column, and shows
+/// the offending line with a `^` marker under the column.
+///
+/// If the position lies beyond the end of `contents`, an "Unexpected EOF"
+/// message carrying the (1-based) target position is returned instead.
+pub fn format_unexpected_token(
+  token: token.Token,
+  position: glexer.Position,
+  contents: String,
+) -> String {
+  let initial =
+    PositionState(
+      current_line_number: 1,
+      current_line_bytes: bytes_builder.new(),
+      current_line_first_byte_position: 0,
+      current_position: 0,
+      // glexer positions start at byte 0, which is character 1 on a line based system
+      target_position: position.byte_offset + 1,
+    )
+
+  let position_state =
+    contents
+    |> bytes.iterate
+    |> iterator.fold_until(initial, fold_position_to_lines)
+
+  case position_state.current_position {
+    // The scan ran out of bytes before reaching the target.
+    pos if pos < position_state.target_position ->
+      "\nUnexpected EOF looking for "
+      <> format_token(token)
+      <> " at position "
+      <> int.to_string(position_state.target_position)
+    _ -> {
+      // Column is measured from the first byte of the line holding the target.
+      let column =
+        position_state.target_position
+        - position_state.current_line_first_byte_position
+      "Unexpected Token "
+      <> format_token(token)
+      <> "\nAt line "
+      <> int.to_string(position_state.current_line_number)
+      <> " column "
+      <> int.to_string(column)
+      <> "\n\n"
+      <> {
+        position_state.current_line_bytes
+        |> bytes_builder.to_bit_array
+        |> bit_array.to_string
+        |> result.unwrap("Unexpected unicode")
+      }
+      <> "\n"
+      <> string.repeat(" ", column - 1)
+      <> "^\n"
+    }
+  }
+}
+
+// Fold step that locates the line containing `state.target_position`.
+// It is applied to each byte in turn, counting lines as it goes. Once the
+// target has been passed it keeps collecting bytes until the end of that
+// line, then stops so the state describes exactly that line.
+//
+// No debug output is produced here: this runs once per byte of the source
+// file, so printing would flood the terminal on any non-trivial input.
+fn fold_position_to_lines(
+  state: PositionState,
+  byte: Int,
+) -> list.ContinueOrStop(PositionState) {
+  case byte, state.current_position, state.target_position {
+    // Newline (byte 10) before the target: start a fresh line.
+    10, curr, target if curr < target ->
+      list.Continue(
+        PositionState(
+          ..state,
+          current_line_first_byte_position: state.current_position + 1,
+          current_line_number: state.current_line_number + 1,
+          current_line_bytes: bytes_builder.new(),
+          current_position: state.current_position + 1,
+        ),
+      )
+    // Newline after the target was consumed: the line is complete. The
+    // newline itself is neither counted nor stored.
+    10, _, _ -> list.Stop(state)
+    // Any other byte belongs to the current line.
+    byte, _, _ -> {
+      list.Continue(
+        PositionState(
+          ..state,
+          current_line_bytes: bytes_builder.append(state.current_line_bytes, <<
+            byte,
+          >>),
+          current_position: state.current_position + 1,
+        ),
+      )
+    }
+  }
+}
+
+// Render a token for inclusion in an error message. Only integer tokens are
+// rendered (as their source text); any other token is debug-printed and
+// replaced by a placeholder.
+fn format_token(token: token.Token) -> String {
+  case token {
+    token.Int(digits) -> digits
+    unsupported -> {
+      pprint.debug(unsupported)
+      "<TODO Unknown Token>"
+    }
+  }
+}
diff --git a/src/macabre.gleam b/src/macabre.gleam
@@ -3,6 +3,7 @@ import compiler
 import gleam/io
 import gleam/result
 import gleam/string
+import internal/errors
 import output
 import simplifile
 
@@ -16,9 +17,7 @@ pub fn compile_module(filename: String) -> Result(Nil, String) {
   |> result.try(fn(content) {
     content
     |> compiler.compile
-    |> result.map_error(fn(error) {
-      "Unable to compile " <> filename <> ":\n    " <> error
-    })
+    |> result.map_error(errors.format_glance_error(_, filename, content))
   })
   |> result.try(output.write(_, output.replace_extension(filename)))
   |> result.try(fn(_) {

diff --git a/test/assignment_test.gleam → test/compiler/assignment_test.gleam b/test/assignment_test.gleam → test/compiler/assignment_test.gleam
diff --git a/test/bitstring_test.gleam → test/compiler/bitstring_test.gleam b/test/bitstring_test.gleam → test/compiler/bitstring_test.gleam
diff --git a/test/case_test.gleam → test/compiler/case_test.gleam b/test/case_test.gleam → test/compiler/case_test.gleam
diff --git a/test/expression_test.gleam → test/compiler/expression_test.gleam b/test/expression_test.gleam → test/compiler/expression_test.gleam
diff --git a/test/function_test.gleam → test/compiler/function_test.gleam b/test/function_test.gleam → test/compiler/function_test.gleam
diff --git a/test/imports_test.gleam → test/compiler/imports_test.gleam b/test/imports_test.gleam → test/compiler/imports_test.gleam
diff --git a/test/transformer_test.gleam → test/compiler/transformer_test.gleam b/test/transformer_test.gleam → test/compiler/transformer_test.gleam
diff --git a/test/types_test.gleam → test/compiler/types_test.gleam b/test/types_test.gleam → test/compiler/types_test.gleam
diff --git a/test/internal/bytes_test.gleam b/test/internal/bytes_test.gleam
@@ -0,0 +1,18 @@
+import gleam/iterator
+import gleeunit/should
+import internal/bytes
+
+// ASCII text yields one byte per character.
+pub fn iterate_ascii_bytes_test() {
+  let observed = iterator.to_list(bytes.iterate("hello"))
+  should.equal(observed, [104, 101, 108, 108, 111])
+}
+
+// A multi-codepoint emoji yields every byte of its UTF-8 encoding.
+pub fn iterate_utf8_bytes_test() {
+  let observed = iterator.to_list(bytes.iterate("🏳️‍🌈"))
+  should.equal(observed, [
+    240, 159, 143, 179, 239, 184, 143, 226, 128, 141, 240, 159, 140, 136,
+  ])
+}
diff --git a/test/internal/errors_test.gleam b/test/internal/errors_test.gleam
@@ -0,0 +1,35 @@
+import gleeunit/should
+import glexer
+import glexer/token
+import internal/errors
+import pprint
+
+// Reminder: glexer.Position is 0-indexed, but output columns are 1-indexed
+// Byte offset 0 is reported as line 1, column 1, with the marker unindented.
+pub fn position_at_first_byte_test() {
+  let rendered =
+    errors.format_unexpected_token(token.Int("5"), glexer.Position(0), "5bcdefg")
+  let expected = "Unexpected Token 5\nAt line 1 column 1\n\n5bcdefg\n^\n"
+  should.equal(rendered, expected)
+}
+
+// An offset inside the first line is reported as line 1 with a 1-indexed column.
+pub fn position_in_first_line_test() {
+  let rendered =
+    errors.format_unexpected_token(token.Int("5"), glexer.Position(4), "abcd5fg")
+  let expected = "Unexpected Token 5\nAt line 1 column 5\n\nabcd5fg\n    ^\n"
+  should.equal(rendered, expected)
+}
+
+// An offset past the first newline is reported relative to the second line.
+pub fn position_in_second_line_test() {
+  let source = "abc\nd5fg"
+  let rendered =
+    errors.format_unexpected_token(token.Int("5"), glexer.Position(5), source)
+  let expected = "Unexpected Token 5\nAt line 2 column 2\n\nd5fg\n ^\n"
+  should.equal(rendered, expected)
+}
+
+// An empty line between the start and the target still counts as a line, so
+// the target on the third physical line is reported as line 3.
+pub fn position_after_newline_test() {
+  errors.format_unexpected_token(
+    token.Int("5"),
+    glexer.Position(6),
+    "abc\n\nd5fg",
+  )
+  |> should.equal("Unexpected Token 5\nAt line 3 column 2\n\nd5fg\n ^\n")
+}