From 14cec6e77042ad9a3d2cfe4f70f6807b813f3ef8 Mon Sep 17 00:00:00 2001 From: "Amy J. Ko" Date: Wed, 10 Jul 2024 15:45:10 -0700 Subject: [PATCH] Fixed parsing bug that prevented complete parsing of the program. --- CHANGELOG.md | 1 + src/parser/Parser.test.ts | 12 +++++++++++- src/parser/Tokens.ts | 2 +- src/parser/parseExpression.ts | 24 ++++++++++++++++++------ src/parser/parseProgram.ts | 3 +-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 019d83fc3..b952d2b89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Dates are in `YYYY-MM-DD` format and versions are in [semantic versioning](http: ### Fixed - [#514](https://github.com/wordplaydev/wordplay/issues/514) Fixed cursor position on hidden language tags. +- Fixed parsing bug that prevented complete parsing of the program. ## 0.10.4 2024-07-08 diff --git a/src/parser/Parser.test.ts b/src/parser/Parser.test.ts index edaea329a..797217282 100644 --- a/src/parser/Parser.test.ts +++ b/src/parser/Parser.test.ts @@ -11,7 +11,7 @@ import NumberLiteral from '@nodes/NumberLiteral'; import NumberType from '@nodes/NumberType'; import NameType from '@nodes/NameType'; import NoneType from '@nodes/NoneType'; -import { toProgram } from './parseProgram'; +import parseProgram, { toProgram } from './parseProgram'; import Program from '@nodes/Program'; import StreamType from '@nodes/StreamType'; import TableType from '@nodes/TableType'; @@ -384,3 +384,13 @@ test('unparsables in docs', () => { expect(doc.markup.paragraphs[0].segments[2]).toBeInstanceOf(Token); expect(doc.markup.paragraphs[0].segments.length).toBe(3); }); + +test('unparsables in blocks', () => { + const program = parseProgram(toTokens('test: Phrase(\\\\)\ntest')); + expect(program).toBeInstanceOf(Program); + expect(program.expression).toBeInstanceOf(Block); + expect(program.expression.statements[0]).toBeInstanceOf(Bind); + expect(program.expression.statements[1]).toBeInstanceOf( + UnparsableExpression, + ); 
+}); diff --git a/src/parser/Tokens.ts b/src/parser/Tokens.ts index 8c08db146..9f31f08dc 100644 --- a/src/parser/Tokens.ts +++ b/src/parser/Tokens.ts @@ -45,7 +45,7 @@ export default class Tokens { } peekUnread() { - return this.#unread; + return this.#unread.slice(); } /** Returns true if the token list isn't empty. */ diff --git a/src/parser/parseExpression.ts b/src/parser/parseExpression.ts index a6c1a838a..32bbb53a0 100644 --- a/src/parser/parseExpression.ts +++ b/src/parser/parseExpression.ts @@ -62,6 +62,7 @@ import Spread from '../nodes/Spread'; import Otherwise from '@nodes/Otherwise'; import Match from '@nodes/Match'; import Input from '@nodes/Input'; +import type Token from '@nodes/Token'; export function toExpression(code: string): Expression { return parseExpression(toTokens(code)); @@ -128,12 +129,23 @@ export function parseBlock( ((root && !doc) || (!root && !doc && tokens.nextIsnt(Sym.EvalClose)) || (doc && tokens.nextIsnt(Sym.Code))), - () => - statements.push( - nextIsBind(tokens, true) - ? parseBind(tokens) - : parseExpression(tokens), - ), + () => { + const next = nextIsBind(tokens, true) + ? parseBind(tokens) + : parseExpression(tokens); + statements.push(next); + // Did we get an unparsable expression with no tokens? Read until we get to the block close or the end of the + // program. If we don't do this, then we will stop reading statements and will not parse the remainder of the program. 
+ if ( + next instanceof UnparsableExpression && + next.unparsables.length === 0 + ) { + const unparsed: Token[] = []; + while (tokens.hasNext() && tokens.nextIsnt(Sym.EvalClose)) + unparsed.push(tokens.read()); + statements.push(new UnparsableExpression(unparsed)); + } + }, ); const close = root diff --git a/src/parser/parseProgram.ts b/src/parser/parseProgram.ts index 79753db90..9252bc8e1 100644 --- a/src/parser/parseProgram.ts +++ b/src/parser/parseProgram.ts @@ -22,8 +22,7 @@ export default function parseProgram(tokens: Tokens, doc = false): Program { const block = parseBlock(tokens, BlockKind.Root, doc); - // If the next token is the end, we're done! Otherwise, read all of the remaining - // tokens and bundle them into an unparsable. + // If the next token is the end, we're done! const end = tokens.nextIsEnd() ? tokens.read(Sym.End) : undefined; return new Program(docs, borrows, block, end);