From 527279df3f656e5a2136293a4ccbbfced5f37cdf Mon Sep 17 00:00:00 2001 From: Justin Tirrell Date: Sun, 27 Nov 2022 09:44:26 -0500 Subject: [PATCH 1/4] Allow reading a single cell at a time from a PushbackReader --- .../convex/core/lang/reader/antlr/Convex.g4 | 2 +- .../main/java/convex/core/lang/Reader.java | 2 +- .../convex/core/lang/reader/AntlrReader.java | 122 ++++++++++++++---- 3 files changed, 100 insertions(+), 26 deletions(-) diff --git a/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 b/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 index 2f3fba4b1..26a930e84 100644 --- a/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 +++ b/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 @@ -203,6 +203,6 @@ fragment COMMENT: ';' ~[\r\n]* ; TRASH - : ( WS | COMMENT ) -> channel(HIDDEN) + : ( WS | COMMENT ) -> skip ; diff --git a/convex-core/src/main/java/convex/core/lang/Reader.java b/convex-core/src/main/java/convex/core/lang/Reader.java index 588e163af..ce06c3b79 100644 --- a/convex-core/src/main/java/convex/core/lang/Reader.java +++ b/convex-core/src/main/java/convex/core/lang/Reader.java @@ -60,7 +60,7 @@ public static AList readAll(String source) { * @param source Reader instance to get expression from * @return Parsed form (may be nil) */ - public static ACell read(java.io.Reader source) throws IOException { + public static ACell read(java.io.PushbackReader source) throws IOException { return AntlrReader.read(source); } diff --git a/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java b/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java index f5960eea8..fdf23d237 100644 --- a/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java +++ b/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java @@ -1,12 +1,21 @@ package convex.core.lang.reader; import java.io.IOException; +import java.io.Reader; +import java.io.PushbackReader; import java.util.ArrayList; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStreams; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.CommonTokenFactory; +import org.antlr.v4.runtime.TokenStream; import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.UnbufferedCharStream; +import org.antlr.v4.runtime.UnbufferedTokenStream; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenSource; +import org.antlr.v4.runtime.IntStream; import org.antlr.v4.runtime.tree.ErrorNode; import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.runtime.tree.ParseTreeWalker; @@ -433,31 +442,38 @@ public void exitSingleForm(SingleFormContext ctx) { } public static ACell read(String s) { - return read(CharStreams.fromString(s)); + return read(new PushbackReader(new java.io.StringReader(s))); } - - public static ACell read(java.io.Reader r) throws IOException { - return read(CharStreams.fromReader(r)); - } - - public static ACell read(CharStream cs) { - ConvexLexer lexer=new ConvexLexer(cs); - lexer.removeErrorListeners(); - CommonTokenStream tokens = new CommonTokenStream(lexer); - ConvexParser parser = new ConvexParser(tokens); - parser.removeErrorListeners(); - - ParseTree tree = parser.singleForm(); - - CRListener visitor=new CRListener(); - ParseTreeWalker.DEFAULT.walk(visitor, tree); - - ArrayList top=visitor.popList(); - if (top.size()!=1) { - throw new ParseException("Bad parse output: "+top); - } - - return top.get(0); + + + public static ACell read(PushbackReader rdr) { + try { + CharStream cs = new InteractiveCharStream(rdr); + ConvexLexer lexer=new ConvexLexer(cs); + lexer.removeErrorListeners(); + lexer.setTokenFactory(new CommonTokenFactory(true)); + TokenStream tokens = new InteractiveTokenStream(lexer); + ConvexParser parser = new ConvexParser(tokens); + parser.removeErrorListeners(); + + ParseTree tree = parser.form(); + + rdr.unread(cs.LA(1)); + + CRListener visitor=new CRListener(); + ParseTreeWalker.DEFAULT.walk(visitor, tree); + + ArrayList top=visitor.popList(); + if (top.size()!=1) { + throw new ParseException("Bad parse output: "+top); + } + + return top.get(0); + + } catch (IOException e) { + throw Utils.sneakyThrow(e); + } + } public static AList readAll(String source) { @@ -479,4 +495,62 @@ public static AList readAll(CharStream cs) { return Lists.create(top); } + +} + +class InteractiveCharStream extends UnbufferedCharStream { + + InteractiveCharStream(Reader input) + { + super(input); + } + + @Override + public void consume() { + + if (LA(1) == IntStream.EOF) { + throw new IllegalStateException("cannot consume EOF"); + } + + // buf always has at least data[p==0] in this method due to ctor + lastChar = data[p]; // track last char for LA(-1) + if (p == n-1 && numMarkers==0) { + n = 0; + p = -1; // p++ will leave this at 0 + lastCharBufferStart = lastChar; + } + + p++; + currentCharIndex++; + //sync(1); + } +} + +class InteractiveTokenStream extends UnbufferedTokenStream { + + InteractiveTokenStream(TokenSource tokenSource) + { + super(tokenSource); + } + + @Override + public void consume() { + if (LA(1) == Token.EOF) { + throw new IllegalStateException("cannot consume EOF"); + } + + // buf always has at least tokens[p==0] in this method due to ctor + lastToken = tokens[p]; // track last token for LT(-1) + + // if we're at last token and no markers, opportunity to flush buffer + if ( p == n-1 && numMarkers==0 ) { + n = 0; + p = -1; // p++ will leave this at 0 + lastTokenBufferStart = lastToken; + } + + p++; + currentTokenIndex++; + //sync(1); + } } From f347a80b428b58f9bee03a662ae0d8515460c5d4 Mon Sep 17 00:00:00 2001 From: Justin Tirrell Date: Mon, 28 Nov 2022 11:48:11 -0500 Subject: [PATCH 2/4] Separate readOne and existing fully-consuming read --- .../main/java/convex/core/lang/Reader.java | 17 +++- .../convex/core/lang/reader/AntlrReader.java | 80 ++++++++++++------- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/convex-core/src/main/java/convex/core/lang/Reader.java b/convex-core/src/main/java/convex/core/lang/Reader.java index ce06c3b79..84aa7b9df 100644 --- a/convex-core/src/main/java/convex/core/lang/Reader.java +++ b/convex-core/src/main/java/convex/core/lang/Reader.java @@ -55,12 +55,13 @@ public static AList readAll(String source) { } /** - * Parses an expression and returns a form as an Object + * Parses an expression and returns a form as an Object, consuming the + * entire source * * @param source Reader instance to get expression from * @return Parsed form (may be nil) */ - public static ACell read(java.io.PushbackReader source) throws IOException { + public static ACell read(java.io.Reader source) throws IOException { return AntlrReader.read(source); } @@ -75,4 +76,16 @@ public static R read(String source) { return (R) AntlrReader.read(source); } + /** + * Parses an expression and returns a form as an Object. Leaves + * remaining input on the source. + * + * @param source PushbackReader instance to get expression from + * @return Parsed form (may be nil) + */ + public static ACell readOne(java.io.PushbackReader source) throws IOException { + return AntlrReader.readOne(source); + } + + } diff --git a/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java b/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java index fdf23d237..2d32619f2 100644 --- a/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java +++ b/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java @@ -1,8 +1,6 @@ package convex.core.lang.reader; import java.io.IOException; -import java.io.Reader; -import java.io.PushbackReader; import java.util.ArrayList; import org.antlr.v4.runtime.CharStream; @@ -442,13 +440,55 @@ public void exitSingleForm(SingleFormContext ctx) { } public static ACell read(String s) { - return read(new PushbackReader(new java.io.StringReader(s))); + return read(CharStreams.fromString(s)); } + public static ACell read(java.io.Reader r) throws IOException { + return read(CharStreams.fromReader(r)); + } + + public static ACell read(CharStream cs) { + ConvexLexer lexer=new ConvexLexer(cs); + lexer.removeErrorListeners(); + CommonTokenStream tokens = new CommonTokenStream(lexer); + ConvexParser parser = new ConvexParser(tokens); + parser.removeErrorListeners(); + + ParseTree tree = parser.singleForm(); + + CRListener visitor=new CRListener(); + ParseTreeWalker.DEFAULT.walk(visitor, tree); + + ArrayList top=visitor.popList(); + if (top.size()!=1) { + throw new ParseException("Bad parse output: "+top); + } + + return top.get(0); + } + + public static AList readAll(String source) { + return readAll(CharStreams.fromString(source)); + } + + public static AList readAll(CharStream cs) { + ConvexLexer lexer=new ConvexLexer(cs); + lexer.removeErrorListeners(); + CommonTokenStream tokens = new CommonTokenStream(lexer); + ConvexParser parser = new ConvexParser(tokens); + parser.removeErrorListeners(); + ParseTree tree = parser.forms(); - public static ACell read(PushbackReader rdr) { - try { - CharStream cs = new InteractiveCharStream(rdr); + CRListener visitor=new CRListener(); + ParseTreeWalker.DEFAULT.walk(visitor, tree); + + ArrayList top=visitor.popList(); + return Lists.create(top); + } + + public static ACell readOne(java.io.PushbackReader r) { + try { + CharStream cs = new InteractiveCharStream(r); ConvexLexer lexer=new ConvexLexer(cs); lexer.removeErrorListeners(); lexer.setTokenFactory(new CommonTokenFactory(true)); @@ -458,7 +498,7 @@ public static ACell read(PushbackReader rdr) { ParseTree tree = parser.form(); - rdr.unread(cs.LA(1)); + r.unread(cs.LA(1)); CRListener visitor=new CRListener(); ParseTreeWalker.DEFAULT.walk(visitor, tree); @@ -470,37 +510,19 @@ public static ACell read(PushbackReader rdr) { return top.get(0); - } catch (IOException e) { - throw Utils.sneakyThrow(e); - } + } catch (IOException e) { + throw Utils.sneakyThrow(e); + } } - - public static AList readAll(String source) { - return readAll(CharStreams.fromString(source)); - } - public static AList readAll(CharStream cs) { - ConvexLexer lexer=new ConvexLexer(cs); - lexer.removeErrorListeners(); - CommonTokenStream tokens = new CommonTokenStream(lexer); - ConvexParser parser = new ConvexParser(tokens); - parser.removeErrorListeners(); - ParseTree tree = parser.forms(); - - CRListener visitor=new CRListener(); - ParseTreeWalker.DEFAULT.walk(visitor, tree); - - ArrayList top=visitor.popList(); - return Lists.create(top); - } } class InteractiveCharStream extends UnbufferedCharStream { - InteractiveCharStream(Reader input) + InteractiveCharStream(java.io.Reader input) { super(input); } From 74400783037d29596c1fc9c3a0a250184d4d3e6c Mon Sep 17 00:00:00 2001 From: Justin Tirrell Date: Mon, 28 Nov 2022 14:55:36 -0500 Subject: [PATCH 3/4] More lenient grammar rules to detect and throw ParseExceptions --- .../convex/core/lang/reader/antlr/Convex.g4 | 16 ++++------------ .../convex/core/lang/reader/AntlrReader.java | 6 +++++- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 b/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 index 26a930e84..ce9a98631 100644 --- a/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 +++ b/convex-core/src/main/antlr4/convex/core/lang/reader/antlr/Convex.g4 @@ -92,18 +92,10 @@ BOOL : 'true' | 'false' ; DOUBLE: (DIGITS | SIGNED_DIGITS) DOUBLE_TAIL; - -fragment -DOUBLE_TAIL: - DECIMAL EPART | DECIMAL | EPART; -fragment -DECIMAL: - '.' DIGITS; - -fragment -EPART: - [eE] (DIGITS | SIGNED_DIGITS); +fragment +DOUBLE_TAIL: + [.eE] [-0-9.eE]*; DIGITS: [0-9]+; @@ -148,7 +140,7 @@ QUOTING: '\'' | '`' | '~' | '~@'; KEYWORD: - ':' NAME; + ':' NAME?; SYMBOL : NAME diff --git a/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java b/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java index 2d32619f2..faf3287a8 100644 --- a/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java +++ b/convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java @@ -235,7 +235,11 @@ public void enterDoubleValue(DoubleValueContext ctx) { @Override public void exitDoubleValue(DoubleValueContext ctx) { String s=ctx.getText(); - push( CVMDouble.parse(s)); + try { + push( CVMDouble.parse(s)); + } catch (NumberFormatException x) { + throw new ParseException("Unparseable double value: "+s,x); + } } @Override From 55e2c2d5536b4edb012fda7854c08a1adab054e5 Mon Sep 17 00:00:00 2001 From: Justin Tirrell Date: Wed, 30 Nov 2022 10:03:20 -0500 Subject: [PATCH 4/4] Add some readOne tests --- .../java/convex/core/lang/reader/ANTLRTest.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/convex-core/src/test/java/convex/core/lang/reader/ANTLRTest.java b/convex-core/src/test/java/convex/core/lang/reader/ANTLRTest.java index cce8bd171..772cc8728 100644 --- a/convex-core/src/test/java/convex/core/lang/reader/ANTLRTest.java +++ b/convex-core/src/test/java/convex/core/lang/reader/ANTLRTest.java @@ -42,6 +42,11 @@ private R readAll(String s) { return (R) AntlrReader.readAll(s); } + @SuppressWarnings("unchecked") + private R readOne(String s) { + return (R) AntlrReader.readOne(new java.io.PushbackReader(new java.io.StringReader(s))); + } + @Test public void testNil() { assertNull(read("nil")); } @@ -316,6 +321,15 @@ private void doDifferentPrintTests(String src, String dst) { doRoundTripTest(dst); // final value should round trip normally } + @Test public void testReadOne() { + assertEquals(Lists.of(1,2),readOne("(1 2) 3")); + assertEquals(Lists.of(1,2),readOne("(1 2)(3 4)")); + assertEquals(Lists.of(1,2),readOne("(1 2))))")); + assertThrows(ParseException.class,()->readOne(")")); + assertThrows(ParseException.class,()->readOne("1.0e0.1234")); + assertThrows(ParseException.class,()->readOne(":")); + } + }