Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interactive reads #439

Merged
merged 4 commits into from
Dec 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,18 +92,10 @@ BOOL : 'true' | 'false' ;

DOUBLE:
(DIGITS | SIGNED_DIGITS) DOUBLE_TAIL;

fragment
DOUBLE_TAIL:
DECIMAL EPART | DECIMAL | EPART;

fragment
DECIMAL:
'.' DIGITS;

fragment
EPART:
[eE] (DIGITS | SIGNED_DIGITS);
fragment
DOUBLE_TAIL:
[.eE] [-0-9.eE]*;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something like this is required to properly throw a ParseException on 1.0e0.1234, instead of just reading 1.0 and leaving the rest on the stream.


DIGITS:
[0-9]+;
Expand Down Expand Up @@ -148,7 +140,7 @@ QUOTING: '\'' | '`' | '~' | '~@';


KEYWORD:
':' NAME;
':' NAME?;
Copy link
Author

@jjttjj jjttjj Dec 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allows for a parse exception to be thrown on `:` input


SYMBOL
: NAME
Expand Down Expand Up @@ -203,6 +195,6 @@ fragment
COMMENT: ';' ~[\r\n]* ;

TRASH
: ( WS | COMMENT ) -> channel(HIDDEN)
: ( WS | COMMENT ) -> skip
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The built in CommonTokenStream that is used for the other reads filters for tokens only from the default channel. This feature doesn't use that token stream and instead modifies UnbufferedTokenStream, which doesn't include channel filtering.

It would be possible to add that functionality back into the token stream we use, but since we are not doing anything with the hidden token channel, it should be fine to just skip these tokens instead.

;

15 changes: 14 additions & 1 deletion convex-core/src/main/java/convex/core/lang/Reader.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ public static AList<ACell> readAll(String source) {
}

/**
* Parses an expression and returns a form as an Object
* Parses an expression and returns a form as an Object, consuming the
* entire source
*
* @param source Reader instance to get expression from
* @return Parsed form (may be nil)
Expand All @@ -75,4 +76,16 @@ public static <R extends ACell> R read(String source) {
return (R) AntlrReader.read(source);
}

/**
 * Parses a single expression and returns a form as an Object. Leaves any
 * remaining input unconsumed on the source, so the caller can continue
 * reading further forms interactively.
 *
 * @param source PushbackReader instance to get expression from
 * @return Parsed form (may be nil)
 * @throws IOException if an I/O error occurs while reading from the source
 */
public static ACell readOne(java.io.PushbackReader source) throws IOException {
return AntlrReader.readOne(source);
}


}
120 changes: 110 additions & 10 deletions convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.CommonTokenFactory;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.UnbufferedCharStream;
import org.antlr.v4.runtime.UnbufferedTokenStream;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenSource;
import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
Expand Down Expand Up @@ -228,7 +235,11 @@ public void enterDoubleValue(DoubleValueContext ctx) {
@Override
public void exitDoubleValue(DoubleValueContext ctx) {
	// Convert the matched text to a CVM double, surfacing any parse
	// failure as a ParseException rather than a raw NumberFormatException.
	String text = ctx.getText();
	try {
		push(CVMDouble.parse(text));
	} catch (NumberFormatException x) {
		throw new ParseException("Unparseable double value: " + text, x);
	}
}

@Override
Expand Down Expand Up @@ -435,31 +446,31 @@ public void exitSingleForm(SingleFormContext ctx) {
/**
 * Parses a single form from a source String.
 *
 * @param s Source text to read
 * @return Parsed form (may be nil)
 */
public static ACell read(String s) {
	CharStream cs = CharStreams.fromString(s);
	return read(cs);
}

/**
 * Parses a single form from a Reader.
 *
 * @param r Reader to consume source text from
 * @return Parsed form (may be nil)
 * @throws IOException if an I/O error occurs while reading
 */
public static ACell read(java.io.Reader r) throws IOException {
	CharStream cs = CharStreams.fromReader(r);
	return read(cs);
}

/**
 * Parses a single form from an ANTLR CharStream.
 *
 * @param cs CharStream to read from
 * @return Parsed form (may be nil)
 * @throws ParseException if parsing does not yield exactly one form
 */
public static ACell read(CharStream cs) {
	// Set up lexer/parser with default error listeners removed
	ConvexLexer lex = new ConvexLexer(cs);
	lex.removeErrorListeners();
	ConvexParser parser = new ConvexParser(new CommonTokenStream(lex));
	parser.removeErrorListeners();

	// Parse the input as a single form
	ParseTree tree = parser.singleForm();

	// Walk the parse tree, accumulating constructed cells
	CRListener listener = new CRListener();
	ParseTreeWalker.DEFAULT.walk(listener, tree);

	ArrayList<ACell> results = listener.popList();
	if (results.size() != 1) {
		throw new ParseException("Bad parse output: " + results);
	}
	return results.get(0);
}

/**
 * Parses all forms in the given source String.
 *
 * @param source Source text to read
 * @return List of parsed forms
 */
public static AList<ACell> readAll(String source) {
	CharStream cs = CharStreams.fromString(source);
	return readAll(cs);
}
Expand All @@ -471,12 +482,101 @@ public static AList<ACell> readAll(CharStream cs) {
ConvexParser parser = new ConvexParser(tokens);
parser.removeErrorListeners();
ParseTree tree = parser.forms();

CRListener visitor=new CRListener();
ParseTreeWalker.DEFAULT.walk(visitor, tree);

ArrayList<ACell> top=visitor.popList();
return Lists.create(top);
}

public static ACell readOne(java.io.PushbackReader r) {
try {
CharStream cs = new InteractiveCharStream(r);
ConvexLexer lexer=new ConvexLexer(cs);
lexer.removeErrorListeners();
lexer.setTokenFactory(new CommonTokenFactory(true));
TokenStream tokens = new InteractiveTokenStream(lexer);
ConvexParser parser = new ConvexParser(tokens);
parser.removeErrorListeners();

ParseTree tree = parser.form();

r.unread(cs.LA(1));
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even after disabling the syncing in the streams with our custom consume implementations below, there is a single additional character that is read from the stream when a form is parsed. I'm currently unsure of the exact mechanism that causes this but I believe it involves consume calls in antlr's LexerATNSimulator class. It's possible that there's a better way to handle this there. But just unreading from the stream here seems to work well.


CRListener visitor=new CRListener();
ParseTreeWalker.DEFAULT.walk(visitor, tree);

ArrayList<ACell> top=visitor.popList();
if (top.size()!=1) {
throw new ParseException("Bad parse output: "+top);
}

return top.get(0);

} catch (IOException e) {
throw Utils.sneakyThrow(e);
}

}



}

class InteractiveCharStream extends UnbufferedCharStream {

InteractiveCharStream(java.io.Reader input)
{
super(input);
}

@Override
public void consume() {

if (LA(1) == IntStream.EOF) {
throw new IllegalStateException("cannot consume EOF");
}

// buf always has at least data[p==0] in this method due to ctor
lastChar = data[p]; // track last char for LA(-1)
if (p == n-1 && numMarkers==0) {
n = 0;
p = -1; // p++ will leave this at 0
lastCharBufferStart = lastChar;
}

p++;
currentCharIndex++;
//sync(1);
Copy link
Author

@jjttjj jjttjj Dec 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two consume implementations are the same as the Unbuffered(Char|Token)Stream class that they extend except without the sync call at the end, which consumes more characters/tokens from the input

}
}

/**
 * Token stream for interactive reads, based on ANTLR's
 * UnbufferedTokenStream.
 *
 * consume() below mirrors UnbufferedTokenStream.consume() except that the
 * trailing sync(1) call is omitted, so no extra tokens are pulled from the
 * underlying lexer beyond what the parser has actually consumed.
 */
class InteractiveTokenStream extends UnbufferedTokenStream<Token> {

InteractiveTokenStream(TokenSource tokenSource)
{
super(tokenSource);
}

@Override
public void consume() {
if (LA(1) == Token.EOF) {
throw new IllegalStateException("cannot consume EOF");
}

// buf always has at least tokens[p==0] in this method due to ctor
lastToken = tokens[p]; // track last token for LT(-1)

// if we're at last token and no markers, opportunity to flush buffer
if ( p == n-1 && numMarkers==0 ) {
n = 0;
p = -1; // p++ will leave this at 0
lastTokenBufferStart = lastToken;
}

p++;
currentTokenIndex++;
// Deliberately omitted (present in UnbufferedTokenStream.consume()):
// syncing here would consume an additional token from the token source.
//sync(1);
}
}
14 changes: 14 additions & 0 deletions convex-core/src/test/java/convex/core/lang/reader/ANTLRTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ private <R extends ACell> R readAll(String s) {
return (R) AntlrReader.readAll(s);
}

// Reads a single form from the given source String via AntlrReader.readOne,
// leaving any trailing input unconsumed on the underlying reader.
@SuppressWarnings("unchecked")
private <R extends ACell> R readOne(String s) {
	java.io.PushbackReader pr = new java.io.PushbackReader(new java.io.StringReader(s));
	return (R) AntlrReader.readOne(pr);
}

// "nil" should read as the null value
@Test public void testNil() {
assertNull(read("nil"));
}
Expand Down Expand Up @@ -316,6 +321,15 @@ private void doDifferentPrintTests(String src, String dst) {
doRoundTripTest(dst); // final value should round trip normally
}

@Test public void testReadOne() {
// readOne parses only the first form, leaving the rest of the input unread
assertEquals(Lists.of(1,2),readOne("(1 2) 3"));
assertEquals(Lists.of(1,2),readOne("(1 2)(3 4)"));
// trailing unbalanced closers are left on the stream, not an error
assertEquals(Lists.of(1,2),readOne("(1 2))))"));
// inputs that cannot form a valid single form must throw eagerly
assertThrows(ParseException.class,()->readOne(")"));
assertThrows(ParseException.class,()->readOne("1.0e0.1234"));
assertThrows(ParseException.class,()->readOne(":"));
}



}