-
Continuing #2017: I'm working on a parser for liquid templates. Given the following template: {% if product.type == "Shirt" or product.type == "Shoes" %}
This is a shirt or a pair of shoes.
{% endif %} I get the following tokens: {
"errors": [],
"groups": {},
"tokens": [
{
"endColumn": 2,
"endLine": 1,
"endOffset": 1,
"image": "{%",
"startColumn": 1,
"startLine": 1,
"startOffset": 0,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /\\{%-\\?/,
"PUSH_MODE": "tag",
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "TagStart",
"tokenTypeIdx": 10,
},
"tokenTypeIdx": 10,
},
{
"endColumn": 5,
"endLine": 1,
"endOffset": 4,
"image": "if",
"startColumn": 4,
"startLine": 1,
"startOffset": 3,
"tokenType": {
"CATEGORIES": [],
"LONGER_ALT": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"PATTERN": "if",
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "ControlIf",
"tokenTypeIdx": 19,
},
"tokenTypeIdx": 19,
},
{
"endColumn": 18,
"endLine": 1,
"endOffset": 17,
"image": "product.type",
"startColumn": 7,
"startLine": 1,
"startOffset": 6,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"tokenTypeIdx": 4,
},
{
"endColumn": 21,
"endLine": 1,
"endOffset": 20,
"image": "==",
"startColumn": 20,
"startLine": 1,
"startOffset": 19,
"tokenType": {
"CATEGORIES": [],
"LONGER_ALT": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"PATTERN": /,\\|:\\|==\\|!=\\|>\\|<\\|>=\\|<=\\|=\\|or\\|and\\|contains\\|liquid\\|echo\\|render\\|with\\|as\\|for\\|include/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Operator",
"tokenTypeIdx": 41,
},
"tokenTypeIdx": 41,
},
{
"endColumn": 29,
"endLine": 1,
"endOffset": 28,
"image": ""Shirt"",
"startColumn": 23,
"startLine": 1,
"startOffset": 22,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /"\\.\\+\\?"\\|'\\.\\+\\?'/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "String",
"tokenTypeIdx": 5,
},
"tokenTypeIdx": 5,
},
{
"endColumn": 32,
"endLine": 1,
"endOffset": 31,
"image": "or",
"startColumn": 31,
"startLine": 1,
"startOffset": 30,
"tokenType": {
"CATEGORIES": [],
"LONGER_ALT": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"PATTERN": /,\\|:\\|==\\|!=\\|>\\|<\\|>=\\|<=\\|=\\|or\\|and\\|contains\\|liquid\\|echo\\|render\\|with\\|as\\|for\\|include/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Operator",
"tokenTypeIdx": 41,
},
"tokenTypeIdx": 41,
},
{
"endColumn": 45,
"endLine": 1,
"endOffset": 44,
"image": "product.type",
"startColumn": 34,
"startLine": 1,
"startOffset": 33,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"tokenTypeIdx": 4,
},
{
"endColumn": 48,
"endLine": 1,
"endOffset": 47,
"image": "==",
"startColumn": 47,
"startLine": 1,
"startOffset": 46,
"tokenType": {
"CATEGORIES": [],
"LONGER_ALT": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"PATTERN": /,\\|:\\|==\\|!=\\|>\\|<\\|>=\\|<=\\|=\\|or\\|and\\|contains\\|liquid\\|echo\\|render\\|with\\|as\\|for\\|include/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Operator",
"tokenTypeIdx": 41,
},
"tokenTypeIdx": 41,
},
{
"endColumn": 56,
"endLine": 1,
"endOffset": 55,
"image": ""Shoes"",
"startColumn": 50,
"startLine": 1,
"startOffset": 49,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /"\\.\\+\\?"\\|'\\.\\+\\?'/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "String",
"tokenTypeIdx": 5,
},
"tokenTypeIdx": 5,
},
{
"endColumn": 59,
"endLine": 1,
"endOffset": 58,
"image": "%}",
"startColumn": 58,
"startLine": 1,
"startOffset": 57,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /-\\?%\\}/,
"POP_MODE": true,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "TagEnd",
"tokenTypeIdx": 14,
},
"tokenTypeIdx": 14,
},
{
"endColumn": 1,
"endLine": 2,
"endOffset": 97,
"image": "
This is a shirt or a pair of shoes.
",
"startColumn": 60,
"startLine": 1,
"startOffset": 59,
"tokenType": {
"CATEGORIES": [],
"LINE_BREAKS": true,
"PATTERN": /\\(\\[\\^\\{\\]\\|\\(\\{\\[\\^%\\{\\]\\)\\)\\+/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Text",
"tokenTypeIdx": 11,
},
"tokenTypeIdx": 11,
},
{
"endColumn": 2,
"endLine": 3,
"endOffset": 99,
"image": "{%",
"startColumn": 1,
"startLine": 3,
"startOffset": 98,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /\\{%-\\?/,
"PUSH_MODE": "tag",
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "TagStart",
"tokenTypeIdx": 10,
},
"tokenTypeIdx": 10,
},
{
"endColumn": 8,
"endLine": 3,
"endOffset": 105,
"image": "endif",
"startColumn": 4,
"startLine": 3,
"startOffset": 101,
"tokenType": {
"CATEGORIES": [],
"LONGER_ALT": {
"CATEGORIES": [],
"PATTERN": /\\[a-z_-\\]\\(\\?:\\[a-z0-9\\._-\\]\\|\\\\\\[\\(\\?:-\\?\\\\d\\+\\|"\\.\\+"\\|\\[a-z_-\\]\\+\\)\\]\\)\\*/i,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Identifier",
"tokenTypeIdx": 4,
},
"PATTERN": "endif",
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "ControlEndif",
"tokenTypeIdx": 22,
},
"tokenTypeIdx": 22,
},
{
"endColumn": 11,
"endLine": 3,
"endOffset": 108,
"image": "%}",
"startColumn": 10,
"startLine": 3,
"startOffset": 107,
"tokenType": {
"CATEGORIES": [],
"PATTERN": /-\\?%\\}/,
"POP_MODE": true,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "TagEnd",
"tokenTypeIdx": 14,
},
"tokenTypeIdx": 14,
},
{
"endColumn": 12,
"endLine": 3,
"endOffset": 109,
"image": "
",
"startColumn": 12,
"startLine": 3,
"startOffset": 109,
"tokenType": {
"CATEGORIES": [],
"LINE_BREAKS": true,
"PATTERN": /\\(\\[\\^\\{\\]\\|\\(\\{\\[\\^%\\{\\]\\)\\)\\+/,
"categoryMatches": [],
"categoryMatchesMap": {},
"isParent": false,
"name": "Text",
"tokenTypeIdx": 11,
},
"tokenTypeIdx": 11,
},
],
} I am trying to make a parser for these tokens. I find it intuitive to consider the entire text a "document," and the body of the if statement a sub-document. So far I've put the following together: class LiquidParser extends CstParser {
constructor() {
super(allTokens)
this.performSelfAnalysis()
}
public doc = this.RULE('doc', () => {
this.MANY({
DEF: () => {
this.OR([
{ ALT: () => this.CONSUME(TextToken) },
// { ALT: () => this.SUBRULE(this.object) },
// { ALT: () => this.SUBRULE(this.controlFor) },
{ ALT: () => this.SUBRULE(this.controlIf) },
])
},
})
})
public controlIf = this.RULE('controlIf', () => {
this.CONSUME(TagStart)
this.CONSUME(ControlIf)
this.SUBRULE(this.expression)
this.CONSUME(TagEnd)
this.SUBRULE(this.doc)
this.CONSUME2(TagStart)
this.CONSUME(ControlEndif)
this.CONSUME2(TagEnd)
})
public expression = this.RULE('expression', () => {
this.MANY_SEP1({
SEP: Operator,
DEF: () => {
this.OR([
{ ALT: () => this.CONSUME(Identifier) },
{ ALT: () => this.SUBRULE(this.primitive) },
{ ALT: () => this.CONSUME(RangeToken) },
])
},
})
this.OPTION(() => {
this.CONSUME(Pipe)
this.MANY_SEP2({
SEP: Pipe,
DEF: () => {
this.SUBRULE(this.filterInstance)
},
})
})
})
} Looking at the JSON examples in this repository, it seems to me that recursive parsing should be possible (I can see examples of recursive rules).
It seems to me like chevrotain is getting into the sub document (the body of the if statement), and then not exiting back up to consume the `{% endif %}` tag. I've tried making a sub document rule, but it doesn't work, and even if it did I think I would have to create as many sub document rules as levels of nesting I plan to support. Am I doing something wrong? Are there any tips or tricks I can use to debug or fix this? EDIT: It's probably worth noting that I'm calling the `doc` rule as the parser's entry point.
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
Hey @knpwrs, I haven't tried reproducing your error yet - the report is missing a reproducible repo ;) - but I assume you're encountering one of the pitfalls of Chevrotain's alternative choosing (lookahead) mechanism. Basically, when parsing the inner A quick way to fix this would be to add a |
Beta Was this translation helpful? Give feedback.
Hey @knpwrs,
I haven't tried reproducing your error yet - the report is missing a reproducible repo ;) - but I assume you're encountering one of the pitfalls of Chevrotain's alternative choosing (lookahead) mechanism. Basically, when parsing the inner `doc` rule, the `MANY` consumes the inner text token first and since it finds another `{%` right after, it will perform another repetition in the assumption that there's a nested `controlIf` waiting in the token stream. However, inside of the `MANY`, the `OR` will find that `controlIf` doesn't really match (since it requires a `[TagStart, ControlIf]` sequence), which then results in the parser error you're seeing. Basically, the rule is unaware that there i…