diff --git a/src/grammars/expr.ometa b/src/grammars/expr.ometa deleted file mode 100644 index 411bb45..0000000 --- a/src/grammars/expr.ometa +++ /dev/null @@ -1,21 +0,0 @@ -Приоритет операторов: -- ~ -- квантификаторы -- seq -- alt - - -ometa OmetaExpr { - binOp = '|' | spaces - postOp = '?' | '*' | '+' - preOp = '~' - - - expr = alt - alt = seq ('|' seq)* - seq = quant (spaces quant)* - quant = not ('?' | '*' | '+')? - not = '~'? operand - operand = '(' exp ') | value - value = letter+ -} \ No newline at end of file diff --git a/src/grammars/math.ometa b/src/grammars/math.ometa deleted file mode 100644 index f86267d..0000000 --- a/src/grammars/math.ometa +++ /dev/null @@ -1,55 +0,0 @@ -ometa { - expr = group (op group)*, - group = '(' expr ')' | num, - op = '-'|'+'|'*'|'/', - num = digit+, - digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' -} - - - -Типы выражений: - -atom -rule -seq -alt -notLess - - -[ - ['expr', ['seq', [ - ['rule', 'group'], - ['notLess', 0, ['seq', [ - ['rule', 'op'], - ['rule', 'group'] - ]]] - ]]], - ['group', ['alt', [ - ['seq', [ - ['atom', '('], - ['rule', 'expr'], - ['atom', ')'], - ]], - ['rule', 'num'] - ]]], - ['op', ['alt', [ - ['atom', '-'], - ['atom', '+'], - ['atom', '*'], - ['atom', '/'], - ]]], - ['num', ['notLess', 1, ['rule', 'digit']]], - ['digit', ['alt', [ - ['atom', '0'], - ['atom', '1'], - ['atom', '2'], - ['atom', '3'], - ['atom', '4'], - ['atom', '5'], - ['atom', '6'], - ['atom', '7'], - ['atom', '8'], - ['atom', '9'], - ]]] -] \ No newline at end of file diff --git a/src/grammars/math2.ometa b/src/grammars/math2.ometa deleted file mode 100644 index 6e1a7ac..0000000 --- a/src/grammars/math2.ometa +++ /dev/null @@ -1,13 +0,0 @@ -ometa Math { - sum = e '+' e - sub = e '-' e - - e = mul | div | num - - mul = group '*' group - div = num '/' num - e = group | - expr = mul | div -} - - diff --git a/src/grammars/ometa1.ometa b/src/grammars/ometa1.ometa index fb66975..641cf1d 100644 --- a/src/grammars/ometa1.ometa +++ 
b/src/grammars/ometa1.ometa @@ -131,8 +131,10 @@ ometa Ometa { rule equal + times token - times not + bind project + range } \ No newline at end of file diff --git a/src/step1.ts b/src/step1.ts index f55e62b..b9ffd05 100644 --- a/src/step1.ts +++ b/src/step1.ts @@ -243,7 +243,7 @@ const grammar: AST.Grammar = [ ] -const p = new Parser(grammar, [...'((1+2)-3*3)/4']) +const p = new Parser(grammar, '((1+2)-3*3)/4' as unknown as any[]) const r = p.match('expr') diff --git a/src/step2/cli.ts b/src/step2/cli.ts index c4b42e2..9c81dbf 100644 --- a/src/step2/cli.ts +++ b/src/step2/cli.ts @@ -1,5 +1,5 @@ import { Parser } from './parser' -import { GrammarAst as AST } from './grammar-ast' +import { Ast as AST } from './grammar-ast' import { math1 } from './grammars/math1' const p = new Parser(math1, [...'((1+2)-3*3)/4']) diff --git a/src/step2/grammar-ast.ts b/src/step2/grammar-ast.ts index a57b683..2421707 100644 --- a/src/step2/grammar-ast.ts +++ b/src/step2/grammar-ast.ts @@ -1,11 +1,30 @@ -export namespace GrammarAst { +export namespace Ast { export type Grammar = Rule[] export type Rule = [string, Expr] - export type Expr = ExSeq | ExAlt | ExAtom | ExRule | ExNotLess - export type ExSeq = ['seq', Expr[]] - export type ExAlt = ['alt', Expr[]] + + export type Expr = + Ex.Seq + | Ex.Alt + | Ex.Atom + | Ex.Rule + | Ex.Times + | Ex.Token + | Ex.Not + | Ex.Project + | Ex.Regex + | Ex.Range - export type ExAtom = ['equal', string] - export type ExRule = ['rule', string] - export type ExNotLess = ['notLess', number, Expr] + export namespace Ex { + export type Seq = ['seq', Expr[]] + export type Alt = ['alt', Expr[]] + export type Atom = ['equal', string] + export type Rule = ['rule', string] + export type Times = ['times', number, number, Expr] + + export type Token = ['token', string] + export type Not = ['not', Expr] + export type Project = ['project', string, Expr] + export type Regex = ['regex', string] + export type Range = ['range', string, string] + } } \ No newline at 
end of file diff --git a/src/step2/grammars/math1.ts b/src/step2/grammars/math1.ts index c6e8cbe..2341534 100644 --- a/src/step2/grammars/math1.ts +++ b/src/step2/grammars/math1.ts @@ -1,9 +1,21 @@ -import { GrammarAst as AST } from '../grammar-ast' +import { Ast as AST } from '../grammar-ast' + +/* + +ometa { + expr = group (op group)*, + group = '(' expr ')' | num, + op = '-'|'+'|'*'|'/', + num = digit+, + digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' +} + +*/ export const math1: AST.Grammar = [ ['expr', ['seq', [ ['rule', 'group'], - ['notLess', 0, ['seq', [ + ['times', 0, null, ['seq', [ ['rule', 'op'], ['rule', 'group'] ]]] @@ -22,7 +34,7 @@ export const math1: AST.Grammar = [ ['equal', '*'], ['equal', '/'], ]]], - ['num', ['notLess', 1, ['rule', 'digit']]], + ['num', ['times', 1, null, ['rule', 'digit']]], ['digit', ['alt', [ ['equal', '0'], ['equal', '1'], diff --git a/src/step2/grammars/ometa1.ts b/src/step2/grammars/ometa1.ts new file mode 100644 index 0000000..86ed0b3 --- /dev/null +++ b/src/step2/grammars/ometa1.ts @@ -0,0 +1,164 @@ +import { IProjectors } from 'step2/types' +import { Ast as AST } from '../grammar-ast' + +/* + +Выражения располагаются в порядке приоритета (внизу - наибольший приоритет) + + +ometa Ometa1 { + ometa = "ometa" ident "{" eRule* "}", + eRule = ident "=" "|"? eTop inlSpaces* ',' newline, + + eTop = eAlt, + eAlt = eProj ("|" eProj)*, + eProj = eSeq ("->" ident)?, + eSeq = eQuant (spaces eQuant)*, + eQuant = eNot ('?' | '*' | '+')?, + eNot = '~'? 
operand, + + operand = + | '(' eTop ')' -> op_group + | eRange -> op_range + | eStr -> op_str + | eToken -> op_token + | eRegex -> op_regex + | ident -> op_rule, + + eRange = alphanum '-' alphanum, + eStr = "'" (~'\'' anything)* '\'', + eToken = "\"" (~'"' anything)* '"', + eRegex = "/" (~'/' anything)* '/' a-z*, + + ident = letter alphanum*, + + alphanum = letter | digit, + digit = 0-9, + letter = A-Z | a-z, + + spaces = /\s+/, + space = /\s/, + inlSpaces = /[ \t]+/, + inlSpace = /[ \t]/, + newline = /(\r\n)|\n/, +} + +*/ + +export const ometaExpr: AST.Grammar = [ + ['ometa', ['seq', [ + ['token', 'ometa'], + ['rule', 'ident'], + ['token', '{'], + ['times', 0, null, ['rule', 'eRule']], + ['token', '}'], + ]]], + + ['rule', ['seq', [ + ['rule', 'ident'], + ['token', '='], + ['times', 0, 1, ['token', '|']], + ['rule', 'eTop'], + ['times', 0, null, ['rule', 'inlSpaces']], + ['equal', ','], + ['rule', 'newline'], + ]]], + + ['eTop', ['rule', 'eAlt']], + + ['eAlt', ['seq', [ + ['rule', 'eProj'], + ['times', 0, null, ['seq', [['equal', '|'], ['rule', 'eProj']]]] + ]]], + + ['eProj', ['seq', [ + ['rule', 'eSeq'], + ['times', 0, 1, ['seq', [['equal', '->'], ['rule', 'ident']]]], + ]]], + + ['eSeq', ['seq', [ + ['rule', 'eQuant'], + ['times', 0, null, ['seq', [['rule', 'spaces'], ['rule', 'eQuant']]]], + ]]], + + ['eQuant', ['seq', [ + ['rule', 'not'], + ['times', 0, 1, ['alt', [ + ['equal', '?'], + ['equal', '*'], + ['equal', '+'], + ]]] + ]]], + + ['eNot', ['seq', [ + ['times', 0, 1, ['equal', '~']], + ['rule', 'operand'] + ]]], + + ['operand', ['alt', [ + ['project', 'op_group', ['seq', [ + ['equal', '('], + ['rule', 'eTop'], + ['equal', ')'], + ]]], + ['project', 'op_range', ['rule', 'eRange']], + ['project', 'op_str', ['rule', 'eStr']], + ['project', 'op_token', ['rule', 'eToken']], + ['project', 'op_regex', ['rule', 'eRegex']], + ['project', 'op_rule', ['rule', 'ident']], + ]]], + + ['eRange', ['seq', [ + ['rule', 'alphanum'], + ['equal', '-'], + ['rule', 'alphanum'], 
+ ]]], + + ['eStr', ['seq', [ + ['token', '\''], + ['times', 0, null, ['seq', [['not', ['equal', '\'']], ['rule', 'anything']]]], + ['equal', '\''], + ]]], + + ['eToken', ['seq', [ + ['token', '"'], + ['times', 0, null, ['seq', [['not', ['equal', '"']], ['rule', 'anything']]]], + ['equal', '"'], + ]]], + + ['eRegex', ['seq', [ + ['token', '/'], + ['times', 0, null, ['seq', [['not', ['equal', '/']], ['rule', 'anything']]]], + ['equal', '/'], + ['times', 0, null, ['range', 'a', 'z']], + ]]], + + ['ident', ['seq', [ + ['rule', 'letter'], + ['times', 0, null, ['rule', 'alphanum']] + ]]], + + ['alphanum', ['alt', [['rule', 'letter'], ['rule', 'digit']]]], + + ['digit', ['range', '0', '9'],], + + ['letter', ['alt', [ + ['range', 'A', 'Z'], + ['range', 'a', 'z'], + ]]], + + ['spaces', ['regex', '\\s+']], + + ['space', ['regex', '\\s']], + + ['inlSpaces', ['regex', '[ \\t]+']], + + ['inlSpace', ['regex', '[ \\t]']], + + ['newline', ['regex', '(\\r\\n)|\\n']], +] + + +export const proj: IProjectors = { + +} \ No newline at end of file diff --git a/src/step2/parser.ts b/src/step2/parser.ts index 80b8186..c9d9d9c 100644 --- a/src/step2/parser.ts +++ b/src/step2/parser.ts @@ -1,17 +1,21 @@ import { State } from './state' -import { IParseResultSuccess, IParseResultFail, IParserFn } from './types' -import { GrammarAst as AST } from './grammar-ast' +import { IParseResultSuccess, IParseResultFail, IParserFn, IProjectors } from './types' +import { Ast } from './grammar-ast' import * as equal from 'fast-deep-equal/es6' +import { AsyncParallelBailHook } from 'tapable' export class Parser { private state: State - private grammar: AST.Grammar + private grammar: Ast.Grammar - constructor(gr: AST.Grammar, input: any[]) { + private projectors: IProjectors + + constructor(gr: Ast.Grammar, input: any[], proj: IProjectors = {}) { this.grammar = gr this.state = new State(input) + this.projectors = proj } private success(consumed: number = 0, result: any = null): IParseResultSuccess { @@ 
-28,10 +32,16 @@ export class Parser { } } - private getRuleBodyByName = (name: string): AST.Expr => + private getRuleBodyByName = (name: string): Ast.Expr => this.grammar.find(i => i[0] === name)[1] - expr = (e: AST.Expr): IParserFn => { + // === Parsers + + rule = (name: string): IParserFn => { + return this.project(name, this.getRuleBodyByName(name)) + } + + expr = (e: Ast.Expr): IParserFn => { switch (e[0]) { case 'equal': return this.equal(e[1]) @@ -41,8 +51,18 @@ export class Parser { return this.alt(e[1]) case 'seq': return this.seq(e[1]) - case 'notLess': - return this.notLess(e[1], e[2]) + case 'times': + return this.times(e[1], e[2], e[3]) + case 'token': + return this.token(e[1]) + case 'not': + return this.not(e[1]) + case 'project': + return this.project(e[1], e[2]) + case 'regex': + return this.regex(e[1]) + case 'range': + return this.range(e[1], e[2]) default: throw new Error(`Unknown expression type: ${e[0]}`); } @@ -67,11 +87,7 @@ export class Parser { return this.fail() } - rule = (name: string): IParserFn => { - return this.expr(this.getRuleBodyByName(name)) - } - - seq = (exprs: AST.Expr[]): IParserFn => () => { + seq = (exprs: Ast.Expr[]): IParserFn => () => { const results: any[] = [] this.state.savePos() for (let i = 0; i < exprs.length; i++) { @@ -88,7 +104,7 @@ export class Parser { return this.success(0, results) } - alt = (exprs: AST.Expr[]): IParserFn => () => { + alt = (exprs: Ast.Expr[]): IParserFn => () => { for (let i = 0; i < exprs.length; i++) { this.state.savePos(); const e = exprs[i]; @@ -103,17 +119,24 @@ export class Parser { return this.fail() } - notLess = (min: number, expr: AST.Expr) => () => { + times = (min: number, max: number, expr: Ast.Expr): IParserFn => () => { + if (max === null || max === undefined) + max = Infinity + if (max < 1) { + throw new Error(`max should be more than zero (or undefined, which interprets as Infinity). 
max = ${max}`); + } let count = 0 const p = this.expr(expr) const results: any[] = [] - for(;;) { + for (; ;) { const r = p() if (r.success) { results.push(r.result) count++ - } - else { + if (count == max) { + return this.success(0, results) + } + } else { if (count >= min) { return this.success(0, results) } else { @@ -123,6 +146,71 @@ export class Parser { } } + token = (token: string): IParserFn => { + const e: Ast.Expr = ['seq', [ + ['times', 0, null, ['alt', [ + ['equal', ' '], + ['equal', '\t'] + ]]], + ['equal', token] + ]] + return this.expr(e) + } + + not = (expr: Ast.Expr): IParserFn => () => { + this.state.savePos() + const p = this.expr(expr) + const r = p() + this.state.backtrack() + return r.success ? this.fail() : this.success(0) + } + + project = (projector: string, expr: Ast.Expr): IParserFn => () => { + const p = this.expr(expr) + const r = p() + if (!r.success) { + return r + } + + const proj = this.projectors[projector] + if (proj) { + const res = proj(r.result) + return this.success(r.consumed, res) + } else { + return r + } + } + + regex = (regex: string): IParserFn => { + const rx = new RegExp(regex).compile() + if (typeof (this.state.input) !== 'string') { + throw new Error("regex can be used only if input sequence is string"); + } + return () => { + const s = (this.state.input as unknown as string).substring(this.state.pos) + const m = rx.exec(s) + if (m) { + return this.success(m[0].length, m[0]) + } + return this.fail() + } + } + + range = (from: string, to: string): IParserFn => { + if (from.length != 1 || to.length != 1) { + throw new Error(`from and to must be 1 symbol length: from=${from}, to=${to}`); + } + return () => { + let item: any = this.state.current + if (item >= from && item <= to) { + return this.success(1, item) + } + return this.fail() + } + } + + // === API + match = (rule: string) => { const p = this.rule(rule) return p() diff --git a/src/step2/state.ts b/src/step2/state.ts index 8b620ce..4c25c57 100644 --- 
a/src/step2/state.ts +++ b/src/step2/state.ts @@ -27,6 +27,10 @@ export class State { return this._input[this._pos] } + at(pos: number) { + return this._input[pos] + } + consume (num: number) { this._pos += num return !this.isEof diff --git a/src/step2/types.ts b/src/step2/types.ts index 4452bb3..7c49382 100644 --- a/src/step2/types.ts +++ b/src/step2/types.ts @@ -13,3 +13,6 @@ export type IParseResult = IParseResultSuccess | IParseResultFail export type IParserFn = () => IParseResult +export type IProjectors = { + [key: string]: (args: [...any]) => any +} \ No newline at end of file