From 8132b7df157c399586dce7422e4843543b72ccf4 Mon Sep 17 00:00:00 2001 From: Zachary Carter Date: Sat, 26 Jan 2013 18:56:12 -0800 Subject: [PATCH] init --- .gitignore | 1 + README.md | 20 + cli.js | 87 ++++ package.json | 42 ++ regexp-lexer.js | 398 +++++++++++++++++++ tests/all-tests.js | 4 + tests/regexplexer.js | 924 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1476 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 cli.js create mode 100644 package.json create mode 100644 regexp-lexer.js create mode 100755 tests/all-tests.js create mode 100644 tests/regexplexer.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c2658d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +node_modules/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..5218427 --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +# jison-lex +A lexical analyzer generator used by [jison](http://jison.org). + +## install +npm install jison-lex -g + +## usage +``` +Usage: jison-lex [file] [options] + +file file containing a lexical grammar + +Options: + -o FILE, --outfile FILE Filename and base module name of the generated parser + -t TYPE, --module-type TYPE The type of module to generate (commonjs, js) + --version print version and exit +``` + +## license +MIT diff --git a/cli.js b/cli.js new file mode 100755 index 0000000..71dbb47 --- /dev/null +++ b/cli.js @@ -0,0 +1,87 @@ +#!/usr/bin/env node + +var version = require('./package.json').version; + +var path = require('path'); +var fs = require('fs'); +var lexParser = require('lex-parser'); +var RegExpLexer = require('./regexp-lexer.js'); + + +var opts = require("nomnom") + .script('jison-lex') + .option('file', { + flag: true, + position: 0, + help: 'file containing a lexical grammar' + }) + .option('outfile', { + abbr: 'o', + metavar: 'FILE', + help: 'Filename and base module name of the generated parser' + }) + .option('module-type', { + abbr: 't', + default: 'commonjs', + metavar: 'TYPE', + help: 'The type of module to generate (commonjs, js)' + }) + .option('version', { + flag: true, + help: 'print version and exit', + callback: function() { + return version; + } + }) + .parse(); + +exports.main = function () { + if (opts.file) { + var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'), + name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,''); + + fs.writeFileSync(opts.outfile||(name + '.js'), processGrammar(raw, name)); + } else { + readin(function (raw) { + console.log(processGrammar(raw)); + }); + } +}; + +function processGrammar (file, name) { + var grammar; + try { + grammar = lexParser.parse(file); + } catch (e) { + try { + grammar = JSON.parse(file); + } catch (e2) { + throw e; + } + } + + var settings = grammar.options || {}; + if (!settings.moduleType) settings.moduleType = opts.moduleType; + if (!settings.moduleName && name) settings.moduleName = name.replace(/-\w/g, function (match){ return match.charAt(1).toUpperCase(); }); + + grammar.options = settings; + + var lexer = new RegExpLexer(grammar); + return lexer.generate(settings); +} + +function readin (cb) { + var stdin = process.openStdin(), + data = ''; + + stdin.setEncoding('utf8'); + stdin.addListener('data', function (chunk) { + data += chunk; + }); + stdin.addListener('end', function () { + cb(data); + }); +} + +if (require.main === module) + exports.main(); diff --git a/package.json b/package.json new file mode 100644 index 0000000..85c5022 --- /dev/null +++ b/package.json @@ -0,0 +1,42 @@ +{ + "author": "Zach Carter (http://zaa.ch)", + "name": "jison-lex", + "description": "lexical analyzer generator used by jison", + "version": "0.0.1", + "keywords": [ + "jison", + "parser", + "generator", + "lexer", + "flex", + "tokenizer" + ], + "repository": { + "type": "git", + "url": "git://github.com/zaach/jison-lex.git" + }, + "bugs": { + "email": "jison@librelist.com", + "url": "http://github.com/zaach/jison-lex/issues" + }, + "main": "regexp-lexer", + "bin": "cli.js", + "engines": { + "node": ">=0.4" + }, + "dependencies": { + "lex-parser": "0.0.1", + "nomnom": "1.5.2" + }, + "devDependencies": { + "test": "0.4.4" + }, + "scripts": { + "test": "node tests/all-tests.js" + }, + "directories": { + "lib": "lib", + "tests": "tests" + }, + "homepage": "http://jison.org" +} diff --git a/regexp-lexer.js b/regexp-lexer.js new file mode 100644 index 0000000..94a93e9 --- /dev/null +++ b/regexp-lexer.js @@ -0,0 +1,398 @@ +// Basic Lexer implemented using JavaScript regular expressions +// MIT Licensed + +var RegExpLexer = (function () { + +var lexParser = require('lex-parser'); +var version = require('./package.json').version; + +// expand macros and convert matchers to RegExp's +function prepareRules(rules, macros, actions, tokens, startConditions, caseless) { + var m,i,k,action,conditions, + newRules = []; + + if (macros) { + macros = prepareMacros(macros); + } + + function tokenNumberReplacement (str, token) { + return "return "+(tokens[token] || "'"+token+"'"); + } + + actions.push('switch($avoiding_name_collisions) {'); + + for (i=0;i < rules.length; i++) { + if (Object.prototype.toString.apply(rules[i][0]) !== '[object Array]') { + // implicit add to all inclusive start conditions + for (k in startConditions) { + if (startConditions[k].inclusive) { + startConditions[k].rules.push(i); + } + } + } else if (rules[i][0][0] === '*') { + // Add to ALL start conditions + for (k in startConditions) { + startConditions[k].rules.push(i); + } + rules[i].shift(); + } else { + // Add to explicit start conditions + conditions = rules[i].shift(); + for (k=0;k 20 ? '...':'') + past.substr(-20).replace(/\n/g, ""); + }, + // displays upcoming input, i.e. for error messages + upcomingInput: function () { + var next = this.match; + if (next.length < 20) { + next += this._input.substr(0, 20-next.length); + } + return (next.substr(0,20)+(next.length > 20 ? '...':'')).replace(/\n/g, ""); + }, + // displays upcoming input, i.e. for error messages + showPosition: function () { + var pre = this.pastInput(); + var c = new Array(pre.length + 1).join("-"); + return pre + this.upcomingInput() + "\n" + c+"^"; + }, + + // return next match in input + next: function () { + if (this.done) { + return this.EOF; + } + if (!this._input) this.done = true; + + var token, + match, + tempMatch, + index, + col, + lines; + if (!this._more) { + this.yytext = ''; + this.match = ''; + } + var rules = this._currentRules(); + for (var i=0;i < rules.length; i++) { + tempMatch = this._input.match(this.rules[rules[i]]); + if (tempMatch && (!match || tempMatch[0].length > match[0].length)) { + match = tempMatch; + index = i; + if (!this.options.flex) break; + } + } + if (match) { + lines = match[0].match(/(?:\r\n?|\n).*/g); + if (lines) this.yylineno += lines.length; + this.yylloc = {first_line: this.yylloc.last_line, + last_line: this.yylineno+1, + first_column: this.yylloc.last_column, + last_column: lines ? lines[lines.length-1].length-lines[lines.length-1].match(/\r?\n?/)[0].length : this.yylloc.last_column + match[0].length}; + this.yytext += match[0]; + this.match += match[0]; + this.matches = match; + this.yyleng = this.yytext.length; + if (this.options.ranges) { + this.yylloc.range = [this.offset, this.offset += this.yyleng]; + } + this._more = false; + this._input = this._input.slice(match[0].length); + this.matched += match[0]; + token = this.performAction.call(this, this.yy, this, rules[index],this.conditionStack[this.conditionStack.length-1]); + if (this.done && this._input) this.done = false; + if (token) return token; + else return; + } + if (this._input === "") { + return this.EOF; + } else { + return this.parseError('Lexical error on line '+(this.yylineno+1)+'. Unrecognized text.\n'+this.showPosition(), + {text: "", token: null, line: this.yylineno}); + } + }, + + // return next match that has a token + lex: function lex () { + var r = this.next(); + if (typeof r !== 'undefined') { + return r; + } else { + return this.lex(); + } + }, + begin: function begin (condition) { + this.conditionStack.push(condition); + }, + popState: function popState () { + return this.conditionStack.pop(); + }, + _currentRules: function _currentRules () { + return this.conditions[this.conditionStack[this.conditionStack.length-1]].rules; + }, + topState: function () { + return this.conditionStack[this.conditionStack.length-2]; + }, + pushState: function begin (condition) { + this.begin(condition); + }, + + generate: function generate(opt) { + var code = ""; + if (opt.moduleType === 'commonjs') { + code = this.generateCommonJSModule(opt); + } else if (opt.moduleType === 'amd') { + code = this.generateAMDModule(opt); + } else { + code = this.generateModule(opt); + } + + return code; + }, + generateModuleBody: function generateModule() { + var out = "{\n"; + var p = []; + for (var k in RegExpLexer.prototype) { + if (RegExpLexer.prototype.hasOwnProperty(k) && k.indexOf("generate") === -1) { + p.push(k + ":" + (RegExpLexer.prototype[k].toString() || '""')); + } + } + out += p.join(",\n"); + + if (this.options) { + out += ",\noptions: " + JSON.stringify(this.options); + } + + out += ",\nperformAction: " + String(this.performAction); + out += ",\nrules: [" + this.rules + "]"; + out += ",\nconditions: " + JSON.stringify(this.conditions); + out += "\n}"; + + return out; + }, + generateModule: function generateModule(opt) { + opt = opt || {}; + + var out = "/* generated by jison-lex " + version + " */"; + var moduleName = opt.moduleName || "lexer"; + + out += "\nvar " + moduleName + " = (function(){\nvar lexer = " + + this.generateModuleBody(); + + if (this.moduleInclude) out += ";\n"+this.moduleInclude; + out += ";\nreturn lexer;\n})();"; + return out; + }, + generateAMDModule: function generateAMDModule() { + var out = "/* generated by jison-lex " + version + " */"; + + out += "define([], function(){\nvar lexer = " + + this.generateModuleBody(); + + if (this.moduleInclude) out += ";\n"+this.moduleInclude; + out += ";\nreturn lexer;" + + "\n})();"; + return out; + }, + generateCommonJSModule: function generateCommonJSModule(opt) { + opt = opt || {}; + + var out = ""; + var moduleName = opt.moduleName || "lexer"; + + out += this.generateModule(opt); + out += "\nexports.lexer = "+moduleName; + out += ";\nexports.lex = function () { return "+moduleName+".lex.apply(lexer, arguments); };"; + return out; + } +}; + +return RegExpLexer; + +})(); + +module.exports = RegExpLexer; + diff --git a/tests/all-tests.js b/tests/all-tests.js new file mode 100755 index 0000000..335a469 --- /dev/null +++ b/tests/all-tests.js @@ -0,0 +1,4 @@ +exports.testRegExpLexer = require("./regexplexer"); + +if (require.main === module) + process.exit(require("test").run(exports)); diff --git a/tests/regexplexer.js b/tests/regexplexer.js new file mode 100644 index 0000000..b59ff25 --- /dev/null +++ b/tests/regexplexer.js @@ -0,0 +1,924 @@ +var RegExpLexer = require("../regexp-lexer"), + assert = require("assert"); + +exports["test basic matchers"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xxyx"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test set input after"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xxyx"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test unrecognized char"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xa"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "X"); + assert["throws"](function(){lexer.lex()}, "bad char"); +}; + +exports["test macro"] = function() { + var dict = { + macros: { + "digit": "[0-9]" + }, + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["{digit}+", "return 'NAT';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "x12234y42"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "NAT"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "NAT"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test macro precedence"] = function() { + var dict = { + macros: { + "hex": "[0-9]|[a-f]" + }, + rules: [ + ["-", "return '-';" ], + ["{hex}+", "return 'HEX';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "129-abfe-42dc-ea12"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "-"); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "-"); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "-"); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test nested macros"] = function () { + var dict = { + macros: { + "digit": "[0-9]", + "2digit": "{digit}{digit}", + "3digit": "{2digit}{digit}" + }, + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["{3digit}", "return 'NNN';" ], + ["{2digit}", "return 'NN';" ], + ["{digit}", "return 'N';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "x1y42y123"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "N"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "NN"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "NNN"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test nested macro precedence"] = function() { + var dict = { + macros: { + "hex": "[0-9]|[a-f]", + "col": "#{hex}+" + }, + rules: [ + ["-", "return '-';" ], + ["{col}", "return 'HEX';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "#129-#abfe-#42dc-#ea12"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "-"); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "-"); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "-"); + assert.equal(lexer.lex(), "HEX"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test action include"] = function() { + var dict = { + rules: [ + ["x", "return included ? 'Y' : 'N';" ], + ["$", "return 'EOF';" ] + ], + actionInclude: "var included = true;" + }; + + var input = "x"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test ignored"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["\\s+", "/* skip whitespace */" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "x x y x"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test disambiguate"] = function() { + var dict = { + rules: [ + ["for\\b", "return 'FOR';" ], + ["if\\b", "return 'IF';" ], + ["[a-z]+", "return 'IDENTIFIER';" ], + ["\\s+", "/* skip whitespace */" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "if forever for for"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "IF"); + assert.equal(lexer.lex(), "IDENTIFIER"); + assert.equal(lexer.lex(), "FOR"); + assert.equal(lexer.lex(), "FOR"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test yytext overwrite"] = function() { + var dict = { + rules: [ + ["x", "yytext = 'hi der'; return 'X';" ] + ] + }; + + var input = "x"; + + var lexer = new RegExpLexer(dict, input); + lexer.lex(); + assert.equal(lexer.yytext, "hi der"); +}; + +exports["test yylineno"] = function() { + var dict = { + rules: [ + ["\\s+", "/* skip whitespace */" ], + ["x", "return 'x';" ], + ["y", "return 'y';" ] + ] + }; + + var input = "x\nxy\n\n\nx"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.yylineno, 0); + assert.equal(lexer.lex(), "x"); + assert.equal(lexer.lex(), "x"); + assert.equal(lexer.yylineno, 1); + assert.equal(lexer.lex(), "y"); + assert.equal(lexer.yylineno, 1); + assert.equal(lexer.lex(), "x"); + assert.equal(lexer.yylineno, 4); +}; + +exports["test yylloc"] = function() { + var dict = { + rules: [ + ["\\s+", "/* skip whitespace */" ], + ["x", "return 'x';" ], + ["y", "return 'y';" ] + ] + }; + + var input = "x\nxy\n\n\nx"; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "x"); + assert.equal(lexer.yylloc.first_column, 0); + assert.equal(lexer.yylloc.last_column, 1); + assert.equal(lexer.lex(), "x"); + assert.equal(lexer.yylloc.first_line, 2); + assert.equal(lexer.yylloc.last_line, 2); + assert.equal(lexer.yylloc.first_column, 0); + assert.equal(lexer.yylloc.last_column, 1); + assert.equal(lexer.lex(), "y"); + assert.equal(lexer.yylloc.first_line, 2); + assert.equal(lexer.yylloc.last_line, 2); + assert.equal(lexer.yylloc.first_column, 1); + assert.equal(lexer.yylloc.last_column, 2); + assert.equal(lexer.lex(), "x"); + assert.equal(lexer.yylloc.first_line, 5); + assert.equal(lexer.yylloc.last_line, 5); + assert.equal(lexer.yylloc.first_column, 0); + assert.equal(lexer.yylloc.last_column, 1); +}; + +exports["test more()"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ['"[^"]*', function(){ + if(yytext.charAt(yyleng-1) == '\\') { + this.more(); + } else { + yytext += this.input(); // swallow end quote + return "STRING"; + } + } ], + ["$", "return 'EOF';" ] + ] + }; + + var input = 'x"fgjdrtj\\"sdfsdf"x'; + + var lexer = new RegExpLexer(dict, input); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "STRING"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test defined token returns"] = function() { + var tokens = {"2":"X", "3":"Y", "4":"EOF"}; + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xxyx"; + + var lexer = new RegExpLexer(dict, input, tokens); + + assert.equal(lexer.lex(), 2); + assert.equal(lexer.lex(), 2); + assert.equal(lexer.lex(), 3); + assert.equal(lexer.lex(), 2); + assert.equal(lexer.lex(), 4); +}; + +exports["test module generator"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xxyx"; + + var lexer_ = new RegExpLexer(dict); + var lexerSource = lexer_.generateModule(); + eval(lexerSource); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test generator with more complex lexer"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ['"[^"]*', function(){ + if(yytext.charAt(yyleng-1) == '\\') { + this.more(); + } else { + yytext += this.input(); // swallow end quote + return "STRING"; + } + } ], + ["$", "return 'EOF';" ] + ] + }; + + var input = 'x"fgjdrtj\\"sdfsdf"x'; + + var lexer_ = new RegExpLexer(dict); + var lexerSource = lexer_.generateModule(); + eval(lexerSource); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "STRING"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test commonjs module generator"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xxyx"; + + var lexer_ = new RegExpLexer(dict); + var lexerSource = lexer_.generateCommonJSModule(); + var exports = {}; + eval(lexerSource); + exports.lexer.setInput(input); + + assert.equal(exports.lex(), "X"); + assert.equal(exports.lex(), "X"); + assert.equal(exports.lex(), "Y"); + assert.equal(exports.lex(), "X"); + assert.equal(exports.lex(), "EOF"); +}; + +exports["test amd module generator"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + + var input = "xxyx"; + + var lexer_ = new RegExpLexer(dict); + var lexerSource = lexer_.generateCommonJSModule(); + + var lexer; + var define = function (_, fn) { + lexer = fn(); + }; + + eval(lexerSource); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test DJ lexer"] = function() { + var dict = { + "lex": { + "macros": { + "digit": "[0-9]", + "id": "[a-zA-Z][a-zA-Z0-9]*" + }, + + "rules": [ + ["//.*", "/* ignore comment */"], + ["main\\b", "return 'MAIN';"], + ["class\\b", "return 'CLASS';"], + ["extends\\b", "return 'EXTENDS';"], + ["nat\\b", "return 'NATTYPE';"], + ["if\\b", "return 'IF';"], + ["else\\b", "return 'ELSE';"], + ["for\\b", "return 'FOR';"], + ["printNat\\b", "return 'PRINTNAT';"], + ["readNat\\b", "return 'READNAT';"], + ["this\\b", "return 'THIS';"], + ["new\\b", "return 'NEW';"], + ["var\\b", "return 'VAR';"], + ["null\\b", "return 'NUL';"], + ["{digit}+", "return 'NATLITERAL';"], + ["{id}", "return 'ID';"], + ["==", "return 'EQUALITY';"], + ["=", "return 'ASSIGN';"], + ["\\+", "return 'PLUS';"], + ["-", "return 'MINUS';"], + ["\\*", "return 'TIMES';"], + [">", "return 'GREATER';"], + ["\\|\\|", "return 'OR';"], + ["!", "return 'NOT';"], + ["\\.", "return 'DOT';"], + ["\\{", "return 'LBRACE';"], + ["\\}", "return 'RBRACE';"], + ["\\(", "return 'LPAREN';"], + ["\\)", "return 'RPAREN';"], + [";", "return 'SEMICOLON';"], + ["\\s+", "/* skip whitespace */"], + [".", "print('Illegal character');throw 'Illegal character';"], + ["$", "return 'ENDOFFILE';"] + ] + } +}; + + var input = "class Node extends Object { \ + var nat value var nat value;\ + var Node next;\ + var nat index;\ + }\ +\ + class List extends Object {\ + var Node start;\ +\ + Node prepend(Node startNode) {\ + startNode.next = start;\ + start = startNode;\ + }\ +\ + nat find(nat index) {\ + var nat value;\ + var Node node;\ +\ + for(node = start;!(node == null);node = node.next){\ + if(node.index == index){\ + value = node.value;\ + } else { 0; };\ + };\ +\ + value;\ + }\ + }\ +\ + main {\ + var nat index;\ + var nat value;\ + var List list;\ + var Node startNode;\ +\ + index = readNat();\ + list = new List;\ +\ + for(0;!(index==0);0){\ + value = readNat();\ + startNode = new Node;\ + startNode.index = index;\ + startNode.value = value;\ + list.prepend(startNode);\ + index = readNat();\ + };\ +\ + index = readNat();\ +\ + for(0;!(index==0);0){\ + printNat(list.find(index));\ + index = readNat();\ + };\ + }"; + + var lexer = new RegExpLexer(dict.lex); + lexer.setInput(input); + var tok; + while (tok = lexer.lex(), tok!==1) { + assert.equal(typeof tok, "string"); + } +}; + +exports["test instantiation from string"] = function() { + var dict = "%%\n'x' {return 'X';}\n'y' {return 'Y';}\n<> {return 'EOF';}"; + + var input = "x"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test inclusive start conditions"] = function() { + var dict = { + startConditions: { + "TEST": 0, + }, + rules: [ + ["enter-test", "this.begin('TEST');" ], + [["TEST"], "x", "return 'T';" ], + [["TEST"], "y", "this.begin('INITIAL'); return 'TY';" ], + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + var input = "xenter-testxyy"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "T"); + assert.equal(lexer.lex(), "TY"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test exclusive start conditions"] = function() { + var dict = { + startConditions: { + "EAT": 1, + }, + rules: [ + ["//", "this.begin('EAT');" ], + [["EAT"], ".", "" ], + [["EAT"], "\\n", "this.begin('INITIAL');" ], + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + var input = "xy//yxteadh//ste\ny"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test pop start condition stack"] = function() { + var dict = { + startConditions: { + "EAT": 1, + }, + rules: [ + ["//", "this.begin('EAT');" ], + [["EAT"], ".", "" ], + [["EAT"], "\\n", "this.popState();" ], + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + ["$", "return 'EOF';" ] + ] + }; + var input = "xy//yxteadh//ste\ny"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "EOF"); +}; + + +exports["test star start condition"] = function() { + var dict = { + startConditions: { + "EAT": 1, + }, + rules: [ + ["//", "this.begin('EAT');" ], + [["EAT"], ".", "" ], + ["x", "return 'X';" ], + ["y", "return 'Y';" ], + [["*"],"$", "return 'EOF';" ] + ] + }; + var input = "xy//yxteadh//stey"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test start condition constants"] = function() { + var dict = { + startConditions: { + "EAT": 1, + }, + rules: [ + ["//", "this.begin('EAT');" ], + [["EAT"], ".", "if (YYSTATE==='EAT') return 'E';" ], + ["x", "if (YY_START==='INITIAL') return 'X';" ], + ["y", "return 'Y';" ], + [["*"],"$", "return 'EOF';" ] + ] + }; + var input = "xy//y"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "Y"); + assert.equal(lexer.lex(), "E"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test unicode encoding"] = function() { + var dict = { + rules: [ + ["\\u2713", "return 'CHECK';" ], + ["\\u03c0", "return 'PI';" ], + ["y", "return 'Y';" ] + ] + }; + var input = "\u2713\u03c0y"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "CHECK"); + assert.equal(lexer.lex(), "PI"); + assert.equal(lexer.lex(), "Y"); +}; + +exports["test unicode"] = function() { + var dict = { + rules: [ + ["π", "return 'PI';" ], + ["y", "return 'Y';" ] + ] + }; + var input = "πy"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "PI"); + assert.equal(lexer.lex(), "Y"); +}; + +exports["test longest match returns"] = function() { + var dict = { + rules: [ + [".", "return 'DOT';" ], + ["cat", "return 'CAT';" ] + ], + options: {flex: true} + }; + var input = "cat!"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "CAT"); + assert.equal(lexer.lex(), "DOT"); +}; + +exports["test case insensitivity"] = function() { + var dict = { + rules: [ + ["cat", "return 'CAT';" ] + ], + options: {'case-insensitive': true} + }; + var input = "Cat"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "CAT"); +}; + +exports["test less"] = function() { + var dict = { + rules: [ + ["cat", "this.less(2); return 'CAT';" ], + ["t", "return 'T';" ] + ], + }; + var input = "cat"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "CAT"); + assert.equal(lexer.lex(), "T"); +}; + +exports["test EOF unput"] = function() { + var dict = { + startConditions: { + "UN": 1, + }, + rules: [ + ["U", "this.begin('UN');return 'U';" ], + [["UN"],"$", "this.unput('X')" ], + [["UN"],"X", "this.popState();return 'X';" ], + ["$", "return 'EOF'" ] + ] + }; + var input = "U"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "U"); + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "EOF"); +}; + +exports["test flex mode default rule"] = function() { + var dict = { + rules: [ + ["x", "return 'X';" ] + ], + options: {flex: true} + }; + var input = "xyx"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.equal(lexer.lex(), "X"); +}; + +exports["test pipe precedence"] = function() { + var dict = { + rules: [ + ["x|y", "return 'X_Y';" ], + [".", "return 'N';"] + ] + }; + var input = "xny"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X_Y"); + assert.equal(lexer.lex(), "N"); + assert.equal(lexer.lex(), "X_Y"); +}; + +exports["test ranges"] = function() { + var dict = { + rules: [ + ["x+", "return 'X';" ], + [".", "return 'N';"] + ], + options: {ranges: true} + }; + var input = "xxxyy"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + + assert.equal(lexer.lex(), "X"); + assert.deepEqual(lexer.yylloc.range, [0, 3]); +}; + +exports["test unput location"] = function() { + var dict = { + rules: [ + ["x+", "return 'X';" ], + ["y\\n", "this.unput('\\n'); return 'Y';" ], + ["\\ny", "this.unput('y'); return 'BR';" ], + ["y", "return 'Y';" ], + [".", "return 'N';"] + ], + options: {ranges: true} + }; + var input = "xxxy\ny"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + console.log(lexer.rules); + + assert.equal(lexer.next(), "X"); + assert.deepEqual(lexer.yylloc, {first_line: 1, + first_column: 0, + last_line: 1, + last_column: 3, + range: [0, 3]}); + assert.equal(lexer.next(), "Y"); + assert.deepEqual(lexer.yylloc, {first_line: 1, + first_column: 3, + last_line: 1, + last_column: 4, + range: [3, 4]}); + assert.equal(lexer.next(), "BR"); + assert.deepEqual(lexer.yylloc, {first_line: 1, + first_column: 4, + last_line: 2, + last_column: 0, + range: [4, 5]}); + assert.equal(lexer.next(), "Y"); + assert.deepEqual(lexer.yylloc, {first_line: 2, + first_column: 0, + last_line: 2, + last_column: 1, + range: [5, 6]}); + +}; + +exports["test unput location again"] = function() { + var dict = { + rules: [ + ["x+", "return 'X';" ], + ["y\\ny\\n", "this.unput('\\n'); return 'YY';" ], + ["\\ny", "this.unput('y'); return 'BR';" ], + ["y", "return 'Y';" ], + [".", "return 'N';"] + ], + options: {ranges: true} + }; + var input = "xxxy\ny\ny"; + + var lexer = new RegExpLexer(dict); + lexer.setInput(input); + console.log(lexer.rules); + + assert.equal(lexer.next(), "X"); + assert.deepEqual(lexer.yylloc, {first_line: 1, + first_column: 0, + last_line: 1, + last_column: 3, + range: [0, 3]}); + assert.equal(lexer.next(), "YY"); + assert.deepEqual(lexer.yylloc, {first_line: 1, + first_column: 3, + last_line: 2, + last_column: 1, + range: [3, 6]}); + assert.equal(lexer.next(), "BR"); + assert.deepEqual(lexer.yylloc, {first_line: 2, + first_column: 1, + last_line: 3, + last_column: 0, + range: [6, 7]}); + assert.equal(lexer.next(), "Y"); + assert.deepEqual(lexer.yylloc, {first_line: 3, + first_column: 0, + last_line: 3, + last_column: 1, + range: [7, 8]}); + +}; +