From a2844c7fd79d88944ffac33e2bcdeb70d8a2c503 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Thu, 2 Feb 2017 23:36:50 +0100 Subject: [PATCH] synced the code generators with the ones in the jison tool: support all 4 modes: CommonJS, AMD, ES6 and vanilla JS; also taken the opportunity to give the lexer its own documentation comment chunk as is generated by jison for the parser at large. --- cli.js | 3 +- regexp-lexer.js | 300 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 264 insertions(+), 39 deletions(-) diff --git a/cli.js b/cli.js index 9e5690e..3a33cfa 100755 --- a/cli.js +++ b/cli.js @@ -25,7 +25,8 @@ var opts = require('nomnom') abbr: 't', default: 'commonjs', metavar: 'TYPE', - help: 'The type of module to generate (commonjs, js)' + choices: ['commonjs', 'amd', 'js', 'es'], + help: 'The type of module to generate (commonjs, amd, es, js)' }) .option('version', { abbr: 'V', diff --git a/regexp-lexer.js b/regexp-lexer.js index e936be5..e0b38e7 100644 --- a/regexp-lexer.js +++ b/regexp-lexer.js @@ -2754,12 +2754,20 @@ function processGrammar(dict, tokens, build_options) { function generateFromOpts(opt) { var code = ''; - if (opt.moduleType === 'commonjs') { - code = generateCommonJSModule(opt); - } else if (opt.moduleType === 'amd') { - code = generateAMDModule(opt); - } else { + switch (opt.moduleType) { + case 'js': code = generateModule(opt); + break; + case 'amd': + code = generateAMDModule(opt); + break; + case 'es': + code = generateESModule(opt); + break; + case 'commonjs': + default: + code = generateCommonJSModule(opt); + break; } return code; @@ -2887,62 +2895,278 @@ function generateModuleBody(opt) { return out; } -function generateModule(opt) { - opt = opt || {}; +function generateGenericHeaderComment() { + var out = '/* lexer generated by jison-lex ' + version + ' */\n' + + '/*\n' + + ' * Returns a Lexer object of the following structure:\n' + + ' *\n' + + ' * Lexer: {\n' + + ' * yy: {} The so-called "shared state" or rather the *source* of it;\n' + + ' * the real "shared state" `yy` passed around to\n' + + ' * the rule actions, etc. is a derivative/copy of this one,\n' + + ' * not a direct reference!\n' + + ' * }\n' + + ' *\n' + + ' * Lexer.prototype: {\n' + + ' * yy: {},\n' + + ' * EOF: 1,\n' + + ' * ERROR: 2,\n' + + ' *\n' + + ' * JisonLexerError: function(msg, hash),\n' + + ' *\n' + + ' * performAction: function lexer__performAction(yy, yy_, $avoiding_name_collisions, YY_START, ...),\n' + + ' * where `...` denotes the (optional) additional arguments the user passed to\n' + + ' * `lexer.lex(...)` and specified by way of `%parse-param ...` in the **parser** grammar file\n' + + ' *\n' + + ' * The function parameters and `this` have the following value/meaning:\n' + + ' * - `this` : reference to the `lexer` instance.\n' + + ' *\n' + + ' * - `yy` : a reference to the `yy` "shared state" object which was passed to the lexer\n' + + ' * by way of the `lexer.setInput(str, yy)` API before.\n' + + ' *\n' + + ' * - `yy_` : lexer instance reference used internally.\n' + + ' *\n' + + ' * - `$avoiding_name_collisions` : index of the matched lexer rule (regex), used internally.\n' + + ' *\n' + + ' * - `YY_START`: the current lexer "start condition" state.\n' + + ' *\n' + + ' * - `...` : the extra arguments you specified in the `%parse-param` statement in your\n' + + ' * **parser** grammar definition file and which are passed to the lexer via\n' + + ' * its `lexer.lex(...)` API.\n' + + ' *\n' + + ' * parseError: function(str, hash),\n' + + ' *\n' + + ' * constructLexErrorInfo: function(error_message, is_recoverable),\n' + + ' * Helper function.\n' + + ' * Produces a new errorInfo \'hash object\' which can be passed into `parseError()`.\n' + + ' * See it\'s use in this lexer kernel in many places; example usage:\n' + + ' *\n' + + ' * var infoObj = lexer.constructParseErrorInfo(\'fail!\', true);\n' + + ' * var retVal = lexer.parseError(infoObj.errStr, infoObj);\n' + + ' *\n' + + ' * options: { ... lexer %options ... },\n' + + ' *\n' + + ' * lex: function([args...]),\n' + + ' * Produce one token of lexed input, which was passed in earlier via the `lexer.setInput()` API.\n' + + ' * You MAY use the additional `args...` parameters as per `%parse-param` spec of the **parser** grammar:\n' + + ' * these extra `args...` are passed verbatim to the lexer rules\' action code.\n' + + ' *\n' + + ' * cleanupAfterLex: function(do_not_nuke_errorinfos),\n' + + ' * Helper function.\n' + + ' * This helper API is invoked when the parse process has completed. This helper may\n' + + ' * be invoked by user code to ensure the internal lexer gets properly garbage collected.\n' + + ' *\n' + + ' * setInput: function(input, [yy]),\n' + + ' * input: function(),\n' + + ' * unput: function(str),\n' + + ' * more: function(),\n' + + ' * reject: function(),\n' + + ' * less: function(n),\n' + + ' * pastInput: function(n),\n' + + ' * upcomingInput: function(n),\n' + + ' * showPosition: function(),\n' + + ' * test_match: function(regex_match_array, rule_index),\n' + + ' * next: function(...),\n' + + ' * lex: function(...),\n' + + ' * begin: function(condition),\n' + + ' * pushState: function(condition),\n' + + ' * popState: function(),\n' + + ' * topState: function(),\n' + + ' * _currentRules: function(),\n' + + ' * stateStackSize: function(),\n' + + ' *\n' + + ' * options: { ... lexer %options ... },\n' + + ' *\n' + + ' * performAction: function(yy, yy_, $avoiding_name_collisions, YY_START, ...),\n' + + ' * rules: [...],\n' + + ' * conditions: {associative list: name ==> set},\n' + + ' * }\n' + + ' *\n' + + ' *\n' + + ' * token location info (`yylloc`): {\n' + + ' * first_line: n,\n' + + ' * last_line: n,\n' + + ' * first_column: n,\n' + + ' * last_column: n,\n' + + ' * range: [start_number, end_number]\n' + + ' * (where the numbers are indexes into the input string, zero-based)\n' + + ' * }\n' + + ' *\n' + + ' * ---\n' + + ' *\n' + + ' * The parseError function receives a \'hash\' object with these members for lexer errors:\n' + + ' *\n' + + ' * {\n' + + ' * text: (matched text)\n' + + ' * token: (the produced terminal token, if any)\n' + + ' * token_id: (the produced terminal token numeric ID, if any)\n' + + ' * line: (yylineno)\n' + + ' * loc: (yylloc)\n' + + ' * recoverable: (boolean: TRUE when the parser MAY have an error recovery rule\n' + + ' * available for this particular error)\n' + + ' * yy: (object: the current parser internal "shared state" `yy`\n' + + ' * as is also available in the rule actions; this can be used,\n' + + ' * for instance, for advanced error analysis and reporting)\n' + + ' * lexer: (reference to the current lexer instance used by the parser)\n' + + ' * }\n' + + ' *\n' + + ' * while `this` will reference the current lexer instance.\n' + + ' *\n' + + ' * When `parseError` is invoked by the lexer, the default implementation will\n' + + ' * attempt to invoke `yy.parser.parseError()`; when this callback is not provided\n' + + ' * it will try to invoke `yy.parseError()` instead. When that callback is also not\n' + + ' * provided, a `JisonLexerError` exception will be thrown containing the error\n' + + ' * message and hash, as constructed by the `constructLexErrorInfo()` API.\n' + + ' *\n' + + ' * ---\n' + + ' *\n' + + ' * You can specify lexer options by setting / modifying the `.options` object of your Lexer instance.\n' + + ' * These options are available:\n' + + ' *\n' + + ' * (Options are permanent.)\n' + + ' * \n' + + ' * yy: {\n' + + ' * parseError: function(str, hash)\n' + + ' * optional: overrides the default `parseError` function.\n' + + ' * }\n' + + ' *\n' + + ' * lexer.options: {\n' + + ' * pre_lex: function()\n' + + ' * optional: is invoked before the lexer is invoked to produce another token.\n' + + ' * `this` refers to the Lexer object.\n' + + ' * post_lex: function(token) { return token; }\n' + + ' * optional: is invoked when the lexer has produced a token `token`;\n' + + ' * this function can override the returned token value by returning another.\n' + + ' * When it does not return any (truthy) value, the lexer will return\n' + + ' * the original `token`.\n' + + ' * `this` refers to the Lexer object.\n' + + ' *\n' + + ' * WARNING: the next set of options are not meant to be changed. They echo the abilities of\n' + + ' * the lexer as per when it was compiled!\n' + + ' *\n' + + ' * ranges: boolean\n' + + ' * optional: `true` ==> token location info will include a .range[] member.\n' + + ' * flex: boolean\n' + + ' * optional: `true` ==> flex-like lexing behaviour where the rules are tested\n' + + ' * exhaustively to find the longest match.\n' + + ' * backtrack_lexer: boolean\n' + + ' * optional: `true` ==> lexer regexes are tested in order and for invoked;\n' + + ' * the lexer terminates the scan when a token is returned by the action code.\n' + + ' * xregexp: boolean\n' + + ' * optional: `true` ==> lexer rule regexes are "extended regex format" requiring the\n' + + ' * `XRegExp` library. When this %option has not been specified at compile time, all lexer\n' + + ' * rule regexes have been written as standard JavaScript RegExp expressions.\n' + + ' * }\n' + + ' */\n'; - var out = ['/* generated by jison-lex ' + version + ' */']; - var moduleName = opt.moduleName || 'lexer'; + return out; +} - out.push('var ' + moduleName + ' = (function () {'); - out.push(jisonLexerErrorDefinition); - out.push(generateModuleBody(opt)); +function prepareOptions(opt) { + opt = opt || {}; - if (opt.moduleInclude) { - out.push(opt.moduleInclude + ';'); + // check for illegal identifier + if (!opt.moduleName || !opt.moduleName.match(/^[a-zA-Z_$][a-zA-Z0-9_$\.]*$/)) { + if (opt.moduleName) { + var msg = 'WARNING: The specified moduleName "' + opt.moduleName + '" is illegal (only characters [a-zA-Z0-9_$] and "." dot are accepted); using the default moduleName "lexer" instead.'; + if (typeof opt.warn_cb === 'function') { + opt.warn_cb(msg); + } else { + // do not treat as warning; barf hairball instead so that this oddity gets noticed right away! + throw new Error(msg); + } + } + opt.moduleName = 'lexer'; } + return opt; +}; + +function generateModule(opt) { + opt = prepareOptions(opt); - out.push( + var out = [ + generateGenericHeaderComment(), + '', + 'var ' + opt.moduleName + ' = (function () {', + jisonLexerErrorDefinition, + '', + generateModuleBody(opt), + '', + (opt.moduleInclude ? opt.moduleInclude + ';' : ''), + '', 'return lexer;', '})();' - ); + ]; return out.join('\n'); } function generateAMDModule(opt) { - opt = opt || {}; + opt = prepareOptions(opt); - var out = ['/* generated by jison-lex ' + version + ' */']; + var out = [ + generateGenericHeaderComment(), + '', + 'define([], function () {', + jisonLexerErrorDefinition, + '', + generateModuleBody(opt), + '', + (opt.moduleInclude ? opt.moduleInclude + ';' : ''), + '', + 'return lexer;', + '});' + ]; - out.push('define([], function () {'); - out.push(jisonLexerErrorDefinition); - out.push(generateModuleBody(opt)); + return out.join('\n'); +} - if (opt.moduleInclude) { - out.push(opt.moduleInclude + ';'); - } +function generateESModule(opt) { + opt = prepareOptions(opt); - out.push( + var out = [ + generateGenericHeaderComment(), + '', + 'var lexer = (function () {', + jisonLexerErrorDefinition, + '', + generateModuleBody(opt), + '', + (opt.moduleInclude ? opt.moduleInclude + ';' : ''), + '', 'return lexer;', - '});' - ); + '})();', + '', + 'export {lexer};' + ]; return out.join('\n'); -} +}; function generateCommonJSModule(opt) { - opt = opt || {}; + opt = prepareOptions(opt); - var out = []; - var moduleName = opt.moduleName || 'lexer'; + var out = [ + generateGenericHeaderComment(), + '', + 'var ' + opt.moduleName + ' = (function () {', + jisonLexerErrorDefinition, + '', + generateModuleBody(opt), + '', + (opt.moduleInclude ? opt.moduleInclude + ';' : ''), + '', + 'return lexer;', + '})();', + '', + 'if (typeof require !== \'undefined\' && typeof exports !== \'undefined\') {', + ' exports.lexer = ' + opt.moduleName + ';', + ' exports.lex = function () {', + ' return ' + opt.moduleName + '.lex.apply(lexer, arguments);', + ' };', + '}' + ]; - out.push( - generateModule(opt), - 'exports.lexer = ' + moduleName + ';', - 'exports.lex = function () {', - ' return ' + moduleName + '.lex.apply(lexer, arguments);', - '};' - ); return out.join('\n'); }