This repository has been archived by the owner on Jun 15, 2019. It is now read-only.

Commit

synced the code generators with the ones in the jison tool: support all 4 modes (CommonJS, AMD, ES6 and vanilla JS); also took the opportunity to give the lexer its own documentation comment chunk, like the one jison already generates for the parser at large.
GerHobbelt committed Feb 2, 2017
1 parent 6724da8 commit a2844c7
Showing 2 changed files with 264 additions and 39 deletions.
cli.js (3 changes: 2 additions & 1 deletion)
@@ -25,7 +25,8 @@ var opts = require('nomnom')
abbr: 't',
default: 'commonjs',
metavar: 'TYPE',
help: 'The type of module to generate (commonjs, js)'
choices: ['commonjs', 'amd', 'js', 'es'],
help: 'The type of module to generate (commonjs, amd, es, js)'
})
.option('version', {
abbr: 'V',
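For reference, a minimal sketch of how the new --module-type choice reaches the generator when jison-lex is used programmatically; the require path and the exact generate() signature are assumptions here, not something this diff confirms:

// Hypothetical programmatic equivalent of `--module-type es` (package name and
// generate() signature assumed for illustration only).
var RegExpLexer = require('jison-lex');

// A tiny lexer spec; the CLI's --module-type value ends up as `moduleType`
// in the build options that generateFromOpts() (in regexp-lexer.js below) dispatches on.
var grammar = [
    '%%',
    '\\s+       /* skip whitespace */',
    '[0-9]+     return "NUMBER";'
].join('\n');

var source = RegExpLexer.generate(grammar, null, {
    moduleType: 'es',        // 'commonjs' (default) | 'amd' | 'es' | 'js'
    moduleName: 'lexer'
});
// `source` is the generated lexer module as a string, ready to be written to a .js file.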
regexp-lexer.js (300 changes: 262 additions & 38 deletions)
@@ -2754,12 +2754,20 @@ function processGrammar(dict, tokens, build_options) {
function generateFromOpts(opt) {
var code = '';

if (opt.moduleType === 'commonjs') {
code = generateCommonJSModule(opt);
} else if (opt.moduleType === 'amd') {
code = generateAMDModule(opt);
} else {
switch (opt.moduleType) {
case 'js':
code = generateModule(opt);
break;
case 'amd':
code = generateAMDModule(opt);
break;
case 'es':
code = generateESModule(opt);
break;
case 'commonjs':
default:
code = generateCommonJSModule(opt);
break;
}

return code;
@@ -2887,62 +2895,278 @@ function generateModuleBody(opt) {
return out;
}

function generateModule(opt) {
opt = opt || {};
function generateGenericHeaderComment() {
var out = '/* lexer generated by jison-lex ' + version + ' */\n'
+ '/*\n'
+ ' * Returns a Lexer object of the following structure:\n'
+ ' *\n'
+ ' * Lexer: {\n'
+ ' * yy: {} The so-called "shared state" or rather the *source* of it;\n'
+ ' * the real "shared state" `yy` passed around to\n'
+ ' * the rule actions, etc. is a derivative/copy of this one,\n'
+ ' * not a direct reference!\n'
+ ' * }\n'
+ ' *\n'
+ ' * Lexer.prototype: {\n'
+ ' * yy: {},\n'
+ ' * EOF: 1,\n'
+ ' * ERROR: 2,\n'
+ ' *\n'
+ ' * JisonLexerError: function(msg, hash),\n'
+ ' *\n'
+ ' * performAction: function lexer__performAction(yy, yy_, $avoiding_name_collisions, YY_START, ...),\n'
+ ' * where `...` denotes the (optional) additional arguments the user passed to\n'
+ ' * `lexer.lex(...)` and specified by way of `%parse-param ...` in the **parser** grammar file\n'
+ ' *\n'
+ ' * The function parameters and `this` have the following value/meaning:\n'
+ ' * - `this` : reference to the `lexer` instance.\n'
+ ' *\n'
+ ' * - `yy` : a reference to the `yy` "shared state" object which was passed to the lexer\n'
+ ' * by way of the `lexer.setInput(str, yy)` API before.\n'
+ ' *\n'
+ ' * - `yy_` : lexer instance reference used internally.\n'
+ ' *\n'
+ ' * - `$avoiding_name_collisions` : index of the matched lexer rule (regex), used internally.\n'
+ ' *\n'
+ ' * - `YY_START`: the current lexer "start condition" state.\n'
+ ' *\n'
+ ' * - `...` : the extra arguments you specified in the `%parse-param` statement in your\n'
+ ' * **parser** grammar definition file and which are passed to the lexer via\n'
+ ' * its `lexer.lex(...)` API.\n'
+ ' *\n'
+ ' * parseError: function(str, hash),\n'
+ ' *\n'
+ ' * constructLexErrorInfo: function(error_message, is_recoverable),\n'
+ ' * Helper function.\n'
+ ' * Produces a new errorInfo \'hash object\' which can be passed into `parseError()`.\n'
+ ' * See its use in this lexer kernel in many places; example usage:\n'
+ ' *\n'
+ ' * var infoObj = lexer.constructLexErrorInfo(\'fail!\', true);\n'
+ ' * var retVal = lexer.parseError(infoObj.errStr, infoObj);\n'
+ ' *\n'
+ ' * options: { ... lexer %options ... },\n'
+ ' *\n'
+ ' * lex: function([args...]),\n'
+ ' * Produce one token of lexed input, which was passed in earlier via the `lexer.setInput()` API.\n'
+ ' * You MAY use the additional `args...` parameters as per `%parse-param` spec of the **parser** grammar:\n'
+ ' * these extra `args...` are passed verbatim to the lexer rules\' action code.\n'
+ ' *\n'
+ ' * cleanupAfterLex: function(do_not_nuke_errorinfos),\n'
+ ' * Helper function.\n'
+ ' * This helper API is invoked when the parse process has completed. This helper may\n'
+ ' * be invoked by user code to ensure the internal lexer gets properly garbage collected.\n'
+ ' *\n'
+ ' * setInput: function(input, [yy]),\n'
+ ' * input: function(),\n'
+ ' * unput: function(str),\n'
+ ' * more: function(),\n'
+ ' * reject: function(),\n'
+ ' * less: function(n),\n'
+ ' * pastInput: function(n),\n'
+ ' * upcomingInput: function(n),\n'
+ ' * showPosition: function(),\n'
+ ' * test_match: function(regex_match_array, rule_index),\n'
+ ' * next: function(...),\n'
+ ' * lex: function(...),\n'
+ ' * begin: function(condition),\n'
+ ' * pushState: function(condition),\n'
+ ' * popState: function(),\n'
+ ' * topState: function(),\n'
+ ' * _currentRules: function(),\n'
+ ' * stateStackSize: function(),\n'
+ ' *\n'
+ ' * options: { ... lexer %options ... },\n'
+ ' *\n'
+ ' * performAction: function(yy, yy_, $avoiding_name_collisions, YY_START, ...),\n'
+ ' * rules: [...],\n'
+ ' * conditions: {associative list: name ==> set},\n'
+ ' * }\n'
+ ' *\n'
+ ' *\n'
+ ' * token location info (`yylloc`): {\n'
+ ' * first_line: n,\n'
+ ' * last_line: n,\n'
+ ' * first_column: n,\n'
+ ' * last_column: n,\n'
+ ' * range: [start_number, end_number]\n'
+ ' * (where the numbers are indexes into the input string, zero-based)\n'
+ ' * }\n'
+ ' *\n'
+ ' * ---\n'
+ ' *\n'
+ ' * The parseError function receives a \'hash\' object with these members for lexer errors:\n'
+ ' *\n'
+ ' * {\n'
+ ' * text: (matched text)\n'
+ ' * token: (the produced terminal token, if any)\n'
+ ' * token_id: (the produced terminal token numeric ID, if any)\n'
+ ' * line: (yylineno)\n'
+ ' * loc: (yylloc)\n'
+ ' * recoverable: (boolean: TRUE when the parser MAY have an error recovery rule\n'
+ ' * available for this particular error)\n'
+ ' * yy: (object: the current parser internal "shared state" `yy`\n'
+ ' * as is also available in the rule actions; this can be used,\n'
+ ' * for instance, for advanced error analysis and reporting)\n'
+ ' * lexer: (reference to the current lexer instance used by the parser)\n'
+ ' * }\n'
+ ' *\n'
+ ' * while `this` will reference the current lexer instance.\n'
+ ' *\n'
+ ' * When `parseError` is invoked by the lexer, the default implementation will\n'
+ ' * attempt to invoke `yy.parser.parseError()`; when this callback is not provided\n'
+ ' * it will try to invoke `yy.parseError()` instead. When that callback is also not\n'
+ ' * provided, a `JisonLexerError` exception will be thrown containing the error\n'
+ ' * message and hash, as constructed by the `constructLexErrorInfo()` API.\n'
+ ' *\n'
+ ' * ---\n'
+ ' *\n'
+ ' * You can specify lexer options by setting / modifying the `.options` object of your Lexer instance.\n'
+ ' * These options are available:\n'
+ ' *\n'
+ ' * (Options are permanent.)\n'
+ ' * \n'
+ ' * yy: {\n'
+ ' * parseError: function(str, hash)\n'
+ ' * optional: overrides the default `parseError` function.\n'
+ ' * }\n'
+ ' *\n'
+ ' * lexer.options: {\n'
+ ' * pre_lex: function()\n'
+ ' * optional: is invoked before the lexer is invoked to produce another token.\n'
+ ' * `this` refers to the Lexer object.\n'
+ ' * post_lex: function(token) { return token; }\n'
+ ' * optional: is invoked when the lexer has produced a token `token`;\n'
+ ' * this function can override the returned token value by returning another.\n'
+ ' * When it does not return any (truthy) value, the lexer will return\n'
+ ' * the original `token`.\n'
+ ' * `this` refers to the Lexer object.\n'
+ ' *\n'
+ ' * WARNING: the next set of options are not meant to be changed. They echo the abilities of\n'
+ ' * the lexer as per when it was compiled!\n'
+ ' *\n'
+ ' * ranges: boolean\n'
+ ' * optional: `true` ==> token location info will include a .range[] member.\n'
+ ' * flex: boolean\n'
+ ' * optional: `true` ==> flex-like lexing behaviour where the rules are tested\n'
+ ' * exhaustively to find the longest match.\n'
+ ' * backtrack_lexer: boolean\n'
+ ' * optional: `true` ==> lexer regexes are tested in order and for each matching regex the action code is invoked;\n'
+ ' * the lexer terminates the scan when a token is returned by the action code.\n'
+ ' * xregexp: boolean\n'
+ ' * optional: `true` ==> lexer rule regexes are "extended regex format" requiring the\n'
+ ' * `XRegExp` library. When this %option has not been specified at compile time, all lexer\n'
+ ' * rule regexes have been written as standard JavaScript RegExp expressions.\n'
+ ' * }\n'
+ ' */\n';

var out = ['/* generated by jison-lex ' + version + ' */'];
var moduleName = opt.moduleName || 'lexer';
return out;
}
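The header comment generated above documents the public surface of every generated lexer. As a hedged illustration of that surface (output file name, input string and the use of the CommonJS build are assumptions, not part of this diff), typical consumer code looks roughly like this:

// Sketch of consuming a generated lexer; file name './lexer' is assumed.
var lexer = require('./lexer').lexer;

// Optional "shared state": a custom parseError here overrides the default,
// as described under `yy: { parseError: ... }` in the comment above.
var yy = {
    parseError: function (str, hash) {
        console.error('lex error at line ' + hash.line + ': ' + str);
    }
};

lexer.setInput('40 + 2', yy);

// Pull tokens until end of input (lexer.EOF === 1 per the documented prototype).
var token;
while ((token = lexer.lex()) !== lexer.EOF) {
    console.log(token, lexer.yytext);
}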

out.push('var ' + moduleName + ' = (function () {');
out.push(jisonLexerErrorDefinition);
out.push(generateModuleBody(opt));
function prepareOptions(opt) {
opt = opt || {};

if (opt.moduleInclude) {
out.push(opt.moduleInclude + ';');
// check for illegal identifier
if (!opt.moduleName || !opt.moduleName.match(/^[a-zA-Z_$][a-zA-Z0-9_$\.]*$/)) {
if (opt.moduleName) {
var msg = 'WARNING: The specified moduleName "' + opt.moduleName + '" is illegal (only characters [a-zA-Z0-9_$] and "." dot are accepted); using the default moduleName "lexer" instead.';
if (typeof opt.warn_cb === 'function') {
opt.warn_cb(msg);
} else {
// do not treat as warning; barf hairball instead so that this oddity gets noticed right away!
throw new Error(msg);
}
}
opt.moduleName = 'lexer';
}
return opt;
};

function generateModule(opt) {
opt = prepareOptions(opt);

out.push(
var out = [
generateGenericHeaderComment(),
'',
'var ' + opt.moduleName + ' = (function () {',
jisonLexerErrorDefinition,
'',
generateModuleBody(opt),
'',
(opt.moduleInclude ? opt.moduleInclude + ';' : ''),
'',
'return lexer;',
'})();'
);
];

return out.join('\n');
}

function generateAMDModule(opt) {
opt = opt || {};
opt = prepareOptions(opt);

var out = ['/* generated by jison-lex ' + version + ' */'];
var out = [
generateGenericHeaderComment(),
'',
'define([], function () {',
jisonLexerErrorDefinition,
'',
generateModuleBody(opt),
'',
(opt.moduleInclude ? opt.moduleInclude + ';' : ''),
'',
'return lexer;',
'});'
];

out.push('define([], function () {');
out.push(jisonLexerErrorDefinition);
out.push(generateModuleBody(opt));
return out.join('\n');
}
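The AMD variant wraps the same module body in define() and returns the lexer, so under an AMD loader such as RequireJS (module id assumed) consumption would look roughly like this sketch:

// Assumes the generated file is mapped to the module id 'lexer'.
require(['lexer'], function (lexer) {
    lexer.setInput('123');
    console.log(lexer.lex());
});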

if (opt.moduleInclude) {
out.push(opt.moduleInclude + ';');
}
function generateESModule(opt) {
opt = prepareOptions(opt);

out.push(
var out = [
generateGenericHeaderComment(),
'',
'var lexer = (function () {',
jisonLexerErrorDefinition,
'',
generateModuleBody(opt),
'',
(opt.moduleInclude ? opt.moduleInclude + ';' : ''),
'',
'return lexer;',
'});'
);
'})();',
'',
'export {lexer};'
];

return out.join('\n');
}
};
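Since the ES variant ends with `export {lexer};`, consuming the generated file is a plain named import; the output file name in this sketch is an assumption:

// main.mjs -- assumes the generated lexer was written to ./lexer.mjs
import { lexer } from './lexer.mjs';

lexer.setInput('hello world');
console.log(lexer.lex());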

function generateCommonJSModule(opt) {
opt = opt || {};
opt = prepareOptions(opt);

var out = [];
var moduleName = opt.moduleName || 'lexer';
var out = [
generateGenericHeaderComment(),
'',
'var ' + opt.moduleName + ' = (function () {',
jisonLexerErrorDefinition,
'',
generateModuleBody(opt),
'',
(opt.moduleInclude ? opt.moduleInclude + ';' : ''),
'',
'return lexer;',
'})();',
'',
'if (typeof require !== \'undefined\' && typeof exports !== \'undefined\') {',
' exports.lexer = ' + opt.moduleName + ';',
' exports.lex = function () {',
' return ' + opt.moduleName + '.lex.apply(lexer, arguments);',
' };',
'}'
];

out.push(
generateModule(opt),
'exports.lexer = ' + moduleName + ';',
'exports.lex = function () {',
' return ' + moduleName + '.lex.apply(lexer, arguments);',
'};'
);
return out.join('\n');
}
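The CommonJS variant attaches both the lexer object and a convenience lex() wrapper to `exports`, so under Node.js (output file name assumed) either form works, as in this sketch:

// Assumes the generated module was written to ./lexer.js
var generated = require('./lexer');

var lexer = generated.lexer;            // the full lexer object
lexer.setInput('abc');

console.log(generated.lex());           // convenience wrapper, delegates to lexer.lex()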


0 comments on commit a2844c7
