From 036fefd090b1414d198ddf5beadf613d6ef0b398 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:24:02 -0500 Subject: [PATCH 1/3] rename our internal set so we can use the existing js set now --- lib/jison.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/jison.js b/lib/jison.js index d078056ee..e4e1a93b9 100755 --- a/lib/jison.js +++ b/lib/jison.js @@ -3,7 +3,7 @@ // MIT X Licensed var typal = require('./util/typal').typal; -var Set = require('./util/set').Set; +var SetLike = require('./util/set').Set; var Lexer = require('jison-lex'); var ebnfParser = require('ebnf-parser'); var JSONSelect = require('JSONSelect'); @@ -46,7 +46,7 @@ function each (obj, func) { var Nonterminal = typal.construct({ constructor: function Nonterminal (symbol) { this.symbol = symbol; - this.productions = new Set(); + this.productions = new SetLike(); this.first = []; this.follows = []; this.nullable = false; @@ -450,7 +450,7 @@ lookaheadMixin.followSets = function followSets () { } } oldcount = nonterminals[t].follows.length; - Set.union(nonterminals[t].follows, set); + SetLike.union(nonterminals[t].follows, set); if (oldcount !== nonterminals[t].follows.length) { cont = true; } @@ -472,7 +472,7 @@ lookaheadMixin.first = function first (symbol) { if (firsts.indexOf(t) === -1) firsts.push(t); } else { - Set.union(firsts, this.nonterminals[t].first); + SetLike.union(firsts, this.nonterminals[t].first); } if (!this.nullable(t)) break; @@ -510,7 +510,7 @@ lookaheadMixin.firstSets = function firstSets () { for (symbol in nonterminals) { firsts = []; nonterminals[symbol].productions.forEach(function (production) { - Set.union(firsts, production.first); + SetLike.union(firsts, production.first); }); if (firsts.length !== nonterminals[symbol].first.length) { nonterminals[symbol].first = firsts; @@ -637,7 +637,7 @@ lrGeneratorMixin.Item = typal.construct({ } }); 
-lrGeneratorMixin.ItemSet = Set.prototype.construct({ +lrGeneratorMixin.ItemSet = SetLike.prototype.construct({ afterconstructor: function () { this.reductions = []; this.goes = {}; @@ -679,7 +679,7 @@ lrGeneratorMixin.closureOperation = function closureOperation (itemSet /*, closu itemQueue, syms = {}; do { - itemQueue = new Set(); + itemQueue = new SetLike(); closureSet.concat(set); set.forEach(function CO_set_forEach (item) { var symbol = item.markedSymbol; @@ -730,7 +730,7 @@ lrGeneratorMixin.gotoOperation = function gotoOperation (itemSet, symbol) { lrGeneratorMixin.canonicalCollection = function canonicalCollection () { var item1 = new this.Item(this.productions[0], 0, [this.EOF]); var firstState = this.closureOperation(new this.ItemSet(item1)), - states = new Set(firstState), + states = new SetLike(firstState), marked = 0, self = this, itemSet; @@ -1821,7 +1821,7 @@ var lr1 = lrLookaheadGenerator.beget({ itemQueue, syms = {}; do { - itemQueue = new Set(); + itemQueue = new SetLike(); closureSet.concat(set); set.forEach(function (item) { var symbol = item.markedSymbol; @@ -1872,7 +1872,7 @@ var ll = generator.beget(lookaheadMixin, { var row = table[production.symbol] || {}; var tokens = production.first; if (self.nullable(production.handle)) { - Set.union(tokens, self.nonterminals[production.symbol].follows); + SetLike.union(tokens, self.nonterminals[production.symbol].follows); } tokens.forEach(function (token) { if (row[token]) { From fd23bab497d6c4445300b33b65fe78bb461c9733 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:48:31 -0500 Subject: [PATCH 2/3] rewrite followSets to vastly improve perf --- lib/jison.js | 117 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 22 deletions(-) diff --git a/lib/jison.js b/lib/jison.js index e4e1a93b9..3d4eb5ca4 100755 --- a/lib/jison.js +++ b/lib/jison.js @@ -419,6 +419,74 @@ lookaheadMixin.followSets = 
function followSets () { nonterminals = this.nonterminals, self = this, cont = true; + const ctx = !!self.go_; + + const followsSets = new Map(); + for (const ntSym in nonterminals) { + followsSets.set(ntSym, new Set(nonterminals[ntSym].follows)); + } + + const cachedHandleIteration = (function pre_compute_handles () { + const ret = []; + + for (const production of productions) { + // q is used in Simple LALR algorithm determine follows in context + let accumulatingQ = production.suffixSym; + + const retProd = []; + for (let i = 0; i < production.handle.length; ++i) { + const t = production.handle[i]; + const curNonterminal = nonterminals[t]; + if (curNonterminal == null) { + retProd.push(null); + continue; + } + const curFollows = followsSets.get(t); + + let bool; + // for Simple LALR algorithm, self.go_ checks if + if (ctx) { + const q = self.go_(production.symbol, production.handle.slice(0, i)); + bool = q === parseInt(self.nterms_[t], 10); + } else { + bool = true; + } + + const nullable = (function iterate_subsequent_nullable () { + for (let j = i + 1; j < production.handle.length; ++j) { + const s = production.handle[j]; + const nt = nonterminals[s]; + if (nt == null) { + curFollows.add(s); + // Not nullable, so break. + return false; + } + for (const k of nt.first) { + curFollows.add(k); + } + if (!nt.nullable) { + // Not nullable, so break. + return false; + } + } + // Assume nullable until proven wrong. 
+ return true; + })(); + + retProd.push({addTransitive: nullable && bool}); + } + ret.push(retProd); + } + + return ret; + })(); + + const prevFollows = new Map(); + const nextFollows = new Map(); + for (const ntSym in nonterminals) { + prevFollows.set(ntSym, Array.from(followsSets.get(ntSym))); + nextFollows.set(ntSym, []); + } // loop until no further changes have been made while(cont) { @@ -426,36 +494,41 @@ lookaheadMixin.followSets = function followSets () { productions.forEach(function Follow_prod_forEach (production, k) { //self.trace(production.symbol,nonterminals[production.symbol].follows); - // q is used in Simple LALR algorithm determine follows in context - var q; - var ctx = !!self.go_; + const prevPFollows = prevFollows.get(production.symbol); + const curProdInfo = cachedHandleIteration[k]; - var set = [],oldcount; for (var i=0,t;t=production.handle[i];++i) { if (!nonterminals[t]) continue; + const curTerm = curProdInfo[i]; + if (curTerm.addTransitive === false) continue; - // for Simple LALR algorithm, self.go_ checks if - if (ctx) - q = self.go_(production.symbol, production.handle.slice(0, i)); - var bool = !ctx || q === parseInt(self.nterms_[t], 10); - - if (i === production.handle.length+1 && bool) { - set = nonterminals[production.symbol].follows; - } else { - var part = production.handle.slice(i+1); + (function add_transitive_follows () { + const nextNtFollows = nextFollows.get(t); + for (const f of prevPFollows) { + nextNtFollows.push(f); + } + })(); + } + }); - set = self.first(part); - if (self.nullable(part) && bool) { - set.push.apply(set, nonterminals[production.symbol].follows); + (function rewrite_amortized_follows () { + for (const [ntSym, newFollows] of nextFollows) { + const ntFollows = followsSets.get(ntSym); + const prevFollowSet = prevFollows.get(ntSym); + prevFollowSet.splice(0); + for (const f of newFollows.splice(0)) { + if (!ntFollows.has(f)) { + cont = true; + ntFollows.add(f); + prevFollowSet.push(f); } } - oldcount = 
nonterminals[t].follows.length; - SetLike.union(nonterminals[t].follows, set); - if (oldcount !== nonterminals[t].follows.length) { - cont = true; - } } - }); + })(); + } + + for (const [ntSym, completedFollows] of followsSets) { + nonterminals[ntSym].follows = Array.from(completedFollows); } }; From 7f7dcab18fd86b197d0e7ba1204af4a7c694995f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 15 Nov 2024 11:03:27 -0500 Subject: [PATCH 3/3] add some helpful docs for what was changed --- lib/jison.js | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/lib/jison.js b/lib/jison.js index 3d4eb5ca4..c79b43c3d 100755 --- a/lib/jison.js +++ b/lib/jison.js @@ -421,37 +421,53 @@ lookaheadMixin.followSets = function followSets () { cont = true; const ctx = !!self.go_; + // Cache a js Set for each nonterminal symbol for the duration of this method. This performs + // faster set operations than SetLike.union(). const followsSets = new Map(); for (const ntSym in nonterminals) { followsSets.set(ntSym, new Set(nonterminals[ntSym].follows)); } + // Much of this method's data can be precomputed before the fixed-point iteration in the + // while loop. Add "static" follows which won't be modified by the fixed-point calculation, and + // determine whether this case of the production "handle" will be involved in the + // fixed-point calculation. const cachedHandleIteration = (function pre_compute_handles () { + // Return an array of the same length as our productions. const ret = []; for (const production of productions) { - // q is used in Simple LALR algorithm determine follows in context - let accumulatingQ = production.suffixSym; - + // Return an array per element of the handle. 
const retProd = []; for (let i = 0; i < production.handle.length; ++i) { const t = production.handle[i]; const curNonterminal = nonterminals[t]; if (curNonterminal == null) { + // If this is not a nonterminal, then it doesn't need to be involved in any + // follows calculations. retProd.push(null); continue; } + // Add to our Set version of each follows value, which will be extracted at the end + // of the method. const curFollows = followsSets.get(t); let bool; - // for Simple LALR algorithm, self.go_ checks if + // for Simple LALR algorithm, self.go_ checks if (...if what?) if (ctx) { + // NB: This can be further optimized, but since this calculation was taken out + // of the fixed-point iteration, it really doesn't help much to further + // amortize it. + // q is used in the Simple LALR algorithm to determine follows in context const q = self.go_(production.symbol, production.handle.slice(0, i)); bool = q === parseInt(self.nterms_[t], 10); } else { bool = true; } + // Determine whether the rest of the handle after this nonterminal is nullable by + // extracting the logic from self.nullable(), and add "static" follows which do not + // change in the fixed-point calculation. const nullable = (function iterate_subsequent_nullable () { for (let j = i + 1; j < production.handle.length; ++j) { const s = production.handle[j]; @@ -473,17 +489,23 @@ lookaheadMixin.followSets = function followSets () { return true; })(); + // Return an anonymous object stating whether this case needs to be involved in the + // fixed-point calculation. retProd.push({addTransitive: nullable && bool}); } ret.push(retProd); } - return ret; })(); + // Create a queue of new and old inputs for the fixed-point calculation. This minimizes the + // number of additional entries we need to add at each point, which reduces the algorithmic + // complexity of this method. 
const prevFollows = new Map(); const nextFollows = new Map(); for (const ntSym in nonterminals) { + // Begin the fixed-point with all the prior follows, as well as the ones calculated earlier + // in this method. prevFollows.set(ntSym, Array.from(followsSets.get(ntSym))); nextFollows.set(ntSym, []); } @@ -502,6 +524,7 @@ lookaheadMixin.followSets = function followSets () { const curTerm = curProdInfo[i]; if (curTerm.addTransitive === false) continue; + // This is an IIFE for easier profiling. (function add_transitive_follows () { const nextNtFollows = nextFollows.get(t); for (const f of prevPFollows) { @@ -511,6 +534,8 @@ lookaheadMixin.followSets = function followSets () { } }); + // At the end of that iteration, add the follows which are actually new to the backing + // followsSets, keep only those in prevFollows for the next pass, and empty nextFollows. (function rewrite_amortized_follows () { for (const [ntSym, newFollows] of nextFollows) { const ntFollows = followsSets.get(ntSym); @@ -527,6 +552,8 @@ lookaheadMixin.followSets = function followSets () { })(); } + // Write all the calculated fixed-point follows back to the nonterminals for use by the rest of + // the jison compiler. for (const [ntSym, completedFollows] of followsSets) { nonterminals[ntSym].follows = Array.from(completedFollows); }