Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewrite followSets() to drastically improve compile performance #408

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 132 additions & 32 deletions lib/jison.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// MIT X Licensed

var typal = require('./util/typal').typal;
var Set = require('./util/set').Set;
var SetLike = require('./util/set').Set;
var Lexer = require('jison-lex');
var ebnfParser = require('ebnf-parser');
var JSONSelect = require('JSONSelect');
Expand Down Expand Up @@ -46,7 +46,7 @@ function each (obj, func) {
var Nonterminal = typal.construct({
constructor: function Nonterminal (symbol) {
this.symbol = symbol;
this.productions = new Set();
this.productions = new SetLike();
this.first = [];
this.follows = [];
this.nullable = false;
Expand Down Expand Up @@ -419,43 +419,143 @@ lookaheadMixin.followSets = function followSets () {
nonterminals = this.nonterminals,
self = this,
cont = true;
const ctx = !!self.go_;

// Cache a native JavaScript Set for each nonterminal symbol for the duration of this method.
// Native Set operations are faster than SetLike.union() on the follows arrays.
const followsSets = new Map();
for (const ntSym in nonterminals) {
followsSets.set(ntSym, new Set(nonterminals[ntSym].follows));
}

// Much of this method's data can be precomputed before the fixed-point iteration in the
// while loop. Add "static" follows which won't be modified by the fixed-point calculation, and
// determine, for each position in each production's handle, whether that position will be
// involved in the fixed-point calculation.
const cachedHandleIteration = (function pre_compute_handles () {
// Return an array of the same length as our productions.
const ret = [];

for (const production of productions) {
// Return an array per element of the handle.
const retProd = [];
for (let i = 0; i < production.handle.length; ++i) {
const t = production.handle[i];
const curNonterminal = nonterminals[t];
if (curNonterminal == null) {
// If this is not a nonterminal, then it doesn't need to be involved in any
// follows calculations.
retProd.push(null);
continue;
}
// Add to our Set version of each follows value, which will be extracted at the end
// of the method.
const curFollows = followsSets.get(t);

let bool;
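// For the Simple LALR algorithm (i.e. when self.go_ is defined), check whether the value
// self.go_ returns for this production's symbol and the handle prefix before position i
// equals the integer parsed from self.nterms_[t].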
if (ctx) {
// NB: This can be further optimized, but since this calculation was taken out
// of the fixed-point iteration, it really doesn't help much to further
// amortize it.
// q is used by the Simple LALR algorithm to determine follows in context.
const q = self.go_(production.symbol, production.handle.slice(0, i));
bool = q === parseInt(self.nterms_[t], 10);
} else {
bool = true;
}

// Determine whether the nonterminal is nullable by extracting the logic from
// self.nullable(), and add "static" follows which do not change in the
// fixed-point calculation.
const nullable = (function iterate_subsequent_nullable () {
for (let j = i + 1; j < production.handle.length; ++j) {
const s = production.handle[j];
const nt = nonterminals[s];
if (nt == null) {
curFollows.add(s);
// Not nullable, so break.
return false;
}
for (const k of nt.first) {
curFollows.add(k);
}
if (!nt.nullable) {
// Not nullable, so break.
return false;
}
}
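// No later symbol in the handle was a terminal or a non-nullable nonterminal (or there
// were no later symbols), so the remainder of the handle is nullable.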
return true;
})();

// Return an anonymous object stating whether this case needs to be involved in the
// fixed-point calculation.
retProd.push({addTransitive: nullable && bool});
}
ret.push(retProd);
}
return ret;
})();

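// Create a queue of new and old inputs for the fixed-point calculation: prevFollows holds the
// entries to propagate in the current pass, and nextFollows collects the candidates produced
// by that pass. This minimizes the number of entries we need to add on each pass, which
// reduces the algorithmic complexity of this method.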
const prevFollows = new Map();
const nextFollows = new Map();
for (const ntSym in nonterminals) {
// Begin the fixed-point with all the prior follows, as well as the ones calculated earlier
// in this method.
prevFollows.set(ntSym, Array.from(followsSets.get(ntSym)));
nextFollows.set(ntSym, []);
}

// loop until no further changes have been made
while(cont) {
cont = false;

productions.forEach(function Follow_prod_forEach (production, k) {
//self.trace(production.symbol,nonterminals[production.symbol].follows);
// q is used in Simple LALR algorithm determine follows in context
var q;
var ctx = !!self.go_;
const prevPFollows = prevFollows.get(production.symbol);
const curProdInfo = cachedHandleIteration[k];

var set = [],oldcount;
for (var i=0,t;t=production.handle[i];++i) {
if (!nonterminals[t]) continue;
const curTerm = curProdInfo[i];
if (curTerm.addTransitive === false) continue;

// This is an IIFE for easier profiling.
(function add_transitive_follows () {
const nextNtFollows = nextFollows.get(t);
for (const f of prevPFollows) {
nextNtFollows.push(f);
}
})();
}
});

// for Simple LALR algorithm, self.go_ checks if
if (ctx)
q = self.go_(production.symbol, production.handle.slice(0, i));
var bool = !ctx || q === parseInt(self.nterms_[t], 10);

if (i === production.handle.length+1 && bool) {
set = nonterminals[production.symbol].follows;
} else {
var part = production.handle.slice(i+1);

set = self.first(part);
if (self.nullable(part) && bool) {
set.push.apply(set, nonterminals[production.symbol].follows);
// At the end of each iteration, add all the newly collected follows to the backing followsSets,
// empty nextFollows, and refill prevFollows with only the entries that were actually new, so
// that the next iteration propagates just those.
(function rewrite_amortized_follows () {
for (const [ntSym, newFollows] of nextFollows) {
const ntFollows = followsSets.get(ntSym);
const prevFollowSet = prevFollows.get(ntSym);
prevFollowSet.splice(0);
for (const f of newFollows.splice(0)) {
if (!ntFollows.has(f)) {
cont = true;
ntFollows.add(f);
prevFollowSet.push(f);
}
}
oldcount = nonterminals[t].follows.length;
Set.union(nonterminals[t].follows, set);
if (oldcount !== nonterminals[t].follows.length) {
cont = true;
}
}
});
})();
}

// Write all the calculated fixed-point follows back to the nonterminals for use by the rest of
// the jison compiler.
for (const [ntSym, completedFollows] of followsSets) {
nonterminals[ntSym].follows = Array.from(completedFollows);
}
};

Expand All @@ -472,7 +572,7 @@ lookaheadMixin.first = function first (symbol) {
if (firsts.indexOf(t) === -1)
firsts.push(t);
} else {
Set.union(firsts, this.nonterminals[t].first);
SetLike.union(firsts, this.nonterminals[t].first);
}
if (!this.nullable(t))
break;
Expand Down Expand Up @@ -510,7 +610,7 @@ lookaheadMixin.firstSets = function firstSets () {
for (symbol in nonterminals) {
firsts = [];
nonterminals[symbol].productions.forEach(function (production) {
Set.union(firsts, production.first);
SetLike.union(firsts, production.first);
});
if (firsts.length !== nonterminals[symbol].first.length) {
nonterminals[symbol].first = firsts;
Expand Down Expand Up @@ -637,7 +737,7 @@ lrGeneratorMixin.Item = typal.construct({
}
});

lrGeneratorMixin.ItemSet = Set.prototype.construct({
lrGeneratorMixin.ItemSet = SetLike.prototype.construct({
afterconstructor: function () {
this.reductions = [];
this.goes = {};
Expand Down Expand Up @@ -679,7 +779,7 @@ lrGeneratorMixin.closureOperation = function closureOperation (itemSet /*, closu
itemQueue, syms = {};

do {
itemQueue = new Set();
itemQueue = new SetLike();
closureSet.concat(set);
set.forEach(function CO_set_forEach (item) {
var symbol = item.markedSymbol;
Expand Down Expand Up @@ -730,7 +830,7 @@ lrGeneratorMixin.gotoOperation = function gotoOperation (itemSet, symbol) {
lrGeneratorMixin.canonicalCollection = function canonicalCollection () {
var item1 = new this.Item(this.productions[0], 0, [this.EOF]);
var firstState = this.closureOperation(new this.ItemSet(item1)),
states = new Set(firstState),
states = new SetLike(firstState),
marked = 0,
self = this,
itemSet;
Expand Down Expand Up @@ -1821,7 +1921,7 @@ var lr1 = lrLookaheadGenerator.beget({
itemQueue, syms = {};

do {
itemQueue = new Set();
itemQueue = new SetLike();
closureSet.concat(set);
set.forEach(function (item) {
var symbol = item.markedSymbol;
Expand Down Expand Up @@ -1872,7 +1972,7 @@ var ll = generator.beget(lookaheadMixin, {
var row = table[production.symbol] || {};
var tokens = production.first;
if (self.nullable(production.handle)) {
Set.union(tokens, self.nonterminals[production.symbol].follows);
SetLike.union(tokens, self.nonterminals[production.symbol].follows);
}
tokens.forEach(function (token) {
if (row[token]) {
Expand Down