From 05291eb045188aba024084705d95f4e2664f3078 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 10 Nov 2024 20:02:15 -0500 Subject: [PATCH] wrap literal tokens more hygienically --- lib/coffeescript/grammar.js | 2 +- lib/coffeescript/parser.js | 2 +- lib/coffeescript/rewriter.js | 122 ++++++++++++++++--- semantics.coffee | 221 +++++++++++++++++++++++++++++++---- src/grammar.coffee | 2 +- src/rewriter.coffee | 60 +++++++++- 6 files changed, 367 insertions(+), 42 deletions(-) diff --git a/lib/coffeescript/grammar.js b/lib/coffeescript/grammar.js index 85c3fd906b..377215c167 100644 --- a/lib/coffeescript/grammar.js +++ b/lib/coffeescript/grammar.js @@ -227,7 +227,7 @@ String: [ o('STRING', function() { - return new StringLiteral($1.slice(1, + return new StringLiteral($1.toString().slice(1, -1), // strip artificial quotes and unwrap to primitive string { quote: $1.quote, diff --git a/lib/coffeescript/parser.js b/lib/coffeescript/parser.js index c13a778196..d65fa28cee 100644 --- a/lib/coffeescript/parser.js +++ b/lib/coffeescript/parser.js @@ -155,7 +155,7 @@ this.$ = yy.addDataToNode(yy, _$[$0], $$[$0], _$[$0], $$[$0], true)(new yy.Numbe })); break; case 42: -this.$ = yy.addDataToNode(yy, _$[$0], $$[$0], _$[$0], $$[$0], true)(new yy.StringLiteral($$[$0].slice(1, +this.$ = yy.addDataToNode(yy, _$[$0], $$[$0], _$[$0], $$[$0], true)(new yy.StringLiteral($$[$0].toString().slice(1, -1), // strip artificial quotes and unwrap to primitive string { quote: $$[$0].quote, diff --git a/lib/coffeescript/rewriter.js b/lib/coffeescript/rewriter.js index c38cfd0873..456592a06f 100644 --- a/lib/coffeescript/rewriter.js +++ b/lib/coffeescript/rewriter.js @@ -6,9 +6,9 @@ // a series of passes over the token stream, using this **Rewriter** to convert // shorthand into the unambiguous long form, add implicit indentation and // parentheses, and generally clean things up. 
- var BALANCED_PAIRS, CALL_CLOSERS, CONTROL_IN_IMPLICIT, DISCARDED, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, IMPLICIT_UNSPACED_CALL, INVERSES, LINEBREAKS, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, UNFINISHED, extractAllCommentTokens, generate, k, left, len, moveComments, right, throwSyntaxError, - indexOf = [].indexOf, - hasProp = {}.hasOwnProperty; + var BALANCED_PAIRS, CALL_CLOSERS, CONTROL_IN_IMPLICIT, DISCARDED, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, IMPLICIT_UNSPACED_CALL, INVERSES, LINEBREAKS, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, UNFINISHED, WrappedTokenData, extractAllCommentTokens, generate, k, left, len, moveComments, right, throwSyntaxError, + hasProp = {}.hasOwnProperty, + indexOf = [].indexOf; ({throwSyntaxError, extractAllCommentTokens} = require('./helpers')); @@ -51,6 +51,110 @@ return token; }; + WrappedTokenData = (function() { + class WrappedTokenData { + static defineInnerProp(obj, sym, val, propOpts = {}) { + return Object.defineProperty(obj, sym, { + configurable: false, + enumerable: false, + writable: true, + ...propOpts, + value: val + }); + } + + constructor(value, {data, generated} = {}) { + if (value == null) { + throw new TypeError(`value must not be null or undefined: ${value}`); + } + if (data != null) { + if (typeof data !== 'object') { + throw new TypeError(`data must be a dict: ${data}`); + } + } + if (generated != null) { + if (typeof generated !== 'boolean') { + throw new TypeError(`generated must be a bool: ${generated}`); + } + } + this.constructor.defineInnerProp(this, this.constructor.val, value); + this.constructor.defineInnerProp(this, this.constructor.data, data); + this.constructor.defineInnerProp(this, this.constructor.generated, generated); + } + + innerVal() { + return this[this.constructor.val]; + } + + innerData() { + return this[this.constructor.data]; + } + + isGenerated() { + var ref; + return (ref = 
this[this.constructor.generated]) != null ? ref : false; + } + + toString() { + return this.valueOf(); + } + + valueOf() { + return this.innerVal(); + } + + static withData({data, generated}) { + return (value) => { + var key, ret, val; + ret = new WrappedTokenData(value, {data, generated}); + if (data != null) { + for (key in data) { + if (!hasProp.call(data, key)) continue; + val = data[key]; + WrappedTokenData.defineInnerProp(ret, key, val, { + enumerable: true + }); + } + } + if (generated) { + WrappedTokenData.defineInnerProp(ret, 'generated', generated, { + enumerable: true + }); + } + return ret; + }; + } + + }; + + WrappedTokenData.val = Symbol('val'); + + WrappedTokenData.data = Symbol('data'); + + WrappedTokenData.generated = Symbol('generated'); + + Object.defineProperty(WrappedTokenData.prototype, Symbol.toStringTag, { + get: function() { + return this.innerVal(); + } + }); + + WrappedTokenData.prototype[Symbol.toPrimitive] = function(hint) { + switch (hint) { + case 'string': + return this.toString(); + case 'number': + return +this.valueOf(); + default: + // Any other hint is 'default' (per the ECMAScript ToPrimitive contract): coerce like a string. + return this.toString(); + } + }; + + return WrappedTokenData; + + }).call(this); + // The **Rewriter** class is used by the [Lexer](lexer.html), directly against // its internal array of tokens. exports.Rewriter = Rewriter = (function() { @@ -1029,18 +1133,8 @@ // primitive string and separately passing any expected token data properties exposeTokenDataToGrammar() { return this.scanTokens(function(token, i) { - var key, ref, ref1, val; if (token.generated || (token.data && Object.keys(token.data).length !== 0)) { - token[1] = new String(token[1]); - ref1 = (ref = token.data) != null ? 
ref : {}; - for (key in ref1) { - if (!hasProp.call(ref1, key)) continue; - val = ref1[key]; - token[1][key] = val; - } - if (token.generated) { - token[1].generated = true; - } + token[1] = (WrappedTokenData.withData(token))(token[1]); } return 1; }); diff --git a/semantics.coffee b/semantics.coffee index 619b5104a7..ee4ced230e 100644 --- a/semantics.coffee +++ b/semantics.coffee @@ -1,33 +1,210 @@ assert = require 'assert' process = require 'process' + coffee = require './lib/coffeescript/index.js' -[input, ...] = process.argv[2..] +### +# Token types for debugging +# see tag:/makeToken:/token:/makeLocationData: methods in lexer.coffee! + +# each token is an array: +[tag, value, loc] = token + +# ...but also has some optional data: +{ + generated?: bool = no, + indentSize?: number, + origin?: , + # NB: also added to `value` with Object.assign: see addTokenData + data?: {} = {}, # arbitrary data +} +### +class InternTable + @defaultProperty: Symbol 'global-table' + + constructor: (@generatedProperty = @constructor.defaultProperty, @table = new Map) -> + assert.equal 'symbol', typeof @generatedProperty + assert.ok @table instanceof Map + + @idCounter = 0 + gensym: (value) -> Symbol "key(#{@constructor.idCounter++})|#{@keyFormat value}" + + keyFormat: (value) -> value.toString() + + internNew: (value) -> + sym = @gensym value + + backref = {sym, table: @} + wrapped = Object.defineProperty value, @generatedProperty, + configurable: no + enumerable: no + writable: no + value: backref + + @table.set sym, wrapped + wrapped + + getInternKey: (obj) -> + throw new TypeError "object #{obj} not wrapped!" 
unless Object.hasOwn obj, @generatedProperty + obj[@generatedProperty] + + dereference: (sym) -> + throw new TypeError "symbol #{sym} not interned in table #{@}" unless @table.has sym + @table.get sym + + +class TokenTable extends InternTable + constructor: (table) -> + prop = Symbol 'token-table' + super prop, table + + keyFormat: ([type, value, ...]) -> + if type is value then value + else "#{type}=#{value}" + +tokenTable = new TokenTable + +rawTokens = (input) -> tokenTable.internNew token for token in coffee.tokens input + +ID_MAPPING_FIELDS = ['origin'] +RECOGNIZED_TOKEN_FIELDS = [ + 'data' + 'generated' + 'indentSize' + 'spaced' + 'newLine' + ...ID_MAPPING_FIELDS +] +extractTokenMetadata = do (tokenTable) -> (token) -> + ret = null + for fieldName in RECOGNIZED_TOKEN_FIELDS + continue unless Object.hasOwn token, fieldName + ret ?= {} + metadataFieldValue = token[fieldName] + value = switch # TODO: could be `switch fieldName ...`! + when fieldName in ID_MAPPING_FIELDS + {sym: originSym, table} = tokenTable.getInternKey metadataFieldValue + assert Object.is table, tokenTable + originSym + else + metadataFieldValue + ret[fieldName] = value + ret + + +locField = Symbol 'location-data-field' +normalizeTokens = do (locField, tokenTable) -> (input) -> for curToken in rawTokens input + [tag, value, loc] = curToken + {sym, table} = tokenTable.getInternKey curToken + assert Object.is table, tokenTable -rawTokens = (input) -> {type, value} for [type, value] in coffee.tokens input + ret = {tag} -encodeTokens = (input) -> for {type, value} in rawTokens input - value = value.toString() - if type.match(/^[A-Z_]+$/) or type is 'BIN?' - if value.match /^\s+$/ - {whitespace: type, value: encodeURIComponent value} + if (metadata = extractTokenMetadata curToken)? + ret.meta = metadata + + ret.value = switch typeof value + when 'number' + # This is an indent token. 
+ assert.ok tag in ['INDENT', 'OUTDENT'], {tag} + {indentValue: value} + when 'object' + switch tag + when 'INDENT', 'OUTDENT' + assert.ok metadata?.generated, {tag} + {indentValue: value.innerVal()} + else value + when 'string' + # This corresponds to exactly the string from the input. + value + else throw new TypeError "unrecognized token value type: '#{typeof value}' for '#{value}'" + + ret = Object.defineProperty ret, locField, + configurable: no + enumerable: no + writable: yes + value: loc + + ret + +indentTypes = + INDENT: 'in' + OUTDENT: 'out' + +metaField = Symbol 'meta-data-field' +encodeTokens = do (metaField, indentTypes) -> (input) -> for curToken in normalizeTokens input + {tag, value, meta} = curToken + + encoded = switch # TODO: could be `switch tag ...`! + when Object.hasOwn indentTypes, tag + indentType = indentTypes[tag] + + {indentValue} = value + assert.equal 'number', typeof indentValue, {value} + + ret = + dent: indentType + width: indentValue + + {indentSize} = meta ? {} + if indentSize? + ret.start = indentSize + + ret + when tag.match /^[A-Z_]+\??$/ + switch + when tag is 'TERMINATOR' + assert.equal value.toString(), '\n', {value} + {terminator: value} + when value.toString().match /^\s+$/ + {whitespace: tag, value: encodeURIComponent value} + else + {tag, value} else - {type, value} - else - assert (type is value), JSON.stringify {type, value} - {punct: value} + assert.equal tag, value, JSON.stringify {tag, value} + {punct: value} + + if meta? + encoded = Object.defineProperty encoded, metaField, + configurable: no + enumerable: no + writable: yes + value: meta + + {generated} = meta ? {} + if generated? + encoded.generated = generated -tokenPrint = (input) -> for {punct, whitespace, type, value} in encodeTokens input - if punct? + encoded + +tokenPrint = (input) -> for encodedToken in encodeTokens input + {punct, whitespace, tag, value, dent, start, width, terminator} = encodedToken + if terminator? 
+ assert.equal terminator, '\n' + 'TERMINATOR' + else if punct? ": #{punct}" else if whitespace? "#{whitespace}('#{value}')" + else if dent? + prefix = if start? then "@#{start}" else '' + "dent(#{dent}, #{width})#{prefix}" + else if tag is 'PARAM_START' + assert.equal value, '(' + '@params: (...' + else if tag is 'PARAM_END' + assert.equal value, ')' + '@params: ...)' else - "#{type}(#{value})" + "#{tag}(#{value})" nodes = (input) -> coffee.nodes input -# .body.expressions[0].params[0].name.properties[0] + +DEFAULT_PATH = '.body.expressions' +indexByExpr = (pathExpr) -> + pathExpr ?= DEFAULT_PATH + (value) -> eval "value#{pathExpr}" compiled = (input) -> coffee.compile input, bare: yes @@ -35,21 +212,21 @@ evaled = (input) -> coffee.eval input {TOK, AST, AST_PATH, COMP, EV} = process.env -output = if TOK? + +output = (input) -> if TOK? switch TOK when 'raw' then rawTokens input + when 'norm' then normalizeTokens input when 'enc' then encodeTokens input else (tokenPrint input).join '\n' else if AST? - ret = nodes input - if AST_PATH? - eval "ret#{AST_PATH}" - else - ret + (indexByExpr (AST_PATH ? null)) nodes input else if COMP? compiled input else if EV? evaled input else throw new Error('wtf') -console.log output +[input, ...] = process.argv[2..] 
+ +console.log output(input) diff --git a/src/grammar.coffee b/src/grammar.coffee index 03ee092ee1..febc39e947 100644 --- a/src/grammar.coffee +++ b/src/grammar.coffee @@ -203,7 +203,7 @@ grammar = String: [ o 'STRING', -> new StringLiteral( - $1.slice 1, -1 # strip artificial quotes and unwrap to primitive string + $1.toString().slice 1, -1 # strip artificial quotes and unwrap to primitive string quote: $1.quote initialChunk: $1.initialChunk finalChunk: $1.finalChunk diff --git a/src/rewriter.coffee b/src/rewriter.coffee index e8fbcbca7b..2d682ef27c 100644 --- a/src/rewriter.coffee +++ b/src/rewriter.coffee @@ -31,6 +31,62 @@ generate = (tag, value, origin, commentsToken) -> moveComments commentsToken, token if commentsToken token + +class WrappedTokenData + @val: Symbol 'val' + @data: Symbol 'data' + @generated: Symbol 'generated' + + @defineInnerProp: (obj, sym, val, propOpts = {}) => Object.defineProperty obj, sym, { + configurable: no, + enumerable: no, + writable: yes, + ...propOpts, + value: val + } + + constructor: (value, {data, generated} = {}) -> + throw new TypeError "value must not be null or undefined: #{value}" unless value? + if data? + unless typeof data is 'object' + throw new TypeError "data must be a dict: #{data}" + if generated? + unless typeof generated is 'boolean' + throw new TypeError "generated must be a bool: #{generated}" + + @constructor.defineInnerProp @, @constructor.val, value + @constructor.defineInnerProp @, @constructor.data, data + @constructor.defineInnerProp @, @constructor.generated, generated + + innerVal: -> @[@constructor.val] + innerData: -> @[@constructor.data] + isGenerated: -> @[@constructor.generated] ? 
no + + Object.defineProperty @::, Symbol.toStringTag, + get: -> @innerVal() + + @::[Symbol.toPrimitive] = (hint) -> switch hint + when 'string' then @toString() + when 'number' then +@valueOf() + else + # Any other hint is 'default' (per the ECMAScript ToPrimitive contract): coerce like a string. + @toString() + + toString: -> @valueOf() + valueOf: -> @innerVal() + + @withData: ({data, generated}) => (value) => + ret = new @ value, {data, generated} + if data? + for own key, val of data + @defineInnerProp ret, key, val, + enumerable: yes + if generated + @defineInnerProp ret, 'generated', generated, + enumerable: yes + ret + + # The **Rewriter** class is used by the [Lexer](lexer.html), directly against # its internal array of tokens. exports.Rewriter = class Rewriter @@ -761,9 +817,7 @@ exports.Rewriter = class Rewriter exposeTokenDataToGrammar: -> @scanTokens (token, i) -> if token.generated or (token.data and Object.keys(token.data).length isnt 0) - token[1] = new String token[1] - token[1][key] = val for own key, val of (token.data ? {}) - token[1].generated = yes if token.generated + token[1] = (WrappedTokenData.withData token) token[1] 1 # Generate the indentation tokens, based on another token on the same line.