From 05291eb045188aba024084705d95f4e2664f3078 Mon Sep 17 00:00:00 2001
From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com>
Date: Sun, 10 Nov 2024 20:02:15 -0500
Subject: [PATCH] wrap literal tokens more hygienically

---
 lib/coffeescript/grammar.js  |   2 +-
 lib/coffeescript/parser.js   |   2 +-
 lib/coffeescript/rewriter.js | 122 ++++++++++++++++---
 semantics.coffee             | 221 +++++++++++++++++++++++++++++++----
 src/grammar.coffee           |   2 +-
 src/rewriter.coffee          |  60 +++++++++-
 6 files changed, 367 insertions(+), 42 deletions(-)

diff --git a/lib/coffeescript/grammar.js b/lib/coffeescript/grammar.js
index 85c3fd906b..377215c167 100644
--- a/lib/coffeescript/grammar.js
+++ b/lib/coffeescript/grammar.js
@@ -227,7 +227,7 @@
     String: [
       o('STRING',
       function() {
-        return new StringLiteral($1.slice(1,
+        return new StringLiteral($1.toString().slice(1,
       -1), // strip artificial quotes and unwrap to primitive string
       {
           quote: $1.quote,
diff --git a/lib/coffeescript/parser.js b/lib/coffeescript/parser.js
index c13a778196..d65fa28cee 100644
--- a/lib/coffeescript/parser.js
+++ b/lib/coffeescript/parser.js
@@ -155,7 +155,7 @@ this.$ = yy.addDataToNode(yy, _$[$0], $$[$0], _$[$0], $$[$0], true)(new yy.Numbe
         }));
 break;
 case 42:
-this.$ = yy.addDataToNode(yy, _$[$0], $$[$0], _$[$0], $$[$0], true)(new yy.StringLiteral($$[$0].slice(1,
+this.$ = yy.addDataToNode(yy, _$[$0], $$[$0], _$[$0], $$[$0], true)(new yy.StringLiteral($$[$0].toString().slice(1,
       -1), // strip artificial quotes and unwrap to primitive string
       {
           quote: $$[$0].quote,
diff --git a/lib/coffeescript/rewriter.js b/lib/coffeescript/rewriter.js
index c38cfd0873..456592a06f 100644
--- a/lib/coffeescript/rewriter.js
+++ b/lib/coffeescript/rewriter.js
@@ -6,9 +6,9 @@
   // a series of passes over the token stream, using this **Rewriter** to convert
   // shorthand into the unambiguous long form, add implicit indentation and
   // parentheses, and generally clean things up.
-  var BALANCED_PAIRS, CALL_CLOSERS, CONTROL_IN_IMPLICIT, DISCARDED, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, IMPLICIT_UNSPACED_CALL, INVERSES, LINEBREAKS, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, UNFINISHED, extractAllCommentTokens, generate, k, left, len, moveComments, right, throwSyntaxError,
-    indexOf = [].indexOf,
-    hasProp = {}.hasOwnProperty;
+  var BALANCED_PAIRS, CALL_CLOSERS, CONTROL_IN_IMPLICIT, DISCARDED, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, IMPLICIT_UNSPACED_CALL, INVERSES, LINEBREAKS, Rewriter, SINGLE_CLOSERS, SINGLE_LINERS, UNFINISHED, WrappedTokenData, extractAllCommentTokens, generate, k, left, len, moveComments, right, throwSyntaxError,
+    hasProp = {}.hasOwnProperty,
+    indexOf = [].indexOf;
 
   ({throwSyntaxError, extractAllCommentTokens} = require('./helpers'));
 
@@ -51,6 +51,110 @@
     return token;
   };
 
+  WrappedTokenData = (function() {
+    // Stand-in for a token's value that still coerces to the underlying primitive while
+    // exposing the token's `data` entries and `generated` flag as properties (e.g. `$1.quote`).
+    class WrappedTokenData {
+      // Define `sym` on `obj`; hidden, writable and non-configurable unless `propOpts` overrides.
+      static defineInnerProp(obj, sym, val, propOpts = {}) {
+        return Object.defineProperty(obj, sym, {
+          configurable: false,
+          enumerable: false,
+          writable: true,
+          ...propOpts,
+          value: val
+        });
+      }
+
+      // Wrap `value` (anything but null/undefined); throws TypeError on a non-object `data` or non-boolean `generated`.
+      constructor(value, {data, generated} = {}) {
+        if (value == null) {
+          throw new TypeError(`value must not be null or undefined: ${value}`);
+        }
+        if (data != null && typeof data !== 'object') {
+          throw new TypeError(`data must be a dict: ${data}`);
+        }
+        if (generated != null && typeof generated !== 'boolean') {
+          throw new TypeError(`generated must be a bool: ${generated}`);
+        }
+        this.constructor.defineInnerProp(this, this.constructor.val, value);
+        this.constructor.defineInnerProp(this, this.constructor.data, data);
+        this.constructor.defineInnerProp(this, this.constructor.generated, generated);
+      }
+      // The wrapped value, exactly as given to the constructor.
+      innerVal() {
+        return this[this.constructor.val];
+      }
+      // The `data` object given to the constructor (may be undefined).
+      innerData() {
+        return this[this.constructor.data];
+      }
+      // Whether the token was flagged as generated; false when unset.
+      isGenerated() {
+        var ref;
+        return (ref = this[this.constructor.generated]) != null ? ref : false;
+      }
+      // NOTE: returns the inner value unchanged, so the result is a string only if the inner value is one.
+      toString() {
+        return this.valueOf();
+      }
+      // Unwrap to the inner value.
+      valueOf() {
+        return this.innerVal();
+      }
+      // Returns a factory that wraps a value and copies `data`'s own keys (and a truthy `generated`) onto the wrapper as enumerable properties.
+      static withData({data, generated}) {
+        return (value) => {
+          var key, ret, val;
+          ret = new WrappedTokenData(value, {data, generated});
+          if (data != null) {
+            for (key in data) {
+              if (!hasProp.call(data, key)) continue;
+              val = data[key];
+              WrappedTokenData.defineInnerProp(ret, key, val, {
+                enumerable: true
+              });
+            }
+          }
+          if (generated) {
+            WrappedTokenData.defineInnerProp(ret, 'generated', generated, {
+              enumerable: true
+            });
+          }
+          return ret;
+        };
+      }
+
+    };
+
+    WrappedTokenData.val = Symbol('val');
+
+    WrappedTokenData.data = Symbol('data');
+
+    WrappedTokenData.generated = Symbol('generated');
+    // Report the inner value as the wrapper's `Symbol.toStringTag`.
+    Object.defineProperty(WrappedTokenData.prototype, Symbol.toStringTag, {
+      get: function() {
+        return this.innerVal();
+      }
+    });
+    // Primitive coercion: the 'number' hint yields a number; 'string' and 'default' yield the inner value.
+    WrappedTokenData.prototype[Symbol.toPrimitive] = function(hint) {
+      switch (hint) {
+        case 'string':
+          return this.toString();
+        case 'number':
+          return +this.valueOf();
+        default: // `assert` is not defined in this module, so the hint is validated directly
+          if (hint !== 'default') throw new TypeError(`unexpected hint: ${String(hint)}`);
+          return this.toString();
+      }
+    };
+
+    return WrappedTokenData;
+
+  }).call(this);
+
   // The **Rewriter** class is used by the [Lexer](lexer.html), directly against
   // its internal array of tokens.
   exports.Rewriter = Rewriter = (function() {
@@ -1029,18 +1133,8 @@
       // primitive string and separately passing any expected token data properties
       exposeTokenDataToGrammar() {
         return this.scanTokens(function(token, i) {
-          var key, ref, ref1, val;
           if (token.generated || (token.data && Object.keys(token.data).length !== 0)) {
-            token[1] = new String(token[1]);
-            ref1 = (ref = token.data) != null ? ref : {};
-            for (key in ref1) {
-              if (!hasProp.call(ref1, key)) continue;
-              val = ref1[key];
-              token[1][key] = val;
-            }
-            if (token.generated) {
-              token[1].generated = true;
-            }
+            token[1] = (WrappedTokenData.withData(token))(token[1]);
           }
           return 1;
         });
diff --git a/semantics.coffee b/semantics.coffee
index 619b5104a7..ee4ced230e 100644
--- a/semantics.coffee
+++ b/semantics.coffee
@@ -1,33 +1,210 @@
 assert = require 'assert'
 process = require 'process'
+
 coffee = require './lib/coffeescript/index.js'
 
-[input, ...] = process.argv[2..]
+###
+# Token types for debugging
+# see tag:/makeToken:/token:/makeLocationData: methods in lexer.coffee!
+
+# each token is an array:
+[tag, value, loc] = token
+
+# ...but also has some optional data:
+{
+  generated?: bool = no,
+  indentSize?: number,
+  origin?: <token>,
+  # NB: also added to `value` with Object.assign: see addTokenData
+  data?: {} = {}, # arbitrary data
+}
+###
+class InternTable
+  @defaultProperty: Symbol 'global-table'
+  # Registry that tags objects with a hidden back-reference (under `generatedProperty`) and indexes them by a fresh symbol in `table`.
+  constructor: (@generatedProperty = @constructor.defaultProperty, @table = new Map) ->
+    assert.equal 'symbol', typeof @generatedProperty
+    assert.ok @table instanceof Map
+  # Running counter used by `gensym` to number the symbols it creates.
+  @idCounter = 0
+  gensym: (value) -> Symbol "key(#{@constructor.idCounter++})|#{@keyFormat value}"
+  # Text embedded in a generated symbol's description; subclasses may override.
+  keyFormat: (value) -> value.toString()
+  # Tag `value` in place with a read-only, non-enumerable back-reference `{sym, table}`, register it under `sym`, and return it.
+  internNew: (value) ->
+    sym = @gensym value
+
+    backref = {sym, table: @}
+    wrapped = Object.defineProperty value, @generatedProperty,
+      configurable: no
+      enumerable: no
+      writable: no
+      value: backref
+
+    @table.set sym, wrapped
+    wrapped
+  # Return the `{sym, table}` back-reference stored on `obj`; throws TypeError if `obj` does not own this table's property.
+  getInternKey: (obj) ->
+    throw new TypeError "object #{obj} not wrapped!" unless Object.hasOwn obj, @generatedProperty
+    obj[@generatedProperty]
+  # Return the object registered under `sym`; throws TypeError if there is none.
+  dereference: (sym) ->
+    throw new TypeError "symbol #{String sym} not interned in this table" unless @table.has sym
+    @table.get sym
+
+
+class TokenTable extends InternTable
+  # Tokens are tracked under their own dedicated symbol property.
+  constructor: (table) ->
+    super Symbol('token-table'), table
+
+  # Label a token by its tag alone when tag and value coincide, otherwise as `tag=value`.
+  keyFormat: ([type, value, ...]) ->
+    if type is value then value else "#{type}=#{value}"
+
+tokenTable = new TokenTable
+# Lex `input` and intern every resulting token in the shared token table.
+rawTokens = (input) -> coffee.tokens(input).map (token) -> tokenTable.internNew token
+
+ID_MAPPING_FIELDS = ['origin']
+RECOGNIZED_TOKEN_FIELDS = [
+  'data'
+  'generated'
+  'indentSize'
+  'spaced'
+  'newLine'
+  ...ID_MAPPING_FIELDS
+]
+# Collect the recognized metadata fields that `token` owns, or null when it owns none.
+# Fields listed in ID_MAPPING_FIELDS hold other tokens; each is replaced by the
+# intern symbol of the token it refers to.
+extractTokenMetadata = do (tokenTable) -> (token) ->
+  metadata = null
+  for field in RECOGNIZED_TOKEN_FIELDS when Object.hasOwn token, field
+    metadata ?= {}
+    fieldValue = token[field]
+    if field in ID_MAPPING_FIELDS
+      {sym: originSym, table} = tokenTable.getInternKey fieldValue
+      assert Object.is table, tokenTable
+      metadata[field] = originSym
+    else
+      metadata[field] = fieldValue
+  metadata
+
+# Turn each raw token into a plain `{tag, meta?, value}` record; the location data is attached under `locField`.
+locField = Symbol 'location-data-field'
+normalizeTokens = do (locField, tokenTable) -> (input) -> for curToken in rawTokens input
+  [tag, value, loc] = curToken
 
+  {sym, table} = tokenTable.getInternKey curToken
+  assert Object.is table, tokenTable
 
-rawTokens = (input) -> {type, value} for [type, value] in coffee.tokens input
+  ret = {tag}
 
-encodeTokens = (input) -> for {type, value} in rawTokens input
-  value = value.toString()
-  if type.match(/^[A-Z_]+$/) or type is 'BIN?'
-    if value.match /^\s+$/
-      {whitespace: type, value: encodeURIComponent value}
+  if (metadata = extractTokenMetadata curToken)?
+    ret.meta = metadata
+  # Numeric values and wrapped INDENT/OUTDENT values become `{indentValue}`; other objects and strings pass through unchanged.
+  ret.value = switch typeof value
+    when 'number'
+      # This is an indent token.
+      assert.ok tag in ['INDENT', 'OUTDENT'], {tag}
+      {indentValue: value}
+    when 'object'
+      switch tag
+        when 'INDENT', 'OUTDENT'
+          assert.ok metadata?.generated, {tag}
+          {indentValue: value.innerVal()}
+        else value
+    when 'string'
+      # This corresponds to exactly the string from the input.
+      value
+    else throw new TypeError "unrecognized token value type: '#{typeof value}' for '#{value}'"
+  # Attach the location data under the `locField` symbol as a non-enumerable property.
+  ret = Object.defineProperty ret, locField,
+    configurable: no
+    enumerable: no
+    writable: yes
+    value: loc
+
+  ret
+
+indentTypes =
+  INDENT: 'in'
+  OUTDENT: 'out'
+# encodeTokens re-shapes each normalized token into a small record keyed by kind: dent, terminator, whitespace, tag/value, or punct.
+metaField = Symbol 'meta-data-field'
+encodeTokens = do (metaField, indentTypes) -> (input) -> for curToken in normalizeTokens input
+  {tag, value, meta} = curToken
+
+  encoded = switch  # TODO: could be `switch tag ...`!
+    when Object.hasOwn indentTypes, tag
+      indentType = indentTypes[tag]
+
+      {indentValue} = value
+      assert.equal 'number', typeof indentValue, {value}
+
+      ret =
+        dent: indentType
+        width: indentValue
+      # Expose the token's `indentSize` metadata, when present, as `start`.
+      {indentSize} = meta ? {}
+      if indentSize?
+        ret.start = indentSize
+
+      ret
+    when tag.match /^[A-Z_]+\??$/
+      switch
+        when tag is 'TERMINATOR'
+          assert.equal value.toString(), '\n', {value}
+          {terminator: value}
+        when value.toString().match /^\s+$/
+          {whitespace: tag, value: encodeURIComponent value}
+        else
+          {tag, value}
     else
-      {type, value}
-  else
-    assert (type is value), JSON.stringify {type, value}
-    {punct: value}
+      assert.equal tag, value, JSON.stringify {tag, value}
+      {punct: value}
+  # Keep the full metadata reachable under the `metaField` symbol as a non-enumerable property.
+  if meta?
+    encoded = Object.defineProperty encoded, metaField,
+      configurable: no
+      enumerable: no
+      writable: yes
+      value: meta
+  # `generated` is additionally copied onto the record as an ordinary field.
+  {generated} = meta ? {}
+  if generated?
+    encoded.generated = generated
 
-tokenPrint = (input) -> for {punct, whitespace, type, value} in encodeTokens input
-  if punct?
+  encoded
+# Render each encoded token as one display string; the caller joins the resulting array.
+tokenPrint = (input) -> for encodedToken in encodeTokens input
+  {punct, whitespace, tag, value, dent, start, width, terminator} = encodedToken
+  if terminator?
+    assert.equal terminator, '\n'
+    'TERMINATOR'
+  else if punct?
     ": #{punct}"
   else if whitespace?
     "#{whitespace}('#{value}')"
+  else if dent?
+    prefix = if start? then "@#{start}" else ''  # despite the name, this is appended after the `dent(...)` text
+    "dent(#{dent}, #{width})#{prefix}"
+  else if tag is 'PARAM_START'
+    assert.equal value, '('
+    '@params: (...'
+  else if tag is 'PARAM_END'
+    assert.equal value, ')'
+    '@params: ...)'
   else
-    "#{type}(#{value})"
+    "#{tag}(#{value})"
 
 nodes = (input) -> coffee.nodes input
-# .body.expressions[0].params[0].name.properties[0]
+# Build an accessor that evaluates `value<pathExpr>`. NOTE(review): this `eval`s a string taken from the AST_PATH env var; only safe for trusted, local debugging.
+DEFAULT_PATH = '.body.expressions'
+indexByExpr = (pathExpr) ->
+  pathExpr ?= DEFAULT_PATH
+  (value) -> eval "value#{pathExpr}"
 
 compiled = (input) -> coffee.compile input, bare: yes
 
@@ -35,21 +212,21 @@ evaled = (input) -> coffee.eval input
 
 
 {TOK, AST, AST_PATH, COMP, EV} = process.env
-output = if TOK?
+# Pick the debug view from the environment: TOK (raw / norm / enc / anything else prints one token per line), AST (narrowed by AST_PATH), COMP, or EV.
+output = (input) -> if TOK?
   switch TOK
     when 'raw' then rawTokens input
+    when 'norm' then normalizeTokens input
     when 'enc' then encodeTokens input
     else (tokenPrint input).join '\n'
 else if AST?
-  ret = nodes input
-  if AST_PATH?
-    eval "ret#{AST_PATH}"
-  else
-    ret
+  (indexByExpr (AST_PATH ? null)) nodes input
 else if COMP?
   compiled input
 else if EV?
   evaled input
 else throw new Error('wtf')
 
-console.log output
+[input, ...] = process.argv[2..]
+
+console.log output(input)
diff --git a/src/grammar.coffee b/src/grammar.coffee
index 03ee092ee1..febc39e947 100644
--- a/src/grammar.coffee
+++ b/src/grammar.coffee
@@ -203,7 +203,7 @@ grammar =
   String: [
     o 'STRING', ->
       new StringLiteral(
-        $1.slice 1, -1 # strip artificial quotes and unwrap to primitive string
+        $1.toString().slice 1, -1 # strip artificial quotes and unwrap to primitive string
         quote:        $1.quote
         initialChunk: $1.initialChunk
         finalChunk:   $1.finalChunk
diff --git a/src/rewriter.coffee b/src/rewriter.coffee
index e8fbcbca7b..2d682ef27c 100644
--- a/src/rewriter.coffee
+++ b/src/rewriter.coffee
@@ -31,6 +31,62 @@ generate = (tag, value, origin, commentsToken) ->
   moveComments commentsToken, token if commentsToken
   token
 
+# Stand-in for a token's value: coerces to the wrapped primitive while exposing the token's `data` entries and `generated` flag as properties.
+class WrappedTokenData
+  @val: Symbol 'val'
+  @data: Symbol 'data'
+  @generated: Symbol 'generated'
+
+  # Define `sym` on `obj`; hidden, writable and non-configurable unless `propOpts` overrides.
+  @defineInnerProp: (obj, sym, val, propOpts = {}) => Object.defineProperty obj, sym, {
+      configurable: no,
+      enumerable: no,
+      writable: yes,
+      ...propOpts,
+      value: val
+    }
+
+  # Wrap `value` (anything but null/undefined); throws TypeError on a non-object `data` or non-boolean `generated`.
+  constructor: (value, {data, generated} = {}) ->
+    throw new TypeError "value must not be null or undefined: #{value}" unless value?
+    throw new TypeError "data must be a dict: #{data}" if data? and typeof data isnt 'object'
+    throw new TypeError "generated must be a bool: #{generated}" if generated? and typeof generated isnt 'boolean'
+    @constructor.defineInnerProp @, @constructor.val, value
+    @constructor.defineInnerProp @, @constructor.data, data
+    @constructor.defineInnerProp @, @constructor.generated, generated
+
+  innerVal: -> @[@constructor.val]
+  innerData: -> @[@constructor.data]
+  isGenerated: -> @[@constructor.generated] ? no
+
+  # Report the inner value as the wrapper's `Symbol.toStringTag`.
+  Object.defineProperty @::, Symbol.toStringTag,
+    get: -> @innerVal()
+
+  # Primitive coercion: the 'number' hint yields a number; 'string' and 'default' yield the inner value.
+  @::[Symbol.toPrimitive] = (hint) -> switch hint
+    when 'string' then @toString()
+    when 'number' then +@valueOf()
+    else
+      throw new TypeError "unexpected hint: #{String hint}" unless hint is 'default'
+      @toString()
+  # NOTE: both return the inner value unchanged, so `toString` yields a string only if the inner value is one.
+  toString: -> @valueOf()
+  valueOf: -> @innerVal()
+
+  # Returns a factory that wraps a value and copies `data`'s own keys (and a truthy `generated`) onto the wrapper as enumerable properties.
+  @withData: ({data, generated}) => (value) =>
+    ret = new @ value, {data, generated}
+    if data?
+      for own key, val of data
+        @defineInnerProp ret, key, val,
+          enumerable: yes
+    if generated
+      @defineInnerProp ret, 'generated', generated,
+        enumerable: yes
+    ret
+
+
 # The **Rewriter** class is used by the [Lexer](lexer.html), directly against
 # its internal array of tokens.
 exports.Rewriter = class Rewriter
@@ -761,9 +817,7 @@ exports.Rewriter = class Rewriter
   exposeTokenDataToGrammar: ->
     @scanTokens (token, i) ->
       if token.generated or (token.data and Object.keys(token.data).length isnt 0)
-        token[1] = new String token[1]
-        token[1][key] = val for own key, val of (token.data ? {})
-        token[1].generated = yes if token.generated
+        token[1] = (WrappedTokenData.withData token) token[1]
       1
 
   # Generate the indentation tokens, based on another token on the same line.