bugfix: Inline CharArrayReader, which changed between versions

This is an internal part of scalameta, which is not kept binary compatible, so it's safer to inline it.
scalameta · Oct 9, 2023 · 2a57231 · 2a57231
1 parent d1b21f7
commit 2a57231
Show file tree

Hide file tree

Showing 4 changed files with 166 additions and 5 deletions.
diff --git a/mtags/src/main/scala-2/scala/meta/internal/pc/completions/Completions.scala b/mtags/src/main/scala-2/scala/meta/internal/pc/completions/Completions.scala
@@ -876,7 +876,7 @@ trait Completions { this: MetalsGlobal =>
   def inferStart(
       pos: Position,
       text: String,
-      charPred: Int => Boolean
+      charPred: Char => Boolean
   ): Int = {
     def fallback: Int = {
       var i = pos.point - 1
@@ -903,11 +903,15 @@ trait Completions { this: MetalsGlobal =>
     loop(lastVisitedParentTrees)
   }
 
+  /** Can character form part of an alphanumeric Scala identifier? True for '$' and for any Unicode identifier part. */
+  private def isIdentifierPart(c: Char) =
+    (c == '$') || Character.isUnicodeIdentifierPart(c)
+
   /**
    * Returns the start offset of the identifier starting at the given offset position.
    */
   def inferIdentStart(pos: Position, text: String): Int =
-    inferStart(pos, text, Chars.isIdentifierPart)
+    inferStart(pos, text, isIdentifierPart)
 
   /**
    * Returns the end offset of the identifier starting as the given offset position.

diff --git a/mtags/src/main/scala/scala/meta/internal/metals/docstrings/ScaladocUtils.scala b/mtags/src/main/scala/scala/meta/internal/metals/docstrings/ScaladocUtils.scala
@@ -8,7 +8,7 @@ import scala.meta.internal.tokenizers.Chars._
 object ScaladocUtils {
 
   /** Is character a whitespace character (but not a new line)? True for space, tab and CR. */
-  def isWhitespace(c: Char) =
+  def isWhitespace(c: Char): Boolean =
     c == ' ' || c == '\t' || c == CR
 
   /**
@@ -199,7 +199,7 @@ object ScaladocUtils {
       str
 
   /** Can character form part of a doc comment variable xxx? True for ASCII letters and digits only. */
-  def isVarPart(c: Char) =
+  def isVarPart(c: Char): Boolean =
     '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
 
   /**

diff --git a/mtags/src/main/scala/scala/meta/internal/mtags/CharArrayReader.scala b/mtags/src/main/scala/scala/meta/internal/mtags/CharArrayReader.scala
@@ -0,0 +1,158 @@
+package scala.meta.internal.mtags
+
+import scala.meta.Dialect
+import scala.meta.inputs._
+import scala.meta.internal.tokenizers.Chars._
+import scala.meta.internal.tokenizers.Reporter
+
+private[mtags] case class CharArrayReader private (
+    buf: Array[Char], // the characters being read; this class only reads from it
+    dialect: Dialect, // only allowMultilinePrograms is consulted in this class
+    reporter: Reporter,
+    /** The last read character as a code point (surrogate pairs are combined); SU before the first read and once buf is exhausted */
+    var ch: Int = SU,
+    /** The last read character occupies buf offsets [begCharOffset, endCharOffset) */
+    var begCharOffset: Int = -1, // included: where the last read character starts
+    var endCharOffset: Int = 0, // excluded: one past the last read character, where the next read starts
+    /** The start offset of the current line */
+    var lineStartOffset: Int = 0,
+    /** The start offset of the line before the current one */
+    private var lastLineStartOffset: Int = 0
+) {
+
+  def this(input: Input, dialect: Dialect, reporter: Reporter) = // reads input.chars from offset 0
+    this(buf = input.chars, dialect = dialect, reporter = reporter)
+
+  import reporter._ // presumably where the unqualified readerError below comes from
+
+  /** Advance one character; reducing CR;LF pairs to just LF where skipCR applies */
+  final def nextChar(): Unit = {
+    nextRawChar()
+    if (ch < ' ') { // control characters only; CR and line ends are expected to be among them
+      skipCR()
+      potentialLineEnd()
+    }
+    if (ch == '"' && !dialect.allowMultilinePrograms) { // such dialects reject every double quote
+      readerError(
+        "double quotes are not allowed in single-line quasiquotes",
+        at = begCharOffset
+      )
+    }
+  }
+
+  final def nextCommentChar(): Unit = { // reads one raw element of buf: no escape or surrogate handling
+    if (endCharOffset >= buf.length) {
+      ch = SU
+    } else {
+      ch = buf(endCharOffset)
+      begCharOffset = endCharOffset
+      endCharOffset += 1
+      checkLineEnd() // result ignored: only the line-start bookkeeping is wanted
+    }
+  }
+
+  /**
+   * Advance one character, leaving CR;LF pairs intact. This is for use in multi-line strings, so
+   * there are no "potential line ends" here.
+   */
+  final def nextRawChar(): Unit = {
+    if (endCharOffset >= buf.length) {
+      ch = SU
+    } else {
+      begCharOffset = endCharOffset
+      val (hi, hiEnd) = readUnicodeChar(endCharOffset)
+      if (!Character.isHighSurrogate(hi)) { // ordinary case: a single char is the whole character
+        ch = hi
+        endCharOffset = hiEnd
+      } else if (hiEnd >= buf.length) // a high surrogate is the last thing in buf
+        readerError("invalid unicode surrogate pair", at = begCharOffset) // NOTE(review): ch and endCharOffset are not updated on this path
+      else {
+        val (lo, loEnd) = readUnicodeChar(hiEnd)
+        if (!Character.isLowSurrogate(lo))
+          readerError("invalid unicode surrogate pair", at = begCharOffset) // NOTE(review): ch and endCharOffset are not updated on this path
+        else {
+          ch = Character.toCodePoint(hi, lo) // combine the pair into one code point
+          endCharOffset = loEnd
+        }
+      }
+    }
+  }
+
+  def nextNonWhitespace: Int = { // skips spaces and tabs starting at the current ch; returns the first other character
+    while (ch == ' ' || ch == '\t') nextRawChar()
+    ch
+  }
+
+  /** Read the char at offset, interpreting \\uxxxx escapes; returns it with the offset just past it; doesn't mutate internal state */
+  private def readUnicodeChar(offset: Int): (Char, Int) = {
+    val c = buf(offset)
+    val firstOffset = offset + 1 // offset after a single character
+
+    def evenSlashPrefix: Boolean = { // true when an even number of backslashes (possibly zero) directly precede offset
+      var p = firstOffset - 2
+      while (p >= 0 && buf(p) == '\\') p -= 1
+      (firstOffset - p) % 2 == 0
+    }
+
+    if (
+      c != '\\' || firstOffset >= buf.length || buf(
+        firstOffset
+      ) != 'u' || !evenSlashPrefix
+    )
+      return (c, firstOffset) // not an escape: hand back the plain char
+
+    var escapedOffset = firstOffset // offset after an escaped character
+    escapedOffset += 1 // step over the first 'u'
+    while (escapedOffset < buf.length && buf(escapedOffset) == 'u') // any number of additional 'u' is accepted
+      escapedOffset += 1
+
+    // need 4 digits; otherwise fall back to returning the plain backslash
+    if (escapedOffset + 3 >= buf.length)
+      return (c, firstOffset)
+
+    def udigit: Int = // NOTE(review): the digit2int result for a non-hex char is not checked here
+      try digit2int(buf(escapedOffset), 16)
+      finally escapedOffset += 1 // advances even if digit2int throws
+
+    val code = udigit << 12 | udigit << 8 | udigit << 4 | udigit // relies on left-to-right evaluation of the four udigit calls
+    (code.toChar, escapedOffset)
+  }
+
+  /** Replace CR;LF by LF; only attempted when a backslash follows the CR, i.e. for an escaped LF. NOTE(review): a raw LF after CR is left alone here - confirm intended */
+  private def skipCR() =
+    if (ch == CR && endCharOffset < buf.length && buf(endCharOffset) == '\\') {
+      val (c, nextOffset) = readUnicodeChar(endCharOffset)
+      if (c == LF) {
+        ch = LF
+        endCharOffset = nextOffset
+      }
+    }
+
+  /** Handle line ends: update the line offsets and report an error when the dialect disallows multiline programs */
+  private def potentialLineEnd(): Unit = {
+    if (checkLineEnd() && !dialect.allowMultilinePrograms) {
+      readerError(
+        "line breaks are not allowed in single-line quasiquotes",
+        at = begCharOffset
+      )
+    }
+  }
+
+  private def checkLineEnd(): Boolean = { // true if ch is LF or FF; in that case the new line start is recorded
+    val ok = ch == LF || ch == FF
+    if (ok) {
+      lastLineStartOffset = lineStartOffset
+      lineStartOffset = endCharOffset
+    }
+    ok
+  }
+
+  /** A new reader that takes off at the current character position (copy() duplicates the offsets; buf is shared) */
+  def lookaheadReader: CharArrayReader = copy()
+
+  /** Advance with nextChar() and return the new ch (nextChar() itself returns Unit) */
+  def getc(): Int = { nextChar(); ch }
+
+  final def wasMultiChar: Boolean = begCharOffset < endCharOffset - 1 // last read consumed more than one element of buf
+
+}
diff --git a/mtags/src/main/scala/scala/meta/internal/mtags/JavaToplevelMtags.scala b/mtags/src/main/scala/scala/meta/internal/mtags/JavaToplevelMtags.scala
@@ -7,7 +7,6 @@ import scala.meta.inputs.Input
 import scala.meta.inputs.Position
 import scala.meta.internal.semanticdb.Language
 import scala.meta.internal.semanticdb.SymbolInformation
-import scala.meta.internal.tokenizers.CharArrayReader
 import scala.meta.internal.tokenizers.Chars._
 import scala.meta.internal.tokenizers.Reporter