From 2a572319be6f9a421751308f3620a682aadd0ac7 Mon Sep 17 00:00:00 2001 From: Tomasz Godzik Date: Wed, 4 Oct 2023 10:53:56 +0200 Subject: [PATCH] bugfix: Inline CharArrayReader which changed between versions This is an internal part of scalameta, which is not kept binary compatible, so it's safer to inline it. --- .../internal/pc/completions/Completions.scala | 8 +- .../metals/docstrings/ScaladocUtils.scala | 4 +- .../meta/internal/mtags/CharArrayReader.scala | 158 ++++++++++++++++++ .../internal/mtags/JavaToplevelMtags.scala | 1 - 4 files changed, 166 insertions(+), 5 deletions(-) create mode 100644 mtags/src/main/scala/scala/meta/internal/mtags/CharArrayReader.scala diff --git a/mtags/src/main/scala-2/scala/meta/internal/pc/completions/Completions.scala b/mtags/src/main/scala-2/scala/meta/internal/pc/completions/Completions.scala index ca6dd82739c..ee50e48f0c0 100644 --- a/mtags/src/main/scala-2/scala/meta/internal/pc/completions/Completions.scala +++ b/mtags/src/main/scala-2/scala/meta/internal/pc/completions/Completions.scala @@ -876,7 +876,7 @@ trait Completions { this: MetalsGlobal => def inferStart( pos: Position, text: String, - charPred: Int => Boolean + charPred: Char => Boolean ): Int = { def fallback: Int = { var i = pos.point - 1 @@ -903,11 +903,15 @@ trait Completions { this: MetalsGlobal => loop(lastVisitedParentTrees) } + /** Can character form part of an alphanumeric Scala identifier? */ + private def isIdentifierPart(c: Char) = + (c == '$') || Character.isUnicodeIdentifierPart(c) + /** * Returns the start offset of the identifier starting as the given offset position. */ def inferIdentStart(pos: Position, text: String): Int = - inferStart(pos, text, Chars.isIdentifierPart) + inferStart(pos, text, isIdentifierPart) /** * Returns the end offset of the identifier starting as the given offset position. 
diff --git a/mtags/src/main/scala/scala/meta/internal/metals/docstrings/ScaladocUtils.scala b/mtags/src/main/scala/scala/meta/internal/metals/docstrings/ScaladocUtils.scala index 8ae2fd090b1..34e83adb64b 100644 --- a/mtags/src/main/scala/scala/meta/internal/metals/docstrings/ScaladocUtils.scala +++ b/mtags/src/main/scala/scala/meta/internal/metals/docstrings/ScaladocUtils.scala @@ -8,7 +8,7 @@ import scala.meta.internal.tokenizers.Chars._ object ScaladocUtils { /** Is character a whitespace character (but not a new line)? */ - def isWhitespace(c: Char) = + def isWhitespace(c: Char): Boolean = c == ' ' || c == '\t' || c == CR /** @@ -199,7 +199,7 @@ object ScaladocUtils { str /** Can character form part of a doc comment variable xxx? */ - def isVarPart(c: Char) = + def isVarPart(c: Char): Boolean = '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' /** diff --git a/mtags/src/main/scala/scala/meta/internal/mtags/CharArrayReader.scala b/mtags/src/main/scala/scala/meta/internal/mtags/CharArrayReader.scala new file mode 100644 index 00000000000..b3955d0aaf8 --- /dev/null +++ b/mtags/src/main/scala/scala/meta/internal/mtags/CharArrayReader.scala @@ -0,0 +1,158 @@ +package scala.meta.internal.mtags + +import scala.meta.Dialect +import scala.meta.inputs._ +import scala.meta.internal.tokenizers.Chars._ +import scala.meta.internal.tokenizers.Reporter + +private[mtags] case class CharArrayReader private ( + buf: Array[Char], + dialect: Dialect, + reporter: Reporter, + /** The last read character as a code point; SU once the end of buf is reached */ + var ch: Int = SU, + /** Start offset of the last read character, and the offset one past its end */ + var begCharOffset: Int = -1, // included + var endCharOffset: Int = 0, // excluded + /** The start offset of the current line */ + var lineStartOffset: Int = 0, + /** The start offset of the line before the current one */ + private var lastLineStartOffset: Int = 0 +) { + + def this(input: Input, dialect: Dialect, reporter: Reporter) = + this(buf = input.chars, dialect = dialect, reporter = 
reporter) + + import reporter._ + + /** Advance one character; reducing CR;LF pairs to just LF */ + final def nextChar(): Unit = { + nextRawChar() + if (ch < ' ') { + skipCR() + potentialLineEnd() + } + if (ch == '"' && !dialect.allowMultilinePrograms) { + readerError( + "double quotes are not allowed in single-line quasiquotes", + at = begCharOffset + ) + } + } + + final def nextCommentChar(): Unit = { + if (endCharOffset >= buf.length) { + ch = SU + } else { + ch = buf(endCharOffset) + begCharOffset = endCharOffset + endCharOffset += 1 + checkLineEnd() + } + } + + /** + * Advance one character, leaving CR;LF pairs intact. This is for use in multi-line strings, so + * there are no "potential line ends" here. + */ + final def nextRawChar(): Unit = { + if (endCharOffset >= buf.length) { + ch = SU + } else { + begCharOffset = endCharOffset + val (hi, hiEnd) = readUnicodeChar(endCharOffset) + if (!Character.isHighSurrogate(hi)) { + ch = hi + endCharOffset = hiEnd + } else if (hiEnd >= buf.length) + readerError("invalid unicode surrogate pair", at = begCharOffset) + else { + val (lo, loEnd) = readUnicodeChar(hiEnd) + if (!Character.isLowSurrogate(lo)) + readerError("invalid unicode surrogate pair", at = begCharOffset) + else { + ch = Character.toCodePoint(hi, lo) + endCharOffset = loEnd + } + } + } + } + + def nextNonWhitespace: Int = { + while (ch == ' ' || ch == '\t') nextRawChar() + ch + } + + /** Read next char interpreting \\uxxxx escapes; doesn't mutate internal state */ + private def readUnicodeChar(offset: Int): (Char, Int) = { + val c = buf(offset) + val firstOffset = offset + 1 // offset after a single character + + def evenSlashPrefix: Boolean = { + var p = firstOffset - 2 + while (p >= 0 && buf(p) == '\\') p -= 1 + (firstOffset - p) % 2 == 0 + } + + if ( + c != '\\' || firstOffset >= buf.length || buf( + firstOffset + ) != 'u' || !evenSlashPrefix + ) + return (c, firstOffset) + + var escapedOffset = firstOffset // offset after an escaped character + 
escapedOffset += 1 + while (escapedOffset < buf.length && buf(escapedOffset) == 'u') + escapedOffset += 1 + + // need 4 digits + if (escapedOffset + 3 >= buf.length) + return (c, firstOffset) + + def udigit: Int = + try digit2int(buf(escapedOffset), 16) + finally escapedOffset += 1 + + val code = udigit << 12 | udigit << 8 | udigit << 4 | udigit + (code.toChar, escapedOffset) + } + + /** Replace CR followed by a unicode-escaped LF with LF; a raw LF after CR is left as is */ + private def skipCR() = + if (ch == CR && endCharOffset < buf.length && buf(endCharOffset) == '\\') { + val (c, nextOffset) = readUnicodeChar(endCharOffset) + if (c == LF) { + ch = LF + endCharOffset = nextOffset + } + } + + /** Track a line end; report an error if the dialect disallows multiline programs */ + private def potentialLineEnd(): Unit = { + if (checkLineEnd() && !dialect.allowMultilinePrograms) { + readerError( + "line breaks are not allowed in single-line quasiquotes", + at = begCharOffset + ) + } + } + + private def checkLineEnd(): Boolean = { + val ok = ch == LF || ch == FF + if (ok) { + lastLineStartOffset = lineStartOffset + lineStartOffset = endCharOffset + } + ok + } + + /** A new reader that takes off at the current character position */ + def lookaheadReader: CharArrayReader = copy() + + /** Advance with nextChar() and return the character just read */ + def getc(): Int = { nextChar(); ch } + + final def wasMultiChar: Boolean = begCharOffset < endCharOffset - 1 + +} diff --git a/mtags/src/main/scala/scala/meta/internal/mtags/JavaToplevelMtags.scala b/mtags/src/main/scala/scala/meta/internal/mtags/JavaToplevelMtags.scala index 1d190bf9d58..1c1043af043 100644 --- a/mtags/src/main/scala/scala/meta/internal/mtags/JavaToplevelMtags.scala +++ b/mtags/src/main/scala/scala/meta/internal/mtags/JavaToplevelMtags.scala @@ -7,7 +7,6 @@ import scala.meta.inputs.Input import scala.meta.inputs.Position import scala.meta.internal.semanticdb.Language import scala.meta.internal.semanticdb.SymbolInformation -import scala.meta.internal.tokenizers.CharArrayReader import scala.meta.internal.tokenizers.Chars._ import 
scala.meta.internal.tokenizers.Reporter