From a152f62b00d3bd1f73f7351d47e47d8cf4c704e1 Mon Sep 17 00:00:00 2001 From: Wojciech Mazur Date: Fri, 1 Dec 2023 00:45:18 +0100 Subject: [PATCH] Add Source Code sanitizer to obfuscate to anonimize the source code contained in reports --- .../internal/metals/MetalsLspService.scala | 13 +- .../internal/metals/RemoteReportContext.scala | 109 +++++++---- .../meta/internal/metals/ReportContext.scala | 2 +- .../internal/metals/ReportSanitizer.scala | 3 +- .../internal/metals/SourceCodeSanitizer.scala | 100 ++++++++++ .../metals/SourceCodeTransformer.scala | 180 ++++++++++++++++++ .../pc/ScalaPresentationCompiler.scala | 16 +- .../pc/ScalaPresentationCompiler.scala | 14 +- .../ScalametaSourceCodeTransformer.scala | 81 ++++++++ project/TestGroups.scala | 6 +- .../meta/internal/telemetry/ReportEvent.java | 20 +- .../internal/telemetry/TelemetryService.java | 3 - .../tests/telemetry/RemoteReporterSuite.scala | 15 +- .../scala/tests/telemetry/SampleReports.scala | 3 +- .../telemetry/SourceCodeSanitizerSuite.scala | 161 ++++++++++++++++ 15 files changed, 638 insertions(+), 88 deletions(-) create mode 100644 mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeSanitizer.scala create mode 100644 mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeTransformer.scala create mode 100644 mtags/src/main/scala/scala/meta/internal/metals/ScalametaSourceCodeTransformer.scala create mode 100644 tests/unit/src/test/scala/tests/telemetry/SourceCodeSanitizerSuite.scala diff --git a/metals/src/main/scala/scala/meta/internal/metals/MetalsLspService.scala b/metals/src/main/scala/scala/meta/internal/metals/MetalsLspService.scala index 6ac29b76bbd..84fe8efe4e5 100644 --- a/metals/src/main/scala/scala/meta/internal/metals/MetalsLspService.scala +++ b/metals/src/main/scala/scala/meta/internal/metals/MetalsLspService.scala @@ -48,7 +48,7 @@ import scala.meta.internal.metals.Messages.IncompatibleBloopVersion import scala.meta.internal.metals.MetalsEnrichments._ import 
scala.meta.internal.metals.StdReportContext import scala.meta.internal.metals.MirroredReportContext -import scala.meta.internal.metals.RemoteTelemetryReportContext +import scala.meta.internal.metals.RemoteReportContext import scala.meta.internal.metals.ammonite.Ammonite import scala.meta.internal.metals.callHierarchy.CallHierarchyProvider import scala.meta.internal.metals.clients.language.ConfiguredLanguageClient @@ -191,13 +191,16 @@ class MetalsLspService( }, ReportLevel.fromString(MetalsServerConfig.default.loglevel), ) - private val remoteTelemetryReports = new RemoteTelemetryReportContext( - serverEndpoint = serverInputs.initialServerConfig.telemetryServer, - workspace = Some(folder.toNIO), + private val remoteTelemetryReports = new RemoteReportContext( + serverEndpoint = RemoteReportContext.DefaultEndpoint, getReporterContext = makeTelemetryContext, + sanitizers = new RemoteReportContext.Sanitizers( + workspace = Some(folder.toNIO), + sourceCodeTransformer = Some(ScalametaSourceCodeTransformer), + ), logger = { val logger = logging.MetalsLogger.default - RemoteTelemetryReportContext.LoggerAccess( + RemoteReportContext.LoggerAccess( info = logger.info(_), warning = logger.warn(_), error = logger.error(_), diff --git a/mtags-shared/src/main/scala/scala/meta/internal/metals/RemoteReportContext.scala b/mtags-shared/src/main/scala/scala/meta/internal/metals/RemoteReportContext.scala index 1dc1f5cce13..dea99cc8891 100644 --- a/mtags-shared/src/main/scala/scala/meta/internal/metals/RemoteReportContext.scala +++ b/mtags-shared/src/main/scala/scala/meta/internal/metals/RemoteReportContext.scala @@ -12,12 +12,30 @@ import scala.util.Random import java.nio.file.Path import java.io.InputStreamReader import scala.util.Try -import RemoteTelemetryReportContext.LoggerAccess -object RemoteTelemetryReportContext { +import RemoteReportContext.LoggerAccess +import java.util.Optional +object RemoteReportContext { def discoverTelemetryServer = 
sys.props.getOrElse("metals.telemetry-server", DefaultEndpoint)
   final val DefaultEndpoint =
     "https://scala3.westeurope.cloudapp.azure.com/telemetry"
+
+  case class Sanitizers(
+      workspaceSanitizer: WorkspaceSanitizer,
+      sourceCodeSanitizer: Option[SourceCodeSanitizer[_, _]]
+  ) {
+    def canSanitizeSources = sourceCodeSanitizer.isDefined
+    def this(
+        workspace: Option[Path],
+        sourceCodeTransformer: Option[SourceCodeTransformer[_, _]]
+    ) =
+      this(
+        workspaceSanitizer = new WorkspaceSanitizer(workspace),
+        sourceCodeSanitizer =
+          sourceCodeTransformer.map(new SourceCodeSanitizer(_))
+      )
+    val all = Seq(workspaceSanitizer) ++ sourceCodeSanitizer
+  }
 
   // Proxy for different logging mechanism java.util.logging in PresentatilnCompiler and scribe in metals
   case class LoggerAccess(
@@ -41,30 +59,33 @@ object RemoteTelemetryReportContext {
  * @param telemetryServerEndpoint
  * @param getReporterContext Constructor of reporter context metadata containg informations about user/server configuration of components
  */
-class RemoteTelemetryReportContext(
+class RemoteReportContext(
     serverEndpoint: String,
-    workspace: Option[Path],
     getReporterContext: () => telemetry.ReporterContext,
+    sanitizers: RemoteReportContext.Sanitizers,
     logger: LoggerAccess
 ) extends ReportContext {
+
+  // Don't send reports with fragile user data - sources etc
   override lazy val unsanitized: Reporter = reporter("unsanitized")
-  override lazy val incognito: Reporter = reporter("sanitized")
+  override lazy val incognito: Reporter = reporter("incognito")
   override lazy val bloop: Reporter = reporter("bloop")
 
-  private def reporter(name: String) = new TelemetryReporter(
-    name = name,
-    serverEndpoint = serverEndpoint,
-    workspace = workspace,
-    getReporterContext = getReporterContext,
-    logger = logger
-  )
+  private def reporter(name: String) =
+    new RemoteReporter(
+      name = name,
+      serverEndpoint = serverEndpoint,
+      getReporterContext = getReporterContext,
+      sanitizers = sanitizers,
+      logger = logger
+    )
 }
 
-private class TelemetryReporter(
+private class RemoteReporter(
     override val name: String,
     serverEndpoint: String,
-    workspace: Option[Path],
     getReporterContext: () => telemetry.ReporterContext,
+    sanitizers: RemoteReportContext.Sanitizers,
     logger: LoggerAccess
 ) extends Reporter {
 
@@ -73,9 +94,6 @@ private class TelemetryReporter(
     Nil
   override def deleteAll(): Unit = ()
 
-  private val sanitizer: ReportSanitizer = new WorkspaceReportSanitizer(
-    workspace
-  )
   private lazy val environmentInfo: telemetry.Environment =
     new telemetry.Environment(
       /* java = */ new telemetry.JavaInfo(
@@ -94,32 +112,49 @@ private class TelemetryReporter(
     logger = logger
   )
 
+  /** Applies every configured sanitizer to `message`; the last one in `sanitizers.all` runs first. */
+  override def sanitize(message: String): String =
+    sanitizers.all.foldRight(message)(_.apply(_))
+
+  /**
+   * Builds the event sent to the telemetry server from a local report.
+   * The report text is attached only when a source code sanitizer is configured, otherwise it is left empty.
+   * `sanitize` is handed to `ReportedError.fromThrowable` together with the reported error.
+   */
+  private def createSanitizedReport(report: Report) = new telemetry.ReportEvent(
+    /* name = */ report.name,
+    /* text = */ if (sanitizers.canSanitizeSources)
+      Optional.of(sanitize(report.text))
+    else Optional.empty(),
+    /* id = */ report.id.toJava,
+    /* error = */ report.error
+      .map(telemetry.ReportedError.fromThrowable(_, sanitize(_)))
+      .toJava,
+    /* reporterName = */ name,
+    /* reporterContext = */ getReporterContext() match {
+      case ctx: telemetry.MetalsLspContext =>
+        telemetry.ReporterContextUnion.metalsLSP(ctx)
+      case ctx: telemetry.ScalaPresentationCompilerContext =>
+        telemetry.ReporterContextUnion.scalaPresentationCompiler(ctx)
+      case ctx: telemetry.UnknownProducerContext =>
+        telemetry.ReporterContextUnion.unknown(ctx)
+    },
+    /* env = */ environmentInfo
+  )
+
   override def create(
       unsanitizedReport: => Report,
       ifVerbose: Boolean
   ): Option[Path] = {
-    val report = sanitizer(unsanitizedReport)
-    client
-      .sendReportEvent(
-        new telemetry.ReportEvent(
-          /* name = */ report.name,
-          /* text = */ report.text,
-          /* shortSummary = */ report.shortSummary,
-          /* id = */ report.id.toJava,
-          /* error = */ report.error
-            .map(telemetry.ReportedError.fromThrowable(_, sanitizer.apply(_)))
-            .toJava,
-          /* reporterName = */ name,
-          /* reporterContext = */ getReporterContext() match {
-            case ctx: telemetry.MetalsLspContext =>
-              telemetry.ReporterContextUnion.metalsLSP(ctx)
-            case ctx: telemetry.ScalaPresentationCompilerContext =>
-              telemetry.ReporterContextUnion.scalaPresentationCompiler(ctx)
-            case ctx: telemetry.UnknownProducerContext =>
-              telemetry.ReporterContextUnion.unknown(ctx)
-          },
-          /* env = */ environmentInfo
-        )
+    // `unsanitizedReport` is a by-name parameter: evaluate it once, so the report is not built again for logging
+    val report = unsanitizedReport
+    val event = createSanitizedReport(report)
+    if (event.getText().isPresent() || event.getError().isPresent())
+      client.sendReportEvent(event)
+    else
+      logger.info(
+        "Skipped reporting remotely unmeaningful report, no context or error, reportId=" +
+          report.id.getOrElse("null")
       )
     None
   }
diff --git a/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportContext.scala b/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportContext.scala
index a5395bfea4f..527568bfb39 100644
--- a/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportContext.scala
+++ b/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportContext.scala
@@ -87,7 +87,7 @@ class StdReporter(
     level: ReportLevel,
     override val name: String
 ) extends Reporter {
-  private val sanitizer: ReportSanitizer = new WorkspaceReportSanitizer(
+  private val sanitizer: ReportSanitizer = new WorkspaceSanitizer(
     Some(workspace)
   )
   val maybeReportsDir: Path =
diff --git a/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportSanitizer.scala b/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportSanitizer.scala
index f80fd217c1e..2b4deaf3f1f 100644
--- a/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportSanitizer.scala
+++ b/mtags-shared/src/main/scala/scala/meta/internal/metals/ReportSanitizer.scala
@@ -15,8 +15,7 @@ trait ReportSanitizer {
   def sanitize(text: String): String
 }
 
-class WorkspaceReportSanitizer(workspace: 
Option[Path])
-    extends ReportSanitizer {
+class WorkspaceSanitizer(workspace: Option[Path]) extends ReportSanitizer {
   private lazy val userHome = Option(System.getProperty("user.home"))
 
   override def sanitize(text: String): String = {
diff --git a/mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeSanitizer.scala b/mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeSanitizer.scala
new file mode 100644
index 00000000000..0adf362d7ae
--- /dev/null
+++ b/mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeSanitizer.scala
@@ -0,0 +1,115 @@
+package scala.meta.internal.metals
+
+import java.util.regex.Matcher
+import java.util.regex.Pattern
+
+/**
+ * Sanitizer ensuring that no original source code can leak through the reports.
+ * First it treats the input as markdown text containing 1 or more code snippets.
+ * If a snippet contains parsable code, all the original names are erased and replaced with synthetic symbols of the same length.
+ * If the code is not parsable, or would no longer be parsable after the transformation, the snippet is replaced with a failure reason tag.
+ * If no code snippets are found the input is treated as raw source code.
+ */
+class SourceCodeSanitizer[ParserCtx, ParserAST](
+    parser: SourceCodeTransformer[ParserCtx, ParserAST]
+) extends ReportSanitizer {
+
+  override def sanitize(text: String): String = {
+    anonimizeMarkdownSnippets(text)
+      .getOrElse(tryAnonimize(text, languageHint = Some("scala")).merge)
+  }
+
+  // Completion marker needs to be escaped before parsing the sources, and restored afterwards
+  private final val CompletionMarker = "@@"
+  private final val CompletionMarkerReplecement = "__METALS_COMPLETION_MARKER__"
+
+  private final val MarkdownCodeSnippet = java.util.regex.Pattern
+    .compile(
+      raw"^`{3}(\w+\s*)?\n([\s\S]*?)`{3}",
+      Pattern.MULTILINE | Pattern.CASE_INSENSITIVE
+    )
+  private final val StackTraceLine =
+    raw"(?:\s*(?:at\s*))?(\S+)\((?:(?:\S+\.(?:scala|java)\:\d+)|(?:Native Method))\)".r
+
+  private type FailureReason = String
+
+  /**
+   * Anonymizes a single piece of source code.
+   * Returns `Right` with the text to be reported, or `Left` with the failure reason:
+   * `no-source` for a null or blank input, an empty reason in the remaining cases.
+   * Input containing a stack trace line is returned without anonymization.
+   */
+  private def tryAnonimize(
+      source: String,
+      languageHint: Option[String]
+  ): Either[FailureReason, String] = {
+    Option(source)
+      .map(_.trim())
+      .filter(_.nonEmpty)
+      .map(_.replace(CompletionMarker, CompletionMarkerReplecement))
+      .fold[Either[String, String]](Left("no-source")) { source =>
+        if (StackTraceLine.findFirstIn(source).isDefined)
+          Right(source)
+        else if (languageHint.forall(_.toLowerCase() == "scala")) {
+          parser
+            .parse(source)
+            .toRight("")
+            .flatMap { case (ctx, tree) =>
+              parser.transformer
+                .sanitizeSymbols(tree)
+                .toRight("")
+                .flatMap { parsed =>
+                  val sourceString = parser.toSourceString(parsed, ctx)
+                  val isReparsable = parser.parse(sourceString, ctx).isDefined
+                  if (isReparsable) Right(sourceString)
+                  else Left("")
+                }
+            }
+        } else
+          Left("")
+      }
+      .map(_.replace(CompletionMarkerReplecement, CompletionMarker))
+  }
+
+  /**
+   * Anonymizes every fenced markdown code snippet found in `source`.
+   * Returns `None` when there are no snippets or the snippet markers are unbalanced.
+   */
+  private def anonimizeMarkdownSnippets(source: String): Option[String] = {
+    // Check if we have an even number of markdown snippet markers, if not discard the whole input
+    val snipetMarkers = source.linesIterator.count(_.startsWith("```"))
+    if (snipetMarkers == 0 || snipetMarkers % 2 != 0) None
+    else {
+      val matcher = MarkdownCodeSnippet.matcher(source)
+      val sourceResult = new java.lang.StringBuffer(source.size)
+      while (matcher.find()) {
+        val matchResult = matcher.toMatchResult()
+        val language = Option(matchResult.group(1)).map(_.trim())
+        val result = tryAnonimize(
+          languageHint = language,
+          source = matchResult.group(2)
+        )
+        val sanitizedOrFailureReason: String = result.merge
+        // Plain concatenation instead of `stripMargin`, which would also strip
+        // the `|` margins that belong to the sanitized code itself
+        val updatedSnippet =
+          "```" + language.getOrElse("") + "\n" +
+            sanitizedOrFailureReason + "\n" +
+            "```\n"
+
+        // The snippet is a literal text: quote it, so that `$` and `\` are not
+        // treated as group references or escapes by appendReplacement
+        matcher.appendReplacement(
+          sourceResult,
+          Matcher.quoteReplacement(updatedSnippet)
+        )
+      }
+      if (sourceResult.length() == 0) None // not found any snippets
+      else
+        Some {
+          matcher.appendTail(sourceResult)
+          sourceResult.toString()
+        }
+    }
+  }
+}
diff --git a/mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeTransformer.scala b/mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeTransformer.scala
new file mode 100644
index 00000000000..089bf11c080
--- /dev/null
+++ b/mtags-shared/src/main/scala/scala/meta/internal/metals/SourceCodeTransformer.scala
@@ -0,0 +1,209 @@
+package scala.meta.internal.metals
+
+import scala.util.Random
+import scala.annotation.tailrec
+import scala.collection.mutable
+
+// Needs to be implemented in user of the reporting, eg. MetalsLSPService using scalameta or compiler
+trait SourceCodeTransformer[Context, Tree] {
+
+  /** Try parse using any available dialects/contexts and return the context that can be used for validation of sanitized source */
+  def parse(source: String): Option[(Context, Tree)]
+
+  /** Parse once using dedicated context, used for validation */
+  def parse(source: String, context: Context): Option[Tree]
+
+  /** Renders the tree back to the source text using the context it was parsed with */
+  def toSourceString(value: Tree, ctx: Context): String
+
+  /**
+   * Transformer used to anonymize the parsed tree. It remembers the replaced names in
+   * mutable caches, so an instance should not be shared between unrelated sources.
+   */
+  def transformer: ASTTrasnformer
+
+  trait ASTTrasnformer {
+    protected type Name
+    protected type TermName <: Name
+    protected type TypeName <: Name
+    protected type UnclasifiedName <: Name
+
+    protected def toTermName(name: String): TermName
+    protected def toTypeName(name: String): TypeName
+    protected def toUnclasifiedName(name: String): UnclasifiedName
+    protected def toSymbol(name: Name): String
+
+    private final val SymbolToPrefixLength = 3
+    private final val ShortSymbolLength = 2
+    // Upper bound of retries when looking for a not yet used random symbol
+    private final val MaxGenerateAttempts = 100
+
+    /** Replaces the names and literals of the tree, None if it cannot be done */
+    def sanitizeSymbols(tree: Tree): Option[Tree]
+
+    protected def isCommonScalaName(name: Name): Boolean = {
+      val symbol = toSymbol(name)
+      SourceCodeTransformer.CommonNames.types.contains(symbol) ||
+      SourceCodeTransformer.CommonNames.methods.contains(symbol)
+    }
+
+    def isScalaOrJavaSelector(v: String): Boolean =
+      v.startsWith("scala.") || v.startsWith("java.")
+
+    protected def sanitizeTermName(name: TermName): TermName =
+      termNames.getOrElseUpdate(
+        name,
+        toTermName(sanitizeSymbolOf(toSymbol(name), termNames.size.toString))
+      )
+
+    protected def sanitizeTypeName(name: TypeName): TypeName =
+      typeNames.getOrElseUpdate(
+        name,
+        toTypeName(sanitizeSymbolOf(toSymbol(name), typeNames.size.toString))
+      )
+
+    protected def sanitizeUnclasifiedName(
+        name: UnclasifiedName
+    ): UnclasifiedName =
+      unclasifiedNames.getOrElseUpdate(
+        name,
+        toUnclasifiedName(
+          sanitizeSymbolOf(toSymbol(name), unclasifiedNames.size.toString)
+        )
+      )
+
+    /** Keeps the shape of the literal: every letter or digit becomes '-' */
+    protected def sanitizeStringLiteral(original: String): String =
+      original.map(c => if (c.isLetterOrDigit) '-' else c)
+
+    protected def santitizeScalaSymbol(original: scala.Symbol): scala.Symbol =
+      scala.Symbol(
+        toSymbol(
+          sanitizeUnclasifiedName(toUnclasifiedName(original.name))
+        )
+      )
+
+    private def cacheOf[T] = mutable.Map.empty[T, T]
+    private val symbols = cacheOf[String]
+    private val termNames = cacheOf[TermName]
+    private val typeNames = cacheOf[TypeName]
+    private val unclasifiedNames = cacheOf[UnclasifiedName]
+
+    /**
+     * Returns the replacement of the symbol, always the same one for the same input.
+     * Symbols of at most `ShortSymbolLength` characters are kept, unless they
+     * collide with an already generated replacement.
+     */
+    private def sanitizeSymbolOf(
+        originalSymbol: String,
+        suffix: => String
+    ): String = symbols.getOrElseUpdate(
+      originalSymbol, {
+        if (
+          originalSymbol.length() <= ShortSymbolLength &&
+          !symbols.values.exists(_ == originalSymbol)
+        ) originalSymbol
+        else
+          generateSymbol(originalSymbol, suffix)
+      }.ensuring(
+        _.length() == originalSymbol.length(),
+        "new symbol has different length then original"
+      )
+    )
+
+    private def generateSymbol(
+        originalSymbol: String,
+        suffix: => String
+    ): String = {
+      val uniqueSuffix = suffix // by-name, resolve it only once
+      // The simple form has to fit the first character of the symbol and the whole
+      // suffix, otherwise the replacement would be longer than the original
+      val fitsSimpleForm = uniqueSuffix.length() < originalSymbol.length()
+      if (fitsSimpleForm && originalSymbol.forall(isAsciiLetterOrDigit))
+        newSimpleSymbol(originalSymbol, uniqueSuffix, fillChar = 'x')
+      else
+        newSymbolsWithSpecialCharacters(originalSymbol)
+    }
+
+    private def isAsciiLetterOrDigit(c: Char) =
+      (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') ||
+        (c >= '0' && c <= '9')
+
+    /** Replaces every ASCII letter with a pseudo random one, other characters are kept */
+    private def newSymbolsWithSpecialCharacters(
+        originalSymbol: String
+    ): String = {
+      val rnd = new Random(originalSymbol.##)
+
+      def nextAsciiLetter(original: Char) = {
+        val c = (rnd.nextInt('z' - 'a') + 'a').toChar
+        val next = if (original.isUpper) c.toUpper else c
+        next match {
+          case 'O' => 'P' // To easily distinguish O from 0
+          case 'I' => 'J' // I vs l
+          case 'l' => 'k' // l vs I
+          case c => c
+        }
+      }
+      // Bounded: a symbol without letters or an exhausted name space would loop forever
+      @tailrec def generate(attemptsLeft: Int): String = {
+        val newSymbol = originalSymbol.map(c =>
+          if (!isAsciiLetterOrDigit(c)) c
+          else if (c.isDigit) c
+          else nextAsciiLetter(c)
+        )
+        val isTaken = symbols.values.exists(_ == newSymbol)
+        if (isTaken && attemptsLeft > 0) generate(attemptsLeft - 1)
+        else newSymbol
+      }
+      generate(MaxGenerateAttempts)
+    }
+
+    /**
+     * Builds `<up to 3 first characters><fill characters><suffix>` of the same length as
+     * the original symbol. Requires the suffix to be shorter than the symbol.
+     */
+    private def newSimpleSymbol(
+        originalSymbol: String,
+        suffix: String,
+        fillChar: Char
+    ): String = {
+      val prefix = originalSymbol.take(SymbolToPrefixLength)
+      val prefixHead =
+        if (originalSymbol.head.isUpper) prefix.head.toUpper
+        else prefix.head.toLower
+      val fillInLength =
+        originalSymbol.length() - prefix.length() - suffix.length()
+      // When there is no room for the fill-in, the prefix gives up its last
+      // characters, so that the suffix still fits
+      val prefixTail =
+        if (fillInLength < 0) prefix.tail.dropRight(-fillInLength)
+        else prefix.tail
+
+      val sb = new java.lang.StringBuilder(originalSymbol.length())
+      sb.append(prefixHead)
+      sb.append(prefixTail)
+      0.until(fillInLength).foreach(_ => sb.append(fillChar))
+      sb.append(suffix)
+      sb.toString()
+    }
+
+  }
+}
+
+private object SourceCodeTransformer {
+  object CommonNames {
+    final val types = Set("Byte", "Short", "Int", "Long", "String", "Unit",
+      "Nothing", "Class", "Option", "Some", "None", "List", "Nil", "Set", "Seq",
+      "Array", "Vector", "Stream", "LazyList", "Map", "Future", "Try",
+      "Success", "Failure", "mutable", "immutable")
+
+    final val methods = Set("get", "getOrElse", "orElse", "map", "left",
+      "right", "flatMap", "flatten", "apply", "unapply", "fold", "foldLeft",
+      "foldRight", "reduce", "reduceLeft", "reduceRight", "scan", "scanLeft",
+      "scanRight", "recover", "recoverWith", "size", "length", "exists",
+      "contains", "forall", "value", "underlying", "classOf", "toOption",
+      "toEither", "toLeft", "toRight", "toString", "to", "stripMargin", "empty")
+  }
+}
diff --git a/mtags/src/main/scala-2/scala/meta/internal/pc/ScalaPresentationCompiler.scala b/mtags/src/main/scala-2/scala/meta/internal/pc/ScalaPresentationCompiler.scala
index 7dfce6ed8da..b741f8651f2 100644
--- a/mtags/src/main/scala-2/scala/meta/internal/pc/ScalaPresentationCompiler.scala
+++ b/mtags/src/main/scala-2/scala/meta/internal/pc/ScalaPresentationCompiler.scala
@@ -49,10 +49,11 @@ import org.eclipse.lsp4j.Range
 import org.eclipse.lsp4j.SelectionRange
 import org.eclipse.lsp4j.SignatureHelp
 import org.eclipse.lsp4j.TextEdit
-import scala.meta.internal.metals.RemoteTelemetryReportContext
-import 
scala.meta.internal.metals.MirroredReportContext import scala.meta.internal.{telemetry => telemetryApi} import scala.meta.internal.pc.{telemetry => pcTelemetryApi} +import scala.meta.internal.metals.RemoteReportContext +import scala.meta.internal.metals.MirroredReportContext +import scala.meta.internal.metals.ScalametaSourceCodeTransformer case class ScalaPresentationCompiler( buildTargetIdentifier: String = "", @@ -75,11 +76,14 @@ case class ScalaPresentationCompiler( Logger.getLogger(classOf[ScalaPresentationCompiler].getName) implicit val reportContex: ReportContext = { - val remoteReporters = new RemoteTelemetryReportContext( - serverEndpoint = RemoteTelemetryReportContext.discoverTelemetryServer, - workspace = folderPath, + val remoteReporters = new RemoteReportContext( + serverEndpoint = RemoteReportContext.discoverTelemetryServer, getReporterContext = makeTelemetryContext, - logger = RemoteTelemetryReportContext.LoggerAccess( + sanitizers = new RemoteReportContext.Sanitizers( + workspace = folderPath, + sourceCodeTransformer = Some(ScalametaSourceCodeTransformer) + ), + logger = RemoteReportContext.LoggerAccess( info = logger.info(_), warning = logger.warning(_), error = logger.severe(_) diff --git a/mtags/src/main/scala-3/scala/meta/internal/pc/ScalaPresentationCompiler.scala b/mtags/src/main/scala-3/scala/meta/internal/pc/ScalaPresentationCompiler.scala index 5c07018284a..51fc30edf48 100644 --- a/mtags/src/main/scala-3/scala/meta/internal/pc/ScalaPresentationCompiler.scala +++ b/mtags/src/main/scala-3/scala/meta/internal/pc/ScalaPresentationCompiler.scala @@ -26,8 +26,9 @@ import scala.meta.internal.pc.completions.OverrideCompletions import scala.meta.pc.* import scala.meta.internal.telemetry as telemetryApi import scala.meta.internal.pc.{telemetry as pcTelemetryApi} -import scala.meta.internal.metals.RemoteTelemetryReportContext +import scala.meta.internal.metals.RemoteReportContext import scala.meta.internal.metals.MirroredReportContext +import 
scala.meta.internal.metals.ScalametaSourceCodeTransformer
 
 import dotty.tools.dotc.reporting.StoreReporter
 import org.eclipse.lsp4j.DocumentHighlight
@@ -58,11 +59,14 @@ case class ScalaPresentationCompiler(
     Logger.getLogger(classOf[ScalaPresentationCompiler].getName)
 
   given ReportContext =
-    val remoteReporters = new RemoteTelemetryReportContext(
-      serverEndpoint = RemoteTelemetryReportContext.discoverTelemetryServer,
-      workspace = folderPath,
+    val remoteReporters = new RemoteReportContext(
+      serverEndpoint = RemoteReportContext.discoverTelemetryServer,
       getReporterContext = makeTelemetryContext,
-      logger = RemoteTelemetryReportContext.LoggerAccess(
+      sanitizers = new RemoteReportContext.Sanitizers(
+        workspace = folderPath,
+        sourceCodeTransformer = Some(ScalametaSourceCodeTransformer),
+      ),
+      logger = RemoteReportContext.LoggerAccess(
         info = logger.info(_),
         warning = logger.warning(_),
         error = logger.severe(_),
diff --git a/mtags/src/main/scala/scala/meta/internal/metals/ScalametaSourceCodeTransformer.scala b/mtags/src/main/scala/scala/meta/internal/metals/ScalametaSourceCodeTransformer.scala
new file mode 100644
index 00000000000..f76f7b13a59
--- /dev/null
+++ b/mtags/src/main/scala/scala/meta/internal/metals/ScalametaSourceCodeTransformer.scala
@@ -0,0 +1,86 @@
+package scala.meta.internal.metals
+
+import scala.meta._
+
+/**
+ * [[SourceCodeTransformer]] backed by scalameta: the source is parsed with the first
+ * dialect that accepts it and the names are replaced using a scalameta `Transformer`.
+ */
+object ScalametaSourceCodeTransformer
+    extends SourceCodeTransformer[Dialect, Tree] {
+  private val availableDialects = {
+    import scala.meta.dialects._
+    val mainDialects = Seq(
+      Dialect.current,
+      Scala3,
+      Scala213,
+      Sbt1,
+      Scala3Future,
+      Scala212
+    )
+    val auxilaryDialects = Seq(
+      Scala213Source3,
+      Scala212Source3,
+      Scala31,
+      Scala32,
+      Scala33,
+      Scala211,
+      Scala210,
+      Sbt0137,
+      Sbt0136
+    )
+    (mainDialects ++ auxilaryDialects)
+  }
+
+  override def parse(source: String): Option[(Dialect, Tree)] =
+    // The iterator is lazy: parsing stops at the first dialect that accepts the source
+    availableDialects.iterator
+      .map(dialect => dialect -> parse(source, dialect))
+      .collectFirst { case (dialect, Some(tree)) => dialect -> tree }
+
+  override def parse(source: String, context: Dialect): Option[Tree] =
+    context(source).parse[Source].toOption
+
+  override def toSourceString(value: Tree, ctx: Dialect): String =
+    value.show(Tree.showSyntax(ctx))
+
+  // Every call gets its own transformer: it caches the original names in mutable maps,
+  // which must be neither shared between threads nor kept once the source is sanitized
+  override def transformer: ASTTrasnformer = new ScalaMetaTrasnformer
+
+  private class ScalaMetaTrasnformer extends Transformer with ASTTrasnformer {
+    override protected type Name = meta.Name
+    override protected type TermName = meta.Term.Name
+    override protected type TypeName = meta.Type.Name
+    override protected type UnclasifiedName = meta.Name.Indeterminate
+
+    override protected def toTermName(name: String): TermName =
+      meta.Term.Name(name)
+    override protected def toTypeName(name: String): TypeName =
+      meta.Type.Name(name)
+    override protected def toUnclasifiedName(name: String): UnclasifiedName =
+      meta.Name.Indeterminate(name)
+    override protected def toSymbol(name: Name): String = name.value
+
+    // Sanitization is best effort: a failure of the transformation is reported as None
+    // instead of being thrown into the code creating the report
+    override def sanitizeSymbols(tree: Tree): Option[Tree] =
+      scala.util.Try(Option(this.apply(tree))).toOption.flatten
+
+    override def apply(tree: Tree): Tree = {
+      tree match {
+        case name: Name if isCommonScalaName(name) => name
+        case node: Term.Select if isScalaOrJavaSelector(node.toString()) => node
+        case node: Type.Select if isScalaOrJavaSelector(node.toString()) => node
+        case node: Type.Name => sanitizeTypeName(node)
+        case node: Term.Name => sanitizeTermName(node)
+        case node: Name.Indeterminate => sanitizeUnclasifiedName(node)
+        case lit: Lit.String => Lit.String(sanitizeStringLiteral(lit.value))
+        case lit: Lit.Symbol => Lit.Symbol(santitizeScalaSymbol(lit.value))
+        case x => super.apply(x)
+      }
+    }
+
+  }
+
+}
diff --git a/project/TestGroups.scala b/project/TestGroups.scala
index e7a6c15fdab..f578a6bf81e 100644
--- a/project/TestGroups.scala
+++ b/project/TestGroups.scala
@@ -115,7 +115,11 @@ object TestGroups {
       "tests.RunProviderLensLspSuite", "tests.SemanticTokensLspSuite",
       "tests.ToplevelsScala3Suite", 
"tests.codeactions.InlineValueLspSuite", "tests.JavaToplevelSuite", "tests.ToplevelLibrarySuite", - "tests.FoldingRangeScala3LineFoldingOnlySuite"), + "tests.FoldingRangeScala3LineFoldingOnlySuite", + "tests.telemetry.CodeAnonimizationSuite", + "tests.telemetry.RemoteReporterSuite", + "tests.telemetry.SourceCodeSanitizerSuite", + "tests.telemetry.SerializationSuite"), ) } diff --git a/telemetry-interface/src/main/java/scala/meta/internal/telemetry/ReportEvent.java b/telemetry-interface/src/main/java/scala/meta/internal/telemetry/ReportEvent.java index c1e57310ddd..f43e8a669f9 100644 --- a/telemetry-interface/src/main/java/scala/meta/internal/telemetry/ReportEvent.java +++ b/telemetry-interface/src/main/java/scala/meta/internal/telemetry/ReportEvent.java @@ -4,19 +4,17 @@ public class ReportEvent { final private String name; - final private String text; - final private String shortSummary; + final private Optional text; final private Optional id; final private Optional error; final private String reporterName; final private ReporterContextUnion reporterContext; final private Environment env; - public ReportEvent(String name, String text, String shortSummary, Optional id, - Optional error, String reporterName, ReporterContextUnion reporterContext, Environment env) { + public ReportEvent(String name, Optional text, Optional id, Optional error, + String reporterName, ReporterContextUnion reporterContext, Environment env) { this.name = name; this.text = text; - this.shortSummary = shortSummary; this.id = id; this.error = error; this.reporterName = reporterName; @@ -28,14 +26,10 @@ public String getName() { return name; } - public String getText() { + public Optional getText() { return text; } - public String getShortSummary() { - return shortSummary; - } - public Optional getId() { return id; } @@ -62,7 +56,6 @@ public int hashCode() { int result = 1; result = prime * result + ((name == null) ? 0 : name.hashCode()); result = prime * result + ((text == null) ? 
0 : text.hashCode()); - result = prime * result + ((shortSummary == null) ? 0 : shortSummary.hashCode()); result = prime * result + ((id == null) ? 0 : id.hashCode()); result = prime * result + ((error == null) ? 0 : error.hashCode()); result = prime * result + ((reporterName == null) ? 0 : reporterName.hashCode()); @@ -90,11 +83,6 @@ public boolean equals(Object obj) { return false; } else if (!text.equals(other.text)) return false; - if (shortSummary == null) { - if (other.shortSummary != null) - return false; - } else if (!shortSummary.equals(other.shortSummary)) - return false; if (id == null) { if (other.id != null) return false; diff --git a/telemetry-interface/src/main/java/scala/meta/internal/telemetry/TelemetryService.java b/telemetry-interface/src/main/java/scala/meta/internal/telemetry/TelemetryService.java index 18c9b089988..1824ef99c95 100644 --- a/telemetry-interface/src/main/java/scala/meta/internal/telemetry/TelemetryService.java +++ b/telemetry-interface/src/main/java/scala/meta/internal/telemetry/TelemetryService.java @@ -1,8 +1,5 @@ package scala.meta.internal.telemetry; -import scala.meta.internal.telemetry.ReportEvent; -import scala.meta.internal.telemetry.ServiceEndpoint; - public interface TelemetryService { void sendReportEvent(ReportEvent event); diff --git a/tests/unit/src/test/scala/tests/telemetry/RemoteReporterSuite.scala b/tests/unit/src/test/scala/tests/telemetry/RemoteReporterSuite.scala index 9fd2f8095d9..e84811319a7 100644 --- a/tests/unit/src/test/scala/tests/telemetry/RemoteReporterSuite.scala +++ b/tests/unit/src/test/scala/tests/telemetry/RemoteReporterSuite.scala @@ -27,12 +27,12 @@ class RemoteReporterSuite extends BaseSuite { // Remote telemetry reporter should be treated as best effort, ensure that logging test("ignore connectiviy failures") { - val reporter = new metals.RemoteTelemetryReportContext( + val reporter = new metals.RemoteReportContext( "https://not.existing.endpoint.for.metals.tests:8081", - None, 
getReporterContext = () => SampleReports.metalsLSPReport().getReporterContext.getMetalsLSP.get(), - logger = metals.RemoteTelemetryReportContext.LoggerAccess.system, + sanitizers = new metals.RemoteReportContext.Sanitizers(None, None), + logger = metals.RemoteReportContext.LoggerAccess.system, ) assertEquals( @@ -56,11 +56,14 @@ class RemoteReporterSuite extends BaseSuite { SampleReports.scalaPresentationCompilerReport(), SampleReports.unknownReport(), ).map(_.getReporterContext().get()) - reporter = new metals.RemoteTelemetryReportContext( + reporter = new metals.RemoteReportContext( serverEndpoint, - None, getReporterContext = () => reporterCtx, - logger = metals.RemoteTelemetryReportContext.LoggerAccess.system, + sanitizers = new metals.RemoteReportContext.Sanitizers( + None, + Some(metals.ScalametaSourceCodeTransformer), + ), + logger = metals.RemoteReportContext.LoggerAccess.system, ) } { val createdReport = simpleReport(reporterCtx.toString()) diff --git a/tests/unit/src/test/scala/tests/telemetry/SampleReports.scala b/tests/unit/src/test/scala/tests/telemetry/SampleReports.scala index 99a926c1b8d..8cc3f8b810e 100644 --- a/tests/unit/src/test/scala/tests/telemetry/SampleReports.scala +++ b/tests/unit/src/test/scala/tests/telemetry/SampleReports.scala @@ -30,8 +30,7 @@ object SampleReports { list: ListControl, ): telemetry.ReportEvent = new telemetry.ReportEvent( "name", - "test", - "shortSummary", + optional("text"), optional("id"), optional( new telemetry.ReportedError( diff --git a/tests/unit/src/test/scala/tests/telemetry/SourceCodeSanitizerSuite.scala b/tests/unit/src/test/scala/tests/telemetry/SourceCodeSanitizerSuite.scala new file mode 100644 index 00000000000..26f652020dd --- /dev/null +++ b/tests/unit/src/test/scala/tests/telemetry/SourceCodeSanitizerSuite.scala @@ -0,0 +1,161 @@ +package tests.telemetry + +import tests.BaseSuite +import scala.meta.internal.metals.SourceCodeSanitizer +import scala.meta.internal.metals.ScalametaSourceCodeTransformer + 
+/** Checks that [[SourceCodeSanitizer]] anonymises Scala code and keeps stack traces intact. */
+class SourceCodeSanitizerSuite extends BaseSuite {
+  val sanitizer = new SourceCodeSanitizer(ScalametaSourceCodeTransformer)
+
+  // Scala source whose identifiers and string literals are expected to be obfuscated.
+  val sampleScalaInput =
+    """
+      |package some.namespace.of.my.app
+      |class Foo{
+      | def myFoo: Int = 42
+      |}
+      |trait Bar{
+      | def myBarSecret: String = "my_super-secret-code"
+      |}
+      |object FooBar extends Foo with Bar{
+      | def compute(input: String, other: Bar): Unit =
+      | if(myBarSecret.contains("super-secret-code") || this.myBarSecret == other.myBarSecret) myFoo * 42
+      | else -1
+      |}
+    """.stripMargin
+  // Expected sanitised form of `sampleScalaInput`.
+  val sampleScalaOutput =
+    """package som0.namxxxxx1.of.my.ap4
+      |class Fo0 { def myFx5: Int = 42 }
+      |trait Ba1 { def myBxxxxxxx6: String = "--_-----------------" }
+      |object Fooxx7 extends Fo0 with Ba1 { def comxxx8(inpx9: String, oth10: Ba1): Unit = if (myBxxxxxxx6.contains("-----------------") || this.myBxxxxxxx6 == oth10.myBxxxxxxx6) myFx5 * 42 else -1 }
+    """.stripMargin
+
+  // Stack trace frames without the `at` prefix; the markdown test expects them unchanged.
+  val sampleStackTraceElements =
+    """
+      |scala.meta.internal.pc.completions.OverrideCompletions.scala$meta$internal$pc$completions$OverrideCompletions$$getMembers(OverrideCompletions.scala:180)
+      | scala.meta.internal.pc.completions.OverrideCompletions$OverrideCompletion.contribute(OverrideCompletions.scala:79)
+      | scala.meta.internal.pc.CompletionProvider.expected$1(CompletionProvider.scala:439)
+      | scala.meta.internal.pc.CompletionProvider.safeCompletionsAt(CompletionProvider.scala:499)
+      | scala.meta.internal.pc.CompletionProvider.completions(CompletionProvider.scala:58)
+      | scala.meta.internal.pc.ScalaPresentationCompiler.$anonfun$complete$1(ScalaPresentationCompiler.scala:169)
+      |
+      |""".stripMargin
+
+  // Java snippet that is cut off mid-class; the tests expect the sanitizer to erase it.
+  val sampleJavaInput =
+    """
+      |package scala.meta.internal.telemetry;
+      |
+      |public class ServiceEndpoint {
+      | final private String uri;
+      | final private String method;
+      | final private Class inputType;
+      | final private Class outputType;
+      |
+      | public ServiceEndpoint(String method, String uri, Class inputType, Class outputType) {
+      | this.uri = uri;
+      | this.method = method;
+      | this.inputType = inputType;
+      | this.outputType = outputType;
+      | }
+      |
+      | public String getUri() {
+      | return uri;
+      | }
+      |
+      | public String getMethod() {
+      | return method;
+    }
+    """.stripMargin
+
+  // JVM stack trace; the markdown test expects it unchanged.
+  val sampleStackTrace =
+    """
+      |java.lang.RuntimeException
+      | at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+      | at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
+      | at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+      | at java.base/java.lang.reflect.Method.invoke(Method.java:568)
+      | at dotty.tools.repl.Rendering.$anonfun$4(Rendering.scala:110)
+      | at scala.Option.flatMap(Option.scala:283)
+      | at dotty.tools.repl.Rendering.valueOf(Rendering.scala:110)
+      | at dotty.tools.repl.Rendering.renderVal(Rendering.scala:152)
+      | at dotty.tools.repl.ReplDriver.$anonfun$7(ReplDriver.scala:388)
+      | at scala.runtime.function.JProcedure1.apply(JProcedure1.java:15)
+    """.stripMargin
+
+  test("erases names from sources in Scala") {
+    assertEquals(sanitizer(sampleScalaInput).trim(), sampleScalaOutput.trim()) // obtained, expected
+  }
+
+  test("erases sources in non parsable sources") { // TODO: Java parsing
+    assertEquals(sanitizer(sampleJavaInput).trim(), "")
+  }
+
+  test("erases names from markdown snippets") {
+    val input =
+      s"""
+        |## Source code:
+        |```
+        |$sampleScalaInput
+        |```
+        |
+        |## Scala source code
+        |```scala
+        |$sampleScalaInput
+        |```
+        |
+        |## Java source code
+        |```
+        |${sampleJavaInput}
+        |```
+        |
+        |## Stacktrace:
+        |```
+        |$sampleStackTrace
+        |```
+        |
+        |## Stack trace elements
+        |```scala
+        |$sampleStackTraceElements
+        |```
+        |
+      """.stripMargin
+
+    val expected =
+      s"""
+        |## Source code:
+        |```
+        |$sampleScalaOutput
+        |```
+        |
+        |## Scala source code
+        |```scala
+        |$sampleScalaOutput
+        |```
+        |
+        |## Java source code
+        |```
+        |
+        |```
+        |
+        |## Stacktrace:
+        |```
+        |$sampleStackTrace
+        |```
+        |
+        |## Stack trace elements
+        |```scala
+        |$sampleStackTraceElements
+        |```
+        |
+      """.stripMargin
+    // Normalise both sides so the comparison ignores indentation and blank lines.
+    def trimLines(string: String) =
+      string.linesIterator.map(_.trim()).filterNot(_.isEmpty()).mkString(System.lineSeparator())
+    assertEquals(trimLines(sanitizer(input)), trimLines(expected))
+  }
+}