From f71a2ba7b169426dd07e442817264c173be5b54e Mon Sep 17 00:00:00 2001 From: Jaden Peterson Date: Fri, 18 Oct 2024 18:31:17 -0400 Subject: [PATCH 1/3] Output a SemanticDbInfo provider in the "semanticdb" phase --- rules/private/phases/phase_semanticdb.bzl | 58 ++++++++++++++------- rules/private/phases/phase_zinc_compile.bzl | 7 +-- rules/providers.bzl | 8 +++ tests/plugins/semanticdb/BUILD | 18 +++++++ tests/plugins/semanticdb/rule.bzl | 25 +++++++++ tests/plugins/semanticdb/test | 19 ++++++- 6 files changed, 113 insertions(+), 22 deletions(-) create mode 100644 tests/plugins/semanticdb/rule.bzl diff --git a/rules/private/phases/phase_semanticdb.bzl b/rules/private/phases/phase_semanticdb.bzl index 6553d682..3d2d7f87 100644 --- a/rules/private/phases/phase_semanticdb.bzl +++ b/rules/private/phases/phase_semanticdb.bzl @@ -1,5 +1,12 @@ load("@bazel_skylib//lib:paths.bzl", "paths") -load("@rules_scala_annex//rules:providers.bzl", _ScalaConfiguration = "ScalaConfiguration") +load( + "@rules_scala_annex//rules:providers.bzl", + _ScalaConfiguration = "ScalaConfiguration", + _SemanticDbInfo = "SemanticDbInfo", +) + +def _semanticdb_directory_from_file(file): + return file.path[:file.path.find("META-INF") - 1] # # PHASE: semanticdb @@ -11,32 +18,47 @@ def phase_semanticdb(ctx, g): scala_configuration = ctx.attr.scala[_ScalaConfiguration] if scala_configuration.semanticdb_bundle: - return struct(outputs = [], scalacopts = []) + return struct(outputs = [], arguments_modifier = lambda _: None) + directory_name = "{}/semanticdb".format(ctx.label.name) outputs = [] - semanticdb_directory = paths.join("_semanticdb/", ctx.label.name) - semanticdb_target_root = paths.join(paths.dirname(ctx.outputs.jar.path), semanticdb_directory) for source in ctx.files.srcs: if source.extension == "scala": - output_filename = paths.join( - semanticdb_directory, + path = paths.join( + directory_name, "META-INF", "semanticdb", "{}.semanticdb".format(source.path), ) - outputs.append(ctx.actions.declare_file(output_filename)) + outputs.append(ctx.actions.declare_file(path)) + + def add_scalacopts(arguments): + if len(outputs) == 0: + return + + if scala_configuration.version.startswith("2"): + arguments.add("--compiler_option=-P:semanticdb:failures:error") + arguments.add_all( + [outputs[0]], + format_each = "--compiler_option=-P:semanticdb:targetroot:%s", + map_each = _semanticdb_directory_from_file, + ) + else: + arguments.add_all( + [outputs[0]], + format_each = "--compiler_option=-semanticdb-target:%s", + map_each = _semanticdb_directory_from_file, + ) + + arguments.add("--compiler_option=-Ysemanticdb") - if scala_configuration.version.startswith("2"): - scalacopts = [ - "-P:semanticdb:failures:error", - "-P:semanticdb:targetroot:{}".format(semanticdb_target_root), - ] - else: - scalacopts = [ - "-semanticdb-target:{}".format(semanticdb_target_root), - "-Ysemanticdb", - ] + g.out.providers.append( + _SemanticDbInfo( + target_root = "{}/{}".format(ctx.label.package, directory_name), + semanticdb_files = outputs, + ), + ) - return struct(outputs = outputs, scalacopts = scalacopts) + return struct(outputs = outputs, arguments_modifier = add_scalacopts) diff --git a/rules/private/phases/phase_zinc_compile.bzl b/rules/private/phases/phase_zinc_compile.bzl index c57fccdb..2a323c1b 100644 --- a/rules/private/phases/phase_zinc_compile.bzl +++ b/rules/private/phases/phase_zinc_compile.bzl @@ -37,10 +37,9 @@ def phase_zinc_compile(ctx, g): ] zincs = [dep[_ZincInfo] for dep in ctx.attr.deps if _ZincInfo in dep] - common_scalacopts = scala_configuration.global_scalacopts + ctx.attr.scalacopts + g.semanticdb.scalacopts + common_scalacopts = scala_configuration.global_scalacopts + ctx.attr.scalacopts args = ctx.actions.args() - args.add_all(depset(transitive = [zinc.deps for zinc in zincs]), map_each = _compile_analysis) args.add("--compiler_bridge", zinc_configuration.compiler_bridge) args.add_all("--compiler_classpath", g.classpaths.compiler) @@ -55,8 +54,10 @@ def phase_zinc_compile(ctx, g): args.add_all("--plugins", g.classpaths.plugin) args.add_all("--source_jars", g.classpaths.src_jars) args.add("--tmp", tmp.path) - args.add("--log_level", zinc_configuration.log_level) + + g.semanticdb.arguments_modifier(args) + args.add_all("--", g.classpaths.srcs) args.set_param_file_format("multiline") args.use_param_file("@%s", use_always = True) diff --git a/rules/providers.bzl b/rules/providers.bzl index f602f488..ada2c31d 100644 --- a/rules/providers.bzl +++ b/rules/providers.bzl @@ -207,3 +207,11 @@ LabeledJars = provider( "values": "The preorder depset of label and jars.", }, ) + +SemanticDbInfo = provider( + doc = "Provided by the Scala rules when `semanticdb_bundle` is set to `False`.", + fields = { + "target_root": "The directory in which SemanticDB files were outputted.", + "semanticdb_files": "The SemanticDB files.", + }, +) diff --git a/tests/plugins/semanticdb/BUILD b/tests/plugins/semanticdb/BUILD index 293ed382..af8698e5 100644 --- a/tests/plugins/semanticdb/BUILD +++ b/tests/plugins/semanticdb/BUILD @@ -1,5 +1,6 @@ load("@rules_scala_annex//rules:scala.bzl", "configure_zinc_scala", "scala_library") load("@rules_scala_annex//rules/scala:workspace.bzl", "scala_2_13_version", "scala_3_version") +load(":rule.bzl", "read_semanticdb_info") configure_zinc_scala( name = "scala_2_13_with_semanticdb", @@ -39,9 +40,26 @@ scala_library( tags = ["manual"], ) +read_semanticdb_info( + name = "semanticdb-2_13-semanticdb-info", + scala_target = ":semanticdb-2_13", +) + scala_library( name = "semanticdb-3", srcs = glob(["*.scala"]), scala = ":scala_3_with_semanticdb", tags = ["manual"], ) + +read_semanticdb_info( + name = "semanticdb-3-semanticdb-info", + scala_target = ":semanticdb-3", +) + +scala_library( + name = "semanticdb-empty", + srcs = [], + scala = ":scala_2_13_with_semanticdb", + tags = ["manual"], +) diff --git a/tests/plugins/semanticdb/rule.bzl b/tests/plugins/semanticdb/rule.bzl new file mode 100644 index 00000000..7697ad3e --- /dev/null +++ b/tests/plugins/semanticdb/rule.bzl @@ -0,0 +1,25 @@ +load("@rules_scala_annex//rules:providers.bzl", "SemanticDbInfo") + +def _read_semanticdb_info_impl(ctx): + semanticdb_info = ctx.attr.scala_target[SemanticDbInfo] + output = ctx.actions.declare_file("{}.txt".format(ctx.label.name)) + + ctx.actions.write( + output, + json.encode({ + "targetRoot": semanticdb_info.target_root, + "semanticDbFiles": sorted([file.path for file in semanticdb_info.semanticdb_files]), + }), + ) + + return DefaultInfo(files = depset([output])) + +read_semanticdb_info = rule( + attrs = { + "scala_target": attr.label( + mandatory = True, + providers = [SemanticDbInfo], + ), + }, + implementation = _read_semanticdb_info_impl, +) diff --git a/tests/plugins/semanticdb/test b/tests/plugins/semanticdb/test index 261bd37e..fbcf104f 100755 --- a/tests/plugins/semanticdb/test +++ b/tests/plugins/semanticdb/test @@ -1,9 +1,11 @@ #!/bin/bash -e . "$(dirname "$0")"/../../common.sh +bazel_bin="$(bazel info bazel-bin)" + check_for_semanticdb_files() { for filename in "A.scala.semanticdb" "B.scala.semanticdb"; do - path="../../bazel-bin/plugins/semanticdb/_semanticdb/semanticdb-$1/META-INF/semanticdb/plugins/semanticdb/$filename" + path="../../bazel-bin/plugins/semanticdb/semanticdb-$1/semanticdb/META-INF/semanticdb/plugins/semanticdb/$filename" if [ ! -f "$path" ]; then echo "Error: $path doesn't exist" @@ -12,7 +14,22 @@ check_for_semanticdb_files() { done } +check_semanticdb_info() { + bazel build ":semanticdb-$1-semanticdb-info" + + output_path="$bazel_bin/plugins/semanticdb/semanticdb-$1-semanticdb-info.txt" + semanticdb_file_directory="bazel-out/k8-fastbuild/bin/plugins/semanticdb/semanticdb-$1/semanticdb/META-INF/semanticdb/plugins/semanticdb" + + [ "$(jq ".targetRoot" "$output_path")" = "\"plugins/semanticdb/semanticdb-$1/semanticdb\"" ] + [ "$(jq -c ".semanticDbFiles" "$output_path")" = "[\"$semanticdb_file_directory/A.scala.semanticdb\",\"$semanticdb_file_directory/B.scala.semanticdb\"]" ] +} + bazel build :semanticdb-2_13 check_for_semanticdb_files 2_13 +check_semanticdb_info 2_13 + bazel build :semanticdb-3 check_for_semanticdb_files '3' +check_semanticdb_info '3' + +bazel build :semanticdb-empty From 8786cdd17ead12907d4b6b6ca2f04e73319d2702 Mon Sep 17 00:00:00 2001 From: Jaden Peterson Date: Fri, 18 Oct 2024 18:35:59 -0400 Subject: [PATCH 2/3] Adjust SemanticDB target root paths using a regex Matching on string interpolation is terribly slow. --- .../rules_scala/workers/common/CommonArguments.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala b/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala index a4c6f5c7..bc52a046 100644 --- a/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala +++ b/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala @@ -49,14 +49,18 @@ object Analysis { } object CommonArguments { + private val scala2SemanticDbTargetRootRegex = """-P:semanticdb:targetroot:(.*)""".r + private val scala3SemanticDbTargetRootRegex = """-semanticdb-target:(.*)""".r + private def adjustCompilerOptions(workDir: Path, options: List[String]) = { - def adjustStringPath(path: String) = SandboxUtil.getSandboxPath(workDir, Paths.get(path)).toString + def adjustStringPath(path: String) = + SandboxUtil.getSandboxPath(workDir, Paths.get(path)).toString options.flatMap { - case s"-P:semanticdb:targetroot:$path" => + case scala2SemanticDbTargetRootRegex(path) => List(s"-P:semanticdb:sourceroot:${workDir.toString}", s"-P:semanticdb:targetroot:${adjustStringPath(path)}") - case s"-semanticdb-target:$path" => + case scala3SemanticDbTargetRootRegex(path) => List(s"-semanticdb-target:${adjustStringPath(path)}", s"-sourceroot:${workDir.toString}") case option => List(option) From 4c0cc327d6c9e7b39311fa5748fa2172393f171e Mon Sep 17 00:00:00 2001 From: Jaden Peterson Date: Mon, 21 Oct 2024 14:41:36 -0400 Subject: [PATCH 3/3] Pass SemanticDB compiler options in a separate list --- rules/private/phases/phase_semanticdb.bzl | 17 +++++- .../workers/common/CommonArguments.scala | 55 +++++++++++++------ .../workers/zinc/compile/ZincRunner.scala | 9 +-- 3 files changed, 58 insertions(+), 23 deletions(-) diff --git a/rules/private/phases/phase_semanticdb.bzl b/rules/private/phases/phase_semanticdb.bzl index 3d2d7f87..d2a7a9e5 100644 --- a/rules/private/phases/phase_semanticdb.bzl +++ b/rules/private/phases/phase_semanticdb.bzl @@ -6,7 +6,16 @@ load( ) def _semanticdb_directory_from_file(file): - return file.path[:file.path.find("META-INF") - 1] + """ + This is janky, but we're limited in what we can do in this function. From the + [documentation](https://bazel.build/rules/lib/builtins/Args#add_all) on `Args#add_all`: + + To avoid unintended retention of large analysis-phase data structures into the execution phase, + the `map_each` function must be declared by a top-level `def` statement; it may not be a + nested function closure by default. + """ + + return "{}/{}".format(file.root.path, file.short_path[:file.short_path.find("META-INF") - 1]) # # PHASE: semanticdb @@ -40,18 +49,20 @@ def phase_semanticdb(ctx, g): if scala_configuration.version.startswith("2"): arguments.add("--compiler_option=-P:semanticdb:failures:error") + arguments.add("--compiler_option_referencing_path=-P:semanticdb:sourceroot:${workDir}") arguments.add_all( [outputs[0]], - format_each = "--compiler_option=-P:semanticdb:targetroot:%s", + format_each = "--compiler_option_referencing_path=-P:semanticdb:targetroot:${path} %s", map_each = _semanticdb_directory_from_file, ) else: arguments.add_all( [outputs[0]], - format_each = "--compiler_option=-semanticdb-target:%s", + format_each = "--compiler_option_referencing_path=-semanticdb-target:${path} %s", map_each = _semanticdb_directory_from_file, ) + arguments.add("--compiler_option_referencing_path=-sourceroot:${workDir}") arguments.add("--compiler_option=-Ysemanticdb") g.out.providers.append( diff --git a/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala b/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala index bc52a046..d735b331 100644 --- a/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala +++ b/src/main/scala/higherkindness/rules_scala/workers/common/CommonArguments.scala @@ -7,6 +7,7 @@ import common.sandbox.SandboxUtil import net.sourceforge.argparse4j.impl.{Arguments => ArgumentsImpl} import net.sourceforge.argparse4j.inf.{Argument, ArgumentParser, ArgumentType, Namespace} import java.util.{Collections, List as JList} +import scala.annotation.nowarn import scala.collection.mutable.Buffer import scala.jdk.CollectionConverters.* import java.io.File @@ -17,6 +18,18 @@ class CommonArguments private ( val compilerBridge: Path, val compilerClasspath: List[Path], val compilerOptions: List[String], + + /** + * With [[https://bazel.build/remote/multiplex#multiplex_sandboxing multiplex sandboxing]], Bazel generates a separate + * sandbox directory for each worker invocation, which means the paths to build artifacts won't be known until the + * execution phase. However, compiler options are usually generated during the analysis phase. + * + * Some compiler options (currently, only those regarding SemanticDB) reference paths to build artifacts and need to + * be adjusted to be relative to the sandbox directory. It's more performant for those options to be passed in a + * separate list so we don't have to scan through every compiler option and potentially modify it. In our experience, + * this resulted in a ~10% worker speedup. + */ + val compilerOptionsReferencingPaths: List[String], val classpath: List[Path], val debug: Boolean, val javaCompilerOptions: List[String], @@ -49,22 +62,22 @@ object Analysis { } object CommonArguments { - private val scala2SemanticDbTargetRootRegex = """-P:semanticdb:targetroot:(.*)""".r - private val scala3SemanticDbTargetRootRegex = """-semanticdb-target:(.*)""".r - - private def adjustCompilerOptions(workDir: Path, options: List[String]) = { - def adjustStringPath(path: String) = - SandboxUtil.getSandboxPath(workDir, Paths.get(path)).toString - - options.flatMap { - case scala2SemanticDbTargetRootRegex(path) => - List(s"-P:semanticdb:sourceroot:${workDir.toString}", s"-P:semanticdb:targetroot:${adjustStringPath(path)}") + private def adjustCompilerOptions(workDir: Path, options: List[String]) = options.map { option => + val i = option.lastIndexOf(' ') + val withPathReplaced = if (i == -1) { + option + } else { + val template = option.slice(0, i) + val path = option.slice(i + 1, option.length) - case scala3SemanticDbTargetRootRegex(path) => - List(s"-semanticdb-target:${adjustStringPath(path)}", s"-sourceroot:${workDir.toString}") - - case option => List(option) + template.replace( + "${path}": @nowarn("cat=lint-missing-interpolator"), + SandboxUtil.getSandboxPath(workDir, Paths.get(path)).toString, + ) } + + withPathReplaced + .replace("${workDir}": @nowarn("cat=lint-missing-interpolator"), workDir.toString) } /** @@ -95,6 +108,11 @@ object CommonArguments { .help("Compiler option") .action(ArgumentsImpl.append) .metavar("option") + parser + .addArgument("--compiler_option_referencing_path") + .help("Compiler option referencing the paths to build artifact(s)") + .action(ArgumentsImpl.append) + .metavar("option") parser .addArgument("--classpath") .help("Compilation classpath") @@ -195,9 +213,14 @@ object CommonArguments { analyses = analyses, compilerBridge = SandboxUtil.getSandboxPath(workDir, namespace.get[Path]("compiler_bridge")), compilerClasspath = SandboxUtil.getSandboxPaths(workDir, namespace.getList[Path]("compiler_classpath")), - compilerOptions = adjustCompilerOptions( + compilerOptions = Option(namespace.getList[String]("compiler_option")) + .map(_.asScala.toList) + .getOrElse(List.empty), + compilerOptionsReferencingPaths = adjustCompilerOptions( workDir, - Option(namespace.getList[String]("compiler_option")).map(_.asScala.toList).getOrElse(List.empty), + Option(namespace.getList[String]("compiler_option_referencing_path")) + .map(_.asScala.toList) + .getOrElse(List.empty), ), classpath = SandboxUtil.getSandboxPaths(workDir, namespace.getList[Path]("classpath")), debug = namespace.getBoolean("debug"), diff --git a/src/main/scala/higherkindness/rules_scala/workers/zinc/compile/ZincRunner.scala b/src/main/scala/higherkindness/rules_scala/workers/zinc/compile/ZincRunner.scala index c0313bf7..98fac434 100644 --- a/src/main/scala/higherkindness/rules_scala/workers/zinc/compile/ZincRunner.scala +++ b/src/main/scala/higherkindness/rules_scala/workers/zinc/compile/ZincRunner.scala @@ -214,10 +214,11 @@ object ZincRunner extends WorkerMain[ZincRunnerWorkerConfig] { .withClassesDirectory(classesOutputDir) .withJavacOptions(workRequest.javaCompilerOptions.toArray) .withScalacOptions( - Array.concat( - workRequest.plugins.map(p => s"-Xplugin:$p").toArray, - workRequest.compilerOptions.toArray, - ), + ( + workRequest.plugins.map(p => s"-Xplugin:$p") ++ + workRequest.compilerOptions ++ + workRequest.compilerOptionsReferencingPaths + ).toArray, ) val compilers = {