From 443c3d560d46fc863fd7b8222749126848c5f8e4 Mon Sep 17 00:00:00 2001 From: Navneet Verma Date: Mon, 16 Dec 2024 12:07:15 -0800 Subject: [PATCH] Removing micro-benchmarks module due to JMH using GPL (#2337) Signed-off-by: Navneet Verma --- .github/workflows/CI.yml | 2 - micro-benchmarks/README.md | 97 ---------------- micro-benchmarks/build.gradle | 68 ----------- .../knn/QueryParsingBenchmarks.java | 109 ------------------ .../knn/TransferVectorsBenchmarks.java | 97 ---------------- .../src/main/resources/log4j2.properties | 19 --- settings.gradle | 1 - 7 files changed, 393 deletions(-) delete mode 100644 micro-benchmarks/README.md delete mode 100644 micro-benchmarks/build.gradle delete mode 100644 micro-benchmarks/src/main/java/org/opensearch/knn/QueryParsingBenchmarks.java delete mode 100644 micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java delete mode 100644 micro-benchmarks/src/main/resources/log4j2.properties diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2cd49de82..7969a9d59 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -14,7 +14,6 @@ on: - 'buildSrc/**' - 'gradle/**' - 'jni/**' - - 'micro-benchmarks/**' - '.github/workflows/CI.yml' pull_request: branches: @@ -28,7 +27,6 @@ on: - 'buildSrc/**' - 'gradle/**' - 'jni/**' - - 'micro-benchmarks/**' - '.github/workflows/CI.yml' jobs: diff --git a/micro-benchmarks/README.md b/micro-benchmarks/README.md deleted file mode 100644 index 0a676004b..000000000 --- a/micro-benchmarks/README.md +++ /dev/null @@ -1,97 +0,0 @@ -# OpenSearch K-NN Microbenchmark Suite - -This directory contains the microbenchmark suite of Opensearch K-NN Plugin. It relies on [JMH](http://openjdk.java.net/projects/code-tools/jmh/). - -This module draws a lot of inspiration from [Opensearch benchmarks](https://github.com/opensearch-project/OpenSearch/tree/main/benchmarks). - -## Purpose - -Micro benchmarks are intended to spot performance regressions in performance-critical components. - -The microbenchmark suite is also handy for ad-hoc micro benchmarks but please remove them again before merging your PR. - -## Getting Started - -Just run `gradlew -p micro-benchmarks run` from the project root -directory. It will build all microbenchmarks, execute them and print -the result. - -## Running Microbenchmarks - -Running via an IDE is not supported as the results are meaningless -because we have no control over the JVM running the benchmarks. - -If you want to run a specific benchmark class like, say, -`TransferVectorsBenchmarks`, you can use `--args`: - -``` -gradlew -p micro-benchmarks run --args ' TransferVectorsBenchmarks' -``` - -Setting Heap while running the benchmarks -``` -./gradlew -p micro-benchmarks run --args ' -gc true ' -Djvm.heap.size=4g -``` - -Everything in the `'` gets sent on the command line to JMH. The leading ` ` -inside the `'`s is important. Without it parameters are sometimes sent to -gradle. - -## Adding Microbenchmarks - -Before adding a new microbenchmark, make yourself familiar with the JMH API. You can check our existing microbenchmarks and also the -[JMH samples](http://hg.openjdk.java.net/code-tools/jmh/file/tip/jmh-samples/src/main/java/org/openjdk/jmh/samples/). - -In contrast to tests, the actual name of the benchmark class is not relevant to JMH. However, stick to the naming convention and -end the class name of a benchmark with `Benchmark`. To have JMH execute a benchmark, annotate the respective methods with `@Benchmark`. - -## Tips and Best Practices - -To get realistic results, you should exercise care when running benchmarks. Here are a few tips: - -### Do - -* Ensure that the system executing your microbenchmarks has as little load as possible. Shutdown every process that can cause unnecessary - runtime jitter. Watch the `Error` column in the benchmark results to see the run-to-run variance. -* Ensure to run enough warmup iterations to get the benchmark into a stable state. If you are unsure, don't change the defaults. -* Avoid CPU migrations by pinning your benchmarks to specific CPU cores. On Linux you can use `taskset`. -* Fix the CPU frequency to avoid Turbo Boost from kicking in and skewing your results. On Linux you can use `cpufreq-set` and the - `performance` CPU governor. -* Vary the problem input size with `@Param`. -* Use the integrated profilers in JMH to dig deeper if benchmark results to not match your hypotheses: - * Add `-prof gc` to the options to check whether the garbage collector runs during a microbenchmarks and skews - your results. If so, try to force a GC between runs (`-gc true`) but watch out for the caveats. - * Add `-prof perf` or `-prof perfasm` (both only available on Linux) to see hotspots. -* Have your benchmarks peer-reviewed. - -### Don't - -* Blindly believe the numbers that your microbenchmark produces but verify them by measuring e.g. with `-prof perfasm`. -* Run more threads than your number of CPU cores (in case you run multi-threaded microbenchmark). -* Look only at the `Score` column and ignore `Error`. Instead, take countermeasures to keep `Error` low / variance explainable. - -## Disassembling - -Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and FCML's `hsdis`. -`perf` is generally available via `apg-get install perf` or `pacman -S perf`. FCML is a little more involved. This worked -on 2020-08-01: - -``` -wget https://github.com/swojtasiak/fcml-lib/releases/download/v1.2.2/fcml-1.2.2.tar.gz -tar xf fcml* -cd fcml* -./configure -make -cd example/hsdis -make -sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so -``` - -If you want to disassemble a single method do something like this: - -``` -gradlew -p micro-benchmarks run --args ' MemoryStatsBenchmark -jvmArgs "-XX:+UnlockDiagnosticVMOptions -XX:CompileCommand=print,*.yourMethodName -XX:PrintAssemblyOptions=intel" -``` - - -If you want `perf` to find the hot methods for you then do add `-prof:perfasm`. diff --git a/micro-benchmarks/build.gradle b/micro-benchmarks/build.gradle deleted file mode 100644 index b1da431fa..000000000 --- a/micro-benchmarks/build.gradle +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ -import org.opensearch.gradle.info.BuildParams - -apply plugin: 'opensearch.build' -apply plugin: 'application' -apply plugin: 'java' -apply plugin: 'io.freefair.lombok' - -assemble.enabled = false - -application { - mainClass = 'org.openjdk.jmh.Main' -} - -test.enabled = false - -repositories { - mavenLocal() - maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" } - mavenCentral() - maven { url "https://plugins.gradle.org/m2/" } -} - -dependencies { - // This will take root project as the dependency - api(project(':')) - api "org.openjdk.jmh:jmh-core:$versions.jmh" - annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh" - // Dependencies of JMH - runtimeOnly 'net.sf.jopt-simple:jopt-simple:5.0.4' - runtimeOnly 'org.apache.commons:commons-math3:3.6.1' -} - -// enable the JMH's BenchmarkProcessor to generate the final benchmark classes -// needs to be added separately otherwise Gradle will quote it and javac will fail -compileJava.options.compilerArgs.addAll(["-processor", "org.openjdk.jmh.generators.BenchmarkProcessor"]) - - -run { - // This is required for C++ code - systemProperty "java.library.path", "$rootDir/jni/release" - executable = "${BuildParams.runtimeJavaHome}/bin/java" - var jvmHeapSize = System.getProperty("jvm.heap.size", "6g") - jvmArgs("-Xms" + jvmHeapSize, "-Xmx" + jvmHeapSize) -} - - -// No licenses for our benchmark deps (we don't ship benchmarks) -tasks.named("dependencyLicenses").configure { it.enabled = false } -dependenciesInfo.enabled = false - -thirdPartyAudit.ignoreViolations( - // these classes intentionally use JDK internal API (and this is ok since the project is maintained by Oracle employees) - 'org.openjdk.jmh.util.Utils' -) - -spotless { - java { - // IDEs can sometimes run annotation processors that leave files in - // here, causing Spotless to complain. Even though this path ought not - // to exist, exclude it anyway in order to avoid spurious failures. - targetExclude 'src/main/generated/**/*.java' - } -} - diff --git a/micro-benchmarks/src/main/java/org/opensearch/knn/QueryParsingBenchmarks.java b/micro-benchmarks/src/main/java/org/opensearch/knn/QueryParsingBenchmarks.java deleted file mode 100644 index 1c5a3b875..000000000 --- a/micro-benchmarks/src/main/java/org/opensearch/knn/QueryParsingBenchmarks.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn; - -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; -import org.opensearch.cluster.ClusterModule; -import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.xcontent.NamedXContentRegistry; -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.index.query.QueryBuilder; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.index.query.TermQueryBuilder; -import org.opensearch.knn.index.query.KNNQueryBuilder; -import org.opensearch.knn.index.query.parser.KNNQueryBuilderParser; -import org.opensearch.plugins.SearchPlugin; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -/** - * Benchmarks for impact of changes around query parsing - */ -@Warmup(iterations = 5, time = 10) -@Measurement(iterations = 3, time = 10) -@Fork(3) -@State(Scope.Benchmark) -public class QueryParsingBenchmarks { - private static final TermQueryBuilder TERM_QUERY = QueryBuilders.termQuery("field", "value"); - private static final NamedXContentRegistry NAMED_X_CONTENT_REGISTRY = xContentRegistry(); - - @Param({ "128", "1024" }) - private int dimension; - @Param({ "basic", "filter" }) - private String type; - - private BytesReference bytesReference; - - @Setup - public void setup() throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder(); - builder.startObject(); - builder.startObject("test"); - builder.field(KNNQueryBuilder.VECTOR_FIELD.getPreferredName(), generateVectorWithOnes(dimension)); - builder.field(KNNQueryBuilder.K_FIELD.getPreferredName(), 1); - if (type.equals("filter")) { - builder.field(KNNQueryBuilder.FILTER_FIELD.getPreferredName(), TERM_QUERY); - } - builder.endObject(); - builder.endObject(); - bytesReference = BytesReference.bytes(builder); - } - - @Benchmark - public void fromXContent(final Blackhole bh) throws IOException { - XContentParser xContentParser = createParser(); - bh.consume(KNNQueryBuilderParser.fromXContent(xContentParser)); - } - - private XContentParser createParser() throws IOException { - XContentParser contentParser = createParser(bytesReference); - contentParser.nextToken(); - return contentParser; - } - - private float[] generateVectorWithOnes(final int dimensions) { - float[] vector = new float[dimensions]; - Arrays.fill(vector, (float) 1); - return vector; - } - - private XContentParser createParser(final BytesReference data) throws IOException { - BytesArray array = (BytesArray) data; - return JsonXContent.jsonXContent.createParser( - NAMED_X_CONTENT_REGISTRY, - LoggingDeprecationHandler.INSTANCE, - array.array(), - array.offset(), - array.length() - ); - } - - private static NamedXContentRegistry xContentRegistry() { - List list = ClusterModule.getNamedXWriteables(); - SearchPlugin.QuerySpec spec = new SearchPlugin.QuerySpec<>( - TermQueryBuilder.NAME, - TermQueryBuilder::new, - TermQueryBuilder::fromXContent - ); - list.add(new NamedXContentRegistry.Entry(QueryBuilder.class, spec.getName(), (p, c) -> spec.getParser().fromXContent(p))); - return new NamedXContentRegistry(list); - } -} diff --git a/micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java b/micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java deleted file mode 100644 index 2bce54ee6..000000000 --- a/micro-benchmarks/src/main/java/org/opensearch/knn/TransferVectorsBenchmarks.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - * - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.knn; - -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; -import org.opensearch.knn.jni.JNICommons; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.concurrent.TimeUnit; - -/** - * The class provides runs some benchmarks and provide the performance data around how much time it will take to - * transfer vectors from java to jni layer for different configuration. - */ -@Warmup(iterations = 1, timeUnit = TimeUnit.SECONDS, time = 300) -@Measurement(iterations = 1, timeUnit = TimeUnit.SECONDS, time = 300) -@Fork(3) -@BenchmarkMode(Mode.SingleShotTime) -@OutputTimeUnit(TimeUnit.SECONDS) -@State(Scope.Benchmark) -public class TransferVectorsBenchmarks { - private static final Random random = new Random(1212121212); - private static final long TOTAL_NUMBER_OF_VECTOR_TO_BE_TRANSFERRED = 1000000; - - @Param({ "128", "256", "384", "512", "960", "1024", "1536" }) - private int dimension; - - @Param({ "100000", "500000", "1000000" }) - private int vectorsPerTransfer; - - private List vectorList; - - @Setup(Level.Trial) - public void setup() { - vectorList = new ArrayList<>(); - for (int i = 0; i < TOTAL_NUMBER_OF_VECTOR_TO_BE_TRANSFERRED; i++) { - vectorList.add(generateRandomVector(dimension)); - } - } - - @Benchmark - public void transferVectors_withCapacity() { - long vectorsAddress = 0; - List vectorToTransfer = new ArrayList<>(); - long startingIndex = 0; - for (float[] floats : vectorList) { - if (vectorToTransfer.size() == vectorsPerTransfer) { - vectorsAddress = JNICommons.storeVectorData( - vectorsAddress, - vectorToTransfer.toArray(new float[][] {}), - dimension * TOTAL_NUMBER_OF_VECTOR_TO_BE_TRANSFERRED - ); - startingIndex += vectorsPerTransfer; - vectorToTransfer = new ArrayList<>(); - } - vectorToTransfer.add(floats); - } - if (!vectorToTransfer.isEmpty()) { - vectorsAddress = JNICommons.storeVectorData( - vectorsAddress, - vectorToTransfer.toArray(new float[][] {}), - dimension * TOTAL_NUMBER_OF_VECTOR_TO_BE_TRANSFERRED - ); - } - JNICommons.freeVectorData(vectorsAddress); - } - - private float[] generateRandomVector(int dimensions) { - float[] vector = new float[dimensions]; - for (int i = 0; i < dimensions; i++) { - vector[i] = -500 + (float) random.nextGaussian() * (1000); - } - return vector; - } -} diff --git a/micro-benchmarks/src/main/resources/log4j2.properties b/micro-benchmarks/src/main/resources/log4j2.properties deleted file mode 100644 index 2cd74124e..000000000 --- a/micro-benchmarks/src/main/resources/log4j2.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# SPDX-License-Identifier: Apache-2.0 -# -# The OpenSearch Contributors require contributions made to -# this file be licensed under the Apache-2.0 license or a -# compatible open source license. -# -# Modifications Copyright OpenSearch Contributors. See -# GitHub history for details. -# - -appender.console.type = Console -appender.console.name = console -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %marker %m%n - -# Do not log at all if it is not really critical - we're in a benchmark -rootLogger.level = error -rootLogger.appenderRef.console.ref = console diff --git a/settings.gradle b/settings.gradle index fd4369d4a..9056e382e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -8,5 +8,4 @@ rootProject.name = 'opensearch-knn' include ":qa" include ":qa:rolling-upgrade" include ":qa:restart-upgrade" -include ":micro-benchmarks"