Split benchmarks into separate build jobs & increase fork count. (#75)
To balance the recent reduction in the number of threads used in benchmarking, the fork count needs to be increased.
This will increase the time each benchmark takes to complete.
GitHub currently imposes a 6-hour limit per job, which would be breached if the fork count were increased within a single job.

Therefore, this change splits the benchmark job in two: one job per benchmark.
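
For context, here is a minimal JMH sketch of the configuration the benchmarks rely on (the `@Threads(1)` and `@Fork(6)` values match the annotation changes in the diff below; the class and benchmark method are illustrative placeholders, not the project's real benchmarks):

```java
import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Threads;

@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Threads(1) // Two-core GitHub runners: extra threads add contention, not signal.
@Fork(6)    // Each fork is a fresh JVM; more forks compensate for the reduced thread count.
public class IllustrativeBenchmark { // hypothetical class, for illustration only

    @Benchmark
    public int measure() {
        // Placeholder workload; the real benchmarks validate/serialize JSON documents.
        return "{}".length();
    }
}
```

Each additional fork repeats the full warmup and measurement cycle in a fresh JVM, which is why per-benchmark wall-clock time grows with the fork count.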
big-andy-coates authored Nov 16, 2023
1 parent 32a5235 commit 92d1e2f
Showing 6 changed files with 153 additions and 88 deletions.
46 changes: 39 additions & 7 deletions .github/workflows/gh-pages.yml
@@ -69,7 +69,7 @@ jobs:
name: functional
path: docs/_includes/*

run_performance:
run_validate_benchmark:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -84,24 +84,51 @@
gradle-home-cache-cleanup: true
- if: github.event_name == 'pull_request'
name: Run performance smoke benchmarks
run: ./gradlew --quiet runBenchmarkSmokeTest
run: ./gradlew --quiet runValidateBenchmarkSmokeTest
- if: github.event_name != 'pull_request'
name: Run performance benchmarks
run: ./gradlew --quiet runBenchmarks
run: ./gradlew --quiet runValidateBenchmark
- name: Add results to step summary
run: |
echo "# Json Validator Benchmark Results" >> $GITHUB_STEP_SUMMARY
cat docs/_includes/JsonValidateBenchmark.md >> $GITHUB_STEP_SUMMARY
- name: Upload Implementations
uses: actions/upload-artifact@v3
with:
name: validateBenchmark
path: docs/_includes/*

run_serde_benchmark:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Set up JDK
uses: actions/setup-java@0ab4596768b603586c0de567f2430c30f5b0d2b0 # v3.13.0
with:
java-version: '17'
distribution: 'adopt'
- name: Setup Gradle
uses: gradle/gradle-build-action@842c587ad8aa4c68eeba24c396e15af4c2e9f30a # v2.9.0
with:
gradle-home-cache-cleanup: true
- if: github.event_name == 'pull_request'
name: Run performance smoke benchmarks
run: ./gradlew --quiet runSerdeBenchmarkSmokeTest
- if: github.event_name != 'pull_request'
name: Run performance benchmarks
run: ./gradlew --quiet runSerdeBenchmark
- name: Add results to step summary
run: |
echo "# Json Serde Benchmark Results" >> $GITHUB_STEP_SUMMARY
cat docs/_includes/JsonSerdeBenchmark.md >> $GITHUB_STEP_SUMMARY
- name: Upload Implementations
uses: actions/upload-artifact@v3
with:
name: performance
name: serdeBenchmark
path: docs/_includes/*

build_pages:
needs: [get_impls, run_functional, run_performance]
needs: [get_impls, run_functional, run_validate_benchmark, run_serde_benchmark]
runs-on: ubuntu-latest
env:
BUNDLE_GEMFILE: ${{ github.workspace }}/docs/Gemfile
@@ -129,10 +156,15 @@ jobs:
with:
name: functional
path: docs/_includes
- name: Download performance results
- name: Download validate benchmark results
uses: actions/download-artifact@v3
with:
name: validateBenchmark
path: docs/_includes
- name: Download serde benchmark results
uses: actions/download-artifact@v3
with:
name: performance
name: serdeBenchmark
path: docs/_includes
- name: Build with Jekyll
run: (cd docs && bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}")
67 changes: 51 additions & 16 deletions build.gradle.kts
@@ -120,34 +120,69 @@ val pullTask = tasks.register("pull-json-schema-test-suite") {
}

val runFunctionalTests = tasks.register<JavaExec>("runFunctionalTests") {
dependsOn(pullTask)
classpath = sourceSets.main.get().runtimeClasspath
mainClass.set("org.creekservice.kafka.test.perf.FunctionalMain")
args = listOf(jsonSchemaTestSuiteDir.get().asFile.absolutePath)
}

tasks.register<JavaExec>("runValidateBenchmark") {
dependsOn(pullTask)
classpath = sourceSets.main.get().runtimeClasspath
configureBenchmarkTask("JsonValidateBenchmark", false)
}

tasks.register<JavaExec>("runBenchmarks") {
tasks.register<JavaExec>("runSerdeBenchmark") {
classpath = sourceSets.main.get().runtimeClasspath
mainClass.set("org.creekservice.kafka.test.perf.PerformanceMain")
configureBenchmarkTask("JsonSerdeBenchmark", false)
}

tasks.register("runBenchmarks") {
dependsOn("runValidateBenchmark", "runSerdeBenchmark")
}

val runValidateBenchmarkSmokeTest = tasks.register<JavaExec>("runValidateBenchmarkSmokeTest") {
dependsOn(pullTask)
classpath = sourceSets.main.get().runtimeClasspath
configureBenchmarkTask("JsonValidateBenchmark", true)
}

val runBenchmarkSmokeTest = tasks.register<JavaExec>("runBenchmarkSmokeTest") {
val runSerdeBenchmarkSmokeTest = tasks.register<JavaExec>("runSerdeBenchmarkSmokeTest") {
classpath = sourceSets.main.get().runtimeClasspath
configureBenchmarkTask("JsonSerdeBenchmark", true)
}

val runBenchmarkSmokeTest = tasks.register("runBenchmarkSmokeTest") {
dependsOn(runValidateBenchmarkSmokeTest, runSerdeBenchmarkSmokeTest)
}

fun JavaExec.configureBenchmarkTask(benchmarkClass: String, smokeTest: Boolean) {
mainClass.set("org.creekservice.kafka.test.perf.PerformanceMain")
args(listOf(
// No warmup:
"-wi", "0",
// Single test iteration:
"-i", "1",
// On a single thread:
"-t", "1",
// Running for 1 second
"-r", "1s",
// With forking disabled, i.e. in-process
"-f", "0"
))
dependsOn(pullTask)

outputs.file(file("docs/_includes/$benchmarkClass.json"))
outputs.file(file("docs/_includes/$benchmarkClass.md"))

args(
listOf(
// Benchmark to run:
benchmarkClass
)
)

if (smokeTest) {
args(listOf(
// No warmup:
"-wi", "0",
// Single test iteration:
"-i", "1",
// On a single thread:
"-t", "1",
// Running for 1 second
"-r", "1s",
// With forking disabled, i.e. in-process
"-f", "0"
))
}
}

val extractImplementations = tasks.register<JavaExec>("extractImplementations") {
66 changes: 18 additions & 48 deletions docs/_docs/3. performance.md
@@ -58,33 +58,7 @@ The graphs below exclude the `Snow` implementation, as it is orders of magnitude
(The `Snow` implementation describes itself as a _reference_ implementation).
{: .notice--warning}

<div>
<canvas id="validateChart_Draft_04"></canvas>
</div>

&nbsp;

<div>
<canvas id="validateChart_Draft_06"></canvas>
</div>

&nbsp;

<div>
<canvas id="validateChart_Draft_07"></canvas>
</div>

&nbsp;

<div>
<canvas id="validateChart_Draft_2019_09"></canvas>
</div>

&nbsp;

<div>
<canvas id="validateChart_Draft_2020_12"></canvas>
</div>
<div id="ValidateCharts"></div>

### Serde benchmark

@@ -115,15 +89,7 @@ Newer schema versions are more feature rich, and this can come at a cost.
Comparison of different implementations across specification versions may be misleading.
{: .notice--warning}

<div>
<canvas id="serdeChart_Draft_07"></canvas>
</div>

&nbsp;

<div>
<canvas id="serdeChart_Draft_2020_12"></canvas>
</div>
<div id="SerdeCharts"></div>

[//]: # (Chart scripts: https://www.chartjs.org/docs/latest/)
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
@@ -132,24 +98,28 @@ Comparison of different implementations across specification versions may be mis
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/style.css" rel="stylesheet" type="text/css">
<script src="https://cdn.jsdelivr.net/npm/[email protected]" type="text/javascript"></script>

[//]: # (IMPLEMENTATIONS_JSON, PERFORMANCE_JSON)
<script>
const implData = {% include implementations.json %};

const performanceData = {% include benchmark_results.json %};
const validateResults = {% include JsonValidateBenchmark.json %};
const serdeResults = {% include JsonSerdeBenchmark.json %};

function buildCharts(benchmarkName, title, elementIdPrefix, drafts, includeSnow){
let results = performanceData
.filter(r => r.benchmark.includes(benchmarkName))
.filter(r => includeSnow || !r.benchmark.includes('_Snow'));
function buildCharts(resultData, benchmarkType, drafts){
const chartContainer = document.getElementById(benchmarkType + 'Charts');

drafts.forEach(function(draft) {
let draftData = results.filter(r => r.benchmark.includes(draft)).sort(function(a, b) {
const title = document.createElement('h4');
const canvas = document.createElement('canvas');
title.textContent = draft + ' Results';
chartContainer.append(title);
chartContainer.append(canvas);

let draftData = resultData.filter(r => r.benchmark.includes(draft)).sort(function(a, b) {
return a.primaryMetric.score - b.primaryMetric.score;
});

let implNames = draftData.map(r => r.benchmark.substring(r.benchmark.lastIndexOf('_') + 1));
new Chart(document.getElementById(elementIdPrefix + draft),
new Chart(canvas,
{
type: 'bar',
data: {
Expand All @@ -165,7 +135,7 @@ Comparison of different implementations across specification versions may be mis
plugins: {
title: {
display: true,
text: draft + ' ' + title + ' (lower is better)'
text: draft + ' ' + benchmarkType + ' Performance (lower is better)'
},
legend: {
display: false
@@ -183,10 +153,10 @@ Comparison of different implementations across specification versions may be mis
},
});
});
}
}

buildCharts('JsonValidateBenchmark', 'Validator Performance', 'validateChart_', ["Draft_04", "Draft_06", "Draft_07", "Draft_2019_09", "Draft_2020_12"], false);
buildCharts('JsonSerdeBenchmark', 'Serde Performance', 'serdeChart_', ["Draft_07", "Draft_2020_12"], true);
buildCharts(validateResults, 'Validate', ["Draft_04", "Draft_06", "Draft_07", "Draft_2019_09", "Draft_2020_12"]);
buildCharts(serdeResults.filter(r => !r.benchmark.includes('_Snow')), 'Serde', ["Draft_07", "Draft_2020_12"]);
</script>


58 changes: 43 additions & 15 deletions src/main/java/org/creekservice/kafka/test/perf/PerformanceMain.java
@@ -21,49 +21,77 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import org.creekservice.kafka.test.perf.performance.util.JsonToMarkdownConvertor;
import org.creekservice.kafka.test.perf.performance.util.PerformanceDataValidator;
import org.openjdk.jmh.runner.options.CommandLineOptionException;
import org.openjdk.jmh.runner.options.CommandLineOptions;

/** Entry point for running the performance benchmarks. */
public final class PerformanceMain {

private static final Path JSON_RESULTS = INCLUDES_ROOT.resolve("benchmark_results.json");

private PerformanceMain() {}

public static void main(final String[] suppliedArgs) throws Exception {
runBenchmarks(suppliedArgs);
validateJsonOutput();
writeMarkdownOutput();
final String benchmark = extractBenchmark(suppliedArgs);
final Path jsonResultFile = INCLUDES_ROOT.resolve(benchmark + ".json");

ensureOutputDirectory();

runBenchmarks(suppliedArgs, jsonResultFile);

validateJsonOutput(jsonResultFile);
writeMarkdownOutput(jsonResultFile);
}

private static String extractBenchmark(final String[] args) {
try {
final CommandLineOptions cmdOptions = new CommandLineOptions(args);
final List<String> benchmarks = cmdOptions.getIncludes();
if (benchmarks.size() != 1) {
throw new CommandLineOptionException(
"A single benchmark to run must be supplied. Got: " + benchmarks);
}

return benchmarks.get(0);
} catch (CommandLineOptionException e) {
System.err.println("Error parsing command line:");
System.err.println(" " + e.getMessage());
System.exit(1);
return null;
}
}

private static void ensureOutputDirectory() throws IOException {
Files.createDirectories(INCLUDES_ROOT);
}

private static void runBenchmarks(final String[] suppliedArgs) throws IOException {
private static void runBenchmarks(final String[] suppliedArgs, final Path jsonResultFile)
throws IOException {
final String[] additionalArgs = {
// Output results in csv format
"-rf",
"json",
// To a named file
"-rff",
JSON_RESULTS.toString(),
jsonResultFile.toString(),
// Fail on Error
"-foe",
"true"
};

final String[] allArgs = new String[suppliedArgs.length + additionalArgs.length];
System.arraycopy(suppliedArgs, 0, allArgs, 0, suppliedArgs.length);
System.arraycopy(additionalArgs, 0, allArgs, suppliedArgs.length, additionalArgs.length);

Files.createDirectories(INCLUDES_ROOT);
System.arraycopy(additionalArgs, 0, allArgs, 0, additionalArgs.length);
System.arraycopy(suppliedArgs, 0, allArgs, additionalArgs.length, suppliedArgs.length);

org.openjdk.jmh.Main.main(allArgs);
}

private static void validateJsonOutput() {
new PerformanceDataValidator().validate(JSON_RESULTS);
private static void validateJsonOutput(final Path jsonResultFile) {
new PerformanceDataValidator().validate(jsonResultFile);
}

private static void writeMarkdownOutput() {
new JsonToMarkdownConvertor().convert(JSON_RESULTS, INCLUDES_ROOT);
private static void writeMarkdownOutput(final Path jsonResultFile) {
new JsonToMarkdownConvertor().convert(jsonResultFile, INCLUDES_ROOT);
}
}
@@ -62,7 +62,7 @@
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(MICROSECONDS)
@Threads(1) // GitHub linux runners have two cores, so running more threads is pointless.
@Fork(4) // Note: to debug, set fork to 0.
@Fork(6) // Note: to debug, set fork to 0.
// @Warmup(iterations = 0, time = 10)
// @Measurement(iterations = 1, time = 10)
@SuppressWarnings({"FieldMayBeFinal", "MethodName"}) // not final to avoid folding.
@@ -55,7 +55,7 @@
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(MILLISECONDS)
@Threads(1) // GitHub linux runners have two cores, so running more threads is pointless.
@Fork(4) // Note: to debug, set fork to 0.
@Fork(6) // Note: to debug, set fork to 0.
// @Warmup(iterations = 0, time = 10)
// @Measurement(iterations = 1, time = 10)
@SuppressWarnings({"FieldMayBeFinal", "MethodName"}) // not final to avoid folding.
