From 93a36c970d0b8cf42cb02d2b093e75abefff0d19 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Fri, 4 Oct 2024 09:57:17 -0700 Subject: [PATCH] PGO: optimize collected profile file size (#14256) (cherry picked from commit 3833927f8b1a17f4f0480af5d4a8ed051910a0ca) # Conflicts: # .github/workflows/benchmarks.yml # systemtest/benchtest/profiles.go # testing/benchmark/Makefile --- .github/workflows/benchmarks.yml | 54 ++++++++++++++++++++++++++++++++ systemtest/benchtest/profiles.go | 40 +++++++++++++++++++---- testing/benchmark/Makefile | 4 +++ 3 files changed, 92 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index eda117cdc7a..25eddb0512f 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -156,6 +156,60 @@ jobs: path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} if-no-files-found: error +<<<<<<< HEAD +======= + # The next section injects CPU profile collected by apmbench into the build. + # By copying the profile, uploading it to the artifacts and pushing it + # via a PR to update default.pgo. + + - name: Copy CPU profile + run: make cp-cpuprof + + - name: Upload CPU profile + uses: actions/upload-artifact@v4 + with: + name: cpu-profile + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + if-no-files-found: error + + - name: Get token + id: get_token + uses: tibdex/github-app-token@3beb63f4bd073e61482598c45c71c1019b59b73a # v2.1.0 + with: + app_id: ${{ secrets.OBS_AUTOMATION_APP_ID }} + private_key: ${{ secrets.OBS_AUTOMATION_APP_PEM }} + permissions: >- + { + "contents": "write", + "pull_requests": "write" + } + + # Required to use a service account, otherwise PRs created by + # GitHub bot won't trigger any CI builds. 
+ # See https://github.com/peter-evans/create-pull-request/issues/48#issuecomment-537478081 + - name: Configure git user + uses: elastic/oblt-actions/git/setup@v1 + with: + github-token: ${{ steps.get_token.outputs.token }} + + - name: Import GPG key + uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # v6.1.0 + with: + gpg_private_key: ${{ secrets.APM_SERVER_RELEASE_GPG_PRIVATE_KEY }} + passphrase: ${{ secrets.APM_SERVER_RELEASE_PASSPHRASE }} + git_user_signingkey: true + git_commit_gpgsign: true + + - name: Open PGO PR + if: ${{ env.RUN_STANDALONE == 'true' }} + run: ${{ github.workspace }}/.ci/scripts/push-pgo-pr.sh + env: + WORKSPACE_PATH: ${{ github.workspace }} + PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + GITHUB_TOKEN: ${{ steps.get_token.outputs.token }} + WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} + +>>>>>>> 3833927f8 (PGO: optimize collected profile file size (#14256)) - name: Tear down benchmark environment if: always() run: make destroy diff --git a/systemtest/benchtest/profiles.go b/systemtest/benchtest/profiles.go index 9e2ee89b43a..7ef5046b2f4 100644 --- a/systemtest/benchtest/profiles.go +++ b/systemtest/benchtest/profiles.go @@ -18,9 +18,11 @@ package benchtest import ( + "compress/gzip" "context" "fmt" "io" + "math/rand/v2" "net/http" "os" "strconv" @@ -88,12 +90,37 @@ func (p *profiles) recordCPU() error { if benchConfig.CPUProfile == "" { return nil } +<<<<<<< HEAD duration := 2 * benchConfig.Benchtime profile, err := fetchProfile("/debug/pprof/profile", duration) if err != nil { return fmt.Errorf("failed to fetch CPU profile: %w", err) +======= + // Limit profiling time to random 5% of overall time. + // This should not seriously affect the profile quality, + // since we merge the final profile from multiple sources, + // but prevent profile size from swelling. 
+ var done bool + const tickets = 20 + duration := benchConfig.Benchtime / tickets + for i := range tickets { + if done || (rand.N(tickets-i)+i+1) < tickets { + time.Sleep(duration) + continue + } + profile, err := fetchProfile("/debug/pprof/profile", duration) + if err != nil { + return fmt.Errorf("failed to fetch CPU profile: %w", err) + } + // We don't need the address in the profile, so discard it to reduce the size. + if err := profile.Aggregate(true, true, true, true, false); err != nil { + return fmt.Errorf("failed to fetch CPU profile: %w", err) + } + profile = profile.Compact() + p.cpu = append(p.cpu, profile) + done = true +>>>>>>> 3833927f8 (PGO: optimize collected profile file size (#14256)) } - p.cpu = append(p.cpu, profile) return nil } @@ -168,14 +195,15 @@ func (p *profiles) writeDeltas(filename string, deltas []*profile.Profile) error return err } defer f.Close() - return merged.Write(f) + w, err := gzip.NewWriterLevel(f, gzip.BestCompression) + if err != nil { + return err + } + defer w.Close() + return merged.WriteUncompressed(w) } func (p *profiles) mergeBenchmarkProfiles(profiles []*profile.Profile) (*profile.Profile, error) { - for i, profile := range profiles { - benchmarkName := p.benchmarkNames[i] - profile.SetLabel("benchmark", []string{benchmarkName}) - } merged, err := profile.Merge(profiles) if err != nil { return nil, fmt.Errorf("error merging profiles: %w", err) diff --git a/testing/benchmark/Makefile b/testing/benchmark/Makefile index cf894325b58..6f64ab9d51c 100644 --- a/testing/benchmark/Makefile +++ b/testing/benchmark/Makefile @@ -23,6 +23,10 @@ SSH_USER ?= ec2-user SSH_OPTS ?= -o LogLevel=ERROR -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -o ServerAliveCountMax=10 SSH_KEY ?= ~/.ssh/id_rsa_terraform WORKER_IP = $(shell terraform output -raw public_ip) +<<<<<<< HEAD +======= +APM_SERVER_IP = $(shell terraform output -raw apm_server_ip) +>>>>>>> 3833927f8 (PGO: optimize collected profile file size (#14256)) SHELL = 
/bin/bash .SHELLFLAGS = -o pipefail -c