From 2a9a9a1d9402c9195b89666853e037f19fe009e2 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Mon, 16 Dec 2024 12:03:49 -0800 Subject: [PATCH 1/7] changelog: update 8.15 missing changelog entry (#14909) --- changelogs/8.15.asciidoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/changelogs/8.15.asciidoc b/changelogs/8.15.asciidoc index 7add8e1e62d..e805f6f7d32 100644 --- a/changelogs/8.15.asciidoc +++ b/changelogs/8.15.asciidoc @@ -101,9 +101,10 @@ https://github.com/elastic/apm-server/compare/v8.14.3\...v8.15.0[View commits] - Add `elasticsearch.flushed.uncompressed.bytes` metric {pull}13155[13155] - APM Server now relies on the Elasticsearch apm-data plugin's index templates, removing the requirement to install the APM integration package {pull}12066[12066] +- Switch the default lifecycle management to leverage DSL instead of ILM, the change is executed as a part of Elasticsearch apm-data plugin migration {pull}12066[12066] - Upgraded bundled APM Java agent attacher CLI to version 1.50.0 {pull}13326[13326] - Enable Kibana curated UIs to work with hostmetrics from OpenTelemetry's https://pkg.go.dev/go.opentelemetry.io/collector/receiver/hostmetricsreceiver[hostmetricsreceiver] {pull}13196[13196] - Add require data stream to bulk index requests {pull}13398[13398] - Support self-instrumentation when in managed mode by getting tracing configs via reloader {pull}13514[13514] {pull}13653[13653] {pull}13691[13691] {pull}13790[13790] - Add mapping for OpenTelemetry attribute `messaging.destination.name` to derive `service.target` correctly {pull}13472[13472] -- APM Server now automatically retries document-level 429s from Elasticsearch to avoid dropping data. `output.elasticsearch.max_retries` now controls both request-level and document-level retries, and defaults to `3`. {pull}13620[13620] +- APM Server now automatically retries document-level 429s from Elasticsearch to avoid dropping data. 
`output.elasticsearch.max_retries` now controls both request-level and document-level retries, and defaults to `3`. {pull}13620[13620] \ No newline at end of file From 5c59c96251b7be945155e51447047ca4a4bfedd9 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Mon, 16 Dec 2024 15:57:42 -0800 Subject: [PATCH 2/7] changelog: add entry for 8.16.2 release notes (#14912) * changelog: add entry for 8.16.2 release notes * changelog: add 8.16.2 wolfi docker entry --- changelogs/8.16.asciidoc | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/changelogs/8.16.asciidoc b/changelogs/8.16.asciidoc index a2b982c1e0a..4d83ae3e19d 100644 --- a/changelogs/8.16.asciidoc +++ b/changelogs/8.16.asciidoc @@ -1,9 +1,36 @@ [[apm-release-notes-8.16]] == APM version 8.16 +* <> * <> * <> +[float] +[[apm-release-notes-8.16.2]] +=== APM version 8.16.2 + +https://github.com/elastic/apm-server/compare/v8.16.1\...v8.16.2[View commits] + +[float] +==== Bug fixes + +- Surface config parsing error under EA managed mode by logging and marking EA input unit as failed {pull}14574[14574] +- Remove unnecessary hot reload under EA managed mode when apm tracing config is nil {pull}14865[14865] + +[float] +==== Breaking Changes + +[float] +==== Deprecations + +[float] +==== Intake API Changes + +[float] +==== Added + +- In this release we've introduced an image based on the hardened https://wolfi.dev/[Wolfi] image to provide additional security to our self-managed customers, and improve our supply chain security posture. 
+ [float] [[apm-release-notes-8.16.1]] === APM version 8.16.1 From 12b947bfaf36100eca17f321122a902013c10bed Mon Sep 17 00:00:00 2001 From: "elastic-observability-automation[bot]" <180520183+elastic-observability-automation[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:38:41 +0000 Subject: [PATCH 3/7] chore: Update to elastic/beats@d508a408b0e9 (#14961) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Made with ❤️️ by updatecli Co-authored-by: elastic-observability-automation[bot] <180520183+elastic-observability-automation[bot]@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- NOTICE.txt | 4 ++-- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/NOTICE.txt b/NOTICE.txt index 28e801bb7e0..ca7a1fe64aa 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -604,11 +604,11 @@ Contents of probable licence file $GOMODCACHE/github.com/elastic/apm-data@v1.14. -------------------------------------------------------------------------------- Dependency : github.com/elastic/beats/v7 -Version: v7.0.0-alpha2.0.20241213104605-93b018a83621 +Version: v7.0.0-alpha2.0.20241216091513-d508a408b0e9 Licence type (autodetected): Elastic -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/elastic/beats/v7@v7.0.0-alpha2.0.20241213104605-93b018a83621/LICENSE.txt: +Contents of probable licence file $GOMODCACHE/github.com/elastic/beats/v7@v7.0.0-alpha2.0.20241216091513-d508a408b0e9/LICENSE.txt: Source code in this repository is variously licensed under the Apache License Version 2.0, an Apache compatible license, or the Elastic License. 
Outside of diff --git a/go.mod b/go.mod index be71d3b219a..a8e988f58fa 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/dustin/go-humanize v1.0.1 github.com/elastic/apm-aggregation v1.1.0 github.com/elastic/apm-data v1.14.4 - github.com/elastic/beats/v7 v7.0.0-alpha2.0.20241213104605-93b018a83621 + github.com/elastic/beats/v7 v7.0.0-alpha2.0.20241216091513-d508a408b0e9 github.com/elastic/elastic-agent-client/v7 v7.17.0 github.com/elastic/elastic-agent-libs v0.17.4 github.com/elastic/elastic-agent-system-metrics v0.11.5 diff --git a/go.sum b/go.sum index 3894ddeb262..b8f87b4cc3b 100644 --- a/go.sum +++ b/go.sum @@ -136,8 +136,8 @@ github.com/elastic/apm-aggregation v1.1.0 h1:eTHGd5w99JoRFJ763MJP6nUX4hkkeSil3KW github.com/elastic/apm-aggregation v1.1.0/go.mod h1:YBQ77Jt7vOy2/7w4q5SsEifhc0V02dJxRcyyeC6HsvQ= github.com/elastic/apm-data v1.14.4 h1:Q84GljGLbPEnUCcF7i/CWFx6Ey1Rx+ffvSUyEy/k0R4= github.com/elastic/apm-data v1.14.4/go.mod h1:MctdMZ5LBx07m0TjJTdh80nU2ef6ABcoyJWg1E14/kg= -github.com/elastic/beats/v7 v7.0.0-alpha2.0.20241213104605-93b018a83621 h1:1fcMJDnMDx25sAQM7+YC5DeS+ovVWRkjWAUgR04pjDE= -github.com/elastic/beats/v7 v7.0.0-alpha2.0.20241213104605-93b018a83621/go.mod h1:JZIuztvKeiuHUM8zvfQ24wz6nJPPOM1DVqu9FHu9a0Q= +github.com/elastic/beats/v7 v7.0.0-alpha2.0.20241216091513-d508a408b0e9 h1:A+n1lfJyAxMQwWHq1nOkOMHk3rMOxFqgtbijT1Uh0aM= +github.com/elastic/beats/v7 v7.0.0-alpha2.0.20241216091513-d508a408b0e9/go.mod h1:JZIuztvKeiuHUM8zvfQ24wz6nJPPOM1DVqu9FHu9a0Q= github.com/elastic/elastic-agent-autodiscover v0.9.0 h1:+iWIKh0u3e8I+CJa3FfWe9h0JojNasPgYIA47gpuuns= github.com/elastic/elastic-agent-autodiscover v0.9.0/go.mod h1:5iUxLHhVdaGSWYTveSwfJEY4RqPXTG13LPiFoxcpFd4= github.com/elastic/elastic-agent-client/v7 v7.17.0 h1:TPLrEHF4kJ3RkmQzZPffrniY4WeW4vriHZbOAzM1hFo= From 258f5df87bc24102e839020f5a037c6bf5a95856 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 17 Dec 2024 17:37:58 +0000 Subject: [PATCH 4/7] Add option to enable TBS in 
benchmark terraform (#14985) Update testing/benchmark terraform and github workflow to add an option to enable TBS. --- .github/workflows/benchmarks.yml | 14 +++++++++++- testing/benchmark/main.tf | 14 +++++++----- testing/benchmark/variables.tf | 12 ++++++++++ .../terraform/modules/ec_deployment/README.md | 10 +++++---- .../modules/ec_deployment/deployment.tf | 22 ++++++++++--------- ...ble_expvar.tftpl => enable_features.tftpl} | 11 +++------- .../modules/ec_deployment/variables.tf | 14 +++++++++++- 7 files changed, 67 insertions(+), 30 deletions(-) rename testing/infra/terraform/modules/ec_deployment/scripts/{enable_expvar.tftpl => enable_features.tftpl} (71%) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 50133d60642..6f7fadfee91 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -8,6 +8,16 @@ on: required: false type: boolean default: false + enableTailSampling: + description: 'Enable tail-based sampling on the APM server' + required: false + type: boolean + default: false + tailSamplingStorageLimit: + description: 'Storage size limit of tail-based sampling on the APM server, defaults to 10GB' + required: false + type: string + default: "10GB" profile: description: 'The system profile used to run the benchmarks' required: false @@ -52,12 +62,14 @@ jobs: TF_VAR_private_key: ./id_rsa_terraform TF_VAR_public_key: ./id_rsa_terraform.pub TF_VAR_run_standalone: ${{ inputs.runStandalone || github.event.schedule=='0 5 */5 * *' }} + TF_VAR_apm_server_tail_sampling: ${{ inputs.enableTailSampling }} + TF_VAR_apm_server_tail_sampling_storage_limit: ${{ inputs.tailSamplingStorageLimit }} RUN_STANDALONE: ${{ inputs.runStandalone || github.event.schedule=='0 5 */5 * *' }} TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile TF_VAR_BUILD_ID: ${{ github.run_id }} TF_VAR_ENVIRONMENT: ci TF_VAR_REPO: ${{ github.repository }} - GOBENCH_TAGS: 
branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }} + GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }},enable_tail_sampling=${{ inputs.enableTailSampling }} GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index bf58ee76e27..98e6ca317a7 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -97,12 +97,14 @@ module "ec_deployment" { deployment_template = var.deployment_template deployment_name_prefix = local.name_prefix - apm_server_size = var.apm_server_size - apm_server_zone_count = var.apm_server_zone_count - apm_index_shards = var.apm_shards - drop_pipeline = var.drop_pipeline - apm_server_expvar = true - apm_server_pprof = true + apm_server_size = var.apm_server_size + apm_server_zone_count = var.apm_server_zone_count + apm_index_shards = var.apm_shards + drop_pipeline = var.drop_pipeline + apm_server_expvar = true + apm_server_pprof = true + apm_server_tail_sampling = var.apm_server_tail_sampling + apm_server_tail_sampling_storage_limit = var.apm_server_tail_sampling_storage_limit elasticsearch_size = var.elasticsearch_size elasticsearch_zone_count = var.elasticsearch_zone_count diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index f3c65302eb7..5fbe5814cb0 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -92,6 +92,18 @@ variable "drop_pipeline" { type = bool } +variable "apm_server_tail_sampling" { + default = false + description = "Whether or not to enable APM Server tail-based sampling. Defaults to false" + type = bool +} + +variable "apm_server_tail_sampling_storage_limit" { + default = "10GB" + description = "Storage size limit of APM Server tail-based sampling. 
Defaults to 10GB" + type = string +} + # Standalone variable "apm_server_bin_path" { diff --git a/testing/infra/terraform/modules/ec_deployment/README.md b/testing/infra/terraform/modules/ec_deployment/README.md index fb2b8613cab..4b91fadc383 100644 --- a/testing/infra/terraform/modules/ec_deployment/README.md +++ b/testing/infra/terraform/modules/ec_deployment/README.md @@ -28,12 +28,12 @@ used to configure the module, please refer to the [EC Provider docs](https://reg | [ec_deployment.deployment](https://registry.terraform.io/providers/elastic/ec/0.5.1/docs/resources/deployment) | resource | | [local_file.custom_apm_integration_pkg](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [local_file.drop_pipeline](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | -| [local_file.enable_expvar](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | +| [local_file.enable_features](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [local_file.secret_token](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [local_file.shard_settings](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [null_resource.custom_apm_integration_pkg](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [null_resource.drop_pipeline](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | -| [null_resource.enable_expvar](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [null_resource.enable_features](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | 
[null_resource.secret_token](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [null_resource.shard_settings](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [ec_stack.deployment_version](https://registry.terraform.io/providers/elastic/ec/0.5.1/docs/data-sources/stack) | data source | @@ -47,12 +47,14 @@ used to configure the module, please refer to the [EC Provider docs](https://reg | [apm\_server\_expvar](#input\_apm\_server\_expvar) | Whether or not to enable APM Server's expvar endpoint. Defaults to false | `bool` | `false` | no | | [apm\_server\_pprof](#input\_apm\_server\_pprof) | Whether or not to enable APM Server's pprof endpoint. Defaults to false | `bool` | `false` | no | | [apm\_server\_size](#input\_apm\_server\_size) | Optional apm server instance size | `string` | `"1g"` | no | +| [apm\_server\_tail\_sampling](#input\_apm\_server\_tail\_sampling) | Whether or not to enable APM Server tail-based sampling. Defaults to false | `bool` | `false` | no | +| [apm\_server\_tail\_sampling\_storage\_limit](#input\_apm\_server\_tail\_sampling\_storage\_limit) | Storage size limit of APM Server tail-based sampling. Defaults to 10GB | `string` | `"10GB"` | no | | [apm\_server\_zone\_count](#input\_apm\_server\_zone\_count) | Optional apm server zone count | `number` | `1` | no | | [custom\_apm\_integration\_pkg\_path](#input\_custom\_apm\_integration\_pkg\_path) | Path to the zipped custom APM integration package, if empty custom apm integration pkg is not installed | `string` | `""` | no | | [deployment\_name\_prefix](#input\_deployment\_name\_prefix) | Optional ESS or ECE region. Defaults to GCP US West 2 (Los Angeles) | `string` | `"apmserver"` | no | | [deployment\_template](#input\_deployment\_template) | Optional deployment template. 
Defaults to the CPU optimized template for GCP | `string` | `"gcp-cpu-optimized"` | no | -| [docker\_image](#input\_docker\_image) | Optional docker image overrides. The full map needs to be specified | `map(string)` |
{
"apm": "docker.elastic.co/cloud-release/elastic-agent-cloud",
"elasticsearch": "docker.elastic.co/cloud-release/elasticsearch-cloud-ess",
"kibana": "docker.elastic.co/cloud-release/kibana-cloud"
}
| no | -| [docker\_image\_tag\_override](#input\_docker\_image\_tag\_override) | Optional docker image tag overrides, The full map needs to be specified | `map(string)` |
{
"apm": "",
"elasticsearch": "",
"kibana": ""
}
| no | +| [docker\_image](#input\_docker\_image) | Optional docker image overrides. The full map needs to be specified | `map(string)` |
{
"apm": "docker.elastic.co/cloud-release/elastic-agent-cloud",
"elasticsearch": "docker.elastic.co/cloud-release/elasticsearch-cloud-ess",
"kibana": "docker.elastic.co/cloud-release/kibana-cloud"
}
| no | +| [docker\_image\_tag\_override](#input\_docker\_image\_tag\_override) | Optional docker image tag overrides, The full map needs to be specified | `map(string)` |
{
"apm": "",
"elasticsearch": "",
"kibana": ""
}
| no | | [drop\_pipeline](#input\_drop\_pipeline) | Whether or not to install an Elasticsearch ingest pipeline to drop all incoming APM documents. Defaults to false | `bool` | `false` | no | | [elasticsearch\_autoscale](#input\_elasticsearch\_autoscale) | Optional autoscale the Elasticsearch cluster | `bool` | `false` | no | | [elasticsearch\_dedicated\_masters](#input\_elasticsearch\_dedicated\_masters) | Optionally use dedicated masters for the Elasticsearch cluster | `bool` | `false` | no | diff --git a/testing/infra/terraform/modules/ec_deployment/deployment.tf b/testing/infra/terraform/modules/ec_deployment/deployment.tf index 808e2706285..2a58ea8d9a8 100644 --- a/testing/infra/terraform/modules/ec_deployment/deployment.tf +++ b/testing/infra/terraform/modules/ec_deployment/deployment.tf @@ -113,14 +113,16 @@ resource "ec_deployment" "dedicated_observability_deployment" { kibana {} } -resource "local_file" "enable_expvar" { - content = templatefile("${path.module}/scripts/enable_expvar.tftpl", { - kibana_url = ec_deployment.deployment.kibana.0.https_endpoint, - elastic_password = ec_deployment.deployment.elasticsearch_password, - enable_expvar = var.apm_server_expvar - enable_pprof = var.apm_server_pprof +resource "local_file" "enable_features" { + content = templatefile("${path.module}/scripts/enable_features.tftpl", { + kibana_url = ec_deployment.deployment.kibana.0.https_endpoint, + elastic_password = ec_deployment.deployment.elasticsearch_password, + enable_expvar = var.apm_server_expvar + enable_pprof = var.apm_server_pprof + enable_tail_sampling = var.apm_server_tail_sampling + tail_sampling_storage_limit = var.apm_server_tail_sampling_storage_limit }) - filename = "${path.module}/scripts/enable_expvar.sh" + filename = "${path.module}/scripts/enable_features.sh" } resource "local_file" "secret_token" { @@ -153,13 +155,13 @@ resource "local_file" "custom_apm_integration_pkg" { filename = "${path.module}/scripts/custom-apm-integration-pkg.sh" } -resource 
"null_resource" "enable_expvar" { +resource "null_resource" "enable_features" { triggers = { - shell_hash = local_file.enable_expvar.id + shell_hash = local_file.enable_features.id integrations_server = var.integrations_server } provisioner "local-exec" { - command = "scripts/enable_expvar.sh" + command = "scripts/enable_features.sh" interpreter = ["/bin/bash", "-c"] working_dir = path.module } diff --git a/testing/infra/terraform/modules/ec_deployment/scripts/enable_expvar.tftpl b/testing/infra/terraform/modules/ec_deployment/scripts/enable_features.tftpl similarity index 71% rename from testing/infra/terraform/modules/ec_deployment/scripts/enable_expvar.tftpl rename to testing/infra/terraform/modules/ec_deployment/scripts/enable_features.tftpl index 0737afe7906..f3f3d58f0eb 100644 --- a/testing/infra/terraform/modules/ec_deployment/scripts/enable_expvar.tftpl +++ b/testing/infra/terraform/modules/ec_deployment/scripts/enable_features.tftpl @@ -14,19 +14,14 @@ if [[ "$${POLICY}" == *"$${NOT_FOUND_MSG}"* ]]; then exit 0 fi -echo $${POLICY} | grep '"expvar_enabled":{"type":"bool","value":true}' > /dev/null 2>&1 && EXPVAR_ENABLED=true -echo $${POLICY} | grep '"pprof_enabled":{"type":"bool","value":true}' > /dev/null 2>&1 && PPROF_ENABLED=true -if [[ $${EXPVAR_ENABLED} || $${PPROF_ENABLED} ]] ; then - echo "expvar or pprof already enabled" - exit 0 -fi - # Download and modify the APM policy echo $${POLICY} | jq '.item' | \ jq 'del(.id)' | jq 'del(.elasticsearch)'| jq 'del(.inputs[].compiled_input)' | jq 'del(.revision)' |\ jq 'del(.created_at)' | jq 'del(.created_by)' | jq 'del(.updated_at)' | jq 'del(.updated_by)' |\ jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.expvar_enabled = {type: "bool", value: ${enable_expvar}}' |\ - jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.pprof_enabled = {type: "bool", value: ${enable_pprof}}' > policy.json + jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.pprof_enabled = 
{type: "bool", value: ${enable_pprof}}' |\ + jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.tail_sampling_storage_limit = {"value":"${tail_sampling_storage_limit}","type":"text"}' |\ + jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.tail_sampling_enabled = {type: "bool", value: ${enable_tail_sampling}}' > policy.json # Update the policy curl -s -H 'content-type: application/json' -H 'kbn-xsrf: true' -X PUT -k -d@policy.json -u $${KIBANA_AUTH} $${KIBANA_ENDPOINT} diff --git a/testing/infra/terraform/modules/ec_deployment/variables.tf b/testing/infra/terraform/modules/ec_deployment/variables.tf index 26d7ad04fb8..8f6733b6544 100644 --- a/testing/infra/terraform/modules/ec_deployment/variables.tf +++ b/testing/infra/terraform/modules/ec_deployment/variables.tf @@ -108,7 +108,7 @@ variable "docker_image" { description = "Optional docker image overrides. The full map needs to be specified" } -# Enable APM Server's expvar +# Enable APM Server's features variable "apm_server_expvar" { default = false @@ -122,6 +122,18 @@ variable "apm_server_pprof" { type = bool } +variable "apm_server_tail_sampling" { + default = false + description = "Whether or not to enable APM Server tail-based sampling. Defaults to false" + type = bool +} + +variable "apm_server_tail_sampling_storage_limit" { + default = "10GB" + description = "Storage size limit of APM Server tail-based sampling. Defaults to 10GB" + type = string +} + variable "apm_index_shards" { default = 0 description = "The number of shards to set for APM Indices" From 4305ce3061d3235302a904fd679f0b31ac4b363a Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 19 Dec 2024 15:37:33 +0000 Subject: [PATCH 5/7] Mark most client.geo.* as dynamic in TestRUMXForwardedFor approvals (#14997) Mark client.geo.{city_name,location,region_iso_code,region_name} as dynamic in TestRUMXForwardedFor. This will make it consistent with other systemtest approvals. 
--- .../approvals/TestRUMXForwardedFor.approved.json | 12 ++++++------ systemtest/rum_test.go | 3 +++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/systemtest/approvals/TestRUMXForwardedFor.approved.json b/systemtest/approvals/TestRUMXForwardedFor.approved.json index 89d28519fa6..f1b96a8c854 100644 --- a/systemtest/approvals/TestRUMXForwardedFor.approved.json +++ b/systemtest/approvals/TestRUMXForwardedFor.approved.json @@ -13,7 +13,7 @@ "5.5.0" ], "client.geo.city_name": [ - "Perth" + "dynamic" ], "client.geo.continent_name": [ "Oceania" @@ -28,10 +28,10 @@ "dynamic" ], "client.geo.region_iso_code": [ - "AU-WA" + "dynamic" ], "client.geo.region_name": [ - "Western Australia" + "dynamic" ], "client.ip": [ "220.244.41.16" @@ -132,7 +132,7 @@ "5.5.0" ], "client.geo.city_name": [ - "Perth" + "dynamic" ], "client.geo.continent_name": [ "Oceania" @@ -147,10 +147,10 @@ "dynamic" ], "client.geo.region_iso_code": [ - "AU-WA" + "dynamic" ], "client.geo.region_name": [ - "Western Australia" + "dynamic" ], "client.ip": [ "220.244.41.16" diff --git a/systemtest/rum_test.go b/systemtest/rum_test.go index d48659dc205..db5af921e46 100644 --- a/systemtest/rum_test.go +++ b/systemtest/rum_test.go @@ -78,7 +78,10 @@ func TestRUMXForwardedFor(t *testing.T) { "source.port", // Do not assert the exact contents of the location field since they may change // slightly depending on the IP lookup. 
+ "client.geo.city_name", "client.geo.location", + "client.geo.region_iso_code", + "client.geo.region_name", ) } From 1ea743952f24a239207b5d3deb2653b53f958b87 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 19 Dec 2024 16:27:17 +0000 Subject: [PATCH 6/7] Record tbs disk usage stats in benchtest (#14995) Record TBS lsm size and vlog size in benchtest to facilitate TBS improvements when running benchmarks using gh actions following #14985 --- systemtest/benchtest/expvar/expvar.go | 12 ++++++++++++ systemtest/benchtest/expvar/metrics.go | 4 ++++ systemtest/benchtest/main.go | 2 ++ systemtest/benchtest/main_test.go | 4 ++++ 4 files changed, 22 insertions(+) diff --git a/systemtest/benchtest/expvar/expvar.go b/systemtest/benchtest/expvar/expvar.go index 452e3ddf0bf..be407e1f98a 100644 --- a/systemtest/benchtest/expvar/expvar.go +++ b/systemtest/benchtest/expvar/expvar.go @@ -36,6 +36,7 @@ type expvar struct { LibbeatStats ElasticResponseStats OTLPResponseStats + TailSamplingStats // UncompressedBytes holds the number of bytes of uncompressed // data that the server has read from the Elastic APM events @@ -72,6 +73,11 @@ type LibbeatStats struct { RSSMemoryBytes int64 `json:"beat.memstats.rss"` } +type TailSamplingStats struct { + TBSLsmSize int64 `json:"apm-server.sampling.tail.storage.lsm_size"` + TBSVlogSize int64 `json:"apm-server.sampling.tail.storage.value_log_size"` +} + func queryExpvar(ctx context.Context, out *expvar, srv string) error { req, err := http.NewRequest("GET", srv+"/debug/vars", nil) if err != nil { @@ -113,6 +119,7 @@ func queryExpvar(ctx context.Context, out *expvar, srv string) error { aggregateResponseStats(s.ElasticResponseStats, &result.ElasticResponseStats) aggregateOTLPResponseStats(s.OTLPResponseStats, &result.OTLPResponseStats) aggregateLibbeatStats(s.LibbeatStats, &result.LibbeatStats) + aggregateTailSamplingStats(s.TailSamplingStats, &result.TailSamplingStats) result.UncompressedBytes += s.UncompressedBytes 
result.AvailableBulkRequests += s.AvailableBulkRequests } @@ -205,3 +212,8 @@ func aggregateOTLPResponseStats(from OTLPResponseStats, to *OTLPResponseStats) { to.ErrorOTLPTracesResponses += from.ErrorOTLPTracesResponses to.ErrorOTLPMetricsResponses += from.ErrorOTLPMetricsResponses } + +func aggregateTailSamplingStats(from TailSamplingStats, to *TailSamplingStats) { + to.TBSLsmSize += from.TBSLsmSize + to.TBSVlogSize += from.TBSVlogSize +} diff --git a/systemtest/benchtest/expvar/metrics.go b/systemtest/benchtest/expvar/metrics.go index 328c78bd9a5..c21bd0db051 100644 --- a/systemtest/benchtest/expvar/metrics.go +++ b/systemtest/benchtest/expvar/metrics.go @@ -47,6 +47,8 @@ const ( ErrorElasticResponses ErrorOTLPTracesResponses ErrorOTLPMetricsResponses + TBSLsmSize + TBSVlogSize ) type AggregateStats struct { @@ -164,6 +166,8 @@ func (c *Collector) accumulate(e expvar) { c.processMetric(MemBytes, int64(e.TotalAlloc)) c.processMetric(HeapAlloc, int64(e.HeapAlloc)) c.processMetric(HeapObjects, int64(e.HeapObjects)) + c.processMetric(TBSLsmSize, e.TBSLsmSize) + c.processMetric(TBSVlogSize, e.TBSVlogSize) } func (c *Collector) processMetric(m Metric, val int64) { diff --git a/systemtest/benchtest/main.go b/systemtest/benchtest/main.go index 5422915b491..abab1cceb2f 100644 --- a/systemtest/benchtest/main.go +++ b/systemtest/benchtest/main.go @@ -122,6 +122,8 @@ func addExpvarMetrics(result *testing.BenchmarkResult, collector *expvar.Collect result.Extra["max_heap_alloc"] = float64(collector.Get(expvar.HeapAlloc).Max) result.Extra["max_heap_objects"] = float64(collector.Get(expvar.HeapObjects).Max) result.Extra["mean_available_indexers"] = float64(collector.Get(expvar.AvailableBulkRequests).Mean) + result.Extra["tbs_lsm_size"] = float64(collector.Get(expvar.TBSLsmSize).Max) + result.Extra["tbs_vlog_size"] = float64(collector.Get(expvar.TBSVlogSize).Max) } // Record the number of error responses returned by the server: lower is better. 
diff --git a/systemtest/benchtest/main_test.go b/systemtest/benchtest/main_test.go index 14a7bbd64b9..a94c6f62e8f 100644 --- a/systemtest/benchtest/main_test.go +++ b/systemtest/benchtest/main_test.go @@ -141,6 +141,8 @@ func TestAddExpvarMetrics(t *testing.T) { `"apm-server.processor.span.transformations": 5`, `"apm-server.processor.metric.transformations": 9`, `"apm-server.processor.error.transformations": 3`, + `"apm-server.sampling.tail.storage.lsm_size": 10`, + `"apm-server.sampling.tail.storage.value_log_size": 11`, `"beat.runtime.goroutines": 4`, `"beat.memstats.rss": 1048576`, `"output.elasticsearch.bulk_requests.available": 0`, @@ -165,6 +167,8 @@ func TestAddExpvarMetrics(t *testing.T) { "max_heap_objects": 102, "mean_available_indexers": 0, "error_responses/sec": 1, + "tbs_lsm_size": 10, + "tbs_vlog_size": 11, }, }, } From 7c1b7060db534812946461d2e8bf2b9f8778237b Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 19 Dec 2024 17:46:59 +0000 Subject: [PATCH 7/7] Fix TF_VAR_apm_server_tail_sampling for scheduled gh actions (#15003) A regression from #14985 causing nightly benchmark to fail. 
--- .github/workflows/benchmarks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 6f7fadfee91..ed251f5a768 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -62,8 +62,8 @@ jobs: TF_VAR_private_key: ./id_rsa_terraform TF_VAR_public_key: ./id_rsa_terraform.pub TF_VAR_run_standalone: ${{ inputs.runStandalone || github.event.schedule=='0 5 */5 * *' }} - TF_VAR_apm_server_tail_sampling: ${{ inputs.enableTailSampling }} - TF_VAR_apm_server_tail_sampling_storage_limit: ${{ inputs.tailSamplingStorageLimit }} + TF_VAR_apm_server_tail_sampling: ${{ inputs.enableTailSampling || 'false' }} # set the default again otherwise schedules won't work + TF_VAR_apm_server_tail_sampling_storage_limit: ${{ inputs.tailSamplingStorageLimit || '10GB' }} # set the default again otherwise schedules won't work RUN_STANDALONE: ${{ inputs.runStandalone || github.event.schedule=='0 5 */5 * *' }} TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile TF_VAR_BUILD_ID: ${{ github.run_id }}