From 258f5df87bc24102e839020f5a037c6bf5a95856 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 17 Dec 2024 17:37:58 +0000 Subject: [PATCH] Add option to enable TBS in benchmark terraform (#14985) Update testing/benchmark terraform and github workflow to add an option to enable TBS. --- .github/workflows/benchmarks.yml | 14 +++++++++++- testing/benchmark/main.tf | 14 +++++++----- testing/benchmark/variables.tf | 12 ++++++++++ .../terraform/modules/ec_deployment/README.md | 10 +++++---- .../modules/ec_deployment/deployment.tf | 22 ++++++++++--------- ...ble_expvar.tftpl => enable_features.tftpl} | 11 +++------- .../modules/ec_deployment/variables.tf | 14 +++++++++++- 7 files changed, 67 insertions(+), 30 deletions(-) rename testing/infra/terraform/modules/ec_deployment/scripts/{enable_expvar.tftpl => enable_features.tftpl} (71%) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 50133d60642..6f7fadfee91 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -8,6 +8,16 @@ on: required: false type: boolean default: false + enableTailSampling: + description: 'Enable tail-based sampling on the APM server' + required: false + type: boolean + default: false + tailSamplingStorageLimit: + description: 'Storage size limit of tail-based sampling on the APM server, defaults to 10GB' + required: false + type: string + default: "10GB" profile: description: 'The system profile used to run the benchmarks' required: false @@ -52,12 +62,14 @@ jobs: TF_VAR_private_key: ./id_rsa_terraform TF_VAR_public_key: ./id_rsa_terraform.pub TF_VAR_run_standalone: ${{ inputs.runStandalone || github.event.schedule=='0 5 */5 * *' }} + TF_VAR_apm_server_tail_sampling: ${{ inputs.enableTailSampling || 'false' }} # inputs are empty on scheduled runs, so re-apply the default + TF_VAR_apm_server_tail_sampling_storage_limit: ${{ inputs.tailSamplingStorageLimit || '10GB' }} # inputs are empty on scheduled runs, so re-apply the default RUN_STANDALONE: ${{ inputs.runStandalone || github.event.schedule=='0 5 */5 * *' }} TFVARS_SOURCE: ${{ inputs.profile || 
'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile TF_VAR_BUILD_ID: ${{ github.run_id }} TF_VAR_ENVIRONMENT: ci TF_VAR_REPO: ${{ github.repository }} - GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }} + GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }},enable_tail_sampling=${{ inputs.enableTailSampling || 'false' }} GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index bf58ee76e27..98e6ca317a7 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -97,12 +97,14 @@ module "ec_deployment" { deployment_template = var.deployment_template deployment_name_prefix = local.name_prefix - apm_server_size = var.apm_server_size - apm_server_zone_count = var.apm_server_zone_count - apm_index_shards = var.apm_shards - drop_pipeline = var.drop_pipeline - apm_server_expvar = true - apm_server_pprof = true + apm_server_size = var.apm_server_size + apm_server_zone_count = var.apm_server_zone_count + apm_index_shards = var.apm_shards + drop_pipeline = var.drop_pipeline + apm_server_expvar = true + apm_server_pprof = true + apm_server_tail_sampling = var.apm_server_tail_sampling + apm_server_tail_sampling_storage_limit = var.apm_server_tail_sampling_storage_limit elasticsearch_size = var.elasticsearch_size elasticsearch_zone_count = var.elasticsearch_zone_count diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index f3c65302eb7..5fbe5814cb0 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -92,6 +92,18 @@ variable "drop_pipeline" { type = bool } +variable "apm_server_tail_sampling" { + default = false + description = "Whether or not to enable APM Server tail-based sampling. 
Defaults to false" + type = bool +} + +variable "apm_server_tail_sampling_storage_limit" { + default = "10GB" + description = "Storage size limit of APM Server tail-based sampling. Defaults to 10GB" + type = string +} + # Standalone variable "apm_server_bin_path" { diff --git a/testing/infra/terraform/modules/ec_deployment/README.md b/testing/infra/terraform/modules/ec_deployment/README.md index fb2b8613cab..4b91fadc383 100644 --- a/testing/infra/terraform/modules/ec_deployment/README.md +++ b/testing/infra/terraform/modules/ec_deployment/README.md @@ -28,12 +28,12 @@ used to configure the module, please refer to the [EC Provider docs](https://reg | [ec_deployment.deployment](https://registry.terraform.io/providers/elastic/ec/0.5.1/docs/resources/deployment) | resource | | [local_file.custom_apm_integration_pkg](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [local_file.drop_pipeline](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | -| [local_file.enable_expvar](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | +| [local_file.enable_features](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [local_file.secret_token](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [local_file.shard_settings](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | | [null_resource.custom_apm_integration_pkg](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [null_resource.drop_pipeline](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | -| [null_resource.enable_expvar](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| 
[null_resource.enable_features](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [null_resource.secret_token](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [null_resource.shard_settings](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [ec_stack.deployment_version](https://registry.terraform.io/providers/elastic/ec/0.5.1/docs/data-sources/stack) | data source | @@ -47,12 +47,14 @@ used to configure the module, please refer to the [EC Provider docs](https://reg | [apm\_server\_expvar](#input\_apm\_server\_expvar) | Whether or not to enable APM Server's expvar endpoint. Defaults to false | `bool` | `false` | no | | [apm\_server\_pprof](#input\_apm\_server\_pprof) | Whether or not to enable APM Server's pprof endpoint. Defaults to false | `bool` | `false` | no | | [apm\_server\_size](#input\_apm\_server\_size) | Optional apm server instance size | `string` | `"1g"` | no | +| [apm\_server\_tail\_sampling](#input\_apm\_server\_tail\_sampling) | Whether or not to enable APM Server tail-based sampling. Defaults to false | `bool` | `false` | no | +| [apm\_server\_tail\_sampling\_storage\_limit](#input\_apm\_server\_tail\_sampling\_storage\_limit) | Storage size limit of APM Server tail-based sampling. Defaults to 10GB | `string` | `"10GB"` | no | | [apm\_server\_zone\_count](#input\_apm\_server\_zone\_count) | Optional apm server zone count | `number` | `1` | no | | [custom\_apm\_integration\_pkg\_path](#input\_custom\_apm\_integration\_pkg\_path) | Path to the zipped custom APM integration package, if empty custom apm integration pkg is not installed | `string` | `""` | no | | [deployment\_name\_prefix](#input\_deployment\_name\_prefix) | Optional ESS or ECE region. 
Defaults to GCP US West 2 (Los Angeles) | `string` | `"apmserver"` | no | | [deployment\_template](#input\_deployment\_template) | Optional deployment template. Defaults to the CPU optimized template for GCP | `string` | `"gcp-cpu-optimized"` | no | -| [docker\_image](#input\_docker\_image) | Optional docker image overrides. The full map needs to be specified | `map(string)` |
{
"apm": "docker.elastic.co/cloud-release/elastic-agent-cloud",
"elasticsearch": "docker.elastic.co/cloud-release/elasticsearch-cloud-ess",
"kibana": "docker.elastic.co/cloud-release/kibana-cloud"
}
| no | -| [docker\_image\_tag\_override](#input\_docker\_image\_tag\_override) | Optional docker image tag overrides, The full map needs to be specified | `map(string)` |
{
"apm": "",
"elasticsearch": "",
"kibana": ""
}
| no | +| [docker\_image](#input\_docker\_image) | Optional docker image overrides. The full map needs to be specified | `map(string)` |
{
"apm": "docker.elastic.co/cloud-release/elastic-agent-cloud",
"elasticsearch": "docker.elastic.co/cloud-release/elasticsearch-cloud-ess",
"kibana": "docker.elastic.co/cloud-release/kibana-cloud"
}
| no | +| [docker\_image\_tag\_override](#input\_docker\_image\_tag\_override) | Optional docker image tag overrides, The full map needs to be specified | `map(string)` |
{
"apm": "",
"elasticsearch": "",
"kibana": ""
}
| no | | [drop\_pipeline](#input\_drop\_pipeline) | Whether or not to install an Elasticsearch ingest pipeline to drop all incoming APM documents. Defaults to false | `bool` | `false` | no | | [elasticsearch\_autoscale](#input\_elasticsearch\_autoscale) | Optional autoscale the Elasticsearch cluster | `bool` | `false` | no | | [elasticsearch\_dedicated\_masters](#input\_elasticsearch\_dedicated\_masters) | Optionally use dedicated masters for the Elasticsearch cluster | `bool` | `false` | no | diff --git a/testing/infra/terraform/modules/ec_deployment/deployment.tf b/testing/infra/terraform/modules/ec_deployment/deployment.tf index 808e2706285..2a58ea8d9a8 100644 --- a/testing/infra/terraform/modules/ec_deployment/deployment.tf +++ b/testing/infra/terraform/modules/ec_deployment/deployment.tf @@ -113,14 +113,16 @@ resource "ec_deployment" "dedicated_observability_deployment" { kibana {} } -resource "local_file" "enable_expvar" { - content = templatefile("${path.module}/scripts/enable_expvar.tftpl", { - kibana_url = ec_deployment.deployment.kibana.0.https_endpoint, - elastic_password = ec_deployment.deployment.elasticsearch_password, - enable_expvar = var.apm_server_expvar - enable_pprof = var.apm_server_pprof +resource "local_file" "enable_features" { + content = templatefile("${path.module}/scripts/enable_features.tftpl", { + kibana_url = ec_deployment.deployment.kibana.0.https_endpoint, + elastic_password = ec_deployment.deployment.elasticsearch_password, + enable_expvar = var.apm_server_expvar + enable_pprof = var.apm_server_pprof + enable_tail_sampling = var.apm_server_tail_sampling + tail_sampling_storage_limit = var.apm_server_tail_sampling_storage_limit }) - filename = "${path.module}/scripts/enable_expvar.sh" + filename = "${path.module}/scripts/enable_features.sh" } resource "local_file" "secret_token" { @@ -153,13 +155,13 @@ resource "local_file" "custom_apm_integration_pkg" { filename = "${path.module}/scripts/custom-apm-integration-pkg.sh" } -resource 
"null_resource" "enable_expvar" { +resource "null_resource" "enable_features" { triggers = { - shell_hash = local_file.enable_expvar.id + shell_hash = local_file.enable_features.id integrations_server = var.integrations_server } provisioner "local-exec" { - command = "scripts/enable_expvar.sh" + command = "scripts/enable_features.sh" interpreter = ["/bin/bash", "-c"] working_dir = path.module } diff --git a/testing/infra/terraform/modules/ec_deployment/scripts/enable_expvar.tftpl b/testing/infra/terraform/modules/ec_deployment/scripts/enable_features.tftpl similarity index 71% rename from testing/infra/terraform/modules/ec_deployment/scripts/enable_expvar.tftpl rename to testing/infra/terraform/modules/ec_deployment/scripts/enable_features.tftpl index 0737afe7906..f3f3d58f0eb 100644 --- a/testing/infra/terraform/modules/ec_deployment/scripts/enable_expvar.tftpl +++ b/testing/infra/terraform/modules/ec_deployment/scripts/enable_features.tftpl @@ -14,19 +14,14 @@ if [[ "$${POLICY}" == *"$${NOT_FOUND_MSG}"* ]]; then exit 0 fi -echo $${POLICY} | grep '"expvar_enabled":{"type":"bool","value":true}' > /dev/null 2>&1 && EXPVAR_ENABLED=true -echo $${POLICY} | grep '"pprof_enabled":{"type":"bool","value":true}' > /dev/null 2>&1 && PPROF_ENABLED=true -if [[ $${EXPVAR_ENABLED} || $${PPROF_ENABLED} ]] ; then - echo "expvar or pprof already enabled" - exit 0 -fi - # Download and modify the APM policy echo $${POLICY} | jq '.item' | \ jq 'del(.id)' | jq 'del(.elasticsearch)'| jq 'del(.inputs[].compiled_input)' | jq 'del(.revision)' |\ jq 'del(.created_at)' | jq 'del(.created_by)' | jq 'del(.updated_at)' | jq 'del(.updated_by)' |\ jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.expvar_enabled = {type: "bool", value: ${enable_expvar}}' |\ - jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.pprof_enabled = {type: "bool", value: ${enable_pprof}}' > policy.json + jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.pprof_enabled = 
{type: "bool", value: ${enable_pprof}}' |\ + jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.tail_sampling_storage_limit = {"value":"${tail_sampling_storage_limit}","type":"text"}' |\ + jq 'select(.inputs[].policy_template == "apmserver").inputs[].vars.tail_sampling_enabled = {type: "bool", value: ${enable_tail_sampling}}' > policy.json # Update the policy curl -s -H 'content-type: application/json' -H 'kbn-xsrf: true' -X PUT -k -d@policy.json -u $${KIBANA_AUTH} $${KIBANA_ENDPOINT} diff --git a/testing/infra/terraform/modules/ec_deployment/variables.tf b/testing/infra/terraform/modules/ec_deployment/variables.tf index 26d7ad04fb8..8f6733b6544 100644 --- a/testing/infra/terraform/modules/ec_deployment/variables.tf +++ b/testing/infra/terraform/modules/ec_deployment/variables.tf @@ -108,7 +108,7 @@ variable "docker_image" { description = "Optional docker image overrides. The full map needs to be specified" } -# Enable APM Server's expvar +# Enable APM Server's features variable "apm_server_expvar" { default = false @@ -122,6 +122,18 @@ variable "apm_server_pprof" { type = bool } +variable "apm_server_tail_sampling" { + default = false + description = "Whether or not to enable APM Server tail-based sampling. Defaults to false" + type = bool +} + +variable "apm_server_tail_sampling_storage_limit" { + default = "10GB" + description = "Storage size limit of APM Server tail-based sampling. Defaults to 10GB" + type = string +} + variable "apm_index_shards" { default = 0 description = "The number of shards to set for APM Indices"