diff --git a/changelogs/head.asciidoc b/changelogs/head.asciidoc index 75f47945cef..a7d8d53ba2e 100644 --- a/changelogs/head.asciidoc +++ b/changelogs/head.asciidoc @@ -15,6 +15,7 @@ https://github.com/elastic/apm-server/compare/8.10\...main[View commits] - Add back gzip support for grpc otlp endpoint {pull}11434[11434] - Correctly mark jvm.memory.non_heap.pool.* and jvm.fd.* metrics as internal {pull}11303[11303] - Fix tail-based sampling discarding low throughput and low sample rate traces {pull}11642[11642] +- Add memory-based autoscaling for service destination aggregation groups {pull}11739[11739] [float] ==== Intake API Changes diff --git a/docs/data-model.asciidoc b/docs/data-model.asciidoc index 918c48f2c23..5e6f3ad9724 100644 --- a/docs/data-model.asciidoc +++ b/docs/data-model.asciidoc @@ -548,9 +548,10 @@ which is 500 transaction groups per service per GB of APM Server. ** For service-transaction metrics, there is an additional limit of 1000 total service transaction groups per GB of APM Server, and each service may only consume up to 10% of the service transaction groups, which is 100 service transaction groups per service per GB of APM Server. -** For service-destination metrics, there is an additional limit of a constant 10000 total service destination groups, +** For service-destination metrics, there is an additional limit of 10000 total service destination groups for a 1 GB APM Server, +increasing by 5000 for each additional GB of APM Server, and each service may only consume up to 10% of the service destination groups, -which is 1000 service destination groups per service. +which is 1000 service destination groups per service for a 1 GB APM Server, increasing by 500 for each additional GB of APM Server. ** For service-summary metrics, there is no additional limit. In the above, a service is defined as a combination of `service.name`, `service.environment`, `service.language.name` and `agent.name`. 
diff --git a/internal/beater/beater.go b/internal/beater/beater.go index 0361ba19731..2bbd53a1ae2 100644 --- a/internal/beater/beater.go +++ b/internal/beater/beater.go @@ -230,23 +230,30 @@ func (s *Runner) Run(ctx context.Context) error { } if s.config.Aggregation.MaxServices <= 0 { - s.config.Aggregation.MaxServices = maxGroupsForAggregation(memLimitGB) + s.config.Aggregation.MaxServices = linearScaledValue(1_000, memLimitGB, 0) s.logger.Infof("Aggregation.MaxServices set to %d based on %0.1fgb of memory", s.config.Aggregation.MaxServices, memLimitGB, ) } + if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 { + s.config.Aggregation.ServiceTransactions.MaxGroups = linearScaledValue(1_000, memLimitGB, 0) + s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory", + s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB, + ) + } + if s.config.Aggregation.Transactions.MaxGroups <= 0 { - s.config.Aggregation.Transactions.MaxGroups = maxTxGroupsForAggregation(memLimitGB) + s.config.Aggregation.Transactions.MaxGroups = linearScaledValue(5_000, memLimitGB, 0) s.logger.Infof("Aggregation.Transactions.MaxGroups set to %d based on %0.1fgb of memory", s.config.Aggregation.Transactions.MaxGroups, memLimitGB, ) } - if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 { - s.config.Aggregation.ServiceTransactions.MaxGroups = maxGroupsForAggregation(memLimitGB) - s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory", - s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB, + if s.config.Aggregation.ServiceDestinations.MaxGroups <= 0 { + s.config.Aggregation.ServiceDestinations.MaxGroups = linearScaledValue(5_000, memLimitGB, 5_000) + s.logger.Infof("Aggregation.ServiceDestinations.MaxGroups set to %d based on %0.1fgb of memory", + s.config.Aggregation.ServiceDestinations.MaxGroups, memLimitGB, ) } @@ -568,26 +575,14 @@ func 
maxConcurrentDecoders(memLimitGB float64) uint { return decoders } -// maxGroupsForAggregation calculates the maximum service groups that a -// particular memory limit can have. This will be scaled linearly for bigger -// instances. -func maxGroupsForAggregation(memLimitGB float64) int { - const maxMemGB = 64 - if memLimitGB > maxMemGB { - memLimitGB = maxMemGB - } - return int(memLimitGB * 1_000) -} - -// maxTxGroupsForAggregation calculates the maximum transaction groups that a -// particular memory limit can have. This will be scaled linearly for bigger -// instances. -func maxTxGroupsForAggregation(memLimitGB float64) int { +// linearScaledValue returns a value that scales linearly with the memory limit, +// computed as int(perGBIncrement*memLimitGB + constant) with memLimitGB capped at 64. +func linearScaledValue(perGBIncrement, memLimitGB, constant float64) int { const maxMemGB = 64 if memLimitGB > maxMemGB { memLimitGB = maxMemGB } - return int(memLimitGB * 5_000) + return int(memLimitGB*perGBIncrement + constant) } // waitReady waits until the server is ready to index events.