From a67f9bcc36bdf03c72c4c57924aeb44d4cac365d Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 28 Sep 2023 23:51:15 +0800 Subject: [PATCH 1/5] Scale service destination based on available memory --- internal/beater/beater.go | 43 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/internal/beater/beater.go b/internal/beater/beater.go index 0361ba19731..7e86db02c88 100644 --- a/internal/beater/beater.go +++ b/internal/beater/beater.go @@ -230,23 +230,34 @@ func (s *Runner) Run(ctx context.Context) error { } if s.config.Aggregation.MaxServices <= 0 { - s.config.Aggregation.MaxServices = maxGroupsForAggregation(memLimitGB) + // scale based on available memory considering 1K groups for 1GB + s.config.Aggregation.MaxServices = linearScaledValue(1_000, memLimitGB) s.logger.Infof("Aggregation.MaxServices set to %d based on %0.1fgb of memory", s.config.Aggregation.MaxServices, memLimitGB, ) } + if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 { + // scale based on available memory considering 1K groups for 1GB + s.config.Aggregation.ServiceTransactions.MaxGroups = linearScaledValue(1_000, memLimitGB) + s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory", + s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB, + ) + } + if s.config.Aggregation.Transactions.MaxGroups <= 0 { - s.config.Aggregation.Transactions.MaxGroups = maxTxGroupsForAggregation(memLimitGB) + // scale based on available memory considering 5K groups for 1GB + s.config.Aggregation.Transactions.MaxGroups = linearScaledValue(5_000, memLimitGB) s.logger.Infof("Aggregation.Transactions.MaxGroups set to %d based on %0.1fgb of memory", s.config.Aggregation.Transactions.MaxGroups, memLimitGB, ) } - if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 { - s.config.Aggregation.ServiceTransactions.MaxGroups = maxGroupsForAggregation(memLimitGB) - 
s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory", - s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB, + if s.config.Aggregation.ServiceDestinations.MaxGroups <= 0 { + // scale based on available memory considering 5K groups for 1GB + s.config.Aggregation.ServiceDestinations.MaxGroups = linearScaledValue(5_000, memLimitGB) + s.logger.Infof("Aggregation.ServiceDestinations.MaxGroups set to %d based on %0.1fgb of memory", + s.config.Aggregation.ServiceDestinations.MaxGroups, memLimitGB, ) } @@ -568,26 +579,14 @@ func maxConcurrentDecoders(memLimitGB float64) uint { return decoders } -// maxGroupsForAggregation calculates the maximum service groups that a -// particular memory limit can have. This will be scaled linearly for bigger -// instances. -func maxGroupsForAggregation(memLimitGB float64) int { - const maxMemGB = 64 - if memLimitGB > maxMemGB { - memLimitGB = maxMemGB - } - return int(memLimitGB * 1_000) -} - -// maxTxGroupsForAggregation calculates the maximum transaction groups that a -// particular memory limit can have. This will be scaled linearly for bigger -// instances. -func maxTxGroupsForAggregation(memLimitGB float64) int { +// linearScaledValue calculates linearly scaled value based on memory limit where +// c denotes the value for 1GB. +func linearScaledValue(c, memLimitGB float64) int { const maxMemGB = 64 if memLimitGB > maxMemGB { memLimitGB = maxMemGB } - return int(memLimitGB * 5_000) + return int(memLimitGB * c) } // waitReady waits until the server is ready to index events. 
From 2c2db61068f2095e3f44470a09fc0f5cbcd2b003 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Fri, 29 Sep 2023 10:59:06 +0800 Subject: [PATCH 2/5] Add changelog --- changelogs/head.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/changelogs/head.asciidoc b/changelogs/head.asciidoc index 75f47945cef..a7d8d53ba2e 100644 --- a/changelogs/head.asciidoc +++ b/changelogs/head.asciidoc @@ -15,6 +15,7 @@ https://github.com/elastic/apm-server/compare/8.10\...main[View commits] - Add back gzip support for grpc otlp endpoint {pull}11434[11434] - Correctly mark jvm.memory.non_heap.pool.* and jvm.fd.* metrics as internal {pull}11303[11303] - Fix tail-based sampling discarding low throughput and low sample rate traces {pull}11642[11642] +- Add memory based autoscaling for service destination aggregation groups {pull}11739[11739] [float] ==== Intake API Changes From 0c533c61eab534855ed44a306f5ff32cec9c3475 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Mon, 2 Oct 2023 17:08:28 +0800 Subject: [PATCH 3/5] Update service destination to use a 5k y intercept --- internal/beater/beater.go | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/internal/beater/beater.go b/internal/beater/beater.go index 7e86db02c88..031b6f7dcac 100644 --- a/internal/beater/beater.go +++ b/internal/beater/beater.go @@ -230,32 +230,28 @@ func (s *Runner) Run(ctx context.Context) error { } if s.config.Aggregation.MaxServices <= 0 { - // scale based on available memory considering 1K groups for 1GB - s.config.Aggregation.MaxServices = linearScaledValue(1_000, memLimitGB) + s.config.Aggregation.MaxServices = linearScaledValue(1_000, memLimitGB, 0) s.logger.Infof("Aggregation.MaxServices set to %d based on %0.1fgb of memory", s.config.Aggregation.MaxServices, memLimitGB, ) } if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 { - // scale based on available memory considering 1K groups for 1GB - s.config.Aggregation.ServiceTransactions.MaxGroups = 
linearScaledValue(1_000, memLimitGB) + s.config.Aggregation.ServiceTransactions.MaxGroups = linearScaledValue(1_000, memLimitGB, 0) s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory", s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB, ) } if s.config.Aggregation.Transactions.MaxGroups <= 0 { - // scale based on available memory considering 5K groups for 1GB - s.config.Aggregation.Transactions.MaxGroups = linearScaledValue(5_000, memLimitGB) + s.config.Aggregation.Transactions.MaxGroups = linearScaledValue(5_000, memLimitGB, 0) s.logger.Infof("Aggregation.Transactions.MaxGroups set to %d based on %0.1fgb of memory", s.config.Aggregation.Transactions.MaxGroups, memLimitGB, ) } if s.config.Aggregation.ServiceDestinations.MaxGroups <= 0 { - // scale based on available memory considering 5K groups for 1GB - s.config.Aggregation.ServiceDestinations.MaxGroups = linearScaledValue(5_000, memLimitGB) + s.config.Aggregation.ServiceDestinations.MaxGroups = linearScaledValue(5_000, memLimitGB, 5_000) s.logger.Infof("Aggregation.ServiceDestinations.MaxGroups set to %d based on %0.1fgb of memory", s.config.Aggregation.ServiceDestinations.MaxGroups, memLimitGB, ) @@ -579,14 +575,14 @@ func maxConcurrentDecoders(memLimitGB float64) uint { return decoders } -// linearScaledValue calculates linearly scaled value based on memory limit where -// c denotes the value for 1GB. -func linearScaledValue(c, memLimitGB float64) int { +// linearScaledValue calculates linearly scaled value based on memory limit using +// the formula y = (perGBIncrement * memLimitGB) + c +func linearScaledValue(perGBIncrement, memLimitGB, c float64) int { const maxMemGB = 64 if memLimitGB > maxMemGB { memLimitGB = maxMemGB } - return int(memLimitGB * c) + return int(memLimitGB*perGBIncrement) + c } // waitReady waits until the server is ready to index events. 
From ddd77a83a34994fee5e42811f24ad095005d8839 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Mon, 2 Oct 2023 17:14:51 +0800 Subject: [PATCH 4/5] Update data model docs --- docs/data-model.asciidoc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/data-model.asciidoc b/docs/data-model.asciidoc index 918c48f2c23..5e6f3ad9724 100644 --- a/docs/data-model.asciidoc +++ b/docs/data-model.asciidoc @@ -548,9 +548,10 @@ which is 500 transaction groups per service per GB of APM Server. ** For service-transaction metrics, there is an additional limit of 1000 total service transaction groups per GB of APM Server, and each service may only consume up to 10% of the service transaction groups, which is 100 service transaction groups per service per GB of APM Server. -** For service-destination metrics, there is an additional limit of a constant 10000 total service destination groups, +** For service-destination metrics, there is an additional limit of 10000 total service destination groups for a 1 GB APM Server, +increasing by 5000 service destination groups for each additional GB of APM Server, and each service may only consume up to 10% of the service destination groups, -which is 1000 service destination groups per service. +which is 1000 service destination groups per service for a 1 GB APM Server, increasing by 500 for each additional GB of APM Server. ** For service-summary metrics, there is no additional limit. In the above, a service is defined as a combination of `service.name`, `service.environment`, `service.language.name` and `agent.name`. 
From 9747a026c92efdda8ceb63099716e722be5178d0 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Mon, 2 Oct 2023 17:43:09 +0800 Subject: [PATCH 5/5] Fix lint --- internal/beater/beater.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/beater/beater.go b/internal/beater/beater.go index 031b6f7dcac..2bbd53a1ae2 100644 --- a/internal/beater/beater.go +++ b/internal/beater/beater.go @@ -576,13 +576,13 @@ func maxConcurrentDecoders(memLimitGB float64) uint { } // linearScaledValue calculates linearly scaled value based on memory limit using -// the formula y = (perGBIncrement * memLimitGB) + c -func linearScaledValue(perGBIncrement, memLimitGB, c float64) int { +// the formula y = (perGBIncrement * memLimitGB) + constant +func linearScaledValue(perGBIncrement, memLimitGB, constant float64) int { const maxMemGB = 64 if memLimitGB > maxMemGB { memLimitGB = maxMemGB } - return int(memLimitGB*perGBIncrement) + c + return int(memLimitGB*perGBIncrement + constant) } // waitReady waits until the server is ready to index events.