Skip to content

Commit

Permalink
Scale service destination based on available memory (#11739)
Browse files Browse the repository at this point in the history
  • Loading branch information
lahsivjar authored Oct 2, 2023
1 parent 1df20b9 commit 60f6ac5
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 24 deletions.
1 change: 1 addition & 0 deletions changelogs/head.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ https://github.com/elastic/apm-server/compare/8.10\...main[View commits]
- Add back gzip support for grpc otlp endpoint {pull}11434[11434]
- Correctly mark jvm.memory.non_heap.pool.* and jvm.fd.* metrics as internal {pull}11303[11303]
- Fix tail-based sampling discarding low throughput and low sample rate traces {pull}11642[11642]
- Add memory-based autoscaling for service destination aggregation groups {pull}11739[11739]

[float]
==== Intake API Changes
Expand Down
5 changes: 3 additions & 2 deletions docs/data-model.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -548,9 +548,10 @@ which is 500 transaction groups per service per GB of APM Server.
** For service-transaction metrics, there is an additional limit of 1000 total service transaction groups per GB of APM Server,
and each service may only consume up to 10% of the service transaction groups,
which is 100 service transaction groups per service per GB of APM Server.
** For service-destination metrics, there is an additional limit of a constant 10000 total service destination groups,
** For service-destination metrics, there is an additional limit of 10000 total service destination groups for a 1 GB APM Server,
increasing by 5000 service destination groups for each additional GB of APM Server,
and each service may only consume up to 10% of the service destination groups,
which is 1000 service destination groups per service.
which is 1000 service destination groups per service for a 1 GB APM Server, increasing by 500 for each additional GB of APM Server.
** For service-summary metrics, there is no additional limit.

In the above, a service is defined as a combination of `service.name`, `service.environment`, `service.language.name` and `agent.name`.
Expand Down
39 changes: 17 additions & 22 deletions internal/beater/beater.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,23 +230,30 @@ func (s *Runner) Run(ctx context.Context) error {
}

if s.config.Aggregation.MaxServices <= 0 {
s.config.Aggregation.MaxServices = maxGroupsForAggregation(memLimitGB)
s.config.Aggregation.MaxServices = linearScaledValue(1_000, memLimitGB, 0)
s.logger.Infof("Aggregation.MaxServices set to %d based on %0.1fgb of memory",
s.config.Aggregation.MaxServices, memLimitGB,
)
}

if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 {
s.config.Aggregation.ServiceTransactions.MaxGroups = linearScaledValue(1_000, memLimitGB, 0)
s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory",
s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB,
)
}

if s.config.Aggregation.Transactions.MaxGroups <= 0 {
s.config.Aggregation.Transactions.MaxGroups = maxTxGroupsForAggregation(memLimitGB)
s.config.Aggregation.Transactions.MaxGroups = linearScaledValue(5_000, memLimitGB, 0)
s.logger.Infof("Aggregation.Transactions.MaxGroups set to %d based on %0.1fgb of memory",
s.config.Aggregation.Transactions.MaxGroups, memLimitGB,
)
}

if s.config.Aggregation.ServiceTransactions.MaxGroups <= 0 {
s.config.Aggregation.ServiceTransactions.MaxGroups = maxGroupsForAggregation(memLimitGB)
s.logger.Infof("Aggregation.ServiceTransactions.MaxGroups for service aggregation set to %d based on %0.1fgb of memory",
s.config.Aggregation.ServiceTransactions.MaxGroups, memLimitGB,
// Default the service destination group limit from the memory limit when it
// is not configured: 5,000 groups per GB plus a constant 5,000.
if s.config.Aggregation.ServiceDestinations.MaxGroups <= 0 {
	s.config.Aggregation.ServiceDestinations.MaxGroups = linearScaledValue(5_000, memLimitGB, 5_000)
	// Log the value that was just set (ServiceDestinations, not Transactions).
	s.logger.Infof("Aggregation.ServiceDestinations.MaxGroups set to %d based on %0.1fgb of memory",
		s.config.Aggregation.ServiceDestinations.MaxGroups, memLimitGB,
	)
}

Expand Down Expand Up @@ -568,26 +575,14 @@ func maxConcurrentDecoders(memLimitGB float64) uint {
return decoders
}

// maxGroupsForAggregation returns the maximum number of service groups for
// the given memory limit: 1,000 groups per GB, scaled linearly, with the
// memory limit capped at 64 GB before scaling.
func maxGroupsForAggregation(memLimitGB float64) int {
	const (
		memCapGB    = 64
		groupsPerGB = 1_000
	)
	cappedGB := memLimitGB
	if cappedGB > memCapGB {
		cappedGB = memCapGB
	}
	return int(cappedGB * groupsPerGB)
}

// maxTxGroupsForAggregation calculates the maximum transaction groups that a
// particular memory limit can have. This will be scaled linearly for bigger
// instances.
func maxTxGroupsForAggregation(memLimitGB float64) int {
// linearScaledValue calculates a linearly scaled value based on the memory
// limit using the formula y = (perGBIncrement * memLimitGB) + constant.
//
// memLimitGB is capped at 64 before scaling, so the result stops growing for
// larger memory limits. The floating-point result is truncated to an int.
func linearScaledValue(perGBIncrement, memLimitGB, constant float64) int {
	const maxMemGB = 64
	if memLimitGB > maxMemGB {
		memLimitGB = maxMemGB
	}
	return int(memLimitGB*perGBIncrement + constant)
}

// waitReady waits until the server is ready to index events.
Expand Down

0 comments on commit 60f6ac5

Please sign in to comment.