Skip to content

Commit

Permalink
fix: guard against overflow in sse backoff calculation (#455)
Browse files Browse the repository at this point in the history
In some platform configurations, the SSE backoff calculation overflows
which results in failing to schedule another backoff attempt via
`asio::deadline_timer::expires_from_now`.

This PR refactors the backoff algorithm to use free functions which can
be independently tested.

It fixes the bug by constraining the maximum backoff exponent based on
the configurable max backoff parameter.

Finally, I've added some test vectors for large attempt values. In the
bug report, the overflow happens at about 34 minutes which corresponded
to 55 attempts.
  • Loading branch information
cwaldren-ld authored Oct 8, 2024
1 parent 1ca48d5 commit a3c1e58
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 41 deletions.
1 change: 1 addition & 0 deletions libs/server-sent-events/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_library(${LIBNAME} OBJECT
parser.cpp
event.cpp
error.cpp
backoff_detail.cpp
backoff.cpp)

target_link_libraries(${LIBNAME}
Expand Down
35 changes: 17 additions & 18 deletions libs/server-sent-events/src/backoff.cpp
Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
#include "backoff.hpp"
#include "backoff_detail.hpp"

static double const kDefaultJitterRatio = 0.5;
static const std::chrono::milliseconds kDefaultResetInterval =
std::chrono::milliseconds{60'000};
static constexpr double kDefaultJitterRatio = 0.5;
static constexpr auto kDefaultResetInterval = std::chrono::milliseconds{60'000};

namespace launchdarkly::sse {

std::chrono::milliseconds Backoff::calculate_backoff() {
return initial_ * static_cast<uint64_t>(std::pow(2, attempt_ - 1));
}

std::chrono::milliseconds Backoff::jitter(std::chrono::milliseconds base) {
return std::chrono::milliseconds(static_cast<uint64_t>(
base.count() - (random_(jitter_ratio_) * base.count())));
}

std::chrono::milliseconds Backoff::delay() {
return jitter(std::min(calculate_backoff(), max_));
std::chrono::milliseconds Backoff::delay() const {
return detail::delay(initial_, max_, attempt_, max_exponent_, jitter_ratio_,
random_);
}

void Backoff::fail() {
Expand Down Expand Up @@ -50,12 +42,20 @@ Backoff::Backoff(std::chrono::milliseconds initial,
double jitter_ratio,
std::chrono::milliseconds reset_interval,
std::function<double(double ratio)> random)
: attempt_(1),
initial_(initial),
: initial_(initial),
max_(max),
// This is necessary to constrain the exponent that is passed eventually
// into std::pow. Otherwise, we'll be operating with a number that is too
// large to be represented as a chrono::milliseconds and (depending on the
// platform) may result in a value of 0, negative, or some other
// unexpected value.
max_exponent_(std::ceil(
std::log2(max.count() / std::max(std::chrono::milliseconds{1}.count(),
initial.count())))),
attempt_(1),
jitter_ratio_(jitter_ratio),
reset_interval_(reset_interval),
random_(random) {}
random_(std::move(random)) {}

Backoff::Backoff(std::chrono::milliseconds initial,
std::chrono::milliseconds max)
Expand All @@ -68,5 +68,4 @@ Backoff::Backoff(std::chrono::milliseconds initial,
0.0, kDefaultJitterRatio);
return distribution(this->random_gen_);
}) {}

} // namespace launchdarkly::sse
28 changes: 5 additions & 23 deletions libs/server-sent-events/src/backoff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <random>

namespace launchdarkly::sse {

/**
* Implements an algorithm for calculating the delay between connection
* attempts.
Expand Down Expand Up @@ -52,35 +51,18 @@ class Backoff {
/**
* Get the current reconnect delay.
*/
std::chrono::milliseconds delay();
[[nodiscard]] std::chrono::milliseconds delay() const;

private:
/**
* Calculate an exponential backoff based on the initial retry time and
* the current attempt.
*
* @param attempt The current attempt count.
* @param initial The initial retry delay.
* @return The current backoff based on the number of attempts.
*/
std::chrono::milliseconds calculate_backoff();

/**
* Produce a jittered version of the base value. This value will be adjusted
* randomly to be between 50% and 100% of the input duration.
*
* @param base The base duration to jitter.
* @return The jittered duration.
*/
std::chrono::milliseconds jitter(std::chrono::milliseconds base);

std::chrono::milliseconds initial_;
std::chrono::milliseconds max_;
uint64_t attempt_;

std::uint64_t max_exponent_;
std::uint64_t attempt_;
std::optional<std::chrono::time_point<std::chrono::system_clock>>
active_since_;
double const jitter_ratio_;
const std::chrono::milliseconds reset_interval_;
std::chrono::milliseconds const reset_interval_;
// Default random generator. Used when random method not specified.
std::function<double(double ratio)> random_;
std::default_random_engine random_gen_;
Expand Down
35 changes: 35 additions & 0 deletions libs/server-sent-events/src/backoff_detail.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "backoff_detail.hpp"

#include <algorithm>
#include <cmath>

namespace launchdarkly::sse::detail {
std::chrono::milliseconds calculate_backoff(
std::chrono::milliseconds const initial,
std::uint64_t const attempt,
std::uint64_t const max_exponent) {
std::uint64_t const exponent = std::min(attempt - 1, max_exponent);
double const exponentiated = std::pow(2, exponent);
return initial * static_cast<uint64_t>(exponentiated);
}

std::chrono::milliseconds jitter(std::chrono::milliseconds const base,
double const jitter_ratio,
std::function<double(double)> const& random) {
double const jitter = random(jitter_ratio);
auto const base_as_double = static_cast<double>(base.count());
double const jittered_base = base_as_double - jitter * base_as_double;
return std::chrono::milliseconds(static_cast<uint64_t>(jittered_base));
}

std::chrono::milliseconds delay(std::chrono::milliseconds const initial,

Check warning on line 25 in libs/server-sent-events/src/backoff_detail.cpp

View workflow job for this annotation

GitHub Actions / cpp-linter

/libs/server-sent-events/src/backoff_detail.cpp:25:33 [bugprone-easily-swappable-parameters]

2 adjacent parameters of 'delay' of similar type ('const std::chrono::milliseconds') are easily swapped by mistake
std::chrono::milliseconds const max,
std::uint64_t const attempt,
std::uint64_t const max_exponent,

Check warning on line 28 in libs/server-sent-events/src/backoff_detail.cpp

View workflow job for this annotation

GitHub Actions / cpp-linter

/libs/server-sent-events/src/backoff_detail.cpp:28:33 [bugprone-easily-swappable-parameters]

2 adjacent parameters of 'delay' of convertible types are easily swapped by mistake
double const jitter_ratio,
std::function<double(double)> const& random) {
auto const backoff = calculate_backoff(initial, attempt, max_exponent);
auto const constrained_backoff = std::min(backoff, max);
return jitter(constrained_backoff, jitter_ratio, random);
}
} // namespace launchdarkly::sse::detail
52 changes: 52 additions & 0 deletions libs/server-sent-events/src/backoff_detail.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#pragma once

#include <chrono>
#include <functional>

namespace launchdarkly::sse::detail {
/**
* Calculate an exponential backoff based on the initial retry time and
* the current attempt.
*
* @param initial The initial retry delay.
* @param attempt The current attempt count.
* @param max_exponent The maximum exponent to use in the calculation.
*
* @return The current backoff based on the number of attempts.
*/

std::chrono::milliseconds calculate_backoff(std::chrono::milliseconds initial,
std::uint64_t attempt,
std::uint64_t max_exponent);
/**
* Produce a jittered version of the base value. This value will be adjusted
* randomly to be between 50% and 100% of the input duration.
*
* @param base The base duration to jitter.
* @param jitter_ratio The ratio to use when jittering.
* @param random A random method which produces a value between 0 and
* jitter_ratio.
* @return The jittered duration.
*/
std::chrono::milliseconds jitter(std::chrono::milliseconds base,
double jitter_ratio,
std::function<double(double)> const& random);

/**
*
* @param initial The initial retry delay.
* @param max The maximum delay between retries.
* @param attempt The current attempt.
* @param max_exponent The maximum exponent to use in the calculation.
* @param jitter_ratio The ratio to use when jittering.
* @param random A random method which produces a value between 0 and
* jitter_ratio.
* @return The delay between connection attempts.
*/
std::chrono::milliseconds delay(std::chrono::milliseconds initial,
std::chrono::milliseconds max,
std::uint64_t attempt,
std::uint64_t max_exponent,
double jitter_ratio,
std::function<double(double)> const& random);
} // namespace launchdarkly::sse::detail
102 changes: 102 additions & 0 deletions libs/server-sent-events/tests/backoff_test.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#include <gtest/gtest.h>

#include <numeric>
#include <thread>

#include "backoff.hpp"
#include "backoff_detail.hpp"

using launchdarkly::sse::Backoff;

Expand Down Expand Up @@ -94,3 +96,103 @@ TEST(BackoffTests, BackoffDoesNotResetActiveConnectionWasTooShort) {

EXPECT_EQ(8000, backoff.delay().count());
}

// This test specifically attempts to reproduce an overflow bug reported by a
// customer that occurred using default backoff parameters after 55 attempts.
TEST(BackoffTests, BackoffDoesNotOverflowAt55Attempts) {
Backoff backoff(
std::chrono::milliseconds{1000}, std::chrono::milliseconds{30000}, 0,
std::chrono::milliseconds{6000}, [](auto ratio) { return 0; });

constexpr std::size_t kAttemptsToReachMax = 5;
constexpr std::size_t kAttemptWhereOverflowOccurs = 54;

for (int i = 0; i < kAttemptsToReachMax; i++) {
backoff.fail();
}
// At 5 attempts, we've already reached maximum backoff, given the initial
// parameters.
EXPECT_EQ(std::chrono::milliseconds{30000}, backoff.delay());

// This should succeed even if the code was unprotected.
for (int i = 0; i < kAttemptWhereOverflowOccurs - kAttemptsToReachMax - 1;
i++) {
backoff.fail();
EXPECT_EQ(std::chrono::milliseconds{30000}, backoff.delay());
}

// This should trigger overflow in unprotected code.
backoff.fail();
EXPECT_EQ(std::chrono::milliseconds{30000}, backoff.delay());
}

class BackoffOverflowEndToEndTest
: public ::testing::TestWithParam<std::uint64_t> {};

// The purpose of this test is to simulate the SDK calling backoff.fail()
// over and over again.
//
// Given a large amount of failed attempts, the calculation should remain at
// the max backoff value and stay there.
//
// Since we can't simulate very large numbers of attempts (simply takes too
// long), the free functions that Backoff delegates to are tested separately.
TEST_P(BackoffOverflowEndToEndTest, BackoffDoesNotOverflowWithVariousAttempts) {
Backoff backoff(
std::chrono::milliseconds{1000}, std::chrono::milliseconds{30000}, 0,
std::chrono::milliseconds{60000}, [](auto ratio) { return 0; });

std::uint64_t const attempts = GetParam();

for (int i = 0; i < attempts; i++) {
backoff.fail();
}

auto const val = backoff.delay();
EXPECT_EQ(val, std::chrono::milliseconds{30000});
}

INSTANTIATE_TEST_SUITE_P(
VariousBackoffAttempts,
BackoffOverflowEndToEndTest,
::testing::Values<std::uint64_t>(10, 54, 55, 63, 64, 65, 500, 1000));

class BackoffOverflowUnitTest : public ::testing::TestWithParam<std::uint64_t> {
};

// This test performs a straight calculation of the backoff given
// a range of large values, including the maximum uint64_t which might be
// reached at the heat death of the universe (or earlier, if there's a bug.)
// It should not overflow, but remain at the max value.
TEST_P(BackoffOverflowUnitTest,
BackoffDoesNotOverflowWithVariousAttemptsInCalculateBackoff) {
std::uint64_t const attempts = GetParam();

constexpr std::uint64_t max_exponent = 5;

// Given an exponent of 5, and initial delay of 1ms, the max should be
// 2^5 = 32 for any amount of attempts > 5.

constexpr auto initial = std::chrono::milliseconds{1};
constexpr auto max = std::chrono::milliseconds{32};

auto const millisecondsA = launchdarkly::sse::detail::calculate_backoff(
initial, attempts, max_exponent);

ASSERT_EQ(millisecondsA, max);

// Given a random function that always returns 0, this should be identical
// to calling calculate_backoff.
auto const millisecondsB = launchdarkly::sse::detail::delay(
initial, max, attempts, max_exponent, 0, [](auto ratio) { return 0; });

ASSERT_EQ(millisecondsB, max);
}

INSTANTIATE_TEST_SUITE_P(VariousBackoffAttempts,
BackoffOverflowUnitTest,
::testing::Values<std::uint64_t>(
1000,
10000,
100000,
std::numeric_limits<std::uint64_t>::max()));

0 comments on commit a3c1e58

Please sign in to comment.