Apply expected loss to trends continuous
danielbachhuber committed Dec 19, 2024
1 parent ecaed9a commit 17ea023
Showing 2 changed files with 140 additions and 5 deletions.
@@ -474,3 +474,60 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
self.assertAlmostEqual(intervals["test"][1], 0.0025, delta=0.0025) # Upper bound ratio

self.run_test_for_both_implementations(run_test)

def test_expected_loss_minimal_difference(self):
"""Test expected loss when variants have very similar performance"""

def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
control_absolute_exposure = 600
control = create_variant("control", mean=100.0, exposure=1, absolute_exposure=control_absolute_exposure)
test_absolute_exposure = 600
test = create_variant(
"test",
mean=120.0, # Slightly higher mean
exposure=test_absolute_exposure / control_absolute_exposure,
absolute_exposure=test_absolute_exposure,
)

probabilities = calculate_probabilities(control, [test])
significance, expected_loss = are_results_significant(control, [test], probabilities)

if stats_version == 2:
self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
# Expected loss should be relatively small
self.assertLess(expected_loss, 3.0) # Less than $3 expected loss
self.assertGreater(expected_loss, 0) # But still some loss
else:
# Original implementation behavior (returns p_value in expected_loss)
self.assertEqual(significance, ExperimentSignificanceCode.HIGH_P_VALUE)
self.assertAlmostEqual(expected_loss, 0.2, delta=0.1)

self.run_test_for_both_implementations(run_test)

def test_expected_loss_test_variant_clear_winner(self):
"""Test expected loss when one variant is clearly better"""

def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
control_absolute_exposure = 10000
control = create_variant("control", mean=100.0, exposure=1, absolute_exposure=control_absolute_exposure)
test_absolute_exposure = 10000
test = create_variant(
"test",
mean=200.0, # Much higher mean
exposure=test_absolute_exposure / control_absolute_exposure,
absolute_exposure=test_absolute_exposure,
)

probabilities = calculate_probabilities(control, [test])
significance, expected_loss = are_results_significant(control, [test], probabilities)

if stats_version == 2:
self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
# Expected loss should be very close to zero since test is clearly better
self.assertLess(expected_loss, 0.1) # Essentially zero loss
else:
# Original implementation behavior
self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
self.assertLess(expected_loss, 0.001)

self.run_test_for_both_implementations(run_test)
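These assertions encode how expected loss should behave as the gap between variants grows: a near-tie leaves a small but non-zero loss, while a clear winner drives the loss toward zero. As a rough illustration only (using hypothetical normal posteriors, not the production Normal-Inverse-Gamma model shown further down), a minimal Monte Carlo sketch reproduces the same pattern:

import numpy as np

rng = np.random.default_rng(42)
SAMPLES = 100_000

def expected_loss(target_samples: np.ndarray, other_samples: np.ndarray) -> float:
    # Expected amount of mean value given up by shipping the target variant
    # when the other variant turns out to be better: E[max(0, other - target)].
    return float(np.mean(np.maximum(0, other_samples - target_samples)))

# Hypothetical posteriors for the "minimal difference" case (means 100 vs 120).
control_close = rng.normal(loc=100.0, scale=8.0, size=SAMPLES)
test_close = rng.normal(loc=120.0, scale=8.0, size=SAMPLES)

# Hypothetical posteriors for the "clear winner" case (means 100 vs 200).
control_far = rng.normal(loc=100.0, scale=2.0, size=SAMPLES)
test_far = rng.normal(loc=200.0, scale=2.0, size=SAMPLES)

print(expected_loss(test_close, control_close))  # small but non-zero
print(expected_loss(test_far, control_far))      # essentially zero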
@@ -1,6 +1,10 @@
from rest_framework.exceptions import ValidationError
from sentry_sdk import capture_exception
-from posthog.hogql_queries.experiments import FF_DISTRIBUTION_THRESHOLD, MIN_PROBABILITY_FOR_SIGNIFICANCE
+from posthog.hogql_queries.experiments import (
+    FF_DISTRIBUTION_THRESHOLD,
+    MIN_PROBABILITY_FOR_SIGNIFICANCE,
+    EXPECTED_LOSS_SIGNIFICANCE_LEVEL,
+)
from posthog.schema import ExperimentSignificanceCode, ExperimentVariantTrendsBaseStats
from scipy.stats import t
import numpy as np
@@ -123,7 +127,7 @@ def are_results_significant_v2_continuous(
--------
tuple[ExperimentSignificanceCode, float]
- ExperimentSignificanceCode indicating the significance status
-        - Probability value
+        - Expected loss value for significant results, 1.0 for non-significant results
"""
# Check exposure thresholds
for variant in test_variants:
@@ -137,10 +141,22 @@ def are_results_significant_v2_continuous(
max_probability = max(probabilities)

# Check if any variant has a high enough probability of being best
-    if max_probability < MIN_PROBABILITY_FOR_SIGNIFICANCE:
-        return ExperimentSignificanceCode.LOW_WIN_PROBABILITY, 1.0
+    if max_probability >= MIN_PROBABILITY_FOR_SIGNIFICANCE:
+        # Find best performing variant
+        all_variants = [control_variant, *test_variants]
+        means = [v.count for v in all_variants]  # count field stores mean value
+        best_idx = np.argmax(means)
+        best_variant = all_variants[best_idx]
+        other_variants = all_variants[:best_idx] + all_variants[best_idx + 1 :]

-    return ExperimentSignificanceCode.SIGNIFICANT, 0.0
+        expected_loss = calculate_expected_loss_v2_continuous(best_variant, other_variants)
+
+        if expected_loss >= EXPECTED_LOSS_SIGNIFICANCE_LEVEL:
+            return ExperimentSignificanceCode.HIGH_LOSS, expected_loss
+
+        return ExperimentSignificanceCode.SIGNIFICANT, expected_loss
+
+    return ExperimentSignificanceCode.LOW_WIN_PROBABILITY, 1.0


def calculate_credible_intervals_v2_continuous(variants, lower_bound=0.025, upper_bound=0.975):
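Taken together with the docstring change above, the new contract is: SIGNIFICANT with a small expected loss, HIGH_LOSS when even the best variant's expected loss reaches EXPECTED_LOSS_SIGNIFICANCE_LEVEL, and LOW_WIN_PROBABILITY with a value of 1.0 otherwise. A hedged usage sketch follows, reusing the create_variant helper from the tests above; the calculate_probabilities_v2_continuous name and the import paths are assumptions, not verified against the module:

# Sketch only: create_variant is the test helper shown earlier, and
# calculate_probabilities_v2_continuous is an assumed name for the v2
# probability function in this module.
from posthog.schema import ExperimentSignificanceCode

control = create_variant("control", mean=100.0, exposure=1, absolute_exposure=5000)
test = create_variant("test", mean=115.0, exposure=1, absolute_exposure=5000)

probabilities = calculate_probabilities_v2_continuous(control, [test])
significance, expected_loss = are_results_significant_v2_continuous(control, [test], probabilities)

if significance == ExperimentSignificanceCode.SIGNIFICANT:
    # A winner was found and its expected loss is below EXPECTED_LOSS_SIGNIFICANCE_LEVEL.
    print(f"Significant; expected loss if we ship the best variant: {expected_loss:.3f}")
elif significance == ExperimentSignificanceCode.HIGH_LOSS:
    # A variant is probably best, but shipping it still risks too much mean value.
    print(f"High expected loss: {expected_loss:.3f}")
else:
    # LOW_WIN_PROBABILITY (or an exposure-threshold failure); expected_loss is 1.0 here.
    print(f"Not significant: {significance}")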
@@ -193,3 +209,65 @@ def calculate_credible_intervals_v2_continuous(variants, lower_bound=0.025, upper_bound=0.975):
return {}

return intervals


def calculate_expected_loss_v2_continuous(
target_variant: ExperimentVariantTrendsBaseStats, variants: list[ExperimentVariantTrendsBaseStats]
) -> float:
"""
Calculates expected loss in mean value using Normal-Inverse-Gamma conjugate prior.
This implementation uses a Bayesian approach with Normal-Inverse-Gamma model
to estimate the expected loss when choosing the target variant over others.
The data is log-transformed to handle typical revenue/continuous metric distributions.
Parameters:
-----------
target_variant : ExperimentVariantTrendsBaseStats
The variant being evaluated for loss
variants : list[ExperimentVariantTrendsBaseStats]
List of other variants to compare against
Returns:
--------
float
Expected loss in mean value if choosing the target variant
"""
# Calculate posterior parameters for target variant
log_target_mean = np.log(target_variant.count + EPSILON)

# Update parameters for target variant
kappa_n_target = KAPPA_0 + target_variant.absolute_exposure
mu_n_target = (KAPPA_0 * MU_0 + target_variant.absolute_exposure * log_target_mean) / kappa_n_target
alpha_n_target = ALPHA_0 + target_variant.absolute_exposure / 2
beta_n_target = BETA_0 + 0.5 * target_variant.absolute_exposure * LOG_VARIANCE

# Draw samples from target variant's posterior
target_posterior = t(
df=2 * alpha_n_target, loc=mu_n_target, scale=np.sqrt(beta_n_target / (kappa_n_target * alpha_n_target))
)
target_samples = target_posterior.rvs(SAMPLE_SIZE)

# Draw samples from each comparison variant's posterior
variant_samples = []
for variant in variants:
log_variant_mean = np.log(variant.count + EPSILON)

kappa_n = KAPPA_0 + variant.absolute_exposure
mu_n = (KAPPA_0 * MU_0 + variant.absolute_exposure * log_variant_mean) / kappa_n
alpha_n = ALPHA_0 + variant.absolute_exposure / 2
beta_n = BETA_0 + 0.5 * variant.absolute_exposure * LOG_VARIANCE

variant_posterior = t(df=2 * alpha_n, loc=mu_n, scale=np.sqrt(beta_n / (kappa_n * alpha_n)))
variant_samples.append(variant_posterior.rvs(SAMPLE_SIZE))

# Transform samples back from log space
target_samples = np.exp(target_samples) - EPSILON
variant_samples = [np.exp(samples) - EPSILON for samples in variant_samples]

# Calculate loss
variant_max = np.maximum.reduce(variant_samples)
losses = np.maximum(0, variant_max - target_samples)
expected_loss = float(np.mean(losses))

return expected_loss
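For readers without the rest of the module at hand, here is a self-contained sketch of the same computation for a single comparison. The prior constants (KAPPA_0, MU_0, ALPHA_0, BETA_0, LOG_VARIANCE, EPSILON, SAMPLE_SIZE) are defined elsewhere in this file; the values below are illustrative assumptions, not taken from the commit:

# Standalone sketch of the expected-loss computation above, with assumed values
# for the prior constants that live elsewhere in the module.
import numpy as np
from scipy.stats import t

KAPPA_0, MU_0, ALPHA_0, BETA_0 = 1.0, 1.0, 1.0, 1.0  # assumed prior parameters
LOG_VARIANCE = 0.75  # assumed variance of the log-transformed metric
EPSILON = 1e-10
SAMPLE_SIZE = 10_000

def posterior_samples(mean: float, absolute_exposure: int) -> np.ndarray:
    # Normal-Inverse-Gamma update in log space, then the marginal Student-t for the mean.
    log_mean = np.log(mean + EPSILON)
    kappa_n = KAPPA_0 + absolute_exposure
    mu_n = (KAPPA_0 * MU_0 + absolute_exposure * log_mean) / kappa_n
    alpha_n = ALPHA_0 + absolute_exposure / 2
    beta_n = BETA_0 + 0.5 * absolute_exposure * LOG_VARIANCE
    posterior = t(df=2 * alpha_n, loc=mu_n, scale=np.sqrt(beta_n / (kappa_n * alpha_n)))
    return np.exp(posterior.rvs(SAMPLE_SIZE)) - EPSILON  # back to the original scale

target = posterior_samples(mean=200.0, absolute_exposure=10_000)  # clear winner
other = posterior_samples(mean=100.0, absolute_exposure=10_000)

losses = np.maximum(0, other - target)
print(float(np.mean(losses)))  # expected loss, effectively zero for a clear winner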
