Commit

Adds command line argument to specify throughput percentiles to display (#449)

Signed-off-by: Peter Alfonsi <[email protected]>
Co-authored-by: Peter Alfonsi <[email protected]>
peteralfonsi and Peter Alfonsi authored Jan 30, 2024
1 parent 5538546 commit c74c993
Showing 4 changed files with 86 additions and 30 deletions.
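
For orientation, the new flag is used alongside the existing --latency-percentiles option on the test-execution command. A hypothetical invocation (the execute-test subcommand and workload name are illustrative stand-ins, not part of this commit) could look like:

    opensearch-benchmark execute-test --workload=geonames --latency-percentiles=50,90,99.9 --throughput-percentiles=50,90,99.9

When the flag is omitted, the default is an empty string, so throughput is reported as min/mean/median/max only, exactly as before.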
7 changes: 7 additions & 0 deletions osbenchmark/benchmark.py
@@ -560,6 +560,12 @@ def add_workload_source(subparser):
f"(default: {metrics.GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES}).",
default=metrics.GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES
)
test_execution_parser.add_argument(
"--throughput-percentiles",
help=f"A comma-separated list of percentiles to report for throughput, in addition to mean/median/max/min "
f"(default: {metrics.GlobalStatsCalculator.DEFAULT_THROUGHPUT_PERCENTILES}).",
default=metrics.GlobalStatsCalculator.DEFAULT_THROUGHPUT_PERCENTILES
)

###############################################################################
#
@@ -869,6 +875,7 @@ def dispatch_sub_command(arg_parser, args, cfg):
opts.csv_to_list(args.load_worker_coordinator_hosts))
cfg.add(config.Scope.applicationOverride, "workload", "test.mode.enabled", args.test_mode)
cfg.add(config.Scope.applicationOverride, "workload", "latency.percentiles", args.latency_percentiles)
cfg.add(config.Scope.applicationOverride, "workload", "throughput.percentiles", args.throughput_percentiles)
configure_workload_params(arg_parser, args, cfg)
configure_connection_params(arg_parser, args, cfg)
configure_telemetry_params(args, cfg)
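
As a self-contained sketch of the wiring above (an argparse flag with a comma-separated string default that downstream code splits into floats); the parser and constant below are simplified stand-ins, not OSB's real objects:

    import argparse

    # Stand-in for metrics.GlobalStatsCalculator.DEFAULT_THROUGHPUT_PERCENTILES ("" means no extra percentiles)
    DEFAULT_THROUGHPUT_PERCENTILES = ""

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--throughput-percentiles",
        help="A comma-separated list of percentiles to report for throughput, "
             "in addition to mean/median/max/min.",
        default=DEFAULT_THROUGHPUT_PERCENTILES)

    args = parser.parse_args(["--throughput-percentiles", "50,90,99.9"])
    # The same split-to-floats step the TestExecution constructor applies to the raw string:
    percentiles = [float(v) for v in args.throughput_percentiles.split(",")] if args.throughput_percentiles else []
    print(percentiles)  # [50.0, 90.0, 99.9]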
75 changes: 57 additions & 18 deletions osbenchmark/metrics.py
@@ -336,7 +336,8 @@ def calculate_results(store, test_execution):
store,
test_execution.workload,
test_execution.test_procedure,
latency_percentiles=test_execution.latency_percentiles
latency_percentiles=test_execution.latency_percentiles,
throughput_percentiles=test_execution.throughput_percentiles
)
return calc()

@@ -1297,13 +1298,19 @@ def create_test_execution(cfg, workload, test_procedure, workload_revision=None)
plugin_params = cfg.opts("builder", "plugin.params")
benchmark_version = version.version()
benchmark_revision = version.revision()
latency_percentiles = cfg.opts("workload", "latency.percentiles", mandatory=False)
latency_percentiles = cfg.opts("workload", "latency.percentiles", mandatory=False,
default_value=GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES)
throughput_percentiles = cfg.opts("workload", "throughput.percentiles", mandatory=False,
default_value=GlobalStatsCalculator.DEFAULT_THROUGHPUT_PERCENTILES)
# In tests, we don't get the default command-line arg value for percentiles,
# so supply them as defaults here as well

return TestExecution(benchmark_version, benchmark_revision,
environment, test_execution_id, test_execution_timestamp,
pipeline, user_tags, workload,
workload_params, test_procedure, provision_config_instance, provision_config_instance_params,
plugin_params, workload_revision, latency_percentiles=latency_percentiles)
plugin_params, workload_revision, latency_percentiles=latency_percentiles,
throughput_percentiles=throughput_percentiles)


class TestExecution:
@@ -1313,7 +1320,7 @@ def __init__(self, benchmark_version, benchmark_revision, environment_name,
provision_config_instance_params, plugin_params,
workload_revision=None, provision_config_revision=None,
distribution_version=None, distribution_flavor=None,
revision=None, results=None, meta_data=None, latency_percentiles=None):
revision=None, results=None, meta_data=None, latency_percentiles=None, throughput_percentiles=None):
if results is None:
results = {}
# this happens when the test execution is created initially
@@ -1326,6 +1333,8 @@ def __init__(self, benchmark_version, benchmark_revision, environment_name,
if latency_percentiles:
# split comma-separated string into list of floats
latency_percentiles = [float(value) for value in latency_percentiles.split(",")]
if throughput_percentiles:
throughput_percentiles = [float(value) for value in throughput_percentiles.split(",")]
self.benchmark_version = benchmark_version
self.benchmark_revision = benchmark_revision
self.environment_name = environment_name
@@ -1347,6 +1356,7 @@ def __init__(self, benchmark_version, benchmark_revision, environment_name,
self.results = results
self.meta_data = meta_data
self.latency_percentiles = latency_percentiles
self.throughput_percentiles = throughput_percentiles


@property
@@ -1684,46 +1694,56 @@ def filter_percentiles_by_sample_size(sample_size, percentiles):
raise AssertionError("Percentiles require at least one sample")

filtered_percentiles = []
# Treat the 1 and 2-10 case separately, to return p50 and p100 if present
# Treat the cases below 10 separately, to return p25, 50, 75, 100 if present
if sample_size == 1:
filtered_percentiles = [100]
elif sample_size < 10:
elif sample_size < 4:
for p in [50, 100]:
if p in percentiles:
filtered_percentiles.append(p)
elif sample_size < 10:
for p in [25, 50, 75, 100]:
if p in percentiles:
filtered_percentiles.append(p)
else:
effective_sample_size = 10 ** (int(math.log10(sample_size))) # round down to nearest power of ten
delta = 0.000001 # If (p / 100) * effective_sample_size is within this value of a whole number,
# assume the discrepancy is due to floating point and allow it
for p in percentiles:
fraction = p / 100

# check if fraction * effective_sample_size is close enough to a whole number
if abs((effective_sample_size * fraction) - round(effective_sample_size*fraction)) < delta:
if abs((effective_sample_size * fraction) - round(effective_sample_size*fraction)) < delta or p in [25, 75]:
filtered_percentiles.append(p)
# if no percentiles are suitable, just return 100
if len(filtered_percentiles) == 0:
return [100]
return filtered_percentiles

def percentiles_for_sample_size(sample_size, latency_percentiles=None):
def percentiles_for_sample_size(sample_size, percentiles_list=None):
# If latency_percentiles is present, as a list, display those values instead (assuming there are enough samples)
percentiles = GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES_LIST
if latency_percentiles:
percentiles = latency_percentiles # Defaults get overridden if a value is provided
percentiles = []
if percentiles_list:
percentiles = percentiles_list # Defaults get overridden if a value is provided
percentiles.sort()
return filter_percentiles_by_sample_size(sample_size, percentiles)

class GlobalStatsCalculator:
DEFAULT_LATENCY_PERCENTILES = "50,90,99,99.9,99.99,100"
DEFAULT_LATENCY_PERCENTILES_LIST = [float(value) for value in DEFAULT_LATENCY_PERCENTILES.split(",")]

def __init__(self, store, workload, test_procedure, latency_percentiles=None):
DEFAULT_THROUGHPUT_PERCENTILES = ""
DEFAULT_THROUGHPUT_PERCENTILES_LIST = []

OTHER_PERCENTILES = [50,90,99,99.9,99.99,100]
# Use these percentiles when the single_latency fn is called for something other than latency

def __init__(self, store, workload, test_procedure, latency_percentiles=None, throughput_percentiles=None):
self.store = store
self.logger = logging.getLogger(__name__)
self.workload = workload
self.test_procedure = test_procedure
self.latency_percentiles = latency_percentiles
self.throughput_percentiles = throughput_percentiles

def __call__(self):
result = GlobalStats()
Expand All @@ -1739,7 +1759,7 @@ def __call__(self):
result.add_op_metrics(
t,
task.operation.name,
self.summary_stats("throughput", t, op_type),
self.summary_stats("throughput", t, op_type, percentiles_list=self.throughput_percentiles),
self.single_latency(t, op_type),
self.single_latency(t, op_type, metric_name="service_time"),
self.single_latency(t, op_type, metric_name="processing_time"),
@@ -1817,28 +1837,44 @@ def sum(self, metric_name):
def one(self, metric_name):
return self.store.get_one(metric_name)

def summary_stats(self, metric_name, task_name, operation_type):
def summary_stats(self, metric_name, task_name, operation_type, percentiles_list=None):
mean = self.store.get_mean(metric_name, task=task_name, operation_type=operation_type, sample_type=SampleType.Normal)
median = self.store.get_median(metric_name, task=task_name, operation_type=operation_type, sample_type=SampleType.Normal)
unit = self.store.get_unit(metric_name, task=task_name, operation_type=operation_type)
stats = self.store.get_stats(metric_name, task=task_name, operation_type=operation_type, sample_type=SampleType.Normal)

result = {}
if mean and median and stats:
return {
result = {
"min": stats["min"],
"mean": mean,
"median": median,
"max": stats["max"],
"unit": unit
}
else:
return {
result = {
"min": None,
"mean": None,
"median": None,
"max": None,
"unit": unit
}

if percentiles_list: # modified from single_latency()
sample_size = stats["count"]
percentiles = self.store.get_percentiles(metric_name,
task=task_name,
operation_type=operation_type,
sample_type=SampleType.Normal,
percentiles=percentiles_for_sample_size(
sample_size,
percentiles_list=percentiles_list))
for k, v in percentiles.items():
# safely encode so we don't have any dots in field names
result[encode_float_key(k)] = v
return result

def shard_stats(self, metric_name):
values = self.store.get_raw(metric_name, mapper=lambda doc: doc["per-shard"])
unit = self.store.get_unit(metric_name)
Expand Down Expand Up @@ -1896,6 +1932,9 @@ def single_latency(self, task, operation_type, metric_name="latency"):
sample_type = SampleType.Normal
stats = self.store.get_stats(metric_name, task=task, operation_type=operation_type, sample_type=sample_type)
sample_size = stats["count"] if stats else 0
percentiles_list = self.OTHER_PERCENTILES
if metric_name == "latency":
percentiles_list = self.latency_percentiles
if sample_size > 0:
# The custom latency percentiles have to be supplied here as the workload runs,
# or else they aren't present when results are published
@@ -1905,7 +1944,7 @@ def single_latency(self, task, operation_type, metric_name="latency"):
sample_type=sample_type,
percentiles=percentiles_for_sample_size(
sample_size,
latency_percentiles=self.latency_percentiles
percentiles_list=percentiles_list
))
mean = self.store.get_mean(metric_name,
task=task,
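
To make the percentile filtering above easier to follow, here is a minimal self-contained sketch that mirrors filter_percentiles_by_sample_size as it appears in the diff (the sample_size <= 0 guard is assumed from the AssertionError shown above; the rest follows the diff):

    import math

    def filter_percentiles_by_sample_size(sample_size, percentiles):
        if sample_size <= 0:  # guard assumed; only the AssertionError itself is visible in the diff
            raise AssertionError("Percentiles require at least one sample")
        filtered_percentiles = []
        if sample_size == 1:
            filtered_percentiles = [100]
        elif sample_size < 4:
            filtered_percentiles = [p for p in [50, 100] if p in percentiles]
        elif sample_size < 10:
            filtered_percentiles = [p for p in [25, 50, 75, 100] if p in percentiles]
        else:
            # round down to the nearest power of ten, then keep percentiles that are
            # resolvable at that sample size, plus the quartiles 25 and 75
            effective_sample_size = 10 ** int(math.log10(sample_size))
            delta = 0.000001
            for p in percentiles:
                fraction = p / 100
                if abs(effective_sample_size * fraction - round(effective_sample_size * fraction)) < delta or p in [25, 75]:
                    filtered_percentiles.append(p)
        return filtered_percentiles if filtered_percentiles else [100]

    # With 100 samples, 99.9 is too fine-grained and is dropped, while 25 and 75 survive:
    print(filter_percentiles_by_sample_size(100, [25, 50, 75, 90, 99.9, 100]))
    # [25, 50, 75, 90, 100]

This is also why the expected lists in tests/metrics_test.py below now include 25 and 75 for every sample size of 4 or more.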
14 changes: 10 additions & 4 deletions osbenchmark/results_publisher.py
@@ -122,7 +122,10 @@ def __init__(self, results, config):
self.show_processing_time = convert.to_bool(config.opts("results_publishing", "output.processingtime",
mandatory=False, default_value=False))
self.cwd = config.opts("node", "benchmark.cwd")
self.latency_percentiles = comma_separated_string_to_number_list(config.opts("workload", "latency.percentiles", mandatory=False))
self.display_percentiles = {
"throughput":comma_separated_string_to_number_list(config.opts("workload", "throughput.percentiles", mandatory=False)),
"latency": comma_separated_string_to_number_list(config.opts("workload", "latency.percentiles", mandatory=False))
}

def publish(self):
print_header(FINAL_SCORE)
@@ -184,7 +187,8 @@ def _publish_throughput(self, values, task):
self._line("Min Throughput", task, throughput["min"], unit, lambda v: "%.2f" % v),
self._line("Mean Throughput", task, throughput["mean"], unit, lambda v: "%.2f" % v),
self._line("Median Throughput", task, throughput["median"], unit, lambda v: "%.2f" % v),
self._line("Max Throughput", task, throughput["max"], unit, lambda v: "%.2f" % v)
self._line("Max Throughput", task, throughput["max"], unit, lambda v: "%.2f" % v),
*self._publish_percentiles("throughput", task, throughput)
)

def _publish_latency(self, values, task):
@@ -198,8 +202,10 @@ def _publish_processing_time(self, values, task):

def _publish_percentiles(self, name, task, value):
lines = []
percentiles = self.display_percentiles.get(name, metrics.GlobalStatsCalculator.OTHER_PERCENTILES)

if value:
for percentile in metrics.percentiles_for_sample_size(sys.maxsize, latency_percentiles=self.latency_percentiles):
for percentile in metrics.percentiles_for_sample_size(sys.maxsize, percentiles_list=percentiles):
percentile_value = value.get(metrics.encode_float_key(percentile))
a_line = self._line("%sth percentile %s" % (percentile, name), task, percentile_value, "ms",
force=self.publish_all_percentile_values)
@@ -436,7 +442,7 @@ def _publish_percentiles(self, name, task, baseline_values, contender_values):

def _publish_percentiles(self, name, task, baseline_values, contender_values):
lines = []
for percentile in metrics.percentiles_for_sample_size(sys.maxsize, latency_percentiles=self.latency_percentiles):
for percentile in metrics.percentiles_for_sample_size(sys.maxsize, percentiles_list=self.latency_percentiles):
baseline_value = baseline_values.get(metrics.encode_float_key(percentile))
contender_value = contender_values.get(metrics.encode_float_key(percentile))
self._append_non_empty(lines, self._line("%sth percentile %s" % (percentile, name),
20 changes: 12 additions & 8 deletions tests/metrics_test.py
@@ -1797,9 +1797,11 @@ def test_filter_percentiles_by_sample_size(self):
4,
10,
10.01,
25,
45,
46.001,
50,
75,
80.1,
90,
98.9,
@@ -1813,14 +1815,16 @@ def test_filter_percentiles_by_sample_size(self):
100]
sample_size_to_result_map = {
1: [100],
5: [50, 100],
10: [0, 10, 50, 90, 100],
99: [0, 10, 50, 90, 100],
100: [0, 4, 10, 45, 50, 90, 99, 100],
1000: [0, 0.1, 4, 10, 45, 50, 80.1, 90, 98.9, 99, 99.9, 100],
10000: [0, 0.01, 0.1, 4, 10, 10.01, 45, 50, 80.1, 90, 98.9, 98.91, 99, 99.9, 99.99, 100],
100000: [0, 0.001, 0.01, 0.1, 4, 10, 10.01, 45, 46.001, 50, 80.1, 90, 98.9, 98.91, 98.999, 99, 99.9, 99.99, 99.999, 100],
1000000: [0, 0.0001, 0.001, 0.01, 0.1, 4, 10, 10.01, 45, 46.001, 50,
2: [50, 100],
4: [25, 50, 75, 100],
10: [0, 10, 25, 50, 75, 90, 100],
99: [0, 10, 25, 50, 75, 90, 100],
100: [0, 4, 10, 25, 45, 50, 75, 90, 99, 100],
1000: [0, 0.1, 4, 10, 25, 45, 50, 75, 80.1, 90, 98.9, 99, 99.9, 100],
10000: [0, 0.01, 0.1, 4, 10, 10.01, 25, 45, 50, 75, 80.1, 90, 98.9, 98.91, 99, 99.9, 99.99, 100],
100000: [0, 0.001, 0.01, 0.1, 4, 10, 10.01, 25, 45, 46.001, 50, 75,
80.1, 90, 98.9, 98.91, 98.999, 99, 99.9, 99.99, 99.999, 100],
1000000: [0, 0.0001, 0.001, 0.01, 0.1, 4, 10, 10.01, 25, 45, 46.001, 50, 75,
80.1, 90, 98.9, 98.91, 98.999, 99, 99.9, 99.99, 99.999, 99.9999, 100]
} # 100,000 corresponds to 0.001% which is the order of magnitude we round to,
# so at higher orders (>=1M samples) all values are permitted
