add relative standard deviation to aggregated test execution metrics #681
```diff
@@ -1,4 +1,5 @@
 import os
+import statistics
 from typing import Any, Dict, List, Union
 import uuid
```
```diff
@@ -123,14 +124,17 @@ def build_aggregated_results(self):
             op_metric = {
                 "task": task,
                 "operation": task,
-                "throughput": aggregated_task_metrics["throughput"],
-                "latency": aggregated_task_metrics["latency"],
-                "service_time": aggregated_task_metrics["service_time"],
-                "client_processing_time": aggregated_task_metrics["client_processing_time"],
-                "processing_time": aggregated_task_metrics["processing_time"],
-                "error_rate": aggregated_task_metrics["error_rate"],
-                "duration": aggregated_task_metrics["duration"]
             }
+            for metric in self.metrics:
+                op_metric[metric] = aggregated_task_metrics[metric]
+                if isinstance(aggregated_task_metrics[metric], dict):
+                    mean_values = [v['mean'] for v in task_metrics[metric]]
+                    rsd = self.calculate_rsd(mean_values)
+                    op_metric[metric]['mean_rsd'] = rsd
+                else:
+                    rsd = self.calculate_rsd(task_metrics[metric])
+                    op_metric[f"{metric}_rsd"] = rsd

             aggregated_results["op_metrics"].append(op_metric)

         # extract the necessary data from the first test execution, since the configurations should be identical for all test executions
```
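To make the two branches concrete, here is a minimal standalone sketch with made-up numbers, a hypothetical two-metric tuple standing in for `self.metrics`, and a local copy of the new `calculate_rsd` helper. Dict-valued metrics such as `latency` gain a nested `mean_rsd` key, while scalar metrics such as `error_rate` get a flat `<metric>_rsd` key:

```python
import statistics

def calculate_rsd(values):
    # Local stand-in for the new helper: RSD as a percentage of the mean.
    mean = statistics.mean(values)
    std_dev = statistics.stdev(values) if len(values) > 1 else 0
    return (std_dev / mean) * 100 if mean != 0 else 0

# Hypothetical per-execution values for one task (real metric dicts also
# carry percentiles, units, etc.).
task_metrics = {
    "latency": [{"mean": 10.0}, {"mean": 12.0}],  # dict-valued metric
    "error_rate": [0.0, 0.02],                    # scalar metric
}
aggregated_task_metrics = {
    "latency": {"mean": 11.0},
    "error_rate": 0.01,
}

op_metric = {"task": "index-append", "operation": "index-append"}
for metric in ("latency", "error_rate"):          # stand-in for self.metrics
    op_metric[metric] = aggregated_task_metrics[metric]
    if isinstance(aggregated_task_metrics[metric], dict):
        # RSD across the per-execution means of a nested metric.
        mean_values = [v["mean"] for v in task_metrics[metric]]
        op_metric[metric]["mean_rsd"] = calculate_rsd(mean_values)
    else:
        # RSD across the raw per-execution values of a flat metric.
        op_metric[f"{metric}_rsd"] = calculate_rsd(task_metrics[metric])

print(op_metric["latency"]["mean_rsd"])  # ~12.9 (stdev ~1.41 over mean 11.0)
print(op_metric["error_rate_rsd"])       # ~141.4 (large spread relative to mean)
```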
```diff
@@ -184,8 +188,8 @@ def calculate_weighted_average(self, task_metrics: Dict[str, List[Any]], iterations: int) -> Dict[str, Any]:
         weighted_metrics = {}

         for metric, values in task_metrics.items():
-            weighted_metrics[metric] = {}
             if isinstance(values[0], dict):
+                weighted_metrics[metric] = {}
                 for item_key in values[0].keys():
                     if item_key == 'unit':
                         weighted_metrics[metric][item_key] = values[0][item_key]
```
```diff
@@ -194,18 +198,28 @@ def calculate_weighted_average(self, task_metrics: Dict[str, List[Any]], iterations: int) -> Dict[str, Any]:
                     if iterations > 1:
                         weighted_sum = sum(value * iterations for value in item_values)
                         total_iterations = iterations * len(values)
-                        weighted_metrics[metric][item_key] = weighted_sum / total_iterations
+                        weighted_avg = weighted_sum / total_iterations
                     else:
-                        weighted_metrics[metric][item_key] = sum(item_values) / len(item_values)
+                        weighted_avg = sum(item_values) / len(item_values)
+                    weighted_metrics[metric][item_key] = weighted_avg
             else:
                 if iterations > 1:
                     weighted_sum = sum(value * iterations for value in values)
                     total_iterations = iterations * len(values)
-                    weighted_metrics[metric] = weighted_sum / total_iterations
+                    weighted_avg = weighted_sum / total_iterations
                 else:
-                    weighted_metrics[metric] = sum(values) / len(values)
+                    weighted_avg = sum(values) / len(values)
+                weighted_metrics[metric] = weighted_avg
```
Review comment: Why the special case for `iterations > 1`?

Author reply: The special case is fixed, but I want to include support for custom iteration values in the future, so I think it should stay for now.
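As a footnote to that exchange, a standalone sketch of the scalar branch (hypothetical inputs, not part of the diff) shows why the branch was questioned: with a uniform `iterations` count the weighted form algebraically reduces to the plain mean, so the two code paths only diverge once per-execution iteration counts can differ.

```python
def weighted_average(values, iterations):
    # Mirrors the scalar branch above; `iterations` weights every execution equally.
    if iterations > 1:
        weighted_sum = sum(value * iterations for value in values)
        total_iterations = iterations * len(values)
        return weighted_sum / total_iterations
    return sum(values) / len(values)

# Both branches agree while the weight is uniform across executions:
print(weighted_average([100.0, 110.0, 120.0], iterations=1))  # 110.0
print(weighted_average([100.0, 110.0, 120.0], iterations=3))  # 110.0
```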
```diff

         return weighted_metrics

+    def calculate_rsd(self, values):
+        if not values:
+            return 0
+
+        mean = statistics.mean(values)
+        std_dev = statistics.stdev(values) if len(values) > 1 else 0
+
+        return (std_dev / mean) * 100 if mean != 0 else 0
+
     def test_execution_compatibility_check(self) -> None:
         first_test_execution = self.test_store.find_by_test_execution_id(list(self.test_executions.keys())[0])
         workload = first_test_execution.workload
```
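As a quick sanity check of the helper's guards, here is a standalone copy exercised with made-up values (illustration only, not part of the diff):

```python
import statistics

def calculate_rsd(values):
    # Standalone copy of the new helper, for illustration only.
    if not values:
        return 0
    mean = statistics.mean(values)
    std_dev = statistics.stdev(values) if len(values) > 1 else 0
    return (std_dev / mean) * 100 if mean != 0 else 0

print(calculate_rsd([98.0, 102.0, 100.0]))  # 2.0 -- stdev 2.0 over mean 100.0
print(calculate_rsd([100.0]))               # 0   -- a single value has no spread
print(calculate_rsd([]))                    # 0   -- empty-input guard
print(calculate_rsd([0.0, 0.0]))            # 0   -- zero-mean guard avoids division by zero
```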
Review comment: Some comments would be helpful here -- the first clause is for the standard metrics, the second for derived ones like error rate and duration, etc. The documentation should also indicate the rationale as to why RSD is computed for the mean instead of the median.

Author reply: I'll also speak to Nate about these changes 👍