From 5f1951513d3eaa56c30431de177d8ecfe4b1f2b4 Mon Sep 17 00:00:00 2001
From: Liyun Xiu
Date: Wed, 3 Apr 2024 23:31:26 +0800
Subject: [PATCH 1/2] Support request-params for bulk operation (#501)

Adding "request-params" support for the bulk operation.

Signed-off-by: Liyun Xiu
---
 osbenchmark/worker_coordinator/runner.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/osbenchmark/worker_coordinator/runner.py b/osbenchmark/worker_coordinator/runner.py
index e31deca83..9e03bfb8f 100644
--- a/osbenchmark/worker_coordinator/runner.py
+++ b/osbenchmark/worker_coordinator/runner.py
@@ -475,6 +475,7 @@ async def __call__(self, opensearch, params):
         The following keys are optional:
 
         * ``pipeline``: If present, runs the specified ingest pipeline for this bulk.
+        * ``request-params``: If present, these parameters are passed through as request parameters of the bulk call.
         * ``detailed-results``: If ``True``, the runner will analyze the response and add detailed meta-data. Defaults to
           ``False``. Note that this has a very significant impact on performance and will very likely cause a bottleneck
           in the benchmark worker_coordinator so please
@@ -486,12 +487,17 @@ async def __call__(self, opensearch, params):
         ``None`` and potentially falls back to the global timeout setting.
         """
         detailed_results = params.get("detailed-results", False)
-        api_kwargs = self._default_kw_params(params)
 
         bulk_params = {}
         if "pipeline" in params:
             bulk_params["pipeline"] = params["pipeline"]
 
+        if "request-params" in params:
+            bulk_params.update(params["request-params"])
+            params.pop("request-params")
+
+        api_kwargs = self._default_kw_params(params)
+
         with_action_metadata = mandatory(params, "action-metadata-present", self)
         bulk_size = mandatory(params, "bulk-size", self)
         unit = mandatory(params, "unit", self)

From bd79dc098147c25e34b70af77d4b1ef5bd2f0504 Mon Sep 17 00:00:00 2001
From: Ian Hoang <51065478+IanHoang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 10:38:08 -0500
Subject: [PATCH 2/2] Allow users to specify percentiles for compare subcommand (#503) (#504)

Signed-off-by: Ian Hoang
Co-authored-by: Ian Hoang
---
 osbenchmark/benchmark.py         | 6 ++++++
 osbenchmark/results_publisher.py | 5 +++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/osbenchmark/benchmark.py b/osbenchmark/benchmark.py
index f23e79f35..d07819e1a 100644
--- a/osbenchmark/benchmark.py
+++ b/osbenchmark/benchmark.py
@@ -197,6 +197,11 @@ def add_workload_source(subparser):
         "--contender",
         required=True,
         help=f"TestExecution ID of the contender (see {PROGRAM_NAME} list test_executions).")
+    compare_parser.add_argument(
+        "--percentiles",
+        help="A comma-separated list of percentiles to report for latency and service time "
+ f"(default: {metrics.GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES}).", + default=metrics.GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES) compare_parser.add_argument( "--results-format", help="Define the output format for the command line results (default: markdown).", @@ -834,6 +839,7 @@ def dispatch_sub_command(arg_parser, args, cfg): try: if sub_command == "compare": configure_results_publishing_params(args, cfg) + cfg.add(config.Scope.applicationOverride, "results_publishing", "percentiles", args.percentiles) results_publisher.compare(cfg, args.baseline, args.contender) elif sub_command == "list": cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration) diff --git a/osbenchmark/results_publisher.py b/osbenchmark/results_publisher.py index 61999ae7c..ac9a631cf 100644 --- a/osbenchmark/results_publisher.py +++ b/osbenchmark/results_publisher.py @@ -337,6 +337,7 @@ def _line(self, k, task, v, unit, converter=lambda x: x, force=False): class ComparisonResultsPublisher: def __init__(self, config): + self.logger = logging.getLogger(__name__) self.results_file = config.opts("results_publishing", "output.path") self.results_format = config.opts("results_publishing", "format") self.numbers_align = config.opts("results_publishing", "numbers.align", @@ -344,7 +345,7 @@ def __init__(self, config): self.cwd = config.opts("node", "benchmark.cwd") self.show_processing_time = convert.to_bool(config.opts("results_publishing", "output.processingtime", mandatory=False, default_value=False)) - self.latency_percentiles = comma_separated_string_to_number_list(config.opts("workload", "latency.percentiles", mandatory=False)) + self.percentiles = comma_separated_string_to_number_list(config.opts("results_publishing", "percentiles", mandatory=False)) self.plain = False def publish(self, r1, r2): @@ -442,7 +443,7 @@ def _publish_processing_time(self, baseline_stats, contender_stats, task): def _publish_percentiles(self, name, task, baseline_values, contender_values): lines = [] - for percentile in metrics.percentiles_for_sample_size(sys.maxsize, percentiles_list=self.latency_percentiles): + for percentile in metrics.percentiles_for_sample_size(sys.maxsize, percentiles_list=self.percentiles): baseline_value = baseline_values.get(metrics.encode_float_key(percentile)) contender_value = contender_values.get(metrics.encode_float_key(percentile)) self._append_non_empty(lines, self._line("%sth percentile %s" % (percentile, name),