From 5f1951513d3eaa56c30431de177d8ecfe4b1f2b4 Mon Sep 17 00:00:00 2001
From: Liyun Xiu
Date: Wed, 3 Apr 2024 23:31:26 +0800
Subject: [PATCH 1/2] Support request-params for bulk operation (#501)

Adding "request-params" support for the bulk operation.

Signed-off-by: Liyun Xiu
---
 osbenchmark/worker_coordinator/runner.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/osbenchmark/worker_coordinator/runner.py b/osbenchmark/worker_coordinator/runner.py
index e31deca83..9e03bfb8f 100644
--- a/osbenchmark/worker_coordinator/runner.py
+++ b/osbenchmark/worker_coordinator/runner.py
@@ -475,6 +475,7 @@ async def __call__(self, opensearch, params):
         The following keys are optional:
 
         * ``pipeline``: If present, runs the specified ingest pipeline for this bulk.
+        * ``request-params``: If present, these parameters are passed through as request parameters of the bulk call.
         * ``detailed-results``: If ``True``, the runner will analyze the response and add detailed meta-data. Defaults to
           ``False``. Note that this has a very significant impact on performance and will very likely cause a bottleneck
           in the benchmark worker_coordinator so please
@@ -486,12 +487,17 @@ async def __call__(self, opensearch, params):
         ``None`` and potentially falls back to the global timeout setting.
         """
         detailed_results = params.get("detailed-results", False)
-        api_kwargs = self._default_kw_params(params)
 
         bulk_params = {}
         if "pipeline" in params:
             bulk_params["pipeline"] = params["pipeline"]
 
+        if "request-params" in params:
+            bulk_params.update(params["request-params"])
+            params.pop("request-params")
+
+        api_kwargs = self._default_kw_params(params)
+
         with_action_metadata = mandatory(params, "action-metadata-present", self)
         bulk_size = mandatory(params, "bulk-size", self)
         unit = mandatory(params, "unit", self)

From bd79dc098147c25e34b70af77d4b1ef5bd2f0504 Mon Sep 17 00:00:00 2001
From: Ian Hoang <51065478+IanHoang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 10:38:08 -0500
Subject: [PATCH 2/2] Allow users to specify percentiles for compare subcommand (#503) (#504)

Signed-off-by: Ian Hoang
Co-authored-by: Ian Hoang
---
 osbenchmark/benchmark.py         | 6 ++++++
 osbenchmark/results_publisher.py | 5 +++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/osbenchmark/benchmark.py b/osbenchmark/benchmark.py
index f23e79f35..d07819e1a 100644
--- a/osbenchmark/benchmark.py
+++ b/osbenchmark/benchmark.py
@@ -197,6 +197,11 @@ def add_workload_source(subparser):
         "--contender",
         required=True,
         help=f"TestExecution ID of the contender (see {PROGRAM_NAME} list test_executions).")
+    compare_parser.add_argument(
+        "--percentiles",
+        help="A comma-separated list of percentiles to report for latency and service time "
+ f"(default: {metrics.GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES}).", + default=metrics.GlobalStatsCalculator.DEFAULT_LATENCY_PERCENTILES) compare_parser.add_argument( "--results-format", help="Define the output format for the command line results (default: markdown).", @@ -834,6 +839,7 @@ def dispatch_sub_command(arg_parser, args, cfg): try: if sub_command == "compare": configure_results_publishing_params(args, cfg) + cfg.add(config.Scope.applicationOverride, "results_publishing", "percentiles", args.percentiles) results_publisher.compare(cfg, args.baseline, args.contender) elif sub_command == "list": cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration) diff --git a/osbenchmark/results_publisher.py b/osbenchmark/results_publisher.py index 61999ae7c..ac9a631cf 100644 --- a/osbenchmark/results_publisher.py +++ b/osbenchmark/results_publisher.py @@ -337,6 +337,7 @@ def _line(self, k, task, v, unit, converter=lambda x: x, force=False): class ComparisonResultsPublisher: def __init__(self, config): + self.logger = logging.getLogger(__name__) self.results_file = config.opts("results_publishing", "output.path") self.results_format = config.opts("results_publishing", "format") self.numbers_align = config.opts("results_publishing", "numbers.align", @@ -344,7 +345,7 @@ def __init__(self, config): self.cwd = config.opts("node", "benchmark.cwd") self.show_processing_time = convert.to_bool(config.opts("results_publishing", "output.processingtime", mandatory=False, default_value=False)) - self.latency_percentiles = comma_separated_string_to_number_list(config.opts("workload", "latency.percentiles", mandatory=False)) + self.percentiles = comma_separated_string_to_number_list(config.opts("results_publishing", "percentiles", mandatory=False)) self.plain = False def publish(self, r1, r2): @@ -442,7 +443,7 @@ def _publish_processing_time(self, baseline_stats, contender_stats, task): def _publish_percentiles(self, name, task, baseline_values, contender_values): lines = [] - for percentile in metrics.percentiles_for_sample_size(sys.maxsize, percentiles_list=self.latency_percentiles): + for percentile in metrics.percentiles_for_sample_size(sys.maxsize, percentiles_list=self.percentiles): baseline_value = baseline_values.get(metrics.encode_float_key(percentile)) contender_value = contender_values.get(metrics.encode_float_key(percentile)) self._append_non_empty(lines, self._line("%sth percentile %s" % (percentile, name),