Cloudant worker rampup (#47)

* Add worker rampup logic
cloudant · Jul 7, 2020 · 21c8716 · 21c8716
1 parent 346d9de
commit 21c8716
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 18 deletions.
diff --git a/src/basho_bench_duration.erl b/src/basho_bench_duration.erl
@@ -28,7 +28,8 @@
 -record(state, {
     ref,
     duration,
-    start
+    start,
+    rampup_interval
 }).
 
 -include("basho_bench.hrl").
@@ -50,7 +51,7 @@ start_link() ->
 init([]) ->
     run_hook(basho_bench_config:get(pre_hook, no_op)),
     Ref = erlang:monitor(process, whereis(basho_bench_run_sup)),
-    {ok, #state{ref=Ref}}.
+    {ok, maybe_add_rampup(#state{ref=Ref})}.
 
 
 handle_call(run, _From, State) ->
@@ -60,6 +61,9 @@ handle_call(run, _From, State) ->
         start=os:timestamp(),
         duration=DurationMins
     },
+    if NewState#state.rampup_interval =:= undefined -> ok; true ->
+        timer:send_interval(NewState#state.rampup_interval, rampup)
+    end,
     maybe_end({reply, ok, NewState});
 
 handle_call(remaining, _From, State) ->
@@ -90,13 +94,20 @@ handle_info({'DOWN', Ref, process, _Object, Info}, #state{ref=Ref}=State) ->
 handle_info(timeout, State) ->
     {stop, {shutdown, normal}, State};
 
+handle_info(rampup, State) ->
+    ?INFO("Triggering worker rampup", []),
+    add_worker(),
+    maybe_end({noreply, State});
+
 handle_info(Msg, State) ->
     ?WARN("basho_bench_duration handled unexpected info message: ~p", [Msg]),
-    {noreply, State}.
+    maybe_end({noreply, State}).
 
 
 terminate(Reason, #state{duration=DurationMins}) ->
     case Reason of
+        normal ->
+            ?CONSOLE("Test completed after ~p mins.\n", [DurationMins]);
         {shutdown, normal} ->
             ?CONSOLE("Test completed after ~p mins.\n", [DurationMins]);
         {shutdown, Reason} ->
@@ -165,3 +176,25 @@ worker_stopping(WorkerPid) ->
     %% WorkerPid is basho_bench_worker's id, not the pid of actual driver 
     gen_server:cast(?MODULE, {worker_stopping, WorkerPid}),
     ok.
+
+maybe_add_rampup(State) ->
+    case basho_bench_config:get(workers_rampup, undefined) of
+        undefined ->
+            State;
+        Interval when is_integer(Interval) ->
+            State#state{rampup_interval=Interval};
+        %% TODO: should we support per type intervals?
+        [{_Type, Interval} | _ ] when is_integer(Interval) ->
+            State#state{rampup_interval=Interval};
+        Else ->
+            throw({unexpected_rampup, Else})
+    end.
+
+add_worker() ->
+    case basho_bench_config:get(workers, undefined) of
+        undefined ->
+            basho_bench_worker_sup:add_worker();
+        [_|_] = Workers ->
+            WorkerTypes = [WT || {WT, _C} <- Workers],
+            basho_bench_worker_sup:add_workers(WorkerTypes)
+    end.
diff --git a/src/basho_bench_stats.erl b/src/basho_bench_stats.erl
@@ -270,9 +270,10 @@ process_stats(Now, #state{stats_writer=Module}=State) ->
     %% Reset Ops
     [folsom_metrics_counter:dec({ops, Op}, OpsAmount) || {Op, _, OpsAmount} <- OkOpsRes],
 
+    Concurrency = basho_bench_worker_sup:worker_count(),
     %% Write summary
     Module:process_summary(State#state.stats_writer_data,
-                           Elapsed, Window, Ops, Oks, Errors),
+                           Elapsed, Window, Concurrency, Ops, Oks, Errors),
 
     %% Dump current error counts to console
     case (State#state.errors_since_last_report) of
@@ -295,7 +296,8 @@ process_global_stats(#state{stats_writer=Module}=State) ->
         Errors = error_counter(Op),
         Units = folsom_metrics:get_metric_value({overall_units, Op}),
         Ops = folsom_metrics:get_metric_value({overall_ops, Op}),
-        Module:report_global_stats(Op, Stats, Errors, Units, Ops)
+        Concurrency = basho_bench_worker_sup:worker_count(),
+        Module:report_global_stats(Op, Stats, Errors, Units, Ops, Concurrency)
     end, State#state.ops).
 
 %%
@@ -307,10 +309,11 @@ report_latency(#state{stats_writer=Module}=State, Elapsed, Window, Op) ->
     Errors = error_counter(Op),
     Units = folsom_metrics:get_metric_value({units, Op}),
     Ops = folsom_metrics:get_metric_value({ops, Op}),
+    Concurrency = basho_bench_worker_sup:worker_count(),
 
     Module:report_latency({State#state.stats_writer,
                                              State#state.stats_writer_data},
-                                            Elapsed, Window, Op,
+                                            Elapsed, Window, Concurrency, Op,
                                             Stats, Errors, Units, Ops),
     {Ops, Units, Errors}.
 

diff --git a/src/basho_bench_stats_writer_csv.erl b/src/basho_bench_stats_writer_csv.erl
@@ -33,10 +33,10 @@
 
 -export([new/2,
          terminate/1,
-         process_summary/6,
+         process_summary/7,
          report_error/3,
-         report_global_stats/5,
-         report_latency/8]).
+         report_global_stats/6,
+         report_latency/9]).
 
 -include("basho_bench.hrl").
 
@@ -57,7 +57,7 @@ new(Ops, Measurements) ->
         filename:join([TestDir, "summary.csv"]),
         [raw, binary, write]
     ),
-    file:write(SummaryFile, <<"elapsed, window, total_operations, total_units, successful, failed\n">>),
+    file:write(SummaryFile, <<"elapsed, window, concurrency, total_operations, total_units, successful, failed\n">>),
 
     %% Setup errors file w/counters for each error.  Embedded commas likely
     %% in the error messages so quote the columns.
@@ -82,11 +82,12 @@ terminate({SummaryFile, ErrorsFile}) ->
     ok.
 
 process_summary({SummaryFile, _ErrorsFile},
-                Elapsed, Window, Ops, Oks, Errors) ->
+                Elapsed, Window, Concurrency, Ops, Oks, Errors) ->
     file:write(SummaryFile,
-               io_lib:format("~w, ~w, ~w, ~w, ~w, ~w\n",
+               io_lib:format("~w, ~w, ~w, ~w, ~w, ~w, ~w\n",
                              [Elapsed,
                               Window,
+                              Concurrency,
                               Ops + Errors,
                               Oks,
                               Ops,
@@ -98,7 +99,7 @@ report_error({_SummaryFile, ErrorsFile},
                io_lib:format("\"~w\",\"~w\"\n",
                              [Key, Count])).
 
-report_global_stats({Op,_}, Stats, Errors, Units, Ops) ->
+report_global_stats({Op,_}, Stats, Errors, Units, Ops, Concurrency) ->
     %% Build up JSON structure representing statistics collected in folsom
     P = proplists:get_value(percentile, Stats),
     JsonElements0 = lists:foldl(fun(K, Acc) ->
@@ -136,20 +137,24 @@ report_global_stats({Op,_}, Stats, Errors, Units, Ops) ->
     %% insert Ops counts
     JsonElements3 = [{'ops', Ops} | JsonElements2],
 
+    %% insert concurrency
+    JsonElements4 = [{concurrency, Concurrency} | JsonElements3],
+
     JsonMetrics0 = erlang:get(run_metrics),
-    JsonMetrics = lists:keyreplace(Op, 1, JsonMetrics0, {Op, {JsonElements3}}),
+    JsonMetrics = lists:keyreplace(Op, 1, JsonMetrics0, {Op, {JsonElements4}}),
     %?DEBUG("Generated Json:\n~w",[JsonElements]),
     erlang:put(run_metrics, JsonMetrics).
 
 report_latency({_SummaryFile, _ErrorsFile},
-               Elapsed, Window, Op,
+               Elapsed, Window, Concurrency, Op,
                Stats, Errors, Units, Ops) ->
     case proplists:get_value(n, Stats) > 0 of
         true ->
             P = proplists:get_value(percentile, Stats),
-            Line = io_lib:format("~w, ~w, ~w, ~w, ~w, ~.1f, ~w, ~w, ~w, ~w, ~w, ~w\n",
+            Line = io_lib:format("~w, ~w, ~w, ~w, ~w, ~w, ~.1f, ~w, ~w, ~w, ~w, ~w, ~w\n",
                                  [Elapsed,
                                   Window,
+                                  Concurrency,
                                   Units,
                                   Ops,
                                   proplists:get_value(min, Stats),
@@ -177,7 +182,7 @@ op_csv_file({Label, _Op}) ->
     TestDir = basho_bench:get_test_dir(),
     Fname = filename:join([TestDir, normalize_label(Label) ++ "_latencies.csv"]),
     {ok, F} = file:open(Fname, [raw, binary, write]),
-    ok = file:write(F, <<"elapsed, window, n, ops, min, mean, median, 95th, 99th, 99_9th, max, errors\n">>),
+    ok = file:write(F, <<"elapsed, window, concurrency, n, ops, min, mean, median, 95th, 99th, 99_9th, max, errors\n">>),
     F.
 
 measurement_csv_file({Label, _Op}) ->
@@ -199,7 +204,7 @@ stringify_stats(_RunStatsType=json, RunMetrics) ->
     [ jiffy:encode( {[{recordedMetrics, {RunMetrics}}]}, [pretty] ) ];
 stringify_stats(_RunStatsType=csv, RunMetrics) ->
     % Ordered output of fields
-    OrderedFields = [n, ops, mean, geometric_mean, harmonic_mean,
+    OrderedFields = [n, ops, concurrency, mean, geometric_mean, harmonic_mean,
         variance, standard_deviation, skewness, kurtosis,
         median, p75, p90, p95, p99, p999,
         min, max, basho_errors],

diff --git a/src/basho_bench_worker_sup.erl b/src/basho_bench_worker_sup.erl
@@ -25,8 +25,12 @@
 
 %% API
 -export([start_link/0,
+         add_worker/0,
+         add_workers/1,
+         add_worker_spec/1,
          workers/0,
          workers/1,
+         worker_count/0,
          remote_workers/1,
          stop_child/1,
          active_workers/0]).
@@ -58,6 +62,21 @@ stop_child(Id) ->
 active_workers() ->
     [X || X <- workers(), X =/= undefined].
 
+worker_count() ->
+    case whereis(?MODULE) of
+        undefined ->
+            case erlang:get(last_worker_count) of
+                undefined ->
+                    0;
+                WC ->
+                    WC
+            end;
+        _Pid ->
+            WC = length(active_workers()),
+            erlang:put(last_worker_count, WC),
+            WC
+    end.
+
 
 %% ===================================================================
 %% Supervisor callbacks
@@ -78,6 +97,43 @@ init([]) ->
 %% ===================================================================
 
 
+add_worker() ->
+    WorkerCount = length(active_workers()),
+    WorkerNum = WorkerCount + 1,
+    Id = list_to_atom(lists:concat(['basho_bench_rampup_worker_', WorkerNum])),
+    %% Use "single_worker" atom for original non-worker case
+    Spec = {
+        Id,
+        {basho_bench_worker, start_link, [Id, {single_worker, WorkerNum, WorkerNum}, []]},
+        transient, 5000, worker, [basho_bench_worker]
+    },
+    add_worker_spec(Spec).
+
+
+add_workers(WorkerTypes) ->
+    add_workers(WorkerTypes, []).
+
+
+add_workers([], Acc) ->
+    Acc;
+add_workers([WorkerType|Rest], Acc) when is_atom(WorkerType) ->
+    WorkerTypes = basho_bench_config:get(worker_types),
+    Conf0 = proplists:get_value(WorkerType, WorkerTypes, []),
+    Conf = [{concurrent, 1} | proplists:delete(concurrent, Conf0)],
+    WorkerCount = length(active_workers()),
+    WorkerNum = WorkerCount + 1,
+    Id = list_to_atom(lists:concat(['basho_bench_rampup_worker_', WorkerType, '_', WorkerNum])),
+    Spec = {
+        Id,
+        {basho_bench_worker, start_link, [Id, {WorkerType, WorkerNum, WorkerNum}, Conf]},
+        transient, 5000, worker, [basho_bench_worker]},
+    add_workers(Rest, [add_worker_spec(Spec)|Acc]).
+
+
+add_worker_spec(Spec) ->
+    {ok, _Pid} = supervisor:start_child(?MODULE, Spec).
+
+
 worker_specs([]) ->
     worker_specs_single(basho_bench_config:get(concurrent), []);
 worker_specs(Workers) ->