Skip to content

Commit

Permalink
Vmstats should only be sent for full intervals
Browse files Browse the repository at this point in the history
It was possible to see spikes or dips in graphs when a service running
mondemand was restarted.  The initial sample was used as the baseline, so
if the trigger to send vmstats fired shortly after startup, the emitted
values covered only about 5 seconds of startup stats rather than a full
interval.  Instead, skip sending stats the first time and only start
sending on the second time.  This will leave a gap in graphs on restart,
but it means you don't end up with spikes which dwarf the surrounding
samples.
  • Loading branch information
djnym committed Feb 3, 2017
1 parent 8fe41ad commit dfa7b67
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 6 deletions.
10 changes: 8 additions & 2 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
Verison 6.6.0 (molinaro)
* add scheduler utilization
Version 6.6.1 (molinaro)
* make sure that mondemand vmstats metrics are not emitted without a full
interval having gone by. This means a restart of a service can lead to
a gap in vmstats, but that's better than the spikes which can happen
without waiting (IMHO).

Version 6.6.0 (molinaro)
* add scheduler utilization to vmstats

Version 6.5.0 (molinaro)
* add a call to get vmstats out from the vmstats sampler
Expand Down
2 changes: 1 addition & 1 deletion src/mondemand.app.src
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{ application, mondemand,
[
{ description, "Erlang Mondemand Bindings." },
{ vsn, "6.6.0" },
{ vsn, "6.6.1" },
{ modules, [] },
{ registered, [mondemand,mondemand_sup]},
{ applications, [kernel,stdlib,syntax_tools,lwes,inets]},
Expand Down
2 changes: 2 additions & 0 deletions src/mondemand.erl
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ send_annotation (Id, Time, Description, Text, Tags, Context) ->
),
send_event (Event).

send_stats (_, _, []) ->
ok;
send_stats (ProgId, Context, Stats) ->
Event =
mondemand_statsmsg:to_lwes (
Expand Down
17 changes: 14 additions & 3 deletions src/mondemand_vmstats.erl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
-record (state, {samples = queue:new(),
max_samples = 300, % 5 minutes of sampled data
legacy = false, % old otp workarounds
previous_mondemand,
previous_mondemand = undefined,
timer,
scheduler_former_flag,% keep track of previous scheduler
% stats flag for shutdown
Expand Down Expand Up @@ -146,7 +146,6 @@ init([]) ->
% keep the initial sample as both the previous mondemand value and put
% it into the queue
{ ok, #state { samples = InitialQueue,
previous_mondemand = InitialSample,
timer = TRef,
legacy = Legacy,
collect_scheduler_stats = CollectSchedulerStats,
Expand All @@ -167,7 +166,19 @@ handle_call (to_mondemand, _From,
previous_mondemand = Prev }) ->
% queue should always have something in it
{value, LastSample} = queue:peek_r (Queue),
Stats = to_mondemand (Prev, LastSample),
Stats =
case Prev =:= undefined of
true ->
% we skip the first send of data to mondemand, as we have no way
% to really ensure the normal duration between sends to mondemand
% has elapsed, if it hasn't elapsed we might be emitting to mondemand
% shortly after restart and would see some spikiness in any counters
% (as they are turned into gauges with the assumption calls to
% to_mondemand/0 are happening on a regular interval).
[];
false ->
to_mondemand (Prev, LastSample)
end,
{reply, Stats, State#state { previous_mondemand = LastSample } };
handle_call (_Request, _From, State = #state { }) ->
{reply, ok, State }.
Expand Down

0 comments on commit dfa7b67

Please sign in to comment.