From dfa7b679ee12cecd3b3150cf8b7a7e20b0d5ee97 Mon Sep 17 00:00:00 2001 From: Anthony Molinaro Date: Fri, 3 Feb 2017 23:35:07 +0000 Subject: [PATCH] Vmstats should only be sent for full intervals It was possible to see spikes or dips on restart of a service running mondemand: if the initial sample was used and the trigger to send vmstats fired shortly afterwards, the stats would cover only maybe 5 seconds of startup. Instead, skip sending stats the first time and only start sending from the second time on. This will leave a gap in graphs on restart, but means you don't end up with spikes which dwarf the surrounding samples. --- ChangeLog | 10 ++++++++-- src/mondemand.app.src | 2 +- src/mondemand.erl | 2 ++ src/mondemand_vmstats.erl | 17 ++++++++++++++--- 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index eaaf6d1..1f9552e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ -Verison 6.6.0 (molinaro) - * add scheduler utilization +Version 6.6.1 (molinaro) + * make sure that mondemand vmstats metrics are not emitted without a full + interval having gone by. This means a restart of a service can lead to + a gap in vmstats, but that's better than the spikes which can happen with + out waiting (IMHO). + +Version 6.6.0 (molinaro) + * add scheduler utilization to vmstats Version 6.5.0 (molinaro) * add a call to get vmstats out from the vmstats sampler diff --git a/src/mondemand.app.src b/src/mondemand.app.src index 3bc7267..1e8dd0a 100644 --- a/src/mondemand.app.src +++ b/src/mondemand.app.src @@ -1,7 +1,7 @@ { application, mondemand, [ { description, "Erlang Mondemand Bindings." 
}, - { vsn, "6.6.0" }, + { vsn, "6.6.1" }, { modules, [] }, { registered, [mondemand,mondemand_sup]}, { applications, [kernel,stdlib,syntax_tools,lwes,inets]}, diff --git a/src/mondemand.erl b/src/mondemand.erl index 4bccbf1..01ab528 100644 --- a/src/mondemand.erl +++ b/src/mondemand.erl @@ -293,6 +293,8 @@ send_annotation (Id, Time, Description, Text, Tags, Context) -> ), send_event (Event). +send_stats (_, _, []) -> + ok; send_stats (ProgId, Context, Stats) -> Event = mondemand_statsmsg:to_lwes ( diff --git a/src/mondemand_vmstats.erl b/src/mondemand_vmstats.erl index 89d0c19..16cf928 100644 --- a/src/mondemand_vmstats.erl +++ b/src/mondemand_vmstats.erl @@ -35,7 +35,7 @@ -record (state, {samples = queue:new(), max_samples = 300, % 5 minutes of sampled data legacy = false, % old otp workarounds - previous_mondemand, + previous_mondemand = undefined, timer, scheduler_former_flag,% keep track of previous scheduler % stats flag for shutdown @@ -146,7 +146,6 @@ init([]) -> % keep the initial sample as both the previous mondemand value and put % it into the queue { ok, #state { samples = InitialQueue, - previous_mondemand = InitialSample, timer = TRef, legacy = Legacy, collect_scheduler_stats = CollectSchedulerStats, @@ -167,7 +166,19 @@ handle_call (to_mondemand, _From, previous_mondemand = Prev }) -> % queue should always have something in it {value, LastSample} = queue:peek_r (Queue), - Stats = to_mondemand (Prev, LastSample), + Stats = + case Prev =:= undefined of + true -> + % we skip the first send of data to mondemand, as we have no way + % to really ensure the normal duration between sends to mondemand + % has elapsed, if it hasn't elapsed we might be emitting to mondemand + % shortly after restart and would see some spikiness in any counters + % (as they are turned into gauges with the assumption calls to + % to_mondemand/0 are happening on a regular interval). 
+ []; + false -> + to_mondemand (Prev, LastSample) + end, {reply, Stats, State#state { previous_mondemand = LastSample } }; handle_call (_Request, _From, State = #state { }) -> {reply, ok, State }.