Skip to content

Commit

Permalink
Also report free disk space in metric description
Browse files Browse the repository at this point in the history
Now that we take free space into account, adding it to the message makes
sense.
  • Loading branch information
smortex committed Jan 22, 2024
1 parent eb6c0dd commit b002378
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 30 deletions.
13 changes: 11 additions & 2 deletions lib/riemann/tools/health.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class Health
include Riemann::Tools
include Riemann::Tools::Utils

# Unit prefix letters indexed by power of 1024: SI_UNITS[n] is the prefix used
# for 1024**n (k => 1, M => 2, ... R => 9, Q => 10). Index 0 holds a '_'
# placeholder so that the string index and the exponent line up.
SI_UNITS = '_kMGTPEZYRQ'

opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
Expand Down Expand Up @@ -404,7 +406,7 @@ def disk
elsif x > @limits[:disk][:warning] && available < @limits[:disk][:warning_leniency_kb]
alert "disk #{f[5]}", :warning, x, "#{f[4]} used"
else
alert "disk #{f[5]}", :ok, x, "#{f[4]} used"
alert "disk #{f[5]}", :ok, x, "#{f[4]} used, #{number_to_human_size(available * 1024, :floor)} free"
end
end
end
Expand Down Expand Up @@ -484,10 +486,17 @@ def human_size_to_number(value)
when /^\d+R$/i then value.to_i * (1024**9)
when /^\d+Q$/i then value.to_i * (1024**10)
else
raise %(Malformed size "#{value}", syntax is [0-9]+[kMGTPEZYRQ]?)
raise %(Malformed size "#{value}", syntax is [0-9]+[#{SI_UNITS[1..]}]?)
end
end

# Format a byte count as a human-readable size using binary (1024-based)
# units, e.g. 44_040_192 => "42.0MiB".
#
# Values below 1024 (including zero and negative values) are returned
# unchanged via #to_s, without any unit suffix.
#
# @param value [Numeric] size in bytes
# @param rounding [Symbol] name of the Float method used to round the scaled
#   size to one decimal place: :floor, :round (default) or :ceil
# @return [String] the formatted size
def number_to_human_size(value, rounding = :round)
  return value.to_s if value < 1024

  # Pick the exponent with exact integer comparisons rather than
  # Math.log(...).floor, whose floating point result can land just below an
  # exact power of 1024. Clamp to the largest known unit so that huge values
  # still format instead of indexing past the end of SI_UNITS.
  largest = SI_UNITS.length - 1
  exponent = 0
  exponent += 1 while exponent < largest && value >= 1024**(exponent + 1)

  scaled = (value.to_f / (1024**exponent)).public_send(rounding, 1)

  # Rounding up can reach 1024.0 (e.g. 1_048_575 bytes => "1024.0kiB");
  # carry into the next unit so the result reads "1.0MiB" instead.
  if scaled >= 1024 && exponent < largest
    exponent += 1
    scaled = (value.to_f / (1024**exponent)).public_send(rounding, 1)
  end

  format('%<size>.1f%<unit>ciB', size: scaled, unit: SI_UNITS[exponent])
end

def tick
invalidate_cache

Expand Down
92 changes: 64 additions & 28 deletions spec/riemann/tools/health_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,42 @@
end
end

# Table-driven examples for #number_to_human_size: every input is formatted
# once per rounding mode and compared with the matching expected string.
describe('#number_to_human_size') do
  subject { described_class.new.number_to_human_size(input, rounding) }

  # Context description => rounding mode, listed in the same order as the
  # expected outputs in the table below (floor, round, ceil).
  rounding_modes = {
    'when rounding lower' => :floor,
    'when rounding to nearest' => :round,
    'when rounding above' => :ceil,
  }

  {
    0 => %w[0 0 0],
    1024 => ['1.0kiB', '1.0kiB', '1.0kiB'],
    2047 => ['1.9kiB', '2.0kiB', '2.0kiB'],
    2048 => ['2.0kiB', '2.0kiB', '2.0kiB'],
    2049 => ['2.0kiB', '2.0kiB', '2.1kiB'],
    44_040_192 => ['42.0MiB', '42.0MiB', '42.0MiB'],
    1_155_301_638_144 => ['1.0TiB', '1.1TiB', '1.1TiB'],
  }.each do |input, expected_output|
    context %(when passed #{input.inspect}) do
      let(:input) { input }

      rounding_modes.each_with_index do |(description, mode), position|
        context description do
          let(:rounding) { mode }

          it { is_expected.to eq(expected_output[position]) }
        end
      end
    end
  end
end

describe('#disks') do
before do
allow(subject).to receive(:df).and_return(<<~OUTPUT)
Expand All @@ -39,29 +75,29 @@
end

it 'reports all zfs filesystems' do
allow(subject).to receive(:alert).with('disk /', :ok, 0.07185344331519083, '7% used')
allow(subject).to receive(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used')
allow(subject).to receive(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used')
allow(subject).to receive(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used')
allow(subject).to receive(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used')
allow(subject).to receive(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used')
allow(subject).to receive(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used')
allow(subject).to receive(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used')
allow(subject).to receive(:alert).with('disk /', :ok, 0.07185344331519083, '7% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used, 284.6GiB free')
allow(subject).to receive(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used, 2.0TiB free')
subject.disk
expect(subject).to have_received(:alert).with('disk /', :ok, 0.07185344331519083, '7% used')
expect(subject).to have_received(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used')
expect(subject).to have_received(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used')
expect(subject).to have_received(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used')
expect(subject).to have_received(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used')
expect(subject).to have_received(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used')
expect(subject).to have_received(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used')
expect(subject).to have_received(:alert).with('disk /', :ok, 0.07185344331519083, '7% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/audit', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/mail', :ok, 4.529924689197913e-06, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /tmp', :ok, 0.0001386131897766662, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /zroot', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/crash', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /usr/src', :ok, 2.9484841782529697e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /usr/home', :ok, 0.33075683535672684, '33% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/tmp', :ok, 4.02065981198671e-07, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /var/log', :ok, 2.0545157787749945e-05, '0% used, 284.6GiB free')
expect(subject).to have_received(:alert).with('disk /usr/home/romain/Medias', :ok, 0.39906518922242257, '40% used, 2.0TiB free')
end

context 'with a foreign locale' do
Expand All @@ -74,11 +110,11 @@
end

it 'reports all zfs filesystems' do
allow(subject).to receive(:alert).with('disk /', :ok, 0.6267130394624543, '63% used')
allow(subject).to receive(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used')
allow(subject).to receive(:alert).with('disk /', :ok, 0.6267130394624543, '63% used, 6.7GiB free')
allow(subject).to receive(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used, 66.7GiB free')
subject.disk
expect(subject).to have_received(:alert).with('disk /', :ok, 0.6267130394624543, '63% used')
expect(subject).to have_received(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used')
expect(subject).to have_received(:alert).with('disk /', :ok, 0.6267130394624543, '63% used, 6.7GiB free')
expect(subject).to have_received(:alert).with('disk /home', :ok, 0.22016432923987797, '23% used, 66.7GiB free')
end
end

Expand All @@ -91,9 +127,9 @@
end

it 'reports a correct lenient state' do
allow(subject).to receive(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used')
allow(subject).to receive(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used, 1.0TiB free')
subject.disk
expect(subject).to have_received(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used')
expect(subject).to have_received(:alert).with('disk /tank', :ok, 0.9002625247490722, '91% used, 1.0TiB free')
end
end
end
Expand Down

0 comments on commit b002378

Please sign in to comment.