-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add check-smart-tests plugin * Cleanup style to pass rubocop checks * Cleanup style to pass rubocop checks * Update README * Fix conditional typo * Fix typo, add additional conditional check * Fix conditional check * Add requested changes to increase readability of the code
- Loading branch information
1 parent
43a2dfd
commit f27eb2b
Showing
3 changed files
with
187 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
#! /usr/bin/env ruby | ||
# | ||
# check-smart-tests.rb | ||
# | ||
# DESCRIPTION: | ||
# This script checks S.M.A.R.T. self-tests status and optionally time of last | ||
# test run | ||
# | ||
# OUTPUT: | ||
# plain text | ||
# | ||
# PLATFORMS: | ||
# Linux | ||
# | ||
# DEPENDENCIES: | ||
# gem: sensu-plugin | ||
# | ||
# USAGE: | ||
# check-smart-tests.rb # Use default options | ||
# check-smart-tests.rb -d /dev/sda,/dev/sdb -s 24 -l 336 # Check smart tests status for | ||
# /dev/sda and /dev/sdb devices, also check if short tests were run in last 24 hours and | ||
# extended tests were run in last 14 days (336 hours) | ||
# | ||
# NOTES: | ||
# The plugin requires smartmontools to be installed and smartctl utility in particular. | ||
# | ||
# smartctl requires root rights to run, so you should allow sensu to execute | ||
# this command as root without password by adding following line to /etc/sudoers: | ||
# | ||
# sensu ALL=(ALL) NOPASSWD: /usr/sbin/smartctl | ||
# | ||
# Tested only on Debian. | ||
# | ||
# LICENSE: | ||
# Stanislav Sandalnikov <[email protected]> | ||
# Released under the same terms as Sensu (the MIT license); see LICENSE | ||
# for details. | ||
|
||
require 'sensu-plugin/check/cli' | ||
|
||
# One block device together with the smartctl data the check needs.
#
# Both smartctl queries are executed (through sudo) while the object is being
# constructed; the results are cached in #pwh and #str.
class Device
  attr_accessor :name, :pwh, :str

  # Keys assigned, in order, to the fields of one self-test log row.
  SELFTEST_HEADERS = %w(num test_description status remaining lifetime lba_of_first_error).freeze

  # name                - device path handed to smartctl, e.g. "/dev/sda"
  # smartctl_executable - path of the smartctl binary to run via sudo
  def initialize(name, smartctl_executable)
    @name = name
    @exec = smartctl_executable
    @pwh = poweron_hours
    @str = selftest_results
  end

  # Reads the Power_On_Hours row from `smartctl -A`.
  #
  # Returns the tenth whitespace-separated field of that row as a String
  # (presumably the RAW_VALUE column - confirm against smartctl output), or
  # nil when the device reports no Power_On_Hours attribute. The explicit nil
  # matters: without it the method would fall through to the return value of
  # the iteration itself, which is not a usable hour count.
  def poweron_hours
    `sudo #{@exec} -A #{@name}`.each_line do |line|
      columns = line.split
      return columns[9] if columns[1] == 'Power_On_Hours'
    end

    nil
  end

  # Parses the rows of `smartctl -l selftest` that start with '#'.
  #
  # Returns an Array of Hashes keyed by SELFTEST_HEADERS, in the order
  # smartctl printed them. Fields missing from a row are nil.
  def selftest_results
    `sudo #{@exec} -l selftest #{@name}`.split("\n").grep(/^#/).map do |row|
      # Columns are separated by runs of two or more whitespace characters;
      # single spaces belong to a value ("Short offline"). The non-destructive
      # gsub is required here: gsub! returns nil when nothing was replaced,
      # which would break the chained split on an unusual row.
      fields = row.gsub(/\s\s+/, "\t").split("\t")
      Hash[SELFTEST_HEADERS.zip(fields)]
    end
  end
end
|
||
# Sensu check that inspects the S.M.A.R.T. self-test log of each device.
#
# For every device the most recent logged self-test must either have completed
# without error or still be in progress; anything else is reported as critical.
# When a short and/or long test interval is configured, a warning is raised if
# the newest test of that kind is older than the interval (measured in
# power-on hours) or if no such test appears in the log at all.
class CheckSMARTTests < Sensu::Plugin::Check::CLI
  option :executable,
         long: '--executable EXECUTABLE',
         short: '-e EXECUTABLE',
         default: '/usr/sbin/smartctl',
         description: 'Path to smartctl executable'
  option :devices,
         long: '--devices *DEVICES',
         short: '-d *DEVICES',
         default: 'all',
         description: 'Comma-separated list of devices to check, i.e. "/dev/sda,/dev/sdb"'
  option :short_test_interval,
         long: '--short_test_interval INTERVAL',
         short: '-s INTERVAL',
         description: 'If more time then this value passed since last short test run, then warning will be raised'
  option :long_test_interval,
         long: '--long_test_interval INTERVAL',
         short: '-l INTERVAL',
         description: 'If more time then this value passed since last extedned test run, then warning will be raised'

  # Prepares the result accumulators and builds the device list up front.
  def initialize
    super
    @devices = []
    @warnings = []
    @criticals = []
    set_devices
  end

  # Fills @devices with one Device per name to be checked.
  #
  # With the default 'all', names come from `lsblk -plnd -o NAME` with loop
  # devices filtered out; otherwise the comma-separated --devices value is
  # used verbatim.
  def set_devices
    names =
      if config[:devices] == 'all'
        `lsblk -plnd -o NAME`.split.reject { |name| name =~ /\/dev\/loop.*/ }
      else
        config[:devices].split(',')
      end

    names.each { |name| @devices.push(Device.new(name, config[:executable])) }
  end

  # Evaluates one device and appends any findings to @warnings / @criticals.
  #
  # dev - object exposing #name, #pwh (power-on hours) and #str (self-test log
  #       rows, newest first as far as this check is concerned: row 0 is
  #       treated as the last test).
  def check_tests(dev)
    if dev.str.empty?
      @warnings << "#{dev.name}: No self-tests have been logged."
      return
    end

    last_status = dev.str[0]['status']
    healthy = last_status == 'Completed without error' || last_status =~ /Self-test routine in progress/
    @criticals << "#{dev.name}: Last test failed - #{last_status}" unless healthy

    check_test_interval(dev, 'Short offline', 'short', config[:short_test_interval])
    check_test_interval(dev, 'Extended offline', 'extended', config[:long_test_interval])
  end

  # Checks every device, then exits with the most severe status collected.
  def run
    @devices.each { |device| check_tests(device) }

    if @criticals.any?
      critical @criticals.join(' ')
    elsif @warnings.any?
      warning @warnings.join(' ')
    else
      ok 'All devices are OK'
    end
  end

  private

  # Warns when the newest test of one kind is missing from the log or too old.
  #
  # dev         - device being checked
  # description - exact 'test_description' value identifying the test kind
  # label       - word used for that kind in the warning text
  # interval    - maximum allowed age in hours; nil disables the check
  def check_test_interval(dev, description, label, interval)
    return if interval.nil?

    # First matching row == most recent test of this kind.
    latest = dev.str.find { |test| test['test_description'] == description }

    if latest.nil?
      @warnings << "#{dev.name}: No #{label} tests were run for this device in last #{dev.str.length} executions"
    elsif dev.pwh.to_s !~ /\A\d/
      # Without a numeric power-on-hours value the age cannot be computed;
      # report that instead of failing on the conversion below.
      @warnings << "#{dev.name}: Unable to read Power_On_Hours, cannot tell when the last #{label} test was run"
    elsif dev.pwh.to_i - latest['lifetime'].to_i > interval.to_i
      @warnings << "#{dev.name}: More than #{interval} hours passed since the last #{label} test"
    end
  end
end