Add a nagios check for a notify_tornado consumer

(imported from commit 050536bb4ac7384d5b98d5cf6cb7430b2b00dbd5)
This commit is contained in:
Leo Franchi 2013-04-16 14:07:53 -04:00
parent a07df5fb17
commit b77cee798e
3 changed files with 94 additions and 0 deletions

59
bots/check-rabbitmq-consumers Executable file
View file

@ -0,0 +1,59 @@
#!/usr/bin/env python
import sys
import subprocess
import time
import optparse
from collections import defaultdict
from os import path, environ
sys.path.append(path.join(path.dirname(__file__), '../tools'))
from humbug_tools import check_output
# Numeric Nagios status codes and the label printed alongside them.
states = {
    0: "OK",
    1: "WARNING",
    2: "CRITICAL",
    3: "UNKNOWN",
}

# Parse options first so that --help works no matter which user runs the script.
usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--queue',
                  dest='queue_name',
                  default="notify_tornado",
                  action='store')
parser.add_option('--min-threshold',
                  dest='min_count',
                  type="int",
                  default=1,
                  action='store')
(options, args) = parser.parse_args()

if 'USER' in environ and environ['USER'] not in ('root', 'rabbitmq'):
    # Report the problem in the same timestamp|code|state|message layout as
    # the normal result line, then stop.  Previously a free-form warning was
    # printed and execution carried on, which put an extra line in front of
    # the result and still attempted to run rabbitmqctl.
    status = 3
    print("%s|%s|%s|%s" % (
        int(time.time()), status, states[status],
        "This script must be run as the root or rabbitmq user"))
    sys.exit(status)

output = check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'], shell=False)

# Count the output lines whose first tab-separated field is the queue name;
# each such line is treated as one consumer of that queue.
consumer_count = 0
for line in output.split('\n'):
    if line.split('\t')[0] == options.queue_name:
        consumer_count += 1

now = int(time.time())

# Too few consumers is CRITICAL; anything else is OK.
if consumer_count < options.min_count:
    status = 2
else:
    status = 0

# Single result line: timestamp|code|state|message
print("%s|%s|%s|queue %s has %s consumers, needs %s" % (
    now, status, states[status], options.queue_name,
    consumer_count, options.min_count))

34
bots/cron_file_helper.py Normal file
View file

@ -0,0 +1,34 @@
#!/usr/bin/env python
import time
def nagios_from_file(results_file):
    """Return a (return_code, message) pair for Nagios read from a results file.

    The file on disk should contain four pipe-separated fields:

        timestamp|return_code|state|message

    where timestamp is an integer number of seconds since the epoch and
    return_code is the integer Nagios status.  Such files are created by
    various nagios checking cron jobs such as check-rabbitmq-queues and
    check-rabbitmq-consumers.

    Returns (3, "UNKNOWN: Results file malformed") when the file does not
    have exactly four fields or a numeric field is not an integer, and
    (3, "UNKNOWN: Results file is stale") when the timestamp is more than
    two minutes old.  Otherwise returns (return_code, "state: message").

    Errors from opening or reading the file are not caught here and
    propagate to the caller.
    """
    # open() works on both Python 2 and 3 (file() is Python 2 only), and the
    # with block closes the handle instead of leaking it.
    with open(results_file) as results:
        data = results.read().strip()

    malformed = (3, "%s: %s" % ('UNKNOWN', "Results file malformed"))

    pieces = data.split('|')
    if len(pieces) != 4:
        return malformed

    try:
        timestamp = int(pieces[0])
    except ValueError:
        # A non-numeric timestamp is a malformed file, not a crash.
        return malformed

    # Results older than two minutes are not trusted.
    if time.time() - timestamp > 60 * 2:
        return (3, "%s: %s" % ('UNKNOWN', "Results file is stale"))

    try:
        ret = int(pieces[1])
    except ValueError:
        return malformed

    return (ret, "%s: %s" % (pieces[2], pieces[3]))

View file

@ -0,0 +1 @@
# Every minute: run the consumer check, capture stdout and stderr in a
# temporary file, then move it over the published results file.
# Uses POSIX "> file 2>&1" rather than the bash-only "&>", because cron runs
# commands with /bin/sh unless SHELL is set; a POSIX sh reads "&>" as
# "run in background, then truncate the file", which leaves the results empty.
* * * * * /home/humbug/humbug/bots/check-rabbitmq-consumers > /var/run/nagios/check-rabbitmq-consumers-tmp 2>&1; mv /var/run/nagios/check-rabbitmq-consumers-tmp /var/run/nagios/check-rabbitmq-consumers