From b77cee798eaef75cb124adda039d0e8d7523264b Mon Sep 17 00:00:00 2001
From: Leo Franchi
Date: Tue, 16 Apr 2013 14:07:53 -0400
Subject: [PATCH] Add a nagios check for a notify_tornado consumer

(imported from commit 050536bb4ac7384d5b98d5cf6cb7430b2b00dbd5)
---
 bots/check-rabbitmq-consumers       | 65 +++++++++++++++++++++++++++++
 bots/cron_file_helper.py            | 35 ++++++++++++++++
 bots/rabbitmq-numconsumners-crontab |  3 ++
 3 files changed, 103 insertions(+)
 create mode 100755 bots/check-rabbitmq-consumers
 create mode 100644 bots/cron_file_helper.py
 create mode 100644 bots/rabbitmq-numconsumners-crontab

diff --git a/bots/check-rabbitmq-consumers b/bots/check-rabbitmq-consumers
new file mode 100755
index 0000000..1772841
--- /dev/null
+++ b/bots/check-rabbitmq-consumers
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+# Nagios helper: counts the consumers attached to a RabbitMQ queue and prints
+# one "timestamp|status|state|message" line on stdout.
+
+import sys
+import subprocess
+import time
+import optparse
+from collections import defaultdict
+
+from os import path, environ
+
+sys.path.append(path.join(path.dirname(__file__), '../tools'))
+from humbug_tools import check_output
+
+states = {
+    0: "OK",
+    1: "WARNING",
+    2: "CRITICAL",
+    3: "UNKNOWN"
+}
+
+if 'USER' in environ and environ['USER'] not in ['root', 'rabbitmq']:
+    # Stop here instead of going on to call rabbitmqctl as the wrong user.
+    sys.exit("This script must be run as the root or rabbitmq user")
+
+
+usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""
+
+parser = optparse.OptionParser(usage=usage)
+parser.add_option('--queue',
+                  dest='queue_name',
+                  default="notify_tornado",
+                  action='store')
+parser.add_option('--min-threshold',
+                  dest='min_count',
+                  type="int",
+                  default=1,
+                  action='store')
+
+(options, args) = parser.parse_args()
+
+output = check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'], shell=False)
+
+consumers = defaultdict(int)
+
+# The queue name is expected in the first tab-separated field of each line.
+for line in output.split('\n'):
+    parts = line.split('\t')
+    if parts[0] == options.queue_name:
+        consumers[parts[0]] += 1
+
+
+now = int(time.time())
+
+# Too few consumers is CRITICAL (2); otherwise the queue is OK (0).
+if consumers[options.queue_name] < options.min_count:
+    status = 2
+else:
+    status = 0
+
+print("%s|%s|%s|queue %s has %s consumers, needs %s" % (
+    now, status, states[status], options.queue_name,
+    consumers[options.queue_name], options.min_count))
diff --git a/bots/cron_file_helper.py b/bots/cron_file_helper.py
new file mode 100644
index 0000000..7aa6460
--- /dev/null
+++ b/bots/cron_file_helper.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+import time
+
+def nagios_from_file(results_file):
+    """Returns a nagios-appropriate string and return code obtained by
+    parsing the desired file on disk. The file on disk should be of format
+
+    %s|%s|%s|%s % (timestamp, return_code, state, message)
+
+    A file that cannot be parsed, or whose timestamp is more than two
+    minutes old, is reported as UNKNOWN (return code 3).
+
+    This file is created by various nagios checking cron jobs such as
+    check-rabbitmq-queues and check-rabbitmq-consumers"""
+
+    # Use a context manager so the file handle is always closed.
+    with open(results_file) as f:
+        data = f.read().strip()
+    pieces = data.split('|')
+
+    if len(pieces) != 4:
+        return (3, "UNKNOWN: Results file malformed")
+
+    try:
+        timestamp = int(pieces[0])
+        ret = int(pieces[1])
+    except ValueError:
+        # Non-numeric fields mean the file is not in the expected format.
+        return (3, "UNKNOWN: Results file malformed")
+
+    # Results older than two minutes are considered stale.
+    if time.time() - timestamp > 60 * 2:
+        return (3, "UNKNOWN: Results file is stale")
+
+    return (ret, "%s: %s" % (pieces[2], pieces[3]))
diff --git a/bots/rabbitmq-numconsumners-crontab b/bots/rabbitmq-numconsumners-crontab
new file mode 100644
index 0000000..1673c58
--- /dev/null
+++ b/bots/rabbitmq-numconsumners-crontab
@@ -0,0 +1,3 @@
+# cron runs commands with /bin/sh by default, where "&>" is not portable, so
+# stdout and stderr are redirected with "> file 2>&1" before the rename.
+* * * * * /home/humbug/humbug/bots/check-rabbitmq-consumers > /var/run/nagios/check-rabbitmq-consumers-tmp 2>&1; mv /var/run/nagios/check-rabbitmq-consumers-tmp /var/run/nagios/check-rabbitmq-consumers