From 382c4120ef65c320bfa451340ae6deff5c49e74b Mon Sep 17 00:00:00 2001
From: Leo Franchi
Date: Tue, 19 Feb 2013 12:06:25 -0500
Subject: [PATCH] Add a nagios plugin for checking rabbitmq queue sizes

(imported from commit 32bd03bcfe4c4a4221ace17f83adb175f591c8ea)
---
 bots/check-rabbitmq-queue       | 92 +++++++++++++++++++++++++++++++++
 bots/rabbitmq-queuesize-crontab | 25 +++++++++
 2 files changed, 117 insertions(+)
 create mode 100755 bots/check-rabbitmq-queue
 create mode 100644 bots/rabbitmq-queuesize-crontab

diff --git a/bots/check-rabbitmq-queue b/bots/check-rabbitmq-queue
new file mode 100755
index 0000000..ca2787a
--- /dev/null
+++ b/bots/check-rabbitmq-queue
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""Nagios check reporting the depth of the local RabbitMQ queues.
+
+Runs `rabbitmqctl list_queues` and prints one line of the form
+    <unix time>|<status code>|<status name>|<message>
+where the status reflects the deepest queue found.
+"""
+
+import sys
+import subprocess
+import re
+import time
+
+# A queue holding more messages than these limits raises the status.
+WARN_THRESHOLD = 100
+CRIT_THRESHOLD = 200
+
+states = {
+    0: "OK",
+    1: "WARNING",
+    2: "CRITICAL",
+    3: "UNKNOWN"
+}
+
+# check_output is backported from subprocess.py in Python 2.7
+def check_output(*popenargs, **kwargs):
+    """Run a command and return its stdout.
+
+    Raises subprocess.CalledProcessError, carrying the captured output in
+    its `output` attribute, when the command exits with a non-zero status.
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    output, unused_err = process.communicate()
+    retcode = process.poll()
+    if retcode:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        # CalledProcessError only accepts an `output` argument from Python
+        # 2.7 on, so attach it as an attribute to stay usable on 2.6.
+        error = subprocess.CalledProcessError(retcode, cmd)
+        error.output = output
+        raise error
+    return output
+
+# Install the backport only where the standard library lacks check_output.
+if not hasattr(subprocess, 'check_output'):
+    subprocess.check_output = check_output
+
+def report(status, message):
+    """Print one result line: <unix time>|<status code>|<status name>|<message>."""
+    print("%s|%s|%s|%s" % (int(time.time()), status, states[status], message))
+
+# Match on everything up to the tab rather than \w+, so queue names that
+# contain characters such as '.' or '-' are counted instead of skipped.
+queue_line = re.compile(r'([^\t]+)\t(\d+)')
+
+try:
+    # universal_newlines makes the output text on both Python 2 and 3.
+    output = subprocess.check_output(['rabbitmqctl', 'list_queues'],
+                                     shell=False, universal_newlines=True)
+except (OSError, subprocess.CalledProcessError) as e:
+    # No queue data means the state is unknown; report that rather than
+    # leaving a traceback in place of the result line.
+    report(3, "could not run rabbitmqctl: %s" % (e,))
+    sys.exit(3)
+
+status = 0
+max_count = 0
+warn_queues = []
+
+for line in output.split("\n"):
+    m = queue_line.match(line.strip())
+    if not m:
+        # Lines that are not a name/count pair carry no queue depth.
+        continue
+    queue = m.group(1)
+    count = int(m.group(2))
+    if count > CRIT_THRESHOLD:
+        status = max(status, 2)
+        warn_queues.append(queue)
+    elif count > WARN_THRESHOLD:
+        status = max(status, 1)
+        warn_queues.append(queue)
+    max_count = max(max_count, count)
+
+if status > 0:
+    report(status, "max count %s, queues affected: %s" % (max_count, ", ".join(warn_queues)))
+else:
+    report(status, "queues normal, max count %s" % (max_count,))
diff --git a/bots/rabbitmq-queuesize-crontab b/bots/rabbitmq-queuesize-crontab
new file mode 100644
index 0000000..36b315e
--- /dev/null
+++ b/bots/rabbitmq-queuesize-crontab
@@ -0,0 +1,25 @@
+# Edit this file to introduce tasks to be run by cron.
+#
+# Each task to run has to be defined through a single line
+# indicating with different fields when the task will be run
+# and what command to run for the task
+#
+# To define the time you can provide concrete values for
+# minute (m), hour (h), day of month (dom), month (mon),
+# and day of week (dow) or use '*' in these fields (for 'any').
+# Notice that tasks will be started based on the cron's system
+# daemon's notion of time and timezones.
+#
+# Output of the crontab jobs (including errors) is sent through
+# email to the user the crontab file belongs to (unless redirected).
+#
+# For example, you can run a backup of all your user accounts
+# at 5 a.m every week with:
+# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
+#
+# For more information see the manual pages of crontab(5) and cron(8)
+#
+# m h dom mon dow command
+SHELL=/bin/bash
+
+* * * * * /home/humbug/humbug/bots/check-rabbitmq-queue &> /var/run/nagios/check-rabbitmq-results-tmp; mv /var/run/nagios/check-rabbitmq-results-tmp /var/run/nagios/check-rabbitmq-results