Add a nagios plugin for checking rabbitmq queue sizes
(imported from commit 32bd03bcfe4c4a4221ace17f83adb175f591c8ea)
This commit is contained in:
parent
0d53053669
commit
382c4120ef
63
bots/check-rabbitmq-queue
Executable file
63
bots/check-rabbitmq-queue
Executable file
|
@ -0,0 +1,63 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
# A queue whose count (as reported by `rabbitmqctl list_queues`) is strictly
# greater than WARN_THRESHOLD is reported as WARNING; strictly greater than
# CRIT_THRESHOLD is reported as CRITICAL.
WARN_THRESHOLD = 100
CRIT_THRESHOLD = 200

# Numeric status code -> status name printed in the result line.
states = dict(enumerate((
    "OK",
    "WARNING",
    "CRITICAL",
    "UNKNOWN",
)))
|
||||||
|
|
||||||
|
# check_output is backported from subprocess.py in Python 2.7
def check_output(*popenargs, **kwargs):
    """Run a command and return everything it wrote to standard output.

    All positional and keyword arguments are forwarded to
    ``subprocess.Popen``; ``stdout`` is always set to a pipe so the output
    can be captured.

    Returns:
        The captured standard output of the command.

    Raises:
        ValueError: if the caller passes a ``stdout`` keyword argument.
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.  The captured output is attached as ``.output``.
    """
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    output, _unused_err = process.communicate()
    retcode = process.poll()
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        # Interpreters that need this backport (pre-2.7) have a
        # CalledProcessError constructor without an ``output`` parameter, so
        # attach the captured output as an attribute instead of passing it.
        error = subprocess.CalledProcessError(retcode, cmd)
        error.output = output
        raise error
    return output


# Only install the backport when the standard library does not already
# provide check_output; never replace the real implementation.
if not hasattr(subprocess, "check_output"):
    subprocess.check_output = check_output
|
||||||
|
|
||||||
|
# Matches one "<queue name><TAB><count>" row of `rabbitmqctl list_queues`.
# The name is everything up to the first tab, so queue names containing
# '-', '.' or other non-word characters are no longer silently skipped.
QUEUE_LINE_RE = re.compile(r'([^\t]+)\t(\d+)')


def summarize_queues(output, warn_threshold, crit_threshold):
    """Grade the queue sizes found in `rabbitmqctl list_queues` output.

    Args:
        output: text output of the command, one queue per line in the form
            "<name><TAB><count>".  Lines that do not match are ignored.
        warn_threshold: counts strictly above this are a warning (status 1).
        crit_threshold: counts strictly above this are critical (status 2).

    Returns:
        A tuple ``(status, max_count, warn_queues)``: the worst status seen
        (0 when no queue exceeds a threshold), the largest count seen (0 when
        no queue line was found), and the names of all queues over a
        threshold, in input order.
    """
    status = 0
    max_count = 0
    warn_queues = []

    for line in output.split("\n"):
        m = QUEUE_LINE_RE.match(line.strip())
        if not m:
            # Lines without a tab-separated numeric count are not queue rows.
            continue

        queue = m.group(1)
        count = int(m.group(2))

        if count > crit_threshold:
            status = max(status, 2)
            warn_queues.append(queue)
        elif count > warn_threshold:
            status = max(status, 1)
            warn_queues.append(queue)

        max_count = max(max_count, count)

    return status, max_count, warn_queues


def format_result(now, status, max_count, warn_queues, state_names):
    """Build the "<time>|<status>|<state name>|<message>" result line.

    Args:
        now: integer timestamp to put in the first field.
        status: numeric status code; also the key looked up in state_names.
        max_count: largest queue count seen.
        warn_queues: names of the queues over a threshold.
        state_names: mapping from status code to its display name.
    """
    if status > 0:
        return "%s|%s|%s|max count %s, queues affected: %s" % (
            now, status, state_names[status], max_count, ", ".join(warn_queues))
    return "%s|%s|%s|queues normal, max count %s" % (
        now, status, state_names[status], max_count)


def main():
    """Query rabbitmqctl, grade the queue sizes and print one result line."""
    try:
        output = subprocess.check_output(['rabbitmqctl', 'list_queues'], shell=False)
    except (OSError, subprocess.CalledProcessError) as e:
        # rabbitmqctl is missing or exited non-zero: report UNKNOWN in the
        # usual result format rather than dying with a traceback.
        print("%s|%s|%s|could not run rabbitmqctl list_queues: %s"
              % (int(time.time()), 3, states[3], e))
        return

    # On Python 3 the captured output is bytes; decode it before splitting.
    if not isinstance(output, str):
        output = output.decode("utf-8", "replace")

    status, max_count, warn_queues = summarize_queues(
        output, WARN_THRESHOLD, CRIT_THRESHOLD)
    now = int(time.time())
    print(format_result(now, status, max_count, warn_queues, states))


if __name__ == "__main__":
    main()
|
25
bots/rabbitmq-queuesize-crontab
Normal file
25
bots/rabbitmq-queuesize-crontab
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# Edit this file to introduce tasks to be run by cron.
|
||||||
|
#
|
||||||
|
# Each task to run has to be defined through a single line
|
||||||
|
# indicating with different fields when the task will be run
|
||||||
|
# and what command to run for the task
|
||||||
|
#
|
||||||
|
# To define the time you can provide concrete values for
|
||||||
|
# minute (m), hour (h), day of month (dom), month (mon),
|
||||||
|
# and day of week (dow) or use '*' in these fields (for 'any').
|
||||||
|
# Notice that tasks will be started based on the cron's system
|
||||||
|
# daemon's notion of time and timezones.
|
||||||
|
#
|
||||||
|
# Output of the crontab jobs (including errors) is sent through
|
||||||
|
# email to the user the crontab file belongs to (unless redirected).
|
||||||
|
#
|
||||||
|
# For example, you can run a backup of all your user accounts
|
||||||
|
# at 5 a.m every week with:
|
||||||
|
# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
|
||||||
|
#
|
||||||
|
# For more information see the manual pages of crontab(5) and cron(8)
|
||||||
|
#
|
||||||
|
# m h dom mon dow command
|
||||||
|
SHELL=/bin/bash
|
||||||
|
|
||||||
|
* * * * * /home/humbug/humbug/bots/check-rabbitmq-queue &> /var/run/nagios/check-rabbitmq-results-tmp; mv /var/run/nagios/check-rabbitmq-results-tmp /var/run/nagios/check-rabbitmq-results
|
Loading…
Reference in a new issue