Move rabbitmq consumer checks from bots/ to scripts/nagios/.
This commit is contained in:
parent
e1deeba72a
commit
ab505749ad
|
@ -1,56 +0,0 @@
|
|||
#!/usr/bin/env python2.7
|
||||
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import time
|
||||
import optparse
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# Numeric status codes used in the report line, mapped to their display names.
states = {
    0: "OK",
    1: "WARNING",
    2: "CRITICAL",
    3: "UNKNOWN",
}

usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""


def build_parser():
    """Return the command-line parser for this check.

    Options:
      --queue          name of the queue to inspect (default "notify_tornado")
      --min-threshold  minimum acceptable number of consumers (int, default 1)
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--queue',
                      dest='queue_name',
                      default="notify_tornado",
                      action='store')
    parser.add_option('--min-threshold',
                      dest='min_count',
                      type="int",
                      default=1,
                      action='store')
    return parser


def count_consumers(output, queue_name):
    """Count the lines of `output` whose first tab-separated field is `queue_name`.

    `output` is the text printed by `rabbitmqctl list_consumers`; each matching
    line is treated as one consumer of the queue.  Lines that do not start with
    the queue name (headers, other queues, blanks) are ignored.
    """
    # str.split always yields at least one element, so indexing [0] is safe.
    return sum(1 for line in output.split('\n')
               if line.split('\t')[0] == queue_name)


def format_result(now, queue_name, consumer_count, min_count):
    """Return the `time|status|state|message` report line for the check.

    The status is 2 (CRITICAL) when fewer than `min_count` consumers are
    attached to the queue and 0 (OK) otherwise.
    """
    status = 2 if consumer_count < min_count else 0
    return "%s|%s|%s|queue %s has %s consumers, needs %s" % (
        now, status, states[status], queue_name, consumer_count, min_count)


def main():
    """Query RabbitMQ for consumers and print a single report line."""
    # Advisory only: execution continues, so a failure from rabbitmqctl itself
    # still surfaces if the user really lacks the needed privileges.
    if 'USER' in os.environ and os.environ['USER'] not in ['root', 'rabbitmq']:
        print("This script must be run as the root or rabbitmq user")

    (options, args) = build_parser().parse_args()

    # universal_newlines=True makes check_output return text rather than
    # bytes, so the str-based splitting works on both Python 2.7 and Python 3.
    output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
                                     shell=False, universal_newlines=True)

    consumer_count = count_consumers(output, options.queue_name)
    now = int(time.time())
    print(format_result(now, options.queue_name, consumer_count, options.min_count))


if __name__ == "__main__":
    main()
|
|
@ -1,69 +0,0 @@
|
|||
#!/usr/bin/env python2.7
|
||||
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
import re
|
||||
import time
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# A queue holding more than this many messages is reported as WARNING,
# unless it has its own entry in WARN_THRESHOLD.
WARN_THRESHOLD_DEFAULT = 10
WARN_THRESHOLD = {
    'missedmessage_emails': 45,
}
# A queue holding more than this many messages is reported as CRITICAL,
# unless it has its own entry in CRIT_THRESHOLD.
CRIT_THRESHOLD_DEFAULT = 50
CRIT_THRESHOLD = {
    'missedmessage_emails': 70,
}

# Numeric status codes used in the report line, mapped to their display names.
states = {
    0: "OK",
    1: "WARNING",
    2: "CRITICAL",
    3: "UNKNOWN",
}

# Matches a "<queue name><TAB><message count>" line of `rabbitmqctl list_queues`.
# NOTE(review): \w+ skips queue names containing characters such as '-' or '.';
# confirm that no monitored queue uses them.
QUEUE_LINE_RE = re.compile(r'(\w+)\t(\d+)')

# Queues whose backlog is ignored during the digest-email window.
MAIL_QUEUES = frozenset(("missedmessage_emails", "digest_emails"))


def evaluate_queues(output):
    """Classify every queue listed in `output` against the thresholds.

    `output` is the text printed by `rabbitmqctl list_queues`.  Lines that do
    not look like "<name><TAB><count>" are skipped.

    Returns a tuple `(status, max_count, warn_queues)`:
      status      -- worst status seen: 0 (OK), 1 (WARNING) or 2 (CRITICAL)
      max_count   -- largest message count among all parsed queues
      warn_queues -- names of queues over a threshold, in output order
    """
    status = 0
    max_count = 0
    warn_queues = []

    for line in output.split("\n"):
        m = QUEUE_LINE_RE.match(line.strip())
        if not m:
            continue
        queue = m.group(1)
        count = int(m.group(2))

        if count > CRIT_THRESHOLD.get(queue, CRIT_THRESHOLD_DEFAULT):
            status = 2
            warn_queues.append(queue)
        elif count > WARN_THRESHOLD.get(queue, WARN_THRESHOLD_DEFAULT):
            # Never downgrade a CRITICAL raised by an earlier queue.
            status = max(status, 1)
            warn_queues.append(queue)

        max_count = max(max_count, count)

    return status, max_count, warn_queues


def build_report(now, status, max_count, warn_queues):
    """Return the `time|status|state|message` report line.

    `now` is a Unix timestamp; the remaining arguments are the values returned
    by `evaluate_queues`.
    """
    now_struct = time.gmtime(now)

    # While we are sending digest emails, the mail queues can get backed up;
    # don't alert on those.  The suppression applies only when no queue
    # outside MAIL_QUEUES is over a threshold.
    # NOTE(review): the window tested is 15:00-15:24 UTC on every day; no
    # weekday check is made -- confirm this matches the digest schedule.
    if not set(warn_queues) - MAIL_QUEUES and \
            now_struct.tm_hour == 15 and now_struct.tm_min < 25:
        status = 0
        return "%s|%s|%s|processing digests, not alerting on elevated mail queues" % (
            now, status, states[status])

    if status > 0:
        return "%s|%s|%s|max count %s, queues affected: %s" % (
            now, status, states[status], max_count, ", ".join(warn_queues))
    return "%s|%s|%s|queues normal, max count %s" % (now, status, states[status], max_count)


def main():
    """Query RabbitMQ for queue depths and print a single report line."""
    # Print the hint before invoking rabbitmqctl so that it is visible even
    # when the command then fails.  Advisory only: execution continues.
    if 'USER' in os.environ and os.environ['USER'] not in ['root', 'rabbitmq']:
        print("This script must be run as the root or rabbitmq user")

    # universal_newlines=True makes check_output return text rather than
    # bytes, so the str-based parsing works on both Python 2.7 and Python 3.
    output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_queues'],
                                     shell=False, universal_newlines=True)

    status, max_count, warn_queues = evaluate_queues(output)
    now = int(time.time())
    print(build_report(now, status, max_count, warn_queues))


if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue