From ab505749ad9afb7ee379e4b9636298a5297ce237 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sat, 7 May 2016 18:58:57 -0700 Subject: [PATCH] Move rabbitmq consumer checks from bots/ to scripts/nagios/. --- bots/check-rabbitmq-consumers | 56 ---------------------------- bots/check-rabbitmq-queue | 69 ----------------------------------- 2 files changed, 125 deletions(-) delete mode 100755 bots/check-rabbitmq-consumers delete mode 100755 bots/check-rabbitmq-queue diff --git a/bots/check-rabbitmq-consumers b/bots/check-rabbitmq-consumers deleted file mode 100755 index c0fcd32..0000000 --- a/bots/check-rabbitmq-consumers +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python2.7 - -from __future__ import print_function -import sys -import time -import optparse -from collections import defaultdict -import os -import subprocess - -states = { - 0: "OK", - 1: "WARNING", - 2: "CRITICAL", - 3: "UNKNOWN" -} - -if 'USER' in os.environ and not os.environ['USER'] in ['root', 'rabbitmq']: - print("This script must be run as the root or rabbitmq user") - - -usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]""" - -parser = optparse.OptionParser(usage=usage) -parser.add_option('--queue', - dest='queue_name', - default="notify_tornado", - action='store') -parser.add_option('--min-threshold', - dest='min_count', - type="int", - default=1, - action='store') - -(options, args) = parser.parse_args() - -output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'], shell=False) - -consumers = defaultdict(int) - -for line in output.split('\n'): - parts = line.split('\t') - if len(parts) and parts[0] == options.queue_name: - consumers[parts[0]] += 1 - - -now = int(time.time()) - -if consumers[options.queue_name] < options.min_count: - status = 2 -else: - status = 0 - -print("%s|%s|%s|queue %s has %s consumers, needs %s" % ( - now, status, states[status], options.queue_name, - consumers[options.queue_name], options.min_count)) diff --git a/bots/check-rabbitmq-queue b/bots/check-rabbitmq-queue deleted file mode 100755 index 12373c5..0000000 --- a/bots/check-rabbitmq-queue +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python2.7 - -from __future__ import print_function -import sys -import re -import time -import os -import subprocess - -WARN_THRESHOLD_DEFAULT = 10 -WARN_THRESHOLD = { - 'missedmessage_emails': 45, -} -CRIT_THRESHOLD_DEFAULT = 50 -CRIT_THRESHOLD = { - 'missedmessage_emails': 70, -} - -states = { - 0: "OK", - 1: "WARNING", - 2: "CRITICAL", - 3: "UNKNOWN" -} - -re = re.compile(r'(\w+)\t(\d+)') -output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_queues'], shell=False) - -status = 0 -max_count = 0 -warn_queues = [] - -if 'USER' in os.environ and not os.environ['USER'] in ['root', 'rabbitmq']: - print("This script must be run as the root or rabbitmq user") - -for line in output.split("\n"): - line = line.strip() - m = re.match(line) - if m: - queue = m.group(1) - count = int(m.group(2)) - this_status = 0 - if count > CRIT_THRESHOLD.get(queue, CRIT_THRESHOLD_DEFAULT): - this_status = 2 - warn_queues.append(queue) - elif count > WARN_THRESHOLD.get(queue, WARN_THRESHOLD_DEFAULT): - this_status = max(status, 1) - warn_queues.append(queue) - - status = max(status, this_status) - max_count = max(max_count, count) - -warn_about = ", ".join(warn_queues) -now = int(time.time()) -now_struct = time.gmtime(now) - -# While we are sending digest emails, at 11am each weekday, the mail queues can -# get backed up; don't alert on those. -if not set(warn_queues) - set(("missedmessage_emails", "digest_emails")) and \ - now_struct.tm_hour == 15 and now_struct.tm_min < 25: - status = 0 - print("%s|%s|%s|processing digests, not alerting on elevated mail queues" % ( - now, status, states[status])) - exit(0) - -if status > 0: - print("%s|%s|%s|max count %s, queues affected: %s" % (now, status, states[status], max_count, warn_about)) -else: - print("%s|%s|%s|queues normal, max count %s" % (now, status, states[status], max_count))