zulip/scripts/nagios/check-rabbitmq-consumers

110 lines
3.1 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python3
import sys
import time
import argparse
import configparser
from collections import defaultdict
import os
import subprocess
if False:
# See https://zulip.readthedocs.io/en/latest/testing/mypy.html#mypy-in-production-scripts
from typing import Dict
states = {
0: "OK",
1: "WARNING",
2: "CRITICAL",
3: "UNKNOWN"
}
if 'USER' in os.environ and not os.environ['USER'] in ['root', 'rabbitmq']:
print("This script must be run as the root or rabbitmq user")
usage = """Usage: check-rabbitmq-consumers --queue=[queue-name] --min-threshold=[min-threshold]"""
parser = argparse.ArgumentParser(usage=usage)
parser.add_argument('--min-threshold',
dest='min_count',
type=int,
default=1,
action='store')
options = parser.parse_args()
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
def get_config(section, key, default_value):
# type: (str, str, str) -> str
if config_file.has_option(section, key):
return config_file.get(section, key)
return default_value
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True)
consumers = defaultdict(int) # type: Dict[str, int]
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
queues = {
'deferred_work'
'digest_emails',
'email_mirror',
'embed_links',
'embedded_bots',
'error_reports',
'feedback_messages',
'invites',
'message_sender',
'missedmessage_emails',
'missedmessage_email_senders',
'email_senders',
'missedmessage_mobile_notifications',
'outgoing_webhooks',
'signups',
'slow_queries',
'user_activity'
'user_activity_interval',
'user_presence',
# These queues may not be present if settings.TORNADO_PROCESSES > 1
'notify_tornado',
'tornado_return',
}
for queue_name in queues:
queue_name = queue_name.strip()
consumers[queue_name] = 0
for line in output.split('\n'):
parts = line.split('\t')
if len(parts) >= 2:
queue_name = parts[0]
if queue_name.startswith("tornado_return_"):
queue_name = "tornado_return"
if queue_name.startswith("notify_tornado_"):
queue_name = "notify_tornado"
consumers[queue_name] += 1
now = int(time.time())
for queue_name in consumers.keys():
state_file_path = "/var/lib/nagios_state/check-rabbitmq-consumers-" + queue_name
state_file_tmp = state_file_path + "-tmp"
target_count = options.min_count
if queue_name in ["tornado_return", "notify_tornado"]:
target_count = TORNADO_PROCESSES
if consumers[queue_name] < target_count:
status = 2
else:
status = 0
with open(state_file_tmp, "w") as f:
f.write("%s|%s|%s|queue %s has %s consumers, needs %s\n" % (
now, status, states[status], queue_name,
consumers[queue_name], target_count))
os.rename(state_file_tmp, state_file_path)