#!/usr/bin/env python3
"""Nagios check: verify that every RabbitMQ queue has enough consumers.

Runs ``rabbitmqctl list_consumers``, counts the consumers attached to each
queue (sharded queues are collapsed into a single entry), and reports one
result per queue through ``atomic_nagios_write``: "critical" when a queue has
fewer consumers than required, "ok" otherwise.
"""

import argparse
import os
import re
import subprocess
import sys
import time
from collections import defaultdict

# Make the repository root importable so the ``scripts.lib`` imports resolve.
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ZULIP_PATH)
from scripts.lib.check_rabbitmq_queue import normal_queues
from scripts.lib.zulip_tools import (
    atomic_nagios_write,
    get_config,
    get_config_file,
    get_tornado_ports,
)

# Advisory only: the warning is printed, but the script keeps running.
if "USER" in os.environ and os.environ["USER"] not in ["root", "rabbitmq"]:
    print("This script must be run as the root or rabbitmq user")

# The script takes no options; parsing still provides --help and rejects
# unexpected arguments.
parser = argparse.ArgumentParser()
parser.parse_args()

config_file = get_config_file()
TORNADO_PROCESSES = len(get_tornado_ports(config_file))

output = subprocess.check_output(["/usr/sbin/rabbitmqctl", "list_consumers"], text=True)

consumers: dict[str, int] = defaultdict(int)

queues = {
    *normal_queues,
    # All tornado queues get grouped into this, even if sharded
    "notify_tornado",
}

# Seed every expected queue with zero so that a queue with no consumers at
# all still gets reported (as critical) instead of being silently absent.
for queue_name in queues:
    queue_name = queue_name.strip()
    consumers[queue_name] = 0

# First field of the column-header row that newer rabbitmqctl versions print
# above the tab-separated consumer listing.
HEADER_FIRST_FIELD = "queue_name"

for line in output.split("\n"):
    parts = line.split("\t")
    if len(parts) < 2:
        # Banner lines ("Listing consumers ...") and blank lines have no tabs.
        continue

    queue_name = parts[0]
    if queue_name == HEADER_FIRST_FIELD:
        # Skip the table header; otherwise it would be counted as a consumer
        # of a phantom queue literally named "queue_name".
        continue

    if queue_name.startswith("notify_tornado_"):
        queue_name = "notify_tornado"

    # Collapse sharded queues into a single queue for this check.
    queue_name = re.sub(r"_shard(\d+)", "", queue_name)
    consumers[queue_name] += 1

# NOTE(review): `now` is not read anywhere in the visible script; it looks
# like a leftover and is a candidate for removal together with `import time`.
now = int(time.time())

for queue_name in consumers:
    # Most queues need exactly one consumer; the exceptions below scale with
    # the deployment's configuration.
    target_count = 1
    if queue_name == "notify_tornado":
        target_count = TORNADO_PROCESSES
    elif queue_name == "missedmessage_mobile_notifications":
        target_count = int(
            get_config(config_file, "application_server", "mobile_notification_shards", "1")
        )
    elif queue_name == "thumbnail":
        target_count = int(get_config(config_file, "application_server", "thumbnail_workers", "1"))

    atomic_nagios_write(
        "check-rabbitmq-consumers-" + queue_name,
        "critical" if consumers[queue_name] < target_count else "ok",
        f"queue {queue_name} has {consumers[queue_name]} consumers, needs {target_count}",
    )