nagios: Support multiple Tornado processes.

This allows our Tornado monitoring to correctly report whether all of
the configured Tornado processes are running.

This setup isn't ideal, in that it can't detect cases where the wrong
set of Tornado processes is running, but it's nice and simple and
should catch most actual problems.
This commit is contained in:
Tim Abbott 2018-11-06 16:45:26 -08:00
parent 5abf4dee92
commit 3f03dcdf5e
1 changed file with 23 additions and 4 deletions

View File

@ -3,12 +3,13 @@
import sys import sys
import time import time
import argparse import argparse
import configparser
from collections import defaultdict from collections import defaultdict
import os import os
import subprocess import subprocess
if False: if False:
from typing import Dict from typing import Any, Dict, Optional, Union
states = { states = {
0: "OK", 0: "OK",
@ -32,6 +33,15 @@ parser.add_argument('--min-threshold',
options = parser.parse_args() options = parser.parse_args()
# Load the server-wide Zulip configuration.  RawConfigParser.read() skips
# files it cannot open, so a missing file just leaves the parser empty and
# every lookup below falls back to its default.
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")


def get_config(section, key, default_value):
    # type: (str, str, str) -> str
    """Return the value of `key` in `section` of zulip.conf.

    `default_value` is returned unchanged when either the section or the
    option is absent from the loaded configuration.
    """
    return config_file.get(section, key, fallback=default_value)


# Value of the `tornado_processes` setting in [application_server];
# defaults to 1 when the setting is absent.
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'], output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True) universal_newlines=True)
@ -71,7 +81,12 @@ for queue_name in queues:
for line in output.split('\n'): for line in output.split('\n'):
parts = line.split('\t') parts = line.split('\t')
if len(parts) >= 2: if len(parts) >= 2:
consumers[parts[0]] += 1 queue_name = parts[0]
if queue_name.startswith("tornado_return_"):
queue_name = "tornado_return"
if queue_name.startswith("notify_tornado_"):
queue_name = "notify_tornado"
consumers[queue_name] += 1
now = int(time.time()) now = int(time.time())
@ -79,12 +94,16 @@ for queue_name in consumers.keys():
state_file_path = "/var/lib/nagios_state/check-rabbitmq-consumers-" + queue_name state_file_path = "/var/lib/nagios_state/check-rabbitmq-consumers-" + queue_name
state_file_tmp = state_file_path + "-tmp" state_file_tmp = state_file_path + "-tmp"
if consumers[queue_name] < options.min_count: target_count = options.min_count
if queue_name in ["tornado_return", "notify_tornado"]:
target_count = TORNADO_PROCESSES
if consumers[queue_name] < target_count:
status = 2 status = 2
else: else:
status = 0 status = 0
with open(state_file_tmp, "w") as f: with open(state_file_tmp, "w") as f:
f.write("%s|%s|%s|queue %s has %s consumers, needs %s\n" % ( f.write("%s|%s|%s|queue %s has %s consumers, needs %s\n" % (
now, status, states[status], queue_name, now, status, states[status], queue_name,
consumers[queue_name], options.min_count)) consumers[queue_name], target_count))
os.rename(state_file_tmp, state_file_path) os.rename(state_file_tmp, state_file_path)