From ede3252f8db5149b49dd9fc0cf88bbc34c2525f1 Mon Sep 17 00:00:00 2001
From: Steve Howell
Date: Tue, 29 Oct 2013 16:46:31 -0400
Subject: [PATCH] [puppet] Add nagios script to look for queue error files.

You should do the puppet apply on the nagios box.

(imported from commit 7cf3a11ede69ed6bd7ba2a4384d83c89cfcc65c0)
---
Reviewer notes (this section is not part of the commit message):
- The check_queue_worker_errors command_line now closes the single-quoted
  -C argument, matching the neighbouring check_by_ssh commands.
- Indentation inside the .cfg hunks was reconstructed; confirm it against
  the target files before applying.
- The plugin exits 1 when any *.errors file exists in
  settings.QUEUE_ERROR_DIR and exits 0 otherwise.

 .../modules/zulip/files/nagios3/commands.cfg  |  5 ++++
 .../zulip/files/nagios3/conf.d/services.cfg   |  8 +++++++
 .../nagios_plugins/check_queue_worker_errors  | 23 +++++++++++++++++++
 3 files changed, 36 insertions(+)
 create mode 100755 servers/puppet/modules/zulip/files/nagios_plugins/check_queue_worker_errors

diff --git a/servers/puppet/modules/zulip/files/nagios3/commands.cfg b/servers/puppet/modules/zulip/files/nagios3/commands.cfg
index 24e66628df..a8d3865ad9 100644
--- a/servers/puppet/modules/zulip/files/nagios3/commands.cfg
+++ b/servers/puppet/modules/zulip/files/nagios3/commands.cfg
@@ -86,6 +86,11 @@ define command{
         command_line    /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_send_receive_time --nagios --site=https://$HOSTADDRESS$'
 }
 
+define command{
+        command_name    check_queue_worker_errors
+        command_line    /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_queue_worker_errors'
+}
+
 define command{
         command_name    check_postgres
         command_line    /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_postgres.pl --dbname=zulip --dbuser=zulip --action $ARG2$'
diff --git a/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg b/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg
index e8fc2a3e87..8473b7e638 100644
--- a/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg
+++ b/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg
@@ -224,6 +224,14 @@ define service {
         contact_groups          page_admins
 }
 
+define service {
+        use                     generic-service
+        service_description     Check for queue worker errors.
+        check_command           check_queue_worker_errors!22
+        hostgroup_name          frontends
+        contact_groups          admins
+}
+
 define service {
         use                     generic-service
         service_description     Check rabbitmq notify_tornado consumers
diff --git a/servers/puppet/modules/zulip/files/nagios_plugins/check_queue_worker_errors b/servers/puppet/modules/zulip/files/nagios_plugins/check_queue_worker_errors
new file mode 100755
index 0000000000..d295b4f83d
--- /dev/null
+++ b/servers/puppet/modules/zulip/files/nagios_plugins/check_queue_worker_errors
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+"""
+Nagios plugin to check that none of our queue workers have reported errors.
+"""
+
+import sys
+sys.path.append('.')
+from zproject import settings
+
+import glob
+import os
+
+wildcard = os.path.join(settings.QUEUE_ERROR_DIR, '*.errors')
+clean = True
+for fn in glob.glob(wildcard):
+    print('WARNING: Queue errors logged in %s' % (fn,))
+    clean = False
+
+if not clean:
+    sys.exit(1)
+
+sys.exit(0)