mirror of https://github.com/zulip/zulip.git
[puppet] Add nagios script to look for queue error files.
You should do the puppet apply on the nagios box. (imported from commit 7cf3a11ede69ed6bd7ba2a4384d83c89cfcc65c0)
This commit is contained in:
parent
884e602185
commit
ede3252f8d
|
@ -86,6 +86,11 @@ define command{
|
||||||
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_send_receive_time --nagios --site=https://$HOSTADDRESS$'
|
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_send_receive_time --nagios --site=https://$HOSTADDRESS$'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Run the check_queue_worker_errors plugin on the target host over SSH.
# $ARG1$ is the SSH port passed to check_by_ssh through -p.
# The remote command given to -C must be a fully quoted string.
define command{
        command_name    check_queue_worker_errors
        command_line    /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_queue_worker_errors'
}
|
||||||
|
|
||||||
define command{
|
define command{
|
||||||
command_name check_postgres
|
command_name check_postgres
|
||||||
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_postgres.pl --dbname=zulip --dbuser=zulip --action $ARG2$'
|
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_postgres.pl --dbname=zulip --dbuser=zulip --action $ARG2$'
|
||||||
|
|
|
@ -224,6 +224,14 @@ define service {
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Runs the check_queue_worker_errors command against the frontends hostgroup.
# The "22" after the "!" is the command's $ARG1$, which the command definition
# hands to check_by_ssh as the SSH port (-p). Uses the admins contact group.
define service {
|
||||||
|
use generic-service
|
||||||
|
service_description Check for queue worker errors.
|
||||||
|
check_command check_queue_worker_errors!22
|
||||||
|
hostgroup_name frontends
|
||||||
|
contact_groups admins
|
||||||
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
use generic-service
|
use generic-service
|
||||||
service_description Check rabbitmq notify_tornado consumers
|
service_description Check rabbitmq notify_tornado consumers
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/env python

"""
Nagios plugin to check that none of our queue workers have reported errors.

Prints one WARNING line for every '*.errors' file found directly under
settings.QUEUE_ERROR_DIR and exits with status 1 if any were found;
otherwise prints nothing and exits with status 0.
"""

import glob
import os
import sys

# The settings module is resolved relative to the current working
# directory, so the path must be extended before the import below.
sys.path.append('.')
from zproject import settings

error_files = glob.glob(os.path.join(settings.QUEUE_ERROR_DIR, '*.errors'))

for path in error_files:
    print('WARNING: Queue errors logged in %s' % (path,))

# A non-empty match list means at least one worker logged errors.
sys.exit(1 if error_files else 0)
|
Loading…
Reference in New Issue