mirror of https://github.com/zulip/zulip.git
[puppet] Add nagios script to look for queue error files.
You should do the puppet apply on the nagios box. (imported from commit 7cf3a11ede69ed6bd7ba2a4384d83c89cfcc65c0)
This commit is contained in:
parent
884e602185
commit
ede3252f8d
|
@ -86,6 +86,11 @@ define command{
|
|||
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_send_receive_time --nagios --site=https://$HOSTADDRESS$'
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_queue_worker_errors
|
||||
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_queue_worker_errors'
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_postgres
|
||||
command_line /usr/lib/nagios/plugins/check_by_ssh -p $ARG1$ -l zulip -t 30 -i /var/lib/nagios/.ssh/id_rsa -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/check_postgres.pl --dbname=zulip --dbuser=zulip --action $ARG2$'
|
||||
|
|
|
@ -224,6 +224,14 @@ define service {
|
|||
contact_groups page_admins
|
||||
}
|
||||
|
||||
define service {
|
||||
use generic-service
|
||||
service_description Check for queue worker errors.
|
||||
check_command check_queue_worker_errors!22
|
||||
hostgroup_name frontends
|
||||
contact_groups admins
|
||||
}
|
||||
|
||||
define service {
|
||||
use generic-service
|
||||
service_description Check rabbitmq notify_tornado consumers
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python

"""
Nagios plugin to check that none of our queue workers have reported errors.

Looks for files matching ``*.errors`` in ``settings.QUEUE_ERROR_DIR``.

Exit status:
    0 -- no error files were found (an ``OK:`` line is printed).
    1 -- at least one error file exists; one ``WARNING:`` line is printed
         per file.  Nagios treats plugin exit status 1 as WARNING.
"""

import sys
# NOTE(review): this relies on the current working directory being the
# project root so that ``zproject`` is importable -- confirm that holds
# when the plugin is invoked through check_by_ssh.
sys.path.append('.')
from zproject import settings

import glob
import os

wildcard = os.path.join(settings.QUEUE_ERROR_DIR, '*.errors')

# Sort so that the reported order is stable between runs; glob.glob()
# returns matches in arbitrary order.
error_files = sorted(glob.glob(wildcard))

if error_files:
    for fn in error_files:
        print('WARNING: Queue errors logged in %s' % (fn,))
    sys.exit(1)

# Always emit a status line: a plugin that exits silently shows up in
# Nagios as "(No output returned from plugin)".
print('OK: No queue worker error files found in %s' % (settings.QUEUE_ERROR_DIR,))
sys.exit(0)
|
Loading…
Reference in New Issue