mirror of https://github.com/zulip/zulip.git
nagios: Switch staging hosts to not page, but send a zulip.
This commit is contained in:
parent
ba8b9a445b
commit
f325e15439
|
@ -19,6 +19,17 @@ define command{
|
||||||
command_line /usr/bin/printf "%b" "Subject: $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$\n\n***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n$LONGSERVICEOUTPUT$\n" | /usr/bin/msmtp -C /var/lib/nagios/msmtprc -vt $CONTACTEMAIL$
|
command_line /usr/bin/printf "%b" "Subject: $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$\n\n***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n$LONGSERVICEOUTPUT$\n" | /usr/bin/msmtp -C /var/lib/nagios/msmtprc -vt $CONTACTEMAIL$
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Zulip commands
|
||||||
|
# Host notification command: invokes the nagios-notify-zulip integration script
# with --stream="kandra ops", passing the notification type, host address,
# host state and (long) host check output through Nagios macros.
define command {
|
||||||
|
command_name notify-host-by-zulip
|
||||||
|
command_line /usr/local/share/zulip/integrations/nagios/nagios-notify-zulip --stream="kandra ops" --type="$NOTIFICATIONTYPE$" --host="$HOSTADDRESS$" --state="$HOSTSTATE$" --output="$HOSTOUTPUT$" --long-output="$LONGHOSTOUTPUT$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Service notification command: same script and stream as notify-host-by-zulip,
# but additionally passes --service and uses the service state/output macros
# instead of the host ones.
define command {
|
||||||
|
command_name notify-service-by-zulip
|
||||||
|
command_line /usr/local/share/zulip/integrations/nagios/nagios-notify-zulip --stream="kandra ops" --type="$NOTIFICATIONTYPE$" --host="$HOSTADDRESS$" --service="$SERVICEDESC$" --state="$SERVICESTATE$" --output="$SERVICEOUTPUT$" --long-output="$LONGSERVICEOUTPUT$"
|
||||||
|
}
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# HOST CHECK COMMANDS
|
# HOST CHECK COMMANDS
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
|
@ -35,7 +35,7 @@ define service {
|
||||||
service_description Disk usage - pageable
|
service_description Disk usage - pageable
|
||||||
hostgroup_name pageable_servers
|
hostgroup_name pageable_servers
|
||||||
check_command check_remote_disk!20%!10%
|
check_command check_remote_disk!20%!10%
|
||||||
contact_groups page_admins
|
contact_groups ops_message
|
||||||
}
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
|
@ -43,7 +43,7 @@ define service {
|
||||||
service_description Disk usage
|
service_description Disk usage
|
||||||
hostgroup_name not_pageable_servers
|
hostgroup_name not_pageable_servers
|
||||||
check_command check_remote_disk!20%!10%
|
check_command check_remote_disk!20%!10%
|
||||||
contact_groups admins
|
contact_groups ops_message
|
||||||
}
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
|
@ -135,20 +135,37 @@ define service {
|
||||||
define service {
|
define service {
|
||||||
use generic-service
|
use generic-service
|
||||||
service_description HTTPS
|
service_description HTTPS
|
||||||
hostgroup_name frontends
|
hostgroup_name prod_frontends, fullstack
|
||||||
check_command check_https_status
|
check_command check_https_status
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Staging counterpart of the HTTPS check: same check_command as the
# prod_frontends/fullstack definition, but notifies the ops_message
# contact group instead of page_admins.
define service {
|
||||||
|
use generic-service
|
||||||
|
service_description HTTPS
|
||||||
|
hostgroup_name staging_frontends
|
||||||
|
check_command check_https_status
|
||||||
|
contact_groups ops_message
|
||||||
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
use generic-service
|
use generic-service
|
||||||
service_description Check send receive time
|
service_description Check send receive time
|
||||||
hostgroup_name frontends
|
hostgroup_name prod_frontends, fullstack
|
||||||
check_command check_send_receive_time!22
|
check_command check_send_receive_time!22
|
||||||
max_check_attempts 2
|
max_check_attempts 2
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Staging counterpart of the send/receive-time check: identical check_command
# and max_check_attempts to the prod_frontends/fullstack definition, but
# notifies ops_message instead of page_admins.
define service {
|
||||||
|
use generic-service
|
||||||
|
service_description Check send receive time
|
||||||
|
hostgroup_name staging_frontends
|
||||||
|
check_command check_send_receive_time!22
|
||||||
|
max_check_attempts 2
|
||||||
|
contact_groups ops_message
|
||||||
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
use generic-service
|
use generic-service
|
||||||
service_description user zephyr mirror liveness
|
service_description user zephyr mirror liveness
|
||||||
|
@ -280,19 +297,27 @@ define service {
|
||||||
use generic-service
|
use generic-service
|
||||||
service_description Check redis service
|
service_description Check redis service
|
||||||
# Both redis and frontends hostgroups, since frontends SSH proxy redis to themselves
|
# Both redis and frontends hostgroups, since frontends SSH proxy redis to themselves
|
||||||
hostgroup_name frontends, redis
|
hostgroup_name prod_frontends, fullstack, redis
|
||||||
check_command check_redis_ssh!22
|
check_command check_redis_ssh!22
|
||||||
max_check_attempts 3
|
max_check_attempts 3
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Staging counterpart of the redis check: covers only staging_frontends
# (the redis hostgroup stays on the page_admins definition) and notifies
# ops_message instead of page_admins.
define service {
|
||||||
|
use generic-service
|
||||||
|
service_description Check redis service
|
||||||
|
hostgroup_name staging_frontends
|
||||||
|
check_command check_redis_ssh!22
|
||||||
|
max_check_attempts 3
|
||||||
|
contact_groups ops_message
|
||||||
|
}
|
||||||
|
|
||||||
#### RabbitMQ / queue workers
|
#### RabbitMQ / queue workers
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
use generic-service
|
use generic-service
|
||||||
service_description Check RabbitMQ queue sizes
|
service_description Check RabbitMQ queue sizes
|
||||||
hostgroup_name frontends
|
hostgroup_name prod_frontends, fullstack
|
||||||
check_command check_rabbitmq_queues!22
|
check_command check_rabbitmq_queues!22
|
||||||
# Workaround weird checks 40s after first error causing alerts
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
# from a single failure because cron hasn't run again yet
|
# from a single failure because cron hasn't run again yet
|
||||||
|
@ -300,6 +325,17 @@ define service {
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Staging counterpart of the RabbitMQ queue-size check: same check_command
# as the prod_frontends/fullstack definition, but notifies ops_message
# instead of page_admins.
define service {
|
||||||
|
use generic-service
|
||||||
|
service_description Check RabbitMQ queue sizes
|
||||||
|
hostgroup_name staging_frontends
|
||||||
|
check_command check_rabbitmq_queues!22
|
||||||
|
# Workaround weird checks 40s after first error causing alerts
|
||||||
|
# from a single failure because cron hasn't run again yet
|
||||||
|
max_check_attempts 3
|
||||||
|
contact_groups ops_message
|
||||||
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
name rabbitmq-consumer-service
|
name rabbitmq-consumer-service
|
||||||
use generic-service
|
use generic-service
|
||||||
|
@ -315,10 +351,18 @@ define service {
|
||||||
define service {
|
define service {
|
||||||
use rabbitmq-consumer-service
|
use rabbitmq-consumer-service
|
||||||
service_description Check RabbitMQ notify_tornado consumers
|
service_description Check RabbitMQ notify_tornado consumers
|
||||||
|
hostgroup_name prod_frontends, fullstack
|
||||||
check_command check_rabbitmq_consumers!notify_tornado
|
check_command check_rabbitmq_consumers!notify_tornado
|
||||||
contact_groups page_admins
|
contact_groups page_admins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Staging counterpart of the notify_tornado consumer check: same check_command
# as the prod_frontends/fullstack definition. Notifies ops_message, like every
# other staging_frontends service, so that failures are sent as a Zulip message
# rather than paging.
define service {
|
||||||
|
use rabbitmq-consumer-service
|
||||||
|
service_description Check RabbitMQ notify_tornado consumers
|
||||||
|
hostgroup_name staging_frontends
|
||||||
|
check_command check_rabbitmq_consumers!notify_tornado
|
||||||
|
contact_groups ops_message
|
||||||
|
}
|
||||||
|
|
||||||
define service {
|
define service {
|
||||||
use rabbitmq-consumer-service
|
use rabbitmq-consumer-service
|
||||||
|
|
|
@ -48,6 +48,17 @@ define contact {
|
||||||
host_notification_commands notify-host-by-email
|
host_notification_commands notify-host-by-email
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Contact whose notifications are delivered by the notify-*-by-zulip commands
# rather than by email. Notifies at all hours (24x7); service options
# w,u,c,r = warning, unknown, critical, recovery; host options d,r = down, recovery.
define contact {
|
||||||
|
contact_name kandra-ops-in-czo
|
||||||
|
alias Notify kandra-ops on chat.zulip.org
|
||||||
|
service_notification_period 24x7
|
||||||
|
host_notification_period 24x7
|
||||||
|
service_notification_options w,u,c,r
|
||||||
|
host_notification_options d,r
|
||||||
|
service_notification_commands notify-service-by-zulip
|
||||||
|
host_notification_commands notify-host-by-zulip
|
||||||
|
}
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
###############################################################################
|
###############################################################################
|
||||||
#
|
#
|
||||||
|
@ -68,6 +79,12 @@ define contactgroup{
|
||||||
members monitoring,pager
|
members monitoring,pager
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Contact group for alerts that should be posted to Zulip: it contains the
# monitoring contact and kandra-ops-in-czo, and does not include the pager contact.
define contactgroup{
|
||||||
|
contactgroup_name ops_message
|
||||||
|
alias Message admins on CZO
|
||||||
|
members monitoring,kandra-ops-in-czo
|
||||||
|
}
|
||||||
|
|
||||||
define contactgroup{
|
define contactgroup{
|
||||||
contactgroup_name test
|
contactgroup_name test
|
||||||
alias Nagios Test Administrators
|
alias Nagios Test Administrators
|
||||||
|
|
Loading…
Reference in New Issue