[puppet]: Adjust zmirror Nagios checks to be more tolerant of a bad network.

We get a lot of alerts and sometimes pages due to network blips.

(imported from commit 4766585e71533b8551d49fa61bc4653114a65457)
This commit is contained in:
Jessica McKellar 2014-03-07 18:45:16 -05:00 committed by Steve Howell
parent 9d8758557b
commit e7ef654b45
2 changed files with 43 additions and 6 deletions

View File

@ -43,7 +43,7 @@ define host{
host_name zmirror
alias zmirror
address zmirror.zulip.net
hostgroups all,zmirror,pageable_servers
hostgroups all,zmirror
}
define host{
@ -51,7 +51,7 @@ define host{
host_name zmirror2
alias zmirror2
address zmirror2.zulip.net
hostgroups all,pageable_servers
hostgroups all,zmirror
}
define host{

View File

@ -14,6 +14,17 @@ define service {
contact_groups page_admins
}
# SSH check for hosts in the zmirror hostgroup, whose network is unreliable.
# It allows up to 5 check attempts, spaced 2 interval units apart (minutes
# under Nagios's default interval_length -- confirm against nagios.cfg), and
# notifies the non-paging "admins" contact group rather than "page_admins",
# so short network blips do not page anyone.
define service {
use generic-service
hostgroup_name zmirror
service_description SSH for flaky machines
check_command check_ssh
normal_check_interval 2
retry_check_interval 2
max_check_attempts 5
contact_groups admins
}
define service {
use generic-service
hostgroup_name not_pageable_servers
@ -30,6 +41,17 @@ define service {
contact_groups page_admins
}
# Remote disk check for hosts in the zmirror hostgroup, whose network is
# unreliable. It allows up to 5 check attempts, 2 interval units apart, and
# notifies "admins" only, so a transient network failure does not alert
# immediately.
# NOTE(review): the 20% / 10% arguments are presumably the warning / critical
# thresholds of check_remote_disk -- confirm against the command definition,
# which is not visible here.
define service {
use generic-service
hostgroup_name zmirror
service_description remote disk for flaky machines
check_command check_remote_disk!20%!10%
normal_check_interval 2
retry_check_interval 2
max_check_attempts 5
contact_groups admins
}
define service {
use generic-service
hostgroup_name not_pageable_servers
@ -54,6 +76,18 @@ define service {
contact_groups admins
}
# Remote load check for hosts in the zmirror hostgroup, whose network is
# unreliable. It allows up to 5 check attempts, 2 interval units apart, and
# notifies "admins" only, so a transient network failure does not alert
# immediately.
# NOTE(review): the first argument (22) is presumably the SSH port, and the
# two triples presumably the warning / critical 1/5/15-minute load averages
# -- confirm against the check_remote_load command definition, which is not
# visible here.
define service {
use generic-service
hostgroup_name zmirror
service_description remote load for flaky machines
check_command check_remote_load!22!5.0,4.0,3.0!10.0,6.0,4.0
normal_check_interval 2
retry_check_interval 2
max_check_attempts 5
contact_groups admins
}
define service {
use generic-service
service_description zephyr mirror forwarding
@ -63,7 +97,7 @@ define service {
# Note: the actual check is run via cron, so retry_check_interval
# should always equal normal_check_interval.
retry_check_interval 2
max_check_attempts 2
max_check_attempts 5
contact_groups admins
}
@ -257,10 +291,13 @@ define service {
define service {
use generic-service
service_description zmirror subscriptons syncing
service_description zmirror subscriptions syncing
check_command check_sync_public_streams!22
hostgroup_name zmirror
contact_groups page_admins
host zmirror
normal_check_interval 2
retry_check_interval 2
max_check_attempts 5
contact_groups admins
}
define service {