diff --git a/puppet/zulip_internal/files/nagios3/conf.d/hosts.cfg b/puppet/zulip_internal/files/nagios3/conf.d/hosts.cfg index 4674653fcc..dc0c687162 100644 --- a/puppet/zulip_internal/files/nagios3/conf.d/hosts.cfg +++ b/puppet/zulip_internal/files/nagios3/conf.d/hosts.cfg @@ -43,7 +43,7 @@ define host{ host_name zmirror alias zmirror address zmirror.zulip.net - hostgroups all,zmirror,pageable_servers + hostgroups all,zmirror } define host{ @@ -51,7 +51,7 @@ define host{ host_name zmirror2 alias zmirror2 address zmirror2.zulip.net - hostgroups all,pageable_servers + hostgroups all,zmirror } define host{ diff --git a/puppet/zulip_internal/files/nagios3/conf.d/services.cfg b/puppet/zulip_internal/files/nagios3/conf.d/services.cfg index dc8d6e8691..d11fbf904c 100644 --- a/puppet/zulip_internal/files/nagios3/conf.d/services.cfg +++ b/puppet/zulip_internal/files/nagios3/conf.d/services.cfg @@ -14,6 +14,17 @@ define service { contact_groups page_admins } +define service { + use generic-service + hostgroup_name zmirror + service_description SSH for flaky machines + check_command check_ssh + normal_check_interval 2 + retry_check_interval 2 + max_check_attempts 5 + contact_groups admins +} + define service { use generic-service hostgroup_name not_pageable_servers @@ -30,6 +41,17 @@ define service { contact_groups page_admins } +define service { + use generic-service + hostgroup_name zmirror + service_description remote disk for flaky machines + check_command check_remote_disk!20%!10% + normal_check_interval 2 + retry_check_interval 2 + max_check_attempts 5 + contact_groups admins +} + define service { use generic-service hostgroup_name not_pageable_servers @@ -54,6 +76,18 @@ define service { contact_groups admins } + +define service { + use generic-service + hostgroup_name zmirror + service_description remote load for flaky machines + check_command check_remote_load!22!5.0,4.0,3.0!10.0,6.0,4.0 + normal_check_interval 2 + retry_check_interval 2 + max_check_attempts 5 + contact_groups admins +} + define service { use generic-service service_description zephyr mirror forwarding @@ -63,7 +97,7 @@ define service { # Note: the actual check is run via cron, so retry_check_interval # should always equal normal_check_interval. retry_check_interval 2 - max_check_attempts 2 + max_check_attempts 5 contact_groups admins } @@ -257,10 +291,13 @@ define service { define service { use generic-service - service_description zmirror subscriptons syncing + service_description zmirror subscriptions syncing check_command check_sync_public_streams!22 - hostgroup_name zmirror - contact_groups page_admins + host zmirror + normal_check_interval 2 + retry_check_interval 2 + max_check_attempts 5 + contact_groups admins } define service {