mirror of https://github.com/zulip/zulip.git
nagios: Set max_check_attempts to 3 for rabbitmq consumers.
This works around the fact that we seem to have a mysterious extra checkup 40s after an error first occurs with these checks, which always fails because the data is updated by a cron job that runs every minute. (imported from commit e7fe9c85e8399115443269287e695b140b4443ff)
This commit is contained in:
parent
5b8aa359dd
commit
ddc9c53e1b
|
@ -256,6 +256,9 @@ define service {
|
|||
use generic-service
|
||||
service_description Check rabbitmq queue sizes
|
||||
check_command check_rabbitmq_queues!22
|
||||
# Workaround weird checks 40s after first error causing alerts
|
||||
# from a single failure because cron hasn't run again yet
|
||||
max_check_attempts 3
|
||||
hostgroup_name frontends
|
||||
contact_groups page_admins
|
||||
}
|
||||
|
@ -264,6 +267,9 @@ define service {
|
|||
use generic-service
|
||||
service_description Check rabbitmq notify_tornado consumers
|
||||
check_command check_rabbitmq_tornado_consumers!22
|
||||
# Workaround weird checks 40s after first error causing alerts
|
||||
# from a single failure because cron hasn't run again yet
|
||||
max_check_attempts 3
|
||||
hostgroup_name frontends
|
||||
contact_groups page_admins
|
||||
}
|
||||
|
@ -272,6 +278,9 @@ define service {
|
|||
use generic-service
|
||||
service_description Check rabbitmq useractivity consumers
|
||||
check_command check_rabbitmq_useractivity_consumers!22
|
||||
# Workaround weird checks 40s after first error causing alerts
|
||||
# from a single failure because cron hasn't run again yet
|
||||
max_check_attempts 3
|
||||
hostgroup_name frontends
|
||||
contact_groups admins
|
||||
}
|
||||
|
@ -280,6 +289,9 @@ define service {
|
|||
use generic-service
|
||||
service_description Check rabbitmq invites consumers
|
||||
check_command check_rabbitmq_invites_consumers!22
|
||||
# Workaround weird checks 40s after first error causing alerts
|
||||
# from a single failure because cron hasn't run again yet
|
||||
max_check_attempts 3
|
||||
hostgroup_name frontends
|
||||
contact_groups admins
|
||||
}
|
||||
|
@ -288,6 +300,9 @@ define service {
|
|||
use generic-service
|
||||
service_description Check rabbitmq signups consumers
|
||||
check_command check_rabbitmq_signups_consumers!22
|
||||
# Workaround weird checks 40s after first error causing alerts
|
||||
# from a single failure because cron hasn't run again yet
|
||||
max_check_attempts 3
|
||||
hostgroup_name frontends
|
||||
contact_groups admins
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue