nagios: Update configuration for user_activity worker change.

LoopQueueProcessingWorker jobs poll their queue rather than consuming
events as they arrive, so they can't be monitored by checking for
connected consumers with check_consumers.  That's OK, because that
monitoring was redundant with the monitoring for potential growth in
their queues that we have as well.

Also clean up the block comments for the two other similar queue
processors.
This commit is contained in:
Tim Abbott 2019-09-21 12:09:07 -07:00
parent 3646936cfb
commit e4dee9532c
1 changed files with 4 additions and 34 deletions

View File

@ -403,17 +403,6 @@ define service {
contact_groups page_admins
}
# Service check for consumers of the user_activity RabbitMQ queue.
# Inherits from the generic-service template, applies to hosts in the
# frontends hostgroup, and notifies the admins contact group.
define service {
use generic-service
service_description Check rabbitmq user_activity consumers
# Runs check_rabbitmq_consumers with "user_activity" as its argument
# ("!" separates the command name from its arguments).
# NOTE(review): the command definition is not visible here; presumably it
# checks that a consumer is connected to the named queue -- confirm.
check_command check_rabbitmq_consumers!user_activity
# Work around Nagios re-checking about 40s after the first error, which
# would otherwise alert on a single failure because cron hasn't run
# again yet; allow 3 check attempts before the failure is treated as real.
max_check_attempts 3
hostgroup_name frontends
contact_groups admins
}
define service {
use generic-service
service_description Check rabbitmq user_activity_interval consumers
@ -502,18 +491,6 @@ define service {
contact_groups page_admins
}
### The missedmessage_emails queue processor batches events, so don't monitor it this way
# define service {
# use generic-service
# service_description Check rabbitmq missedmessage emails consumers
# check_command check_rabbitmq_consumers!missedmessage_emails
# # Workaround weird checks 40s after first error causing alerts
# # from a single failure because cron hasn't run again yet
# max_check_attempts 3
# hostgroup_name frontends
# contact_groups admins
# }
define service {
use generic-service
service_description Check rabbitmq missedmessage mobile notifications consumers
@ -525,17 +502,10 @@ define service {
contact_groups admins
}
### The slow_queries queue consumer batches events, so don't monitor it this way
# define service {
# use generic-service
# service_description Check rabbitmq slow queries consumers
# check_command check_rabbitmq_consumers!slow_queries
# # Workaround weird checks 40s after first error causing alerts
# # from a single failure because cron hasn't run again yet
# max_check_attempts 3
# hostgroup_name frontends
# contact_groups admins
# }
# The following queue workers batch-process events and thus can't be
# monitored by checking for running consumers:
#
# user_activity, slow_queries, missedmessage_emails
define service {
use generic-service