diff --git a/puppet/zulip_ops/files/nagios4/conf.d/services.cfg b/puppet/zulip_ops/files/nagios4/conf.d/services.cfg index 0691c907cb..21ff728236 100644 --- a/puppet/zulip_ops/files/nagios4/conf.d/services.cfg +++ b/puppet/zulip_ops/files/nagios4/conf.d/services.cfg @@ -358,102 +358,70 @@ define service { } define service { + name rabbitmq-consumer-service use generic-service - service_description Check RabbitMQ notify_tornado consumers + service_description RabbitMQ consumer check template hostgroup_name frontends - check_command check_rabbitmq_consumers!notify_tornado # Workaround weird checks 40s after first error causing alerts # from a single failure because cron hasn't run again yet max_check_attempts 3 + contact_groups admins + register 0 +} + +define service { + use rabbitmq-consumer-service + service_description Check RabbitMQ notify_tornado consumers + check_command check_rabbitmq_consumers!notify_tornado contact_groups page_admins } define service { - use generic-service + use rabbitmq-consumer-service service_description Check RabbitMQ user_activity_interval consumers - hostgroup_name frontends check_command check_rabbitmq_consumers!user_activity_interval - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service { - use generic-service + use rabbitmq-consumer-service service_description Check RabbitMQ user_presence consumers - hostgroup_name frontends check_command check_rabbitmq_consumers!user_presence - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service { - use generic-service + use rabbitmq-consumer-service service_description Check RabbitMQ invites consumers - hostgroup_name frontends check_command check_rabbitmq_consumers!invites - # Workaround weird checks 40s after first error causing 
alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service { - use generic-service - service_description Check RabbitMQ digest email consumers - hostgroup_name frontends + use rabbitmq-consumer-service + service_description Check RabbitMQ digest_emails consumers check_command check_rabbitmq_consumers!digest_emails - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service { - use generic-service - service_description Check RabbitMQ email mirror consumers - hostgroup_name frontends + use rabbitmq-consumer-service + service_description Check RabbitMQ email_mirror consumers check_command check_rabbitmq_consumers!email_mirror - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service { - use generic-service - service_description Check RabbitMQ missedmessage mobile notifications consumers - hostgroup_name frontends + use rabbitmq-consumer-service + service_description Check RabbitMQ missedmessage_mobile_notifications consumers check_command check_rabbitmq_consumers!missedmessage_mobile_notifications - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service { - use generic-service - service_description Check RabbitMQ missedmessage email consumers - hostgroup_name frontends + use rabbitmq-consumer-service + service_description Check RabbitMQ missedmessage_emails consumers check_command check_rabbitmq_consumers!missedmessage_emails - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define 
service { - use generic-service - service_description Check RabbitMQ user activity consumers - hostgroup_name frontends + use rabbitmq-consumer-service + service_description Check RabbitMQ user_activity consumers check_command check_rabbitmq_consumers!user_activity - # Workaround weird checks 40s after first error causing alerts - # from a single failure because cron hasn't run again yet - max_check_attempts 3 - contact_groups admins } define service {