From dc082cd96d0d0325215e1b934b3ea5d5b91057e9 Mon Sep 17 00:00:00 2001 From: Zev Benjamin Date: Mon, 23 Sep 2013 15:51:37 -0400 Subject: [PATCH] puppet: Add nagios notifications for the rest of our rabbitmq queues (imported from commit 9d21a0ca3662396c436b482c574113d0cbc714a0) --- .../zulip/files/cron.d/rabbitmq-numconsumers | 2 ++ .../zulip/files/nagios3/conf.d/services.cfg | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/servers/puppet/modules/zulip/files/cron.d/rabbitmq-numconsumers b/servers/puppet/modules/zulip/files/cron.d/rabbitmq-numconsumers index f4ca77e8a8..ae950aeddb 100644 --- a/servers/puppet/modules/zulip/files/cron.d/rabbitmq-numconsumers +++ b/servers/puppet/modules/zulip/files/cron.d/rabbitmq-numconsumers @@ -4,5 +4,7 @@ USER=root * * * * * root /home/humbug/humbug-deployments/current/tools/write-rabbitmq-consumers-state-file notify_tornado * * * * * root /home/humbug/humbug-deployments/current/tools/write-rabbitmq-consumers-state-file user_activity +* * * * * root /home/humbug/humbug-deployments/current/tools/write-rabbitmq-consumers-state-file user_activity_interval +* * * * * root /home/humbug/humbug-deployments/current/tools/write-rabbitmq-consumers-state-file user_presence * * * * * root /home/humbug/humbug-deployments/current/tools/write-rabbitmq-consumers-state-file invites * * * * * root /home/humbug/humbug-deployments/current/tools/write-rabbitmq-consumers-state-file signups diff --git a/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg b/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg index 89351423db..32a51f1bd4 100644 --- a/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg +++ b/servers/puppet/modules/zulip/files/nagios3/conf.d/services.cfg @@ -285,6 +285,28 @@ define service { contact_groups admins } +define service { + use generic-service + service_description Check rabbitmq user_activity_interval consumers + check_command check_rabbitmq_consumers!user_activity_interval 
+ # Work around weird checks 40s after first error causing alerts + # from a single failure because cron hasn't run again yet + max_check_attempts 3 + hostgroup_name frontends + contact_groups admins +} + +define service { + use generic-service + service_description Check rabbitmq user_presence consumers + check_command check_rabbitmq_consumers!user_presence + # Work around weird checks 40s after first error causing alerts + # from a single failure because cron hasn't run again yet + max_check_attempts 3 + hostgroup_name frontends + contact_groups admins +} + define service { use generic-service service_description Check rabbitmq invites consumers