2013-01-24 17:43:47 +01:00
|
|
|
# HTTPS status check for every host in the "web" hostgroup; notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          web
    service_description     HTTPS
    check_command           check_https_status
    contact_groups          page_admins
}
|
2012-11-01 15:47:50 +01:00
|
|
|
|
|
|
|
# SSH check for the pageable_servers hostgroup; notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          pageable_servers
    service_description     SSH - pageable
    check_command           check_ssh
    contact_groups          page_admins
}
|
|
|
|
|
2014-03-08 00:45:16 +01:00
|
|
|
# SSH check for the zmirror hostgroup ("flaky machines"): checked every 2
# interval units, with 5 attempts before a problem is considered hard.
define service {
    use                     generic-service
    hostgroup_name          zmirror
    service_description     SSH for flaky machines
    check_command           check_ssh
    normal_check_interval   2
    retry_check_interval    2
    max_check_attempts      5
    contact_groups          admins
}
|
|
|
|
|
2013-06-04 22:07:49 +02:00
|
|
|
# SSH check for the not_pageable_servers hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          not_pageable_servers
    service_description     SSH
    check_command           check_ssh
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Remote disk check (arguments 20% / 10%) for pageable_servers; notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          pageable_servers
    service_description     remote disk - pageable
    check_command           check_remote_disk!20%!10%
    contact_groups          page_admins
}
|
2012-11-16 16:42:52 +01:00
|
|
|
|
2014-03-08 00:45:16 +01:00
|
|
|
# Remote disk check for the zmirror hostgroup ("flaky machines"), using the
# same 2/2/5 interval and attempt settings as the other flaky-machine services.
define service {
    use                     generic-service
    hostgroup_name          zmirror
    service_description     remote disk for flaky machines
    check_command           check_remote_disk!20%!10%
    normal_check_interval   2
    retry_check_interval    2
    max_check_attempts      5
    contact_groups          admins
}
|
|
|
|
|
2012-11-16 16:42:52 +01:00
|
|
|
# Remote disk check (arguments 20% / 10%) for not_pageable_servers; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          not_pageable_servers
    service_description     remote disk
    check_command           check_remote_disk!20%!10%
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Remote load check for not_pageable_servers; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          not_pageable_servers
    service_description     remote load
    check_command           check_remote_load!22!5.0,4.0,3.0!10.0,6.0,4.0
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Remote load check for pageable_servers.
# NOTE(review): the other "- pageable" services notify page_admins, but this
# one notifies admins -- confirm that load alerts are deliberately non-paging.
define service {
    use                     generic-service
    hostgroup_name          pageable_servers
    service_description     remote load - pageable
    check_command           check_remote_load!22!5.0,4.0,3.0!10.0,6.0,4.0
    contact_groups          admins
}
|
2012-11-16 17:19:53 +01:00
|
|
|
|
2014-03-08 00:45:16 +01:00
|
|
|
|
|
|
|
# Remote load check for the zmirror hostgroup ("flaky machines"), using the
# same 2/2/5 interval and attempt settings as the other flaky-machine services.
define service {
    use                     generic-service
    hostgroup_name          zmirror
    service_description     remote load for flaky machines
    check_command           check_remote_load!22!5.0,4.0,3.0!10.0,6.0,4.0
    normal_check_interval   2
    retry_check_interval    2
    max_check_attempts      5
    contact_groups          admins
}
|
|
|
|
|
2012-11-16 17:19:53 +01:00
|
|
|
# Zephyr mirror forwarding check for the zmirror_main hostgroup; notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          zmirror_main
    service_description     zephyr mirror forwarding
    check_command           check_zephyr_mirror_forwarding!22
    normal_check_interval   2
    # Note: the actual check is run via cron, so retry_check_interval
    # should always equal normal_check_interval.
    retry_check_interval    2
    max_check_attempts      5
    contact_groups          page_admins
}
|
2012-11-16 17:20:12 +01:00
|
|
|
|
|
|
|
# User zephyr mirror liveness check for the prod_frontends hostgroup; notifies admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          prod_frontends
    service_description     user zephyr mirror liveness
    check_command           check_user_zephyr_mirror_liveness!22
    contact_groups          admins
}
|
2012-11-21 20:17:15 +01:00
|
|
|
|
2013-08-26 21:43:29 +02:00
|
|
|
# Personal zephyr mirror check for the zmirrorp hostgroup; notifies admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          zmirrorp
    service_description     Check personal zephyr mirrors
    check_command           check_personal_zephyr_mirrors!22
    contact_groups          admins
}
|
|
|
|
|
2012-11-21 20:17:15 +01:00
|
|
|
# Debian package update availability check for the "all" hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          all
    service_description     Debian update availability
    check_command           check_debian_packages!22
    contact_groups          admins
}
|
2013-01-02 23:49:05 +01:00
|
|
|
|
|
|
|
# NTP time check for the "all" hostgroup, with 3 check attempts; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          all
    service_description     Check NTP time
    check_command           check_ntp_time!22
    max_check_attempts      3
    contact_groups          admins
}
|
2013-01-03 16:56:36 +01:00
|
|
|
|
2013-01-30 16:12:15 +01:00
|
|
|
# Send/receive time check for the frontends hostgroup, with 2 check attempts;
# notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check send receive time
    check_command           check_send_receive_time!22
    max_check_attempts      2
    contact_groups          page_admins
}
|
2013-01-30 16:14:38 +01:00
|
|
|
|
2017-02-08 20:12:25 +01:00
|
|
|
# Websockets variant of the send/receive time check for the frontends
# hostgroup, with 2 check attempts; notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check send receive time_websockets
    check_command           check_send_receive_time_websockets!22
    max_check_attempts      2
    contact_groups          page_admins
}
|
|
|
|
|
2013-01-30 16:14:38 +01:00
|
|
|
## check_postgres.pl services for the application database (postgres_appdb* hostgroups)
|
|
|
|
|
|
|
|
# check_postgres "autovac_freeze" action for the postgres_appdb_primary hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb_primary
    service_description     Check postgres autovac_freeze
    check_command           check_postgres!zulip!nagios!autovac_freeze
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "backends" action for the postgres_appdb hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres backends
    check_command           check_postgres!zulip!nagios!backends
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "connection" action for the postgres_appdb hostgroup;
# notifies page_admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres connection
    check_command           check_postgres!zulip!nagios!connection
    contact_groups          page_admins
}
|
|
|
|
|
2014-03-12 18:02:48 +01:00
|
|
|
# check_postgres "disabled_triggers" action for the postgres_appdb hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres disabled triggers
    check_command           check_postgres!zulip!nagios!disabled_triggers
    contact_groups          admins
}
|
|
|
|
|
2013-01-30 16:14:38 +01:00
|
|
|
# check_postgres "hitratio" action for the postgres_appdb hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres hitratio
    check_command           check_postgres!zulip!nagios!hitratio
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "locks" action for the postgres_appdb hostgroup, passing
# 100 and 200 as the alert arguments.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres locks
    check_command           check_postgres_alert_args!zulip!nagios!locks!100!200
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "query_time" action for the postgres_appdb hostgroup, passing
# "20 seconds" and "40 seconds" as the alert arguments.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres query_time
    check_command           check_postgres_alert_args!zulip!nagios!query_time!20 seconds!40 seconds
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "sequence" action for the postgres_appdb hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres sequence
    check_command           check_postgres!zulip!nagios!sequence
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "timesync" action for the postgres_appdb hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres timesync
    check_command           check_postgres!zulip!nagios!timesync
    contact_groups          admins
}
|
|
|
|
|
2014-03-17 15:47:26 +01:00
|
|
|
# define service {
|
|
|
|
# use generic-service
|
|
|
|
# service_description Check postgres txn_idle
|
2016-07-31 07:51:00 +02:00
|
|
|
# check_command check_postgres_alert_args!zulip!nagios!txn_idle!20 seconds!40 seconds
|
2014-03-17 15:47:26 +01:00
|
|
|
# hostgroup postgres_appdb
|
|
|
|
# contact_groups admins
|
|
|
|
# }
|
2014-03-12 17:14:13 +01:00
|
|
|
|
|
|
|
# check_postgres "txn_time" action for the postgres_appdb hostgroup, passing
# "20 seconds" and "40 seconds" as the alert arguments.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check postgres txn_time
    check_command           check_postgres_alert_args!zulip!nagios!txn_time!20 seconds!40 seconds
    contact_groups          admins
}
|
2013-01-30 15:59:41 +01:00
|
|
|
|
2014-01-21 20:56:02 +01:00
|
|
|
## non-appdb check_postgres.pl checks
|
|
|
|
|
|
|
|
# check_postgres "autovac_freeze" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
# NOTE(review): the appdb services pass "nagios" as the second argument where
# this passes "zulip" -- confirm that value is intended for the wiki/trac hosts.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres autovac_freeze
    check_command           check_postgres!wiki,trac!zulip!autovac_freeze
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "connection" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres connection
    check_command           check_postgres!wiki,trac!zulip!connection
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "backends" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres backends
    check_command           check_postgres!wiki,trac!zulip!backends
    contact_groups          admins
}
|
|
|
|
|
2014-03-12 18:02:48 +01:00
|
|
|
# check_postgres "disabled_triggers" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres disabled triggers
    check_command           check_postgres!wiki,trac!zulip!disabled_triggers
    contact_groups          admins
}
|
|
|
|
|
2014-01-21 20:56:02 +01:00
|
|
|
# check_postgres "hitratio" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres hitratio
    check_command           check_postgres!wiki,trac!zulip!hitratio
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "locks" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres locks
    check_command           check_postgres!wiki,trac!zulip!locks
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# check_postgres "timesync" action for the postgres_other hostgroup.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres timesync
    check_command           check_postgres!wiki,trac!zulip!timesync
    contact_groups          admins
}
|
|
|
|
|
2014-03-12 17:14:13 +01:00
|
|
|
# check_postgres "txn_time" action for the postgres_other hostgroup, passing
# "20 seconds" and "40 seconds" as the alert arguments.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_other
    service_description     Check postgres txn_time
    check_command           check_postgres_alert_args!wiki,trac!zulip!txn_time!20 seconds!40 seconds
    contact_groups          admins
}
|
|
|
|
|
2013-08-07 17:56:31 +02:00
|
|
|
# FTS update log length check for the postgres_appdb hostgroup; notifies admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres_appdb
    service_description     Check FTS update log length
    check_command           check_fts_update_log
    contact_groups          admins
}
|
|
|
|
|
2013-08-26 22:52:19 +02:00
|
|
|
# Last-backup-time check for the postgres hostgroup; notifies admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          postgres
    service_description     Check last Postgres backup time
    check_command           check_postgres_backup
    contact_groups          admins
}
|
|
|
|
|
2013-02-05 19:17:49 +01:00
|
|
|
# Public-stream subscription sync check for the zmirror_main hostgroup, with
# the 2/2/5 interval and attempt settings used by the other zmirror services.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          zmirror_main
    service_description     zmirror subscriptions syncing
    check_command           check_sync_public_streams!22
    normal_check_interval   2
    retry_check_interval    2
    max_check_attempts      5
    contact_groups          admins
}
|
2013-02-19 18:06:25 +01:00
|
|
|
|
2016-08-01 05:32:30 +02:00
|
|
|
# Redis service check for the frontends and redis hostgroups, with 3 check
# attempts; notifies page_admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          frontends, redis
    service_description     Check redis service
    check_command           check_redis_ssh!22
    max_check_attempts      3
    contact_groups          page_admins
}
|
|
|
|
|
|
|
|
# Memcached service check for the frontends hostgroup, with 3 check attempts;
# notifies page_admins.
# Uses the documented hostgroup_name directive, like the other services in this file.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check memcached service
    check_command           check_memcached_ssh!22
    max_check_attempts      3
    contact_groups          page_admins
}
|
|
|
|
|
2013-02-19 18:06:25 +01:00
|
|
|
# RabbitMQ queue size check for the frontends hostgroup; notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq queue sizes
    check_command           check_rabbitmq_queues!22
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          page_admins
}
|
2013-02-19 21:29:38 +01:00
|
|
|
|
2013-10-29 21:46:31 +01:00
|
|
|
# Queue worker error check for the frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check for queue worker errors.
    check_command           check_queue_worker_errors!22
    contact_groups          admins
}
|
|
|
|
|
2013-04-16 20:07:53 +02:00
|
|
|
# Consumer check for the notify_tornado queue on the frontends hostgroup;
# notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq notify_tornado consumers
    check_command           check_rabbitmq_consumers!notify_tornado
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          page_admins
}
|
|
|
|
|
2016-08-13 04:29:11 +02:00
|
|
|
# Consumer check for the tornado_return queue on the frontends hostgroup;
# notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq tornado_return consumers
    check_command           check_rabbitmq_consumers!tornado_return
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          page_admins
}
|
|
|
|
|
2013-05-16 19:01:55 +02:00
|
|
|
# Consumer check for the user_activity queue on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq user_activity consumers
    check_command           check_rabbitmq_consumers!user_activity
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2013-09-23 21:51:37 +02:00
|
|
|
# Consumer check for the user_activity_interval queue on the frontends
# hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq user_activity_interval consumers
    check_command           check_rabbitmq_consumers!user_activity_interval
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Consumer check for the user_presence queue on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq user_presence consumers
    check_command           check_rabbitmq_consumers!user_presence
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2013-05-16 19:01:55 +02:00
|
|
|
# Consumer check for the invites queue on the frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq invites consumers
    check_command           check_rabbitmq_consumers!invites
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Consumer check for the signups queue on the frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq signups consumers
    check_command           check_rabbitmq_consumers!signups
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2016-08-13 04:29:11 +02:00
|
|
|
# Consumer check for the digest_emails queue on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq digest email consumers
    check_command           check_rabbitmq_consumers!digest_emails
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Consumer check for the email_mirror queue on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq email mirror consumers
    check_command           check_rabbitmq_consumers!email_mirror
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Consumer check for the feedback_messages queue on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq feedback messages consumers
    check_command           check_rabbitmq_consumers!feedback_messages
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Consumer check for the message_sender queue on the frontends hostgroup;
# notifies page_admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq message sender consumers
    check_command           check_rabbitmq_consumers!message_sender
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          page_admins
}
|
|
|
|
|
|
|
|
### The missedmessage_emails queue processor batches events, so don't monitor it this way
|
|
|
|
# define service {
|
|
|
|
# use generic-service
|
|
|
|
# service_description Check rabbitmq missedmessage emails consumers
|
|
|
|
# check_command check_rabbitmq_consumers!missedmessage_emails
|
|
|
|
# # Workaround weird checks 40s after first error causing alerts
|
|
|
|
# # from a single failure because cron hasn't run again yet
|
|
|
|
# max_check_attempts 3
|
|
|
|
# hostgroup_name frontends
|
|
|
|
# contact_groups admins
|
|
|
|
# }
|
|
|
|
|
|
|
|
# Consumer check for the missedmessage_mobile_notifications queue on the
# frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq missedmessage mobile notifications consumers
    check_command           check_rabbitmq_consumers!missedmessage_mobile_notifications
    # Work around the odd re-check ~40s after the first error, which caused
    # alerts from a single failure because cron hadn't run again yet.
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
|
|
|
### The slow_queries queue consumer batches events, so don't monitor it this way
|
|
|
|
# define service {
|
|
|
|
# use generic-service
|
|
|
|
# service_description Check rabbitmq slow queries consumers
|
|
|
|
# check_command check_rabbitmq_consumers!slow_queries
|
|
|
|
# # Workaround weird checks 40s after first error causing alerts
|
|
|
|
# # from a single failure because cron hasn't run again yet
|
|
|
|
# max_check_attempts 3
|
|
|
|
# hostgroup_name frontends
|
|
|
|
# contact_groups admins
|
|
|
|
# }
|
|
|
|
|
2017-05-25 20:41:29 +02:00
|
|
|
# Process check for the embedded_bots queue worker on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check embedded_bots queue processor
    check_command           check_remote_arg_string!manage.py process_queue --queue_name=embedded_bots!1:1!1:1
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2013-10-04 23:49:00 +02:00
|
|
|
# Process check for the missedmessage_emails queue worker on the frontends
# hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check missedmessage_emails queue processor
    check_command           check_remote_arg_string!manage.py process_queue --queue_name=missedmessage_emails!1:1!1:1
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2017-03-06 08:45:59 +01:00
|
|
|
# Process check for the missedmessage_email_senders queue worker on the
# frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check missedmessage_email_senders queue processor
    check_command           check_remote_arg_string!manage.py process_queue --queue_name=missedmessage_email_senders!1:1!1:1
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2017-04-20 22:04:08 +02:00
|
|
|
# Process check for the outgoing_webhooks queue worker on the frontends
# hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check outgoing webhooks queue processor
    check_command           check_remote_arg_string!manage.py process_queue --queue_name=outgoing_webhooks!1:1!1:1
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2013-10-04 23:49:00 +02:00
|
|
|
# Process check for the slow_queries queue worker on the frontends hostgroup;
# notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check slow_queries queue processor
    check_command           check_remote_arg_string!manage.py process_queue --queue_name=slow_queries!1:1!1:1
    max_check_attempts      3
    contact_groups          admins
}
|
|
|
|
|
2013-12-03 22:01:49 +01:00
|
|
|
# Worker memory usage check for the frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check worker memory usage
    check_command           check_worker_memory
    contact_groups          admins
}
|
|
|
|
|
2013-02-19 21:29:38 +01:00
|
|
|
# Remote swap check (arguments 80% / 50%) for the "all" hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          all
    service_description     swap
    check_command           check_remote_swap!22!80%!50%
    contact_groups          admins
}
|
2013-08-26 18:22:09 +02:00
|
|
|
|
2013-11-12 21:31:06 +01:00
|
|
|
# Email deliverer process check for the staging_frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          staging_frontends
    service_description     Check email deliverer process which is only used on Zulip Voyager
    check_command           check_email_deliverer_process
    contact_groups          admins
}
|
|
|
|
|
|
|
|
# Email deliverer backlog check for the staging_frontends hostgroup; notifies admins.
define service {
    use                     generic-service
    hostgroup_name          staging_frontends
    service_description     Check email deliverer backlog which is only used on Zulip Voyager
    check_command           check_email_deliverer_backlog
    contact_groups          admins
}
|