# Nagios service definitions.
#
# Every service inherits from the generic-service template and binds one
# check_command to a host (host_name) or to a group of hosts
# (hostgroup_name). Arguments to a check command follow the command name
# and are separated by "!".
#
# NOTE(review): the leading "22" argument passed to most commands is
# presumably the SSH port used to run the check remotely -- confirm
# against the command definitions.
# NOTE(review): judging by the comments below, the page_admins contact
# group pages someone while admins does not -- confirm against the
# contact group definitions.

## Basic reachability and resource checks

define service {
    use                     generic-service
    hostgroup_name          web
    service_description     HTTP
    check_command           check_http
    contact_groups          page_admins
}

define service {
    use                     generic-service
    hostgroup_name          all
    service_description     SSH
    check_command           check_ssh
    contact_groups          page_admins
}

define service {
    use                     generic-service
    hostgroup_name          all
    service_description     remote disk
    check_command           check_remote_disk!22!20%!10%!/
    contact_groups          page_admins
}

define service {
    use                     generic-service
    hostgroup_name          all
    service_description     remote load
    check_command           check_remote_load!22!5.0,4.0,3.0!10.0,6.0,4.0
    contact_groups          page_admins
}

define service {
    use                     generic-service
    hostgroup_name          all
    service_description     swap
    check_command           check_remote_swap!22!80%!50%
    contact_groups          admins
}

define service {
    use                     generic-service
    hostgroup_name          all
    service_description     Debian update availability
    check_command           check_debian_packages!22
    contact_groups          admins
}

define service {
    use                     generic-service
    hostgroup_name          all
    service_description     Check NTP time
    check_command           check_ntp_time!22
    contact_groups          admins
}

## Zephyr mirroring

define service {
    use                     generic-service
    host_name               zmirror
    service_description     zephyr mirror forwarding
    check_command           check_zephyr_mirror_forwarding!22
    normal_check_interval   2
    # Note: the actual check is run via cron, so retry_check_interval
    # should always equal normal_check_interval.
    retry_check_interval    2
    max_check_attempts      2
    contact_groups          page_admins
}

define service {
    use                     generic-service
    host_name               app
    service_description     user zephyr mirror liveness
    check_command           check_user_zephyr_mirror_liveness!22
    contact_groups          admins
}

# NOTE(review): this service targets a hostgroup named zmirror, while
# "zephyr mirror forwarding" targets a host named zmirror. Confirm that
# both a host and a hostgroup with that name exist.
# NOTE(review): "subscriptons" looks like a typo for "subscriptions". It
# is left unchanged because renaming a service_description changes the
# identity of the service.
define service {
    use                     generic-service
    hostgroup_name          zmirror
    service_description     zmirror subscriptons syncing
    check_command           check_sync_public_streams!22
    contact_groups          page_admins
}

## Bots and application-level checks

define service {
    use                     generic-service
    host_name               bots
    service_description     Check feedback bot
    check_command           check_feedback_bot!22
    # Feedback isn't lost, it just isn't reported through Humbug
    # if the bot is down, so don't page.
    contact_groups          admins
}

define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check send receive time
    check_command           check_send_receive_time!22
    contact_groups          admins
}

define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     process_user_activity bot
    check_command           check_process_user_activity!22
    # Activity isn't lost if this isn't running, but it is
    # critical that it arrive to avoid falsely telling MIT folks
    # their mirrors are down.
    # This service is also responsible for active/idle
    # status.
    contact_groups          page_admins
}

define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     sync signups from humbug to mailchimp
    check_command           check_process_mailchimp_signups!22
    contact_groups          admins
}

define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     send confirmation emails
    check_command           check_send_confirmation_emails!22
    contact_groups          admins
}

## RabbitMQ

define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq queue sizes
    check_command           check_rabbitmq_queues!22
    contact_groups          page_admins
}

# NOTE(review): test_leo looks like a temporary or personal contact group.
# Confirm whether this check should notify admins or page_admins instead.
define service {
    use                     generic-service
    hostgroup_name          frontends
    service_description     Check rabbitmq notify_tornado consumers
    check_command           check_rabbitmq_consumers!22
    contact_groups          test_leo
}

## check_postgres.pl services

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres autovac_freeze
    check_command           check_postgres!22!autovac_freeze
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres backends
    check_command           check_postgres!22!backends
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres connection
    check_command           check_postgres!22!connection
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres hitratio
    check_command           check_postgres!22!hitratio
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres locks
    check_command           check_postgres!22!locks
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres query_time
    check_command           check_postgres_alert_args!22!query_time!20 seconds!40 seconds
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres sequence
    check_command           check_postgres!22!sequence
    contact_groups          admins
}

define service {
    use                     generic-service
    host_name               postgres
    service_description     Check postgres timesync
    check_command           check_postgres!22!timesync
    contact_groups          admins
}

# Disabled checks, kept for reference.
# define service {
#     use                     generic-service
#     host_name               postgres
#     service_description     Check postgres txn_idle
#     check_command           check_postgres_alert_args!22!txn_idle!20 seconds!40 seconds
#     contact_groups          admins
# }

# define service {
#     use                     generic-service
#     host_name               postgres
#     service_description     Check postgres txn_time
#     check_command           check_postgres_alert_args!22!txn_time!20 seconds!40 seconds
#     contact_groups          admins
# }