diff --git a/puppet/zulip/files/nagios_plugins/zulip_postgresql/check_fts_update_log b/puppet/zulip/files/nagios_plugins/zulip_postgresql/check_fts_update_log deleted file mode 100755 index fd50c6b455..0000000000 --- a/puppet/zulip/files/nagios_plugins/zulip_postgresql/check_fts_update_log +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -""" -Nagios plugin to check the length of the FTS update log. -""" -import sys - -sys.path.append("/home/zulip/deployments/current") -try: - from scripts.lib.setup_path import setup_path - - setup_path() -except ImportError: - pass - -import psycopg2 - -states = { - "OK": 0, - "WARNING": 1, - "CRITICAL": 2, - "UNKNOWN": 3, -} - - -def report(state: str, num: str) -> None: - print(f"{state}: {num} rows in fts_update_log table") - exit(states[state]) - - -conn = psycopg2.connect(database="zulip") -cursor = conn.cursor() - -cursor.execute("SELECT count(*) FROM fts_update_log") -num = cursor.fetchall()[0][0] - -if num > 5: - report("CRITICAL", num) - -report("OK", num) diff --git a/puppet/zulip/files/postgresql/process_fts_updates b/puppet/zulip/files/postgresql/process_fts_updates index 6386100020..4954cfaf8a 100755 --- a/puppet/zulip/files/postgresql/process_fts_updates +++ b/puppet/zulip/files/postgresql/process_fts_updates @@ -41,6 +41,7 @@ BATCH_SIZE = 1000 parser = argparse.ArgumentParser() parser.add_argument("--quiet", action="store_true") +parser.add_argument("--nagios-check", action="store_true") options = parser.parse_args() logging.Formatter.converter = time.gmtime @@ -135,6 +136,28 @@ except ImportError: pg_args["host"] = "localhost" pg_args["dbname"] = get_config(config_file, "postgresql", "database_name", "zulip") + +if options.nagios_check: + conn = psycopg2.connect(**pg_args) + cursor = conn.cursor() + cursor.execute("SELECT count(*) FROM fts_update_log") + num = cursor.fetchall()[0][0] + + # nagios exit codes + states = { + "OK": 0, + "WARNING": 1, + "CRITICAL": 2, + "UNKNOWN": 3, + } + + state = "OK" + if num > 5: + state = "CRITICAL" + print(f"{state}: {num} rows in fts_update_log table") + exit(states[state]) + + conn = None retries = 1 diff --git a/puppet/zulip_ops/files/nagios3/commands.cfg b/puppet/zulip_ops/files/nagios3/commands.cfg index 71aa85eb6d..12430e5f7a 100644 --- a/puppet/zulip_ops/files/nagios3/commands.cfg +++ b/puppet/zulip_ops/files/nagios3/commands.cfg @@ -153,7 +153,7 @@ define command { define command { command_name check_fts_update_log - command_line /usr/lib/nagios/plugins/check_by_ssh -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/lib/nagios/plugins/zulip_postgresql/check_fts_update_log' + command_line /usr/lib/nagios/plugins/check_by_ssh -l nagios -t 30 -i /var/lib/nagios/.ssh/id_ed25519 -H $HOSTADDRESS$ -C '/usr/local/bin/process_fts_updates --nagios-check' } define command { diff --git a/tools/ci/production-verify b/tools/ci/production-verify index bb1197aa88..ea2f12c673 100755 --- a/tools/ci/production-verify +++ b/tools/ci/production-verify @@ -123,7 +123,7 @@ echo echo "Now running additional Nagios tests" echo if ! /usr/lib/nagios/plugins/zulip_app_frontend/check_queue_worker_errors \ - || ! su zulip -c /usr/lib/nagios/plugins/zulip_postgresql/check_fts_update_log; then # || \ + || ! su zulip -c "/usr/local/bin/process_fts_updates --nagios-check"; then # || \ # ! su zulip -c "/usr/lib/nagios/plugins/zulip_app_frontend/check_send_receive_time --site=https://127.0.0.1/api --nagios --insecure"; then set +x echo