py3: Switch almost all shebang lines to use `python3`.
This causes `upgrade-zulip-from-git`, as well as a no-option run of
`tools/build-release-tarball`, to produce a Zulip install running
Python 3, rather than Python 2. In particular this means that the
virtualenv we create, in which all application code runs, is Python 3.
One shebang line, on `zulip-ec2-configure-interfaces`, explicitly
keeps Python 2, and at least one external ops script, `wal-e`, also
still runs on Python 2. See discussion on the respective previous
commits that made those explicit. There may also be some other
third-party scripts we use, outside of this source tree and running
outside our virtualenv, that still run on Python 2.
2017-08-02 23:15:16 +02:00
|
|
|
#!/usr/bin/env python3
|
2013-02-19 18:06:25 +01:00
|
|
|
|
|
|
|
import re
|
|
|
|
import time
|
2013-10-28 15:54:32 +01:00
|
|
|
import os
|
2013-11-08 22:45:20 +01:00
|
|
|
import subprocess
|
2013-02-19 02:36:59 +01:00
|
|
|
|
2019-04-14 05:39:50 +02:00
|
|
|
# The WARN_THRESHOLD/CRIT_THRESHOLD settings make it possible to
|
|
|
|
# configure specific queues to have a higher or lower limit than the
|
|
|
|
# default.
|
2014-08-12 12:41:26 +02:00
|
|
|
# Backlog size above which a queue is reported as WARNING when it has
# no queue-specific entry in WARN_THRESHOLD.
WARN_THRESHOLD_DEFAULT = 10
# Per-queue WARNING limits, keyed by RabbitMQ queue name.
WARN_THRESHOLD = {
    'missedmessage_emails': WARN_THRESHOLD_DEFAULT,
    # The user_activity worker has high throughput and uses a
    # LoopQueueProcessingWorker, so it's normal to have a moderate
    # backlog.
    'user_activity': 1000,
}
# Backlog size above which a queue is reported as CRITICAL when it has
# no queue-specific entry in CRIT_THRESHOLD.
CRIT_THRESHOLD_DEFAULT = 50
# Per-queue CRITICAL limits, keyed by RabbitMQ queue name.
CRIT_THRESHOLD = {
    'missedmessage_emails': CRIT_THRESHOLD_DEFAULT,
    # A backlog of thousands of events for user_activity likely
    # indicates an outage of the processor.
    'user_activity': 5000,
}
|
2013-02-19 18:06:25 +01:00
|
|
|
|
|
|
|
# Human-readable label for each numeric status code this script reports.
# NOTE(review): the codes look like the conventional Nagios plugin return
# codes (0-3) -- confirm against the monitoring configuration.
states = {
    0: "OK",
    1: "WARNING",
    2: "CRITICAL",
    3: "UNKNOWN",
}
|
|
|
|
|
2016-07-23 20:33:58 +02:00
|
|
|
# One line of `rabbitmqctl list_queues` output: "<queue name>\t<message count>".
pattern = re.compile(r'(\w+)\t(\d+)')


def parse_queue_counts(output):
    """Yield ``(queue_name, message_count)`` for each queue line in *output*.

    *output* is the text printed by ``rabbitmqctl list_queues``.  Lines that
    do not start with ``<word characters><TAB><digits>`` (after stripping
    surrounding whitespace) are skipped.
    """
    for line in output.split("\n"):
        m = pattern.match(line.strip())
        if m:
            yield m.group(1), int(m.group(2))


def classify_backlog(count, warn_threshold, crit_threshold):
    """Return the status code for a queue holding *count* messages.

    Returns 2 when *count* exceeds *crit_threshold*, 1 when it exceeds
    *warn_threshold*, and 0 otherwise.  Both comparisons are strict, so a
    count exactly equal to a threshold does not trip it.
    """
    if count > crit_threshold:
        return 2
    if count > warn_threshold:
        return 1
    return 0


def main():
    """Check RabbitMQ queue backlogs and print a one-line status report.

    The report has the form ``<unix time>|<status code>|<state>|<message>``.
    Raises ``subprocess.CalledProcessError`` if ``rabbitmqctl`` exits with a
    non-zero status.
    """
    # Print the hint before invoking rabbitmqctl, so it is visible even if
    # the call below raises.
    if 'USER' in os.environ and os.environ['USER'] not in ['root', 'rabbitmq']:
        print("This script must be run as the root or rabbitmq user")

    output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_queues'], universal_newlines=True)

    status = 0
    max_count = 0
    warn_queues = []

    for queue, count in parse_queue_counts(output):
        this_status = classify_backlog(
            count,
            WARN_THRESHOLD.get(queue, WARN_THRESHOLD_DEFAULT),
            CRIT_THRESHOLD.get(queue, CRIT_THRESHOLD_DEFAULT),
        )
        if this_status > 0:
            warn_queues.append(queue)

        status = max(status, this_status)
        max_count = max(max_count, count)

    warn_about = ", ".join(warn_queues)
    now = int(time.time())
    now_struct = time.gmtime(now)

    # While we are sending digest emails, at 1800 hrs (UTC) each weekday, the mail
    # queues can get backed up; don't alert on those. Additionally, certain workers
    # (slow_queries and digest_emails) have a polling algorithm that means it's
    # normal for them to accumulate items.
    only_expected_backlogs = not set(warn_queues) - {"digest_emails", "slow_queries"}
    if only_expected_backlogs and now_struct.tm_hour == 18 and now_struct.tm_min < 25:
        status = 0
        print("%s|%s|%s|processing digests, not alerting on elevated mail queues" % (
            now, status, states[status]))
        # Returning ends the script with exit status 0, without relying on
        # the interactive-only `exit()` helper.
        return

    if status > 0:
        print("%s|%s|%s|max count %s, queues affected: %s" % (
            now, status, states[status], max_count, warn_about))
    else:
        print("%s|%s|%s|queues normal, max count %s" % (now, status, states[status], max_count))


if __name__ == "__main__":
    main()
|