2020-01-14 21:59:46 +01:00
|
|
|
import os
|
|
|
|
import time
|
2017-10-05 19:34:33 +02:00
|
|
|
from datetime import timedelta
|
2020-01-14 21:59:46 +01:00
|
|
|
from typing import Any, Dict
|
2017-10-05 19:34:33 +02:00
|
|
|
|
|
|
|
from django.core.management.base import BaseCommand
|
|
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
|
|
|
|
from analytics.lib.counts import COUNT_STATS, CountStat
|
2020-01-14 21:59:46 +01:00
|
|
|
from analytics.models import installation_epoch, last_successful_fill
|
|
|
|
from zerver.lib.timestamp import TimezoneNotUTCException, floor_to_day, \
|
|
|
|
floor_to_hour, verify_UTC
|
2017-10-05 19:34:33 +02:00
|
|
|
from zerver.models import Realm
|
|
|
|
|
|
|
|
# Human-readable names for the numeric status codes produced by
# Command.get_fill_state(); handle() writes both the code and its name
# into the Nagios state file.
states = {
    0: "OK",
    1: "WARNING",
    2: "CRITICAL",
    3: "UNKNOWN",
}
|
|
|
|
|
|
|
|
class Command(BaseCommand):
    """Report how up to date the analytics FillState data is, via a Nagios state file."""

    help = """Checks FillState table.

    Run as a cron job that runs every hour."""

    def handle(self, *args: Any, **options: Any) -> None:
        """Compute the current fill state and publish it to the state file.

        The file receives a single line of the form
        ``<unix time>|<status code>|<status name>|<message>``.
        """
        fill_state = self.get_fill_state()
        status = fill_state['status']
        message = fill_state['message']

        state_file_path = "/var/lib/nagios_state/check-analytics-state"
        state_file_tmp = state_file_path + "-tmp"

        # Write to a temporary file first and then rename it over the real
        # path, so the state file is never observed half-written.
        with open(state_file_tmp, "w") as f:
            f.write("%s|%s|%s|%s\n" % (
                int(time.time()), status, states[status], message))
        os.rename(state_file_tmp, state_file_path)

    def get_fill_state(self) -> Dict[str, Any]:
        """Inspect the last successful fill of every stat in COUNT_STATS.

        Returns a dict with two keys:
          * 'status': an integer key of the module-level ``states`` mapping
            (0 = OK, 1 = WARNING, 2 = CRITICAL).
          * 'message': a human-readable explanation of that status.

        A last-fill time that is not in UTC, or that does not sit on the
        stat's day/hour boundary, is reported immediately as CRITICAL.
        """
        if not Realm.objects.exists():
            return {'status': 0, 'message': 'No realms exist, so not checking FillState.'}

        warning_unfilled_properties = []
        critical_unfilled_properties = []
        for property_name, stat in COUNT_STATS.items():
            last_fill = last_successful_fill(property_name)
            if last_fill is None:
                # Never filled: measure staleness from the installation epoch.
                last_fill = installation_epoch()
            try:
                verify_UTC(last_fill)
            except TimezoneNotUTCException:
                return {'status': 2,
                        'message': 'FillState not in UTC for %s' % (property_name,)}

            # The thresholds are a bit more than one and two fill intervals,
            # matching the "once" / "at least twice" wording of the messages.
            if stat.frequency == CountStat.DAY:
                floor_function = floor_to_day
                warning_threshold = timedelta(hours=26)
                critical_threshold = timedelta(hours=50)
            else:  # CountStat.HOUR
                floor_function = floor_to_hour
                warning_threshold = timedelta(minutes=90)
                critical_threshold = timedelta(minutes=150)

            if floor_function(last_fill) != last_fill:
                return {'status': 2,
                        'message': 'FillState not on %s boundary for %s' %
                        (stat.frequency, property_name)}

            time_to_last_fill = timezone_now() - last_fill
            if time_to_last_fill > critical_threshold:
                critical_unfilled_properties.append(property_name)
            elif time_to_last_fill > warning_threshold:
                warning_unfilled_properties.append(property_name)

        if not critical_unfilled_properties and not warning_unfilled_properties:
            return {'status': 0, 'message': 'FillState looks fine.'}

        # Only mention a category that actually has entries; otherwise a
        # critical-only result would read "Missed filling  once. ...".
        problems = []
        if warning_unfilled_properties:
            problems.append('Missed filling %s once.' %
                            (', '.join(warning_unfilled_properties),))
        if critical_unfilled_properties:
            problems.append('Missed filling %s at least twice.' %
                            (', '.join(critical_unfilled_properties),))

        status = 2 if critical_unfilled_properties else 1
        return {'status': status, 'message': ' '.join(problems)}
|