2024-05-22 06:22:22 +02:00
|
|
|
from dataclasses import dataclass
|
2017-10-05 19:34:33 +02:00
|
|
|
from datetime import timedelta
|
2024-05-22 06:22:22 +02:00
|
|
|
from typing import Any, Literal
|
2017-10-05 19:34:33 +02:00
|
|
|
|
|
|
|
from django.utils.timezone import now as timezone_now
|
2023-10-12 19:43:45 +02:00
|
|
|
from typing_extensions import override
|
2017-10-05 19:34:33 +02:00
|
|
|
|
2023-11-09 19:24:49 +01:00
|
|
|
from analytics.lib.counts import ALL_COUNT_STATS, CountStat
|
2020-12-22 18:09:34 +01:00
|
|
|
from analytics.models import installation_epoch
|
2024-05-22 06:22:22 +02:00
|
|
|
from scripts.lib.zulip_tools import atomic_nagios_write
|
2024-05-24 16:49:56 +02:00
|
|
|
from zerver.lib.management import ZulipBaseCommand
|
2022-11-17 09:30:48 +01:00
|
|
|
from zerver.lib.timestamp import TimeZoneNotUTCError, floor_to_day, floor_to_hour, verify_UTC
|
2017-10-05 19:34:33 +02:00
|
|
|
from zerver.models import Realm
|
|
|
|
|
|
|
|
# Numeric code -> status name lookup.  The values line up with the usual
# Nagios plugin return codes (0-3); nothing in the visible part of this
# module reads the table -- NOTE(review): confirm whether it is still needed.
states = dict(enumerate(("OK", "WARNING", "CRITICAL", "UNKNOWN")))
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-05-22 06:22:22 +02:00
|
|
|
@dataclass
class NagiosResult:
    """Outcome of a monitoring check: a severity level plus a human-readable note."""

    # Severity of the result; restricted to the four lowercase level names.
    status: Literal["ok", "warning", "critical", "unknown"]
    # Free-form text explaining the status.
    message: str
|
|
|
|
|
|
|
|
|
2024-05-24 16:49:56 +02:00
|
|
|
class Command(ZulipBaseCommand):
    """Management command that checks how recently each analytics stat was filled."""

    help = """Checks FillState table.

    Run as a cron job that runs every hour."""

    @override
    def handle(self, *args: Any, **options: Any) -> None:
        """Compute the fill state and pass it to ``atomic_nagios_write``.

        The result is reported under the "check-analytics-state" name.
        """
        fill_state = self.get_fill_state()
        atomic_nagios_write("check-analytics-state", fill_state.status, fill_state.message)

    def get_fill_state(self) -> NagiosResult:
        """Summarize the freshness of every stat in ``ALL_COUNT_STATS``.

        Returns:
            A ``NagiosResult`` whose status is

            * "ok" when no realms exist, or when every stat was filled
              within its warning threshold;
            * "warning" when at least one stat is past its warning
              threshold but none is past its critical threshold;
            * "critical" when a stat's last fill is not in UTC, is not
              aligned to the stat's day/hour boundary, or is older than
              its critical threshold.

        Thresholds are 26h / 50h for daily stats and 90min / 150min for
        all other (hourly) stats.
        """
        if not Realm.objects.exists():
            return NagiosResult(status="ok", message="No realms exist, so not checking FillState.")

        # Sample the clock once so every stat is compared against the same instant.
        current_time = timezone_now()

        warning_unfilled_properties: list[str] = []
        critical_unfilled_properties: list[str] = []
        for stat_name, stat in ALL_COUNT_STATS.items():
            last_fill = stat.last_successful_fill()
            if last_fill is None:
                # Never filled successfully: measure staleness from the installation epoch.
                last_fill = installation_epoch()
            try:
                verify_UTC(last_fill)
            except TimeZoneNotUTCError:
                return NagiosResult(
                    status="critical", message=f"FillState not in UTC for {stat_name}"
                )

            if stat.frequency == CountStat.DAY:
                floor_function = floor_to_day
                warning_threshold = timedelta(hours=26)
                critical_threshold = timedelta(hours=50)
            else:  # CountStat.HOUR
                floor_function = floor_to_hour
                warning_threshold = timedelta(minutes=90)
                critical_threshold = timedelta(minutes=150)

            # A fill timestamp must sit exactly on a day/hour boundary.
            if floor_function(last_fill) != last_fill:
                return NagiosResult(
                    status="critical",
                    message=f"FillState not on {stat.frequency} boundary for {stat_name}",
                )

            time_to_last_fill = current_time - last_fill
            if time_to_last_fill > critical_threshold:
                critical_unfilled_properties.append(stat_name)
            elif time_to_last_fill > warning_threshold:
                warning_unfilled_properties.append(stat_name)

        if not critical_unfilled_properties and not warning_unfilled_properties:
            return NagiosResult(status="ok", message="FillState looks fine.")

        missed_once = ", ".join(warning_unfilled_properties)
        if not critical_unfilled_properties:
            return NagiosResult(
                status="warning",
                message=f"Missed filling {missed_once} once.",
            )

        # Only mention the "once" group when it is non-empty; otherwise the
        # message would claim an empty list of stats had been missed once.
        message_parts = []
        if warning_unfilled_properties:
            message_parts.append(f"Missed filling {missed_once} once.")
        missed_repeatedly = ", ".join(critical_unfilled_properties)
        message_parts.append(f"Missed filling {missed_repeatedly} at least twice.")
        return NagiosResult(status="critical", message=" ".join(message_parts))
|