2023-11-17 16:57:59 +01:00
|
|
|
import hashlib
|
2017-04-28 23:28:48 +02:00
|
|
|
import time
|
2017-11-16 00:55:49 +01:00
|
|
|
from argparse import ArgumentParser
|
2020-06-05 06:55:20 +02:00
|
|
|
from datetime import timezone
|
2024-07-12 02:30:17 +02:00
|
|
|
from typing import Any
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2017-11-16 00:55:49 +01:00
|
|
|
from django.conf import settings
|
|
|
|
from django.utils.dateparse import parse_datetime
|
2017-04-15 04:03:56 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2023-10-12 19:43:45 +02:00
|
|
|
from typing_extensions import override
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2023-11-09 19:24:49 +01:00
|
|
|
from analytics.lib.counts import ALL_COUNT_STATS, logger, process_count_stat
|
2024-05-24 16:49:56 +02:00
|
|
|
from zerver.lib.management import ZulipBaseCommand, abort_unless_locked
|
2024-07-16 22:52:01 +02:00
|
|
|
from zerver.lib.remote_server import send_server_data_to_push_bouncer, should_send_analytics_data
|
2017-04-28 01:26:50 +02:00
|
|
|
from zerver.lib.timestamp import floor_to_hour
|
2017-11-02 13:06:44 +01:00
|
|
|
from zerver.models import Realm
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2020-01-14 21:59:46 +01:00
|
|
|
|
2024-05-24 16:49:56 +02:00
|
|
|
class Command(ZulipBaseCommand):
    """Management command that processes analytics CountStats up to a given hour.

    After processing, it optionally reports server data to the push bouncer
    (when should_send_analytics_data() is truthy).
    """

    help = """Fills Analytics tables.

    Run as a cron job that runs every hour."""

    @override
    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the --time/-t, --utc, --stat/-s and --verbose options."""
        parser.add_argument(
            "--time",
            "-t",
            help="Update stat tables from current state to "
            "--time. Defaults to the current time.",
            # Evaluated when the arguments are registered, i.e. once per
            # invocation of the command.
            default=timezone_now().isoformat(),
        )
        parser.add_argument("--utc", action="store_true", help="Interpret --time in UTC.")
        parser.add_argument(
            "--stat", "-s", help="CountStat to process. If omitted, all stats are processed."
        )
        parser.add_argument(
            "--verbose", action="store_true", help="Print timing information to stdout."
        )

    @override
    @abort_unless_locked
    def handle(self, *args: Any, **options: Any) -> None:
        """Entry point: delegate to run_update_analytics_counts.

        NOTE(review): abort_unless_locked presumably prevents running without
        the expected lock held -- confirm in zerver.lib.management.
        """
        self.run_update_analytics_counts(options)

    def run_update_analytics_counts(self, options: dict[str, Any]) -> None:
        """Process the selected CountStats through the hour containing --time.

        Reads the "time", "utc", "stat" and "verbose" keys of ``options``.

        Raises:
            ValueError: if --time cannot be parsed as a datetime, or if it
                carries no time zone and --utc was not given.
            KeyError: if --stat names a property not in ALL_COUNT_STATS.
        """
        # installation_epoch relies on there being at least one realm; we
        # shouldn't run the analytics code if that condition isn't satisfied
        if not Realm.objects.exists():
            logger.info("No realms, stopping update_analytics_counts")
            return

        raw_time = options["time"]
        fill_to_time = parse_datetime(raw_time)
        if fill_to_time is None:
            # parse_datetime returns None for badly formatted input.  Raise
            # explicitly instead of asserting: asserts vanish under
            # ``python -O`` and give the user no hint about what went wrong.
            raise ValueError(f"--time value {raw_time!r} could not be parsed as a datetime.")
        if options["utc"]:
            # Stamp the parsed value as UTC without shifting the clock time.
            fill_to_time = fill_to_time.replace(tzinfo=timezone.utc)
        if fill_to_time.tzinfo is None:
            raise ValueError(
                "--time must be time-zone-aware. Maybe you meant to use the --utc option?"
            )

        # Normalize to UTC and truncate to the start of the hour.
        fill_to_time = floor_to_hour(fill_to_time.astimezone(timezone.utc))

        if options["stat"] is not None:
            stats = [ALL_COUNT_STATS[options["stat"]]]
        else:
            stats = list(ALL_COUNT_STATS.values())

        logger.info("Starting updating analytics counts through %s", fill_to_time)
        if options["verbose"]:
            # start: beginning of the whole run; last: beginning of the
            # current stat, used for per-stat timings.
            start = time.time()
            last = start

        for stat in stats:
            process_count_stat(stat, fill_to_time)
            if options["verbose"]:
                print(f"Updated {stat.property} in {time.time() - last:.3f}s")
                last = time.time()

        if options["verbose"]:
            print(
                f"Finished updating analytics counts through {fill_to_time} in {time.time() - start:.3f}s"
            )
        logger.info("Finished updating analytics counts through %s", fill_to_time)

        if should_send_analytics_data():
            # Based on the specific value of the setting, the exact details to send
            # will be decided. However, we proceed just based on this not being falsey.

            # Skew 0-10 minutes based on a hash of settings.ZULIP_ORG_ID, so
            # that each server will report in at a somewhat consistent time.
            assert settings.ZULIP_ORG_ID
            delay = int.from_bytes(
                hashlib.sha256(settings.ZULIP_ORG_ID.encode()).digest(), byteorder="big"
            ) % (60 * 10)
            logger.info("Sleeping %d seconds before reporting...", delay)
            time.sleep(delay)

            send_server_data_to_push_bouncer(consider_usage_statistics=True)
|