import logging
import time
from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta
from typing import Callable, Dict, List, Optional, Sequence, Tuple, Type, Union

from django.conf import settings
from django.db import connection, models
from psycopg2.sql import SQL, Composable, Identifier, Literal
from typing_extensions import TypeAlias, override

from analytics.models import (
    BaseCount,
    FillState,
    InstallationCount,
    RealmCount,
    StreamCount,
    UserCount,
    installation_epoch,
)
from zerver.lib.timestamp import ceiling_to_day, ceiling_to_hour, floor_to_hour, verify_UTC
from zerver.models import Message, Realm, RealmAuditLog, Stream, UserActivityInterval, UserProfile

if settings.ZILENCER_ENABLED:
    from zilencer.models import (
        RemoteInstallationCount,
        RemoteRealm,
        RemoteRealmCount,
        RemoteZulipServer,
    )

logger = logging.getLogger("zulip.analytics")

# You can't subtract timedelta.max from a datetime, so use this instead
TIMEDELTA_MAX = timedelta(days=365 * 1000)
## Class definitions ##


class CountStat:
    HOUR = "hour"
    DAY = "day"
    FREQUENCIES = frozenset([HOUR, DAY])

    @property
    def time_increment(self) -> timedelta:
        if self.frequency == CountStat.HOUR:
            return timedelta(hours=1)
        return timedelta(days=1)

    def __init__(
        self,
        property: str,
        data_collector: "DataCollector",
        frequency: str,
        interval: Optional[timedelta] = None,
    ) -> None:
        self.property = property
        self.data_collector = data_collector
        # might have to do something different for bitfields
        if frequency not in self.FREQUENCIES:
            raise AssertionError(f"Unknown frequency: {frequency}")
        self.frequency = frequency
        if interval is not None:
            self.interval = interval
        else:
            self.interval = self.time_increment

    @override
    def __repr__(self) -> str:
        return f"<CountStat: {self.property}>"

    def last_successful_fill(self) -> Optional[datetime]:
        fillstate = FillState.objects.filter(property=self.property).first()
        if fillstate is None:
            return None
        if fillstate.state == FillState.DONE:
            return fillstate.end_time
        return fillstate.end_time - self.time_increment
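
# For example, an hourly stat has time_increment == timedelta(hours=1),
# so each row covers the hour ending at its end_time; passing a larger
# `interval` (as the *day_actives::day stats declared below do) makes
# each row summarize a trailing window longer than the gap between rows.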


class LoggingCountStat(CountStat):
    def __init__(self, property: str, output_table: Type[BaseCount], frequency: str) -> None:
        CountStat.__init__(self, property, DataCollector(output_table, None), frequency)
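
# A LoggingCountStat has no pull function: its rows are written directly
# at event time by do_increment_logging_stat (below), so
# do_fill_count_stat_at_hour only needs to aggregate them into the
# summary tables.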


class DependentCountStat(CountStat):
    def __init__(
        self,
        property: str,
        data_collector: "DataCollector",
        frequency: str,
        interval: Optional[timedelta] = None,
        dependencies: Sequence[str] = [],
    ) -> None:
        CountStat.__init__(self, property, data_collector, frequency, interval=interval)
        self.dependencies = dependencies


class DataCollector:
    def __init__(
        self,
        output_table: Type[BaseCount],
        pull_function: Optional[Callable[[str, datetime, datetime, Optional[Realm]], int]],
    ) -> None:
        self.output_table = output_table
        self.pull_function = pull_function

    def depends_on_realm(self) -> bool:
        return self.output_table in (UserCount, StreamCount)
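
# The output_table doubles as a routing key: per-user and per-stream
# counts (for which depends_on_realm() is True) get rolled up into
# RealmCount, and then InstallationCount, by do_aggregate_to_summary_table.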


## CountStat-level operations ##


def process_count_stat(
    stat: CountStat, fill_to_time: datetime, realm: Optional[Realm] = None
) -> None:
    # TODO: The realm argument is not yet supported, in that we don't
    # have a solution for how to update FillState if it is passed.  It
    # exists solely as partial plumbing for when we do fully implement
    # doing single-realm analytics runs for use cases like data import.
    #
    # Also, note that for the realm argument to be properly supported,
    # the CountStat object passed in needs to have come from, e.g.,
    # get_count_stats(realm), i.e. to have the realm_id already entered
    # into the SQL query defined by the CountStat object.
    verify_UTC(fill_to_time)
    if floor_to_hour(fill_to_time) != fill_to_time:
        raise ValueError(f"fill_to_time must be on an hour boundary: {fill_to_time}")

    fill_state = FillState.objects.filter(property=stat.property).first()
    if fill_state is None:
        currently_filled = installation_epoch()
        fill_state = FillState.objects.create(
            property=stat.property, end_time=currently_filled, state=FillState.DONE
        )
        logger.info("INITIALIZED %s %s", stat.property, currently_filled)
    elif fill_state.state == FillState.STARTED:
        logger.info("UNDO START %s %s", stat.property, fill_state.end_time)
        do_delete_counts_at_hour(stat, fill_state.end_time)
        currently_filled = fill_state.end_time - stat.time_increment
        do_update_fill_state(fill_state, currently_filled, FillState.DONE)
        logger.info("UNDO DONE %s", stat.property)
    elif fill_state.state == FillState.DONE:
        currently_filled = fill_state.end_time
    else:
        raise AssertionError(f"Unknown value for FillState.state: {fill_state.state}.")

    if isinstance(stat, DependentCountStat):
        for dependency in stat.dependencies:
            dependency_fill_time = COUNT_STATS[dependency].last_successful_fill()
            if dependency_fill_time is None:
                logger.warning(
                    "DependentCountStat %s run before dependency %s.", stat.property, dependency
                )
                return
            fill_to_time = min(fill_to_time, dependency_fill_time)

    currently_filled = currently_filled + stat.time_increment
    while currently_filled <= fill_to_time:
        logger.info("START %s %s", stat.property, currently_filled)
        start = time.time()
        do_update_fill_state(fill_state, currently_filled, FillState.STARTED)
        do_fill_count_stat_at_hour(stat, currently_filled, realm)
        do_update_fill_state(fill_state, currently_filled, FillState.DONE)
        end = time.time()
        currently_filled = currently_filled + stat.time_increment
        logger.info("DONE %s (%dms)", stat.property, (end - start) * 1000)


def do_update_fill_state(fill_state: FillState, end_time: datetime, state: int) -> None:
    fill_state.end_time = end_time
    fill_state.state = state
    fill_state.save()


# We assume end_time is valid (e.g. is on a day or hour boundary as appropriate)
# and is time-zone-aware. It is the caller's responsibility to enforce this!
def do_fill_count_stat_at_hour(
    stat: CountStat, end_time: datetime, realm: Optional[Realm] = None
) -> None:
    start_time = end_time - stat.interval
    if not isinstance(stat, LoggingCountStat):
        timer = time.time()
        assert stat.data_collector.pull_function is not None
        rows_added = stat.data_collector.pull_function(stat.property, start_time, end_time, realm)
        logger.info(
            "%s run pull_function (%dms/%sr)",
            stat.property,
            (time.time() - timer) * 1000,
            rows_added,
        )
    do_aggregate_to_summary_table(stat, end_time, realm)
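
# E.g. for 7day_actives::day, end_time - stat.interval reaches back
# nearly a week, so each daily row counts users active at some point in
# the trailing window, not just during the last day.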


def do_delete_counts_at_hour(stat: CountStat, end_time: datetime) -> None:
    if isinstance(stat, LoggingCountStat):
        InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
        if stat.data_collector.depends_on_realm():
            RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
    else:
        UserCount.objects.filter(property=stat.property, end_time=end_time).delete()
        StreamCount.objects.filter(property=stat.property, end_time=end_time).delete()
        RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
        InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()


def do_aggregate_to_summary_table(
    stat: CountStat, end_time: datetime, realm: Optional[Realm] = None
) -> None:
    cursor = connection.cursor()

    # Aggregate into RealmCount
    output_table = stat.data_collector.output_table
    if realm is not None:
        realm_clause: Composable = SQL("AND zerver_realm.id = {}").format(Literal(realm.id))
    else:
        realm_clause = SQL("")

    if stat.data_collector.depends_on_realm():
        realmcount_query = SQL(
            """
            INSERT INTO analytics_realmcount
                (realm_id, value, property, subgroup, end_time)
            SELECT
                zerver_realm.id, COALESCE(sum({output_table}.value), 0), %(property)s,
                {output_table}.subgroup, %(end_time)s
            FROM zerver_realm
            JOIN {output_table}
            ON
                zerver_realm.id = {output_table}.realm_id
            WHERE
                {output_table}.property = %(property)s AND
                {output_table}.end_time = %(end_time)s
                {realm_clause}
            GROUP BY zerver_realm.id, {output_table}.subgroup
            """
        ).format(
            output_table=Identifier(output_table._meta.db_table),
            realm_clause=realm_clause,
        )
        start = time.time()
        cursor.execute(
            realmcount_query,
            {
                "property": stat.property,
                "end_time": end_time,
            },
        )
        end = time.time()
        logger.info(
            "%s RealmCount aggregation (%dms/%sr)",
            stat.property,
            (end - start) * 1000,
            cursor.rowcount,
        )

    if realm is None:
        # Aggregate into InstallationCount.  Only run if we just
        # processed counts for all realms.
        #
        # TODO: Add support for updating installation data after
        # changing an individual realm's values.
        installationcount_query = SQL(
            """
            INSERT INTO analytics_installationcount
                (value, property, subgroup, end_time)
            SELECT
                sum(value), %(property)s, analytics_realmcount.subgroup, %(end_time)s
            FROM analytics_realmcount
            WHERE
                property = %(property)s AND
                end_time = %(end_time)s
            GROUP BY analytics_realmcount.subgroup
            """
        )
        start = time.time()
        cursor.execute(
            installationcount_query,
            {
                "property": stat.property,
                "end_time": end_time,
            },
        )
        end = time.time()
        logger.info(
            "%s InstallationCount aggregation (%dms/%sr)",
            stat.property,
            (end - start) * 1000,
            cursor.rowcount,
        )

    cursor.close()
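
# For example, messages_sent:is_bot:hour first writes per-user rows via
# its pull function; the first query above then folds those into one
# RealmCount row per (realm, subgroup), and the second folds RealmCount
# into one InstallationCount row per subgroup.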


## Utility functions called from outside counts.py ##


# called from zerver.actions; should not throw any errors
def do_increment_logging_stat(
    model_object_for_bucket: Union[Realm, UserProfile, Stream, "RemoteRealm", "RemoteZulipServer"],
    stat: CountStat,
    subgroup: Optional[Union[str, int, bool]],
    event_time: datetime,
    increment: int = 1,
) -> None:
    if not increment:
        return

    table = stat.data_collector.output_table
    id_args: Dict[str, Union[int, None]] = {}
    conflict_args: List[str] = []
    if table == RealmCount:
        assert isinstance(model_object_for_bucket, Realm)
        id_args = {"realm_id": model_object_for_bucket.id}
        conflict_args = ["realm_id"]
    elif table == UserCount:
        assert isinstance(model_object_for_bucket, UserProfile)
        id_args = {
            "realm_id": model_object_for_bucket.realm_id,
            "user_id": model_object_for_bucket.id,
        }
        conflict_args = ["user_id"]
    elif table == StreamCount:
        assert isinstance(model_object_for_bucket, Stream)
        id_args = {
            "realm_id": model_object_for_bucket.realm_id,
            "stream_id": model_object_for_bucket.id,
        }
        conflict_args = ["stream_id"]
    elif table == RemoteInstallationCount:
        assert isinstance(model_object_for_bucket, RemoteZulipServer)
        id_args = {"server_id": model_object_for_bucket.id, "remote_id": None}
        conflict_args = ["server_id"]
    elif table == RemoteRealmCount:
        assert isinstance(model_object_for_bucket, RemoteRealm)
        # For RemoteRealmCount (e.g. `mobile_pushes_forwarded::day`),
        # we have no `remote_id` nor `realm_id`, since they are not
        # imported from the remote server, which is the source of
        # truth of those two columns.  Their "ON CONFLICT" is thus the
        # only unique key we have, which is `remote_realm_id`, and not
        # `server_id` / `realm_id`.
        id_args = {
            "server_id": model_object_for_bucket.server_id,
            "remote_realm_id": model_object_for_bucket.id,
            "remote_id": None,
            "realm_id": None,
        }
        conflict_args = [
            "remote_realm_id",
        ]
    else:
        raise AssertionError("Unsupported CountStat output_table")

    if stat.frequency == CountStat.DAY:
        end_time = ceiling_to_day(event_time)
    elif stat.frequency == CountStat.HOUR:
        end_time = ceiling_to_hour(event_time)
    else:
        raise AssertionError("Unsupported CountStat frequency")

    is_subgroup: SQL = SQL("NULL")
    if subgroup is not None:
        is_subgroup = SQL("NOT NULL")
        # For backwards consistency, we cast the subgroup to a string
        # in Python; this emulates the behaviour of `get_or_create`,
        # which was previously used in this function, and performed
        # this cast because the `subgroup` column is defined as a
        # `CharField`.  Omitting this explicit cast causes a subgroup
        # of the boolean False to be passed as the PostgreSQL false,
        # which it stringifies as the lower-case `'false'`, not the
        # initial-case `'False'` that Python produces when stringifying it.
        #
        # Other parts of the system (e.g. count_message_by_user_query)
        # already use PostgreSQL to cast bools to strings, resulting
        # in `subgroup` values of lower-case `'false'` -- for example
        # in `messages_sent:is_bot:hour`.  Fixing this inconsistency
        # via a migration is complicated by these records being
        # exchanged over the wire from remote servers.
        subgroup = str(subgroup)
        conflict_args.append("subgroup")

    id_column_names = SQL(", ").join(map(Identifier, id_args.keys()))
    id_values = SQL(", ").join(map(Literal, id_args.values()))
    conflict_columns = SQL(", ").join(map(Identifier, conflict_args))

    sql_query = SQL(
        """
        INSERT INTO {table_name}(property, subgroup, end_time, value, {id_column_names})
        VALUES (%s, %s, %s, %s, {id_values})
        ON CONFLICT (property, end_time, {conflict_columns})
        WHERE subgroup IS {is_subgroup}
        DO UPDATE SET
            value = {table_name}.value + EXCLUDED.value
        """
    ).format(
        table_name=Identifier(table._meta.db_table),
        id_column_names=id_column_names,
        id_values=id_values,
        conflict_columns=conflict_columns,
        is_subgroup=is_subgroup,
    )
    with connection.cursor() as cursor:
        cursor.execute(sql_query, [stat.property, subgroup, end_time, increment])
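
# Typical call site (sketch, with an illustrative increment value), as
# used for invites_sent::day when a realm sends invitation emails:
#
#     do_increment_logging_stat(
#         realm, COUNT_STATS["invites_sent::day"], None, timezone_now(), increment=3
#     )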


def do_drop_all_analytics_tables() -> None:
    UserCount.objects.all().delete()
    StreamCount.objects.all().delete()
    RealmCount.objects.all().delete()
    InstallationCount.objects.all().delete()
    FillState.objects.all().delete()


def do_drop_single_stat(property: str) -> None:
    UserCount.objects.filter(property=property).delete()
    StreamCount.objects.filter(property=property).delete()
    RealmCount.objects.filter(property=property).delete()
    InstallationCount.objects.filter(property=property).delete()
    FillState.objects.filter(property=property).delete()


## DataCollector-level operations ##
QueryFn: TypeAlias = Callable[[Dict[str, Composable]], Composable]
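# A QueryFn receives the {subgroup} and {group_by_clause} fragments as
# already-composed SQL and returns the full INSERT ... SELECT statement;
# do_pull_by_sql_query below supplies, e.g., SQL("NULL") and SQL("")
# when there is no group_by.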


def do_pull_by_sql_query(
    property: str,
    start_time: datetime,
    end_time: datetime,
    query: QueryFn,
    group_by: Optional[Tuple[Type[models.Model], str]],
) -> int:
    if group_by is None:
        subgroup: Composable = SQL("NULL")
        group_by_clause: Composable = SQL("")
    else:
        subgroup = Identifier(group_by[0]._meta.db_table, group_by[1])
        group_by_clause = SQL(", {}").format(subgroup)

    # We do string replacement here because cursor.execute will reject a
    # group_by_clause given as a param.
    # We pass in the datetimes as params to cursor.execute so that we don't have to
    # think about how to convert Python datetimes to SQL datetimes.
    query_ = query(
        {
            "subgroup": subgroup,
            "group_by_clause": group_by_clause,
        }
    )
    cursor = connection.cursor()
    cursor.execute(
        query_,
        {
            "property": property,
            "time_start": start_time,
            "time_end": end_time,
        },
    )
    rowcount = cursor.rowcount
    cursor.close()
    return rowcount


def sql_data_collector(
    output_table: Type[BaseCount],
    query: QueryFn,
    group_by: Optional[Tuple[Type[models.Model], str]],
) -> DataCollector:
    def pull_function(
        property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None
    ) -> int:
        # The pull function type needs to accept a Realm argument
        # because the 'minutes_active::day' CountStat uses
        # DataCollector directly for do_pull_minutes_active, which
        # requires the realm argument.  We ignore it here, because the
        # realm should already have been encoded in the `query` we're
        # passed.
        return do_pull_by_sql_query(property, start_time, end_time, query, group_by)

    return DataCollector(output_table, pull_function)
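
# For instance, get_count_stats() below wires up
#     sql_data_collector(UserCount, count_message_by_user_query(realm),
#                        (UserProfile, "is_bot"))
# for messages_sent:is_bot:hour, subgrouping the counts by the sender's
# is_bot flag.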


def count_upload_space_used_by_realm_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        realm_clause = SQL("zerver_attachment.realm_id = {} AND").format(Literal(realm.id))

    # Note: This query currently has to scan the entire table, summing
    # the sizes of attachments for every realm.  It could be improved by
    # a query that takes the latest CountStat for each realm and adds
    # only the sizes of attachments uploaded since then.
    # That would add complexity, because attachments can also be
    # deleted: subtracting ArchivedAttachment sizes accounts for some of
    # that, but attachments can also be deleted directly via the API.

    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
        SELECT
            zerver_attachment.realm_id,
            %(property)s,
            %(time_end)s,
            COALESCE(SUM(zerver_attachment.size), 0)
        FROM
            zerver_attachment
        WHERE
            {realm_clause}
            zerver_attachment.create_time < %(time_end)s
        GROUP BY
            zerver_attachment.realm_id
        """
    ).format(**kwargs, realm_clause=realm_clause)


def do_pull_minutes_active(
    property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None
) -> int:
    user_activity_intervals = (
        UserActivityInterval.objects.filter(
            end__gt=start_time,
            start__lt=end_time,
        )
        .select_related(
            "user_profile",
        )
        .values_list("user_profile_id", "user_profile__realm_id", "start", "end")
    )

    seconds_active: Dict[Tuple[int, int], float] = defaultdict(float)
    for user_id, realm_id, interval_start, interval_end in user_activity_intervals:
        if realm is None or realm.id == realm_id:
            start = max(start_time, interval_start)
            end = min(end_time, interval_end)
            seconds_active[(user_id, realm_id)] += (end - start).total_seconds()

    rows = [
        UserCount(
            user_id=ids[0],
            realm_id=ids[1],
            property=property,
            end_time=end_time,
            value=int(seconds // 60),
        )
        for ids, seconds in seconds_active.items()
        if seconds >= 60
    ]
    UserCount.objects.bulk_create(rows)
    return len(rows)
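
# Worked example: an interval running from 23:58 one day to 00:27 the
# next, processed for the second day's window, is clipped at the window
# boundary and contributes the 27 minutes from 00:00 to 00:27; users
# with under 60 total seconds in the window produce no UserCount row.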


def count_message_by_user_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        # We limit both userprofile and message so that we only see
        # users from this realm, but also get the performance speedup
        # of limiting messages by realm.
        realm_clause = SQL(
            "zerver_userprofile.realm_id = {} AND zerver_message.realm_id = {} AND"
        ).format(Literal(realm.id), Literal(realm.id))
    # Uses index: zerver_message_realm_date_sent (or the only-date index)
    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_usercount
            (user_id, realm_id, value, property, subgroup, end_time)
        SELECT
            zerver_userprofile.id, zerver_userprofile.realm_id, count(*),
            %(property)s, {subgroup}, %(time_end)s
        FROM zerver_userprofile
        JOIN zerver_message
        ON
            zerver_userprofile.id = zerver_message.sender_id
        WHERE
            zerver_userprofile.date_joined < %(time_end)s AND
            zerver_message.date_sent >= %(time_start)s AND
            {realm_clause}
            zerver_message.date_sent < %(time_end)s
        GROUP BY zerver_userprofile.id {group_by_clause}
        """
    ).format(**kwargs, realm_clause=realm_clause)


# Note: ignores the group_by / group_by_clause.
def count_message_type_by_user_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        # We limit both userprofile and message so that we only see
        # users from this realm, but also get the performance speedup
        # of limiting messages by realm.
        realm_clause = SQL(
            "zerver_userprofile.realm_id = {} AND zerver_message.realm_id = {} AND"
        ).format(Literal(realm.id), Literal(realm.id))
    # Uses index: zerver_message_realm_date_sent (or the only-date index)
    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_usercount
            (realm_id, user_id, value, property, subgroup, end_time)
        SELECT realm_id, id, SUM(count) AS value, %(property)s, message_type, %(time_end)s
        FROM
        (
            SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
            CASE WHEN
                zerver_recipient.type = 1 THEN 'private_message'
            WHEN
                zerver_recipient.type = 3 THEN 'huddle_message'
            WHEN
                zerver_stream.invite_only = TRUE THEN 'private_stream'
            ELSE 'public_stream'
            END
            message_type
            FROM zerver_userprofile
            JOIN zerver_message
            ON
                zerver_userprofile.id = zerver_message.sender_id AND
                zerver_message.date_sent >= %(time_start)s AND
                {realm_clause}
                zerver_message.date_sent < %(time_end)s
            JOIN zerver_recipient
            ON
                zerver_message.recipient_id = zerver_recipient.id
            LEFT JOIN zerver_stream
            ON
                zerver_recipient.type_id = zerver_stream.id
            GROUP BY
                zerver_userprofile.realm_id, zerver_userprofile.id,
                zerver_recipient.type, zerver_stream.invite_only
        ) AS subquery
        GROUP BY realm_id, id, message_type
        """
    ).format(**kwargs, realm_clause=realm_clause)


# This query joins to the UserProfile table since all current queries that
# use this also subgroup on UserProfile.is_bot.  If in the future there is a
# stat that counts messages by stream and doesn't need the UserProfile
# table, consider writing a new query for efficiency.
def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        realm_clause = SQL(
            "zerver_stream.realm_id = {} AND zerver_message.realm_id = {} AND"
        ).format(Literal(realm.id), Literal(realm.id))
    # Uses index: zerver_message_realm_date_sent (or the only-date index)
    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_streamcount
            (stream_id, realm_id, value, property, subgroup, end_time)
        SELECT
            zerver_stream.id, zerver_stream.realm_id, count(*), %(property)s, {subgroup}, %(time_end)s
        FROM zerver_stream
        JOIN zerver_recipient
        ON
            zerver_stream.id = zerver_recipient.type_id
        JOIN zerver_message
        ON
            zerver_recipient.id = zerver_message.recipient_id
        JOIN zerver_userprofile
        ON
            zerver_message.sender_id = zerver_userprofile.id
        WHERE
            zerver_stream.date_created < %(time_end)s AND
            zerver_recipient.type = 2 AND
            zerver_message.date_sent >= %(time_start)s AND
            {realm_clause}
            zerver_message.date_sent < %(time_end)s
        GROUP BY zerver_stream.id {group_by_clause}
        """
    ).format(**kwargs, realm_clause=realm_clause)


# Hardcodes the query needed for active_users_audit:is_bot:day.
# Assumes that a user cannot have two RealmAuditLog entries with the
# same event_time and event_type in [RealmAuditLog.USER_CREATED,
# USER_DEACTIVATED, etc].  In particular, it's important to ensure
# that migrations don't cause that to happen.
def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id))
    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_realmcount
            (realm_id, value, property, subgroup, end_time)
        SELECT
            zerver_userprofile.realm_id, count(*), %(property)s, {subgroup}, %(time_end)s
        FROM zerver_userprofile
        JOIN (
            SELECT DISTINCT ON (modified_user_id)
                modified_user_id, event_type
            FROM
                zerver_realmauditlog
            WHERE
                event_type IN ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND
                {realm_clause}
                event_time < %(time_end)s
            ORDER BY
                modified_user_id,
                event_time DESC
        ) last_user_event ON last_user_event.modified_user_id = zerver_userprofile.id
        WHERE
            last_user_event.event_type in ({user_created}, {user_activated}, {user_reactivated})
        GROUP BY zerver_userprofile.realm_id {group_by_clause}
        """
    ).format(
        **kwargs,
        user_created=Literal(RealmAuditLog.USER_CREATED),
        user_activated=Literal(RealmAuditLog.USER_ACTIVATED),
        user_deactivated=Literal(RealmAuditLog.USER_DEACTIVATED),
        user_reactivated=Literal(RealmAuditLog.USER_REACTIVATED),
        realm_clause=realm_clause,
    )
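
# The DISTINCT ON subquery keeps only each user's most recent audit-log
# event before time_end, so a user is counted iff their latest event is
# a creation/activation/reactivation rather than a deactivation.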


def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_usercount
            (user_id, realm_id, value, property, subgroup, end_time)
        SELECT
            zerver_userprofile.id, zerver_userprofile.realm_id, 1, %(property)s, {subgroup}, %(time_end)s
        FROM zerver_userprofile
        JOIN zerver_useractivityinterval
        ON
            zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
        WHERE
            zerver_useractivityinterval.end >= %(time_start)s AND
            {realm_clause}
            zerver_useractivityinterval.start < %(time_end)s
        GROUP BY zerver_userprofile.id {group_by_clause}
        """
    ).format(**kwargs, realm_clause=realm_clause)


def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn:
    if realm is None:
        realm_clause: Composable = SQL("")
    else:
        realm_clause = SQL("AND realm_id = {}").format(Literal(realm.id))
    return lambda kwargs: SQL(
        """
        INSERT INTO analytics_realmcount
            (realm_id, value, property, subgroup, end_time)
        SELECT
            active_usercount.realm_id, count(*), %(property)s, NULL, %(time_end)s
        FROM (
            SELECT
                realm_id,
                user_id
            FROM
                analytics_usercount
            WHERE
                property = '15day_actives::day'
                {realm_clause}
                AND end_time = %(time_end)s
        ) active_usercount
        JOIN zerver_userprofile ON active_usercount.user_id = zerver_userprofile.id
        JOIN (
            SELECT DISTINCT ON (modified_user_id)
                modified_user_id, event_type
            FROM
                zerver_realmauditlog
            WHERE
                event_type IN ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated})
                AND event_time < %(time_end)s
            ORDER BY
                modified_user_id,
                event_time DESC
        ) last_user_event ON last_user_event.modified_user_id = active_usercount.user_id
        WHERE
            NOT zerver_userprofile.is_bot
            AND event_type IN ({user_created}, {user_activated}, {user_reactivated})
        GROUP BY
            active_usercount.realm_id
        """
    ).format(
        **kwargs,
        user_created=Literal(RealmAuditLog.USER_CREATED),
        user_activated=Literal(RealmAuditLog.USER_ACTIVATED),
        user_deactivated=Literal(RealmAuditLog.USER_DEACTIVATED),
        user_reactivated=Literal(RealmAuditLog.USER_REACTIVATED),
        realm_clause=realm_clause,
    )


# Currently unused and untested
count_stream_by_realm_query = lambda kwargs: SQL(
    """
    INSERT INTO analytics_realmcount
        (realm_id, value, property, subgroup, end_time)
    SELECT
        zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
    FROM zerver_realm
    JOIN zerver_stream
    ON
        zerver_realm.id = zerver_stream.realm_id
    WHERE
        zerver_realm.date_created < %(time_end)s AND
        zerver_stream.date_created >= %(time_start)s AND
        zerver_stream.date_created < %(time_end)s
    GROUP BY zerver_realm.id {group_by_clause}
    """
).format(**kwargs)


def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
    ## CountStat declarations ##

    count_stats_ = [
        # Messages sent stats
        # Stats that count the number of messages sent in various ways.
        # These are also the set of stats that read from the Message table.
        CountStat(
            "messages_sent:is_bot:hour",
            sql_data_collector(
                UserCount, count_message_by_user_query(realm), (UserProfile, "is_bot")
            ),
            CountStat.HOUR,
        ),
        CountStat(
            "messages_sent:message_type:day",
            sql_data_collector(UserCount, count_message_type_by_user_query(realm), None),
            CountStat.DAY,
        ),
        CountStat(
            "messages_sent:client:day",
            sql_data_collector(
                UserCount, count_message_by_user_query(realm), (Message, "sending_client_id")
            ),
            CountStat.DAY,
        ),
        CountStat(
            "messages_in_stream:is_bot:day",
            sql_data_collector(
                StreamCount, count_message_by_stream_query(realm), (UserProfile, "is_bot")
            ),
            CountStat.DAY,
        ),
        # Counts the number of active users in the UserProfile.is_active sense.
        # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
        CountStat(
            "active_users_audit:is_bot:day",
            sql_data_collector(
                RealmCount, check_realmauditlog_by_user_query(realm), (UserProfile, "is_bot")
            ),
            CountStat.DAY,
        ),
        CountStat(
            "upload_quota_used_bytes::day",
            sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None),
            CountStat.DAY,
        ),
        # Messages read stats.  messages_read::hour is the total
        # number of messages read, whereas
        # messages_read_interactions::hour tries to count the total
        # number of UI interactions resulting in messages being marked
        # as read (imperfect because of batching of some request
        # types, but less likely to be overwhelmed by a single bulk
        # operation).
        LoggingCountStat("messages_read::hour", UserCount, CountStat.HOUR),
        LoggingCountStat("messages_read_interactions::hour", UserCount, CountStat.HOUR),
        # User activity stats
        # Stats that measure user activity in the UserActivityInterval sense.
        CountStat(
            "1day_actives::day",
            sql_data_collector(UserCount, check_useractivityinterval_by_user_query(realm), None),
            CountStat.DAY,
            interval=timedelta(days=1) - UserActivityInterval.MIN_INTERVAL_LENGTH,
        ),
        CountStat(
            "7day_actives::day",
            sql_data_collector(UserCount, check_useractivityinterval_by_user_query(realm), None),
            CountStat.DAY,
            interval=timedelta(days=7) - UserActivityInterval.MIN_INTERVAL_LENGTH,
        ),
        CountStat(
            "15day_actives::day",
            sql_data_collector(UserCount, check_useractivityinterval_by_user_query(realm), None),
            CountStat.DAY,
            interval=timedelta(days=15) - UserActivityInterval.MIN_INTERVAL_LENGTH,
        ),
        CountStat(
            "minutes_active::day", DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY
        ),
        # Tracks the number of push notifications requested by the server.
        # Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
        LoggingCountStat(
            "mobile_pushes_sent::day",
            RealmCount,
            CountStat.DAY,
        ),
        # Rate limiting stats
        # Used to limit the number of invitation emails sent by a realm.
        # Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
        LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY),
        # Dependent stats
        # Must come after their dependencies.
        # Canonical account of the number of active humans in a realm on each day.
        DependentCountStat(
            "realm_active_humans::day",
            sql_data_collector(RealmCount, count_realm_active_humans_query(realm), None),
            CountStat.DAY,
            dependencies=["15day_actives::day"],
        ),
    ]

    if settings.ZILENCER_ENABLED:
        # See also the remote_installation versions of these in REMOTE_INSTALLATION_COUNT_STATS.
        count_stats_.append(
            LoggingCountStat(
                "mobile_pushes_received::day",
                RemoteRealmCount,
                CountStat.DAY,
            )
        )
        count_stats_.append(
            LoggingCountStat(
                "mobile_pushes_forwarded::day",
                RemoteRealmCount,
                CountStat.DAY,
            )
        )

    return OrderedDict((stat.property, stat) for stat in count_stats_)
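
# Hypothetical single-realm use (sketch): get_count_stats(realm) bakes
# the realm id into each stat's SQL, which is the form that
# process_count_stat's realm argument expects once single-realm runs are
# fully supported; the module-level COUNT_STATS below covers the normal
# all-realms case.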


# These properties are tracked by the bouncer itself and therefore syncing them
# from a remote server should not be allowed - or the server would be able to interfere
# with our data.
BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES = [
    "mobile_pushes_received::day",
    "mobile_pushes_forwarded::day",
]


# LoggingCountStats with a daily duration that are stored directly on the
# RealmCount table (rather than filled in via aggregation in
# process_count_stat) can, after the hourly cron job updates analytics
# counts, still have their logged value live-updated later, since the end
# time for the stat is still in the future.  As these logging counts are
# designed to be used on the self-hosted installation for either debugging
# or rate limiting, sending such incomplete counts to the bouncer has low
# value.
LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = {
    "invites_sent::day",
    "mobile_pushes_sent::day",
    "active_users_log:is_bot:day",
    "active_users:is_bot:day",
}


# To avoid refactoring, for now COUNT_STATS can be used as before
COUNT_STATS = get_count_stats()
REMOTE_INSTALLATION_COUNT_STATS = OrderedDict()
if settings.ZILENCER_ENABLED:
    # REMOTE_INSTALLATION_COUNT_STATS contains duplicates of the
    # RemoteRealmCount stats declared above; it is necessary because
    # pre-8.0 servers do not send the fields required to identify a
    # RemoteRealm.

    # Tracks the number of push notifications requested to be sent
    # by a remote server.
    REMOTE_INSTALLATION_COUNT_STATS["mobile_pushes_received::day"] = LoggingCountStat(
        "mobile_pushes_received::day",
        RemoteInstallationCount,
        CountStat.DAY,
    )
    # Tracks the number of push notifications successfully sent to
    # mobile devices, as requested by the remote server.  Therefore
    # this should be less than or equal to mobile_pushes_received --
    # with potential tiny offsets resulting from a request being
    # *received* by the bouncer right before midnight, but *sent* to
    # the mobile device right after midnight.  This would cause the
    # increments to happen to CountStat records for different days.
    REMOTE_INSTALLATION_COUNT_STATS["mobile_pushes_forwarded::day"] = LoggingCountStat(
        "mobile_pushes_forwarded::day",
        RemoteInstallationCount,
        CountStat.DAY,
    )

ALL_COUNT_STATS = OrderedDict(
    list(COUNT_STATS.items()) + list(REMOTE_INSTALLATION_COUNT_STATS.items())
)