analytics: Pass subgroup=None to improve indexing.

The main indexes on end_time either don't include realm_id or do
include subgroup. As a result, passing an explicit subgroup=None in
single-realm queries that read CountStats which don't use the
subgroups feature greatly improves the query plans.
This commit is contained in:
Tim Abbott 2024-10-02 10:40:59 -07:00 committed by Alex Vandiver
parent 9c8cc8c333
commit 1ff14fd0f1
4 changed files with 17 additions and 2 deletions

View File

@ -346,10 +346,12 @@ def get_mobile_push_data(remote_entity: RemoteZulipServer | RemoteRealm) -> Mobi
mobile_pushes = RemoteInstallationCount.objects.filter( mobile_pushes = RemoteInstallationCount.objects.filter(
server=remote_entity, server=remote_entity,
property="mobile_pushes_forwarded::day", property="mobile_pushes_forwarded::day",
subgroup=None,
end_time__gte=timezone_now() - timedelta(days=7), end_time__gte=timezone_now() - timedelta(days=7),
).aggregate(total_forwarded=Sum("value", default=0)) ).aggregate(total_forwarded=Sum("value", default=0))
latest_remote_server_push_forwarded_count = RemoteInstallationCount.objects.filter( latest_remote_server_push_forwarded_count = RemoteInstallationCount.objects.filter(
server=remote_entity, server=remote_entity,
subgroup=None,
property="mobile_pushes_forwarded::day", property="mobile_pushes_forwarded::day",
).last() ).last()
if latest_remote_server_push_forwarded_count is not None: # nocoverage if latest_remote_server_push_forwarded_count is not None: # nocoverage
@ -380,10 +382,12 @@ def get_mobile_push_data(remote_entity: RemoteZulipServer | RemoteRealm) -> Mobi
mobile_pushes = RemoteRealmCount.objects.filter( mobile_pushes = RemoteRealmCount.objects.filter(
remote_realm=remote_entity, remote_realm=remote_entity,
property="mobile_pushes_forwarded::day", property="mobile_pushes_forwarded::day",
subgroup=None,
end_time__gte=timezone_now() - timedelta(days=7), end_time__gte=timezone_now() - timedelta(days=7),
).aggregate(total_forwarded=Sum("value", default=0)) ).aggregate(total_forwarded=Sum("value", default=0))
latest_remote_realm_push_forwarded_count = RemoteRealmCount.objects.filter( latest_remote_realm_push_forwarded_count = RemoteRealmCount.objects.filter(
remote_realm=remote_entity, remote_realm=remote_entity,
subgroup=None,
property="mobile_pushes_forwarded::day", property="mobile_pushes_forwarded::day",
).last() ).last()
if latest_remote_realm_push_forwarded_count is not None: # nocoverage if latest_remote_realm_push_forwarded_count is not None: # nocoverage

View File

@ -41,6 +41,9 @@ def estimate_recent_invites(realms: Collection[Realm] | QuerySet[Realm], *, days
recent_invites = RealmCount.objects.filter( recent_invites = RealmCount.objects.filter(
realm__in=realms, realm__in=realms,
property="invites_sent::day", property="invites_sent::day",
# It's important to filter on this even though the count
# doesn't use subgroup, so that we use the index.
subgroup=None,
end_time__gte=timezone_now() - timedelta(days=days), end_time__gte=timezone_now() - timedelta(days=days),
).aggregate(Sum("value"))["value__sum"] ).aggregate(Sum("value"))["value__sum"]
if recent_invites is None: if recent_invites is None:

View File

@ -67,7 +67,13 @@ def get_active_realm_ids() -> QuerySet[RealmCount, int]:
""" """
date = timezone_now() - timedelta(days=2) date = timezone_now() - timedelta(days=2)
return ( return (
RealmCount.objects.filter(end_time__gte=date, property="1day_actives::day", value__gt=0) RealmCount.objects.filter(
end_time__gte=date,
property="1day_actives::day",
# Filtering on subgroup is important so that the query can use the indexes that include subgroup.
subgroup=None,
value__gt=0,
)
.distinct("realm_id") .distinct("realm_id")
.values_list("realm_id", flat=True) .values_list("realm_id", flat=True)
) )

View File

@ -1022,7 +1022,9 @@ class Realm(models.Model): # type: ignore[django-manager-missing] # django-stub
try: try:
latest_count_stat = RealmCount.objects.filter( latest_count_stat = RealmCount.objects.filter(
realm=realm, property="upload_quota_used_bytes::day" realm=realm,
property="upload_quota_used_bytes::day",
subgroup=None,
).latest("end_time") ).latest("end_time")
last_recorded_used_space = latest_count_stat.value last_recorded_used_space = latest_count_stat.value
last_recorded_date = latest_count_stat.end_time last_recorded_date = latest_count_stat.end_time