push-bouncer: Exclude LoggingCountStats with partial data.

LoggingCountStats with a daily duration and that are directly stored
on the RealmCount table (not via aggregation in process_count_stat),
can be in a state, after the hourly cron job to update analytics
counts, where the logged value will be live-updated later, because
the end time for the stat is still in the future.

As these logging counts are designed to be used on the self-hosted
installation for either debugging or rate limiting, sending these
partial/incomplete counts to the bouncer has low value.
This commit is contained in:
Lauryn Menard 2024-02-26 21:14:52 +01:00 committed by Tim Abbott
parent cba4b8141e
commit cf82d3316b
3 changed files with 67 additions and 21 deletions

View File

@ -797,6 +797,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
# In RealmCount, 'active_users_audit:is_bot:day' should be the partial
# sum sequence of 'active_users_log:is_bot:day', for any realm that
# started after the latter stat was introduced.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY),
# Another sanity check on 'active_users_audit:is_bot:day'. Is only an
# approximation, e.g. if a user is deactivated between the end of the
@ -843,13 +844,15 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
"minutes_active::day", DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY
),
# Tracks the number of push notifications requested by the server.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat(
"mobile_pushes_sent::day",
RealmCount,
CountStat.DAY,
),
# Rate limiting stats
# Used to limit the number of invitation emails sent by a realm
# Used to limit the number of invitation emails sent by a realm.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY),
# Dependent stats
# Must come after their dependencies.
@ -890,6 +893,19 @@ BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES = [
"mobile_pushes_forwarded::day",
]
# LoggingCountStats with a daily duration and that are directly stored on
# the RealmCount table (instead of via aggregation in process_count_stat),
# can be in a state, after the hourly cron job to update analytics counts,
# where the logged value will be live-updated later (as the end time for
# the stat is still in the future). As these logging counts are designed
# to be used on the self-hosted installation for either debugging or rate
# limiting, sending these incomplete counts to the bouncer has low value.
LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = [
"invites_sent::day",
"mobile_pushes_sent::day",
"active_users_log:is_bot:day",
]
# To avoid refactoring for now COUNT_STATS can be used as before
COUNT_STATS = get_count_stats()

View File

@ -9,6 +9,7 @@ from django.db.models import QuerySet
from django.utils.translation import gettext as _
from pydantic import UUID4, BaseModel, ConfigDict, Field, Json, field_validator
from analytics.lib.counts import LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
from analytics.models import InstallationCount, RealmCount
from version import API_FEATURE_LEVEL, ZULIP_VERSION
from zerver.actions.realm_settings import (
@ -364,8 +365,10 @@ def send_server_data_to_push_bouncer(consider_usage_statistics: bool = True) ->
# uploading such statistics enabled.
installation_count_query = InstallationCount.objects.filter(
id__gt=last_acked_installation_count_id
).exclude(property__in=LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER)
realm_count_query = RealmCount.objects.filter(id__gt=last_acked_realm_count_id).exclude(
property__in=LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
)
realm_count_query = RealmCount.objects.filter(id__gt=last_acked_realm_count_id)
else:
installation_count_query = InstallationCount.objects.none()
realm_count_query = RealmCount.objects.none()

View File

@ -24,7 +24,7 @@ from requests.models import PreparedRequest
from typing_extensions import override
from analytics.lib.counts import CountStat, LoggingCountStat
from analytics.models import InstallationCount, RealmCount
from analytics.models import InstallationCount, RealmCount, UserCount
from corporate.models import CustomerPlan
from version import ZULIP_VERSION
from zerver.actions.create_realm import do_create_realm
@ -1481,17 +1481,35 @@ class AnalyticsBouncerTest(BouncerTestCase):
)
# Create some rows we'll send to remote server
realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY)
RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time, value=5
# LoggingCountStat that should be included;
# i.e. not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
messages_read_logging_stat = LoggingCountStat(
"messages_read::hour", UserCount, CountStat.HOUR
)
InstallationCount.objects.create(
property=realm_stat.property,
RealmCount.objects.create(
realm=user.realm,
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
)
InstallationCount.objects.create(
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
)
# LoggingCountStat that should not be included;
# i.e. in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
invites_sent_logging_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY)
RealmCount.objects.create(
realm=user.realm,
property=invites_sent_logging_stat.property,
end_time=end_time,
value=5,
)
InstallationCount.objects.create(
property=invites_sent_logging_stat.property,
end_time=end_time,
value=5,
# We set a subgroup here to work around:
# https://github.com/zulip/zulip/issues/12362
subgroup="test_subgroup",
)
# Event type in SYNCED_BILLING_EVENTS -- should be included
RealmAuditLog.objects.create(
@ -1513,8 +1531,8 @@ class AnalyticsBouncerTest(BouncerTestCase):
event_time=end_time,
extra_data=orjson.dumps({"foo": "bar"}).decode(),
)
self.assertEqual(RealmCount.objects.count(), 1)
self.assertEqual(InstallationCount.objects.count(), 1)
self.assertEqual(RealmCount.objects.count(), 2)
self.assertEqual(InstallationCount.objects.count(), 2)
self.assertEqual(RealmAuditLog.objects.filter(id__gt=audit_log_max_id).count(), 2)
with self.settings(SUBMIT_USAGE_STATISTICS=False):
@ -1689,13 +1707,13 @@ class AnalyticsBouncerTest(BouncerTestCase):
# Test only having new RealmCount rows
RealmCount.objects.create(
realm=user.realm,
property=realm_stat.property,
property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
)
RealmCount.objects.create(
realm=user.realm,
property=realm_stat.property,
property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=2),
value=9,
)
@ -1704,7 +1722,9 @@ class AnalyticsBouncerTest(BouncerTestCase):
# Test only having new InstallationCount rows
InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6
property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
)
send_server_data_to_push_bouncer()
check_counts(8, 8, 3, 2, 7)
@ -1823,12 +1843,17 @@ class AnalyticsBouncerTest(BouncerTestCase):
end_time = self.TIME_ZERO
# Create some rows we'll send to remote server
realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY)
messages_read_logging_stat = LoggingCountStat(
"messages_read::hour", UserCount, CountStat.HOUR
)
realm_count = RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time, value=5
realm=user.realm,
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
)
installation_count = InstallationCount.objects.create(
property=realm_stat.property,
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
)
@ -1920,12 +1945,14 @@ class AnalyticsBouncerTest(BouncerTestCase):
# set as it should.
RealmCount.objects.create(
realm=user.realm,
property=realm_stat.property,
property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
)
InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6
property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
)
RealmAuditLog.objects.create(
realm=user.realm,