push-bouncer: Exclude LoggingCountStats with partial data.

LoggingCountStats with a daily duration and that are directly stored
on the RealmCount table (not via aggregation in process_count_stat),
can be in a state, after the hourly cron job to update analytics
counts, where the logged value will be live-updated later, because
the end time for the stat is still in the future.

As these logging counts are designed to be used on the self-hosted
installation for either debugging or rate limiting, sending these
partial/incomplete counts to the bouncer has low value.
This commit is contained in:
Lauryn Menard 2024-02-26 21:14:52 +01:00 committed by Tim Abbott
parent cba4b8141e
commit cf82d3316b
3 changed files with 67 additions and 21 deletions

View File

@ -797,6 +797,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
# In RealmCount, 'active_users_audit:is_bot:day' should be the partial # In RealmCount, 'active_users_audit:is_bot:day' should be the partial
# sum sequence of 'active_users_log:is_bot:day', for any realm that # sum sequence of 'active_users_log:is_bot:day', for any realm that
# started after the latter stat was introduced. # started after the latter stat was introduced.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY), LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY),
# Another sanity check on 'active_users_audit:is_bot:day'. Is only an # Another sanity check on 'active_users_audit:is_bot:day'. Is only an
# approximation, e.g. if a user is deactivated between the end of the # approximation, e.g. if a user is deactivated between the end of the
@ -843,13 +844,15 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
"minutes_active::day", DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY "minutes_active::day", DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY
), ),
# Tracks the number of push notifications requested by the server. # Tracks the number of push notifications requested by the server.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat( LoggingCountStat(
"mobile_pushes_sent::day", "mobile_pushes_sent::day",
RealmCount, RealmCount,
CountStat.DAY, CountStat.DAY,
), ),
# Rate limiting stats # Rate limiting stats
# Used to limit the number of invitation emails sent by a realm # Used to limit the number of invitation emails sent by a realm.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY), LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY),
# Dependent stats # Dependent stats
# Must come after their dependencies. # Must come after their dependencies.
@ -890,6 +893,19 @@ BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES = [
"mobile_pushes_forwarded::day", "mobile_pushes_forwarded::day",
] ]
# LoggingCountStats with a daily duration and that are directly stored on
# the RealmCount table (instead of via aggregation in process_count_stat),
# can be in a state, after the hourly cron job to update analytics counts,
# where the logged value will be live-updated later (as the end time for
# the stat is still in the future). As these logging counts are designed
# to be used on the self-hosted installation for either debugging or rate
# limiting, sending these incomplete counts to the bouncer has low value.
LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = [
"invites_sent::day",
"mobile_pushes_sent::day",
"active_users_log:is_bot:day",
]
# To avoid refactoring for now COUNT_STATS can be used as before # To avoid refactoring for now COUNT_STATS can be used as before
COUNT_STATS = get_count_stats() COUNT_STATS = get_count_stats()

View File

@ -9,6 +9,7 @@ from django.db.models import QuerySet
from django.utils.translation import gettext as _ from django.utils.translation import gettext as _
from pydantic import UUID4, BaseModel, ConfigDict, Field, Json, field_validator from pydantic import UUID4, BaseModel, ConfigDict, Field, Json, field_validator
from analytics.lib.counts import LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
from analytics.models import InstallationCount, RealmCount from analytics.models import InstallationCount, RealmCount
from version import API_FEATURE_LEVEL, ZULIP_VERSION from version import API_FEATURE_LEVEL, ZULIP_VERSION
from zerver.actions.realm_settings import ( from zerver.actions.realm_settings import (
@ -364,8 +365,10 @@ def send_server_data_to_push_bouncer(consider_usage_statistics: bool = True) ->
# uploading such statistics enabled. # uploading such statistics enabled.
installation_count_query = InstallationCount.objects.filter( installation_count_query = InstallationCount.objects.filter(
id__gt=last_acked_installation_count_id id__gt=last_acked_installation_count_id
).exclude(property__in=LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER)
realm_count_query = RealmCount.objects.filter(id__gt=last_acked_realm_count_id).exclude(
property__in=LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
) )
realm_count_query = RealmCount.objects.filter(id__gt=last_acked_realm_count_id)
else: else:
installation_count_query = InstallationCount.objects.none() installation_count_query = InstallationCount.objects.none()
realm_count_query = RealmCount.objects.none() realm_count_query = RealmCount.objects.none()

View File

@ -24,7 +24,7 @@ from requests.models import PreparedRequest
from typing_extensions import override from typing_extensions import override
from analytics.lib.counts import CountStat, LoggingCountStat from analytics.lib.counts import CountStat, LoggingCountStat
from analytics.models import InstallationCount, RealmCount from analytics.models import InstallationCount, RealmCount, UserCount
from corporate.models import CustomerPlan from corporate.models import CustomerPlan
from version import ZULIP_VERSION from version import ZULIP_VERSION
from zerver.actions.create_realm import do_create_realm from zerver.actions.create_realm import do_create_realm
@ -1481,17 +1481,35 @@ class AnalyticsBouncerTest(BouncerTestCase):
) )
# Create some rows we'll send to remote server # Create some rows we'll send to remote server
realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY) # LoggingCountStat that should be included;
RealmCount.objects.create( # i.e. not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
realm=user.realm, property=realm_stat.property, end_time=end_time, value=5 messages_read_logging_stat = LoggingCountStat(
"messages_read::hour", UserCount, CountStat.HOUR
) )
InstallationCount.objects.create( RealmCount.objects.create(
property=realm_stat.property, realm=user.realm,
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
)
InstallationCount.objects.create(
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
)
# LoggingCountStat that should not be included;
# i.e. in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
invites_sent_logging_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY)
RealmCount.objects.create(
realm=user.realm,
property=invites_sent_logging_stat.property,
end_time=end_time,
value=5,
)
InstallationCount.objects.create(
property=invites_sent_logging_stat.property,
end_time=end_time, end_time=end_time,
value=5, value=5,
# We set a subgroup here to work around:
# https://github.com/zulip/zulip/issues/12362
subgroup="test_subgroup",
) )
# Event type in SYNCED_BILLING_EVENTS -- should be included # Event type in SYNCED_BILLING_EVENTS -- should be included
RealmAuditLog.objects.create( RealmAuditLog.objects.create(
@ -1513,8 +1531,8 @@ class AnalyticsBouncerTest(BouncerTestCase):
event_time=end_time, event_time=end_time,
extra_data=orjson.dumps({"foo": "bar"}).decode(), extra_data=orjson.dumps({"foo": "bar"}).decode(),
) )
self.assertEqual(RealmCount.objects.count(), 1) self.assertEqual(RealmCount.objects.count(), 2)
self.assertEqual(InstallationCount.objects.count(), 1) self.assertEqual(InstallationCount.objects.count(), 2)
self.assertEqual(RealmAuditLog.objects.filter(id__gt=audit_log_max_id).count(), 2) self.assertEqual(RealmAuditLog.objects.filter(id__gt=audit_log_max_id).count(), 2)
with self.settings(SUBMIT_USAGE_STATISTICS=False): with self.settings(SUBMIT_USAGE_STATISTICS=False):
@ -1689,13 +1707,13 @@ class AnalyticsBouncerTest(BouncerTestCase):
# Test only having new RealmCount rows # Test only having new RealmCount rows
RealmCount.objects.create( RealmCount.objects.create(
realm=user.realm, realm=user.realm,
property=realm_stat.property, property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1), end_time=end_time + timedelta(days=1),
value=6, value=6,
) )
RealmCount.objects.create( RealmCount.objects.create(
realm=user.realm, realm=user.realm,
property=realm_stat.property, property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=2), end_time=end_time + timedelta(days=2),
value=9, value=9,
) )
@ -1704,7 +1722,9 @@ class AnalyticsBouncerTest(BouncerTestCase):
# Test only having new InstallationCount rows # Test only having new InstallationCount rows
InstallationCount.objects.create( InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6 property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
) )
send_server_data_to_push_bouncer() send_server_data_to_push_bouncer()
check_counts(8, 8, 3, 2, 7) check_counts(8, 8, 3, 2, 7)
@ -1823,12 +1843,17 @@ class AnalyticsBouncerTest(BouncerTestCase):
end_time = self.TIME_ZERO end_time = self.TIME_ZERO
# Create some rows we'll send to remote server # Create some rows we'll send to remote server
realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY) messages_read_logging_stat = LoggingCountStat(
"messages_read::hour", UserCount, CountStat.HOUR
)
realm_count = RealmCount.objects.create( realm_count = RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time, value=5 realm=user.realm,
property=messages_read_logging_stat.property,
end_time=end_time,
value=5,
) )
installation_count = InstallationCount.objects.create( installation_count = InstallationCount.objects.create(
property=realm_stat.property, property=messages_read_logging_stat.property,
end_time=end_time, end_time=end_time,
value=5, value=5,
) )
@ -1920,12 +1945,14 @@ class AnalyticsBouncerTest(BouncerTestCase):
# set as it should. # set as it should.
RealmCount.objects.create( RealmCount.objects.create(
realm=user.realm, realm=user.realm,
property=realm_stat.property, property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1), end_time=end_time + timedelta(days=1),
value=6, value=6,
) )
InstallationCount.objects.create( InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6 property=messages_read_logging_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
) )
RealmAuditLog.objects.create( RealmAuditLog.objects.create(
realm=user.realm, realm=user.realm,