From cf82d3316b2778108de4f65a3d8fd2ee0b72be58 Mon Sep 17 00:00:00 2001 From: Lauryn Menard Date: Mon, 26 Feb 2024 21:14:52 +0100 Subject: [PATCH] push-bouncer: Exclude LoggingCountStats with partial data. LoggingCountStats with a daily duration and that are directly stored on the RealmCount table (not via aggregation in process_count_stat), can be in a state, after the hourly cron job to update analytics counts, where the logged value will be live-updated later, because the end time for the stat is still in the future. As these logging counts are designed to be used on the self-hosted installation for either debugging or rate limiting, sending these partial/incomplete counts to the bouncer has low value. --- analytics/lib/counts.py | 18 ++++++- zerver/lib/remote_server.py | 5 +- zerver/tests/test_push_notifications.py | 65 +++++++++++++++++-------- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py index 8b62017253..a2eba2937c 100644 --- a/analytics/lib/counts.py +++ b/analytics/lib/counts.py @@ -797,6 +797,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]: # In RealmCount, 'active_users_audit:is_bot:day' should be the partial # sum sequence of 'active_users_log:is_bot:day', for any realm that # started after the latter stat was introduced. + # Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER. LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY), # Another sanity check on 'active_users_audit:is_bot:day'. Is only an # approximation, e.g. if a user is deactivated between the end of the @@ -843,13 +844,15 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]: "minutes_active::day", DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY ), # Tracks the number of push notifications requested by the server. + # Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER. LoggingCountStat( "mobile_pushes_sent::day", RealmCount, CountStat.DAY, ), # Rate limiting stats - # Used to limit the number of invitation emails sent by a realm + # Used to limit the number of invitation emails sent by a realm. + # Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER. LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY), # Dependent stats # Must come after their dependencies. @@ -890,6 +893,19 @@ BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES = [ "mobile_pushes_forwarded::day", ] +# LoggingCountStats with a daily duration and that are directly stored on +# the RealmCount table (instead of via aggregation in process_count_stat), +# can be in a state, after the hourly cron job to update analytics counts, +# where the logged value will be live-updated later (as the end time for +# the stat is still in the future). As these logging counts are designed +# to be used on the self-hosted installation for either debugging or rate +# limiting, sending these incomplete counts to the bouncer has low value. +LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = [ + "invites_sent::day", + "mobile_pushes_sent::day", + "active_users_log:is_bot:day", +] + # To avoid refactoring for now COUNT_STATS can be used as before COUNT_STATS = get_count_stats() diff --git a/zerver/lib/remote_server.py b/zerver/lib/remote_server.py index e7d57f6125..27cb0d7f02 100644 --- a/zerver/lib/remote_server.py +++ b/zerver/lib/remote_server.py @@ -9,6 +9,7 @@ from django.db.models import QuerySet from django.utils.translation import gettext as _ from pydantic import UUID4, BaseModel, ConfigDict, Field, Json, field_validator +from analytics.lib.counts import LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER from analytics.models import InstallationCount, RealmCount from version import API_FEATURE_LEVEL, ZULIP_VERSION from zerver.actions.realm_settings import ( @@ -364,8 +365,10 @@ def send_server_data_to_push_bouncer(consider_usage_statistics: bool = True) -> # uploading such statistics enabled. installation_count_query = InstallationCount.objects.filter( id__gt=last_acked_installation_count_id + ).exclude(property__in=LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER) + realm_count_query = RealmCount.objects.filter(id__gt=last_acked_realm_count_id).exclude( + property__in=LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER ) - realm_count_query = RealmCount.objects.filter(id__gt=last_acked_realm_count_id) else: installation_count_query = InstallationCount.objects.none() realm_count_query = RealmCount.objects.none() diff --git a/zerver/tests/test_push_notifications.py b/zerver/tests/test_push_notifications.py index fcde17f65b..96a50ecf5b 100644 --- a/zerver/tests/test_push_notifications.py +++ b/zerver/tests/test_push_notifications.py @@ -24,7 +24,7 @@ from requests.models import PreparedRequest from typing_extensions import override from analytics.lib.counts import CountStat, LoggingCountStat -from analytics.models import InstallationCount, RealmCount +from analytics.models import InstallationCount, RealmCount, UserCount from corporate.models import CustomerPlan from version import ZULIP_VERSION from zerver.actions.create_realm import do_create_realm @@ -1481,17 +1481,35 @@ class AnalyticsBouncerTest(BouncerTestCase): ) # Create some rows we'll send to remote server - realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY) - RealmCount.objects.create( - realm=user.realm, property=realm_stat.property, end_time=end_time, value=5 + # LoggingCountStat that should be included; + # i.e. not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER + messages_read_logging_stat = LoggingCountStat( + "messages_read::hour", UserCount, CountStat.HOUR ) - InstallationCount.objects.create( - property=realm_stat.property, + RealmCount.objects.create( + realm=user.realm, + property=messages_read_logging_stat.property, + end_time=end_time, + value=5, + ) + InstallationCount.objects.create( + property=messages_read_logging_stat.property, + end_time=end_time, + value=5, + ) + # LoggingCountStat that should not be included; + # i.e. in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER + invites_sent_logging_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY) + RealmCount.objects.create( + realm=user.realm, + property=invites_sent_logging_stat.property, + end_time=end_time, + value=5, + ) + InstallationCount.objects.create( + property=invites_sent_logging_stat.property, end_time=end_time, value=5, - # We set a subgroup here to work around: - # https://github.com/zulip/zulip/issues/12362 - subgroup="test_subgroup", ) # Event type in SYNCED_BILLING_EVENTS -- should be included RealmAuditLog.objects.create( @@ -1513,8 +1531,8 @@ class AnalyticsBouncerTest(BouncerTestCase): event_time=end_time, extra_data=orjson.dumps({"foo": "bar"}).decode(), ) - self.assertEqual(RealmCount.objects.count(), 1) - self.assertEqual(InstallationCount.objects.count(), 1) + self.assertEqual(RealmCount.objects.count(), 2) + self.assertEqual(InstallationCount.objects.count(), 2) self.assertEqual(RealmAuditLog.objects.filter(id__gt=audit_log_max_id).count(), 2) with self.settings(SUBMIT_USAGE_STATISTICS=False): @@ -1689,13 +1707,13 @@ class AnalyticsBouncerTest(BouncerTestCase): # Test only having new RealmCount rows RealmCount.objects.create( realm=user.realm, - property=realm_stat.property, + property=messages_read_logging_stat.property, end_time=end_time + timedelta(days=1), value=6, ) RealmCount.objects.create( realm=user.realm, - property=realm_stat.property, + property=messages_read_logging_stat.property, end_time=end_time + timedelta(days=2), value=9, ) @@ -1704,7 +1722,9 @@ class AnalyticsBouncerTest(BouncerTestCase): # Test only having new InstallationCount rows InstallationCount.objects.create( - property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6 + property=messages_read_logging_stat.property, + end_time=end_time + timedelta(days=1), + value=6, ) send_server_data_to_push_bouncer() check_counts(8, 8, 3, 2, 7) @@ -1823,12 +1843,17 @@ class AnalyticsBouncerTest(BouncerTestCase): end_time = self.TIME_ZERO # Create some rows we'll send to remote server - realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY) + messages_read_logging_stat = LoggingCountStat( + "messages_read::hour", UserCount, CountStat.HOUR + ) realm_count = RealmCount.objects.create( - realm=user.realm, property=realm_stat.property, end_time=end_time, value=5 + realm=user.realm, + property=messages_read_logging_stat.property, + end_time=end_time, + value=5, ) installation_count = InstallationCount.objects.create( - property=realm_stat.property, + property=messages_read_logging_stat.property, end_time=end_time, value=5, ) @@ -1920,12 +1945,14 @@ class AnalyticsBouncerTest(BouncerTestCase): # set as it should. RealmCount.objects.create( realm=user.realm, - property=realm_stat.property, + property=messages_read_logging_stat.property, end_time=end_time + timedelta(days=1), value=6, ) InstallationCount.objects.create( - property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6 + property=messages_read_logging_stat.property, + end_time=end_time + timedelta(days=1), + value=6, ) RealmAuditLog.objects.create( realm=user.realm,