From 6c17cca20852379c4d7f86e962054c46eb134311 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Fri, 31 May 2024 20:06:19 +0000 Subject: [PATCH] zilencer: Drop unwanted data that old servers might still send. --- analytics/lib/counts.py | 5 +++-- zerver/tests/test_push_notifications.py | 2 +- zilencer/views.py | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py index a872446f9e..d6d80dfcb6 100644 --- a/analytics/lib/counts.py +++ b/analytics/lib/counts.py @@ -953,11 +953,12 @@ BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES = [ # the stat is still in the future). As these logging counts are designed # to be used on the self-hosted installation for either debugging or rate # limiting, sending these incomplete counts to the bouncer has low value. -LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = [ +LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = { "invites_sent::day", "mobile_pushes_sent::day", "active_users_log:is_bot:day", -] + "active_users:is_bot:day", +} # To avoid refactoring for now COUNT_STATS can be used as before COUNT_STATS = get_count_stats() diff --git a/zerver/tests/test_push_notifications.py b/zerver/tests/test_push_notifications.py index bbd04b6fad..7a52d28e6d 100644 --- a/zerver/tests/test_push_notifications.py +++ b/zerver/tests/test_push_notifications.py @@ -2098,7 +2098,7 @@ class AnalyticsBouncerTest(BouncerTestCase): "POST", "server/analytics", { - "realm_counts": '[{"id":1,"property":"invites_sent::day","subgroup":null,"end_time":574300800.0,"value":5,"realm":2}]', + "realm_counts": '[{"id":1,"property":"messages_sent:is_bot:hour","subgroup":"false","end_time":574300800.0,"value":5,"realm":2}]', "installation_counts": "[]", "version": '"2.0.6+git"', }, diff --git a/zilencer/views.py b/zilencer/views.py index 3ebf445d0a..d0f2007ae4 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -26,6 +26,7 @@ from typing_extensions import Annotated from analytics.lib.counts import ( BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES, COUNT_STATS, + LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER, REMOTE_INSTALLATION_COUNT_STATS, do_increment_logging_stat, ) @@ -1178,6 +1179,17 @@ def remote_server_post_analytics( realm_id_to_remote_realm = build_realm_id_to_remote_realm_dict(server, realms) + # Note that due to skipping rows from the remote server which + # match LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER, we may + # theoretically choose to omit the last RemoteRealmCount (or + # InstallationCount, below) row sent by the remote server, causing + # them to attempt to re-send that row repeatedlly. Since the last + # CountStat is not currently a skipped type, this is, in practice, + # unlikely to occur. + # + # TODO: Record the high-water RealmCount and InstallationCount's + # `remote_id` values on the RemoteServer, rather than computing + # them via get_last_id_from_server remote_realm_counts = [ RemoteRealmCount( remote_realm=realm_id_to_remote_realm.get(row.realm), @@ -1190,6 +1202,7 @@ def remote_server_post_analytics( value=row.value, ) for row in realm_counts + if row.property not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER ] batch_create_table_data(server, RemoteRealmCount, remote_realm_counts) @@ -1203,6 +1216,7 @@ def remote_server_post_analytics( value=row.value, ) for row in installation_counts + if row.property not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER ] batch_create_table_data(server, RemoteInstallationCount, remote_installation_counts)