zilencer: Drop unwanted data that old servers might still send.

This commit is contained in:
Alex Vandiver 2024-05-31 20:06:19 +00:00 committed by Tim Abbott
parent 09e9c75ec6
commit 6c17cca208
3 changed files with 18 additions and 3 deletions

View File

@ -953,11 +953,12 @@ BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES = [
# the stat is still in the future). As these logging counts are designed # the stat is still in the future). As these logging counts are designed
# to be used on the self-hosted installation for either debugging or rate # to be used on the self-hosted installation for either debugging or rate
# limiting, sending these incomplete counts to the bouncer has low value. # limiting, sending these incomplete counts to the bouncer has low value.
LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = [ LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER = {
"invites_sent::day", "invites_sent::day",
"mobile_pushes_sent::day", "mobile_pushes_sent::day",
"active_users_log:is_bot:day", "active_users_log:is_bot:day",
] "active_users:is_bot:day",
}
# To avoid refactoring for now COUNT_STATS can be used as before # To avoid refactoring for now COUNT_STATS can be used as before
COUNT_STATS = get_count_stats() COUNT_STATS = get_count_stats()

View File

@ -2098,7 +2098,7 @@ class AnalyticsBouncerTest(BouncerTestCase):
"POST", "POST",
"server/analytics", "server/analytics",
{ {
"realm_counts": '[{"id":1,"property":"invites_sent::day","subgroup":null,"end_time":574300800.0,"value":5,"realm":2}]', "realm_counts": '[{"id":1,"property":"messages_sent:is_bot:hour","subgroup":"false","end_time":574300800.0,"value":5,"realm":2}]',
"installation_counts": "[]", "installation_counts": "[]",
"version": '"2.0.6+git"', "version": '"2.0.6+git"',
}, },

View File

@ -26,6 +26,7 @@ from typing_extensions import Annotated
from analytics.lib.counts import ( from analytics.lib.counts import (
BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES, BOUNCER_ONLY_REMOTE_COUNT_STAT_PROPERTIES,
COUNT_STATS, COUNT_STATS,
LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER,
REMOTE_INSTALLATION_COUNT_STATS, REMOTE_INSTALLATION_COUNT_STATS,
do_increment_logging_stat, do_increment_logging_stat,
) )
@ -1178,6 +1179,17 @@ def remote_server_post_analytics(
realm_id_to_remote_realm = build_realm_id_to_remote_realm_dict(server, realms) realm_id_to_remote_realm = build_realm_id_to_remote_realm_dict(server, realms)
# Note that due to skipping rows from the remote server which
# match LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER, we may
# theoretically choose to omit the last RemoteRealmCount (or
# InstallationCount, below) row sent by the remote server, causing
# them to attempt to re-send that row repeatedlly. Since the last
# CountStat is not currently a skipped type, this is, in practice,
# unlikely to occur.
#
# TODO: Record the high-water RealmCount and InstallationCount's
# `remote_id` values on the RemoteServer, rather than computing
# them via get_last_id_from_server
remote_realm_counts = [ remote_realm_counts = [
RemoteRealmCount( RemoteRealmCount(
remote_realm=realm_id_to_remote_realm.get(row.realm), remote_realm=realm_id_to_remote_realm.get(row.realm),
@ -1190,6 +1202,7 @@ def remote_server_post_analytics(
value=row.value, value=row.value,
) )
for row in realm_counts for row in realm_counts
if row.property not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
] ]
batch_create_table_data(server, RemoteRealmCount, remote_realm_counts) batch_create_table_data(server, RemoteRealmCount, remote_realm_counts)
@ -1203,6 +1216,7 @@ def remote_server_post_analytics(
value=row.value, value=row.value,
) )
for row in installation_counts for row in installation_counts
if row.property not in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER
] ]
batch_create_table_data(server, RemoteInstallationCount, remote_installation_counts) batch_create_table_data(server, RemoteInstallationCount, remote_installation_counts)