From 2ecd7abc0d7768e8863038168c5398a37edc563d Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Mon, 23 Oct 2023 22:29:42 +0200 Subject: [PATCH] zilencer: Make BaseRemoteCount.remote_id field nullable. --- analytics/tests/test_counts.py | 25 +++++++++++++++++++ ...oteinstallationcount_remote_id_and_more.py | 22 ++++++++++++++++ zilencer/models.py | 2 +- zilencer/views.py | 14 ++++++++++- 4 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 zilencer/migrations/0031_alter_remoteinstallationcount_remote_id_and_more.py diff --git a/analytics/tests/test_counts.py b/analytics/tests/test_counts.py index 34959b31c7..ae5e6219cc 100644 --- a/analytics/tests/test_counts.py +++ b/analytics/tests/test_counts.py @@ -74,6 +74,8 @@ from zerver.models import ( get_user, is_cross_realm_bot_email, ) +from zilencer.models import RemoteInstallationCount, RemoteZulipServer +from zilencer.views import get_last_id_from_server class AnalyticsTestCase(ZulipTestCase): @@ -1852,3 +1854,26 @@ class TestRealmActiveHumans(AnalyticsTestCase): 1, ) self.assertEqual(RealmCount.objects.filter(property="realm_active_humans::day").count(), 1) + + +class GetLastIdFromServerTest(ZulipTestCase): + def test_get_last_id_from_server_ignores_null(self) -> None: + """ + Verifies that get_last_id_from_server ignores null remote_ids, since this goes + against the default Postgres ordering behavior, which treats nulls as the largest value. + """ + self.server_uuid = "6cde5f7a-1f7e-4978-9716-49f69ebfc9fe" + self.server = RemoteZulipServer.objects.create( + uuid=self.server_uuid, + api_key="magic_secret_api_key", + hostname="demo.example.com", + last_updated=timezone_now(), + ) + first = RemoteInstallationCount.objects.create( + end_time=timezone_now(), server=self.server, property="test", value=1, remote_id=1 + ) + RemoteInstallationCount.objects.create( + end_time=timezone_now(), server=self.server, property="test2", value=1, remote_id=None + ) + result = get_last_id_from_server(self.server, RemoteInstallationCount) + self.assertEqual(result, first.remote_id) diff --git a/zilencer/migrations/0031_alter_remoteinstallationcount_remote_id_and_more.py b/zilencer/migrations/0031_alter_remoteinstallationcount_remote_id_and_more.py new file mode 100644 index 0000000000..9ebe6965b5 --- /dev/null +++ b/zilencer/migrations/0031_alter_remoteinstallationcount_remote_id_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.6 on 2023-10-23 20:22 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("zilencer", "0030_alter_remoteinstallationcount_remote_id"), + ] + + operations = [ + migrations.AlterField( + model_name="remoteinstallationcount", + name="remote_id", + field=models.IntegerField(null=True), + ), + migrations.AlterField( + model_name="remoterealmcount", + name="remote_id", + field=models.IntegerField(null=True), + ), + ] diff --git a/zilencer/models.py b/zilencer/models.py index 9794674e0b..0e497df05c 100644 --- a/zilencer/models.py +++ b/zilencer/models.py @@ -135,7 +135,7 @@ class BaseRemoteCount(BaseCount): # The remote_id field is the id value of the corresponding *Count object # on the remote server. # It lets us deduplicate data from the remote server. - remote_id = models.IntegerField() + remote_id = models.IntegerField(null=True) class Meta: abstract = True diff --git a/zilencer/views.py b/zilencer/views.py index 4c4d644950..03842fc0e8 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -444,6 +444,10 @@ def validate_incoming_table_data( for row in rows: if is_count_stat and row["property"] not in COUNT_STATS: raise JsonableError(_("Invalid property {property}").format(property=row["property"])) + if row.get("id") is None: + # This shouldn't be possible, as submitting data like this should be + # prevented by our param validators. + raise AssertionError(f"Missing id field in row {row}") if row["id"] <= last_id: raise JsonableError(_("Data is out of order.")) last_id = row["id"] @@ -592,7 +596,15 @@ def remote_server_post_analytics( def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int: - last_count = model.objects.filter(server=server).order_by("remote_id").only("remote_id").last() + last_count = ( + model.objects.filter(server=server) + # Rows with remote_id=None are managed by the bouncer service itself, + # and thus aren't meant for syncing and should be ignored here. + .exclude(remote_id=None) + .order_by("remote_id") + .only("remote_id") + .last() + ) if last_count is not None: return last_count.remote_id return 0