zilencer: Automatically migrate Remote* objects to link to RemoteRealm.

1. When we get data and it includes realm info, we should automatically
   link the new records with the appropriate RemoteRealm.
2. For old records, when we receive realm data, we have an opportunity
   to update those old record to link them to the right RemoteRealm.
   This logic doesn't need to always run, just after a remote server
   upgrade, since that's when this shift in remote server behavior will
   occur.
This commit is contained in:
Mateusz Mandera 2023-12-05 20:28:36 +01:00 committed by Tim Abbott
parent 937abb8ba2
commit 39b0628f09
2 changed files with 154 additions and 0 deletions

View File

@ -1394,6 +1394,112 @@ class AnalyticsBouncerTest(BouncerTestCase):
result, 'Invalid realms[0]["org_type"]: Value error, Not a valid org_type value'
)
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL="https://push.zulip.org.example.com")
@responses.activate
def test_analytics_api_foreign_keys_to_remote_realm(self) -> None:
self.add_mock_response()
user = self.example_user("hamlet")
end_time = self.TIME_ZERO
# Create some rows we'll send to remote server
realm_stat = LoggingCountStat("invites_sent::day", RealmCount, CountStat.DAY)
realm_count = RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time, value=5
)
installation_count = InstallationCount.objects.create(
property=realm_stat.property,
end_time=end_time,
value=5,
)
realm_audit_log = RealmAuditLog.objects.create(
realm=user.realm,
modified_user=user,
event_type=RealmAuditLog.USER_CREATED,
event_time=end_time,
extra_data=orjson.dumps(
{
RealmAuditLog.ROLE_COUNT: realm_user_count_by_role(user.realm),
}
).decode(),
)
realm_count_data, installation_count_data, realmauditlog_data = build_analytics_data(
RealmCount.objects.all(), InstallationCount.objects.all(), RealmAuditLog.objects.all()
)
# Send the data to the bouncer without any realms data. This should lead
# to successful saving of the data, but with the remote_realm foreign key
# set to NULL.
result = self.uuid_post(
self.server_uuid,
"/api/v1/remotes/server/analytics",
{
"realm_counts": orjson.dumps(realm_count_data).decode(),
"installation_counts": orjson.dumps(installation_count_data).decode(),
"realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
"realms": orjson.dumps([]).decode(),
},
subdomain="",
)
self.assert_json_success(result)
remote_realm_count = RemoteRealmCount.objects.latest("id")
remote_installation_count = RemoteInstallationCount.objects.latest("id")
remote_realm_audit_log = RemoteRealmAuditLog.objects.latest("id")
self.assertEqual(remote_realm_count.remote_id, realm_count.id)
self.assertEqual(remote_realm_count.remote_realm, None)
self.assertEqual(remote_installation_count.remote_id, installation_count.id)
# InstallationCont/RemoteInstallationCount don't have realm/remote_realm foreign
# keys, because they're aggregated over all realms.
self.assertEqual(remote_realm_audit_log.remote_id, realm_audit_log.id)
self.assertEqual(remote_realm_audit_log.remote_realm, None)
send_analytics_to_push_bouncer()
remote_realm_count.refresh_from_db()
remote_installation_count.refresh_from_db()
remote_realm_audit_log.refresh_from_db()
remote_realm = RemoteRealm.objects.get(uuid=user.realm.uuid)
self.assertEqual(remote_realm_count.remote_realm, remote_realm)
self.assertEqual(remote_realm_audit_log.remote_realm, remote_realm)
current_remote_realm_count_amount = RemoteRealmCount.objects.count()
current_remote_realm_audit_log_amount = RemoteRealmAuditLog.objects.count()
# Now create and send new data (including realm info) and verify it has .remote_realm
# set as it should.
RealmCount.objects.create(
realm=user.realm,
property=realm_stat.property,
end_time=end_time + timedelta(days=1),
value=6,
)
InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + timedelta(days=1), value=6
)
RealmAuditLog.objects.create(
realm=user.realm,
modified_user=user,
event_type=RealmAuditLog.USER_CREATED,
event_time=end_time,
extra_data={"data": "foo"},
)
send_analytics_to_push_bouncer()
# Make sure new data was created, so that we're actually testing what we think.
self.assertEqual(RemoteRealmCount.objects.count(), current_remote_realm_count_amount + 1)
self.assertEqual(
RemoteRealmAuditLog.objects.count(), current_remote_realm_audit_log_amount + 1
)
for remote_realm_count in RemoteRealmCount.objects.filter(realm_id=user.realm.id):
self.assertEqual(remote_realm_count.remote_realm, remote_realm)
for remote_realm_audit_log in RemoteRealmAuditLog.objects.filter(realm_id=user.realm.id):
self.assertEqual(remote_realm_audit_log.remote_realm, remote_realm)
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL="https://push.zulip.org.example.com")
@responses.activate
def test_analytics_api_invalid(self) -> None:

View File

@ -693,11 +693,13 @@ def remote_server_post_analytics(
# duplicate submissions of the data
server = RemoteZulipServer.objects.select_for_update().get(id=server.id)
remote_server_version_updated = False
if version is not None:
version = version[0 : RemoteZulipServer.VERSION_MAX_LENGTH]
if version != server.last_version:
server.last_version = version
server.save(update_fields=["last_version"])
remote_server_version_updated = True
validate_incoming_table_data(
server, RemoteRealmCount, [dict(count) for count in realm_counts], True
@ -713,9 +715,14 @@ def remote_server_post_analytics(
if realms is not None:
update_remote_realm_data_for_server(server, realms)
if remote_server_version_updated:
fix_remote_realm_foreign_keys(server, realms)
realm_id_to_remote_realm = build_realm_id_to_remote_realm_dict(server, realms)
remote_realm_counts = [
RemoteRealmCount(
remote_realm=realm_id_to_remote_realm.get(row.realm),
property=row.property,
realm_id=row.realm,
remote_id=row.id,
@ -755,6 +762,7 @@ def remote_server_post_analytics(
extra_data = row.extra_data
remote_realm_audit_logs.append(
RemoteRealmAuditLog(
remote_realm=realm_id_to_remote_realm.get(row.realm),
realm_id=row.realm,
remote_id=row.id,
server=server,
@ -798,6 +806,46 @@ def remote_server_post_analytics(
return json_success(request, data={"realms": remote_realm_dict})
def build_realm_id_to_remote_realm_dict(
server: RemoteZulipServer, realms: Optional[List[RealmDataForAnalytics]]
) -> Dict[int, Optional[RemoteRealm]]:
if realms is None:
return {}
realm_uuids = [realm.uuid for realm in realms]
remote_realms = RemoteRealm.objects.filter(uuid__in=realm_uuids, server=server)
uuid_to_remote_realm_dict = {
str(remote_realm.uuid): remote_realm for remote_realm in remote_realms
}
return {realm.id: uuid_to_remote_realm_dict[str(realm.uuid)] for realm in realms}
def fix_remote_realm_foreign_keys(
server: RemoteZulipServer, realms: List[RealmDataForAnalytics]
) -> None:
"""
Finds the RemoteRealmCount and RemoteRealmAuditLog entries without .remote_realm
set and sets it based on the "realms" data received from the remote server,
if possible.
"""
if (
not RemoteRealmCount.objects.filter(server=server, remote_realm=None).exists()
and not RemoteRealmAuditLog.objects.filter(server=server, remote_realm=None).exists()
):
return
realm_id_to_remote_realm = build_realm_id_to_remote_realm_dict(server, realms)
for realm_id in realm_id_to_remote_realm:
RemoteRealmCount.objects.filter(server=server, remote_realm=None, realm_id=realm_id).update(
remote_realm=realm_id_to_remote_realm[realm_id]
)
RemoteRealmAuditLog.objects.filter(
server=server, remote_realm=None, realm_id=realm_id
).update(remote_realm=realm_id_to_remote_realm[realm_id])
def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int:
last_count = (
model.objects.filter(server=server)