# zulip/zerver/migrations/0450_backfill_subscription_... (152 lines, 6.3 KiB, Python)
# Generated by Django 4.2 on 2023-04-19 18:18
from django.db import migrations, transaction
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Max, Min
from django.utils.timezone import now as timezone_now
def backfill_missing_subscriptions(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Backfill subscription realm audit log events for users which are
    currently subscribed but don't have any, presumably due to some
    historical bug. This is important because those rows are
    necessary when reactivating a user who is currently
    soft-deactivated.

    For each stream, we find the subscribed users who have no relevant
    realm audit log entries, and create a backfill=True subscription
    audit log entry which is the latest it could have been, based on
    UserMessage rows.
    """
    # Use the historical (migration-state) models, never the live ones.
    Stream = apps.get_model("zerver", "Stream")
    RealmAuditLog = apps.get_model("zerver", "RealmAuditLog")
    Subscription = apps.get_model("zerver", "Subscription")
    UserMessage = apps.get_model("zerver", "UserMessage")
    Message = apps.get_model("zerver", "Message")

    def get_last_message_id() -> int:
        # We generally use this function to populate RealmAuditLog, and
        # the max id here is actually system-wide, not per-realm. I
        # assume there's some advantage in not filtering by realm.
        last_id = Message.objects.aggregate(Max("id"))["id__max"]
        if last_id is None:
            # During initial realm creation, there might be 0 messages in
            # the database; in that case, the `aggregate` query returns
            # None. Since we want an int for "beginning of time", use -1.
            last_id = -1
        return last_id

    for stream in Stream.objects.all():
        # One transaction per stream: each stream's backfill is
        # all-or-nothing, without one giant migration-long transaction
        # (the Migration below sets atomic = False accordingly).
        with transaction.atomic():
            # Everyone currently subscribed (active or not) to the stream's
            # recipient.
            subscribed_user_ids = set(
                Subscription.objects.filter(recipient_id=stream.recipient_id).values_list(
                    "user_profile_id", flat=True
                )
            )
            # Users who already have *any* subscription lifecycle event for
            # this stream.  NOTE: distinct(<field>) is PostgreSQL-specific.
            user_ids_in_audit_log = set(
                RealmAuditLog.objects.filter(
                    realm=stream.realm,
                    event_type__in=[
                        301,  # RealmAuditLog.SUBSCRIPTION_CREATED
                        302,  # RealmAuditLog.SUBSCRIPTION_ACTIVATED
                        303,  # RealmAuditLog.SUBSCRIPTION_DEACTIVATED
                    ],
                    modified_stream=stream,
                )
                .distinct("modified_user_id")
                .values_list("modified_user_id", flat=True)
            )
            user_ids_missing_events = subscribed_user_ids - user_ids_in_audit_log
            if not user_ids_missing_events:
                continue

            # Fallback values when a user has no UserMessage rows at all.
            last_message_id = get_last_message_id()
            now = timezone_now()

            backfills = []
            # sorted() gives a deterministic processing (and insertion) order.
            for user_id in sorted(user_ids_missing_events):
                print(
                    f"Backfilling subscription event for {user_id} in stream {stream.id} in realm {stream.realm.string_id}"
                )
                # One query for both ends of the user's message history in
                # this stream.
                aggregated = UserMessage.objects.filter(
                    user_profile_id=user_id,
                    message__recipient=stream.recipient_id,
                ).aggregate(
                    earliest_date=Min("message__date_sent"),
                    earliest_message_id=Min("message_id"),
                    latest_date=Max("message__date_sent"),
                    latest_message_id=Max("message_id"),
                )
                # Assume we subscribed right before the first message we
                # saw -- or, if we don't see any, right now. This makes
                # this safe for streams which do not have shared history.
                if aggregated["earliest_message_id"] is not None:
                    event_last_message_id = aggregated["earliest_message_id"] - 1
                else:
                    event_last_message_id = last_message_id
                if aggregated["earliest_date"] is not None:
                    event_time = aggregated["earliest_date"]
                else:
                    event_time = now

                # backfilled=True marks this row as reconstructed rather
                # than recorded at the time of the actual event.
                log_event = RealmAuditLog(
                    event_time=event_time,
                    event_last_message_id=event_last_message_id,
                    backfilled=True,
                    event_type=301,  # RealmAuditLog.SUBSCRIPTION_CREATED
                    realm_id=stream.realm_id,
                    modified_user_id=user_id,
                    modified_stream_id=stream.id,
                )
                backfills.append(log_event)

                # If the subscription is not active, then we also need
                # to manufacture a SUBSCRIPTION_DEACTIVATED event,
                # which we assume to be whenever the last received
                # UserMessage row was.
                sub = Subscription.objects.get(
                    user_profile_id=user_id, recipient_id=stream.recipient_id
                )
                if sub.active:
                    continue
                if aggregated["latest_message_id"] is not None:
                    event_last_message_id = aggregated["latest_message_id"]
                else:
                    event_last_message_id = last_message_id
                if aggregated["latest_date"] is not None:
                    event_time = aggregated["latest_date"]
                else:
                    event_time = now

                deactivated_log_event = RealmAuditLog(
                    event_time=event_time,
                    event_last_message_id=event_last_message_id,
                    backfilled=True,
                    event_type=303,  # RealmAuditLog.SUBSCRIPTION_DEACTIVATED
                    realm_id=stream.realm_id,
                    modified_user_id=user_id,
                    modified_stream_id=stream.id,
                )
                backfills.append(deactivated_log_event)

            # Single INSERT for all of this stream's backfilled rows.
            RealmAuditLog.objects.bulk_create(backfills)
class Migration(migrations.Migration):
    # Don't wrap the whole migration in one transaction; the backfill
    # function opens its own per-stream transactions instead.
    atomic = False

    dependencies = [
        ("zerver", "0449_scheduledmessage_zerver_unsent_scheduled_messages_indexes"),
    ]

    operations = [
        # Pure data backfill: nothing to undo on reverse, and elidable=True
        # lets the operation be dropped when migrations are squashed.
        migrations.RunPython(
            backfill_missing_subscriptions,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]