import sys
from contextlib import ExitStack, redirect_stdout
from typing import TextIO

from django.conf import settings
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps

# Builds the temporary table of messages in invite-only streams that
# record a stream move in their edit history and still have UserMessage
# rows belonging to users with no Subscription to the stream.  Each
# row carries the ids of those extra UserMessage rows.
BUILD_BAD_MOVES_TABLE = """
    CREATE TEMPORARY TABLE bad_moves_cve_2024_27286 AS (
        WITH messages_with_dangling_usermessages AS (
            SELECT zerver_message.id AS message_id,
                   ARRAY_AGG(DISTINCT zerver_usermessage.id) AS extra_usermessage_ids,
                   edit_history::jsonb
              FROM zerver_message
              JOIN zerver_stream
                ON zerver_stream.recipient_id = zerver_message.recipient_id
              JOIN zerver_usermessage
                ON zerver_usermessage.message_id = zerver_message.id
              LEFT JOIN zerver_subscription
                ON zerver_subscription.recipient_id = zerver_stream.recipient_id
               AND zerver_subscription.user_profile_id = zerver_usermessage.user_profile_id
             WHERE zerver_stream.invite_only
               AND zerver_subscription.id IS NULL
               AND zerver_message.edit_history IS NOT NULL
             GROUP BY zerver_message.id
        )
        SELECT message_id,
               extra_usermessage_ids,
               (history_entry->>'timestamp') AS timestamp_moved,
               (history_entry->>'prev_stream')::numeric AS moved_from_stream_id,
               (history_entry->>'stream')::numeric AS moved_to_stream_id
          FROM messages_with_dangling_usermessages
         CROSS JOIN JSONB_ARRAY_ELEMENTS(edit_history) AS history_entry
         WHERE history_entry->>'prev_stream' IS NOT NULL
         ORDER BY 1 ASC
    )
"""

# The SQL query above builds a `bad_moves_cve_2024_27286` temporary table, which
# finds all moved messages where there are UserMessage rows but no
# Subscription rows.  However, the difficulty is that this has a
# false-negative: between 2bc3924672fb and e566e985e4d2,
# multi-message moves only recorded their move on one of the
# messages.  There may thus be messages with dangling UserMessage
# rows which are in the same topic as ones we found already, but
# do not record as having moved, so were not found by that filter.
#
# We determine when zerver/0310_jsonfield, the migration next merged
# after e566e985e4d2 was merged, was run, and examine all messages
# moved earlier than that migration.  We do not limit the early side
# of moves, since it is already naturally bounded by when message
# moves were introduced, and it is plausible that servers were running
# the message-move code before it was merged.
#
# For each potential single-message move in this range, we examine all
# other messages in the topic which were sent before the move, and
# check them for dangling UserMessage rows from users who are not
# subscribed.  We then compare those newly-found messages against the
# known bad messages to guess which move was responsible for them.
BROADEN_MOVES = """
    INSERT INTO bad_moves_cve_2024_27286 (
        WITH other_messages AS (
            SELECT messages_in_topic.id AS message_id,
                   messages_in_topic.recipient_id,
                   UPPER(messages_in_topic.subject) AS upper_topic,
                   messages_in_topic.date_sent
              FROM bad_moves_cve_2024_27286
              JOIN zerver_message bad_message
                ON bad_moves_cve_2024_27286.message_id = bad_message.id
              JOIN zerver_message messages_in_topic
                ON bad_message.recipient_id = messages_in_topic.recipient_id
               AND UPPER(bad_message.subject) = UPPER(messages_in_topic.subject)
             WHERE TO_TIMESTAMP(timestamp_moved::numeric) < (
                       SELECT applied
                         FROM django_migrations
                        WHERE app = 'zerver'
                          AND name = '0310_jsonfield'
                   )
               AND messages_in_topic.date_sent < TO_TIMESTAMP(timestamp_moved::numeric)
               AND messages_in_topic.id NOT IN (SELECT already.message_id FROM bad_moves_cve_2024_27286 already)
             GROUP BY 1
        ), other_bad_messages AS (
            SELECT other_messages.message_id,
                   other_messages.recipient_id,
                   other_messages.upper_topic,
                   other_messages.date_sent,
                   ARRAY_AGG(DISTINCT zerver_usermessage.id) as extra_usermessage_ids
              FROM other_messages
              JOIN zerver_usermessage
                ON zerver_usermessage.message_id = other_messages.message_id
              LEFT JOIN zerver_subscription
                ON zerver_subscription.recipient_id = other_messages.recipient_id
               AND zerver_subscription.user_profile_id = zerver_usermessage.user_profile_id
             WHERE zerver_subscription.id IS NULL
             GROUP BY 1, 2, 3, 4
        )
        SELECT other_bad_messages.message_id,
               other_bad_messages.extra_usermessage_ids,
               move_trigger.timestamp_moved,
               move_trigger.moved_from_stream_id,
               move_trigger.moved_to_stream_id
          FROM other_bad_messages
          LEFT JOIN LATERAL (
                SELECT bad_moves_cve_2024_27286.*
                  FROM bad_moves_cve_2024_27286
                  JOIN zerver_message
                    ON zerver_message.id = bad_moves_cve_2024_27286.message_id
                  JOIN zerver_stream
                    ON zerver_stream.recipient_id = zerver_message.recipient_id
                   AND bad_moves_cve_2024_27286.moved_to_stream_id = zerver_stream.id
                 WHERE other_bad_messages.recipient_id = zerver_message.recipient_id
                   AND other_bad_messages.upper_topic = UPPER(zerver_message.subject)
                   AND TO_TIMESTAMP(bad_moves_cve_2024_27286.timestamp_moved::numeric) > other_bad_messages.date_sent
                 ORDER BY bad_moves_cve_2024_27286.message_id ASC, bad_moves_cve_2024_27286.timestamp_moved ASC
                 LIMIT 1
          ) move_trigger ON true
    )
"""


def log_extra_usermessage_rows(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Write a report of every message listed in `bad_moves_cve_2024_27286`.

    Must run after the temporary table has been built and before it is
    dropped.  If the table is empty, nothing is written and no log file
    is created.  Otherwise the report goes to a log file under
    /var/log/zulip when settings.PRODUCTION is set, and to stderr when
    it is not.

    For each message the report contains a link to the message, the
    recorded move (timestamp and source/destination stream ids), and a
    count of the non-bot users and bots whose UserMessage rows are
    about to be deleted.  Unless more than 25 rows are affected, the
    delivery email and read state of each non-bot user is listed too.

    `schema_editor` is unused; it is part of the RunPython signature.
    """
    Message = apps.get_model("zerver", "message")
    UserMessage = apps.get_model("zerver", "usermessage")
    Stream = apps.get_model("zerver", "stream")

    # The join makes the temporary table's columns (timestamp_moved,
    # extra_usermessage_ids, ...) available as attributes on each row.
    messages = Message.objects.raw(
        "SELECT * FROM zerver_message JOIN bad_moves_cve_2024_27286 ON message_id = zerver_message.id"
    )
    if len(messages) == 0:  # RawQuerySet does not have .exists() or .count()
        return

    with ExitStack() as stack:
        if settings.PRODUCTION:
            # Explicit encoding: delivery emails may contain non-ASCII
            # characters, and the locale default might not encode them.
            log_file: TextIO = stack.enter_context(
                open(
                    "/var/log/zulip/migrations_0501_delete_dangling_usermessages.log",
                    "w",
                    encoding="utf-8",
                )
            )
        else:
            log_file = sys.stderr
            # NOTE(review): presumably separates the report from whatever is
            # already on the current stderr line -- confirm.
            print("", file=log_file)
        # Everything printed below lands in log_file.
        stack.enter_context(redirect_stdout(log_file))

        for message in messages:
            realm = message.realm
            # Reimplement realm.url
            if realm.string_id == "":
                hostname = settings.EXTERNAL_HOST
            else:
                hostname = settings.REALM_HOSTS.get(
                    realm.string_id, f"{realm.string_id}.{settings.EXTERNAL_HOST}"
                )
            stream = Stream.objects.only("id").get(recipient_id=message.recipient_id)

            print(
                f"{settings.EXTERNAL_URI_SCHEME}{hostname}/#narrow/stream/{stream.id}/near/{message.id}",
            )
            print(
                f" Moved at {message.timestamp_moved} from stream id {message.moved_from_stream_id} to {message.moved_to_stream_id}"
            )

            # Find out how many of those are users, and not bots
            ums = (
                UserMessage.objects.filter(
                    id__in=message.extra_usermessage_ids, user_profile__is_bot=False
                )
                .select_related("user_profile")
                .only("flags", "user_profile__delivery_email")
            )
            extra_count = len(message.extra_usermessage_ids)
            human_count = len(ums)
            print(
                f" Was still readable by {human_count} users, {extra_count - human_count} bots",
            )

            # Skip the per-user listing when many rows are affected.
            if extra_count > 25:
                continue
            for um in ums:
                # The lowest flag bit is what gets reported as "read".
                read = "(read)" if um.flags & 1 else "(unread)"
                print(f" {um.user_profile.delivery_email} {read}")
            print("")


class Migration(migrations.Migration):
    """Find, log, and delete the UserMessage rows collected in the temporary table."""

    atomic = False

    dependencies = [
        ("zerver", "0496_alter_scheduledmessage_read_by_sender"),
    ]

    # Order matters: build the table, widen it, report on it, delete
    # the rows it lists, then drop it.
    operations = [
        migrations.RunSQL(BUILD_BAD_MOVES_TABLE, elidable=True),
        migrations.RunSQL(BROADEN_MOVES, elidable=True),
        migrations.RunPython(log_extra_usermessage_rows, reverse_code=migrations.RunPython.noop),
        migrations.RunSQL(
            """
            DELETE FROM zerver_usermessage
             WHERE id IN (SELECT UNNEST(extra_usermessage_ids) FROM bad_moves_cve_2024_27286)
            """,
            elidable=True,
        ),
        migrations.RunSQL("DROP TABLE bad_moves_cve_2024_27286", elidable=True),
    ]