zulip/zerver/migrations/0423_fix_email_gateway_atta...

177 lines
7.4 KiB
Python

from datetime import timedelta
from django.conf import settings
from django.db import connection, migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from psycopg2.sql import SQL, Identifier, Literal
def fix_email_gateway_attachment_owner(
apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
Realm = apps.get_model("zerver", "Realm")
UserProfile = apps.get_model("zerver", "UserProfile")
Client = apps.get_model("zerver", "Client")
Message = apps.get_model("zerver", "Message")
ArchivedMessage = apps.get_model("zerver", "ArchivedMessage")
Stream = apps.get_model("zerver", "Stream")
Attachment = apps.get_model("zerver", "Attachment")
ArchivedAttachment = apps.get_model("zerver", "ArchivedAttachment")
if not Realm.objects.exists():
return
mail_gateway_bot = UserProfile.objects.get(email__iexact=settings.EMAIL_GATEWAY_BOT)
# "Internal" is the client-id of all mail gateway posts
internal_client, _ = Client.objects.get_or_create(name="Internal")
# We only look in Attachment and not ArchivedAttachment because,
# never having been associated with a message, there is no way for
# the attachments to have been archived.
orphan_attachments = Attachment.objects.filter(
messages=None,
owner_id=mail_gateway_bot.id,
)
if len(orphan_attachments) == 0:
return
print("")
print(f"Found {len(orphan_attachments)} email gateway attachments to reattach")
for attachment in orphan_attachments:
# We look for the message posted by "Internal" at the same
# time, in the same realm, which has a link to the attachment
# but no "has_attachments". There are potentially other,
# later, messages (possibly from other users, to other
# places!) which tried to link to the attachment; we do not
# fix those references, because finding them efficiently is
# quite hard, as is calculating if they "should" have had
# access to the attachment at the time.
print(
f"Looking for a message to attach {attachment.path_id}, created {attachment.create_time}"
)
possible_matches = []
for model_class in (Message, ArchivedMessage):
possible_matches.extend(
# All messages with this bug will have
# `has_attachment=False`, since they failed to attach
# the contents. However, we cannot limit to
# sender=mail_gateway_bot because they were sent "as"
# some other user.
model_class.objects.filter(
has_attachment=False,
realm_id=attachment.realm_id,
sending_client_id=internal_client.id,
date_sent__gte=attachment.create_time,
date_sent__lte=attachment.create_time + timedelta(minutes=5),
content__contains="/user_uploads/" + attachment.path_id,
).order_by("date_sent")
)
if len(possible_matches) == 0:
print(" No matches!")
continue
# If there are 1 or more matches, we assume the earliest is
# the correct one, since it's ~impossible to have predicted
# the URL before it was first sent.
message = possible_matches[0]
print(f" Found {message.id} @ {message.date_sent} by {message.sender.delivery_email})")
# If this is an ArchivedMessage, then we have to move the
# Attachment into an ArchivedAttachment. We also have to
# generate an zerver_archivedattachment_message row with an id
# based on the next free from zerver_attachment_message, since
# those are one id space.
if isinstance(message, ArchivedMessage):
# move_rows
fields = list(Attachment._meta.fields)
src_fields = [Identifier("zerver_attachment", field.column) for field in fields]
dst_fields = [Identifier(field.column) for field in fields]
with connection.cursor() as cursor:
raw_query = SQL(
"""
INSERT INTO zerver_archivedattachment ({dst_fields})
SELECT {src_fields}
FROM zerver_attachment
WHERE id = {id}
ON CONFLICT (id) DO NOTHING
RETURNING id
"""
)
cursor.execute(
raw_query.format(
src_fields=SQL(",").join(src_fields),
dst_fields=SQL(",").join(dst_fields),
id=Literal(attachment.id),
)
)
archived_ids = [id for (id,) in cursor.fetchall()]
if len(archived_ids) != 1:
print("!!! Did not create one archived attachment row!")
attachment.delete()
attachment = ArchivedAttachment.objects.get(id=archived_ids[0])
# Determine message (and thus attachment) properties; this is
# from do_claim_attachments
is_message_realm_public = False
is_message_web_public = False
if message.recipient.type == 2: # Recipient.STREAM
stream = Stream.objects.get(id=message.recipient.type_id)
is_message_realm_public = not stream.invite_only and not stream.is_in_zephyr_realm
is_message_web_public = stream.is_web_public
attachment.owner_id = message.sender_id
attachment.is_web_public = is_message_web_public
attachment.is_realm_public = is_message_realm_public
attachment.save(update_fields=["owner_id", "is_web_public", "is_realm_public"])
if isinstance(attachment, ArchivedAttachment):
assert isinstance(message, ArchivedMessage)
# We need to use the sequence from
# zerver_attachment_messages, since that id is reused when
# restoring the message.
with connection.cursor() as cursor:
raw_query = SQL(
"""
INSERT INTO zerver_archivedattachment_messages
(id, archivedattachment_id, archivedmessage_id)
VALUES (nextval(pg_get_serial_sequence('zerver_attachment_messages', 'id')),
{attachment_id}, {message_id})
"""
)
cursor.execute(
raw_query.format(
attachment_id=Literal(attachment.id),
message_id=Literal(message.id),
)
)
else:
assert isinstance(message, Message)
attachment.messages.add(message)
message.has_attachment = True
message.save(update_fields=["has_attachment"])
class Migration(migrations.Migration):
"""
Messages sent "as" a user via the email gateway had their
attachments left orphan, accidentally owned by the email gateway
bot. Find each such orphaned attachment, and re-own it and attach
it to the appropriate message.
"""
dependencies = [
("zerver", "0422_multiuseinvite_status"),
]
operations = [
migrations.RunPython(
fix_email_gateway_attachment_owner,
reverse_code=migrations.RunPython.noop,
elidable=True,
)
]