From 414658fc8e689ea74f13965bbcd72e7beed00df5 Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Sun, 7 May 2023 20:04:37 +0200 Subject: [PATCH] scheduled_message: Handle attachments properly. Fixes #25414. We add Attachment.scheduled_messages relation to track ScheduledMessages which reference the attachment. The import bits can be done after merging this, by updating #25345. --- zerver/actions/scheduled_messages.py | 28 ++++-- zerver/actions/uploads.py | 14 ++- zerver/lib/export.py | 3 + zerver/lib/import_realm.py | 4 + zerver/lib/retention.py | 1 + zerver/lib/upload/__init__.py | 14 ++- ..._attachment_scheduled_messages_and_more.py | 22 +++++ zerver/models.py | 26 +++++- zerver/tests/test_scheduled_messages.py | 91 ++++++++++++++++++- zerver/tests/test_upload.py | 43 +++++++++ 10 files changed, 228 insertions(+), 18 deletions(-) create mode 100644 zerver/migrations/0447_attachment_scheduled_messages_and_more.py diff --git a/zerver/actions/scheduled_messages.py b/zerver/actions/scheduled_messages.py index abc1e592db..f3c2f25088 100644 --- a/zerver/actions/scheduled_messages.py +++ b/zerver/actions/scheduled_messages.py @@ -1,10 +1,11 @@ import datetime -from typing import List, Optional, Sequence, Union +from typing import List, Optional, Sequence, Tuple, Union from django.db import transaction from django.utils.translation import gettext as _ from zerver.actions.message_send import check_message +from zerver.actions.uploads import check_attachment_reference_change, do_claim_attachments from zerver.lib.addressee import Addressee from zerver.lib.exceptions import JsonableError from zerver.lib.message import SendMessageRequest, render_markdown @@ -45,7 +46,7 @@ def check_schedule_message( def do_schedule_messages( send_message_requests: Sequence[SendMessageRequest], sender: UserProfile ) -> List[int]: - scheduled_messages: List[ScheduledMessage] = [] + scheduled_messages: List[Tuple[ScheduledMessage, SendMessageRequest]] = [] for send_request in send_message_requests: scheduled_message = ScheduledMessage() @@ -65,18 +66,28 @@ def do_schedule_messages( scheduled_message.scheduled_timestamp = send_request.deliver_at scheduled_message.delivery_type = ScheduledMessage.SEND_LATER - scheduled_messages.append(scheduled_message) + scheduled_messages.append((scheduled_message, send_request)) + + with transaction.atomic(): + ScheduledMessage.objects.bulk_create( + [scheduled_message for scheduled_message, ignored in scheduled_messages] + ) + for scheduled_message, send_request in scheduled_messages: + if do_claim_attachments( + scheduled_message, send_request.rendering_result.potential_attachment_path_ids + ): + scheduled_message.has_attachment = True + scheduled_message.save(update_fields=["has_attachment"]) - ScheduledMessage.objects.bulk_create(scheduled_messages) event = { "type": "scheduled_messages", "op": "add", "scheduled_messages": [ - scheduled_message.to_dict() for scheduled_message in scheduled_messages + scheduled_message.to_dict() for scheduled_message, ignored in scheduled_messages ], } send_event(sender.realm, event, [sender.id]) - return [scheduled_message.id for scheduled_message in scheduled_messages] + return [scheduled_message.id for scheduled_message, ignored in scheduled_messages] def edit_scheduled_message( @@ -102,6 +113,11 @@ def edit_scheduled_message( scheduled_message_object.stream = send_request.stream assert send_request.deliver_at is not None scheduled_message_object.scheduled_timestamp = send_request.deliver_at + + scheduled_message_object.has_attachment = check_attachment_reference_change( + scheduled_message_object, rendering_result + ) + scheduled_message_object.save() event = { diff --git a/zerver/actions/uploads.py b/zerver/actions/uploads.py index 756f2e119e..51a9e0922e 100644 --- a/zerver/actions/uploads.py +++ b/zerver/actions/uploads.py @@ -1,11 +1,12 @@ import logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Union from zerver.lib.markdown import MessageRenderingResult from zerver.lib.upload import claim_attachment, delete_message_attachment from zerver.models import ( Attachment, Message, + ScheduledMessage, Stream, UserProfile, get_old_unclaimed_attachments, @@ -26,7 +27,9 @@ def notify_attachment_update( send_event(user_profile.realm, event, [user_profile.id]) -def do_claim_attachments(message: Message, potential_path_ids: List[str]) -> bool: +def do_claim_attachments( + message: Union[Message, ScheduledMessage], potential_path_ids: List[str] +) -> bool: claimed = False for path_id in potential_path_ids: user_profile = message.sender @@ -59,7 +62,10 @@ def do_claim_attachments(message: Message, potential_path_ids: List[str]) -> boo attachment = claim_attachment( user_profile, path_id, message, is_message_realm_public, is_message_web_public ) - notify_attachment_update(user_profile, "update", attachment.to_dict()) + if not isinstance(message, ScheduledMessage): + # attachment update events don't say anything about scheduled messages, + # so sending an event is pointless. + notify_attachment_update(user_profile, "update", attachment.to_dict()) return claimed @@ -77,7 +83,7 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None: def check_attachment_reference_change( - message: Message, rendering_result: MessageRenderingResult + message: Union[Message, ScheduledMessage], rendering_result: MessageRenderingResult ) -> bool: # For a unsaved message edit (message.* has been updated, but not # saved to the database), adjusts Attachment data to correspond to diff --git a/zerver/lib/export.py b/zerver/lib/export.py index 6e5a5c6ad4..edf1a9cbdc 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -116,6 +116,7 @@ ALL_ZULIP_TABLES = { "zerver_archivedusermessage", "zerver_attachment", "zerver_attachment_messages", + "zerver_attachment_scheduled_messages", "zerver_archivedreaction", "zerver_archivedsubmessage", "zerver_archivetransaction", @@ -222,6 +223,8 @@ NON_EXPORTED_TABLES = { "zerver_archivedreaction", "zerver_archivedsubmessage", "zerver_archivetransaction", + # We don't export this until export of ScheduledMessage in general is implemented. + "zerver_attachment_scheduled_messages", # Social auth tables are not needed post-export, since we don't # use any of this state outside of a direct authentication flow. "social_auth_association", diff --git a/zerver/lib/import_realm.py b/zerver/lib/import_realm.py index 21cc7227f7..14f05077e6 100644 --- a/zerver/lib/import_realm.py +++ b/zerver/lib/import_realm.py @@ -1546,6 +1546,10 @@ def import_attachments(data: TableData) -> None: m2m_row[child_singular] = ID_MAP["message"][fk_id] m2m_rows.append(m2m_row) + # TODO: Import of scheduled messages is not implemented yet. + if "scheduled_messages" in parent_row: + del parent_row["scheduled_messages"] + # Create our table data for insert. m2m_data: TableData = {m2m_table_name: m2m_rows} convert_to_id_fields(m2m_data, m2m_table_name, parent_singular) diff --git a/zerver/lib/retention.py b/zerver/lib/retention.py index 8f65f2af57..117475ce79 100644 --- a/zerver/lib/retention.py +++ b/zerver/lib/retention.py @@ -324,6 +324,7 @@ def delete_messages(msg_ids: List[int]) -> None: def delete_expired_attachments(realm: Realm) -> None: (num_deleted, ignored) = Attachment.objects.filter( messages__isnull=True, + scheduled_messages__isnull=True, realm_id=realm.id, id__in=ArchivedAttachment.objects.filter(realm_id=realm.id), ).delete() diff --git a/zerver/lib/upload/__init__.py b/zerver/lib/upload/__init__.py index 8380c3d558..0b7b9ee6c4 100644 --- a/zerver/lib/upload/__init__.py +++ b/zerver/lib/upload/__init__.py @@ -3,7 +3,7 @@ import logging import urllib from datetime import datetime from mimetypes import guess_type -from typing import IO, Any, BinaryIO, Callable, Iterator, List, Optional, Tuple +from typing import IO, Any, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union from urllib.parse import urljoin from django.conf import settings @@ -15,7 +15,7 @@ from zerver.lib.outgoing_http import OutgoingSession from zerver.lib.upload.base import ZulipUploadBackend from zerver.lib.upload.local import LocalUploadBackend from zerver.lib.upload.s3 import S3UploadBackend -from zerver.models import Attachment, Message, Realm, RealmEmoji, UserProfile +from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile class RealmUploadQuotaError(JsonableError): @@ -86,11 +86,19 @@ def upload_message_attachment( def claim_attachment( user_profile: UserProfile, path_id: str, - message: Message, + message: Union[Message, ScheduledMessage], is_message_realm_public: bool, is_message_web_public: bool = False, ) -> Attachment: attachment = Attachment.objects.get(path_id=path_id) + if isinstance(message, ScheduledMessage): + attachment.scheduled_messages.add(message) + # Setting the is_web_public and is_realm_public flags would be incorrect + # in the scheduled message case - since the attachment becomes such only + # when the message is actually posted. + return attachment + + assert isinstance(message, Message) attachment.messages.add(message) attachment.is_web_public = attachment.is_web_public or is_message_web_public attachment.is_realm_public = attachment.is_realm_public or is_message_realm_public diff --git a/zerver/migrations/0447_attachment_scheduled_messages_and_more.py b/zerver/migrations/0447_attachment_scheduled_messages_and_more.py new file mode 100644 index 0000000000..58c1d2bac8 --- /dev/null +++ b/zerver/migrations/0447_attachment_scheduled_messages_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2 on 2023-05-06 23:34 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("zerver", "0446_realmauditlog_zerver_realmauditlog_user_subscriptions_idx"), + ] + + operations = [ + migrations.AddField( + model_name="attachment", + name="scheduled_messages", + field=models.ManyToManyField(to="zerver.scheduledmessage"), + ), + migrations.AddField( + model_name="scheduledmessage", + name="has_attachment", + field=models.BooleanField(db_index=True, default=False), + ), + ] diff --git a/zerver/models.py b/zerver/models.py index ba8bfff86c..18177ad288 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -3554,8 +3554,12 @@ class ArchivedAttachment(AbstractAttachment): class Attachment(AbstractAttachment): messages = models.ManyToManyField(Message) + # This is only present for Attachment and not ArchiveAttachment. + # because ScheduledMessage is not subject to archiving. + scheduled_messages = models.ManyToManyField("ScheduledMessage") + def is_claimed(self) -> bool: - return self.messages.count() > 0 + return self.messages.count() > 0 or self.scheduled_messages.count() > 0 def to_dict(self) -> Dict[str, Any]: return { @@ -3691,7 +3695,7 @@ def get_old_unclaimed_attachments( """ The logic in this function is fairly tricky. The essence is that a file should be cleaned up if and only if it not referenced by any - Message or ArchivedMessage. The way to find that out is through the + Message, ScheduledMessage or ArchivedMessage. The way to find that out is through the Attachment and ArchivedAttachment tables. The queries are complicated by the fact that an uploaded file may have either only an Attachment row, only an ArchivedAttachment row, @@ -3699,14 +3703,24 @@ def get_old_unclaimed_attachments( linking to it have been archived. """ delta_weeks_ago = timezone_now() - datetime.timedelta(weeks=weeks_ago) + + # The Attachment vs ArchivedAttachment queries are asymmetric because only + # Attachment has the scheduled_messages relation. old_attachments = Attachment.objects.annotate( has_other_messages=Exists( ArchivedAttachment.objects.filter(id=OuterRef("id")).exclude(messages=None) ) - ).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False) + ).filter( + messages=None, + scheduled_messages=None, + create_time__lt=delta_weeks_ago, + has_other_messages=False, + ) old_archived_attachments = ArchivedAttachment.objects.annotate( has_other_messages=Exists( - Attachment.objects.filter(id=OuterRef("id")).exclude(messages=None) + Attachment.objects.filter(id=OuterRef("id")) + .exclude(messages=None) + .exclude(scheduled_messages=None) ) ).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False) @@ -4311,6 +4325,7 @@ class ScheduledMessage(models.Model): realm = models.ForeignKey(Realm, on_delete=CASCADE) scheduled_timestamp = models.DateTimeField(db_index=True) delivered = models.BooleanField(default=False) + has_attachment = models.BooleanField(default=False, db_index=True) SEND_LATER = 1 REMIND = 2 @@ -4335,6 +4350,9 @@ class ScheduledMessage(models.Model): def set_topic_name(self, topic_name: str) -> None: self.subject = topic_name + def is_stream_message(self) -> bool: + return self.recipient.type == Recipient.STREAM + def to_dict(self) -> Union[StreamScheduledMessageAPI, DirectScheduledMessageAPI]: recipient, recipient_type_str = get_recipient_ids(self.recipient, self.sender.id) diff --git a/zerver/tests/test_scheduled_messages.py b/zerver/tests/test_scheduled_messages.py index 06f7ffe89a..5e05a6615c 100644 --- a/zerver/tests/test_scheduled_messages.py +++ b/zerver/tests/test_scheduled_messages.py @@ -1,11 +1,13 @@ +import re import time +from io import StringIO from typing import TYPE_CHECKING, List, Union import orjson from zerver.lib.test_classes import ZulipTestCase from zerver.lib.timestamp import timestamp_to_datetime -from zerver.models import ScheduledMessage +from zerver.models import Attachment, ScheduledMessage if TYPE_CHECKING: from django.test.client import _MonkeyPatchedWSGIResponse as TestHttpResponse @@ -199,3 +201,90 @@ class ScheduledMessageTest(ZulipTestCase): # Already deleted. result = self.client_delete(f"/json/scheduled_messages/{scheduled_message.id}") self.assert_json_error(result, "Scheduled message does not exist", 404) + + def test_attachment_handling(self) -> None: + self.login("hamlet") + hamlet = self.example_user("hamlet") + verona_stream_id = self.get_stream_id("Verona") + + attachment_file1 = StringIO("zulip!") + attachment_file1.name = "dummy_1.txt" + result = self.client_post("/json/user_uploads", {"file": attachment_file1}) + path_id1 = re.sub("/user_uploads/", "", result.json()["uri"]) + attachment_object1 = Attachment.objects.get(path_id=path_id1) + + attachment_file2 = StringIO("zulip!") + attachment_file2.name = "dummy_1.txt" + result = self.client_post("/json/user_uploads", {"file": attachment_file2}) + path_id2 = re.sub("/user_uploads/", "", result.json()["uri"]) + attachment_object2 = Attachment.objects.get(path_id=path_id2) + + content = f"Test [zulip.txt](http://{hamlet.realm.host}/user_uploads/{path_id1})" + scheduled_delivery_timestamp = int(time.time() + 86400) + + # Test sending with attachment + self.do_schedule_message("stream", verona_stream_id, content, scheduled_delivery_timestamp) + scheduled_message = self.last_scheduled_message() + self.assertEqual( + list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), + [scheduled_message.id], + ) + self.assertEqual(scheduled_message.has_attachment, True) + + # Test editing to change attachmment + edited_content = f"Test [zulip.txt](http://{hamlet.realm.host}/user_uploads/{path_id2})" + result = self.do_schedule_message( + "stream", + verona_stream_id, + edited_content, + scheduled_delivery_timestamp, + scheduled_message_id=str(scheduled_message.id), + ) + scheduled_message = self.get_scheduled_message(str(scheduled_message.id)) + self.assertEqual( + list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), [] + ) + self.assertEqual( + list(attachment_object2.scheduled_messages.all().values_list("id", flat=True)), + [scheduled_message.id], + ) + self.assertEqual(scheduled_message.has_attachment, True) + + # Test editing to no longer reference any attachments + edited_content = "No more attachments" + result = self.do_schedule_message( + "stream", + verona_stream_id, + edited_content, + scheduled_delivery_timestamp, + scheduled_message_id=str(scheduled_message.id), + ) + scheduled_message = self.get_scheduled_message(str(scheduled_message.id)) + self.assertEqual( + list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), [] + ) + self.assertEqual( + list(attachment_object2.scheduled_messages.all().values_list("id", flat=True)), [] + ) + self.assertEqual(scheduled_message.has_attachment, False) + + # Test editing to now have an attachment again + edited_content = ( + f"Attachment is back! [zulip.txt](http://{hamlet.realm.host}/user_uploads/{path_id2})" + ) + result = self.do_schedule_message( + "stream", + verona_stream_id, + edited_content, + scheduled_delivery_timestamp, + scheduled_message_id=str(scheduled_message.id), + ) + scheduled_message = self.get_scheduled_message(str(scheduled_message.id)) + self.assertEqual( + list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), [] + ) + self.assertEqual( + list(attachment_object2.scheduled_messages.all().values_list("id", flat=True)), + [scheduled_message.id], + ) + self.assertEqual(scheduled_message.has_attachment, True) diff --git a/zerver/tests/test_upload.py b/zerver/tests/test_upload.py index 6e0f4b43bc..4e72f33af8 100644 --- a/zerver/tests/test_upload.py +++ b/zerver/tests/test_upload.py @@ -24,6 +24,7 @@ from zerver.actions.message_send import internal_send_private_message from zerver.actions.realm_icon import do_change_icon_source from zerver.actions.realm_logo import do_change_logo_source from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_realm_property +from zerver.actions.scheduled_messages import check_schedule_message, delete_scheduled_message from zerver.actions.uploads import do_delete_old_unclaimed_attachments from zerver.actions.user_settings import do_delete_avatar_image from zerver.lib.avatar import avatar_url, get_avatar_field @@ -48,6 +49,7 @@ from zerver.models import ( Realm, RealmDomain, UserProfile, + get_client, get_realm, get_system_bot, get_user_by_delivery_email, @@ -372,6 +374,11 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase): result = self.client_post("/json/user_uploads", {"file": d3}) d3_path_id = re.sub("/user_uploads/", "", result.json()["uri"]) + d4 = StringIO("zulip!") + d4.name = "dummy_4.txt" + result = self.client_post("/json/user_uploads", {"file": d4}) + d4_path_id = re.sub("/user_uploads/", "", result.json()["uri"]) + two_week_ago = timezone_now() - datetime.timedelta(weeks=2) # This Attachment will have a message linking to it: d1_attachment = Attachment.objects.get(path_id=d1_path_id) @@ -388,6 +395,12 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase): d3_attachment.create_time = two_week_ago d3_attachment.save() + # This Attachment will just have a ScheduledMessage referencing it. It should not be deleted + # until the ScheduledMessage is deleted. + d4_attachment = Attachment.objects.get(path_id=d4_path_id) + d4_attachment.create_time = two_week_ago + d4_attachment.save() + # Send message referencing only dummy_1 self.subscribe(hamlet, "Denmark") body = ( @@ -408,6 +421,29 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase): d3_local_path = os.path.join(settings.LOCAL_FILES_DIR, d3_path_id) self.assertTrue(os.path.exists(d3_local_path)) + body = ( + f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/" + + d4_path_id + + ")" + ) + scheduled_message_d4_id = check_schedule_message( + hamlet, + get_client("website"), + "stream", + [self.get_stream_id("Verona")], + "Test topic", + body, + None, + timezone_now() + datetime.timedelta(days=365), + hamlet.realm, + ) + self.assertEqual( + list(d4_attachment.scheduled_messages.all().values_list("id", flat=True)), + [scheduled_message_d4_id], + ) + d4_local_path = os.path.join(settings.LOCAL_FILES_DIR, d4_path_id) + self.assertTrue(os.path.exists(d4_local_path)) + do_delete_messages(hamlet.realm, [Message.objects.get(id=message_id)]) # dummy_2 should not exist in database or the uploads folder do_delete_old_unclaimed_attachments(2) @@ -431,6 +467,13 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase): self.assertTrue(not Attachment.objects.filter(path_id=d3_path_id).exists()) self.assertTrue(not ArchivedAttachment.objects.filter(path_id=d3_path_id).exists()) + # dummy_4 only get deleted after the scheduled message is deleted. + self.assertTrue(os.path.exists(d4_local_path)) + delete_scheduled_message(hamlet, scheduled_message_d4_id) + do_delete_old_unclaimed_attachments(2) + self.assertFalse(os.path.exists(d4_local_path)) + self.assertTrue(not Attachment.objects.filter(path_id=d4_path_id).exists()) + def test_attachment_url_without_upload(self) -> None: hamlet = self.example_user("hamlet") self.login_user(hamlet)