scheduled_message: Handle attachments properly.

Fixes #25414.

We add Attachment.scheduled_messages relation to track ScheduledMessages
which reference the attachment.

The import bits can be done after merging this, by updating #25345.
This commit is contained in:
Mateusz Mandera 2023-05-07 20:04:37 +02:00 committed by Tim Abbott
parent 4598607a46
commit 414658fc8e
10 changed files with 228 additions and 18 deletions

View File

@ -1,10 +1,11 @@
import datetime
from typing import List, Optional, Sequence, Union
from typing import List, Optional, Sequence, Tuple, Union
from django.db import transaction
from django.utils.translation import gettext as _
from zerver.actions.message_send import check_message
from zerver.actions.uploads import check_attachment_reference_change, do_claim_attachments
from zerver.lib.addressee import Addressee
from zerver.lib.exceptions import JsonableError
from zerver.lib.message import SendMessageRequest, render_markdown
@ -45,7 +46,7 @@ def check_schedule_message(
def do_schedule_messages(
send_message_requests: Sequence[SendMessageRequest], sender: UserProfile
) -> List[int]:
scheduled_messages: List[ScheduledMessage] = []
scheduled_messages: List[Tuple[ScheduledMessage, SendMessageRequest]] = []
for send_request in send_message_requests:
scheduled_message = ScheduledMessage()
@ -65,18 +66,28 @@ def do_schedule_messages(
scheduled_message.scheduled_timestamp = send_request.deliver_at
scheduled_message.delivery_type = ScheduledMessage.SEND_LATER
scheduled_messages.append(scheduled_message)
scheduled_messages.append((scheduled_message, send_request))
with transaction.atomic():
ScheduledMessage.objects.bulk_create(
[scheduled_message for scheduled_message, ignored in scheduled_messages]
)
for scheduled_message, send_request in scheduled_messages:
if do_claim_attachments(
scheduled_message, send_request.rendering_result.potential_attachment_path_ids
):
scheduled_message.has_attachment = True
scheduled_message.save(update_fields=["has_attachment"])
ScheduledMessage.objects.bulk_create(scheduled_messages)
event = {
"type": "scheduled_messages",
"op": "add",
"scheduled_messages": [
scheduled_message.to_dict() for scheduled_message in scheduled_messages
scheduled_message.to_dict() for scheduled_message, ignored in scheduled_messages
],
}
send_event(sender.realm, event, [sender.id])
return [scheduled_message.id for scheduled_message in scheduled_messages]
return [scheduled_message.id for scheduled_message, ignored in scheduled_messages]
def edit_scheduled_message(
@ -102,6 +113,11 @@ def edit_scheduled_message(
scheduled_message_object.stream = send_request.stream
assert send_request.deliver_at is not None
scheduled_message_object.scheduled_timestamp = send_request.deliver_at
scheduled_message_object.has_attachment = check_attachment_reference_change(
scheduled_message_object, rendering_result
)
scheduled_message_object.save()
event = {

View File

@ -1,11 +1,12 @@
import logging
from typing import Any, Dict, List
from typing import Any, Dict, List, Union
from zerver.lib.markdown import MessageRenderingResult
from zerver.lib.upload import claim_attachment, delete_message_attachment
from zerver.models import (
Attachment,
Message,
ScheduledMessage,
Stream,
UserProfile,
get_old_unclaimed_attachments,
@ -26,7 +27,9 @@ def notify_attachment_update(
send_event(user_profile.realm, event, [user_profile.id])
def do_claim_attachments(message: Message, potential_path_ids: List[str]) -> bool:
def do_claim_attachments(
message: Union[Message, ScheduledMessage], potential_path_ids: List[str]
) -> bool:
claimed = False
for path_id in potential_path_ids:
user_profile = message.sender
@ -59,7 +62,10 @@ def do_claim_attachments(message: Message, potential_path_ids: List[str]) -> boo
attachment = claim_attachment(
user_profile, path_id, message, is_message_realm_public, is_message_web_public
)
notify_attachment_update(user_profile, "update", attachment.to_dict())
if not isinstance(message, ScheduledMessage):
# attachment update events don't say anything about scheduled messages,
# so sending an event is pointless.
notify_attachment_update(user_profile, "update", attachment.to_dict())
return claimed
@ -77,7 +83,7 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
def check_attachment_reference_change(
message: Message, rendering_result: MessageRenderingResult
message: Union[Message, ScheduledMessage], rendering_result: MessageRenderingResult
) -> bool:
# For a unsaved message edit (message.* has been updated, but not
# saved to the database), adjusts Attachment data to correspond to

View File

@ -116,6 +116,7 @@ ALL_ZULIP_TABLES = {
"zerver_archivedusermessage",
"zerver_attachment",
"zerver_attachment_messages",
"zerver_attachment_scheduled_messages",
"zerver_archivedreaction",
"zerver_archivedsubmessage",
"zerver_archivetransaction",
@ -222,6 +223,8 @@ NON_EXPORTED_TABLES = {
"zerver_archivedreaction",
"zerver_archivedsubmessage",
"zerver_archivetransaction",
# We don't export this until export of ScheduledMessage in general is implemented.
"zerver_attachment_scheduled_messages",
# Social auth tables are not needed post-export, since we don't
# use any of this state outside of a direct authentication flow.
"social_auth_association",

View File

@ -1546,6 +1546,10 @@ def import_attachments(data: TableData) -> None:
m2m_row[child_singular] = ID_MAP["message"][fk_id]
m2m_rows.append(m2m_row)
# TODO: Import of scheduled messages is not implemented yet.
if "scheduled_messages" in parent_row:
del parent_row["scheduled_messages"]
# Create our table data for insert.
m2m_data: TableData = {m2m_table_name: m2m_rows}
convert_to_id_fields(m2m_data, m2m_table_name, parent_singular)

View File

@ -324,6 +324,7 @@ def delete_messages(msg_ids: List[int]) -> None:
def delete_expired_attachments(realm: Realm) -> None:
(num_deleted, ignored) = Attachment.objects.filter(
messages__isnull=True,
scheduled_messages__isnull=True,
realm_id=realm.id,
id__in=ArchivedAttachment.objects.filter(realm_id=realm.id),
).delete()

View File

@ -3,7 +3,7 @@ import logging
import urllib
from datetime import datetime
from mimetypes import guess_type
from typing import IO, Any, BinaryIO, Callable, Iterator, List, Optional, Tuple
from typing import IO, Any, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
from urllib.parse import urljoin
from django.conf import settings
@ -15,7 +15,7 @@ from zerver.lib.outgoing_http import OutgoingSession
from zerver.lib.upload.base import ZulipUploadBackend
from zerver.lib.upload.local import LocalUploadBackend
from zerver.lib.upload.s3 import S3UploadBackend
from zerver.models import Attachment, Message, Realm, RealmEmoji, UserProfile
from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
class RealmUploadQuotaError(JsonableError):
@ -86,11 +86,19 @@ def upload_message_attachment(
def claim_attachment(
user_profile: UserProfile,
path_id: str,
message: Message,
message: Union[Message, ScheduledMessage],
is_message_realm_public: bool,
is_message_web_public: bool = False,
) -> Attachment:
attachment = Attachment.objects.get(path_id=path_id)
if isinstance(message, ScheduledMessage):
attachment.scheduled_messages.add(message)
# Setting the is_web_public and is_realm_public flags would be incorrect
# in the scheduled message case - since the attachment becomes such only
# when the message is actually posted.
return attachment
assert isinstance(message, Message)
attachment.messages.add(message)
attachment.is_web_public = attachment.is_web_public or is_message_web_public
attachment.is_realm_public = attachment.is_realm_public or is_message_realm_public

View File

@ -0,0 +1,22 @@
# Generated by Django 4.2 on 2023-05-06 23:34
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("zerver", "0446_realmauditlog_zerver_realmauditlog_user_subscriptions_idx"),
]
operations = [
migrations.AddField(
model_name="attachment",
name="scheduled_messages",
field=models.ManyToManyField(to="zerver.scheduledmessage"),
),
migrations.AddField(
model_name="scheduledmessage",
name="has_attachment",
field=models.BooleanField(db_index=True, default=False),
),
]

View File

@ -3554,8 +3554,12 @@ class ArchivedAttachment(AbstractAttachment):
class Attachment(AbstractAttachment):
messages = models.ManyToManyField(Message)
# This is only present for Attachment and not ArchiveAttachment.
# because ScheduledMessage is not subject to archiving.
scheduled_messages = models.ManyToManyField("ScheduledMessage")
def is_claimed(self) -> bool:
return self.messages.count() > 0
return self.messages.count() > 0 or self.scheduled_messages.count() > 0
def to_dict(self) -> Dict[str, Any]:
return {
@ -3691,7 +3695,7 @@ def get_old_unclaimed_attachments(
"""
The logic in this function is fairly tricky. The essence is that
a file should be cleaned up if and only if it not referenced by any
Message or ArchivedMessage. The way to find that out is through the
Message, ScheduledMessage or ArchivedMessage. The way to find that out is through the
Attachment and ArchivedAttachment tables.
The queries are complicated by the fact that an uploaded file
may have either only an Attachment row, only an ArchivedAttachment row,
@ -3699,14 +3703,24 @@ def get_old_unclaimed_attachments(
linking to it have been archived.
"""
delta_weeks_ago = timezone_now() - datetime.timedelta(weeks=weeks_ago)
# The Attachment vs ArchivedAttachment queries are asymmetric because only
# Attachment has the scheduled_messages relation.
old_attachments = Attachment.objects.annotate(
has_other_messages=Exists(
ArchivedAttachment.objects.filter(id=OuterRef("id")).exclude(messages=None)
)
).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False)
).filter(
messages=None,
scheduled_messages=None,
create_time__lt=delta_weeks_ago,
has_other_messages=False,
)
old_archived_attachments = ArchivedAttachment.objects.annotate(
has_other_messages=Exists(
Attachment.objects.filter(id=OuterRef("id")).exclude(messages=None)
Attachment.objects.filter(id=OuterRef("id"))
.exclude(messages=None)
.exclude(scheduled_messages=None)
)
).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False)
@ -4311,6 +4325,7 @@ class ScheduledMessage(models.Model):
realm = models.ForeignKey(Realm, on_delete=CASCADE)
scheduled_timestamp = models.DateTimeField(db_index=True)
delivered = models.BooleanField(default=False)
has_attachment = models.BooleanField(default=False, db_index=True)
SEND_LATER = 1
REMIND = 2
@ -4335,6 +4350,9 @@ class ScheduledMessage(models.Model):
def set_topic_name(self, topic_name: str) -> None:
self.subject = topic_name
def is_stream_message(self) -> bool:
return self.recipient.type == Recipient.STREAM
def to_dict(self) -> Union[StreamScheduledMessageAPI, DirectScheduledMessageAPI]:
recipient, recipient_type_str = get_recipient_ids(self.recipient, self.sender.id)

View File

@ -1,11 +1,13 @@
import re
import time
from io import StringIO
from typing import TYPE_CHECKING, List, Union
import orjson
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.timestamp import timestamp_to_datetime
from zerver.models import ScheduledMessage
from zerver.models import Attachment, ScheduledMessage
if TYPE_CHECKING:
from django.test.client import _MonkeyPatchedWSGIResponse as TestHttpResponse
@ -199,3 +201,90 @@ class ScheduledMessageTest(ZulipTestCase):
# Already deleted.
result = self.client_delete(f"/json/scheduled_messages/{scheduled_message.id}")
self.assert_json_error(result, "Scheduled message does not exist", 404)
def test_attachment_handling(self) -> None:
self.login("hamlet")
hamlet = self.example_user("hamlet")
verona_stream_id = self.get_stream_id("Verona")
attachment_file1 = StringIO("zulip!")
attachment_file1.name = "dummy_1.txt"
result = self.client_post("/json/user_uploads", {"file": attachment_file1})
path_id1 = re.sub("/user_uploads/", "", result.json()["uri"])
attachment_object1 = Attachment.objects.get(path_id=path_id1)
attachment_file2 = StringIO("zulip!")
attachment_file2.name = "dummy_1.txt"
result = self.client_post("/json/user_uploads", {"file": attachment_file2})
path_id2 = re.sub("/user_uploads/", "", result.json()["uri"])
attachment_object2 = Attachment.objects.get(path_id=path_id2)
content = f"Test [zulip.txt](http://{hamlet.realm.host}/user_uploads/{path_id1})"
scheduled_delivery_timestamp = int(time.time() + 86400)
# Test sending with attachment
self.do_schedule_message("stream", verona_stream_id, content, scheduled_delivery_timestamp)
scheduled_message = self.last_scheduled_message()
self.assertEqual(
list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)),
[scheduled_message.id],
)
self.assertEqual(scheduled_message.has_attachment, True)
# Test editing to change attachmment
edited_content = f"Test [zulip.txt](http://{hamlet.realm.host}/user_uploads/{path_id2})"
result = self.do_schedule_message(
"stream",
verona_stream_id,
edited_content,
scheduled_delivery_timestamp,
scheduled_message_id=str(scheduled_message.id),
)
scheduled_message = self.get_scheduled_message(str(scheduled_message.id))
self.assertEqual(
list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), []
)
self.assertEqual(
list(attachment_object2.scheduled_messages.all().values_list("id", flat=True)),
[scheduled_message.id],
)
self.assertEqual(scheduled_message.has_attachment, True)
# Test editing to no longer reference any attachments
edited_content = "No more attachments"
result = self.do_schedule_message(
"stream",
verona_stream_id,
edited_content,
scheduled_delivery_timestamp,
scheduled_message_id=str(scheduled_message.id),
)
scheduled_message = self.get_scheduled_message(str(scheduled_message.id))
self.assertEqual(
list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), []
)
self.assertEqual(
list(attachment_object2.scheduled_messages.all().values_list("id", flat=True)), []
)
self.assertEqual(scheduled_message.has_attachment, False)
# Test editing to now have an attachment again
edited_content = (
f"Attachment is back! [zulip.txt](http://{hamlet.realm.host}/user_uploads/{path_id2})"
)
result = self.do_schedule_message(
"stream",
verona_stream_id,
edited_content,
scheduled_delivery_timestamp,
scheduled_message_id=str(scheduled_message.id),
)
scheduled_message = self.get_scheduled_message(str(scheduled_message.id))
self.assertEqual(
list(attachment_object1.scheduled_messages.all().values_list("id", flat=True)), []
)
self.assertEqual(
list(attachment_object2.scheduled_messages.all().values_list("id", flat=True)),
[scheduled_message.id],
)
self.assertEqual(scheduled_message.has_attachment, True)

View File

@ -24,6 +24,7 @@ from zerver.actions.message_send import internal_send_private_message
from zerver.actions.realm_icon import do_change_icon_source
from zerver.actions.realm_logo import do_change_logo_source
from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_realm_property
from zerver.actions.scheduled_messages import check_schedule_message, delete_scheduled_message
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.actions.user_settings import do_delete_avatar_image
from zerver.lib.avatar import avatar_url, get_avatar_field
@ -48,6 +49,7 @@ from zerver.models import (
Realm,
RealmDomain,
UserProfile,
get_client,
get_realm,
get_system_bot,
get_user_by_delivery_email,
@ -372,6 +374,11 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
result = self.client_post("/json/user_uploads", {"file": d3})
d3_path_id = re.sub("/user_uploads/", "", result.json()["uri"])
d4 = StringIO("zulip!")
d4.name = "dummy_4.txt"
result = self.client_post("/json/user_uploads", {"file": d4})
d4_path_id = re.sub("/user_uploads/", "", result.json()["uri"])
two_week_ago = timezone_now() - datetime.timedelta(weeks=2)
# This Attachment will have a message linking to it:
d1_attachment = Attachment.objects.get(path_id=d1_path_id)
@ -388,6 +395,12 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
d3_attachment.create_time = two_week_ago
d3_attachment.save()
# This Attachment will just have a ScheduledMessage referencing it. It should not be deleted
# until the ScheduledMessage is deleted.
d4_attachment = Attachment.objects.get(path_id=d4_path_id)
d4_attachment.create_time = two_week_ago
d4_attachment.save()
# Send message referencing only dummy_1
self.subscribe(hamlet, "Denmark")
body = (
@ -408,6 +421,29 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
d3_local_path = os.path.join(settings.LOCAL_FILES_DIR, d3_path_id)
self.assertTrue(os.path.exists(d3_local_path))
body = (
f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/"
+ d4_path_id
+ ")"
)
scheduled_message_d4_id = check_schedule_message(
hamlet,
get_client("website"),
"stream",
[self.get_stream_id("Verona")],
"Test topic",
body,
None,
timezone_now() + datetime.timedelta(days=365),
hamlet.realm,
)
self.assertEqual(
list(d4_attachment.scheduled_messages.all().values_list("id", flat=True)),
[scheduled_message_d4_id],
)
d4_local_path = os.path.join(settings.LOCAL_FILES_DIR, d4_path_id)
self.assertTrue(os.path.exists(d4_local_path))
do_delete_messages(hamlet.realm, [Message.objects.get(id=message_id)])
# dummy_2 should not exist in database or the uploads folder
do_delete_old_unclaimed_attachments(2)
@ -431,6 +467,13 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
self.assertTrue(not Attachment.objects.filter(path_id=d3_path_id).exists())
self.assertTrue(not ArchivedAttachment.objects.filter(path_id=d3_path_id).exists())
# dummy_4 only get deleted after the scheduled message is deleted.
self.assertTrue(os.path.exists(d4_local_path))
delete_scheduled_message(hamlet, scheduled_message_d4_id)
do_delete_old_unclaimed_attachments(2)
self.assertFalse(os.path.exists(d4_local_path))
self.assertTrue(not Attachment.objects.filter(path_id=d4_path_id).exists())
def test_attachment_url_without_upload(self) -> None:
hamlet = self.example_user("hamlet")
self.login_user(hamlet)