mirror of https://github.com/zulip/zulip.git
469 lines
19 KiB
Python
469 lines
19 KiB
Python
import os
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from unittest.mock import patch
|
|
|
|
import time_machine
|
|
from django.conf import settings
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
from zerver.actions.message_delete import do_delete_messages
|
|
from zerver.actions.scheduled_messages import check_schedule_message, delete_scheduled_message
|
|
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
|
|
from zerver.lib.retention import clean_archived_data
|
|
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
|
|
from zerver.lib.test_helpers import get_test_image_file
|
|
from zerver.lib.thumbnail import ThumbnailFormat
|
|
from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile
|
|
from zerver.models.clients import get_client
|
|
|
|
|
|
class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
    """Tests for do_delete_old_unclaimed_attachments.

    Each test uploads one or more files, optionally references them from
    sent, deleted (archived), or scheduled messages, and then checks which
    of the on-disk file, the Attachment row, and the ArchivedAttachment
    row are still present after a cleanup pass.
    """

    def make_attachment(
        self, filename: str, when: datetime | None = None, uploader: UserProfile | None = None
    ) -> Attachment:
        """Upload a fixture file and return the Attachment row created for it.

        The upload is posted to /json/user_uploads while logged in as
        `uploader` (hamlet by default), with the clock frozen at `when`
        (two weeks ago by default), so the upload is already older than
        the one-week grace period the tests pass to the cleanup function.
        """
        if when is None:
            when = timezone_now() - timedelta(weeks=2)
        if uploader is None:
            uploader = self.example_user("hamlet")
        self.login_user(uploader)

        with time_machine.travel(when, tick=False):
            with get_test_image_file(filename) as file_obj:
                response = self.assert_json_success(
                    self.client_post("/json/user_uploads", {"file": file_obj})
                )
            # The response URL is "/user_uploads/<path_id>"; strip the prefix.
            path_id = re.sub(r"/user_uploads/", "", response["url"])
            return Attachment.objects.get(path_id=path_id)

    def assert_exists(
        self,
        attachment: Attachment,
        *,
        has_file: bool,
        has_attachment: bool,
        has_archived_attachment: bool,
    ) -> None:
        """Assert which traces of `attachment` currently exist.

        has_file: whether a file exists at LOCAL_FILES_DIR/<path_id>.
        has_attachment: whether an Attachment row with this id exists.
        has_archived_attachment: whether an ArchivedAttachment row with
            the same id exists.
        """
        assert settings.LOCAL_FILES_DIR
        self.assertEqual(  # File existence on disk
            os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)), has_file
        )
        self.assertEqual(  # Attachment row
            Attachment.objects.filter(id=attachment.id).exists(), has_attachment
        )
        self.assertEqual(  # ArchivedAttachment row
            ArchivedAttachment.objects.filter(id=attachment.id).exists(), has_archived_attachment
        )

    def _attachment_link_body(self, sender: UserProfile, attachment: Attachment) -> str:
        """Return message content with a Markdown link to `attachment` on `sender`'s realm."""
        return f"Some files here ...[zulip.txt](http://{sender.realm.host}/user_uploads/{attachment.path_id})"

    def _schedule_far_future_message(self, sender: UserProfile, body: str) -> int:
        """Schedule `body` for delivery to the Denmark stream one year from now.

        Returns the value of check_schedule_message, which the tests pass
        on to delete_scheduled_message.
        """
        return check_schedule_message(
            sender,
            get_client("website"),
            "stream",
            [self.get_stream_id("Denmark")],
            "Test topic",
            body,
            timezone_now() + timedelta(days=365),
            sender.realm,
        )

    def _delete_sent_message(self, sender: UserProfile, message_id: int) -> None:
        """Delete the message with id `message_id` from `sender`'s realm."""
        do_delete_messages(sender.realm, [Message.objects.get(id=message_id)], acting_user=None)

    def _purge_archived_messages(self) -> None:
        """Run clean_archived_data with ARCHIVED_DATA_VACUUMING_DELAY_DAYS set to 0."""
        with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
            clean_archived_data()

    def _thumbnail_dir(self, attachment: Attachment) -> str:
        """Return the path LOCAL_FILES_DIR/thumbnail/<path_id> for `attachment`."""
        assert settings.LOCAL_FILES_DIR
        return os.path.join(settings.LOCAL_FILES_DIR, "thumbnail", attachment.path_id)

    def _assert_has_thumbnails(self, attachment: Attachment) -> None:
        """Assert that the thumbnail directory for `attachment` exists and is non-empty."""
        thumbnail_dir = self._thumbnail_dir(attachment)
        self.assertTrue(os.path.isdir(thumbnail_dir))
        self.assertGreater(len(os.listdir(thumbnail_dir)), 0)

    def test_delete_unused_thumbnails(self) -> None:
        """Thumbnails of an unclaimed image are removed along with the image."""
        with self.captureOnCommitCallbacks(execute=True):
            unused_attachment = self.make_attachment("img.png")

        self.assert_exists(
            unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # It also has thumbnails
        self._assert_has_thumbnails(unused_attachment)

        # If we have 3 weeks of grace, nothing happens
        do_delete_old_unclaimed_attachments(3)
        self.assert_exists(
            unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )
        self._assert_has_thumbnails(unused_attachment)

        # If we have 1 week of grace, the Attachment is deleted, and so
        # are the file and its thumbnail directory on disk
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            unused_attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )
        self.assertFalse(os.path.exists(self._thumbnail_dir(unused_attachment)))

    def test_delete_unused_upload(self) -> None:
        """An upload never referenced by a message is removed once past the grace period."""
        unused_attachment = self.make_attachment("text.txt")
        self.assert_exists(
            unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # If we have 3 weeks of grace, nothing happens
        do_delete_old_unclaimed_attachments(3)
        self.assert_exists(
            unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # If we have 1 week of grace, the Attachment is deleted, and so is the file on disk
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            unused_attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )

    def test_delete_used_upload(self) -> None:
        """An upload referenced by a sent message is not removed."""
        hamlet = self.example_user("hamlet")
        attachment = self.make_attachment("text.txt")

        # Send message referencing that attachment
        self.subscribe(hamlet, "Denmark")
        body = self._attachment_link_body(hamlet, attachment)
        self.send_stream_message(hamlet, "Denmark", body, "test")

        # Because the attachment is claimed, it is not removed
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

    def test_delete_upload_archived_message(self) -> None:
        """An archived attachment survives until its archived message is purged."""
        hamlet = self.example_user("hamlet")
        attachment = self.make_attachment("text.txt")

        # Send message referencing that attachment
        self.subscribe(hamlet, "Denmark")
        body = self._attachment_link_body(hamlet, attachment)
        message_id = self.send_stream_message(hamlet, "Denmark", body, "test")

        # Delete that message; this moves it to ArchivedAttachment but leaves the file on disk
        self._delete_sent_message(hamlet, message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=False, has_archived_attachment=True
        )

        # Removing unclaimed attachments leaves the file, since it is
        # attached to an existing ArchivedAttachment
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=False, has_archived_attachment=True
        )

        # Now purge the ArchivedMessage
        self._purge_archived_messages()

        # The attachment still exists as an unclaimed ArchivedAttachment
        self.assert_exists(
            attachment, has_file=True, has_attachment=False, has_archived_attachment=True
        )

        # Removing unclaimed attachments now cleans it out
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )

    def test_delete_one_message(self) -> None:
        """An upload shared by two messages survives until both are deleted and purged."""
        hamlet = self.example_user("hamlet")
        attachment = self.make_attachment("text.txt")

        # Send two messages referencing that attachment
        self.subscribe(hamlet, "Denmark")
        body = self._attachment_link_body(hamlet, attachment)
        first_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
        second_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")

        # Delete the first message; this leaves an Attachment and an
        # ArchivedAttachment, both associated with a message
        self._delete_sent_message(hamlet, first_message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Removing unclaimed attachments leaves the file, since it is
        # attached to an existing Attachment and ArchivedAttachment
        # which have Messages and ArchivedMessages, respectively
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Purging the ArchivedMessage does not affect the Attachment
        # or ArchivedAttachment
        self._purge_archived_messages()
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Removing unclaimed attachments still does nothing, because
        # the ArchivedAttachment is protected by the existing
        # Attachment.
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Deleting the other message now leaves just an ArchivedAttachment
        self._delete_sent_message(hamlet, second_message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=False, has_archived_attachment=True
        )

        # Cleaning out the archived message and purging unclaimed
        # attachments now finally removes it.
        self._purge_archived_messages()
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )

    def test_delete_with_scheduled_messages(self) -> None:
        """A scheduled message protects its attachment until it is unscheduled."""
        hamlet = self.example_user("hamlet")
        attachment = self.make_attachment("text.txt")

        # Schedule a future send with the attachment
        self.subscribe(hamlet, "Denmark")
        body = self._attachment_link_body(hamlet, attachment)
        scheduled_message_id = self._schedule_far_future_message(hamlet, body)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # The ScheduledMessage protects the attachment from being removed
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # Deleting the ScheduledMessage leaves the attachment dangling
        delete_scheduled_message(hamlet, scheduled_message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # Having no referents, it is now a target for removal
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )

    def test_delete_with_scheduled_message_and_archive(self) -> None:
        """A scheduled message keeps protecting an upload whose sent message was purged."""
        hamlet = self.example_user("hamlet")
        attachment = self.make_attachment("text.txt")

        # Schedule a message, and also send one now
        self.subscribe(hamlet, "Denmark")
        body = self._attachment_link_body(hamlet, attachment)
        scheduled_message_id = self._schedule_far_future_message(hamlet, body)
        sent_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # Deleting the sent message leaves us with an Attachment
        # attached to the scheduled message, and an archived
        # attachment with an archived message
        self._delete_sent_message(hamlet, sent_message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Expiring the archived message leaves a dangling
        # ArchivedAttachment and a protected Attachment
        self._purge_archived_messages()
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Removing unclaimed attachments deletes nothing, since the
        # ArchivedAttachment is protected by the Attachment which
        # is still protected by the scheduled message
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Deleting the ScheduledMessage leaves the attachment fully dangling
        delete_scheduled_message(hamlet, scheduled_message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Having no referents, it is now a target for removal
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )

    def test_delete_with_unscheduled_message_and_archive(self) -> None:
        """An unreferenced Attachment is protected while its ArchivedAttachment is referenced."""
        # This is subtly different from the test above -- we delete
        # the scheduled message first, which is the only way to get an
        # Attachment with no referents as well as an
        # ArchivedAttachment which does have references.  Normally,
        # the process of archiving prunes Attachments which have no
        # references.
        hamlet = self.example_user("hamlet")
        attachment = self.make_attachment("text.txt")

        # Schedule a message, and also send one now
        self.subscribe(hamlet, "Denmark")
        body = self._attachment_link_body(hamlet, attachment)
        scheduled_message_id = self._schedule_far_future_message(hamlet, body)
        sent_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=False
        )

        # Delete the message and then unschedule the scheduled message
        # before expiring the ArchivedMessages.
        self._delete_sent_message(hamlet, sent_message_id)
        delete_scheduled_message(hamlet, scheduled_message_id)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Attempting to expire unclaimed attachments leaves the
        # unreferenced Attachment which is protected by the
        # ArchivedAttachment which has archived messages referencing
        # it.
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Expiring archived messages leaves us with a dangling
        # Attachment and ArchivedAttachment, with neither having
        # referents.
        self._purge_archived_messages()
        self.assert_exists(
            attachment, has_file=True, has_attachment=True, has_archived_attachment=True
        )

        # Having no referents in either place, it is now a target for
        # removal
        do_delete_old_unclaimed_attachments(1)
        self.assert_exists(
            attachment, has_file=False, has_attachment=False, has_archived_attachment=False
        )

    def test_delete_batch_size(self) -> None:
        """File deletions are split into batches of at most DELETE_BATCH_SIZE paths."""
        # 3 attachments, each of which has 2 files because of the thumbnail
        thumbnail_format = ThumbnailFormat("webp", 100, 75, animated=False)
        with self.thumbnail_formats(thumbnail_format), self.captureOnCommitCallbacks(execute=True):
            attachments = [self.make_attachment("img.png") for _ in range(3)]

        with (
            patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 5),
            patch("zerver.actions.uploads.delete_message_attachments") as delete_mock,
        ):
            do_delete_old_unclaimed_attachments(1)

        # We expect all 6 files (3 originals plus 3 thumbnails) to be
        # deleted, across two different calls of 5- and 1-element
        # lists.  Since each image attachment is two files, this means
        # that a thumbnail and its original are split across batches.
        self.assertEqual(delete_mock.call_count, 2)
        batches = [call.args[0] for call in delete_mock.call_args_list]
        self.assert_length(batches[0], 5)
        self.assert_length(batches[1], 1)

        deleted = {path for batch in batches for path in batch}
        expected: set[str] = set()
        for attachment in attachments:
            expected.update(
                (
                    attachment.path_id,
                    f"thumbnail/{attachment.path_id}/{thumbnail_format!s}",
                )
            )
        self.assertEqual(deleted, expected)

    def test_delete_batch_size_archived(self) -> None:
        """Batching covers unclaimed Attachments and dangling ArchivedAttachments together."""
        hamlet = self.example_user("hamlet")
        attachments = [self.make_attachment("text.txt") for _ in range(20)]

        # Send message referencing 10/20 of those attachments.  Each
        # link is a complete Markdown link, matching the format used by
        # the other tests in this class.
        self.subscribe(hamlet, "Denmark")
        body = "Some files here\n" + "\n".join(
            f"[a](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
            for attachment in attachments[:10]
        )
        message_id = self.send_stream_message(hamlet, "Denmark", body, "test")

        # Delete and purge the message, leaving the ArchivedAttachments dangling
        self._delete_sent_message(hamlet, message_id)
        self._purge_archived_messages()

        # Removing unclaimed attachments now cleans them all out
        with (
            patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 6),
            patch("zerver.actions.uploads.delete_message_attachments") as delete_mock,
        ):
            do_delete_old_unclaimed_attachments(1)

        # We expect all of the 20 attachments (10 of which are
        # ArchivedAttachments) to be deleted, across four different
        # calls: 6, 6, 6, 2
        self.assertEqual(delete_mock.call_count, 4)
        batches = [call.args[0] for call in delete_mock.call_args_list]
        self.assert_length(batches[0], 6)
        self.assert_length(batches[1], 6)
        self.assert_length(batches[2], 6)
        self.assert_length(batches[3], 2)

        deleted_path_ids = {path for batch in batches for path in batch}
        self.assertEqual(
            deleted_path_ids,
            {attachment.path_id for attachment in attachments},
        )
|