mirror of https://github.com/zulip/zulip.git
do_delete_old_unclaimed_attachments: Cap deletions at batch size.
Since each loop may add more than one file to the `storage_paths` list, this may result in more than 1000 files being sent to delete_message_attachments. Since the S3 backend only supports 1000 elements being deleted at once, we must partition the list into chunks which are no more than 1000 elements long.
This commit is contained in:
parent
3d6dcf8fe5
commit
704423787b
|
@ -94,8 +94,8 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
|
||||||
already_removed.add(attachment.path_id)
|
already_removed.add(attachment.path_id)
|
||||||
attachment.delete()
|
attachment.delete()
|
||||||
if len(storage_paths) >= DELETE_BATCH_SIZE:
|
if len(storage_paths) >= DELETE_BATCH_SIZE:
|
||||||
delete_message_attachments(storage_paths)
|
delete_message_attachments(storage_paths[:DELETE_BATCH_SIZE])
|
||||||
storage_paths = []
|
storage_paths = storage_paths[DELETE_BATCH_SIZE:]
|
||||||
for archived_attachment in old_unclaimed_archived_attachments:
|
for archived_attachment in old_unclaimed_archived_attachments:
|
||||||
if archived_attachment.path_id not in already_removed:
|
if archived_attachment.path_id not in already_removed:
|
||||||
storage_paths.append(archived_attachment.path_id)
|
storage_paths.append(archived_attachment.path_id)
|
||||||
|
@ -107,8 +107,9 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
|
||||||
image_row.delete()
|
image_row.delete()
|
||||||
archived_attachment.delete()
|
archived_attachment.delete()
|
||||||
if len(storage_paths) >= DELETE_BATCH_SIZE:
|
if len(storage_paths) >= DELETE_BATCH_SIZE:
|
||||||
delete_message_attachments(storage_paths)
|
delete_message_attachments(storage_paths[:DELETE_BATCH_SIZE])
|
||||||
storage_paths = []
|
storage_paths = storage_paths[DELETE_BATCH_SIZE:]
|
||||||
|
|
||||||
if storage_paths:
|
if storage_paths:
|
||||||
delete_message_attachments(storage_paths)
|
delete_message_attachments(storage_paths)
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ from zerver.actions.uploads import do_delete_old_unclaimed_attachments
|
||||||
from zerver.lib.retention import clean_archived_data
|
from zerver.lib.retention import clean_archived_data
|
||||||
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
|
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
|
||||||
from zerver.lib.test_helpers import get_test_image_file
|
from zerver.lib.test_helpers import get_test_image_file
|
||||||
|
from zerver.lib.thumbnail import ThumbnailFormat
|
||||||
from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile
|
from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile
|
||||||
from zerver.models.clients import get_client
|
from zerver.models.clients import get_client
|
||||||
|
|
||||||
|
@ -401,24 +402,31 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_delete_batch_size(self) -> None:
|
def test_delete_batch_size(self) -> None:
|
||||||
attachments = [self.make_attachment("text.txt") for _ in range(10)]
|
# 3 attachments, each of which has 2 files because of the thumbnail
|
||||||
|
thumbnail_format = ThumbnailFormat("webp", 100, 75, animated=False)
|
||||||
|
with self.thumbnail_formats(thumbnail_format), self.captureOnCommitCallbacks(execute=True):
|
||||||
|
attachments = [self.make_attachment("img.png") for _ in range(3)]
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 6),
|
patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 5),
|
||||||
patch("zerver.actions.uploads.delete_message_attachments") as delete_mock,
|
patch("zerver.actions.uploads.delete_message_attachments") as delete_mock,
|
||||||
):
|
):
|
||||||
do_delete_old_unclaimed_attachments(1)
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
|
||||||
# We expect all of the 10 attachments to be deleted,
|
# We expect all of the 5 attachments to be deleted, across two
|
||||||
# across two different calls of 6- and 4-element lists
|
# different calls of 5- and 1-element lists. Since each image
|
||||||
|
# attachment is two files, this means that the thumbnail and
|
||||||
|
# its original is split across batches.
|
||||||
self.assertEqual(delete_mock.call_count, 2)
|
self.assertEqual(delete_mock.call_count, 2)
|
||||||
self.assert_length(delete_mock.call_args_list[0][0][0], 6)
|
self.assert_length(delete_mock.call_args_list[0][0][0], 5)
|
||||||
self.assert_length(delete_mock.call_args_list[1][0][0], 4)
|
self.assert_length(delete_mock.call_args_list[1][0][0], 1)
|
||||||
|
|
||||||
self.assertEqual(
|
deleted = set(delete_mock.call_args_list[0][0][0] + delete_mock.call_args_list[1][0][0])
|
||||||
set(delete_mock.call_args_list[0][0][0] + delete_mock.call_args_list[1][0][0]),
|
existing = set()
|
||||||
{attachment.path_id for attachment in attachments},
|
for attachment in attachments:
|
||||||
)
|
existing.add(attachment.path_id)
|
||||||
|
existing.add(f"thumbnail/{attachment.path_id}/{thumbnail_format!s}")
|
||||||
|
self.assertEqual(deleted, existing)
|
||||||
|
|
||||||
def test_delete_batch_size_archived(self) -> None:
|
def test_delete_batch_size_archived(self) -> None:
|
||||||
hamlet = self.example_user("hamlet")
|
hamlet = self.example_user("hamlet")
|
||||||
|
|
Loading…
Reference in New Issue