do_delete_old_unclaimed_attachments: Cap deletions at batch size.

Since each loop may add more than one file to the `storage_paths`
list, this may result in more than 1000 files being sent to
delete_message_attachments.  Since the S3 backend only supports 1000
elements being deleted at once, we must partition the list into chunks
which are no more than 1000 elements long.
This commit is contained in:
Alex Vandiver 2024-08-28 18:47:51 +00:00 committed by Tim Abbott
parent 3d6dcf8fe5
commit 704423787b
2 changed files with 23 additions and 14 deletions

View File

@ -94,8 +94,8 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
already_removed.add(attachment.path_id) already_removed.add(attachment.path_id)
attachment.delete() attachment.delete()
if len(storage_paths) >= DELETE_BATCH_SIZE: if len(storage_paths) >= DELETE_BATCH_SIZE:
delete_message_attachments(storage_paths) delete_message_attachments(storage_paths[:DELETE_BATCH_SIZE])
storage_paths = [] storage_paths = storage_paths[DELETE_BATCH_SIZE:]
for archived_attachment in old_unclaimed_archived_attachments: for archived_attachment in old_unclaimed_archived_attachments:
if archived_attachment.path_id not in already_removed: if archived_attachment.path_id not in already_removed:
storage_paths.append(archived_attachment.path_id) storage_paths.append(archived_attachment.path_id)
@ -107,8 +107,9 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
image_row.delete() image_row.delete()
archived_attachment.delete() archived_attachment.delete()
if len(storage_paths) >= DELETE_BATCH_SIZE: if len(storage_paths) >= DELETE_BATCH_SIZE:
delete_message_attachments(storage_paths) delete_message_attachments(storage_paths[:DELETE_BATCH_SIZE])
storage_paths = [] storage_paths = storage_paths[DELETE_BATCH_SIZE:]
if storage_paths: if storage_paths:
delete_message_attachments(storage_paths) delete_message_attachments(storage_paths)

View File

@ -13,6 +13,7 @@ from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.lib.retention import clean_archived_data from zerver.lib.retention import clean_archived_data
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
from zerver.lib.test_helpers import get_test_image_file from zerver.lib.test_helpers import get_test_image_file
from zerver.lib.thumbnail import ThumbnailFormat
from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile
from zerver.models.clients import get_client from zerver.models.clients import get_client
@ -401,24 +402,31 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
) )
def test_delete_batch_size(self) -> None: def test_delete_batch_size(self) -> None:
attachments = [self.make_attachment("text.txt") for _ in range(10)] # 3 attachments, each of which has 2 files because of the thumbnail
thumbnail_format = ThumbnailFormat("webp", 100, 75, animated=False)
with self.thumbnail_formats(thumbnail_format), self.captureOnCommitCallbacks(execute=True):
attachments = [self.make_attachment("img.png") for _ in range(3)]
with ( with (
patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 6), patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 5),
patch("zerver.actions.uploads.delete_message_attachments") as delete_mock, patch("zerver.actions.uploads.delete_message_attachments") as delete_mock,
): ):
do_delete_old_unclaimed_attachments(1) do_delete_old_unclaimed_attachments(1)
# We expect all of the 10 attachments to be deleted, # We expect all of the 5 attachments to be deleted, across two
# across two different calls of 6- and 4-element lists # different calls of 5- and 1-element lists. Since each image
# attachment is two files, this means that the thumbnail and
# its original is split across batches.
self.assertEqual(delete_mock.call_count, 2) self.assertEqual(delete_mock.call_count, 2)
self.assert_length(delete_mock.call_args_list[0][0][0], 6) self.assert_length(delete_mock.call_args_list[0][0][0], 5)
self.assert_length(delete_mock.call_args_list[1][0][0], 4) self.assert_length(delete_mock.call_args_list[1][0][0], 1)
self.assertEqual( deleted = set(delete_mock.call_args_list[0][0][0] + delete_mock.call_args_list[1][0][0])
set(delete_mock.call_args_list[0][0][0] + delete_mock.call_args_list[1][0][0]), existing = set()
{attachment.path_id for attachment in attachments}, for attachment in attachments:
) existing.add(attachment.path_id)
existing.add(f"thumbnail/{attachment.path_id}/{thumbnail_format!s}")
self.assertEqual(deleted, existing)
def test_delete_batch_size_archived(self) -> None: def test_delete_batch_size_archived(self) -> None:
hamlet = self.example_user("hamlet") hamlet = self.example_user("hamlet")