From 704423787b63de99547a0a0c1166ee9a1c87f1fe Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Wed, 28 Aug 2024 18:47:51 +0000 Subject: [PATCH] do_delete_old_unclaimed_attachments: Cap deletions at batch size. Since each loop may add more than one file to the `storage_paths` list, this may result in more than 1000 files being sent to delete_message_attachments. Since the S3 backend only supports 1000 elements being deleted at once, we must partition the list into chunks which are no more than 1000 elements long. --- zerver/actions/uploads.py | 9 +++--- .../test_delete_unclaimed_attachments.py | 28 ++++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/zerver/actions/uploads.py b/zerver/actions/uploads.py index 1076b46674..1e49eae90b 100644 --- a/zerver/actions/uploads.py +++ b/zerver/actions/uploads.py @@ -94,8 +94,8 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None: already_removed.add(attachment.path_id) attachment.delete() if len(storage_paths) >= DELETE_BATCH_SIZE: - delete_message_attachments(storage_paths) - storage_paths = [] + delete_message_attachments(storage_paths[:DELETE_BATCH_SIZE]) + storage_paths = storage_paths[DELETE_BATCH_SIZE:] for archived_attachment in old_unclaimed_archived_attachments: if archived_attachment.path_id not in already_removed: storage_paths.append(archived_attachment.path_id) @@ -107,8 +107,9 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None: image_row.delete() archived_attachment.delete() if len(storage_paths) >= DELETE_BATCH_SIZE: - delete_message_attachments(storage_paths) - storage_paths = [] + delete_message_attachments(storage_paths[:DELETE_BATCH_SIZE]) + storage_paths = storage_paths[DELETE_BATCH_SIZE:] + if storage_paths: delete_message_attachments(storage_paths) diff --git a/zerver/tests/test_delete_unclaimed_attachments.py b/zerver/tests/test_delete_unclaimed_attachments.py index e0a8bebd12..4e12100cda 100644 --- a/zerver/tests/test_delete_unclaimed_attachments.py +++ b/zerver/tests/test_delete_unclaimed_attachments.py @@ -13,6 +13,7 @@ from zerver.actions.uploads import do_delete_old_unclaimed_attachments from zerver.lib.retention import clean_archived_data from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase from zerver.lib.test_helpers import get_test_image_file +from zerver.lib.thumbnail import ThumbnailFormat from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile from zerver.models.clients import get_client @@ -401,24 +402,31 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase): ) def test_delete_batch_size(self) -> None: - attachments = [self.make_attachment("text.txt") for _ in range(10)] + # 3 attachments, each of which has 2 files because of the thumbnail + thumbnail_format = ThumbnailFormat("webp", 100, 75, animated=False) + with self.thumbnail_formats(thumbnail_format), self.captureOnCommitCallbacks(execute=True): + attachments = [self.make_attachment("img.png") for _ in range(3)] with ( - patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 6), + patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 5), patch("zerver.actions.uploads.delete_message_attachments") as delete_mock, ): do_delete_old_unclaimed_attachments(1) - # We expect all of the 10 attachments to be deleted, - # across two different calls of 6- and 4-element lists + # We expect all of the 5 attachments to be deleted, across two + # different calls of 5- and 1-element lists. Since each image + # attachment is two files, this means that the thumbnail and + # its original is split across batches. self.assertEqual(delete_mock.call_count, 2) - self.assert_length(delete_mock.call_args_list[0][0][0], 6) - self.assert_length(delete_mock.call_args_list[1][0][0], 4) + self.assert_length(delete_mock.call_args_list[0][0][0], 5) + self.assert_length(delete_mock.call_args_list[1][0][0], 1) - self.assertEqual( - set(delete_mock.call_args_list[0][0][0] + delete_mock.call_args_list[1][0][0]), - {attachment.path_id for attachment in attachments}, - ) + deleted = set(delete_mock.call_args_list[0][0][0] + delete_mock.call_args_list[1][0][0]) + existing = set() + for attachment in attachments: + existing.add(attachment.path_id) + existing.add(f"thumbnail/{attachment.path_id}/{thumbnail_format!s}") + self.assertEqual(deleted, existing) def test_delete_batch_size_archived(self) -> None: hamlet = self.example_user("hamlet")