zulip/zerver/management/commands/delete_old_unclaimed_attach...

103 lines
4.1 KiB
Python
Raw Normal View History

from argparse import ArgumentParser
from datetime import timedelta
from typing import Any
from django.core.management.base import CommandError
from django.utils.timezone import now as timezone_now
from typing_extensions import override
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.lib.attachments import get_old_unclaimed_attachments
from zerver.lib.management import ZulipBaseCommand, abort_unless_locked
from zerver.lib.thumbnail import split_thumbnail_path
from zerver.lib.upload import all_message_attachments, delete_message_attachments
from zerver.models import ArchivedAttachment, Attachment
class Command(ZulipBaseCommand):
help = """Remove unclaimed attachments from storage older than a supplied
numerical value indicating the limit of how old the attachment can be.
The default is five weeks."""
@override
def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument(
"-w",
"--weeks",
dest="delta_weeks",
default=5,
type=int,
help="How long unattached attachments are preserved; defaults to 5 weeks.",
)
parser.add_argument(
"-f",
"--for-real",
action="store_true",
help="Actually remove the files from the storage.",
)
parser.add_argument(
"-C",
"--clean-up-storage",
action="store_true",
help="Examine all attachments in storage (local disk or S3) and remove "
"any files which are not in the database. This may take a very long time!",
)
@override
@abort_unless_locked
def handle(self, *args: Any, **options: Any) -> None:
delta_weeks = options["delta_weeks"]
print(f"Deleting unclaimed attached files older than {delta_weeks} weeks")
# print the list of files that are going to be removed
old_attachments, old_archived_attachments = get_old_unclaimed_attachments(delta_weeks)
for old_attachment in old_attachments:
print(f"* {old_attachment.file_name} created at {old_attachment.create_time}")
for old_archived_attachment in old_archived_attachments:
print(
f"* {old_archived_attachment.file_name} created at {old_archived_attachment.create_time}"
)
if options["for_real"]:
do_delete_old_unclaimed_attachments(delta_weeks)
print()
print("Unclaimed files deleted.")
if options["clean_up_storage"]:
print()
self.clean_attachment_upload_backend(dry_run=not options["for_real"])
if not options["for_real"]:
print()
raise CommandError("This was a dry run. Pass -f to actually delete.")
def clean_attachment_upload_backend(self, dry_run: bool = True) -> None:
cutoff = timezone_now() - timedelta(minutes=5)
print(f"Removing extra files in storage black-end older than {cutoff.isoformat()}")
to_delete = []
for file_path, modified_at in all_message_attachments(include_thumbnails=True):
if file_path.startswith("thumbnail/"):
path_id = split_thumbnail_path(file_path)[0]
else:
path_id = file_path
if Attachment.objects.filter(path_id=path_id).exists():
continue
if ArchivedAttachment.objects.filter(path_id=path_id).exists():
continue
if modified_at > cutoff:
# We upload files to the backend storage and _then_
# make the database entry, so must give some leeway to
# recently-added files which do not have DB rows.
continue
print(f"* {file_path} modified at {modified_at}")
if dry_run:
continue
to_delete.append(file_path)
if len(to_delete) > 1000:
delete_message_attachments(to_delete)
to_delete = []
if not dry_run and len(to_delete) > 0:
delete_message_attachments(to_delete)