2023-02-27 20:55:33 +01:00
|
|
|
import datetime
|
2017-11-16 00:43:27 +01:00
|
|
|
from argparse import ArgumentParser
|
2016-06-04 16:52:18 +02:00
|
|
|
from typing import Any
|
|
|
|
|
2019-05-03 23:20:39 +02:00
|
|
|
from django.core.management.base import BaseCommand, CommandError
|
2023-02-27 20:55:33 +01:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2016-03-24 20:24:01 +01:00
|
|
|
|
2022-04-14 23:43:26 +02:00
|
|
|
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
|
2023-02-27 20:55:33 +01:00
|
|
|
from zerver.lib.upload import all_message_attachments, delete_message_attachments
|
|
|
|
from zerver.models import ArchivedAttachment, Attachment, get_old_unclaimed_attachments
|
2016-03-24 20:24:01 +01:00
|
|
|
|
2020-01-14 21:59:46 +01:00
|
|
|
|
2016-03-24 20:24:01 +01:00
|
|
|
class Command(BaseCommand):
    """Management command that removes unclaimed attachments from storage.

    By default this is a dry run: it lists what would be removed and then
    exits with a CommandError.  Pass ``-f``/``--for-real`` to actually delete.
    """

    # The stated default must match the ``default=`` of ``--weeks`` below.
    help = """Remove unclaimed attachments from storage older than a supplied
numerical value indicating the limit of how old the attachment can be.
The default is five weeks."""

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the ``--weeks``, ``--for-real`` and ``--clean-up-storage`` options."""
        parser.add_argument(
            "-w",
            "--weeks",
            dest="delta_weeks",
            default=5,
            type=int,
            help="Limiting value of how old the file can be.",
        )

        parser.add_argument(
            "-f",
            "--for-real",
            action="store_true",
            help="Actually remove the files from the storage.",
        )

        parser.add_argument(
            "-C",
            "--clean-up-storage",
            action="store_true",
            help="Examine all attachments in storage (local disk or S3) and remove "
            "any files which are not in the database. This may take a very long time!",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """List old unclaimed attachments and, with ``--for-real``, delete them.

        With ``--clean-up-storage`` the storage backend is additionally
        scanned for files that have no database row.

        Raises:
            CommandError: when ``--for-real`` was not passed, to signal that
                nothing was deleted (dry run).
        """
        delta_weeks = options["delta_weeks"]
        for_real = options["for_real"]
        print(f"Deleting unclaimed attached files older than {delta_weeks} weeks")

        # print the list of files that are going to be removed
        old_attachments, old_archived_attachments = get_old_unclaimed_attachments(delta_weeks)
        for old_attachment in old_attachments:
            print(f"* {old_attachment.file_name} created at {old_attachment.create_time}")
        for old_archived_attachment in old_archived_attachments:
            print(
                f"* {old_archived_attachment.file_name} created at {old_archived_attachment.create_time}"
            )

        if for_real:
            do_delete_old_unclaimed_attachments(delta_weeks)
            print("")
            print("Unclaimed files deleted.")

        if options["clean_up_storage"]:
            print("")
            self.clean_attachment_upload_backend(dry_run=not for_real)

        if not for_real:
            print("")
            raise CommandError("This was a dry run. Pass -f to actually delete.")

    def clean_attachment_upload_backend(self, dry_run: bool = True) -> None:
        """Remove files from the storage backend that have no database row.

        Every ``(path_id, modified_at)`` pair yielded by
        ``all_message_attachments()`` is checked against ``Attachment`` and
        ``ArchivedAttachment``; files known to neither, and last modified
        more than five minutes ago, are printed and (unless ``dry_run``)
        deleted in batches.

        Args:
            dry_run: when true, only print the candidates; delete nothing.
        """
        # Flush once the batch holds exactly this many paths.  The original
        # check (``> 1000``) let a batch grow to 1001 entries.
        # NOTE(review): presumably the S3 backend forwards the list to a
        # single DeleteObjects request, which accepts at most 1000 keys —
        # confirm against delete_message_attachments.
        batch_size = 1000

        cutoff = timezone_now() - datetime.timedelta(minutes=5)
        print(f"Removing extra files in storage back-end older than {cutoff.isoformat()}")
        to_delete = []
        for path_id, modified_at in all_message_attachments():
            if Attachment.objects.filter(path_id=path_id).exists():
                continue
            if ArchivedAttachment.objects.filter(path_id=path_id).exists():
                continue
            if modified_at > cutoff:
                # We upload files to the backend storage and _then_
                # make the database entry, so must give some leeway to
                # recently-added files which do not have DB rows.
                continue
            print(f"* {path_id} modified at {modified_at}")
            if dry_run:
                continue
            to_delete.append(path_id)
            if len(to_delete) >= batch_size:
                delete_message_attachments(to_delete)
                to_delete = []
        # Flush whatever is left over from the last partial batch.
        if not dry_run and to_delete:
            delete_message_attachments(to_delete)
|