mirror of https://github.com/zulip/zulip.git
retention: Prevent deletion of partially-archived messages.
Previously, this code: ```python3 old_archived_attachments = ArchivedAttachment.objects.annotate( has_other_messages=Exists( Attachment.objects.filter(id=OuterRef("id")) .exclude(messages=None) .exclude(scheduled_messages=None) ) ).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False) ``` ...protected from removal any ArchivedAttachment objects where there was an Attachment which had _both_ a message _and_ a scheduled message, instead of _either_ a message _or_ a scheduled message. Since files are removed from disk when the ArchivedAttachment rows are deleted, this meant that if an upload was referenced in two messages, and one was deleted, the file was permanently deleted when the ArchivedMessage and ArchivedAttachment were cleaned up, despite being still referenced in live Messages and Attachments. Switch from `.exclude(messages=None).exclude(scheduled_messages=None)` to `.exclude(messages=None, scheduled_messages=None)` which "OR"s those conditions appropriately. Pull the relevant test into its own file, and expand it significantly to cover this, and other, corner cases.
This commit is contained in:
parent
0f918d9071
commit
b67108c8c6
|
@ -74,11 +74,17 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
|
||||||
weeks_ago
|
weeks_ago
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# An attachment may be removed from Attachments and
|
||||||
|
# ArchiveAttachments in the same run; prevent warnings from the
|
||||||
|
# backing store by only removing it from there once.
|
||||||
|
already_removed = set()
|
||||||
for attachment in old_unclaimed_attachments:
|
for attachment in old_unclaimed_attachments:
|
||||||
delete_message_attachment(attachment.path_id)
|
delete_message_attachment(attachment.path_id)
|
||||||
|
already_removed.add(attachment.path_id)
|
||||||
attachment.delete()
|
attachment.delete()
|
||||||
for archived_attachment in old_unclaimed_archived_attachments:
|
for archived_attachment in old_unclaimed_archived_attachments:
|
||||||
delete_message_attachment(archived_attachment.path_id)
|
if archived_attachment.path_id not in already_removed:
|
||||||
|
delete_message_attachment(archived_attachment.path_id)
|
||||||
archived_attachment.delete()
|
archived_attachment.delete()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3739,9 +3739,9 @@ def get_old_unclaimed_attachments(
|
||||||
)
|
)
|
||||||
old_archived_attachments = ArchivedAttachment.objects.annotate(
|
old_archived_attachments = ArchivedAttachment.objects.annotate(
|
||||||
has_other_messages=Exists(
|
has_other_messages=Exists(
|
||||||
Attachment.objects.filter(id=OuterRef("id"))
|
Attachment.objects.filter(id=OuterRef("id")).exclude(
|
||||||
.exclude(messages=None)
|
messages=None, scheduled_messages=None
|
||||||
.exclude(scheduled_messages=None)
|
)
|
||||||
)
|
)
|
||||||
).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False)
|
).filter(messages=None, create_time__lt=delta_weeks_ago, has_other_messages=False)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,345 @@
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from io import StringIO
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import time_machine
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils.timezone import now as timezone_now
|
||||||
|
|
||||||
|
from zerver.actions.message_delete import do_delete_messages
|
||||||
|
from zerver.actions.scheduled_messages import check_schedule_message, delete_scheduled_message
|
||||||
|
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
|
||||||
|
from zerver.lib.retention import clean_archived_data
|
||||||
|
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
|
||||||
|
from zerver.models import (
|
||||||
|
ArchivedAttachment,
|
||||||
|
Attachment,
|
||||||
|
Message,
|
||||||
|
UserProfile,
|
||||||
|
get_client,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
|
||||||
|
def make_attachment(
|
||||||
|
self, filename: str, when: Optional[datetime] = None, uploader: Optional[UserProfile] = None
|
||||||
|
) -> Attachment:
|
||||||
|
if when is None:
|
||||||
|
when = timezone_now() - timedelta(weeks=2)
|
||||||
|
if uploader is None:
|
||||||
|
uploader = self.example_user("hamlet")
|
||||||
|
self.login_user(uploader)
|
||||||
|
|
||||||
|
with time_machine.travel(when):
|
||||||
|
file_obj = StringIO("zulip!")
|
||||||
|
file_obj.name = filename
|
||||||
|
response = self.assert_json_success(
|
||||||
|
self.client_post("/json/user_uploads", {"file": file_obj})
|
||||||
|
)
|
||||||
|
path_id = re.sub("/user_uploads/", "", response["uri"])
|
||||||
|
return Attachment.objects.get(path_id=path_id)
|
||||||
|
|
||||||
|
def assert_exists(
|
||||||
|
self,
|
||||||
|
attachment: Attachment,
|
||||||
|
*,
|
||||||
|
has_file: bool,
|
||||||
|
has_attachment: bool,
|
||||||
|
has_archived_attachment: bool,
|
||||||
|
) -> None:
|
||||||
|
assert settings.LOCAL_FILES_DIR
|
||||||
|
self.assertEqual( # File existence on disk
|
||||||
|
os.path.isfile(os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)), has_file
|
||||||
|
)
|
||||||
|
self.assertEqual( # Attachment row
|
||||||
|
Attachment.objects.filter(id=attachment.id).exists(), has_attachment
|
||||||
|
)
|
||||||
|
self.assertEqual( # ArchivedAttachment row
|
||||||
|
ArchivedAttachment.objects.filter(id=attachment.id).exists(), has_archived_attachment
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_unused_upload(self) -> None:
|
||||||
|
unused_attachment = self.make_attachment("unused.txt")
|
||||||
|
self.assert_exists(
|
||||||
|
unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# If we have 3 weeks of grace, nothing happens
|
||||||
|
do_delete_old_unclaimed_attachments(3)
|
||||||
|
self.assert_exists(
|
||||||
|
unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# If we have 1 weeks of grace, the Attachment is deleted, and so is the file on disk
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
unused_attachment, has_file=False, has_attachment=False, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_used_upload(self) -> None:
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
attachment = self.make_attachment("used.txt")
|
||||||
|
|
||||||
|
# Send message referencing that message
|
||||||
|
self.subscribe(hamlet, "Denmark")
|
||||||
|
body = f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
|
||||||
|
self.send_stream_message(hamlet, "Denmark", body, "test")
|
||||||
|
|
||||||
|
# Because the message is claimed, it is not removed
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_upload_archived_message(self) -> None:
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
attachment = self.make_attachment("used.txt")
|
||||||
|
|
||||||
|
# Send message referencing that message
|
||||||
|
self.subscribe(hamlet, "Denmark")
|
||||||
|
body = f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
|
||||||
|
message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
|
||||||
|
|
||||||
|
# Delete that message; this moves it to ArchivedAttachment but leaves the file on disk
|
||||||
|
do_delete_messages(hamlet.realm, [Message.objects.get(id=message_id)])
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=False, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Removing unclaimed attachments leaves the file, since it is
|
||||||
|
# attached to an existing ArchivedAttachment
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=False, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Now purge the ArchivedMessage
|
||||||
|
with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
|
||||||
|
clean_archived_data()
|
||||||
|
|
||||||
|
# The attachment still exists as an unclaimed ArchivedAttachment
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=False, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Removing unclaimed attachments now cleans it out
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=False, has_attachment=False, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_one_message(self) -> None:
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
attachment = self.make_attachment("used.txt")
|
||||||
|
|
||||||
|
# Send message referencing that message
|
||||||
|
self.subscribe(hamlet, "Denmark")
|
||||||
|
body = f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
|
||||||
|
first_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
|
||||||
|
second_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
|
||||||
|
|
||||||
|
# Delete the second message; this leaves an Attachment and an
|
||||||
|
# ArchivedAttachment, both associated with a message
|
||||||
|
do_delete_messages(hamlet.realm, [Message.objects.get(id=first_message_id)])
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Removing unclaimed attachments leaves the file, since it is
|
||||||
|
# attached to an existing Attachment and ArchivedAttachment
|
||||||
|
# which have Messages and ArchivedMessages, respectively
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Purging the ArchivedMessage does not affect the Attachment
|
||||||
|
# or ArchivedAttachment
|
||||||
|
with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
|
||||||
|
clean_archived_data()
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Removing unclaimed attachments still does nothing, because
|
||||||
|
# the ArchivedAttachment is protected by the existing
|
||||||
|
# Attachment.
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Deleting the other message now leaves just an ArchivedAttachment
|
||||||
|
do_delete_messages(hamlet.realm, [Message.objects.get(id=second_message_id)])
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=False, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Cleaning out the archived message and purging unclaimed
|
||||||
|
# attachments now finally removes it.
|
||||||
|
with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
|
||||||
|
clean_archived_data()
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=False, has_attachment=False, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_with_scheduled_messages(self) -> None:
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
attachment = self.make_attachment("used.txt")
|
||||||
|
|
||||||
|
# Schedule a future send with the attachment
|
||||||
|
self.subscribe(hamlet, "Denmark")
|
||||||
|
body = f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
|
||||||
|
scheduled_message_id = check_schedule_message(
|
||||||
|
hamlet,
|
||||||
|
get_client("website"),
|
||||||
|
"stream",
|
||||||
|
[self.get_stream_id("Denmark")],
|
||||||
|
"Test topic",
|
||||||
|
body,
|
||||||
|
timezone_now() + timedelta(days=365),
|
||||||
|
hamlet.realm,
|
||||||
|
)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# The ScheduledMessage protects the attachment from being removed
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Deleting the ScheduledMessage leaves the attachment dangling
|
||||||
|
delete_scheduled_message(hamlet, scheduled_message_id)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Having no referents, it is now a target for removal
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=False, has_attachment=False, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_with_scheduled_message_and_archive(self) -> None:
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
attachment = self.make_attachment("used.txt")
|
||||||
|
|
||||||
|
# Schedule a message, and also send one now
|
||||||
|
self.subscribe(hamlet, "Denmark")
|
||||||
|
body = f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
|
||||||
|
scheduled_message_id = check_schedule_message(
|
||||||
|
hamlet,
|
||||||
|
get_client("website"),
|
||||||
|
"stream",
|
||||||
|
[self.get_stream_id("Denmark")],
|
||||||
|
"Test topic",
|
||||||
|
body,
|
||||||
|
timezone_now() + timedelta(days=365),
|
||||||
|
hamlet.realm,
|
||||||
|
)
|
||||||
|
sent_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Deleting the sent message leaves us with an Attachment
|
||||||
|
# attached to the scheduled message, and an archived
|
||||||
|
# attachment with an archived message
|
||||||
|
do_delete_messages(hamlet.realm, [Message.objects.get(id=sent_message_id)])
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Expiring the archived message leaves a dangling
|
||||||
|
# ArchivedAttachment and a protected Attachment
|
||||||
|
with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
|
||||||
|
clean_archived_data()
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Removing unclaimed attachments deletes nothing, since the
|
||||||
|
# the ArchivedAttachment is protected by the Attachment which
|
||||||
|
# is still protected by the scheduled message
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Deleting the ScheduledMessage leaves the attachment fully dangling
|
||||||
|
delete_scheduled_message(hamlet, scheduled_message_id)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Having no referents, it is now a target for removal
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=False, has_attachment=False, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_delete_with_unscheduled_message_and_archive(self) -> None:
|
||||||
|
# This is subtly different from the test above -- we delete
|
||||||
|
# the scheduled message first, which is the only way to get an
|
||||||
|
# Attachment with not referents as well as an
|
||||||
|
# ArchivedAttachment which does have references. Normally,
|
||||||
|
# the process of archiving prunes Attachments which have no
|
||||||
|
# references.
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
attachment = self.make_attachment("used.txt")
|
||||||
|
|
||||||
|
# Schedule a message, and also send one now
|
||||||
|
self.subscribe(hamlet, "Denmark")
|
||||||
|
body = f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/{attachment.path_id})"
|
||||||
|
scheduled_message_id = check_schedule_message(
|
||||||
|
hamlet,
|
||||||
|
get_client("website"),
|
||||||
|
"stream",
|
||||||
|
[self.get_stream_id("Denmark")],
|
||||||
|
"Test topic",
|
||||||
|
body,
|
||||||
|
timezone_now() + timedelta(days=365),
|
||||||
|
hamlet.realm,
|
||||||
|
)
|
||||||
|
sent_message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=False
|
||||||
|
)
|
||||||
|
|
||||||
|
# Delete the message and then unschedule the scheduled message
|
||||||
|
# before expiring the ArchivedMessages.
|
||||||
|
do_delete_messages(hamlet.realm, [Message.objects.get(id=sent_message_id)])
|
||||||
|
delete_scheduled_message(hamlet, scheduled_message_id)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Attempting to expire unclaimed attachments leaves the
|
||||||
|
# unreferenced Attachment which is protected by the
|
||||||
|
# ArchivedAttachment which has archived messages referencing
|
||||||
|
# it.
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Expiring archived messages leaves us with a dangling
|
||||||
|
# Attachment and ArchivedAttachment, with neither having
|
||||||
|
# referents.
|
||||||
|
with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
|
||||||
|
clean_archived_data()
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=True, has_attachment=True, has_archived_attachment=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Having no referents in either place, it is now a target for
|
||||||
|
# removal
|
||||||
|
do_delete_old_unclaimed_attachments(1)
|
||||||
|
self.assert_exists(
|
||||||
|
attachment, has_file=False, has_attachment=False, has_archived_attachment=False
|
||||||
|
)
|
|
@ -1,4 +1,3 @@
|
||||||
import datetime
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -10,20 +9,16 @@ from unittest.mock import patch
|
||||||
|
|
||||||
import orjson
|
import orjson
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils.timezone import now as timezone_now
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from urllib3 import encode_multipart_formdata
|
from urllib3 import encode_multipart_formdata
|
||||||
from urllib3.fields import RequestField
|
from urllib3.fields import RequestField
|
||||||
|
|
||||||
import zerver.lib.upload
|
import zerver.lib.upload
|
||||||
from zerver.actions.create_realm import do_create_realm
|
from zerver.actions.create_realm import do_create_realm
|
||||||
from zerver.actions.message_delete import do_delete_messages
|
|
||||||
from zerver.actions.message_send import internal_send_private_message
|
from zerver.actions.message_send import internal_send_private_message
|
||||||
from zerver.actions.realm_icon import do_change_icon_source
|
from zerver.actions.realm_icon import do_change_icon_source
|
||||||
from zerver.actions.realm_logo import do_change_logo_source
|
from zerver.actions.realm_logo import do_change_logo_source
|
||||||
from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_realm_property
|
from zerver.actions.realm_settings import do_change_realm_plan_type, do_set_realm_property
|
||||||
from zerver.actions.scheduled_messages import check_schedule_message, delete_scheduled_message
|
|
||||||
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
|
|
||||||
from zerver.actions.user_settings import do_delete_avatar_image
|
from zerver.actions.user_settings import do_delete_avatar_image
|
||||||
from zerver.lib.avatar import avatar_url, get_avatar_field
|
from zerver.lib.avatar import avatar_url, get_avatar_field
|
||||||
from zerver.lib.cache import cache_get, get_realm_used_upload_space_cache_key
|
from zerver.lib.cache import cache_get, get_realm_used_upload_space_cache_key
|
||||||
|
@ -31,7 +26,6 @@ from zerver.lib.create_user import copy_default_settings
|
||||||
from zerver.lib.initial_password import initial_password
|
from zerver.lib.initial_password import initial_password
|
||||||
from zerver.lib.realm_icon import realm_icon_url
|
from zerver.lib.realm_icon import realm_icon_url
|
||||||
from zerver.lib.realm_logo import get_realm_logo_url
|
from zerver.lib.realm_logo import get_realm_logo_url
|
||||||
from zerver.lib.retention import clean_archived_data
|
|
||||||
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
|
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
|
||||||
from zerver.lib.test_helpers import (
|
from zerver.lib.test_helpers import (
|
||||||
avatar_disk_path,
|
avatar_disk_path,
|
||||||
|
@ -39,19 +33,17 @@ from zerver.lib.test_helpers import (
|
||||||
ratelimit_rule,
|
ratelimit_rule,
|
||||||
read_test_image_file,
|
read_test_image_file,
|
||||||
)
|
)
|
||||||
from zerver.lib.upload import delete_message_attachment, upload_message_attachment
|
from zerver.lib.upload import upload_message_attachment
|
||||||
from zerver.lib.upload.base import BadImageError, ZulipUploadBackend, resize_emoji, sanitize_name
|
from zerver.lib.upload.base import BadImageError, ZulipUploadBackend, resize_emoji, sanitize_name
|
||||||
from zerver.lib.upload.local import LocalUploadBackend
|
from zerver.lib.upload.local import LocalUploadBackend
|
||||||
from zerver.lib.upload.s3 import S3UploadBackend
|
from zerver.lib.upload.s3 import S3UploadBackend
|
||||||
from zerver.lib.users import get_api_key
|
from zerver.lib.users import get_api_key
|
||||||
from zerver.models import (
|
from zerver.models import (
|
||||||
ArchivedAttachment,
|
|
||||||
Attachment,
|
Attachment,
|
||||||
Message,
|
Message,
|
||||||
Realm,
|
Realm,
|
||||||
RealmDomain,
|
RealmDomain,
|
||||||
UserProfile,
|
UserProfile,
|
||||||
get_client,
|
|
||||||
get_realm,
|
get_realm,
|
||||||
get_system_bot,
|
get_system_bot,
|
||||||
get_user_by_delivery_email,
|
get_user_by_delivery_email,
|
||||||
|
@ -361,126 +353,6 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase):
|
||||||
self.assertEqual(response.status_code, 404)
|
self.assertEqual(response.status_code, 404)
|
||||||
self.assert_in_response("This file does not exist or has been deleted.", response)
|
self.assert_in_response("This file does not exist or has been deleted.", response)
|
||||||
|
|
||||||
def test_delete_old_unclaimed_attachments(self) -> None:
|
|
||||||
# Upload some files and make them older than a week
|
|
||||||
hamlet = self.example_user("hamlet")
|
|
||||||
self.login("hamlet")
|
|
||||||
d1 = StringIO("zulip!")
|
|
||||||
d1.name = "dummy_1.txt"
|
|
||||||
result = self.client_post("/json/user_uploads", {"file": d1})
|
|
||||||
response_dict = self.assert_json_success(result)
|
|
||||||
d1_path_id = re.sub("/user_uploads/", "", response_dict["uri"])
|
|
||||||
|
|
||||||
d2 = StringIO("zulip!")
|
|
||||||
d2.name = "dummy_2.txt"
|
|
||||||
result = self.client_post("/json/user_uploads", {"file": d2})
|
|
||||||
response_dict = self.assert_json_success(result)
|
|
||||||
d2_path_id = re.sub("/user_uploads/", "", response_dict["uri"])
|
|
||||||
|
|
||||||
d3 = StringIO("zulip!")
|
|
||||||
d3.name = "dummy_3.txt"
|
|
||||||
result = self.client_post("/json/user_uploads", {"file": d3})
|
|
||||||
d3_path_id = re.sub("/user_uploads/", "", result.json()["uri"])
|
|
||||||
|
|
||||||
d4 = StringIO("zulip!")
|
|
||||||
d4.name = "dummy_4.txt"
|
|
||||||
result = self.client_post("/json/user_uploads", {"file": d4})
|
|
||||||
d4_path_id = re.sub("/user_uploads/", "", result.json()["uri"])
|
|
||||||
|
|
||||||
two_week_ago = timezone_now() - datetime.timedelta(weeks=2)
|
|
||||||
# This Attachment will have a message linking to it:
|
|
||||||
d1_attachment = Attachment.objects.get(path_id=d1_path_id)
|
|
||||||
d1_attachment.create_time = two_week_ago
|
|
||||||
d1_attachment.save()
|
|
||||||
self.assertEqual(repr(d1_attachment), "<Attachment: dummy_1.txt>")
|
|
||||||
# This Attachment won't have any messages.
|
|
||||||
d2_attachment = Attachment.objects.get(path_id=d2_path_id)
|
|
||||||
d2_attachment.create_time = two_week_ago
|
|
||||||
d2_attachment.save()
|
|
||||||
# This Attachment will have a message that gets archived. The file
|
|
||||||
# should not be deleted until the message is deleted from the archive.
|
|
||||||
d3_attachment = Attachment.objects.get(path_id=d3_path_id)
|
|
||||||
d3_attachment.create_time = two_week_ago
|
|
||||||
d3_attachment.save()
|
|
||||||
|
|
||||||
# This Attachment will just have a ScheduledMessage referencing it. It should not be deleted
|
|
||||||
# until the ScheduledMessage is deleted.
|
|
||||||
d4_attachment = Attachment.objects.get(path_id=d4_path_id)
|
|
||||||
d4_attachment.create_time = two_week_ago
|
|
||||||
d4_attachment.save()
|
|
||||||
|
|
||||||
# Send message referencing only dummy_1
|
|
||||||
self.subscribe(hamlet, "Denmark")
|
|
||||||
body = (
|
|
||||||
f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/"
|
|
||||||
+ d1_path_id
|
|
||||||
+ ")"
|
|
||||||
)
|
|
||||||
self.send_stream_message(hamlet, "Denmark", body, "test")
|
|
||||||
|
|
||||||
# Send message referencing dummy_3 - it will be archived next.
|
|
||||||
body = (
|
|
||||||
f"Some more files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/"
|
|
||||||
+ d3_path_id
|
|
||||||
+ ")"
|
|
||||||
)
|
|
||||||
message_id = self.send_stream_message(hamlet, "Denmark", body, "test")
|
|
||||||
assert settings.LOCAL_FILES_DIR
|
|
||||||
d3_local_path = os.path.join(settings.LOCAL_FILES_DIR, d3_path_id)
|
|
||||||
self.assertTrue(os.path.exists(d3_local_path))
|
|
||||||
|
|
||||||
body = (
|
|
||||||
f"Some files here ...[zulip.txt](http://{hamlet.realm.host}/user_uploads/"
|
|
||||||
+ d4_path_id
|
|
||||||
+ ")"
|
|
||||||
)
|
|
||||||
scheduled_message_d4_id = check_schedule_message(
|
|
||||||
hamlet,
|
|
||||||
get_client("website"),
|
|
||||||
"stream",
|
|
||||||
[self.get_stream_id("Verona")],
|
|
||||||
"Test topic",
|
|
||||||
body,
|
|
||||||
timezone_now() + datetime.timedelta(days=365),
|
|
||||||
hamlet.realm,
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
list(d4_attachment.scheduled_messages.all().values_list("id", flat=True)),
|
|
||||||
[scheduled_message_d4_id],
|
|
||||||
)
|
|
||||||
d4_local_path = os.path.join(settings.LOCAL_FILES_DIR, d4_path_id)
|
|
||||||
self.assertTrue(os.path.exists(d4_local_path))
|
|
||||||
|
|
||||||
do_delete_messages(hamlet.realm, [Message.objects.get(id=message_id)])
|
|
||||||
# dummy_2 should not exist in database or the uploads folder
|
|
||||||
do_delete_old_unclaimed_attachments(2)
|
|
||||||
self.assertTrue(not Attachment.objects.filter(path_id=d2_path_id).exists())
|
|
||||||
with self.assertLogs(level="WARNING") as warn_log:
|
|
||||||
self.assertTrue(not delete_message_attachment(d2_path_id))
|
|
||||||
self.assertEqual(
|
|
||||||
warn_log.output,
|
|
||||||
["WARNING:root:dummy_2.txt does not exist. Its entry in the database will be removed."],
|
|
||||||
)
|
|
||||||
|
|
||||||
# dummy_3 file should still exist, because the attachment has been moved to the archive.
|
|
||||||
self.assertTrue(os.path.exists(d3_local_path))
|
|
||||||
|
|
||||||
# After the archive gets emptied, do_delete_old_unclaimed_attachments should result
|
|
||||||
# in deleting the file, since it is now truly no longer referenced.
|
|
||||||
with self.settings(ARCHIVED_DATA_VACUUMING_DELAY_DAYS=0):
|
|
||||||
clean_archived_data()
|
|
||||||
do_delete_old_unclaimed_attachments(2)
|
|
||||||
self.assertFalse(os.path.exists(d3_local_path))
|
|
||||||
self.assertTrue(not Attachment.objects.filter(path_id=d3_path_id).exists())
|
|
||||||
self.assertTrue(not ArchivedAttachment.objects.filter(path_id=d3_path_id).exists())
|
|
||||||
|
|
||||||
# dummy_4 only get deleted after the scheduled message is deleted.
|
|
||||||
self.assertTrue(os.path.exists(d4_local_path))
|
|
||||||
delete_scheduled_message(hamlet, scheduled_message_d4_id)
|
|
||||||
do_delete_old_unclaimed_attachments(2)
|
|
||||||
self.assertFalse(os.path.exists(d4_local_path))
|
|
||||||
self.assertTrue(not Attachment.objects.filter(path_id=d4_path_id).exists())
|
|
||||||
|
|
||||||
def test_attachment_url_without_upload(self) -> None:
|
def test_attachment_url_without_upload(self) -> None:
|
||||||
hamlet = self.example_user("hamlet")
|
hamlet = self.example_user("hamlet")
|
||||||
self.login_user(hamlet)
|
self.login_user(hamlet)
|
||||||
|
|
Loading…
Reference in New Issue