zulip/zerver/tests/test_retention.py

789 lines
34 KiB
Python

# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from unittest import mock
from django.conf import settings
from django.utils.timezone import now as timezone_now
from zerver.lib.actions import internal_send_private_message, do_add_submessage, do_delete_messages
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import queries_captured
from zerver.lib.upload import create_attachment
from zerver.models import (Message, Realm, Stream, ArchivedUserMessage, SubMessage,
ArchivedMessage, Attachment, ArchivedAttachment, UserMessage,
Reaction, ArchivedReaction, ArchivedSubMessage, ArchiveTransaction,
get_realm, get_user_profile_by_email, get_stream, get_system_bot)
from zerver.lib.retention import (
archive_messages,
move_messages_to_archive,
restore_all_data_from_archive,
clean_archived_data,
)
from zerver.tornado.event_queue import send_event
# Class with helper functions useful for testing archiving of reactions:
from zerver.tests.test_reactions import EmojiReactionBase
# Message retention periods, in days, that ArchiveMessagesTestingBase.setUp
# applies to the "zulip" and "zephyr" (MIT) test realms respectively.
ZULIP_REALM_DAYS = 30
MIT_REALM_DAYS = 100
class RetentionTestingBase(ZulipTestCase):
    """Common assertions for tests that move rows between the live tables
    and their Archived* counterparts."""

    def _get_usermessage_ids(self, message_ids: List[int]) -> List[int]:
        """Return the ids of all UserMessage rows that point at the given messages."""
        matching_rows = UserMessage.objects.filter(message_id__in=message_ids)
        return list(matching_rows.values_list('id', flat=True))

    def _verify_archive_data(self, expected_message_ids: List[int],
                             expected_usermessage_ids: List[int]) -> None:
        """Assert that the archive holds exactly the expected rows and that
        those rows are gone from the live tables."""
        archived_message_ids = set(ArchivedMessage.objects.values_list('id', flat=True))
        self.assertEqual(archived_message_ids, set(expected_message_ids))

        archived_usermessage_ids = set(ArchivedUserMessage.objects.values_list('id', flat=True))
        self.assertEqual(archived_usermessage_ids, set(expected_usermessage_ids))

        # Archived Messages and UserMessages should have been removed from the normal tables:
        live_messages = Message.objects.filter(id__in=expected_message_ids)
        self.assertEqual(live_messages.count(), 0)
        live_usermessages = UserMessage.objects.filter(id__in=expected_usermessage_ids)
        self.assertEqual(live_usermessages.count(), 0)

    def _verify_restored_data(self, expected_message_ids: List[int],
                              expected_usermessage_ids: List[int]) -> None:
        """Assert that the expected rows are back in the live tables while
        the archive still holds exactly those rows."""
        # Check that the data was restored:
        restored_message_ids = set(
            Message.objects.filter(id__in=expected_message_ids).values_list('id', flat=True)
        )
        self.assertEqual(restored_message_ids, set(expected_message_ids))

        restored_usermessage_ids = set(
            UserMessage.objects.filter(id__in=expected_usermessage_ids).values_list('id', flat=True)
        )
        self.assertEqual(restored_usermessage_ids, set(expected_usermessage_ids))

        # The Messages and UserMessages should still be in the archive - we don't delete them.
        still_archived_message_ids = set(ArchivedMessage.objects.values_list('id', flat=True))
        self.assertEqual(still_archived_message_ids, set(expected_message_ids))

        still_archived_usermessage_ids = set(ArchivedUserMessage.objects.values_list('id', flat=True))
        self.assertEqual(still_archived_usermessage_ids, set(expected_usermessage_ids))
class ArchiveMessagesTestingBase(RetentionTestingBase):
    """Fixtures for archive_messages() tests: two realms with retention
    policies plus helpers that create expired and non-expired messages."""

    def setUp(self) -> None:
        """Give both test realms a retention policy and reset all message dates."""
        super().setUp()
        self.zulip_realm = get_realm('zulip')
        self.mit_realm = get_realm('zephyr')
        self._set_realm_message_retention_value(self.zulip_realm, ZULIP_REALM_DAYS)
        self._set_realm_message_retention_value(self.mit_realm, MIT_REALM_DAYS)

        # Set publication date of all existing messages to "now", so that we have full
        # control over what's expired and what isn't.
        Message.objects.all().update(date_sent=timezone_now())

    def _set_realm_message_retention_value(self, realm: Realm, retention_period: Optional[int]) -> None:
        """Store retention_period (days, or None) as the realm's message_retention_days."""
        realm.message_retention_days = retention_period
        realm.save()

    def _set_stream_message_retention_value(self, stream: Stream, retention_period: Optional[int]) -> None:
        """Store retention_period (days, or None) as the stream's message_retention_days."""
        stream.message_retention_days = retention_period
        stream.save()

    def _change_messages_date_sent(self, msgs_ids: List[int], date_sent: datetime) -> None:
        """Overwrite date_sent on every message whose id is in msgs_ids."""
        Message.objects.filter(id__in=msgs_ids).update(date_sent=date_sent)

    def _make_mit_messages(self, message_quantity: int, date_sent: datetime) -> List[int]:
        """Send message_quantity personal messages between two MIT users,
        stamp them with date_sent and return their ids."""
        # send messages from mit.edu realm and change messages pub date
        sender = self.mit_user('espuser')
        recipient = self.mit_user('starnine')
        msg_ids = [self.send_personal_message(sender, recipient)
                   for _ in range(message_quantity)]

        self._change_messages_date_sent(msg_ids, date_sent)
        return msg_ids

    def _send_cross_realm_personal_message(self) -> int:
        """Send one private message from the notification bot to hamlet and return its id."""
        # Send message from bot to users from different realm.
        bot_email = 'notification-bot@zulip.com'
        # NOTE(review): the return value is unused; presumably this only checks
        # that the bot account exists - confirm before removing.
        get_user_profile_by_email(bot_email)
        zulip_user = self.example_user("hamlet")
        msg_id = internal_send_private_message(
            realm=self.zulip_realm,
            sender=get_system_bot(bot_email),
            recipient_user=zulip_user,
            content='test message',
        )
        assert msg_id is not None
        return msg_id

    def _make_expired_zulip_messages(self, message_quantity: int) -> List[int]:
        """Back-date message_quantity existing zulip-realm messages (skipping the
        first three by id) past the realm's retention period; return their ids."""
        msg_ids = list(Message.objects.order_by('id').filter(
            sender__realm=self.zulip_realm).values_list('id', flat=True)[3:3 + message_quantity])
        self._change_messages_date_sent(
            msg_ids,
            timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1)
        )
        return msg_ids

    def _send_messages_with_attachments(self) -> Dict[str, int]:
        """Create three attachments and three Denmark messages that link to all of them.

        Two messages come from hamlet and one from othello.  Only the first
        hamlet message is back-dated.  Returns the ids keyed by
        'expired_message_id', 'actual_message_id' and 'other_user_message_id'.
        """
        user_profile = self.example_user("hamlet")
        sample_size = 10
        host = user_profile.realm.host
        realm_id = get_realm("zulip").id
        dummy_files = [
            ('zulip.txt', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt' % (realm_id,), sample_size),
            ('temp_file.py', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py' % (realm_id,), sample_size),
            ('abc.py', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py' % (realm_id,), sample_size)
        ]

        for file_name, path_id, size in dummy_files:
            create_attachment(file_name, path_id, user_profile, size)

        self.subscribe(user_profile, "Denmark")
        body = ("Some files here ... [zulip.txt](http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" +
                " http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." +
                " http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(id=realm_id, host=host)

        expired_message_id = self.send_stream_message(user_profile, "Denmark", body)
        actual_message_id = self.send_stream_message(user_profile, "Denmark", body)

        othello = self.example_user('othello')
        other_message_id = self.send_stream_message(othello, "Denmark", body)
        # MIT_REALM_DAYS is larger than ZULIP_REALM_DAYS, so this date is older
        # than either retention period configured in setUp.
        self._change_messages_date_sent([expired_message_id], timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
        return {'expired_message_id': expired_message_id, 'actual_message_id': actual_message_id,
                'other_user_message_id': other_message_id}
class TestArchiveMessagesGeneral(ArchiveMessagesTestingBase):
    """Tests of archive_messages() driven by realm and stream retention policies."""

    def test_no_expired_messages(self) -> None:
        """Nothing is archived when no message is older than its retention period."""
        archive_messages()

        self.assertEqual(ArchivedUserMessage.objects.count(), 0)
        self.assertEqual(ArchivedMessage.objects.count(), 0)

    def test_expired_messages_in_each_realm(self) -> None:
        """General test for archiving expired messages properly with
        multiple realms involved"""
        # Make some expired messages in MIT:
        expired_mit_msg_ids = self._make_mit_messages(
            5,
            timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        )
        # Make some non-expired messages in MIT:
        self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS - 1))

        # Change some Zulip messages to be expired:
        expired_zulip_msg_ids = self._make_expired_zulip_messages(7)

        expired_msg_ids = expired_mit_msg_ids + expired_zulip_msg_ids
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        archive_messages()
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)

    def test_expired_messages_in_one_realm(self) -> None:
        """Test with a retention policy set for only the MIT realm"""
        self._set_realm_message_retention_value(self.zulip_realm, None)

        # Make some expired messages in MIT:
        expired_mit_msg_ids = self._make_mit_messages(
            5,
            timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        )
        # Make some non-expired messages in MIT:
        self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS - 1))

        # Change some Zulip messages date_sent, but the realm has no retention policy,
        # so they shouldn't get archived
        self._make_expired_zulip_messages(7)

        # Only MIT has a retention policy:
        expired_msg_ids = expired_mit_msg_ids
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        archive_messages()
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)

        self._set_realm_message_retention_value(self.zulip_realm, ZULIP_REALM_DAYS)

    def test_different_stream_realm_policies(self) -> None:
        """Check how a stream-level retention value combines with the realm-level one."""
        verona = get_stream("Verona", self.zulip_realm)
        hamlet = self.example_user("hamlet")

        msg_id = self.send_stream_message(hamlet, "Verona", "test")
        usermsg_ids = self._get_usermessage_ids([msg_id])
        self._change_messages_date_sent([msg_id], timezone_now() - timedelta(days=2))

        # Don't archive if stream's retention policy set to -1:
        self._set_realm_message_retention_value(self.zulip_realm, 1)
        self._set_stream_message_retention_value(verona, -1)
        archive_messages()
        self._verify_archive_data([], [])

        # Don't archive if stream and realm have no retention policy:
        self._set_realm_message_retention_value(self.zulip_realm, None)
        self._set_stream_message_retention_value(verona, None)
        archive_messages()
        self._verify_archive_data([], [])

        # Archive if stream has a retention policy set:
        self._set_realm_message_retention_value(self.zulip_realm, None)
        self._set_stream_message_retention_value(verona, 1)
        archive_messages()
        self._verify_archive_data([msg_id], usermsg_ids)

    def test_cross_realm_personal_message_archiving(self) -> None:
        """Check that cross-realm personal messages get correctly archived. """
        msg_ids = [self._send_cross_realm_personal_message() for _ in range(1, 7)]
        usermsg_ids = self._get_usermessage_ids(msg_ids)
        # Make the message expired on the recipient's realm:
        self._change_messages_date_sent(msg_ids, timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1))

        archive_messages()
        self._verify_archive_data(msg_ids, usermsg_ids)

    def test_archiving_interrupted(self) -> None:
        """ Check that queries get rolled back to a consistent state
        if archiving gets interrupted in the middle of processing a chunk. """
        expired_msg_ids = self._make_expired_zulip_messages(7)
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        # Insert an exception near the end of the archiving process of a chunk:
        with mock.patch("zerver.lib.retention.delete_messages", side_effect=Exception):
            with self.assertRaises(Exception):
                archive_messages(chunk_size=1000)  # Specify large chunk_size to ensure things happen in a single batch

            # Archiving code has been executed, but because we got an exception, things should have been rolled back:
            self._verify_archive_data([], [])

            self.assertEqual(
                set(Message.objects.filter(id__in=expired_msg_ids).values_list('id', flat=True)),
                set(expired_msg_ids)
            )
            self.assertEqual(
                set(UserMessage.objects.filter(id__in=expired_usermsg_ids).values_list('id', flat=True)),
                set(expired_usermsg_ids)
            )

    def test_archive_message_tool(self) -> None:
        """End-to-end test of the archiving tool, directly calling
        archive_messages."""
        # Make some expired messages in MIT:
        expired_mit_msg_ids = self._make_mit_messages(
            5,
            timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        )
        # Make some non-expired messages in MIT:
        self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS - 1))

        # Change some Zulip messages to be expired:
        expired_zulip_msg_ids = self._make_expired_zulip_messages(7)

        expired_crossrealm_msg_id = self._send_cross_realm_personal_message()
        # Make the message expired in the recipient's realm:
        self._change_messages_date_sent(
            [expired_crossrealm_msg_id],
            timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1)
        )

        expired_msg_ids = expired_mit_msg_ids + expired_zulip_msg_ids + [expired_crossrealm_msg_id]
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        archive_messages(chunk_size=2)  # Specify low chunk_size to test batching.
        # Make sure we archived what was needed:
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)

    def test_archiving_attachments(self) -> None:
        """End-to-end test for the logic for archiving attachments.  This test
        is hard to read without first reading _send_messages_with_attachments"""
        msgs_ids = self._send_messages_with_attachments()

        # First, confirm deleting the oldest message
        # (`expired_message_id`) creates ArchivedAttachment objects
        # and associates that message ID with them, but does not
        # delete the Attachment object.
        archive_messages()
        self.assertEqual(ArchivedAttachment.objects.count(), 3)
        self.assertEqual(
            list(ArchivedAttachment.objects.distinct('messages__id').values_list('messages__id',
                                                                                 flat=True)),
            [msgs_ids['expired_message_id']]
        )
        self.assertEqual(Attachment.objects.count(), 3)

        # Now make `actual_message_id` expired too.  We still don't
        # delete the Attachment objects.
        self._change_messages_date_sent([msgs_ids['actual_message_id']],
                                        timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
        archive_messages()
        self.assertEqual(Attachment.objects.count(), 3)

        # Finally, make the last message mentioning those attachments
        # expired.  We should now delete the Attachment objects and
        # each ArchivedAttachment object should list all 3 messages.
        self._change_messages_date_sent([msgs_ids['other_user_message_id']],
                                        timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))

        archive_messages()
        self.assertEqual(Attachment.objects.count(), 0)
        self.assertEqual(ArchivedAttachment.objects.count(), 3)
        self.assertEqual(
            list(ArchivedAttachment.objects.distinct('messages__id').order_by('messages__id').values_list(
                'messages__id', flat=True)),
            sorted(msgs_ids.values())
        )

        restore_all_data_from_archive()
        # Attachments should have been restored:
        self.assertEqual(Attachment.objects.count(), 3)
        self.assertEqual(ArchivedAttachment.objects.count(), 3)  # Archived data doesn't get deleted by restoring.
        self.assertEqual(
            list(Attachment.objects.distinct('messages__id').order_by('messages__id').values_list(
                'messages__id', flat=True)),
            sorted(msgs_ids.values())
        )

    def test_restoring_and_rearchiving(self) -> None:
        """Archive, restore, then archive again; the re-archived messages must
        all be attached to the newest ArchiveTransaction."""
        expired_msg_ids = self._make_mit_messages(
            7,
            timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        )
        expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)

        archive_messages(chunk_size=4)
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        # Materialize the rows so that refresh_from_db() below updates the very
        # instances we assert on, instead of relying on the queryset's cache.
        transactions = list(ArchiveTransaction.objects.order_by("id"))
        self.assertEqual(len(transactions), 2)  # With chunk_size 4, there should be 2 transactions

        restore_all_data_from_archive()
        for transaction in transactions:
            transaction.refresh_from_db()
        self.assertTrue(transactions[0].restored)
        self.assertTrue(transactions[1].restored)

        archive_messages(chunk_size=10)
        self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)

        transactions = list(ArchiveTransaction.objects.order_by("id"))
        self.assertEqual(len(transactions), 3)

        archived_messages = ArchivedMessage.objects.filter(id__in=expired_msg_ids)
        # Check that the re-archived messages are correctly assigned to the new transaction:
        for message in archived_messages:
            self.assertEqual(message.archive_transaction_id, transactions[2].id)
class TestArchivingSubMessages(ArchiveMessagesTestingBase):
    """archive_messages() must carry SubMessage rows along with their messages."""

    def test_archiving_submessages(self) -> None:
        """Submessages of expired messages are archived and later restored."""
        expired_msg_ids = self._make_expired_zulip_messages(2)
        cordelia = self.example_user('cordelia')
        hamlet = self.example_user('hamlet')

        # (sender, target message id, JSON payload) for each submessage to create.
        submessage_specs = [
            (cordelia, expired_msg_ids[0], '{"name": "alice", "salary": 20}'),
            (hamlet, expired_msg_ids[0], '{"name": "john", "salary": 30}'),
            (cordelia, expired_msg_ids[1], '{"name": "jack", "salary": 10}'),
        ]
        for author, target_msg_id, payload in submessage_specs:
            do_add_submessage(
                realm=self.zulip_realm,
                sender_id=author.id,
                message_id=target_msg_id,
                msg_type='whatever',
                content=payload
            )

        submessage_ids = list(
            SubMessage.objects.filter(message_id__in=expired_msg_ids).values_list('id', flat=True)
        )

        self.assertEqual(len(submessage_ids), 3)
        live_submessages = SubMessage.objects.filter(id__in=submessage_ids)
        self.assertEqual(live_submessages.count(), 3)

        archive_messages()
        remaining_submessages = SubMessage.objects.filter(id__in=submessage_ids)
        self.assertEqual(remaining_submessages.count(), 0)

        archived_ids = set(
            ArchivedSubMessage.objects.filter(id__in=submessage_ids).values_list('id', flat=True)
        )
        self.assertEqual(archived_ids, set(submessage_ids))

        restore_all_data_from_archive()
        restored_ids = set(
            SubMessage.objects.filter(id__in=submessage_ids).values_list('id', flat=True)
        )
        self.assertEqual(restored_ids, set(submessage_ids))
class TestArchivingReactions(ArchiveMessagesTestingBase, EmojiReactionBase):
    """archive_messages() must carry Reaction rows along with their messages."""

    def test_archiving_reactions(self) -> None:
        """Reactions on expired messages are archived and later restored."""
        expired_msg_ids = self._make_expired_zulip_messages(2)

        # (target message id, reacting user) for each reaction to post.
        reaction_specs = [
            (expired_msg_ids[0], 'hamlet'),
            (expired_msg_ids[0], 'cordelia'),
            (expired_msg_ids[1], 'hamlet'),
        ]
        for target_msg_id, reacting_user in reaction_specs:
            self.post_zulip_reaction(target_msg_id, reacting_user)

        reaction_ids = list(
            Reaction.objects.filter(message_id__in=expired_msg_ids).values_list('id', flat=True)
        )

        self.assertEqual(len(reaction_ids), 3)
        live_reactions = Reaction.objects.filter(id__in=reaction_ids)
        self.assertEqual(live_reactions.count(), 3)

        archive_messages()
        remaining_reactions = Reaction.objects.filter(id__in=reaction_ids)
        self.assertEqual(remaining_reactions.count(), 0)

        archived_ids = set(
            ArchivedReaction.objects.filter(id__in=reaction_ids).values_list('id', flat=True)
        )
        self.assertEqual(archived_ids, set(reaction_ids))

        restore_all_data_from_archive()
        restored_ids = set(
            Reaction.objects.filter(id__in=reaction_ids).values_list('id', flat=True)
        )
        self.assertEqual(restored_ids, set(reaction_ids))
class MoveMessageToArchiveBase(RetentionTestingBase):
    """Fixtures for tests that call move_messages_to_archive() directly."""

    def setUp(self) -> None:
        """Remember hamlet as the sender and cordelia as the recipient."""
        super().setUp()
        self.sender = self.example_user('hamlet')
        self.recipient = self.example_user('cordelia')

    def _create_attachments(self) -> None:
        """Create five attachments owned by hamlet under the zulip realm's upload path."""
        sample_size = 10
        realm_id = get_realm("zulip").id
        dummy_files = [
            ('zulip.txt', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt' % (realm_id,), sample_size),
            ('temp_file.py', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py' % (realm_id,), sample_size),
            ('abc.py', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py' % (realm_id,), sample_size),
            ('hello.txt', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt' % (realm_id,), sample_size),
            ('new.py', '%s/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py' % (realm_id,), sample_size)
        ]
        owner = self.example_user('hamlet')
        for dummy_file in dummy_files:
            name, upload_path, byte_size = dummy_file
            create_attachment(name, upload_path, owner, byte_size)

    def _assert_archive_empty(self) -> None:
        """Assert that no archived user messages, messages or attachments exist."""
        for archive_model in (ArchivedUserMessage, ArchivedMessage, ArchivedAttachment):
            self.assertFalse(archive_model.objects.exists())
class MoveMessageToArchiveGeneral(MoveMessageToArchiveBase):
    """Tests of move_messages_to_archive() for plain messages and attachments."""

    def test_personal_messages_archiving(self) -> None:
        """Personal messages can be archived and then restored."""
        msg_ids = [self.send_personal_message(self.sender, self.recipient)
                   for _ in range(3)]
        usermsg_ids = self._get_usermessage_ids(msg_ids)

        self._assert_archive_empty()
        move_messages_to_archive(message_ids=msg_ids)
        self._verify_archive_data(msg_ids, usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(msg_ids, usermsg_ids)

    def test_stream_messages_archiving(self) -> None:
        """Stream messages can be archived and then restored."""
        msg_ids = [self.send_stream_message(self.sender, "Verona")
                   for _ in range(3)]
        usermsg_ids = self._get_usermessage_ids(msg_ids)

        self._assert_archive_empty()
        move_messages_to_archive(message_ids=msg_ids)
        self._verify_archive_data(msg_ids, usermsg_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(msg_ids, usermsg_ids)

    def test_archiving_messages_second_time(self) -> None:
        """Archiving already-archived messages raises Message.DoesNotExist."""
        msg_ids = [self.send_stream_message(self.sender, "Verona")
                   for _ in range(3)]
        usermsg_ids = self._get_usermessage_ids(msg_ids)

        self._assert_archive_empty()
        move_messages_to_archive(message_ids=msg_ids)
        self._verify_archive_data(msg_ids, usermsg_ids)

        with self.assertRaises(Message.DoesNotExist):
            move_messages_to_archive(message_ids=msg_ids)

    def test_archiving_messages_with_attachment(self) -> None:
        """Attachments follow their messages into the archive and back,
        keeping the attachment-to-message links intact."""
        self._create_attachments()
        realm_id = get_realm("zulip").id
        host = get_realm("zulip").host
        body1 = """Some files here ...[zulip.txt](
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
Some more.... http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py
""".format(id=realm_id, host=host)
        body2 = """Some files here
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt ...
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
""".format(id=realm_id, host=host)

        first_msg_id = self.send_personal_message(self.sender, self.recipient, body1)
        second_msg_id = self.send_personal_message(self.sender, self.recipient, body2)
        msg_ids = [first_msg_id, second_msg_id]

        # Record which messages reference each attachment before anything is archived.
        attachment_id_to_message_ids = {}  # type: Dict[int, List[int]]
        attachment_ids = list(
            Attachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True)
        )
        for att_id in attachment_ids:
            linked_messages = Message.objects.filter(attachment__id=att_id)
            attachment_id_to_message_ids[att_id] = list(linked_messages.values_list("id", flat=True))

        usermsg_ids = self._get_usermessage_ids(msg_ids)

        self._assert_archive_empty()
        move_messages_to_archive(message_ids=msg_ids)
        self._verify_archive_data(msg_ids, usermsg_ids)

        self.assertFalse(Attachment.objects.exists())
        archived_attachment_ids = list(
            ArchivedAttachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True)
        )

        self.assertEqual(set(attachment_ids), set(archived_attachment_ids))
        for att_id in archived_attachment_ids:
            archived_linked_ids = set(ArchivedMessage.objects.filter(
                archivedattachment__id=att_id).values_list("id", flat=True))
            self.assertEqual(set(attachment_id_to_message_ids[att_id]), archived_linked_ids)

        restore_all_data_from_archive()
        self._verify_restored_data(msg_ids, usermsg_ids)

        restored_attachment_ids = list(
            Attachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True)
        )

        self.assertEqual(set(attachment_ids), set(restored_attachment_ids))
        for att_id in restored_attachment_ids:
            restored_linked_ids = set(
                Message.objects.filter(attachment__id=att_id).values_list("id", flat=True)
            )
            self.assertEqual(set(attachment_id_to_message_ids[att_id]), restored_linked_ids)

    def test_archiving_message_with_shared_attachment(self) -> None:
        """Attachments survive in the live table for as long as any
        non-archived message still links to them."""
        # Make sure that attachments still in use in other messages don't get deleted:
        self._create_attachments()
        realm_id = get_realm("zulip").id
        host = get_realm("zulip").host
        body = """Some files here ...[zulip.txt](
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
Some more.... http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py ...
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
""".format(id=realm_id, host=host)

        msg_id = self.send_personal_message(self.sender, self.recipient, body)
        # Simulate a reply with the same contents.
        reply_msg_id = self.send_personal_message(
            from_user=self.recipient,
            to_user=self.sender,
            content=body,
        )

        usermsg_ids = self._get_usermessage_ids([msg_id])
        attachment_ids = list(
            Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)
        )

        self._assert_archive_empty()
        # Archive one of the messages:
        move_messages_to_archive(message_ids=[msg_id])
        self._verify_archive_data([msg_id], usermsg_ids)
        # Attachments shouldn't have been deleted, as the second message links to them:
        self.assertEqual(Attachment.objects.count(), 5)

        archived_for_first = set(
            ArchivedAttachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)
        )
        self.assertEqual(archived_for_first, set(attachment_ids))

        # Restore the first message:
        restore_all_data_from_archive()
        # Archive the second:
        move_messages_to_archive(message_ids=[reply_msg_id])
        # The restored messages links to the Attachments, so they shouldn't be deleted:
        self.assertEqual(Attachment.objects.count(), 5)

        # Archive the first message again:
        move_messages_to_archive(message_ids=[msg_id])
        # Now the attachment should have been deleted:
        self.assertEqual(Attachment.objects.count(), 0)

        # Restore everything:
        restore_all_data_from_archive()
        restored_for_first = set(
            Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)
        )
        self.assertEqual(restored_for_first, set(attachment_ids))
class MoveMessageToArchiveWithSubMessages(MoveMessageToArchiveBase):
    """move_messages_to_archive() must carry SubMessage rows with the message."""

    def test_archiving_message_with_submessages(self) -> None:
        """Submessages move to the archive with their message and come back on restore."""
        msg_id = self.send_stream_message(self.sender, "Verona")
        cordelia = self.example_user('cordelia')
        hamlet = self.example_user('hamlet')

        # (sender, JSON payload) for each submessage attached to the message.
        submessage_specs = [
            (cordelia, '{"name": "alice", "salary": 20}'),
            (hamlet, '{"name": "john", "salary": 30}'),
        ]
        for author, payload in submessage_specs:
            do_add_submessage(
                realm=get_realm('zulip'),
                sender_id=author.id,
                message_id=msg_id,
                msg_type='whatever',
                content=payload
            )

        submessage_ids = list(
            SubMessage.objects.filter(message_id=msg_id).values_list('id', flat=True)
        )

        live_submessages = SubMessage.objects.filter(id__in=submessage_ids)
        self.assertEqual(live_submessages.count(), 2)
        move_messages_to_archive(message_ids=[msg_id])

        archived_ids = set(
            ArchivedSubMessage.objects.filter(message_id=msg_id).values_list("id", flat=True)
        )
        self.assertEqual(archived_ids, set(submessage_ids))
        remaining_submessages = SubMessage.objects.filter(id__in=submessage_ids)
        self.assertEqual(remaining_submessages.count(), 0)

        restore_all_data_from_archive()
        restored_ids = set(
            SubMessage.objects.filter(id__in=submessage_ids).values_list('id', flat=True)
        )
        self.assertEqual(restored_ids, set(submessage_ids))
class MoveMessageToArchiveWithReactions(MoveMessageToArchiveBase, EmojiReactionBase):
    """move_messages_to_archive() must carry Reaction rows with the message."""

    def test_archiving_message_with_reactions(self) -> None:
        """Reactions move to the archive with their message and come back on restore."""
        msg_id = self.send_stream_message(self.sender, "Verona")

        for reacting_user in ('hamlet', 'cordelia'):
            self.post_zulip_reaction(msg_id, reacting_user)

        reaction_ids = list(
            Reaction.objects.filter(message_id=msg_id).values_list('id', flat=True)
        )

        live_reactions = Reaction.objects.filter(id__in=reaction_ids)
        self.assertEqual(live_reactions.count(), 2)
        move_messages_to_archive(message_ids=[msg_id])

        archived_ids = set(
            ArchivedReaction.objects.filter(message_id=msg_id).values_list("id", flat=True)
        )
        self.assertEqual(archived_ids, set(reaction_ids))
        remaining_reactions = Reaction.objects.filter(id__in=reaction_ids)
        self.assertEqual(remaining_reactions.count(), 0)

        restore_all_data_from_archive()
        restored_ids = set(
            Reaction.objects.filter(id__in=reaction_ids).values_list('id', flat=True)
        )
        self.assertEqual(restored_ids, set(reaction_ids))
class TestCleaningArchive(ArchiveMessagesTestingBase):
    """Tests of clean_archived_data()."""

    def test_clean_archived_data(self) -> None:
        """Transactions older than the vacuuming delay are deleted together
        with their archived messages; the newest transaction is kept."""
        self._make_expired_zulip_messages(7)
        archive_messages(chunk_size=2)  # Small chunk size to have multiple transactions

        # Order explicitly by id: the test singles out "the last" transaction, so
        # it must not depend on whatever order the database happens to return.
        transactions = list(ArchiveTransaction.objects.order_by("id"))
        transactions_to_clean = transactions[:-1]
        newest_transaction = transactions[-1]

        # Age every transaction but the newest one past the vacuuming delay.
        for transaction in transactions_to_clean:
            transaction.timestamp = timezone_now() - timedelta(
                days=settings.ARCHIVED_DATA_VACUUMING_DELAY_DAYS + 1)
            transaction.save()

        message_ids_to_clean = list(ArchivedMessage.objects.filter(
            archive_transaction__in=transactions_to_clean).values_list('id', flat=True))

        clean_archived_data()
        remaining_transactions = list(ArchiveTransaction.objects.all())
        self.assertEqual(len(remaining_transactions), 1)
        # All transactions except the last one were deleted:
        self.assertEqual(remaining_transactions[0].id, newest_transaction.id)
        # And corresponding ArchivedMessages should have been deleted:
        self.assertFalse(ArchivedMessage.objects.filter(id__in=message_ids_to_clean).exists())
        self.assertFalse(ArchivedUserMessage.objects.filter(message_id__in=message_ids_to_clean).exists())

        for message in ArchivedMessage.objects.all():
            self.assertEqual(message.archive_transaction_id, remaining_transactions[0].id)
class TestDoDeleteMessages(ZulipTestCase):
    """Tests of do_delete_messages() and of the legacy delete_message event format."""

    def test_do_delete_messages_multiple(self) -> None:
        """Deleting ten messages at once archives them under a single
        transaction within the expected number of queries."""
        realm = get_realm("zulip")
        cordelia = self.example_user('cordelia')
        message_ids = [self.send_stream_message(cordelia, "Denmark", str(index))
                       for index in range(10)]
        messages = Message.objects.filter(id__in=message_ids)

        # Only the deletion itself is measured; the checks below run outside the capture.
        with queries_captured() as queries:
            do_delete_messages(realm, messages)
        self.assertFalse(Message.objects.filter(id__in=message_ids).exists())
        self.assert_length(queries, 37)

        archived_messages = ArchivedMessage.objects.filter(id__in=message_ids)
        self.assertEqual(archived_messages.count(), len(message_ids))
        transaction_ids = {archived.archive_transaction_id for archived in archived_messages}
        self.assertEqual(len(transaction_ids), 1)

    def test_old_event_format_processed_correctly(self) -> None:
        """
        do_delete_messages used to send events with users in dict format {"id": <int>}.
        We have a block in process_notification to deal with that old format, that should be
        deleted in a later release. This test is meant to ensure correctness of that block.
        """
        realm = get_realm("zulip")
        cordelia = self.example_user('cordelia')
        hamlet = self.example_user('hamlet')
        message_id = self.send_personal_message(cordelia, hamlet)
        message = Message.objects.get(id=message_id)

        # Build the event before archiving, while the Message row still exists.
        event = {
            'type': 'delete_message',
            'sender': message.sender.email,
            'sender_id': message.sender_id,
            'message_id': message.id,
            'message_type': "private",
            'recipient_id': message.recipient_id
        }
        move_messages_to_archive([message_id])

        # We only send the event to see no exception is thrown - as it would be if the block
        # in process_notification to handle this old format of "users to notify" wasn't correct.
        legacy_format_users = [{"id": cordelia.id}, {"id": hamlet.id}]
        send_event(realm, event, legacy_format_users)