mirror of https://github.com/zulip/zulip.git
968 lines
42 KiB
Python
968 lines
42 KiB
Python
from datetime import datetime, timedelta
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
from unittest import mock
|
|
|
|
from django.conf import settings
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
from zerver.lib.actions import do_add_submessage, do_delete_messages, internal_send_private_message
|
|
from zerver.lib.retention import (
|
|
archive_messages,
|
|
clean_archived_data,
|
|
get_realms_and_streams_for_archiving,
|
|
move_messages_to_archive,
|
|
restore_all_data_from_archive,
|
|
restore_retention_policy_deletions_for_stream,
|
|
)
|
|
from zerver.lib.test_classes import ZulipTestCase
|
|
from zerver.lib.test_helpers import queries_captured
|
|
from zerver.lib.upload import create_attachment
|
|
from zerver.models import (
|
|
ArchivedAttachment,
|
|
ArchivedMessage,
|
|
ArchivedReaction,
|
|
ArchivedSubMessage,
|
|
ArchivedUserMessage,
|
|
ArchiveTransaction,
|
|
Attachment,
|
|
Message,
|
|
Reaction,
|
|
Realm,
|
|
Stream,
|
|
SubMessage,
|
|
UserMessage,
|
|
get_realm,
|
|
get_stream,
|
|
get_system_bot,
|
|
get_user_profile_by_email,
|
|
)
|
|
# Class with helper functions useful for testing archiving of reactions:
|
|
from zerver.tests.test_reactions import EmojiReactionBase
|
|
from zerver.tornado.event_queue import send_event
|
|
|
|
ZULIP_REALM_DAYS = 30
|
|
MIT_REALM_DAYS = 100
|
|
|
|
class RetentionTestingBase(ZulipTestCase):
|
|
def _get_usermessage_ids(self, message_ids: List[int]) -> List[int]:
|
|
return list(UserMessage.objects.filter(message_id__in=message_ids).values_list('id', flat=True))
|
|
|
|
def _verify_archive_data(self, expected_message_ids: List[int],
|
|
expected_usermessage_ids: List[int]) -> None:
|
|
self.assertEqual(
|
|
set(ArchivedMessage.objects.values_list('id', flat=True)),
|
|
set(expected_message_ids),
|
|
)
|
|
|
|
self.assertEqual(
|
|
set(ArchivedUserMessage.objects.values_list('id', flat=True)),
|
|
set(expected_usermessage_ids),
|
|
)
|
|
|
|
# Archived Messages and UserMessages should have been removed from the normal tables:
|
|
self.assertEqual(Message.objects.filter(id__in=expected_message_ids).count(), 0)
|
|
self.assertEqual(UserMessage.objects.filter(id__in=expected_usermessage_ids).count(), 0)
|
|
|
|
def _verify_restored_data(self, expected_message_ids: List[int],
|
|
expected_usermessage_ids: List[int]) -> None:
|
|
# Check that the data was restored:
|
|
self.assertEqual(
|
|
set(Message.objects.filter(id__in=expected_message_ids).values_list('id', flat=True)),
|
|
set(expected_message_ids),
|
|
)
|
|
|
|
self.assertEqual(
|
|
set(UserMessage.objects.filter(id__in=expected_usermessage_ids).values_list('id', flat=True)),
|
|
set(expected_usermessage_ids),
|
|
)
|
|
|
|
# The Messages and UserMessages should still be in the archive - we don't delete them.
|
|
self.assertEqual(
|
|
set(ArchivedMessage.objects.values_list('id', flat=True)),
|
|
set(expected_message_ids),
|
|
)
|
|
|
|
self.assertEqual(
|
|
set(ArchivedUserMessage.objects.values_list('id', flat=True)),
|
|
set(expected_usermessage_ids),
|
|
)
|
|
|
|
class ArchiveMessagesTestingBase(RetentionTestingBase):
|
|
def setUp(self) -> None:
|
|
super().setUp()
|
|
self.zulip_realm = get_realm('zulip')
|
|
self.mit_realm = get_realm('zephyr')
|
|
self._set_realm_message_retention_value(self.zulip_realm, ZULIP_REALM_DAYS)
|
|
self._set_realm_message_retention_value(self.mit_realm, MIT_REALM_DAYS)
|
|
|
|
# Set publication date of all existing messages to "now", so that we have full
|
|
# control over what's expired and what isn't.
|
|
Message.objects.all().update(date_sent=timezone_now())
|
|
|
|
def _set_realm_message_retention_value(self, realm: Realm, retention_period: int) -> None:
|
|
realm.message_retention_days = retention_period
|
|
realm.save()
|
|
|
|
def _set_stream_message_retention_value(self, stream: Stream, retention_period: Optional[int]) -> None:
|
|
stream.message_retention_days = retention_period
|
|
stream.save()
|
|
|
|
def _change_messages_date_sent(self, msgs_ids: List[int], date_sent: datetime) -> None:
|
|
Message.objects.filter(id__in=msgs_ids).update(date_sent=date_sent)
|
|
|
|
def _make_mit_messages(self, message_quantity: int, date_sent: datetime) -> Any:
|
|
# send messages from mit.edu realm and change messages pub date
|
|
sender = self.mit_user('espuser')
|
|
recipient = self.mit_user('starnine')
|
|
msg_ids = [self.send_personal_message(sender, recipient)
|
|
for i in range(message_quantity)]
|
|
|
|
self._change_messages_date_sent(msg_ids, date_sent)
|
|
return msg_ids
|
|
|
|
def _send_cross_realm_personal_message(self) -> int:
|
|
# Send message from bot to users from different realm.
|
|
bot_email = 'notification-bot@zulip.com'
|
|
get_user_profile_by_email(bot_email)
|
|
zulip_user = self.example_user("hamlet")
|
|
msg_id = internal_send_private_message(
|
|
realm=self.zulip_realm,
|
|
sender=get_system_bot(bot_email),
|
|
recipient_user=zulip_user,
|
|
content='test message',
|
|
)
|
|
assert msg_id is not None
|
|
return msg_id
|
|
|
|
def _make_expired_zulip_messages(self, message_quantity: int) -> List[int]:
|
|
msg_ids = list(Message.objects.order_by('id').filter(
|
|
sender__realm=self.zulip_realm).values_list('id', flat=True)[3:3 + message_quantity])
|
|
self._change_messages_date_sent(
|
|
msg_ids,
|
|
timezone_now() - timedelta(ZULIP_REALM_DAYS+1),
|
|
)
|
|
|
|
return msg_ids
|
|
|
|
def _send_messages_with_attachments(self) -> Dict[str, int]:
|
|
user_profile = self.example_user("hamlet")
|
|
sample_size = 10
|
|
host = user_profile.realm.host
|
|
realm_id = get_realm("zulip").id
|
|
dummy_files = [
|
|
('zulip.txt', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt', sample_size),
|
|
('temp_file.py', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py', sample_size),
|
|
('abc.py', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py', sample_size),
|
|
]
|
|
|
|
for file_name, path_id, size in dummy_files:
|
|
create_attachment(file_name, path_id, user_profile, size)
|
|
|
|
self.subscribe(user_profile, "Denmark")
|
|
body = ("Some files here ... [zulip.txt](http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" +
|
|
" http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." +
|
|
" http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py").format(id=realm_id, host=host)
|
|
|
|
expired_message_id = self.send_stream_message(user_profile, "Denmark", body)
|
|
actual_message_id = self.send_stream_message(user_profile, "Denmark", body)
|
|
|
|
othello = self.example_user('othello')
|
|
other_message_id = self.send_stream_message(othello, "Denmark", body)
|
|
self._change_messages_date_sent([expired_message_id], timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
|
|
return {'expired_message_id': expired_message_id, 'actual_message_id': actual_message_id,
|
|
'other_user_message_id': other_message_id}
|
|
|
|
class TestArchiveMessagesGeneral(ArchiveMessagesTestingBase):
|
|
def test_no_expired_messages(self) -> None:
|
|
archive_messages()
|
|
|
|
self.assertEqual(ArchivedUserMessage.objects.count(), 0)
|
|
self.assertEqual(ArchivedMessage.objects.count(), 0)
|
|
|
|
def test_expired_messages_in_each_realm(self) -> None:
|
|
"""General test for archiving expired messages properly with
|
|
multiple realms involved"""
|
|
# Make some expired messages in MIT:
|
|
expired_mit_msg_ids = self._make_mit_messages(
|
|
5,
|
|
timezone_now() - timedelta(days=MIT_REALM_DAYS+1),
|
|
)
|
|
# Make some non-expired messages in MIT:
|
|
self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS-1))
|
|
|
|
# Change some Zulip messages to be expired:
|
|
expired_zulip_msg_ids = list(Message.objects.order_by('id').filter(
|
|
sender__realm=self.zulip_realm).values_list('id', flat=True)[3:10])
|
|
self._change_messages_date_sent(
|
|
expired_zulip_msg_ids,
|
|
timezone_now() - timedelta(ZULIP_REALM_DAYS+1),
|
|
)
|
|
|
|
expired_msg_ids = expired_mit_msg_ids + expired_zulip_msg_ids
|
|
expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)
|
|
|
|
archive_messages()
|
|
self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
def test_expired_messages_in_one_realm(self) -> None:
|
|
"""Test with a retention policy set for only the MIT realm"""
|
|
self._set_realm_message_retention_value(self.zulip_realm, -1)
|
|
|
|
# Make some expired messages in MIT:
|
|
expired_mit_msg_ids = self._make_mit_messages(
|
|
5,
|
|
timezone_now() - timedelta(days=MIT_REALM_DAYS+1),
|
|
)
|
|
# Make some non-expired messages in MIT:
|
|
self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS-1))
|
|
|
|
# Change some Zulip messages date_sent, but the realm has no retention policy,
|
|
# so they shouldn't get archived
|
|
zulip_msg_ids = list(Message.objects.order_by('id').filter(
|
|
sender__realm=self.zulip_realm).values_list('id', flat=True)[3:10])
|
|
self._change_messages_date_sent(
|
|
zulip_msg_ids,
|
|
timezone_now() - timedelta(ZULIP_REALM_DAYS+1),
|
|
)
|
|
|
|
# Only MIT has a retention policy:
|
|
expired_msg_ids = expired_mit_msg_ids
|
|
expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)
|
|
|
|
archive_messages()
|
|
self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
self._set_realm_message_retention_value(self.zulip_realm, ZULIP_REALM_DAYS)
|
|
|
|
def test_different_stream_realm_policies(self) -> None:
|
|
verona = get_stream("Verona", self.zulip_realm)
|
|
hamlet = self.example_user("hamlet")
|
|
|
|
msg_id = self.send_stream_message(hamlet, "Verona", "test")
|
|
usermsg_ids = self._get_usermessage_ids([msg_id])
|
|
self._change_messages_date_sent([msg_id], timezone_now() - timedelta(days=2))
|
|
|
|
# Don't archive if stream's retention policy set to -1:
|
|
self._set_realm_message_retention_value(self.zulip_realm, 1)
|
|
self._set_stream_message_retention_value(verona, -1)
|
|
archive_messages()
|
|
self._verify_archive_data([], [])
|
|
|
|
# Don't archive if stream and realm have no retention policy:
|
|
self._set_realm_message_retention_value(self.zulip_realm, -1)
|
|
self._set_stream_message_retention_value(verona, None)
|
|
archive_messages()
|
|
self._verify_archive_data([], [])
|
|
|
|
# Archive if stream has a retention policy set:
|
|
self._set_realm_message_retention_value(self.zulip_realm, -1)
|
|
self._set_stream_message_retention_value(verona, 1)
|
|
archive_messages()
|
|
self._verify_archive_data([msg_id], usermsg_ids)
|
|
|
|
def test_cross_realm_personal_message_archiving(self) -> None:
|
|
"""Check that cross-realm personal messages get correctly archived. """
|
|
msg_ids = [self._send_cross_realm_personal_message() for i in range(1, 7)]
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
# Make the message expired on the recipient's realm:
|
|
self._change_messages_date_sent(msg_ids, timezone_now() - timedelta(ZULIP_REALM_DAYS+1))
|
|
|
|
archive_messages()
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
def test_archiving_interrupted(self) -> None:
|
|
""" Check that queries get rolled back to a consistent state
|
|
if archiving gets interrupted in the middle of processing a chunk. """
|
|
expired_msg_ids = self._make_expired_zulip_messages(7)
|
|
expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)
|
|
|
|
# Insert an exception near the end of the archiving process of a chunk:
|
|
with mock.patch("zerver.lib.retention.delete_messages", side_effect=Exception):
|
|
with self.assertRaises(Exception):
|
|
archive_messages(chunk_size=1000) # Specify large chunk_size to ensure things happen in a single batch
|
|
|
|
# Archiving code has been executed, but because we got an exception, things should have been rolled back:
|
|
self._verify_archive_data([], [])
|
|
|
|
self.assertEqual(
|
|
set(Message.objects.filter(id__in=expired_msg_ids).values_list('id', flat=True)),
|
|
set(expired_msg_ids),
|
|
)
|
|
self.assertEqual(
|
|
set(UserMessage.objects.filter(id__in=expired_usermsg_ids).values_list('id', flat=True)),
|
|
set(expired_usermsg_ids),
|
|
)
|
|
|
|
def test_archive_message_tool(self) -> None:
|
|
"""End-to-end test of the archiving tool, directly calling
|
|
archive_messages."""
|
|
# Make some expired messages in MIT:
|
|
expired_mit_msg_ids = self._make_mit_messages(
|
|
5,
|
|
timezone_now() - timedelta(days=MIT_REALM_DAYS+1),
|
|
)
|
|
# Make some non-expired messages in MIT:
|
|
self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS-1))
|
|
|
|
# Change some Zulip messages to be expired:
|
|
expired_zulip_msg_ids = self._make_expired_zulip_messages(7)
|
|
|
|
expired_crossrealm_msg_id = self._send_cross_realm_personal_message()
|
|
# Make the message expired in the recipient's realm:
|
|
self._change_messages_date_sent(
|
|
[expired_crossrealm_msg_id],
|
|
timezone_now() - timedelta(ZULIP_REALM_DAYS+1),
|
|
)
|
|
|
|
expired_msg_ids = expired_mit_msg_ids + expired_zulip_msg_ids + [expired_crossrealm_msg_id]
|
|
expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)
|
|
|
|
archive_messages(chunk_size=2) # Specify low chunk_size to test batching.
|
|
# Make sure we archived what needed:
|
|
self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
def test_archiving_attachments(self) -> None:
|
|
"""End-to-end test for the logic for archiving attachments. This test
|
|
is hard to read without first reading _send_messages_with_attachments"""
|
|
msgs_ids = self._send_messages_with_attachments()
|
|
|
|
# First, confirm deleting the oldest message
|
|
# (`expired_message_id`) creates ArchivedAttachment objects
|
|
# and associates that message ID with them, but does not
|
|
# delete the Attachment object.
|
|
archive_messages()
|
|
self.assertEqual(ArchivedAttachment.objects.count(), 3)
|
|
self.assertEqual(
|
|
list(ArchivedAttachment.objects.distinct('messages__id').values_list('messages__id',
|
|
flat=True)),
|
|
[msgs_ids['expired_message_id']],
|
|
)
|
|
self.assertEqual(Attachment.objects.count(), 3)
|
|
|
|
# Now make `actual_message_id` expired too. We still don't
|
|
# delete the Attachment objects.
|
|
self._change_messages_date_sent([msgs_ids['actual_message_id']],
|
|
timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
|
|
archive_messages()
|
|
self.assertEqual(Attachment.objects.count(), 3)
|
|
|
|
# Finally, make the last message mentioning those attachments
|
|
# expired. We should now delete the Attachment objects and
|
|
# each ArchivedAttachment object should list all 3 messages.
|
|
self._change_messages_date_sent([msgs_ids['other_user_message_id']],
|
|
timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
|
|
|
|
archive_messages()
|
|
self.assertEqual(Attachment.objects.count(), 0)
|
|
self.assertEqual(ArchivedAttachment.objects.count(), 3)
|
|
self.assertEqual(
|
|
list(ArchivedAttachment.objects.distinct('messages__id').order_by('messages__id').values_list(
|
|
'messages__id', flat=True)),
|
|
sorted(msgs_ids.values()),
|
|
)
|
|
|
|
restore_all_data_from_archive()
|
|
# Attachments should have been restored:
|
|
self.assertEqual(Attachment.objects.count(), 3)
|
|
self.assertEqual(ArchivedAttachment.objects.count(), 3) # Archived data doesn't get deleted by restoring.
|
|
self.assertEqual(
|
|
list(Attachment.objects.distinct('messages__id').order_by('messages__id').values_list(
|
|
'messages__id', flat=True)),
|
|
sorted(msgs_ids.values()),
|
|
)
|
|
|
|
def test_restoring_and_rearchiving(self) -> None:
|
|
expired_msg_ids = self._make_mit_messages(
|
|
7,
|
|
timezone_now() - timedelta(days=MIT_REALM_DAYS+1),
|
|
)
|
|
expired_usermsg_ids = self._get_usermessage_ids(expired_msg_ids)
|
|
|
|
archive_messages(chunk_size=4)
|
|
self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
transactions = ArchiveTransaction.objects.all()
|
|
self.assertEqual(len(transactions), 2) # With chunk_size 4, there should be 2 transactions
|
|
|
|
restore_all_data_from_archive()
|
|
transactions[0].refresh_from_db()
|
|
transactions[1].refresh_from_db()
|
|
self.assertTrue(transactions[0].restored)
|
|
self.assertTrue(transactions[1].restored)
|
|
|
|
archive_messages(chunk_size=10)
|
|
self._verify_archive_data(expired_msg_ids, expired_usermsg_ids)
|
|
|
|
transactions = ArchiveTransaction.objects.order_by("id")
|
|
self.assertEqual(len(transactions), 3)
|
|
|
|
archived_messages = ArchivedMessage.objects.filter(id__in=expired_msg_ids)
|
|
# Check that the re-archived messages are correctly assigned to the new transaction:
|
|
for message in archived_messages:
|
|
self.assertEqual(message.archive_transaction_id, transactions[2].id)
|
|
|
|
class TestArchivingSubMessages(ArchiveMessagesTestingBase):
|
|
def test_archiving_submessages(self) -> None:
|
|
expired_msg_ids = self._make_expired_zulip_messages(2)
|
|
cordelia = self.example_user('cordelia')
|
|
hamlet = self.example_user('hamlet')
|
|
|
|
do_add_submessage(
|
|
realm=self.zulip_realm,
|
|
sender_id=cordelia.id,
|
|
message_id=expired_msg_ids[0],
|
|
msg_type='whatever',
|
|
content='{"name": "alice", "salary": 20}',
|
|
)
|
|
do_add_submessage(
|
|
realm=self.zulip_realm,
|
|
sender_id=hamlet.id,
|
|
message_id=expired_msg_ids[0],
|
|
msg_type='whatever',
|
|
content='{"name": "john", "salary": 30}',
|
|
)
|
|
|
|
do_add_submessage(
|
|
realm=self.zulip_realm,
|
|
sender_id=cordelia.id,
|
|
message_id=expired_msg_ids[1],
|
|
msg_type='whatever',
|
|
content='{"name": "jack", "salary": 10}',
|
|
)
|
|
|
|
submessage_ids = list(
|
|
SubMessage.objects.filter(message_id__in=expired_msg_ids).values_list('id', flat=True),
|
|
)
|
|
|
|
self.assertEqual(len(submessage_ids), 3)
|
|
self.assertEqual(SubMessage.objects.filter(id__in=submessage_ids).count(), 3)
|
|
archive_messages()
|
|
self.assertEqual(SubMessage.objects.filter(id__in=submessage_ids).count(), 0)
|
|
|
|
self.assertEqual(
|
|
set(ArchivedSubMessage.objects.filter(id__in=submessage_ids).values_list('id', flat=True)),
|
|
set(submessage_ids),
|
|
)
|
|
|
|
restore_all_data_from_archive()
|
|
self.assertEqual(
|
|
set(SubMessage.objects.filter(id__in=submessage_ids).values_list('id', flat=True)),
|
|
set(submessage_ids),
|
|
)
|
|
|
|
class TestArchivingReactions(ArchiveMessagesTestingBase, EmojiReactionBase):
|
|
def test_archiving_reactions(self) -> None:
|
|
expired_msg_ids = self._make_expired_zulip_messages(2)
|
|
|
|
self.post_zulip_reaction(expired_msg_ids[0], 'hamlet')
|
|
self.post_zulip_reaction(expired_msg_ids[0], 'cordelia')
|
|
|
|
self.post_zulip_reaction(expired_msg_ids[1], 'hamlet')
|
|
|
|
reaction_ids = list(
|
|
Reaction.objects.filter(message_id__in=expired_msg_ids).values_list('id', flat=True),
|
|
)
|
|
|
|
self.assertEqual(len(reaction_ids), 3)
|
|
self.assertEqual(Reaction.objects.filter(id__in=reaction_ids).count(), 3)
|
|
archive_messages()
|
|
self.assertEqual(Reaction.objects.filter(id__in=reaction_ids).count(), 0)
|
|
|
|
self.assertEqual(
|
|
set(ArchivedReaction.objects.filter(id__in=reaction_ids).values_list('id', flat=True)),
|
|
set(reaction_ids),
|
|
)
|
|
|
|
restore_all_data_from_archive()
|
|
self.assertEqual(
|
|
set(Reaction.objects.filter(id__in=reaction_ids).values_list('id', flat=True)),
|
|
set(reaction_ids),
|
|
)
|
|
|
|
class MoveMessageToArchiveBase(RetentionTestingBase):
|
|
def setUp(self) -> None:
|
|
super().setUp()
|
|
self.sender = self.example_user('hamlet')
|
|
self.recipient = self.example_user('cordelia')
|
|
|
|
def _create_attachments(self) -> None:
|
|
sample_size = 10
|
|
realm_id = get_realm("zulip").id
|
|
dummy_files = [
|
|
('zulip.txt', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt', sample_size),
|
|
('temp_file.py', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py', sample_size),
|
|
('abc.py', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py', sample_size),
|
|
('hello.txt', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt', sample_size),
|
|
('new.py', f'{realm_id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py', sample_size),
|
|
]
|
|
user_profile = self.example_user('hamlet')
|
|
for file_name, path_id, size in dummy_files:
|
|
create_attachment(file_name, path_id, user_profile, size)
|
|
|
|
def _assert_archive_empty(self) -> None:
|
|
self.assertFalse(ArchivedUserMessage.objects.exists())
|
|
self.assertFalse(ArchivedMessage.objects.exists())
|
|
self.assertFalse(ArchivedAttachment.objects.exists())
|
|
|
|
class MoveMessageToArchiveGeneral(MoveMessageToArchiveBase):
|
|
def test_personal_messages_archiving(self) -> None:
|
|
msg_ids = [self.send_personal_message(self.sender, self.recipient)
|
|
for i in range(0, 3)]
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
|
|
self._assert_archive_empty()
|
|
move_messages_to_archive(message_ids=msg_ids)
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(msg_ids, usermsg_ids)
|
|
|
|
def test_move_messages_to_archive_with_realm_argument(self) -> None:
|
|
realm = get_realm("zulip")
|
|
msg_ids = [self.send_personal_message(self.sender, self.recipient)
|
|
for i in range(0, 3)]
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
|
|
self._assert_archive_empty()
|
|
move_messages_to_archive(message_ids=msg_ids, realm=realm)
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
archive_transaction = ArchiveTransaction.objects.last()
|
|
self.assertEqual(archive_transaction.realm, realm)
|
|
|
|
def test_stream_messages_archiving(self) -> None:
|
|
msg_ids = [self.send_stream_message(self.sender, "Verona")
|
|
for i in range(0, 3)]
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
|
|
self._assert_archive_empty()
|
|
move_messages_to_archive(message_ids=msg_ids)
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(msg_ids, usermsg_ids)
|
|
|
|
def test_archiving_messages_second_time(self) -> None:
|
|
msg_ids = [self.send_stream_message(self.sender, "Verona")
|
|
for i in range(0, 3)]
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
|
|
self._assert_archive_empty()
|
|
move_messages_to_archive(message_ids=msg_ids)
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
with self.assertRaises(Message.DoesNotExist):
|
|
move_messages_to_archive(message_ids=msg_ids)
|
|
|
|
def test_archiving_messages_multiple_realms(self) -> None:
|
|
"""
|
|
Verifies that move_messages_to_archive works correctly
|
|
if called on messages in multiple realms.
|
|
"""
|
|
iago = self.example_user("iago")
|
|
othello = self.example_user("othello")
|
|
|
|
cordelia = self.lear_user("cordelia")
|
|
king = self.lear_user("king")
|
|
|
|
zulip_msg_ids = [self.send_personal_message(iago, othello)
|
|
for i in range(0, 3)]
|
|
leary_msg_ids = [self.send_personal_message(cordelia, king)
|
|
for i in range(0, 3)]
|
|
msg_ids = zulip_msg_ids + leary_msg_ids
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
|
|
self._assert_archive_empty()
|
|
move_messages_to_archive(message_ids=msg_ids)
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(msg_ids, usermsg_ids)
|
|
|
|
def test_archiving_messages_with_attachment(self) -> None:
|
|
self._create_attachments()
|
|
realm_id = get_realm("zulip").id
|
|
host = get_realm("zulip").host
|
|
body1 = """Some files here ...[zulip.txt](
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
|
|
Some more.... http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py
|
|
""".format(id=realm_id, host=host)
|
|
body2 = """Some files here
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt ...
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
|
|
""".format(id=realm_id, host=host)
|
|
|
|
msg_ids = [
|
|
self.send_personal_message(self.sender, self.recipient, body1),
|
|
self.send_personal_message(self.sender, self.recipient, body2),
|
|
]
|
|
|
|
attachment_id_to_message_ids: Dict[int, List[int]] = {}
|
|
attachment_ids = list(
|
|
Attachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True),
|
|
)
|
|
for attachment_id in attachment_ids:
|
|
attachment_id_to_message_ids[attachment_id] = list(
|
|
Message.objects.filter(attachment__id=attachment_id).values_list("id", flat=True),
|
|
)
|
|
|
|
usermsg_ids = self._get_usermessage_ids(msg_ids)
|
|
|
|
self._assert_archive_empty()
|
|
move_messages_to_archive(message_ids=msg_ids)
|
|
self._verify_archive_data(msg_ids, usermsg_ids)
|
|
|
|
self.assertFalse(Attachment.objects.exists())
|
|
archived_attachment_ids = list(
|
|
ArchivedAttachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True),
|
|
)
|
|
|
|
self.assertEqual(set(attachment_ids), set(archived_attachment_ids))
|
|
for attachment_id in archived_attachment_ids:
|
|
self.assertEqual(
|
|
set(attachment_id_to_message_ids[attachment_id]),
|
|
set(ArchivedMessage.objects.filter(
|
|
archivedattachment__id=attachment_id).values_list("id", flat=True)),
|
|
)
|
|
|
|
restore_all_data_from_archive()
|
|
self._verify_restored_data(msg_ids, usermsg_ids)
|
|
|
|
restored_attachment_ids = list(
|
|
Attachment.objects.filter(messages__id__in=msg_ids).values_list("id", flat=True),
|
|
)
|
|
|
|
self.assertEqual(set(attachment_ids), set(restored_attachment_ids))
|
|
for attachment_id in restored_attachment_ids:
|
|
self.assertEqual(
|
|
set(attachment_id_to_message_ids[attachment_id]),
|
|
set(Message.objects.filter(attachment__id=attachment_id).values_list("id", flat=True)),
|
|
)
|
|
|
|
def test_archiving_message_with_shared_attachment(self) -> None:
|
|
# Make sure that attachments still in use in other messages don't get deleted:
|
|
self._create_attachments()
|
|
realm_id = get_realm("zulip").id
|
|
host = get_realm("zulip").host
|
|
body = """Some files here ...[zulip.txt](
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
|
|
Some more.... http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py ...
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
|
|
http://{host}/user_uploads/{id}/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
|
|
""".format(id=realm_id, host=host)
|
|
|
|
msg_id = self.send_personal_message(self.sender, self.recipient, body)
|
|
# Simulate a reply with the same contents.
|
|
reply_msg_id = self.send_personal_message(
|
|
from_user=self.recipient,
|
|
to_user=self.sender,
|
|
content=body,
|
|
)
|
|
|
|
usermsg_ids = self._get_usermessage_ids([msg_id])
|
|
attachment_ids = list(
|
|
Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True),
|
|
)
|
|
|
|
self._assert_archive_empty()
|
|
# Archive one of the messages:
|
|
move_messages_to_archive(message_ids=[msg_id])
|
|
self._verify_archive_data([msg_id], usermsg_ids)
|
|
# Attachments shouldn't have been deleted, as the second message links to them:
|
|
self.assertEqual(Attachment.objects.count(), 5)
|
|
|
|
self.assertEqual(
|
|
set(ArchivedAttachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)),
|
|
set(attachment_ids),
|
|
)
|
|
|
|
# Restore the first message:
|
|
restore_all_data_from_archive()
|
|
# Archive the second:
|
|
move_messages_to_archive(message_ids=[reply_msg_id])
|
|
# The restored messages links to the Attachments, so they shouldn't be deleted:
|
|
self.assertEqual(Attachment.objects.count(), 5)
|
|
|
|
# Archive the first message again:
|
|
move_messages_to_archive(message_ids=[msg_id])
|
|
# Now the attachment should have been deleted:
|
|
self.assertEqual(Attachment.objects.count(), 0)
|
|
|
|
# Restore everything:
|
|
restore_all_data_from_archive()
|
|
self.assertEqual(
|
|
set(Attachment.objects.filter(messages__id=msg_id).values_list("id", flat=True)),
|
|
set(attachment_ids),
|
|
)
|
|
|
|
class MoveMessageToArchiveWithSubMessages(MoveMessageToArchiveBase):
|
|
def test_archiving_message_with_submessages(self) -> None:
|
|
msg_id = self.send_stream_message(self.sender, "Verona")
|
|
cordelia = self.example_user('cordelia')
|
|
hamlet = self.example_user('hamlet')
|
|
|
|
do_add_submessage(
|
|
realm=get_realm('zulip'),
|
|
sender_id=cordelia.id,
|
|
message_id=msg_id,
|
|
msg_type='whatever',
|
|
content='{"name": "alice", "salary": 20}',
|
|
)
|
|
do_add_submessage(
|
|
realm=get_realm('zulip'),
|
|
sender_id=hamlet.id,
|
|
message_id=msg_id,
|
|
msg_type='whatever',
|
|
content='{"name": "john", "salary": 30}',
|
|
)
|
|
|
|
submessage_ids = list(
|
|
SubMessage.objects.filter(message_id=msg_id).values_list('id', flat=True),
|
|
)
|
|
|
|
self.assertEqual(SubMessage.objects.filter(id__in=submessage_ids).count(), 2)
|
|
move_messages_to_archive(message_ids=[msg_id])
|
|
|
|
self.assertEqual(
|
|
set(ArchivedSubMessage.objects.filter(message_id=msg_id).values_list("id", flat=True)),
|
|
set(submessage_ids),
|
|
)
|
|
self.assertEqual(SubMessage.objects.filter(id__in=submessage_ids).count(), 0)
|
|
|
|
restore_all_data_from_archive()
|
|
self.assertEqual(
|
|
set(SubMessage.objects.filter(id__in=submessage_ids).values_list('id', flat=True)),
|
|
set(submessage_ids),
|
|
)
|
|
|
|
class MoveMessageToArchiveWithReactions(MoveMessageToArchiveBase, EmojiReactionBase):
|
|
def test_archiving_message_with_reactions(self) -> None:
|
|
msg_id = self.send_stream_message(self.sender, "Verona")
|
|
|
|
self.post_zulip_reaction(msg_id, 'hamlet')
|
|
self.post_zulip_reaction(msg_id, 'cordelia')
|
|
|
|
reaction_ids = list(
|
|
Reaction.objects.filter(message_id=msg_id).values_list('id', flat=True),
|
|
)
|
|
|
|
self.assertEqual(Reaction.objects.filter(id__in=reaction_ids).count(), 2)
|
|
move_messages_to_archive(message_ids=[msg_id])
|
|
|
|
self.assertEqual(
|
|
set(ArchivedReaction.objects.filter(message_id=msg_id).values_list("id", flat=True)),
|
|
set(reaction_ids),
|
|
)
|
|
self.assertEqual(Reaction.objects.filter(id__in=reaction_ids).count(), 0)
|
|
|
|
restore_all_data_from_archive()
|
|
self.assertEqual(
|
|
set(Reaction.objects.filter(id__in=reaction_ids).values_list('id', flat=True)),
|
|
set(reaction_ids),
|
|
)
|
|
|
|
class TestCleaningArchive(ArchiveMessagesTestingBase):
|
|
def test_clean_archived_data(self) -> None:
|
|
self._make_expired_zulip_messages(7)
|
|
archive_messages(chunk_size=2) # Small chunk size to have multiple transactions
|
|
|
|
transactions = list(ArchiveTransaction.objects.all())
|
|
for transaction in transactions[0:-1]:
|
|
transaction.timestamp = timezone_now() - timedelta(
|
|
days=settings.ARCHIVED_DATA_VACUUMING_DELAY_DAYS + 1)
|
|
transaction.save()
|
|
|
|
message_ids_to_clean = list(ArchivedMessage.objects.filter(
|
|
archive_transaction__in=transactions[0:-1]).values_list('id', flat=True))
|
|
|
|
clean_archived_data()
|
|
remaining_transactions = list(ArchiveTransaction.objects.all())
|
|
self.assertEqual(len(remaining_transactions), 1)
|
|
# All transactions except the last one were deleted:
|
|
self.assertEqual(remaining_transactions[0].id, transactions[-1].id)
|
|
# And corresponding ArchivedMessages should have been deleted:
|
|
self.assertFalse(ArchivedMessage.objects.filter(id__in=message_ids_to_clean).exists())
|
|
self.assertFalse(ArchivedUserMessage.objects.filter(message_id__in=message_ids_to_clean).exists())
|
|
|
|
for message in ArchivedMessage.objects.all():
|
|
self.assertEqual(message.archive_transaction_id, remaining_transactions[0].id)
|
|
|
|
|
|
class TestGetRealmAndStreamsForArchiving(ZulipTestCase):
|
|
def fix_ordering_of_result(self, result: List[Tuple[Realm, List[Stream]]]) -> None:
|
|
"""
|
|
This is a helper for giving the struture returned by get_realms_and_streams_for_archiving
|
|
a consistent ordering.
|
|
"""
|
|
# Sort the list of tuples by realm id:
|
|
result.sort(key=lambda x: x[0].id)
|
|
|
|
# Now we sort the lists of streams in each tuple:
|
|
for realm, streams_list in result:
|
|
streams_list.sort(key=lambda stream: stream.id)
|
|
|
|
def simple_get_realms_and_streams_for_archiving(self) -> List[Tuple[Realm, List[Stream]]]:
|
|
"""
|
|
This is an implementation of the function we're testing, but using the obvious,
|
|
unoptimized algorithm. We can use this for additional verification of correctness,
|
|
by comparing the output of the two implementations.
|
|
"""
|
|
|
|
result = []
|
|
for realm in Realm.objects.all():
|
|
if realm.message_retention_days != -1:
|
|
streams = Stream.objects.filter(realm=realm).exclude(message_retention_days=-1)
|
|
result.append((realm, list(streams)))
|
|
else:
|
|
streams = Stream.objects.filter(realm=realm).exclude(message_retention_days__isnull=True) \
|
|
.exclude(message_retention_days=-1)
|
|
if streams.exists():
|
|
result.append((realm, list(streams)))
|
|
|
|
return result
|
|
|
|
def test_get_realms_and_streams_for_archiving(self) -> None:
|
|
zulip_realm = get_realm("zulip")
|
|
zulip_realm.message_retention_days = 10
|
|
zulip_realm.save()
|
|
|
|
verona = get_stream("Verona", zulip_realm)
|
|
verona.message_retention_days = -1 # Block archiving for this stream
|
|
verona.save()
|
|
denmark = get_stream("Denmark", zulip_realm)
|
|
denmark.message_retention_days = 1
|
|
denmark.save()
|
|
|
|
zephyr_realm = get_realm("zephyr")
|
|
zephyr_realm.message_retention_days = -1
|
|
zephyr_realm.save()
|
|
self.make_stream("normal stream", realm=zephyr_realm)
|
|
|
|
archiving_blocked_zephyr_stream = self.make_stream("no archiving", realm=zephyr_realm)
|
|
archiving_blocked_zephyr_stream.message_retention_days = -1
|
|
archiving_blocked_zephyr_stream.save()
|
|
|
|
archiving_enabled_zephyr_stream = self.make_stream("with archiving", realm=zephyr_realm)
|
|
archiving_enabled_zephyr_stream.message_retention_days = 1
|
|
archiving_enabled_zephyr_stream.save()
|
|
|
|
Realm.objects.create(string_id="no_archiving", invite_required=False, message_retention_days=-1)
|
|
empty_realm_with_archiving = Realm.objects.create(string_id="with_archiving", invite_required=False,
|
|
message_retention_days=1)
|
|
|
|
# We construct a list representing how the result of get_realms_and_streams_for_archiving should be.
|
|
# One nuisance is that the ordering of the elements in the result structure is not deterministic,
|
|
# so we use a helper to order both structures in a consistent manner. This wouldn't be necessary
|
|
# if python had a true "unordered list" data structure. Set doesn't do the job, because it requires
|
|
# elements to be hashable.
|
|
expected_result = [
|
|
(zulip_realm, list(Stream.objects.filter(realm=zulip_realm).exclude(id=verona.id))),
|
|
(zephyr_realm, [archiving_enabled_zephyr_stream]),
|
|
(empty_realm_with_archiving, []),
|
|
]
|
|
self.fix_ordering_of_result(expected_result)
|
|
|
|
simple_algorithm_result = self.simple_get_realms_and_streams_for_archiving()
|
|
self.fix_ordering_of_result(simple_algorithm_result)
|
|
|
|
result = get_realms_and_streams_for_archiving()
|
|
self.fix_ordering_of_result(result)
|
|
|
|
self.assert_length(result, len(expected_result))
|
|
self.assertEqual(result, expected_result)
|
|
|
|
self.assert_length(result, len(simple_algorithm_result))
|
|
self.assertEqual(result, simple_algorithm_result)
|
|
|
|
class TestRestoreStreamMessages(ArchiveMessagesTestingBase):
|
|
def test_restore_retention_policy_deletions_for_stream(self) -> None:
|
|
cordelia = self.example_user('cordelia')
|
|
hamlet = self.example_user('hamlet')
|
|
|
|
realm = get_realm("zulip")
|
|
stream_name = "Denmark"
|
|
stream = get_stream(stream_name, realm)
|
|
|
|
message_ids_to_archive_manually = [
|
|
self.send_stream_message(cordelia, stream_name, str(i)) for i in range(0, 2)
|
|
]
|
|
usermessage_ids_to_archive_manually = self._get_usermessage_ids(message_ids_to_archive_manually)
|
|
message_ids_to_archive_by_policy = [
|
|
self.send_stream_message(hamlet, stream_name, str(i)) for i in range(0, 2)
|
|
]
|
|
usermessage_ids_to_archive_by_policy = self._get_usermessage_ids(message_ids_to_archive_by_policy)
|
|
|
|
expected_archived_message_ids = message_ids_to_archive_manually + message_ids_to_archive_by_policy
|
|
expected_archived_usermessage_ids = usermessage_ids_to_archive_manually + \
|
|
usermessage_ids_to_archive_by_policy
|
|
|
|
self._set_stream_message_retention_value(stream, 5)
|
|
self._change_messages_date_sent(message_ids_to_archive_by_policy, timezone_now() - timedelta(days=6))
|
|
|
|
move_messages_to_archive(message_ids_to_archive_manually)
|
|
archive_messages()
|
|
|
|
self._verify_archive_data(expected_archived_message_ids, expected_archived_usermessage_ids)
|
|
|
|
restore_retention_policy_deletions_for_stream(stream)
|
|
|
|
# Verify that we restore the stream messages that were archived due to retention policy,
|
|
# but not the ones manually deleted.
|
|
self.assert_length(Message.objects.filter(id__in=message_ids_to_archive_by_policy),
|
|
len(message_ids_to_archive_by_policy))
|
|
self.assertFalse(Message.objects.filter(id__in=message_ids_to_archive_manually))
|
|
|
|
|
|
class TestDoDeleteMessages(ZulipTestCase):
|
|
def test_do_delete_messages_multiple(self) -> None:
|
|
realm = get_realm("zulip")
|
|
cordelia = self.example_user('cordelia')
|
|
message_ids = [self.send_stream_message(cordelia, "Denmark", str(i)) for i in range(0, 10)]
|
|
messages = Message.objects.filter(id__in=message_ids)
|
|
|
|
with queries_captured() as queries:
|
|
do_delete_messages(realm, messages)
|
|
self.assertFalse(Message.objects.filter(id__in=message_ids).exists())
|
|
self.assert_length(queries, 18)
|
|
|
|
archived_messages = ArchivedMessage.objects.filter(id__in=message_ids)
|
|
self.assertEqual(archived_messages.count(), len(message_ids))
|
|
self.assertEqual(len({message.archive_transaction_id for message in archived_messages}), 1)
|
|
|
|
def test_old_event_format_processed_correctly(self) -> None:
|
|
"""
|
|
do_delete_messages used to send events with users in dict format {"id": <int>}.
|
|
We have a block in process_notification to deal with that old format, that should be
|
|
deleted in a later release. This test is meant to ensure correctness of that block.
|
|
"""
|
|
realm = get_realm("zulip")
|
|
cordelia = self.example_user('cordelia')
|
|
hamlet = self.example_user('hamlet')
|
|
message_id = self.send_personal_message(cordelia, hamlet)
|
|
message = Message.objects.get(id=message_id)
|
|
|
|
event = {
|
|
'type': 'delete_message',
|
|
'sender': message.sender.email,
|
|
'sender_id': message.sender_id,
|
|
'message_id': message.id,
|
|
'message_type': "private",
|
|
'recipient_id': message.recipient_id,
|
|
}
|
|
move_messages_to_archive([message_id])
|
|
# We only send the event to see no exception is thrown - as it would be if the block
|
|
# in process_notification to handle this old format of "users to notify" wasn't correct.
|
|
send_event(realm, event, [{"id": cordelia.id}, {"id": hamlet.id}])
|