zulip/zerver/tests/test_retention.py

462 lines
24 KiB
Python

# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple
from django.utils.timezone import now as timezone_now
from zerver.lib.actions import internal_send_private_message
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.upload import create_attachment
from zerver.models import (Message, Realm, UserProfile, ArchivedUserMessage,
ArchivedMessage, Attachment, ArchivedAttachment, UserMessage,
get_user_profile_by_email, get_system_bot)
from zerver.lib.retention import (
archive_messages,
clean_unused_messages,
delete_expired_messages,
delete_expired_user_messages,
move_expired_messages_to_archive,
move_expired_user_messages_to_archive,
move_messages_to_archive
)
# Retention period, in days, that TestRetentionLib.setUp assigns to the
# 'zulip' realm (stored in Realm.message_retention_days).
ZULIP_REALM_DAYS = 30
# Retention period, in days, that TestRetentionLib.setUp assigns to the
# 'zephyr' realm (stored in Realm.message_retention_days).
MIT_REALM_DAYS = 100
class TestRetentionLib(ZulipTestCase):
    """
    Test receiving expired messages retention tool.

    Exercises the archiving helpers imported from ``zerver.lib.retention``
    against two realms ('zulip' and 'zephyr') that are given different
    retention periods in ``setUp``.
    """

    def setUp(self) -> None:
        """Give both realms a retention period and make every message fresh."""
        super().setUp()
        self.zulip_realm = self._set_realm_message_retention_value('zulip', ZULIP_REALM_DAYS)
        self.mit_realm = self._set_realm_message_retention_value('zephyr', MIT_REALM_DAYS)
        # Stamp every existing message as sent "now" so that nothing is
        # expired until a test explicitly back-dates it.
        Message.objects.all().update(pub_date=timezone_now())

    @staticmethod
    def _set_realm_message_retention_value(realm_str: str, retention_period: int) -> Realm:
        """Set ``message_retention_days`` on the realm with string_id
        ``realm_str``, save it and return the realm."""
        realm = Realm.objects.get(string_id=realm_str)
        realm.message_retention_days = retention_period
        realm.save()
        return realm

    @staticmethod
    def _change_messages_pub_date(msgs_ids: List[int], pub_date: datetime) -> Any:
        """Set ``pub_date`` on the messages with the given ids.

        Returns the affected messages as a queryset ordered by id.
        """
        messages = Message.objects.filter(id__in=msgs_ids).order_by('id')
        messages.update(pub_date=pub_date)
        return messages

    @staticmethod
    def _realms_with_retention_policy() -> Any:
        """Return a queryset of the realms that have ``message_retention_days`` set."""
        return Realm.objects.filter(message_retention_days__isnull=False)

    def _move_expired_to_archive(self) -> None:
        """Run both move-to-archive steps for every realm with a retention policy."""
        for realm_instance in self._realms_with_retention_policy():
            move_expired_messages_to_archive(realm_instance)
            move_expired_user_messages_to_archive(realm_instance)

    def _make_mit_messages(self, message_quantity: int, pub_date: datetime) -> Any:
        """Send ``message_quantity`` personal messages inside the zephyr realm
        and back-date them to ``pub_date``.

        Returns the messages as a queryset ordered by id.
        """
        sender = self.mit_user('espuser')
        recipient = self.mit_user('starnine')
        msgs_ids = [
            self.send_personal_message(sender.email, recipient.email, sender_realm='zephyr')
            for _ in range(message_quantity)
        ]
        return self._change_messages_pub_date(msgs_ids, pub_date)

    def _send_cross_realm_message(self) -> int:
        """Send a private message from the notification bot to a user of the
        zephyr realm and return the id of the sent message."""
        bot_email = 'notification-bot@zulip.com'
        mit_user = UserProfile.objects.filter(realm=self.mit_realm).first()
        # .first() returns None on an empty queryset; fail loudly here
        # rather than with an AttributeError below.
        assert mit_user is not None
        result = internal_send_private_message(
            realm=mit_user.realm,
            sender=get_system_bot(bot_email),
            recipient_user=mit_user,
            content='test message',
        )
        assert result is not None
        return result

    def _check_archive_data_by_realm(self, expected_messages: Any, realm: Realm) -> None:
        """Assert that the archive for ``realm`` matches ``expected_messages``.

        ``expected_messages`` must be a Message queryset. Both the archived
        message ids and the archived user-message ids are compared; the
        latter against the UserMessage rows of ``expected_messages``.
        """
        self._check_archived_messages_ids_by_realm(
            [msg.id for msg in expected_messages.order_by('id')],
            realm
        )
        user_messages = UserMessage.objects.filter(message__in=expected_messages).order_by('id')
        archived_user_messages = ArchivedUserMessage.objects.filter(
            user_profile__realm=realm).order_by('id')
        self.assertEqual(
            [user_msg.id for user_msg in user_messages],
            [arc_user_msg.id for arc_user_msg in archived_user_messages]
        )

    def _check_archived_messages_ids_by_realm(self, expected_message_ids: List[int],
                                              realm: Realm) -> None:
        """Assert that the archived messages having an archived user message
        for a user of ``realm`` are exactly ``expected_message_ids`` (in id order)."""
        # A message has one archived user message per recipient, so the
        # join needs de-duplicating.
        arc_messages = ArchivedMessage.objects.filter(
            archivedusermessage__user_profile__realm=realm).distinct('id').order_by('id')
        self.assertEqual(
            expected_message_ids,
            [arc_msg.id for arc_msg in arc_messages]
        )

    def _send_messages_with_attachments(self) -> Dict[str, int]:
        """Create three attachments and send three stream messages that all
        reference them; only the first message is back-dated.

        Returns a dict with the message ids under the keys
        'expired_message_id', 'actual_message_id' and 'other_user_message_id'.
        """
        user_profile = self.example_user("hamlet")
        sender_email = user_profile.email
        sample_size = 10
        dummy_files = [
            ('zulip.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt', sample_size),
            ('temp_file.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py', sample_size),
            ('abc.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py', sample_size)
        ]
        for file_name, path_id, size in dummy_files:
            create_attachment(file_name, path_id, user_profile, size)

        self.subscribe(user_profile, "Denmark")
        body = (
            "Some files here ...[zulip.txt](http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" +
            "http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." +
            "http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py"
        )

        expired_message_id = self.send_stream_message(sender_email, "Denmark", body)
        actual_message_id = self.send_stream_message(sender_email, "Denmark", body)
        other_message_id = self.send_stream_message("othello@zulip.com", "Denmark", body)
        # MIT_REALM_DAYS is the longer of the two retention periods, so
        # this age is past expiry for either realm.
        self._change_messages_pub_date([expired_message_id], timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
        return {'expired_message_id': expired_message_id, 'actual_message_id': actual_message_id,
                'other_user_message_id': other_message_id}

    def _check_cross_realm_messages_archiving(self, arc_user_msg_qty: int, period: int,
                                              realm: Optional[Realm]=None) -> int:
        """Send a cross-realm message aged ``period`` days, archive, delete
        and check the resulting row counts.

        ``arc_user_msg_qty`` is the number of archived UserMessage rows
        expected. If ``realm`` is given, the archived user messages are
        expected to be only those belonging to users of that realm;
        otherwise all user messages of the sent message are expected.

        Returns the id of the sent message.
        """
        sent_message_id = self._send_cross_realm_message()
        all_user_messages_qty = UserMessage.objects.count()
        self._change_messages_pub_date([sent_message_id], timezone_now() - timedelta(days=period))
        self._move_expired_to_archive()

        user_messages_sent = UserMessage.objects.order_by('id').filter(
            message_id=sent_message_id)
        archived_messages = ArchivedMessage.objects.all()
        archived_user_messages = ArchivedUserMessage.objects.order_by('id')
        self.assertEqual(user_messages_sent.count(), 2)

        # Compare archived messages and user messages
        # with expired sent messages.
        self.assertEqual(archived_messages.count(), 1)
        self.assertEqual(archived_user_messages.count(), arc_user_msg_qty)
        if realm is not None:
            # Filter by the realm the caller asked for, not a fixed one.
            user_messages_sent = user_messages_sent.filter(user_profile__realm=realm)
        self.assertEqual(
            [arc_user_msg.id for arc_user_msg in archived_user_messages],
            [user_msg.id for user_msg in user_messages_sent]
        )

        for realm_instance in self._realms_with_retention_policy():
            delete_expired_user_messages(realm_instance)
            delete_expired_messages(realm_instance)
        clean_unused_messages()

        # Check messages and user messages after deleting expired messages
        # from the main tables.
        self.assertEqual(
            UserMessage.objects.filter(message_id=sent_message_id).count(),
            2 - arc_user_msg_qty)
        self.assertEqual(
            UserMessage.objects.count(),
            all_user_messages_qty - arc_user_msg_qty)
        return sent_message_id

    def _make_expired_messages(self) -> Dict[str, List[int]]:
        """Create expired messages in both realms.

        Returns the expired message ids under the keys 'mit_msgs_ids' and
        'zulip_msgs_ids'.
        """
        # Create messages in Zephyr realm with already-expired date
        expected_mit_msgs = self._make_mit_messages(3, timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
        expected_mit_msgs_ids = [msg.id for msg in expected_mit_msgs.order_by('id')]
        # These four are one day short of expiry and must stay untouched.
        self._make_mit_messages(4, timezone_now() - timedelta(days=MIT_REALM_DAYS - 1))

        # Move existing messages in Zulip realm to be expired
        expected_zulip_msgs_ids = list(Message.objects.order_by('id').filter(
            sender__realm=self.zulip_realm).values_list('id', flat=True)[3:10])
        self._change_messages_pub_date(expected_zulip_msgs_ids,
                                       timezone_now() - timedelta(days=ZULIP_REALM_DAYS + 1))
        return {
            "mit_msgs_ids": expected_mit_msgs_ids,
            "zulip_msgs_ids": expected_zulip_msgs_ids
        }

    def test_no_expired_messages(self) -> None:
        """With no back-dated messages, nothing is archived."""
        self._move_expired_to_archive()

        self.assertEqual(ArchivedUserMessage.objects.count(), 0)
        self.assertEqual(ArchivedMessage.objects.count(), 0)

    def test_expired_messages_in_each_realm(self) -> None:
        """General test for archiving expired messages properly with
        multiple realms involved"""
        expired_ids = self._make_expired_messages()
        expected_message_ids = expired_ids['mit_msgs_ids'] + expired_ids['zulip_msgs_ids']

        self._move_expired_to_archive()

        self.assertEqual(ArchivedMessage.objects.count(), len(expected_message_ids))
        self.assertEqual(
            ArchivedUserMessage.objects.count(),
            UserMessage.objects.filter(message_id__in=expected_message_ids).count()
        )

        # Compare expected messages ids with archived messages for both realms
        self._check_archive_data_by_realm(
            Message.objects.filter(id__in=expired_ids['mit_msgs_ids']), self.mit_realm)
        self._check_archive_data_by_realm(
            Message.objects.filter(id__in=expired_ids['zulip_msgs_ids']), self.zulip_realm)

    def test_expired_messages_in_one_realm(self) -> None:
        """Test with expired messages in only the MIT realm.

        Both realms have a retention policy (see setUp); only the MIT
        realm has messages old enough to be archived.
        """
        expected_mit_msgs = self._make_mit_messages(
            5, timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))

        self._move_expired_to_archive()

        self.assertEqual(ArchivedMessage.objects.count(), 5)
        # Twice the message count: two archived user messages per message.
        self.assertEqual(ArchivedUserMessage.objects.count(), 10)

        # Compare expected messages ids with archived messages in mit realm
        self._check_archive_data_by_realm(expected_mit_msgs, self.mit_realm)
        # Check no archive messages for zulip realm.
        self.assertEqual(
            ArchivedMessage.objects.filter(
                archivedusermessage__user_profile__realm=self.zulip_realm).count(),
            0
        )
        self.assertEqual(
            ArchivedUserMessage.objects.filter(user_profile__realm=self.zulip_realm).count(),
            0
        )

    def test_cross_realm_messages_archiving_one_realm_expired(self) -> None:
        """Test that a cross-realm message that is expired in only
        one of the realms only has the UserMessage for that realm archived"""
        arc_msg_id = self._check_cross_realm_messages_archiving(
            1, ZULIP_REALM_DAYS + 1, realm=self.zulip_realm)
        self.assertTrue(Message.objects.filter(id=arc_msg_id).exists())

    def test_cross_realm_messages_archiving_two_realm_expired(self) -> None:
        """Check that archiving a message that's expired in both
        realms is archived both in Message and UserMessage."""
        arc_msg_id = self._check_cross_realm_messages_archiving(2, MIT_REALM_DAYS + 1)
        self.assertFalse(Message.objects.filter(id=arc_msg_id).exists())

    def test_archive_message_tool(self) -> None:
        """End-to-end test of the archiving tool, directly calling
        archive_messages."""
        expected_message_ids_dict = self._make_expired_messages()

        # We also include a cross-realm message in this test.
        sent_cross_realm_message_id = self._send_cross_realm_message()
        expected_message_ids_dict['mit_msgs_ids'].append(sent_cross_realm_message_id)
        self._change_messages_pub_date(
            [sent_cross_realm_message_id],
            timezone_now() - timedelta(days=MIT_REALM_DAYS + 1)
        )
        expected_message_ids = expected_message_ids_dict['mit_msgs_ids'] + expected_message_ids_dict['zulip_msgs_ids']

        # Get expired user messages by message ids
        expected_user_msgs_ids = list(UserMessage.objects.filter(
            message_id__in=expected_message_ids).order_by('id').values_list('id', flat=True))

        msgs_qty = Message.objects.count()
        archive_messages()

        # Compare archived messages and user messages with expired messages
        self.assertEqual(ArchivedMessage.objects.count(), len(expected_message_ids))
        self.assertEqual(ArchivedUserMessage.objects.count(), len(expected_user_msgs_ids))

        # Check the main tables after archiving: every expired message
        # (including the cross-realm one) must be gone from Message.
        self.assertEqual(Message.objects.count(), msgs_qty - ArchivedMessage.objects.count())
        self.assertEqual(
            Message.objects.filter(id__in=expected_message_ids_dict['zulip_msgs_ids']).count(), 0)
        self.assertEqual(
            Message.objects.filter(id__in=expected_message_ids_dict['mit_msgs_ids']).count(), 0)

        # Check archived messages by realm using our standard checker
        # function; we add the cross-realm message ID to the
        # zulip_realm list for this test because its sender lives in
        # that realm in the development environment.
        expected_message_ids_dict['zulip_msgs_ids'].append(sent_cross_realm_message_id)
        self._check_archived_messages_ids_by_realm(
            expected_message_ids_dict['zulip_msgs_ids'], self.zulip_realm)
        self._check_archived_messages_ids_by_realm(
            expected_message_ids_dict['mit_msgs_ids'], self.mit_realm)

    def test_archiving_attachments(self) -> None:
        """End-to-end test for the logic for archiving attachments. This test
        is hard to read without first reading _send_messages_with_attachments"""
        msgs_ids = self._send_messages_with_attachments()

        # First, confirm deleting the oldest message
        # (`expired_message_id`) creates ArchivedAttachment objects
        # and associates that message ID with them, but does not
        # delete the Attachment object.
        archive_messages()
        # Both querysets are lazy; every .count() below re-queries the
        # database and so reflects the latest archive_messages() run.
        archived_attachment = ArchivedAttachment.objects.all()
        attachment = Attachment.objects.all()
        self.assertEqual(archived_attachment.count(), 3)
        self.assertEqual(
            list(archived_attachment.distinct('messages__id').values_list('messages__id',
                                                                          flat=True)),
            [msgs_ids['expired_message_id']])
        self.assertEqual(attachment.count(), 3)

        # Now make `actual_message_id` expired too. We still don't
        # delete the Attachment objects.
        self._change_messages_pub_date([msgs_ids['actual_message_id']],
                                       timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
        archive_messages()
        self.assertEqual(attachment.count(), 3)

        # Finally, make the last message mentioning those attachments
        # expired. We should now delete the Attachment objects and
        # each ArchivedAttachment object should list all 3 messages.
        self._change_messages_pub_date([msgs_ids['other_user_message_id']],
                                       timezone_now() - timedelta(days=MIT_REALM_DAYS + 1))
        archive_messages()
        self.assertEqual(attachment.count(), 0)
        self.assertEqual(archived_attachment.count(), 3)
        self.assertEqual(
            list(archived_attachment.distinct('messages__id').order_by('messages__id').values_list(
                'messages__id', flat=True)),
            sorted(msgs_ids.values()))
class TestMoveMessageToArchive(ZulipTestCase):
    """Tests for ``move_messages_to_archive`` called with explicit message ids."""

    def setUp(self) -> None:
        """Fix the two email addresses used as sender and recipient."""
        super().setUp()
        self.sender = 'hamlet@zulip.com'
        self.recipient = 'cordelia@zulip.com'

    def _create_attachments(self) -> None:
        """Create five dummy attachments owned by hamlet."""
        dummy_size = 10
        names_and_paths = (
            ('zulip.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt'),
            ('temp_file.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py'),
            ('abc.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py'),
            ('hello.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt'),
            ('new.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py'),
        )
        owner = self.example_user('hamlet')
        for name, path in names_and_paths:
            create_attachment(name, path, owner, dummy_size)

    def _check_messages_before_archiving(self, msg_ids: List[int]) -> Tuple[List[int], List[int]]:
        """Snapshot the main tables and assert that the archive is empty.

        Returns a pair: the ids of the UserMessage rows belonging to
        ``msg_ids``, and the ids of all Message rows, each sorted by id.
        """
        related_user_msgs = UserMessage.objects.filter(message_id__in=msg_ids)
        user_msgs_snapshot = list(related_user_msgs.order_by('id').values_list('id', flat=True))
        every_msg = Message.objects.filter().order_by('id')
        all_msgs_snapshot = list(every_msg.values_list('id', flat=True))

        self.assertEqual(ArchivedUserMessage.objects.count(), 0)
        self.assertEqual(ArchivedMessage.objects.count(), 0)
        return (user_msgs_snapshot, all_msgs_snapshot)

    def _check_messages_after_archiving(self, msg_ids: List[int], user_msgs_ids_before: List[int],
                                        all_msgs_ids_before: List[int]) -> None:
        """Assert that ``msg_ids`` left the main tables and reached the archive.

        The two ``*_before`` arguments are the snapshots returned by
        ``_check_messages_before_archiving``.
        """
        archived_qty = len(msg_ids)
        self.assertEqual(ArchivedMessage.objects.all().count(), archived_qty)
        self.assertEqual(Message.objects.filter().count(), len(all_msgs_ids_before) - archived_qty)
        self.assertEqual(UserMessage.objects.filter(message_id__in=msg_ids).count(), 0)

        # Archived user messages are expected to keep their original ids.
        archived_user_msgs = ArchivedUserMessage.objects.filter().order_by('id')
        self.assertEqual(list(archived_user_msgs.values_list('id', flat=True)),
                         user_msgs_ids_before)

    def test_personal_messages_archiving(self) -> None:
        """Archive three personal messages."""
        msg_ids = [self.send_personal_message(self.sender, self.recipient)
                   for _ in range(0, 3)]
        user_msgs_ids_before, all_msgs_ids_before = self._check_messages_before_archiving(msg_ids)

        move_messages_to_archive(message_ids=msg_ids)
        self._check_messages_after_archiving(msg_ids, user_msgs_ids_before, all_msgs_ids_before)

    def test_stream_messages_archiving(self) -> None:
        """Archive three messages sent to the Verona stream."""
        msg_ids = [self.send_stream_message(self.sender, "Verona")
                   for _ in range(0, 3)]
        user_msgs_ids_before, all_msgs_ids_before = self._check_messages_before_archiving(msg_ids)

        move_messages_to_archive(message_ids=msg_ids)
        self._check_messages_after_archiving(msg_ids, user_msgs_ids_before, all_msgs_ids_before)

    def test_archiving_messages_second_time(self) -> None:
        """Archiving already-archived ids raises Message.DoesNotExist."""
        msg_ids = [self.send_stream_message(self.sender, "Verona")
                   for _ in range(0, 3)]
        user_msgs_ids_before, all_msgs_ids_before = self._check_messages_before_archiving(msg_ids)

        move_messages_to_archive(message_ids=msg_ids)
        self._check_messages_after_archiving(msg_ids, user_msgs_ids_before, all_msgs_ids_before)

        with self.assertRaises(Message.DoesNotExist):
            move_messages_to_archive(message_ids=msg_ids)

    def test_archiving_messages_with_attachment(self) -> None:
        """Archiving every message that references an attachment moves the
        attachment, and its message links, into the archive."""
        self._create_attachments()
        # The continuation lines of these literals start at column 0, so
        # the message content carries no leading whitespace.
        body1 = """Some files here ...[zulip.txt](
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
Some more.... http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py
"""
        body2 = """Some files here
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt ...
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
"""
        msg_ids = [
            self.send_personal_message(self.sender, self.recipient, body1),
            self.send_personal_message(self.sender, self.recipient, body2),
        ]

        # Remember which messages each attachment is linked to, so the
        # links can be compared against the archived copies afterwards.
        attachments = Attachment.objects.filter(messages__id__in=msg_ids)
        attachment_id_to_message_ids = {
            attachment.id: {message.id for message in attachment.messages.all()}
            for attachment in attachments
        }

        user_msgs_ids_before, all_msgs_ids_before = self._check_messages_before_archiving(msg_ids)
        attachments_ids_before = list(attachments.order_by("id").values_list("id", flat=True))
        self.assertEqual(ArchivedAttachment.objects.count(), 0)

        move_messages_to_archive(message_ids=msg_ids)
        self._check_messages_after_archiving(msg_ids, user_msgs_ids_before, all_msgs_ids_before)
        self.assertEqual(Attachment.objects.count(), 0)

        archived_attachments = ArchivedAttachment.objects.filter(messages__id__in=msg_ids)
        arc_attachments_ids_after = list(archived_attachments.order_by("id").values_list("id", flat=True))
        self.assertEqual(attachments_ids_before, arc_attachments_ids_after)
        for archived in archived_attachments:
            linked_message_ids = {message.id for message in archived.messages.all()}
            self.assertEqual(attachment_id_to_message_ids[archived.id], linked_message_ids)

    def test_archiving_message_with_shared_attachment(self) -> None:
        """Attachments still referenced by another message are not removed
        until that message is archived as well."""
        self._create_attachments()
        body = """Some files here ...[zulip.txt](
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py ....
Some more.... http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py ...
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/new.py ....
http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/hello.txt ....
"""
        msg_id = self.send_personal_message(self.sender, self.recipient, body)
        # Simulate a reply with the same contents.
        msg_id_shared_attachments = self.send_personal_message(
            from_email=self.recipient,
            to_email=self.sender,
            content=body,
        )

        only_first = [msg_id]
        user_msgs_ids_before, all_msgs_ids_before = self._check_messages_before_archiving(only_first)
        linked_attachments = Attachment.objects.filter(messages__id=msg_id)
        attachments_ids_before = list(linked_attachments.order_by("id").values_list("id", flat=True))
        self.assertEqual(ArchivedAttachment.objects.count(), 0)

        move_messages_to_archive(message_ids=[msg_id])
        self._check_messages_after_archiving([msg_id], user_msgs_ids_before, all_msgs_ids_before)
        # The reply still references all five attachments, so none is deleted yet.
        self.assertEqual(Attachment.objects.count(), 5)

        linked_archived = ArchivedAttachment.objects.filter(messages__id=msg_id)
        arc_attachments_ids_after = list(linked_archived.order_by("id").values_list("id", flat=True))
        self.assertEqual(attachments_ids_before, arc_attachments_ids_after)

        move_messages_to_archive(message_ids=[msg_id_shared_attachments])
        self.assertEqual(Attachment.objects.count(), 0)