2016-10-25 19:51:31 +02:00
|
|
|
|
|
|
|
from datetime import timedelta
|
2017-05-14 21:14:26 +02:00
|
|
|
|
|
|
|
from django.db import connection, transaction
|
2017-04-15 04:03:56 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2016-11-01 11:26:38 +01:00
|
|
|
from zerver.models import (Message, UserMessage, ArchivedMessage, ArchivedUserMessage, Realm,
|
2019-05-30 15:24:18 +02:00
|
|
|
Attachment, ArchivedAttachment, Reaction, ArchivedReaction,
|
|
|
|
SubMessage, ArchivedSubMessage)
|
2016-10-25 19:51:31 +02:00
|
|
|
|
2019-05-30 15:06:45 +02:00
|
|
|
from typing import Any, Dict, List
|
2016-10-25 19:51:31 +02:00
|
|
|
|
2019-05-30 15:06:45 +02:00
|
|
|
models_with_message_key = [
|
|
|
|
{
|
|
|
|
'class': Reaction,
|
|
|
|
'archive_class': ArchivedReaction,
|
|
|
|
'table_name': 'zerver_reaction',
|
|
|
|
'archive_table_name': 'zerver_archivedreaction'
|
|
|
|
},
|
2019-05-30 15:24:18 +02:00
|
|
|
{
|
|
|
|
'class': SubMessage,
|
|
|
|
'archive_class': ArchivedSubMessage,
|
|
|
|
'table_name': 'zerver_submessage',
|
|
|
|
'archive_table_name': 'zerver_archivedsubmessage'
|
|
|
|
},
|
2019-05-30 15:06:45 +02:00
|
|
|
] # type: List[Dict[str, Any]]
|
2016-10-25 19:51:31 +02:00
|
|
|
|
2016-11-01 11:26:38 +01:00
|
|
|
@transaction.atomic
|
2019-06-02 17:29:25 +02:00
|
|
|
def move_expired_rows(src_model: Any, raw_query: str, returning_id: bool=False,
|
|
|
|
**kwargs: Any) -> List[int]:
|
2016-11-01 11:26:38 +01:00
|
|
|
src_db_table = src_model._meta.db_table
|
|
|
|
src_fields = ["{}.{}".format(src_db_table, field.column) for field in src_model._meta.fields]
|
|
|
|
dst_fields = [field.column for field in src_model._meta.fields]
|
|
|
|
sql_args = {
|
|
|
|
'src_fields': ','.join(src_fields),
|
|
|
|
'dst_fields': ','.join(dst_fields),
|
|
|
|
'archive_timestamp': timezone_now()
|
|
|
|
}
|
|
|
|
sql_args.update(kwargs)
|
|
|
|
with connection.cursor() as cursor:
|
|
|
|
cursor.execute(
|
|
|
|
raw_query.format(**sql_args)
|
|
|
|
)
|
2019-06-02 17:29:25 +02:00
|
|
|
if returning_id:
|
|
|
|
return [row[0] for row in cursor.fetchall()] # return list of row ids
|
|
|
|
else:
|
|
|
|
return []
|
2016-10-25 19:51:31 +02:00
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
def ids_list_to_sql_query_format(ids: List[int]) -> str:
|
|
|
|
assert len(ids) > 0
|
2016-10-25 19:51:31 +02:00
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
ids_tuple = tuple(ids)
|
|
|
|
if len(ids_tuple) > 1:
|
|
|
|
ids_string = str(ids_tuple)
|
|
|
|
elif len(ids_tuple) == 1:
|
|
|
|
ids_string = '({})'.format(ids_tuple[0])
|
|
|
|
|
|
|
|
return ids_string
|
|
|
|
|
|
|
|
def move_expired_messages_to_archive(realm: Realm) -> List[int]:
|
2016-11-01 11:26:38 +01:00
|
|
|
query = """
|
|
|
|
INSERT INTO zerver_archivedmessage ({dst_fields}, archive_timestamp)
|
|
|
|
SELECT {src_fields}, '{archive_timestamp}'
|
|
|
|
FROM zerver_message
|
|
|
|
INNER JOIN zerver_userprofile ON zerver_message.sender_id = zerver_userprofile.id
|
|
|
|
LEFT JOIN zerver_archivedmessage ON zerver_archivedmessage.id = zerver_message.id
|
|
|
|
WHERE zerver_userprofile.realm_id = {realm_id}
|
|
|
|
AND zerver_message.pub_date < '{check_date}'
|
|
|
|
AND zerver_archivedmessage.id is NULL
|
2019-06-02 17:29:25 +02:00
|
|
|
RETURNING id
|
2016-11-01 11:26:38 +01:00
|
|
|
"""
|
|
|
|
assert realm.message_retention_days is not None
|
|
|
|
check_date = timezone_now() - timedelta(days=realm.message_retention_days)
|
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
return move_expired_rows(Message, query, returning_id=True,
|
|
|
|
realm_id=realm.id, check_date=check_date.isoformat())
|
|
|
|
|
|
|
|
def move_expired_user_messages_to_archive(msg_ids: List[int]) -> List[int]:
|
|
|
|
if not msg_ids:
|
|
|
|
return []
|
2016-11-01 11:26:38 +01:00
|
|
|
|
|
|
|
query = """
|
|
|
|
INSERT INTO zerver_archivedusermessage ({dst_fields}, archive_timestamp)
|
|
|
|
SELECT {src_fields}, '{archive_timestamp}'
|
|
|
|
FROM zerver_usermessage
|
|
|
|
LEFT JOIN zerver_archivedusermessage ON zerver_archivedusermessage.id = zerver_usermessage.id
|
2019-06-02 17:29:25 +02:00
|
|
|
WHERE zerver_usermessage.message_id IN {message_ids}
|
2016-11-01 11:26:38 +01:00
|
|
|
AND zerver_archivedusermessage.id is NULL
|
2019-06-02 17:29:25 +02:00
|
|
|
RETURNING id
|
2016-11-01 11:26:38 +01:00
|
|
|
"""
|
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
return move_expired_rows(UserMessage, query, returning_id=True,
|
|
|
|
message_ids=ids_list_to_sql_query_format(msg_ids))
|
|
|
|
|
|
|
|
def move_expired_models_with_message_key_to_archive(msg_ids: List[int]) -> None:
|
|
|
|
if not msg_ids:
|
|
|
|
return
|
|
|
|
|
2019-05-30 15:06:45 +02:00
|
|
|
for model in models_with_message_key:
|
|
|
|
query = """
|
|
|
|
INSERT INTO {archive_table_name} ({dst_fields}, archive_timestamp)
|
|
|
|
SELECT {src_fields}, '{archive_timestamp}'
|
|
|
|
FROM {table_name}
|
|
|
|
LEFT JOIN {archive_table_name} ON {archive_table_name}.id = {table_name}.id
|
2019-06-02 17:29:25 +02:00
|
|
|
WHERE {table_name}.message_id IN {message_ids}
|
2019-05-30 15:06:45 +02:00
|
|
|
AND {archive_table_name}.id IS NULL
|
|
|
|
"""
|
2019-06-02 17:29:25 +02:00
|
|
|
move_expired_rows(model['class'], query, table_name=model['table_name'],
|
|
|
|
archive_table_name=model['archive_table_name'],
|
|
|
|
message_ids=ids_list_to_sql_query_format(msg_ids))
|
|
|
|
|
2019-06-03 12:19:58 +02:00
|
|
|
def move_expired_attachments_to_archive(msg_ids: List[int]) -> None:
|
2019-06-02 17:29:25 +02:00
|
|
|
if not msg_ids:
|
|
|
|
return
|
2019-05-30 15:06:45 +02:00
|
|
|
|
2016-11-01 11:26:38 +01:00
|
|
|
query = """
|
|
|
|
INSERT INTO zerver_archivedattachment ({dst_fields}, archive_timestamp)
|
|
|
|
SELECT {src_fields}, '{archive_timestamp}'
|
|
|
|
FROM zerver_attachment
|
|
|
|
INNER JOIN zerver_attachment_messages
|
|
|
|
ON zerver_attachment_messages.attachment_id = zerver_attachment.id
|
|
|
|
LEFT JOIN zerver_archivedattachment ON zerver_archivedattachment.id = zerver_attachment.id
|
2019-06-02 17:29:25 +02:00
|
|
|
WHERE zerver_attachment_messages.message_id IN {message_ids}
|
2016-11-01 11:26:38 +01:00
|
|
|
AND zerver_archivedattachment.id IS NULL
|
|
|
|
GROUP BY zerver_attachment.id
|
|
|
|
"""
|
2019-06-03 12:19:58 +02:00
|
|
|
move_expired_rows(Attachment, query, message_ids=ids_list_to_sql_query_format(msg_ids))
|
2016-11-01 11:26:38 +01:00
|
|
|
|
|
|
|
|
2019-06-03 12:19:58 +02:00
|
|
|
def move_expired_attachments_message_rows_to_archive(msg_ids: List[int]) -> None:
|
2019-06-02 17:29:25 +02:00
|
|
|
if not msg_ids:
|
|
|
|
return
|
|
|
|
|
2016-11-01 11:26:38 +01:00
|
|
|
query = """
|
|
|
|
INSERT INTO zerver_archivedattachment_messages (id, archivedattachment_id, archivedmessage_id)
|
|
|
|
SELECT zerver_attachment_messages.id, zerver_attachment_messages.attachment_id,
|
|
|
|
zerver_attachment_messages.message_id
|
|
|
|
FROM zerver_attachment_messages
|
|
|
|
INNER JOIN zerver_attachment
|
|
|
|
ON zerver_attachment_messages.attachment_id = zerver_attachment.id
|
|
|
|
LEFT JOIN zerver_archivedattachment_messages
|
|
|
|
ON zerver_archivedattachment_messages.id = zerver_attachment_messages.id
|
2019-06-02 17:29:25 +02:00
|
|
|
WHERE zerver_attachment_messages.message_id IN {message_ids}
|
2016-11-01 11:26:38 +01:00
|
|
|
AND zerver_archivedattachment_messages.id IS NULL
|
|
|
|
"""
|
|
|
|
with connection.cursor() as cursor:
|
2019-06-03 12:19:58 +02:00
|
|
|
cursor.execute(query.format(message_ids=ids_list_to_sql_query_format(msg_ids)))
|
2016-11-01 11:26:38 +01:00
|
|
|
|
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
def delete_expired_messages(realm: Realm, msg_ids: List[int]) -> None:
|
2016-11-01 11:26:38 +01:00
|
|
|
removing_messages = Message.objects.filter(
|
2019-06-02 17:29:25 +02:00
|
|
|
usermessage__isnull=True, id__in=msg_ids,
|
2016-11-01 11:26:38 +01:00
|
|
|
sender__realm_id=realm.id
|
|
|
|
)
|
|
|
|
removing_messages.delete()
|
|
|
|
|
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
def delete_expired_user_messages(realm: Realm, usermsg_ids: List[int]) -> None:
|
|
|
|
assert realm.message_retention_days is not None
|
2016-11-01 11:26:38 +01:00
|
|
|
removing_user_messages = UserMessage.objects.filter(
|
2019-06-02 17:29:25 +02:00
|
|
|
id__in=usermsg_ids,
|
|
|
|
user_profile__realm_id=realm.id,
|
2016-11-01 11:26:38 +01:00
|
|
|
)
|
|
|
|
removing_user_messages.delete()
|
|
|
|
|
|
|
|
|
|
|
|
def delete_expired_attachments(realm: Realm) -> None:
|
|
|
|
attachments_to_remove = Attachment.objects.filter(
|
|
|
|
messages__isnull=True, id__in=ArchivedAttachment.objects.all(),
|
|
|
|
realm_id=realm.id
|
|
|
|
)
|
|
|
|
attachments_to_remove.delete()
|
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
def move_expired_to_archive() -> Dict[int, Dict[str, List[int]]]:
|
|
|
|
archived_data_info = {} # type: Dict[int, Dict[str, List[int]]]
|
|
|
|
for realm in Realm.objects.filter(message_retention_days__isnull=False).order_by("id"):
|
|
|
|
msg_ids = move_expired_messages_to_archive(realm)
|
|
|
|
usermsg_ids = move_expired_user_messages_to_archive(msg_ids)
|
|
|
|
move_expired_models_with_message_key_to_archive(msg_ids)
|
2019-06-03 12:19:58 +02:00
|
|
|
move_expired_attachments_to_archive(msg_ids)
|
|
|
|
move_expired_attachments_message_rows_to_archive(msg_ids)
|
2016-11-01 11:26:38 +01:00
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
archived_data_info[realm.id] = {
|
|
|
|
'message_ids': msg_ids,
|
|
|
|
'usermessage_ids': usermsg_ids,
|
|
|
|
}
|
2016-11-01 11:26:38 +01:00
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
return archived_data_info
|
2019-05-24 12:41:04 +02:00
|
|
|
|
2019-06-02 17:29:25 +02:00
|
|
|
def clean_expired(archived_data_info: Dict[int, Dict[str, List[int]]]) -> None:
|
2019-05-24 12:41:04 +02:00
|
|
|
for realm in Realm.objects.filter(message_retention_days__isnull=False).order_by("id"):
|
2019-06-02 17:29:25 +02:00
|
|
|
delete_expired_user_messages(realm, archived_data_info[realm.id]['usermessage_ids'])
|
|
|
|
delete_expired_messages(realm, archived_data_info[realm.id]['message_ids'])
|
2016-11-01 11:26:38 +01:00
|
|
|
delete_expired_attachments(realm)
|
2017-05-14 21:14:26 +02:00
|
|
|
|
2019-05-24 12:41:04 +02:00
|
|
|
def archive_messages() -> None:
|
2019-06-02 17:29:25 +02:00
|
|
|
archived_data_info = move_expired_to_archive()
|
|
|
|
clean_expired(archived_data_info)
|
2017-05-14 21:14:26 +02:00
|
|
|
|
2019-05-28 11:40:56 +02:00
|
|
|
def move_attachment_messages_to_archive_by_message(message_ids: List[int]) -> None:
|
2017-05-14 21:14:26 +02:00
|
|
|
# Move attachments messages relation table data to archive.
|
2018-09-13 16:29:12 +02:00
|
|
|
id_list = ', '.join(str(message_id) for message_id in message_ids)
|
|
|
|
|
2017-05-14 21:14:26 +02:00
|
|
|
query = """
|
|
|
|
INSERT INTO zerver_archivedattachment_messages (id, archivedattachment_id,
|
|
|
|
archivedmessage_id)
|
|
|
|
SELECT zerver_attachment_messages.id, zerver_attachment_messages.attachment_id,
|
|
|
|
zerver_attachment_messages.message_id
|
|
|
|
FROM zerver_attachment_messages
|
|
|
|
LEFT JOIN zerver_archivedattachment_messages
|
|
|
|
ON zerver_archivedattachment_messages.id = zerver_attachment_messages.id
|
2018-09-13 16:29:12 +02:00
|
|
|
WHERE zerver_attachment_messages.message_id in ({message_ids})
|
2017-05-14 21:14:26 +02:00
|
|
|
AND zerver_archivedattachment_messages.id IS NULL
|
|
|
|
"""
|
|
|
|
with connection.cursor() as cursor:
|
2018-09-13 16:29:12 +02:00
|
|
|
cursor.execute(query.format(message_ids=id_list))
|
2017-05-14 21:14:26 +02:00
|
|
|
|
|
|
|
|
|
|
|
@transaction.atomic
|
2018-09-13 16:29:12 +02:00
|
|
|
def move_messages_to_archive(message_ids: List[int]) -> None:
|
|
|
|
messages = list(Message.objects.filter(id__in=message_ids).values())
|
|
|
|
if not messages:
|
2017-05-14 21:14:26 +02:00
|
|
|
raise Message.DoesNotExist
|
2019-05-28 11:38:53 +02:00
|
|
|
|
|
|
|
ArchivedMessage.objects.bulk_create([ArchivedMessage(**message) for message in messages])
|
|
|
|
|
2017-05-14 21:14:26 +02:00
|
|
|
# Move user_messages to the archive.
|
|
|
|
user_messages = UserMessage.objects.filter(
|
2019-05-28 11:38:53 +02:00
|
|
|
message_id__in=message_ids).exclude(id__in=ArchivedUserMessage.objects.all()).values()
|
|
|
|
user_messages_ids = [user_message['id'] for user_message in user_messages]
|
|
|
|
ArchivedUserMessage.objects.bulk_create(
|
|
|
|
[ArchivedUserMessage(**user_message) for user_message in user_messages]
|
|
|
|
)
|
2017-05-14 21:14:26 +02:00
|
|
|
|
2019-05-30 15:06:45 +02:00
|
|
|
for model in models_with_message_key:
|
|
|
|
elements = model['class'].objects.filter(message_id__in=message_ids).exclude(
|
|
|
|
id__in=model['archive_class'].objects.all()
|
|
|
|
).values()
|
|
|
|
|
|
|
|
model['archive_class'].objects.bulk_create(
|
|
|
|
[model['archive_class'](**element) for element in elements]
|
|
|
|
)
|
|
|
|
|
2017-05-14 21:14:26 +02:00
|
|
|
# Move attachments to archive
|
2018-09-13 16:29:12 +02:00
|
|
|
attachments = Attachment.objects.filter(messages__id__in=message_ids).exclude(
|
2019-05-28 11:38:53 +02:00
|
|
|
id__in=ArchivedAttachment.objects.all()).distinct().values()
|
|
|
|
ArchivedAttachment.objects.bulk_create(
|
|
|
|
[ArchivedAttachment(**attachment) for attachment in attachments]
|
|
|
|
)
|
|
|
|
|
2019-05-28 11:40:56 +02:00
|
|
|
move_attachment_messages_to_archive_by_message(message_ids)
|
2019-05-28 11:38:53 +02:00
|
|
|
|
2017-05-14 21:14:26 +02:00
|
|
|
# Remove data from main tables
|
2018-09-13 16:29:12 +02:00
|
|
|
Message.objects.filter(id__in=message_ids).delete()
|
2019-05-28 11:38:53 +02:00
|
|
|
UserMessage.objects.filter(id__in=user_messages_ids, message_id__isnull=True).delete()
|
|
|
|
|
2018-09-13 16:29:12 +02:00
|
|
|
archived_attachments = ArchivedAttachment.objects.filter(messages__id__in=message_ids).distinct()
|
2017-05-14 21:14:26 +02:00
|
|
|
Attachment.objects.filter(messages__isnull=True, id__in=archived_attachments).delete()
|