2019-03-08 02:48:54 +01:00
|
|
|
# Documented in https://zulip.readthedocs.io/en/latest/subsystems/sending-messages.html#soft-deactivation
|
2017-07-30 19:48:49 +02:00
|
|
|
|
2017-12-13 01:45:57 +01:00
|
|
|
from zerver.lib.logging_util import log_to_file
|
2017-07-13 16:39:01 +02:00
|
|
|
from collections import defaultdict
|
2017-12-13 01:45:57 +01:00
|
|
|
import logging
|
2017-07-30 19:48:49 +02:00
|
|
|
from django.db import transaction
|
2017-07-31 05:25:36 +02:00
|
|
|
from django.db.models import Max
|
2017-08-16 05:09:06 +02:00
|
|
|
from django.conf import settings
|
2017-07-13 16:39:01 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2019-03-12 04:28:40 +01:00
|
|
|
from typing import DefaultDict, Dict, List, Optional, Union, Any
|
2017-07-30 19:48:49 +02:00
|
|
|
|
2017-07-13 16:39:01 +02:00
|
|
|
from zerver.models import UserProfile, UserMessage, RealmAuditLog, \
|
2019-03-12 04:28:40 +01:00
|
|
|
Subscription, Message, Recipient, UserActivity, Realm
|
2017-07-30 19:48:49 +02:00
|
|
|
|
2017-12-13 01:45:57 +01:00
|
|
|
logger = logging.getLogger("zulip.soft_deactivation")
|
|
|
|
log_to_file(logger, settings.SOFT_DEACTIVATION_LOG_PATH)
|
2019-03-11 04:32:04 +01:00
|
|
|
BULK_CREATE_BATCH_SIZE = 10000
|
2017-08-16 05:09:06 +02:00
|
|
|
|
2017-12-06 19:39:35 +01:00
|
|
|
def filter_by_subscription_history(user_profile: UserProfile,
                                   all_stream_messages: DefaultDict[int, List[Message]],
                                   all_stream_subscription_logs: DefaultDict[int, List[RealmAuditLog]],
                                   ) -> List[UserMessage]:
    """Determine which of the given stream messages the user should have
    received, based on the user's subscription history recorded in
    RealmAuditLog, and return unsaved UserMessage rows (flags=0) for them.

    This function is pure computation: it performs no database queries and
    operates only on the data passed in.

    all_stream_messages: stream_id -> messages (as dicts with 'id'), ordered
        by message ID ascending.
    all_stream_subscription_logs: stream_id -> subscription audit-log
        entries, ordered by (event_last_message_id, id) ascending.
    """
    user_messages_to_insert = []  # type: List[UserMessage]

    def store_user_message_to_insert(message: Message) -> None:
        # flags=0: any message that needed nonzero flags (e.g. mentions)
        # already had its UserMessage row created at send time.
        message = UserMessage(user_profile=user_profile,
                              message_id=message['id'], flags=0)
        user_messages_to_insert.append(message)

    for (stream_id, stream_messages_raw) in all_stream_messages.items():
        stream_subscription_logs = all_stream_subscription_logs[stream_id]
        # Make a copy of the original list of messages, which we will
        # mutate in the loop below.
        stream_messages = list(stream_messages_raw)

        for log_entry in stream_subscription_logs:
            # For each stream, we iterate through all of the changes
            # to the user's subscription to that stream, ordered by
            # event_last_message_id, to determine whether the user was
            # subscribed to the target stream at that time.
            #
            # For each message, we're looking for the first event for
            # the user's subscription to the target stream after the
            # message was sent.
            # * If it's an unsubscribe, we know the user was subscribed
            #   when the message was sent, and create a UserMessage
            # * If it's a subscribe, we know the user was not, and we
            #   skip the message by mutating the stream_messages list
            #   to skip that message.

            if len(stream_messages) == 0:
                # Because stream_messages gets mutated below, this
                # check belongs in this inner loop, not the outer loop.
                break

            if log_entry.event_type == RealmAuditLog.SUBSCRIPTION_DEACTIVATED:
                # If the event shows the user was unsubscribed after
                # event_last_message_id, we know they must have been
                # subscribed immediately before the event.
                for stream_message in stream_messages:
                    if stream_message['id'] <= log_entry.event_last_message_id:
                        store_user_message_to_insert(stream_message)
                    else:
                        # Messages are ordered by ID, so the rest are
                        # all after this event as well.
                        break
            elif log_entry.event_type in (RealmAuditLog.SUBSCRIPTION_ACTIVATED,
                                          RealmAuditLog.SUBSCRIPTION_CREATED):
                initial_msg_count = len(stream_messages)
                for i, stream_message in enumerate(stream_messages):
                    if stream_message['id'] > log_entry.event_last_message_id:
                        # Drop messages sent before this subscribe event;
                        # the user was not subscribed when they were sent.
                        stream_messages = stream_messages[i:]
                        break
                final_msg_count = len(stream_messages)
                if initial_msg_count == final_msg_count:
                    # No message was after the event; if even the last
                    # message predates the subscribe, none are eligible.
                    if stream_messages[-1]['id'] <= log_entry.event_last_message_id:
                        stream_messages = []
            else:
                raise AssertionError('%s is not a Subscription Event.' % (log_entry.event_type,))

        if len(stream_messages) > 0:
            # We do this check for last event since if the last subscription
            # event was a subscription_deactivated then we don't want to create
            # UserMessage rows for any of the remaining messages.
            if stream_subscription_logs[-1].event_type in (
                    RealmAuditLog.SUBSCRIPTION_ACTIVATED,
                    RealmAuditLog.SUBSCRIPTION_CREATED):
                for stream_message in stream_messages:
                    store_user_message_to_insert(stream_message)
    return user_messages_to_insert
|
2017-07-13 16:39:01 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def add_missing_messages(user_profile: UserProfile) -> None:
    """This function takes a soft-deactivated user, and computes and adds
    to the database any UserMessage rows that were not created while
    the user was soft-deactivated.  The end result is that from the
    perspective of the message database, it should be impossible to
    tell that the user was soft-deactivated at all.

    At a high level, the algorithm is as follows:

    * Find all the streams that the user was at any time a subscriber
      of when or after they were soft-deactivated (`recipient_ids`
      below).

    * Find all the messages sent to those streams since the user was
      soft-deactivated.  This will be a superset of the target
      UserMessages we need to create in two ways: (1) some UserMessage
      rows will have already been created in do_send_messages because
      the user had a nonzero set of flags (the fact that we do so in
      do_send_messages simplifies things considerably, since it means
      we don't need to inspect message content to look for things like
      mentions here), and (2) the user might not have been subscribed
      to all of the streams in recipient_ids for the entire time
      window.

    * Correct the list from the previous state by excluding those with
      existing UserMessage rows.

    * Correct the list from the previous state by excluding those
      where the user wasn't subscribed at the time, using the
      RealmAuditLog data to determine exactly when the user was
      subscribed/unsubscribed.

    * Create the UserMessage rows.

    For further documentation, see:

      https://zulip.readthedocs.io/en/latest/subsystems/sending-messages.html#soft-deactivation

    """
    # last_active_message_id is set when the user is soft-deactivated
    # (see do_soft_deactivate_user); it marks the catch-up starting point.
    assert user_profile.last_active_message_id is not None
    all_stream_subs = list(Subscription.objects.filter(
        user_profile=user_profile,
        recipient__type=Recipient.STREAM).values('recipient_id', 'recipient__type_id'))

    # For Stream messages we need to check messages against data from
    # RealmAuditLog for visibility to user. So we fetch the subscription logs.
    stream_ids = [sub['recipient__type_id'] for sub in all_stream_subs]
    events = [RealmAuditLog.SUBSCRIPTION_CREATED, RealmAuditLog.SUBSCRIPTION_DEACTIVATED,
              RealmAuditLog.SUBSCRIPTION_ACTIVATED]

    # Important: We order first by event_last_message_id, which is the
    # official ordering, and then tiebreak by RealmAuditLog event ID.
    # That second tiebreak is important in case a user is subscribed
    # and then unsubscribed without any messages being sent in the
    # meantime.  Without that tiebreak, we could end up incorrectly
    # processing the ordering of those two subscription changes.
    subscription_logs = list(RealmAuditLog.objects.select_related(
        'modified_stream').filter(
        modified_user=user_profile,
        modified_stream__id__in=stream_ids,
        event_type__in=events).order_by('event_last_message_id', 'id'))

    # Group subscription events by the stream they modify.
    all_stream_subscription_logs = defaultdict(list)  # type: DefaultDict[int, List[RealmAuditLog]]
    for log in subscription_logs:
        all_stream_subscription_logs[log.modified_stream_id].append(log)

    recipient_ids = []
    for sub in all_stream_subs:
        stream_subscription_logs = all_stream_subscription_logs[sub['recipient__type_id']]
        if stream_subscription_logs[-1].event_type == RealmAuditLog.SUBSCRIPTION_DEACTIVATED:
            assert stream_subscription_logs[-1].event_last_message_id is not None
            if stream_subscription_logs[-1].event_last_message_id <= user_profile.last_active_message_id:
                # We are going to short circuit this iteration as its no use
                # iterating since user unsubscribed before soft-deactivation
                continue
        recipient_ids.append(sub['recipient_id'])

    # All candidate stream messages sent after the user went idle,
    # ordered by ID (filter_by_subscription_history relies on this order).
    all_stream_msgs = list(Message.objects.filter(
        recipient__id__in=recipient_ids,
        id__gt=user_profile.last_active_message_id).order_by('id').values(
        'id', 'recipient__type_id'))
    already_created_ums = set(UserMessage.objects.filter(
        user_profile=user_profile,
        message__recipient__type=Recipient.STREAM,
        message__id__gt=user_profile.last_active_message_id).values_list('message__id', flat=True))

    # Filter those messages for which UserMessage rows have been already created
    all_stream_msgs = [msg for msg in all_stream_msgs
                       if msg['id'] not in already_created_ums]

    stream_messages = defaultdict(list)  # type: DefaultDict[int, List[Message]]
    for msg in all_stream_msgs:
        stream_messages[msg['recipient__type_id']].append(msg)

    # Calling this function to filter out stream messages based upon
    # subscription logs and then store all UserMessage objects for bulk insert
    # This function does not perform any SQL related task and gets all the data
    # required for its operation in its params.
    user_messages_to_insert = filter_by_subscription_history(
        user_profile, stream_messages, all_stream_subscription_logs)

    # Doing a bulk create for all the UserMessage objects stored for creation.
    # We advance last_active_message_id after each committed batch, so that
    # work already done is recorded as we go.
    while len(user_messages_to_insert) > 0:
        messages, user_messages_to_insert = (
            user_messages_to_insert[0:BULK_CREATE_BATCH_SIZE],
            user_messages_to_insert[BULK_CREATE_BATCH_SIZE:])
        UserMessage.objects.bulk_create(messages)
        user_profile.last_active_message_id = messages[-1].message_id
        user_profile.save(update_fields=['last_active_message_id'])
|
2017-07-30 19:48:49 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def do_soft_deactivate_user(user_profile: UserProfile) -> None:
    """Mark the user as soft-deactivated (long_term_idle=True), recording
    in last_active_message_id the newest message the user has a
    UserMessage row for, so that add_missing_messages can later backfill
    everything sent while the user was idle.
    """
    try:
        user_profile.last_active_message_id = UserMessage.objects.filter(
            user_profile=user_profile).order_by(
            '-message__id')[0].message_id
    except IndexError:  # nocoverage
        # In the unlikely event that a user somehow has never received
        # a message, we just use the overall max message ID.
        #
        # Bug fix: Manager has no .max() method in Django; use an
        # aggregate over Message.id instead (Max is already imported).
        user_profile.last_active_message_id = Message.objects.aggregate(
            Max('id'))['id__max']
    user_profile.long_term_idle = True
    user_profile.save(update_fields=[
        'long_term_idle',
        'last_active_message_id'])
    logger.info('Soft Deactivated user %s' % (user_profile.id,))
|
2017-07-30 19:48:49 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def do_soft_deactivate_users(users: List[UserProfile]) -> List[UserProfile]:
    """Soft-deactivate the given users in batches of 100, writing a
    USER_SOFT_DEACTIVATED RealmAuditLog entry per user.  Each batch is
    wrapped in a transaction so a user is never soft-deactivated without
    its audit-log record.  Returns the list of users processed.
    """
    BATCH_SIZE = 100
    users_soft_deactivated = []
    while True:
        (user_batch, users) = (users[0:BATCH_SIZE], users[BATCH_SIZE:])
        if len(user_batch) == 0:
            break
        with transaction.atomic():
            realm_logs = []
            for user in user_batch:
                do_soft_deactivate_user(user)
                event_time = timezone_now()
                log = RealmAuditLog(
                    realm=user.realm,
                    modified_user=user,
                    event_type=RealmAuditLog.USER_SOFT_DEACTIVATED,
                    event_time=event_time
                )
                realm_logs.append(log)
                users_soft_deactivated.append(user)
            RealmAuditLog.objects.bulk_create(realm_logs)

        # Consistency fix: use the module's dedicated soft-deactivation
        # logger (wired to SOFT_DEACTIVATION_LOG_PATH), not the root
        # logging module, matching do_soft_deactivate_user above.
        logger.info("Soft-deactivated batch of %s users; %s remain to process",
                    len(user_batch), len(users))

    return users_soft_deactivated
|
2017-07-16 09:41:38 +02:00
|
|
|
|
2019-03-12 04:28:40 +01:00
|
|
|
def do_auto_soft_deactivate_users(inactive_for_days: int, realm: Optional[Realm]) -> List[UserProfile]:
    """Soft-deactivate users (optionally restricted to one realm) who have
    been inactive for more than inactive_for_days, then — unless disabled
    by settings.AUTO_CATCH_UP_SOFT_DEACTIVATED_USERS — catch up all
    soft-deactivated users' message history.  Returns the users that were
    newly soft-deactivated.
    """
    filter_kwargs = {}  # type: Dict[str, Realm]
    if realm is not None:
        filter_kwargs = dict(user_profile__realm=realm)
    users_to_deactivate = get_users_for_soft_deactivation(inactive_for_days, filter_kwargs)
    users_deactivated = do_soft_deactivate_users(users_to_deactivate)

    if not settings.AUTO_CATCH_UP_SOFT_DEACTIVATED_USERS:
        # Consistency fix: use the module logger; also fix the
        # "if off" -> "is off" typo in the message.
        logger.info('Not catching up users since AUTO_CATCH_UP_SOFT_DEACTIVATED_USERS is off')
        return users_deactivated

    if realm is not None:
        filter_kwargs = dict(realm=realm)
    users_to_catch_up = get_soft_deactivated_users_for_catch_up(filter_kwargs)
    do_catch_up_soft_deactivated_users(users_to_catch_up)
    return users_deactivated
|
|
|
|
|
2019-03-12 02:48:01 +01:00
|
|
|
def reactivate_user_if_soft_deactivated(user_profile: UserProfile) -> Union[UserProfile, None]:
    """If the user is soft-deactivated, backfill their missing UserMessage
    rows, clear the long_term_idle flag, and record a USER_SOFT_ACTIVATED
    audit-log entry.

    Returns the user on reactivation; None if the user was not idle.
    """
    if not user_profile.long_term_idle:
        return None

    add_missing_messages(user_profile)
    user_profile.long_term_idle = False
    user_profile.save(update_fields=['long_term_idle'])
    RealmAuditLog.objects.create(
        realm=user_profile.realm,
        modified_user=user_profile,
        event_type=RealmAuditLog.USER_SOFT_ACTIVATED,
        event_time=timezone_now(),
    )
    logger.info('Soft Reactivated user %s' % (user_profile.id,))
    return user_profile
|
2017-07-31 05:25:36 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_users_for_soft_deactivation(inactive_for_days: int, filter_kwargs: Any) -> List[UserProfile]:
    """Return active, human, not-yet-idle users (further restricted by
    filter_kwargs) whose most recent UserActivity is more than
    inactive_for_days days old.
    """
    users_activity = list(UserActivity.objects.filter(
        user_profile__is_active=True,
        user_profile__is_bot=False,
        user_profile__long_term_idle=False,
        **filter_kwargs).values('user_profile_id').annotate(
        last_visit=Max('last_visit')))
    today = timezone_now()
    user_ids_to_deactivate = [
        activity_row['user_profile_id']
        for activity_row in users_activity
        if (today - activity_row['last_visit']).days > inactive_for_days
    ]
    return list(UserProfile.objects.filter(id__in=user_ids_to_deactivate))
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def do_soft_activate_users(users: List[UserProfile]) -> List[UserProfile]:
    """Attempt to reactivate each of the given users; return those that
    were actually soft-deactivated and got reactivated.
    """
    maybe_activated = (reactivate_user_if_soft_deactivated(user_profile)
                       for user_profile in users)
    return [activated for activated in maybe_activated if activated]
|
2019-03-12 03:59:02 +01:00
|
|
|
|
|
|
|
def do_catch_up_soft_deactivated_users(users: List[UserProfile]) -> List[UserProfile]:
    """Backfill missing UserMessage rows for every long_term_idle user in
    `users`, WITHOUT clearing their long_term_idle flag.  Returns the
    users that were caught up.
    """
    users_caught_up = []
    for user_profile in users:
        if user_profile.long_term_idle:
            add_missing_messages(user_profile)
            users_caught_up.append(user_profile)
    # Consistency fix: use the module's dedicated soft-deactivation logger
    # (with lazy %-args) rather than the root logging module.
    logger.info("Caught up %d soft-deactivated users", len(users_caught_up))
    return users_caught_up
|
|
|
|
|
|
|
|
def get_soft_deactivated_users_for_catch_up(filter_kwargs: Any) -> List[UserProfile]:
    """Return the active, human, long_term_idle users matching
    filter_kwargs (a queryset, lazily evaluated).
    """
    return UserProfile.objects.select_related().filter(
        long_term_idle=True,
        is_active=True,
        is_bot=False,
        **filter_kwargs,
    )
|