2019-02-02 23:53:55 +01:00
|
|
|
from typing import Any, Dict, List, Set, Tuple, Union
|
2013-10-21 23:25:53 +02:00
|
|
|
|
|
|
|
from collections import defaultdict
|
|
|
|
import datetime
|
2017-12-13 01:45:57 +01:00
|
|
|
import logging
|
2017-02-26 04:44:22 +01:00
|
|
|
import pytz
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2013-11-16 00:54:12 +01:00
|
|
|
from django.conf import settings
|
2017-08-27 16:30:48 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2018-11-08 22:40:27 +01:00
|
|
|
from confirmation.models import one_click_unsubscribe_link
|
2019-03-15 18:51:39 +01:00
|
|
|
from zerver.lib.email_notifications import build_message_list
|
2017-07-11 06:13:23 +02:00
|
|
|
from zerver.lib.send_email import send_future_email, FromAddress
|
2018-07-29 00:05:45 +02:00
|
|
|
from zerver.lib.url_encoding import encode_stream
|
2019-03-03 07:14:58 +01:00
|
|
|
from zerver.models import UserProfile, Recipient, Subscription, UserActivity, \
|
|
|
|
get_active_streams, get_user_profile_by_id, Realm, Message, RealmAuditLog
|
2016-11-08 10:07:47 +01:00
|
|
|
from zerver.context_processors import common_context
|
2017-08-25 21:52:47 +02:00
|
|
|
from zerver.lib.queue import queue_json_publish
|
2017-12-13 01:45:57 +01:00
|
|
|
from zerver.lib.logging_util import log_to_file
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2017-12-13 01:45:57 +01:00
|
|
|
# Module-level logger, named after this module, used for all digest logging below.
logger = logging.getLogger(__name__)
|
|
|
|
# Hook the digest logger up to the path configured in settings.DIGEST_LOG_PATH.
# NOTE(review): presumably this attaches a file handler to `logger`; confirm
# against zerver.lib.logging_util.log_to_file.
log_to_file(logger, settings.DIGEST_LOG_PATH)
|
2014-01-08 17:33:12 +01:00
|
|
|
|
2017-08-27 16:30:48 +02:00
|
|
|
# Inactivity window, in days, associated with digest emails.
# NOTE(review): nothing in the definitions visible in this module reads this
# constant; presumably callers derive the `cutoff` datetime from it -- confirm.
DIGEST_CUTOFF = 5
|
|
|
|
|
2019-05-02 05:54:48 +02:00
|
|
|
# Digests accumulate 2 types of interesting traffic for a user:
|
|
|
|
# 1. New streams
|
|
|
|
# 2. Interesting stream traffic, as determined by the longest and most
|
2013-10-21 23:25:53 +02:00
|
|
|
#    diversely commented-upon topics.
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def inactive_since(user_profile: UserProfile, cutoff: datetime.datetime) -> bool:
    """Report whether the user's latest recorded visit is older than `cutoff`.

    Looks at every UserActivity row for `user_profile` and compares the
    newest `last_visit` against `cutoff`.  A user with no UserActivity
    rows at all is treated as inactive.
    """
    activity_rows = UserActivity.objects.filter(user_profile=user_profile)
    visit_times = [activity.last_visit for activity in activity_rows]

    if visit_times:
        # Inactive only if even the newest visit predates the cutoff.
        return max(visit_times) < cutoff

    # No activity rows: this person has never used the app.
    return True
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def should_process_digest(realm_str: str) -> bool:
    """Return False for realms listed in settings.SYSTEM_ONLY_REALMS, True otherwise.

    System-only realms never receive digest emails.
    """
    return realm_str not in settings.SYSTEM_ONLY_REALMS
|
|
|
|
|
2017-08-25 21:52:47 +02:00
|
|
|
# Changes to this should also be reflected in
|
|
|
|
# zerver/worker/queue_processors.py:DigestWorker.consume()
|
2017-11-05 11:15:10 +01:00
|
|
|
def queue_digest_recipient(user_profile: UserProfile, cutoff: datetime.datetime) -> None:
    """Publish a "digest_emails" queue event requesting a digest for one user.

    The event carries the user's id and `cutoff` encoded as a string of
    whole epoch seconds.
    """
    # Convert cutoff to epoch seconds for transit.
    #
    # datetime.timestamp() honours the tzinfo of an aware datetime, whereas
    # strftime('%s') is a platform-specific extension that is not available
    # everywhere and interprets the datetime's fields in the server's local
    # time zone.  The value stays a string of whole seconds so the format
    # seen by the queue consumer is unchanged.
    event = {"user_profile_id": user_profile.id,
             "cutoff": str(int(cutoff.timestamp()))}
    queue_json_publish("digest_emails", event)
|
2017-08-25 21:52:47 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def enqueue_emails(cutoff: datetime.datetime) -> None:
    """Queue a digest event for every eligible user who is inactive since `cutoff`.

    Does nothing unless settings.SEND_DIGEST_EMAILS is set.  Only realms that
    are not deactivated, have digest emails enabled, and whose digest_weekday
    matches today's weekday are considered; within them, only active,
    non-bot users who have digest emails enabled.
    """
    if not settings.SEND_DIGEST_EMAILS:
        return

    todays_weekday = timezone_now().weekday()
    eligible_realms = Realm.objects.filter(
        deactivated=False, digest_emails_enabled=True, digest_weekday=todays_weekday)

    for realm in eligible_realms:
        if should_process_digest(realm.string_id):
            candidates = UserProfile.objects.filter(
                realm=realm, is_active=True, is_bot=False, enable_digest_emails=True)

            for candidate in candidates:
                if not inactive_since(candidate, cutoff):
                    continue

                queue_digest_recipient(candidate, cutoff)
                logger.info("%s is inactive, queuing for potential digest" % (
                    candidate.email,))
|
|
|
|
|
2019-03-03 10:54:21 +01:00
|
|
|
def gather_hot_conversations(user_profile: UserProfile, messages: List[Message]) -> List[Dict[str, Any]]:
    """Pick up to 4 "hot" stream conversations and build their template data.

    Conversations are keyed by (recipient type_id, topic name) and ranked
    two ways, counting only messages sent by humans:

    1. long conversations (most human-sent messages)
    2. conversations where many different people participated

    Returns a list of dictionaries containing the templating information
    for each hot conversation, with keys:

    * "participants": full names of the human senders
    * "count": human-sent messages beyond the sample shown (never negative)
    * "first_few_messages": build_message_list() output for up to 2 messages
    """
    conversation_length = defaultdict(int)  # type: Dict[Tuple[int, str], int]
    conversation_messages = defaultdict(list)  # type: Dict[Tuple[int, str], List[Message]]
    conversation_diversity = defaultdict(set)  # type: Dict[Tuple[int, str], Set[str]]
    for message in messages:
        key = (message.recipient.type_id,
               message.topic_name())

        # Every message is a candidate for the sample shown in the email...
        conversation_messages[key].append(message)

        if not message.sent_by_human():
            # ...but automated messages are not included in the counts.
            continue

        conversation_diversity[key].add(
            message.sender.full_name)
        conversation_length[key] += 1

    diversity_list = list(conversation_diversity.items())
    diversity_list.sort(key=lambda entry: len(entry[1]), reverse=True)

    length_list = list(conversation_length.items())
    length_list.sort(key=lambda entry: entry[1], reverse=True)

    # Get up to the 4 best conversations from the diversity list
    # and length list, filtering out overlapping conversations.
    hot_conversations = [elt[0] for elt in diversity_list[:2]]
    for candidate, _ in length_list:
        if candidate not in hot_conversations:
            hot_conversations.append(candidate)
        if len(hot_conversations) >= 4:
            break

    # No further padding from the diversity list is needed: both rankings are
    # built from exactly the same set of conversations, so if fewer than 4
    # were selected above, every conversation has already been included.

    hot_conversation_render_payloads = []
    for h in hot_conversations:
        users = list(conversation_diversity[h])
        count = conversation_length[h]
        # Use a distinct name so the `messages` parameter is not rebound.
        topic_messages = conversation_messages[h]

        # We'll display up to 2 messages from the conversation.
        first_few_messages = topic_messages[:2]

        # `count` covers human-sent messages only, while the sample may
        # include automated ones; clamp so the remainder is never negative.
        remaining = max(count - len(first_few_messages), 0)

        teaser_data = {"participants": users,
                       "count": remaining,
                       "first_few_messages": build_message_list(
                           user_profile, first_few_messages)}

        hot_conversation_render_payloads.append(teaser_data)
    return hot_conversation_render_payloads
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def gather_new_streams(user_profile: UserProfile,
                       threshold: datetime.datetime) -> Tuple[int, Dict[str, List[str]]]:
    """Collect the non-invite-only active streams created after `threshold`.

    Users for whom can_access_public_streams() is false get no streams.

    Returns a tuple of the number of new streams and a dict with two
    parallel lists: "html" (an anchor linking to each stream's narrow) and
    "plain" (the bare stream names).
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if user_profile.can_access_public_streams():
        new_streams = list(get_active_streams(user_profile.realm).filter(
            invite_only=False, date_created__gt=threshold))
    else:
        new_streams = []

    base_url = "%s/#narrow/stream/" % (user_profile.realm.uri,)

    streams_html = []
    streams_plain = []

    for stream in new_streams:
        narrow_url = base_url + encode_stream(stream.id, stream.name)
        # Stream names are user-supplied, so escape them (and the URL, which
        # sits inside a single-quoted attribute) before building markup.
        # NOTE(review): assumes the template renders the "html" entries
        # without further escaping -- confirm to rule out double escaping.
        stream_link = "<a href='%s'>%s</a>" % (escape(narrow_url), escape(stream.name))
        streams_html.append(stream_link)
        streams_plain.append(stream.name)

    return len(new_streams), {"html": streams_html, "plain": streams_plain}
|
|
|
|
|
2019-05-02 05:54:48 +02:00
|
|
|
def enough_traffic(hot_conversations: List[Dict[str, Any]], new_streams: int) -> bool:
    """Return True if there is anything worth putting in a digest.

    hot_conversations: the hot-conversation payloads gathered for the digest.
    new_streams: the number of new streams gathered for the digest.

    True when there is at least one hot conversation or at least one new
    stream; False when both are empty/zero.
    """
    return bool(hot_conversations or new_streams)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2018-08-12 22:09:34 +02:00
|
|
|
def handle_digest_email(user_profile_id: int, cutoff: float,
                        render_to_web: bool = False) -> Union[None, Dict[str, Any]]:
    """Build the digest for one user and either return it or email it.

    user_profile_id: id of the user the digest is for.
    cutoff: epoch seconds; only traffic after this moment is considered.
    render_to_web: when True, return the template context instead of
        sending anything.

    Returns the context dict when `render_to_web` is True, otherwise None.
    In the latter case an email is scheduled only if enough_traffic()
    judges the gathered content sufficient.
    """
    user_profile = get_user_profile_by_id(user_profile_id)

    # The cutoff arrives as epoch seconds; turn it into an aware UTC datetime.
    cutoff_date = datetime.datetime.fromtimestamp(int(cutoff), tz=pytz.utc)

    # Template data starts from the common context and is extended below.
    context = common_context(user_profile)
    context['unsubscribe_link'] = one_click_unsubscribe_link(user_profile, "digest")

    # Stream ids of the user's active, unmuted stream subscriptions.
    home_view_streams = Subscription.objects.filter(
        user_profile=user_profile,
        recipient__type=Recipient.STREAM,
        active=True,
        is_muted=False,
    ).values_list('recipient__type_id', flat=True)

    if user_profile.long_term_idle:
        # For long-term-idle users, drop streams whose subscription changed
        # after the cutoff.
        stream_ids = exclude_subscription_modified_streams(user_profile, home_view_streams, cutoff_date)
    else:
        stream_ids = home_view_streams

    # All stream messages sent after cutoff_date in the selected streams.
    recent_messages = Message.objects.filter(
        recipient__type=Recipient.STREAM,
        recipient__type_id__in=stream_ids,
        pub_date__gt=cutoff_date,
    ).select_related('recipient', 'sender', 'sending_client')

    hot_conversations = gather_hot_conversations(user_profile, recent_messages)
    context["hot_conversations"] = hot_conversations

    new_streams_count, new_streams = gather_new_streams(user_profile, cutoff_date)
    context["new_streams"] = new_streams
    context["new_streams_count"] = new_streams_count

    # TODO: Set has_preheader if we want to include a preheader.

    if render_to_web:
        return context

    # We don't want to send emails containing almost no information.
    if not enough_traffic(hot_conversations, new_streams_count):
        return None

    logger.info("Sending digest email for %s" % (user_profile.email,))
    # Send now, as a ScheduledEmail
    send_future_email('zerver/emails/digest', user_profile.realm, to_user_ids=[user_profile.id],
                      from_name="Zulip Digest", from_address=FromAddress.NOREPLY, context=context)
    return None
|
2019-03-03 07:14:58 +01:00
|
|
|
|
|
|
|
def exclude_subscription_modified_streams(user_profile: UserProfile,
                                          stream_ids: List[int],
                                          cutoff_date: datetime.datetime) -> List[int]:
    """Drop streams whose subscription for this user changed after `cutoff_date`.

    A stream counts as modified when RealmAuditLog holds a subscription
    created / activated / deactivated event for `user_profile` in their
    realm with an event time later than `cutoff_date`.  The surviving ids
    are returned as a list in no particular order.
    """
    subscription_events = [
        RealmAuditLog.SUBSCRIPTION_CREATED,
        RealmAuditLog.SUBSCRIPTION_ACTIVATED,
        RealmAuditLog.SUBSCRIPTION_DEACTIVATED
    ]

    # Ids of the streams where the user's subscription was changed.
    changed_stream_ids = RealmAuditLog.objects.filter(
        realm=user_profile.realm,
        modified_user=user_profile,
        event_time__gt=cutoff_date,
        event_type__in=subscription_events,
    ).values_list('modified_stream_id', flat=True)

    candidates = set(stream_ids)
    changed = set(changed_stream_ids)
    return list(candidates - changed)
|