2013-10-21 23:25:53 +02:00
|
|
|
import datetime
|
2017-12-13 01:45:57 +01:00
|
|
|
import logging
|
2020-06-11 00:54:34 +02:00
|
|
|
from collections import defaultdict
|
|
|
|
from typing import Any, Dict, List, Set, Tuple, Union
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2013-11-16 00:54:12 +01:00
|
|
|
from django.conf import settings
|
2017-08-27 16:30:48 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2018-11-08 22:40:27 +01:00
|
|
|
from confirmation.models import one_click_unsubscribe_link
|
2016-11-08 10:07:47 +01:00
|
|
|
from zerver.context_processors import common_context
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.email_notifications import build_message_list
|
2017-12-13 01:45:57 +01:00
|
|
|
from zerver.lib.logging_util import log_to_file
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.queue import queue_json_publish
|
|
|
|
from zerver.lib.send_email import FromAddress, send_future_email
|
|
|
|
from zerver.lib.url_encoding import encode_stream
|
|
|
|
from zerver.models import (
|
|
|
|
Message,
|
|
|
|
Realm,
|
|
|
|
RealmAuditLog,
|
|
|
|
Recipient,
|
|
|
|
Subscription,
|
|
|
|
UserActivity,
|
|
|
|
UserProfile,
|
|
|
|
get_active_streams,
|
|
|
|
get_user_profile_by_id,
|
|
|
|
)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2017-12-13 01:45:57 +01:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
log_to_file(logger, settings.DIGEST_LOG_PATH)
|
2014-01-08 17:33:12 +01:00
|
|
|
|
2017-08-27 16:30:48 +02:00
|
|
|
DIGEST_CUTOFF = 5
|
|
|
|
|
2019-05-02 05:54:48 +02:00
|
|
|
# Digests accumulate 2 types of interesting traffic for a user:
|
|
|
|
# 1. New streams
|
|
|
|
# 2. Interesting stream traffic, as determined by the longest and most
|
2013-10-21 23:25:53 +02:00
|
|
|
# diversely comment upon topics.
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def inactive_since(user_profile: UserProfile, cutoff: datetime.datetime) -> bool:
|
2017-08-25 21:44:28 +02:00
|
|
|
# Hasn't used the app in the last DIGEST_CUTOFF (5) days.
|
|
|
|
most_recent_visit = [row.last_visit for row in
|
|
|
|
UserActivity.objects.filter(
|
|
|
|
user_profile=user_profile)]
|
|
|
|
|
|
|
|
if not most_recent_visit:
|
|
|
|
# This person has never used the app.
|
|
|
|
return True
|
|
|
|
|
|
|
|
last_visit = max(most_recent_visit)
|
|
|
|
return last_visit < cutoff
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def should_process_digest(realm_str: str) -> bool:
|
2017-08-25 21:48:19 +02:00
|
|
|
if realm_str in settings.SYSTEM_ONLY_REALMS:
|
|
|
|
# Don't try to send emails to system-only realms
|
|
|
|
return False
|
|
|
|
return True
|
|
|
|
|
2017-08-25 21:52:47 +02:00
|
|
|
# Changes to this should also be reflected in
|
|
|
|
# zerver/worker/queue_processors.py:DigestWorker.consume()
|
2017-11-05 11:15:10 +01:00
|
|
|
def queue_digest_recipient(user_profile: UserProfile, cutoff: datetime.datetime) -> None:
|
2017-08-25 21:52:47 +02:00
|
|
|
# Convert cutoff to epoch seconds for transit.
|
|
|
|
event = {"user_profile_id": user_profile.id,
|
|
|
|
"cutoff": cutoff.strftime('%s')}
|
2017-11-24 13:18:46 +01:00
|
|
|
queue_json_publish("digest_emails", event)
|
2017-08-25 21:52:47 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def enqueue_emails(cutoff: datetime.datetime) -> None:
|
2018-03-09 00:16:44 +01:00
|
|
|
if not settings.SEND_DIGEST_EMAILS:
|
|
|
|
return
|
|
|
|
|
2019-03-28 04:47:03 +01:00
|
|
|
weekday = timezone_now().weekday()
|
|
|
|
for realm in Realm.objects.filter(deactivated=False, digest_emails_enabled=True, digest_weekday=weekday):
|
2017-08-27 16:30:48 +02:00
|
|
|
if not should_process_digest(realm.string_id):
|
|
|
|
continue
|
|
|
|
|
|
|
|
user_profiles = UserProfile.objects.filter(
|
|
|
|
realm=realm, is_active=True, is_bot=False, enable_digest_emails=True)
|
|
|
|
|
|
|
|
for user_profile in user_profiles:
|
|
|
|
if inactive_since(user_profile, cutoff):
|
|
|
|
queue_digest_recipient(user_profile, cutoff)
|
2020-05-02 08:44:14 +02:00
|
|
|
logger.info(
|
|
|
|
"User %s is inactive, queuing for potential digest",
|
|
|
|
user_profile.id,
|
|
|
|
)
|
2017-08-27 16:30:48 +02:00
|
|
|
|
2019-03-03 10:54:21 +01:00
|
|
|
def gather_hot_conversations(user_profile: UserProfile, messages: List[Message]) -> List[Dict[str, Any]]:
|
2013-10-21 23:25:53 +02:00
|
|
|
# Gather stream conversations of 2 types:
|
|
|
|
# 1. long conversations
|
|
|
|
# 2. conversations where many different people participated
|
|
|
|
#
|
|
|
|
# Returns a list of dictionaries containing the templating
|
|
|
|
# information for each hot conversation.
|
|
|
|
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
conversation_length: Dict[Tuple[int, str], int] = defaultdict(int)
|
|
|
|
conversation_messages: Dict[Tuple[int, str], List[Message]] = defaultdict(list)
|
|
|
|
conversation_diversity: Dict[Tuple[int, str], Set[str]] = defaultdict(set)
|
2018-11-11 19:06:59 +01:00
|
|
|
for message in messages:
|
|
|
|
key = (message.recipient.type_id,
|
|
|
|
message.topic_name())
|
|
|
|
|
2018-11-11 18:46:21 +01:00
|
|
|
conversation_messages[key].append(message)
|
|
|
|
|
2018-11-11 19:06:59 +01:00
|
|
|
if not message.sent_by_human():
|
2013-12-31 22:45:21 +01:00
|
|
|
# Don't include automated messages in the count.
|
|
|
|
continue
|
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
conversation_diversity[key].add(
|
2018-11-11 19:06:59 +01:00
|
|
|
message.sender.full_name)
|
2013-10-21 23:25:53 +02:00
|
|
|
conversation_length[key] += 1
|
|
|
|
|
2016-01-25 01:27:18 +01:00
|
|
|
diversity_list = list(conversation_diversity.items())
|
2013-10-21 23:25:53 +02:00
|
|
|
diversity_list.sort(key=lambda entry: len(entry[1]), reverse=True)
|
|
|
|
|
2016-01-25 01:27:18 +01:00
|
|
|
length_list = list(conversation_length.items())
|
2013-10-21 23:25:53 +02:00
|
|
|
length_list.sort(key=lambda entry: entry[1], reverse=True)
|
|
|
|
|
|
|
|
# Get up to the 4 best conversations from the diversity list
|
|
|
|
# and length list, filtering out overlapping conversations.
|
|
|
|
hot_conversations = [elt[0] for elt in diversity_list[:2]]
|
|
|
|
for candidate, _ in length_list:
|
|
|
|
if candidate not in hot_conversations:
|
|
|
|
hot_conversations.append(candidate)
|
|
|
|
if len(hot_conversations) >= 4:
|
|
|
|
break
|
|
|
|
|
2013-12-13 20:18:44 +01:00
|
|
|
# There was so much overlap between the diversity and length lists that we
|
|
|
|
# still have < 4 conversations. Try to use remaining diversity items to pad
|
|
|
|
# out the hot conversations.
|
|
|
|
num_convos = len(hot_conversations)
|
|
|
|
if num_convos < 4:
|
|
|
|
hot_conversations.extend([elt[0] for elt in diversity_list[num_convos:4]])
|
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
hot_conversation_render_payloads = []
|
|
|
|
for h in hot_conversations:
|
|
|
|
users = list(conversation_diversity[h])
|
|
|
|
count = conversation_length[h]
|
2018-11-11 18:46:21 +01:00
|
|
|
messages = conversation_messages[h]
|
2013-10-21 23:25:53 +02:00
|
|
|
|
|
|
|
# We'll display up to 2 messages from the conversation.
|
2018-11-11 18:46:21 +01:00
|
|
|
first_few_messages = messages[:2]
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2013-10-25 18:53:35 +02:00
|
|
|
teaser_data = {"participants": users,
|
2013-10-21 23:25:53 +02:00
|
|
|
"count": count - len(first_few_messages),
|
2016-12-02 08:15:16 +01:00
|
|
|
"first_few_messages": build_message_list(
|
2017-01-24 06:02:39 +01:00
|
|
|
user_profile, first_few_messages)}
|
2013-10-21 23:25:53 +02:00
|
|
|
|
|
|
|
hot_conversation_render_payloads.append(teaser_data)
|
2013-12-13 20:26:44 +01:00
|
|
|
return hot_conversation_render_payloads
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def gather_new_streams(user_profile: UserProfile,
|
2018-05-10 19:13:36 +02:00
|
|
|
threshold: datetime.datetime) -> Tuple[int, Dict[str, List[str]]]:
|
2020-07-30 05:48:04 +02:00
|
|
|
if user_profile.can_access_public_streams():
|
2014-01-24 23:30:53 +01:00
|
|
|
new_streams = list(get_active_streams(user_profile.realm).filter(
|
2017-01-24 07:06:13 +01:00
|
|
|
invite_only=False, date_created__gt=threshold))
|
2018-04-20 20:59:22 +02:00
|
|
|
else:
|
|
|
|
new_streams = []
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2020-06-10 06:41:04 +02:00
|
|
|
base_url = f"{user_profile.realm.uri}/#narrow/stream/"
|
2013-10-25 18:53:35 +02:00
|
|
|
|
|
|
|
streams_html = []
|
|
|
|
streams_plain = []
|
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
for stream in new_streams:
|
2018-02-15 21:02:47 +01:00
|
|
|
narrow_url = base_url + encode_stream(stream.id, stream.name)
|
2020-06-10 06:41:04 +02:00
|
|
|
stream_link = f"<a href='{narrow_url}'>{stream.name}</a>"
|
2013-10-25 18:53:35 +02:00
|
|
|
streams_html.append(stream_link)
|
|
|
|
streams_plain.append(stream.name)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
|
|
|
return len(new_streams), {"html": streams_html, "plain": streams_plain}
|
|
|
|
|
2019-05-02 05:54:48 +02:00
|
|
|
def enough_traffic(hot_conversations: str, new_streams: int) -> bool:
|
|
|
|
return bool(hot_conversations or new_streams)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2018-08-12 22:09:34 +02:00
|
|
|
def handle_digest_email(user_profile_id: int, cutoff: float,
|
|
|
|
render_to_web: bool = False) -> Union[None, Dict[str, Any]]:
|
2017-08-15 21:53:48 +02:00
|
|
|
user_profile = get_user_profile_by_id(user_profile_id)
|
2017-08-16 06:59:38 +02:00
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
# Convert from epoch seconds to a datetime object.
|
2020-06-05 06:55:20 +02:00
|
|
|
cutoff_date = datetime.datetime.fromtimestamp(int(cutoff), tz=datetime.timezone.utc)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2017-05-04 23:37:01 +02:00
|
|
|
context = common_context(user_profile)
|
2016-11-08 10:07:47 +01:00
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
# Start building email template data.
|
2017-05-04 23:37:01 +02:00
|
|
|
context.update({
|
python: Use trailing commas consistently.
Automatically generated by the following script, based on the output
of lint with flake8-comma:
import re
import sys
last_filename = None
last_row = None
lines = []
for msg in sys.stdin:
m = re.match(
r"\x1b\[35mflake8 \|\x1b\[0m \x1b\[1;31m(.+):(\d+):(\d+): (\w+)", msg
)
if m:
filename, row_str, col_str, err = m.groups()
row, col = int(row_str), int(col_str)
if filename == last_filename:
assert last_row != row
else:
if last_filename is not None:
with open(last_filename, "w") as f:
f.writelines(lines)
with open(filename) as f:
lines = f.readlines()
last_filename = filename
last_row = row
line = lines[row - 1]
if err in ["C812", "C815"]:
lines[row - 1] = line[: col - 1] + "," + line[col - 1 :]
elif err in ["C819"]:
assert line[col - 2] == ","
lines[row - 1] = line[: col - 2] + line[col - 1 :].lstrip(" ")
if last_filename is not None:
with open(last_filename, "w") as f:
f.writelines(lines)
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-10 05:23:40 +02:00
|
|
|
'unsubscribe_link': one_click_unsubscribe_link(user_profile, "digest"),
|
2017-01-24 06:34:26 +01:00
|
|
|
})
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2019-03-03 07:14:58 +01:00
|
|
|
home_view_streams = Subscription.objects.filter(
|
2019-03-03 07:09:32 +01:00
|
|
|
user_profile=user_profile,
|
2019-03-03 07:14:58 +01:00
|
|
|
recipient__type=Recipient.STREAM,
|
2019-03-03 07:09:32 +01:00
|
|
|
active=True,
|
2018-08-02 23:46:05 +02:00
|
|
|
is_muted=False).values_list('recipient__type_id', flat=True)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2019-03-03 07:14:58 +01:00
|
|
|
if not user_profile.long_term_idle:
|
|
|
|
stream_ids = home_view_streams
|
|
|
|
else:
|
|
|
|
stream_ids = exclude_subscription_modified_streams(user_profile, home_view_streams, cutoff_date)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2019-03-03 07:14:58 +01:00
|
|
|
# Fetch list of all messages sent after cutoff_date where the user is subscribed
|
|
|
|
messages = Message.objects.filter(
|
|
|
|
recipient__type=Recipient.STREAM,
|
|
|
|
recipient__type_id__in=stream_ids,
|
2019-08-28 02:43:19 +02:00
|
|
|
date_sent__gt=cutoff_date).select_related('recipient', 'sender', 'sending_client')
|
2019-03-03 10:54:21 +01:00
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
# Gather hot conversations.
|
2017-05-04 23:37:01 +02:00
|
|
|
context["hot_conversations"] = gather_hot_conversations(
|
2019-03-03 10:54:21 +01:00
|
|
|
user_profile, messages)
|
2013-10-21 23:25:53 +02:00
|
|
|
|
|
|
|
# Gather new streams.
|
|
|
|
new_streams_count, new_streams = gather_new_streams(
|
2016-06-04 21:50:32 +02:00
|
|
|
user_profile, cutoff_date)
|
2017-05-04 23:37:01 +02:00
|
|
|
context["new_streams"] = new_streams
|
|
|
|
context["new_streams_count"] = new_streams_count
|
2013-10-21 23:25:53 +02:00
|
|
|
|
2019-08-17 11:04:48 +02:00
|
|
|
# TODO: Set has_preheader if we want to include a preheader.
|
|
|
|
|
2018-08-12 22:09:34 +02:00
|
|
|
if render_to_web:
|
|
|
|
return context
|
|
|
|
|
2013-10-21 23:25:53 +02:00
|
|
|
# We don't want to send emails containing almost no information.
|
2019-05-02 05:54:48 +02:00
|
|
|
if enough_traffic(context["hot_conversations"], new_streams_count):
|
2020-05-02 08:44:14 +02:00
|
|
|
logger.info("Sending digest email for user %s", user_profile.id)
|
2017-07-02 21:10:41 +02:00
|
|
|
# Send now, as a ScheduledEmail
|
2018-12-03 23:26:51 +01:00
|
|
|
send_future_email('zerver/emails/digest', user_profile.realm, to_user_ids=[user_profile.id],
|
2020-03-12 20:28:05 +01:00
|
|
|
from_name="Zulip Digest", from_address=FromAddress.no_reply_placeholder,
|
|
|
|
context=context)
|
2018-08-12 22:09:34 +02:00
|
|
|
return None
|
2019-03-03 07:14:58 +01:00
|
|
|
|
|
|
|
def exclude_subscription_modified_streams(user_profile: UserProfile,
|
|
|
|
stream_ids: List[int],
|
|
|
|
cutoff_date: datetime.datetime) -> List[int]:
|
|
|
|
"""Exclude streams from given list where users' subscription was modified."""
|
|
|
|
|
|
|
|
events = [
|
|
|
|
RealmAuditLog.SUBSCRIPTION_CREATED,
|
|
|
|
RealmAuditLog.SUBSCRIPTION_ACTIVATED,
|
python: Use trailing commas consistently.
Automatically generated by the following script, based on the output
of lint with flake8-comma:
import re
import sys
last_filename = None
last_row = None
lines = []
for msg in sys.stdin:
m = re.match(
r"\x1b\[35mflake8 \|\x1b\[0m \x1b\[1;31m(.+):(\d+):(\d+): (\w+)", msg
)
if m:
filename, row_str, col_str, err = m.groups()
row, col = int(row_str), int(col_str)
if filename == last_filename:
assert last_row != row
else:
if last_filename is not None:
with open(last_filename, "w") as f:
f.writelines(lines)
with open(filename) as f:
lines = f.readlines()
last_filename = filename
last_row = row
line = lines[row - 1]
if err in ["C812", "C815"]:
lines[row - 1] = line[: col - 1] + "," + line[col - 1 :]
elif err in ["C819"]:
assert line[col - 2] == ","
lines[row - 1] = line[: col - 2] + line[col - 1 :].lstrip(" ")
if last_filename is not None:
with open(last_filename, "w") as f:
f.writelines(lines)
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-10 05:23:40 +02:00
|
|
|
RealmAuditLog.SUBSCRIPTION_DEACTIVATED,
|
2019-03-03 07:14:58 +01:00
|
|
|
]
|
|
|
|
|
|
|
|
# Streams where the user's subscription was changed
|
|
|
|
modified_streams = RealmAuditLog.objects.filter(
|
|
|
|
realm=user_profile.realm,
|
|
|
|
modified_user=user_profile,
|
|
|
|
event_time__gt=cutoff_date,
|
|
|
|
event_type__in=events).values_list('modified_stream_id', flat=True)
|
|
|
|
|
|
|
|
return list(set(stream_ids) - set(modified_streams))
|