zulip/zerver/lib/digest.py

from __future__ import absolute_import

from collections import defaultdict
import datetime

from django.db.models import Q
from django.template import loader
from django.conf import settings

from zerver.lib.notifications import build_message_list, hashchange_encode, \
    send_future_email, one_click_unsubscribe_link
from zerver.models import UserProfile, UserMessage, Recipient, Stream, \
    Subscription, get_active_streams

import logging

# Timestamp-prefixed record format, used both for the root logging
# configuration and for the digest log file handler below.
log_format = "%(asctime)s: %(message)s"
logging.basicConfig(format=log_format)

# File handler that writes records to the path named by
# settings.DIGEST_LOG_PATH, using the same format as above.
formatter = logging.Formatter(log_format)
file_handler = logging.FileHandler(settings.DIGEST_LOG_PATH)
file_handler.setFormatter(formatter)

# Module logger: accepts everything from DEBUG upwards and sends it to
# the digest log file through file_handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(file_handler)

# Digests accumulate 4 types of interesting traffic for a user:
# 1. Missed PMs
# 2. New streams
# 3. New users
# 4. Interesting stream traffic, as determined by the longest and most
#    diversely commented-upon topics.

def gather_hot_conversations(user_profile, stream_messages):
    """Select up to 4 "hot" stream conversations and build their teasers.

    A conversation is identified by the pair
    (message.recipient.type_id, message.subject).  Two kinds of
    conversations are considered hot:
    1. long conversations (many human-sent messages)
    2. conversations where many different people participated

    Parameters:
      user_profile -- passed through to build_message_list.
      stream_messages -- queryset-like collection of user messages; it
        is iterated once and must also support
        .filter(message__recipient__type_id=..., message__subject=...).

    Returns a list of dictionaries containing the templating
    information for each hot conversation, with the keys
    "participants", "count" and "first_few_messages".
    """
    conversation_length = defaultdict(int)
    conversation_diversity = defaultdict(set)
    for user_message in stream_messages:
        message = user_message.message
        if not message.sent_by_human():
            # Don't include automated messages in the count.
            continue

        key = (message.recipient.type_id, message.subject)
        conversation_diversity[key].add(message.sender.full_name)
        conversation_length[key] += 1

    # sorted() instead of .items().sort(): dict views cannot be sorted
    # in place on Python 3, while sorted() works on both 2 and 3.
    diversity_list = sorted(conversation_diversity.items(),
                            key=lambda entry: len(entry[1]), reverse=True)
    length_list = sorted(conversation_length.items(),
                         key=lambda entry: entry[1], reverse=True)

    # Get up to the 4 best conversations from the diversity list
    # and length list, filtering out overlapping conversations.
    hot_conversations = [key for key, _ in diversity_list[:2]]
    for candidate, _ in length_list:
        if len(hot_conversations) >= 4:
            break
        if candidate not in hot_conversations:
            hot_conversations.append(candidate)

    # Safety net: pad with remaining diversity items if we still have
    # < 4 conversations.  Both tallies are keyed identically above, so
    # this normally adds nothing; it never adds a duplicate.
    for candidate, _ in diversity_list:
        if len(hot_conversations) >= 4:
            break
        if candidate not in hot_conversations:
            hot_conversations.append(candidate)

    hot_conversation_render_payloads = []
    for conversation in hot_conversations:
        stream_id, subject = conversation
        users = list(conversation_diversity[conversation])
        count = conversation_length[conversation]

        # We'll display up to 2 messages from the conversation.
        first_few_messages = [
            user_message.message for user_message in
            stream_messages.filter(
                message__recipient__type_id=stream_id,
                message__subject=subject)[:2]]

        # `count` only tallies human-sent messages, but the preview query
        # above is not restricted that way, so the difference could go
        # negative; clamp it so the template never sees a negative count.
        remaining = max(0, count - len(first_few_messages))

        teaser_data = {"participants": users,
                       "count": remaining,
                       "first_few_messages": build_message_list(
                           user_profile, first_few_messages)}

        hot_conversation_render_payloads.append(teaser_data)
    return hot_conversation_render_payloads

def gather_new_users(user_profile, threshold):
    """Return (count, full names) of recently joined humans in the realm.

    Looks up non-bot users in user_profile's realm whose date_joined is
    later than `threshold`.  For the "mit.edu" realm no lookup is made
    and the result is always (0, []).
    """
    realm = user_profile.realm
    if realm.domain != "mit.edu":
        recent_joiners = UserProfile.objects.filter(
            realm=realm, date_joined__gt=threshold, is_bot=False)
        names = [joiner.full_name for joiner in recent_joiners]
    else:
        names = []

    return len(names), names

def gather_new_streams(user_profile, threshold):
    """Return (count, {"html": [...], "plain": [...]}) for new streams.

    Considers active, non-invite-only streams in user_profile's realm
    created after `threshold`.  "plain" holds the stream names and
    "html" holds an anchor tag per stream pointing at its narrow URL on
    settings.EXTERNAL_HOST.  For the "mit.edu" realm no streams are
    ever reported.
    """
    realm = user_profile.realm
    if realm.domain != "mit.edu":
        recent_streams = list(get_active_streams(realm).filter(
            invite_only=False, date_created__gt=threshold))
    else:
        recent_streams = []

    url_prefix = "https://%s/#narrow/stream/" % (settings.EXTERNAL_HOST,)

    plain_names = [stream.name for stream in recent_streams]
    # NOTE(review): the stream name is placed in the link text without
    # HTML escaping -- confirm that the template escapes it.
    html_links = ["<a href='%s'>%s</a>" % (url_prefix + hashchange_encode(name), name)
                  for name in plain_names]

    return len(recent_streams), {"html": html_links, "plain": plain_names}

def enough_traffic(unread_pms, hot_conversations, new_streams, new_users):
    """Decide whether a digest has enough content to be worth sending.

    True when there is any unread traffic (unread PMs or hot
    conversations), or, failing that, when the realm gained both new
    streams and new users.  Otherwise False.
    """
    return bool(unread_pms
                or hot_conversations
                or (new_streams and new_users))

def send_digest_email(user_profile, html_content, text_content):
    """Hand an already-rendered digest for user_profile to send_future_email.

    The message is addressed to the user's email and full name, sent
    from settings.NOREPLY_EMAIL_ADDRESS, tagged "digest-emails", and
    scheduled with a zero delay (i.e. send now).
    """
    to_addresses = [{'email': user_profile.email, 'name': user_profile.full_name}]
    from_address = {'email': settings.NOREPLY_EMAIL_ADDRESS, 'name': 'Zulip'}
    no_delay = datetime.timedelta(0)

    send_future_email(
        to_addresses,
        html_content,
        text_content,
        "While you've been gone - Zulip",
        delay=no_delay,
        sender=from_address,
        tags=["digest-emails"])

def handle_digest_email(user_profile_id, cutoff):
    """Build a digest for one user and send it if there is enough content.

    Parameters:
      user_profile_id -- id of the UserProfile to build the digest for.
      cutoff -- epoch seconds (anything int() accepts); only messages,
        streams and users newer than this are considered.

    Returns nothing.  When enough_traffic() approves, the digest
    templates are rendered, the send is logged, and the email is handed
    to send_digest_email(); otherwise nothing is sent.
    """
    user_profile = UserProfile.objects.get(id=user_profile_id)
    # Convert from epoch seconds to a datetime object.
    cutoff = datetime.datetime.utcfromtimestamp(int(cutoff))

    all_messages = UserMessage.objects.filter(
        user_profile=user_profile,
        message__pub_date__gt=cutoff).order_by("message__pub_date")

    # Start building email template data.
    template_payload = {
        'name': user_profile.full_name,
        'external_host': settings.EXTERNAL_HOST,
        'unsubscribe_link': one_click_unsubscribe_link(user_profile, "digest")
        }

    # Gather recent missed PMs, re-using the missed PM email logic.
    # You can't have an unread message that you sent, but when testing
    # this causes confusion so filter your messages out.
    pms = all_messages.filter(
        ~Q(message__recipient__type=Recipient.STREAM) &
        ~Q(message__sender=user_profile))

    # Show up to 4 missed PMs.
    pms_limit = 4

    template_payload['unread_pms'] = build_message_list(
        user_profile, [pm.message for pm in pms[:pms_limit]])
    # Number of PMs beyond the ones shown; never negative.  (This used
    # min(), which could never yield a positive count.)  count() asks the
    # database for the total instead of loading every row.
    template_payload['remaining_unread_pms_count'] = max(
        0, pms.count() - pms_limit)

    home_view_recipients = [
        sub.recipient for sub in
        Subscription.objects.filter(
            user_profile=user_profile, active=True, in_home_view=True)]

    stream_messages = all_messages.filter(
        message__recipient__type=Recipient.STREAM,
        message__recipient__in=home_view_recipients)

    # Gather hot conversations.
    template_payload["hot_conversations"] = gather_hot_conversations(
        user_profile, stream_messages)

    # Gather new streams.
    new_streams_count, new_streams = gather_new_streams(
        user_profile, cutoff)
    template_payload["new_streams"] = new_streams
    template_payload["new_streams_count"] = new_streams_count

    # Gather users who signed up recently.
    new_users_count, new_users = gather_new_users(
        user_profile, cutoff)
    template_payload["new_users"] = new_users

    # We don't want to send emails containing almost no information.
    if not enough_traffic(template_payload["unread_pms"],
                          template_payload["hot_conversations"],
                          new_streams_count, new_users_count):
        return

    # Render only once we know the email will actually be sent.
    text_content = loader.render_to_string(
        'zerver/emails/digest/digest_email.txt', template_payload)
    html_content = loader.render_to_string(
        'zerver/emails/digest/digest_email_html.txt', template_payload)

    logger.info("Sending digest email for %s", user_profile.email)
    send_digest_email(user_profile, html_content, text_content)
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`from __future__ import absolute_import`

			`from collections import defaultdict`
			`import datetime`

			`from django.db.models import Q`
			`from django.template import loader`
Don't use hardcoded noreply@zulip.com, zulip@zulip.com, or https://zulip.com (imported from commit 1132553b63ae23ebcca746f0f65205b97bfee7dc) 2013-11-16 00:54:12 +01:00			`from django.conf import settings`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
Split out zerver/lib/notifications.py from actions.py. (imported from commit 784b82834ee4fcb4431e77f8fb1c526f8eec82ad) 2014-01-24 22:29:17 +01:00			`from zerver.lib.notifications import build_message_list, hashchange_encode, \`
Add unsubscribe links to digest e-mails. (imported from commit 4e2a324decf4bc694752cc24b9085361338a08a5) 2013-12-02 01:39:10 +01:00			`send_future_email, one_click_unsubscribe_link`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`from zerver.models import UserProfile, UserMessage, Recipient, Stream, \`
Don't expose deactivated streams to users through clients or API. (imported from commit c32715255b3286f52fb313d35659f9357082603a) 2014-01-24 23:30:53 +01:00			`Subscription, get_active_streams`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
Log the actual sending of digest emails in addition to queuing possible ones. (imported from commit e43ac59e71620fe715db462347db228d5a950aed) 2014-01-08 17:33:12 +01:00			`import logging`

			`log_format = "%(asctime)s: %(message)s"`
			`logging.basicConfig(format=log_format)`

			`formatter = logging.Formatter(log_format)`
			`file_handler = logging.FileHandler(settings.DIGEST_LOG_PATH)`
			`file_handler.setFormatter(formatter)`

			`logger = logging.getLogger(__name__)`
			`logger.setLevel(logging.DEBUG)`
			`logger.addHandler(file_handler)`

digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`# Digests accumulate 4 types of interesting traffic for a user:`
			`# 1. Missed PMs`
			`# 2. New streams`
			`# 3. New users`
			`# 4. Interesting stream traffic, as determined by the longest and most`
			`# diversely comment upon topics.`

			`def gather_hot_conversations(user_profile, stream_messages):`
			`# Gather stream conversations of 2 types:`
			`# 1. long conversations`
			`# 2. conversations where many different people participated`
			`#`
			`# Returns a list of dictionaries containing the templating`
			`# information for each hot conversation.`

			`conversation_length = defaultdict(int)`
			`conversation_diversity = defaultdict(set)`
			`for user_message in stream_messages:`
digest: don't include automated messages in hot conversation accounting. (imported from commit ab8db46c644b3cf66a4bc654d204d870b8fae91f) 2013-12-31 22:45:21 +01:00			`if not user_message.message.sent_by_human():`
			`# Don't include automated messages in the count.`
			`continue`

digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`key = (user_message.message.recipient.type_id,`
			`user_message.message.subject)`
			`conversation_diversity[key].add(`
			`user_message.message.sender.full_name)`
			`conversation_length[key] += 1`

			`diversity_list = conversation_diversity.items()`
			`diversity_list.sort(key=lambda entry: len(entry[1]), reverse=True)`

			`length_list = conversation_length.items()`
			`length_list.sort(key=lambda entry: entry[1], reverse=True)`

			`# Get up to the 4 best conversations from the diversity list`
			`# and length list, filtering out overlapping conversations.`
			`hot_conversations = [elt[0] for elt in diversity_list[:2]]`
			`for candidate, _ in length_list:`
			`if candidate not in hot_conversations:`
			`hot_conversations.append(candidate)`
			`if len(hot_conversations) >= 4:`
			`break`

digest: Always pad out to 4 conversations if possible. (imported from commit 1b31a472db5a22a017e461ba846524226641ade1) 2013-12-13 20:18:44 +01:00			`# There was so much overlap between the diversity and length lists that we`
			`# still have < 4 conversations. Try to use remaining diversity items to pad`
			`# out the hot conversations.`
			`num_convos = len(hot_conversations)`
			`if num_convos < 4:`
			`hot_conversations.extend([elt[0] for elt in diversity_list[num_convos:4]])`

digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`hot_conversation_render_payloads = []`
			`for h in hot_conversations:`
			`stream_id, subject = h`
			`users = list(conversation_diversity[h])`
			`count = conversation_length[h]`

			`# We'll display up to 2 messages from the conversation.`
			`first_few_messages = [user_message.message for user_message in \`
			`stream_messages.filter(`
			`message__recipient__type_id=stream_id,`
			`message__subject=subject)[:2]]`

digest emails: use a template filter to format new users, streams, etc. (imported from commit e61a3dd6753db82da1b30c50e5bbf6273f302645) 2013-10-25 18:53:35 +02:00			`teaser_data = {"participants": users,`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`"count": count - len(first_few_messages),`
			`"first_few_messages": build_message_list(`
			`user_profile, first_few_messages)}`

			`hot_conversation_render_payloads.append(teaser_data)`
digest: Fix erroneous early return giving us only 1 hot conversation. (imported from commit db1588bf98778ef838edd6de88fb1ba750251eae) 2013-12-13 20:26:44 +01:00			`return hot_conversation_render_payloads`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
			`def gather_new_users(user_profile, threshold):`
			`# Gather information on users in the realm who have recently`
			`# joined.`
digest: don't show new users for MIT. (imported from commit b7453cb30e6fcf5b36a5e3e53f67faeb17e74048) 2013-12-16 22:26:04 +01:00			`if user_profile.realm.domain == "mit.edu":`
			`new_users = []`
			`else:`
			`new_users = list(UserProfile.objects.filter(`
			`realm=user_profile.realm, date_joined__gt=threshold,`
			`is_bot=False))`
digest emails: use a template filter to format new users, streams, etc. (imported from commit e61a3dd6753db82da1b30c50e5bbf6273f302645) 2013-10-25 18:53:35 +02:00			`user_names = [user.full_name for user in new_users]`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
digest emails: use a template filter to format new users, streams, etc. (imported from commit e61a3dd6753db82da1b30c50e5bbf6273f302645) 2013-10-25 18:53:35 +02:00			`return len(user_names), user_names`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
			`def gather_new_streams(user_profile, threshold):`
digest: Make it impossible to report new streams for MIT users. We may never enqueue MIT users, but just in case we do, never report new streams. (imported from commit 01885f339d7bbb3c8ee665fd576831206d493b88) 2013-12-13 19:46:47 +01:00			`if user_profile.realm.domain == "mit.edu":`
			`new_streams = []`
			`else:`
Don't expose deactivated streams to users through clients or API. (imported from commit c32715255b3286f52fb313d35659f9357082603a) 2014-01-24 23:30:53 +01:00			`new_streams = list(get_active_streams(user_profile.realm).filter(`
			`invite_only=False, date_created__gt=threshold))`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
Don't use hardcoded noreply@zulip.com, zulip@zulip.com, or https://zulip.com (imported from commit 1132553b63ae23ebcca746f0f65205b97bfee7dc) 2013-11-16 00:54:12 +01:00			`base_url = "https://%s/#narrow/stream/" % (settings.EXTERNAL_HOST,)`
digest emails: use a template filter to format new users, streams, etc. (imported from commit e61a3dd6753db82da1b30c50e5bbf6273f302645) 2013-10-25 18:53:35 +02:00
			`streams_html = []`
			`streams_plain = []`

digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`for stream in new_streams:`
			`narrow_url = base_url + hashchange_encode(stream.name)`
			`stream_link = "<a href='%s'>%s</a>" % (narrow_url, stream.name)`
digest emails: use a template filter to format new users, streams, etc. (imported from commit e61a3dd6753db82da1b30c50e5bbf6273f302645) 2013-10-25 18:53:35 +02:00			`streams_html.append(stream_link)`
			`streams_plain.append(stream.name)`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
			`return len(new_streams), {"html": streams_html, "plain": streams_plain}`

			`def enough_traffic(unread_pms, hot_conversations, new_streams, new_users):`
			`if unread_pms or hot_conversations:`
			`# If you have any unread traffic, good enough.`
			`return True`
			`if new_streams and new_users:`
			`# If you somehow don't have any traffic but your realm did get`
			`# new streams and users, good enough.`
			`return True`
			`return False`

digest: refactor sending the emails into a function for easier testing. (imported from commit 2f3c8fb8e812c04ab838b1034ae0811309186625) 2013-12-02 02:35:51 +01:00			`def send_digest_email(user_profile, html_content, text_content):`
			`recipients = [{'email': user_profile.email, 'name': user_profile.full_name}]`
digest: tweak sender information. (imported from commit fafc54d4b9c9cda447c42f2bd24845aee62624c2) 2013-12-18 19:04:40 +01:00			`subject = "While you've been gone - Zulip"`
			`sender = {'email': settings.NOREPLY_EMAIL_ADDRESS, 'name': 'Zulip'}`
digest: refactor sending the emails into a function for easier testing. (imported from commit 2f3c8fb8e812c04ab838b1034ae0811309186625) 2013-12-02 02:35:51 +01:00
			`# Send now, through Mandrill.`
			`send_future_email(recipients, html_content, text_content, subject,`
			`delay=datetime.timedelta(0), sender=sender,`
			`tags=["digest-emails"])`

digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00			`def handle_digest_email(user_profile_id, cutoff):`
			`user_profile=UserProfile.objects.get(id=user_profile_id)`
			`# Convert from epoch seconds to a datetime object.`
			`cutoff = datetime.datetime.utcfromtimestamp(int(cutoff))`

			`all_messages = UserMessage.objects.filter(`
			`user_profile=user_profile,`
			`message__pub_date__gt=cutoff).order_by("message__pub_date")`

			`# Start building email template data.`
Add unsubscribe links to digest e-mails. (imported from commit 4e2a324decf4bc694752cc24b9085361338a08a5) 2013-12-02 01:39:10 +01:00			`template_payload = {`
			`'name': user_profile.full_name,`
			`'external_host': settings.EXTERNAL_HOST,`
			`'unsubscribe_link': one_click_unsubscribe_link(user_profile, "digest")`
			`}`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
			`# Gather recent missed PMs, re-using the missed PM email logic.`
digest: Don't show PMs sent by you. You can't have unread PMs sent by you, so we weren't explicitly checking this, but when testing locally we often ignore the unread check. Filter PMs sent by you to reduce confusion when testing locally. (imported from commit 0205c4a3ed67790b9d60d4f2b927e4cb9e720bf3) 2013-12-04 23:49:43 +01:00			`# You can't have an unread message that you sent, but when testing`
			`# this causes confusion so filter your messages out.`
			`pms = all_messages.filter(`
			`~Q(message__recipient__type=Recipient.STREAM) & \`
			`~Q(message__sender=user_profile))`
digest: Add functions used to compute interesting traffic. So far, we gather: "hot conversations", missed PMs, new streams, and new users. (imported from commit c3c723d0426cb55bd0e43917c67f93db9052f9ed) 2013-10-21 23:25:53 +02:00
			`# Show up to 4 missed PMs.`
			`pms_limit = 4`

			`template_payload['unread_pms'] = build_message_list(`
			`user_profile, [pm.message for pm in pms[:pms_limit]])`
			`template_payload['remaining_unread_pms_count'] = min(0, len(pms) - pms_limit)`

			`home_view_recipients = [sub.recipient for sub in \`
			`Subscription.objects.filter(`
			`user_profile=user_profile, active=True, in_home_view=True)]`

			`stream_messages = all_messages.filter(`
			`message__recipient__type=Recipient.STREAM,`
			`message__recipient__in=home_view_recipients)`

			`# Gather hot conversations.`
			`template_payload["hot_conversations"] = gather_hot_conversations(`
			`user_profile, stream_messages)`

			`# Gather new streams.`
			`new_streams_count, new_streams = gather_new_streams(`
			`user_profile, cutoff)`
			`template_payload["new_streams"] = new_streams`
			`template_payload["new_streams_count"] = new_streams_count`

			`# Gather users who signed up recently.`
			`new_users_count, new_users = gather_new_users(`
			`user_profile, cutoff)`
			`template_payload["new_users"] = new_users`

			`text_content = loader.render_to_string(`
			`'zerver/emails/digest/digest_email.txt', template_payload)`
			`html_content = loader.render_to_string(`
			`'zerver/emails/digest/digest_email_html.txt', template_payload)`

			`# We don't want to send emails containing almost no information.`
			`if enough_traffic(template_payload["unread_pms"],`
			`template_payload["hot_conversations"],`
			`new_streams_count, new_users_count):`
Log the actual sending of digest emails in addition to queuing possible ones. (imported from commit e43ac59e71620fe715db462347db228d5a950aed) 2014-01-08 17:33:12 +01:00			`logger.info("Sending digest email for %s" % (user_profile.email,))`
digest: refactor sending the emails into a function for easier testing. (imported from commit 2f3c8fb8e812c04ab838b1034ae0811309186625) 2013-12-02 02:35:51 +01:00			`send_digest_email(user_profile, html_content, text_content)`