zulip/zerver/lib/email_notifications.py

from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from confirmation.models import one_click_unsubscribe_link
from django.conf import settings
from django.utils.timezone import now as timezone_now
from django.contrib.auth import get_backends
from django_auth_ldap.backend import LDAPBackend

from zerver.decorator import statsd_increment
from zerver.lib.message import bulk_access_messages
from zerver.lib.queue import queue_json_publish
from zerver.lib.send_email import send_future_email, FromAddress
from zerver.lib.url_encoding import personal_narrow_url, huddle_narrow_url, \
    stream_narrow_url, topic_narrow_url
from zerver.models import (
    Recipient,
    UserMessage,
    Stream,
    get_display_recipient,
    UserProfile,
    get_user_profile_by_id,
    receives_offline_email_notifications,
    get_context_for_message,
    Message,
)

from datetime import timedelta
from email.utils import formataddr
import html2text
from lxml.cssselect import CSSSelector
import lxml.html
import re
from collections import defaultdict
import pytz
from bs4 import BeautifulSoup

def relative_to_full_url(base_url: str, content: str) -> str:
    # Convert relative URLs to absolute URLs.
    fragment = lxml.html.fromstring(content)

    # We handle narrow URLs separately because of two reasons:
    # 1: 'lxml' seems to be having an issue in dealing with URLs that begin
    # `#` due to which it doesn't add a `/` before joining the base_url to
    # the relative URL.
    # 2: We also need to update the title attribute in the narrow links which
    # is not possible with `make_links_absolute()`.
    for link_info in fragment.iterlinks():
        elem, attrib, link, pos = link_info
        match = re.match("/?#narrow/", link)
        if match is not None:
            link = re.sub(r"^/?#narrow/", base_url + "/#narrow/", link)
            elem.set(attrib, link)
            # Only manually linked narrow URLs have title attribute set.
            if elem.get('title') is not None:
                elem.set('title', link)

    # Inline images can't be displayed in the emails as the request
    # from the mail server can't be authenticated because it has no
    # user_profile object linked to it. So we scrub the inline image
    # container.
    inline_image_containers = fragment.find_class("message_inline_image")
    for container in inline_image_containers:
        container.drop_tree()

    # The previous block handles most inline images, but for messages
    # where the entire markdown input was just the URL of an image
    # (i.e. the entire body is a message_inline_image object), the
    # entire message body will be that image element; here, we need a
    # more drastic edit to the content.
    if fragment.get('class') == 'message_inline_image':
        content_template = '<p><a href="%s" target="_blank" title="%s">%s</a></p>'
        image_link = fragment.find('a').get('href')
        image_title = fragment.find('a').get('title')
        new_content = (content_template % (image_link, image_title, image_link))
        fragment = lxml.html.fromstring(new_content)

    fragment.make_links_absolute(base_url)
    content = lxml.html.tostring(fragment).decode("utf-8")

    return content

def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
    def make_emoji_img_elem(emoji_span_elem: CSSSelector) -> Dict[str, Any]:
        # Convert the emoji spans to img tags.
        classes = emoji_span_elem.get('class')
        match = re.search(r'emoji-(?P<emoji_code>\S+)', classes)
        # re.search is capable of returning None,
        # but since the parent function should only be called with a valid css element
        # we assert that it does not.
        assert match is not None
        emoji_code = match.group('emoji_code')
        emoji_name = emoji_span_elem.get('title')
        alt_code = emoji_span_elem.text
        image_url = base_url + '/static/generated/emoji/images-%(emojiset)s-64/%(emoji_code)s.png' % {
            'emojiset': emojiset,
            'emoji_code': emoji_code
        }
        img_elem = lxml.html.fromstring(
            '<img alt="%(alt_code)s" src="%(image_url)s" title="%(title)s">' % {
                'alt_code': alt_code,
                'image_url': image_url,
                'title': emoji_name,
            })
        img_elem.set('style', 'height: 20px;')
        img_elem.tail = emoji_span_elem.tail
        return img_elem

    fragment = lxml.html.fromstring(content)
    for elem in fragment.cssselect('span.emoji'):
        parent = elem.getparent()
        img_elem = make_emoji_img_elem(elem)
        parent.replace(elem, img_elem)

    for realm_emoji in fragment.cssselect('.emoji'):
        del realm_emoji.attrib['class']
        realm_emoji.set('style', 'height: 20px;')

    content = lxml.html.tostring(fragment).decode('utf-8')
    return content

def build_message_list(user_profile: UserProfile, messages: List[Message]) -> List[Dict[str, Any]]:
    """
    Builds the message list object for the missed message email template.
    The messages are collapsed into per-recipient and per-sender blocks, like
    our web interface
    """
    messages_to_render = []  # type: List[Dict[str, Any]]

    def sender_string(message: Message) -> str:
        if message.recipient.type in (Recipient.STREAM, Recipient.HUDDLE):
            return message.sender.full_name
        else:
            return ''

    def fix_plaintext_image_urls(content: str) -> str:
        # Replace image URLs in plaintext content of the form
        #     [image name](image url)
        # with a simple hyperlink.
        return re.sub(r"\[(\S*)\]\((\S*)\)", r"\2", content)

    def append_sender_to_message(message_plain: str, message_html: str, sender: str) -> Tuple[str, str]:
        message_plain = "{}: {}".format(sender, message_plain)
        message_soup = BeautifulSoup(message_html, "html.parser")
        sender_name_soup = BeautifulSoup("<b>{}</b>: ".format(sender), "html.parser")
        first_tag = message_soup.find()
        if first_tag.name == "p":
            first_tag.insert(0, sender_name_soup)
        else:
            message_soup.insert(0, sender_name_soup)
        return message_plain, str(message_soup)

    def build_message_payload(message: Message, sender: Optional[str]=None) -> Dict[str, str]:
        plain = message.content
        plain = fix_plaintext_image_urls(plain)
        # There's a small chance of colliding with non-Zulip URLs containing
        # "/user_uploads/", but we don't have much information about the
        # structure of the URL to leverage. We can't use `relative_to_full_url()`
        # function here because it uses a stricter regex which will not work for
        # plain text.
        plain = re.sub(
            r"/user_uploads/(\S*)",
            user_profile.realm.uri + r"/user_uploads/\1", plain)

        assert message.rendered_content is not None
        html = message.rendered_content
        html = relative_to_full_url(user_profile.realm.uri, html)
        html = fix_emojis(html, user_profile.realm.uri, user_profile.emojiset)
        if sender:
            plain, html = append_sender_to_message(plain, html, sender)
        return {'plain': plain, 'html': html}

    def build_sender_payload(message: Message) -> Dict[str, Any]:
        sender = sender_string(message)
        return {'sender': sender,
                'content': [build_message_payload(message, sender)]}

    def message_header(user_profile: UserProfile, message: Message) -> Dict[str, Any]:
        if message.recipient.type == Recipient.PERSONAL:
            narrow_link = get_narrow_url(user_profile, message)
            header = "You and %s" % (message.sender.full_name,)
            header_html = "<a style='color: #ffffff;' href='%s'>%s</a>" % (narrow_link, header)
        elif message.recipient.type == Recipient.HUDDLE:
            display_recipient = get_display_recipient(message.recipient)
            assert not isinstance(display_recipient, str)
            narrow_link = get_narrow_url(user_profile, message,
                                         display_recipient=display_recipient)
            other_recipients = [r['full_name'] for r in display_recipient
                                if r['id'] != user_profile.id]
            header = "You and %s" % (", ".join(other_recipients),)
            header_html = "<a style='color: #ffffff;' href='%s'>%s</a>" % (narrow_link, header)
        else:
            stream = Stream.objects.only('id', 'name').get(id=message.recipient.type_id)
            narrow_link = get_narrow_url(user_profile, message, stream=stream)
            header = "%s > %s" % (stream.name, message.topic_name())
            stream_link = stream_narrow_url(user_profile.realm, stream)
            header_html = "<a href='%s'>%s</a> > <a href='%s'>%s</a>" % (
                stream_link, stream.name, narrow_link, message.topic_name())
        return {"plain": header,
                "html": header_html,
                "stream_message": message.recipient.type_name() == "stream"}

    # # Collapse message list to
    # [
    #    {
    #       "header": {
    #                   "plain":"header",
    #                   "html":"htmlheader"
    #                 }
    #       "senders":[
    #          {
    #             "sender":"sender_name",
    #             "content":[
    #                {
    #                   "plain":"content",
    #                   "html":"htmlcontent"
    #                }
    #                {
    #                   "plain":"content",
    #                   "html":"htmlcontent"
    #                }
    #             ]
    #          }
    #       ]
    #    },
    # ]

    messages.sort(key=lambda message: message.pub_date)

    for message in messages:
        header = message_header(user_profile, message)

        # If we want to collapse into the previous recipient block
        if len(messages_to_render) > 0 and messages_to_render[-1]['header'] == header:
            sender = sender_string(message)
            sender_block = messages_to_render[-1]['senders']

            # Same message sender, collapse again
            if sender_block[-1]['sender'] == sender:
                sender_block[-1]['content'].append(build_message_payload(message))
            else:
                # Start a new sender block
                sender_block.append(build_sender_payload(message))
        else:
            # New recipient and sender block
            recipient_block = {'header': header,
                               'senders': [build_sender_payload(message)]}

            messages_to_render.append(recipient_block)

    return messages_to_render

def get_narrow_url(user_profile: UserProfile, message: Message,
                   display_recipient: Optional[Union[str, List[Dict[str, Any]]]]=None,
                   stream: Optional[Stream]=None) -> str:
    """The display_recipient and stream arguments are optional.  If not
    provided, we'll compute them from the message; they exist as a
    performance optimization for cases where the caller needs those
    data too.
    """
    if message.recipient.type == Recipient.PERSONAL:
        assert stream is None
        assert display_recipient is None
        return personal_narrow_url(
            realm=user_profile.realm,
            sender=message.sender,
        )
    elif message.recipient.type == Recipient.HUDDLE:
        assert stream is None
        if display_recipient is None:
            display_recipient = get_display_recipient(message.recipient)
        assert display_recipient is not None
        assert not isinstance(display_recipient, str)
        other_user_ids = [r['id'] for r in display_recipient
                          if r['id'] != user_profile.id]
        return huddle_narrow_url(
            realm=user_profile.realm,
            other_user_ids=other_user_ids,
        )
    else:
        assert display_recipient is None
        if stream is None:
            stream = Stream.objects.only('id', 'name').get(id=message.recipient.type_id)
        return topic_narrow_url(user_profile.realm, stream, message.topic_name())

def message_content_allowed_in_missedmessage_emails(user_profile: UserProfile) -> bool:
    return user_profile.realm.message_content_allowed_in_email_notifications and \
        user_profile.message_content_in_email_notifications

@statsd_increment("missed_message_reminders")
def do_send_missedmessage_events_reply_in_zulip(user_profile: UserProfile,
                                                missed_messages: List[Dict[str, Any]],
                                                message_count: int) -> None:
    """
    Send a reminder email to a user if she's missed some PMs by being offline.

    The email will have its reply to address set to a limited used email
    address that will send a zulip message to the correct recipient. This
    allows the user to respond to missed PMs, huddles, and @-mentions directly
    from the email.

    `user_profile` is the user to send the reminder to
    `missed_messages` is a list of dictionaries to Message objects and other data
                      for a group of messages that share a recipient (and topic)
    """
    from zerver.context_processors import common_context
    # Disabled missedmessage emails internally
    if not user_profile.enable_offline_email_notifications:
        return

    recipients = set((msg['message'].recipient_id, msg['message'].topic_name()) for msg in missed_messages)
    if len(recipients) != 1:
        raise ValueError(
            'All missed_messages must have the same recipient and topic %r' %
            (recipients,)
        )

    # This link is no longer a part of the email, but keeping the code in case
    # we find a clean way to add it back in the future
    unsubscribe_link = one_click_unsubscribe_link(user_profile, "missed_messages")
    context = common_context(user_profile)
    context.update({
        'name': user_profile.full_name,
        'message_count': message_count,
        'unsubscribe_link': unsubscribe_link,
        'realm_name_in_notifications': user_profile.realm_name_in_notifications,
        'show_message_content': message_content_allowed_in_missedmessage_emails(user_profile)
    })

    triggers = list(message['trigger'] for message in missed_messages)
    unique_triggers = set(triggers)
    context.update({
        'mention': 'mentioned' in unique_triggers,
        'stream_email_notify': 'stream_email_notify' in unique_triggers,
        'mention_count': triggers.count('mentioned'),
    })

    # If this setting (email mirroring integration) is enabled, only then
    # can users reply to email to send message to Zulip. Thus, one must
    # ensure to display warning in the template.
    if settings.EMAIL_GATEWAY_PATTERN:
        context.update({
            'reply_to_zulip': True,
        })
    else:
        context.update({
            'reply_to_zulip': False,
        })

    from zerver.lib.email_mirror import create_missed_message_address
    reply_to_address = create_missed_message_address(user_profile, missed_messages[0]['message'])
    if reply_to_address == FromAddress.NOREPLY:
        reply_to_name = None
    else:
        reply_to_name = "Zulip"

    narrow_url = get_narrow_url(user_profile, missed_messages[0]['message'])
    context.update({
        'narrow_url': narrow_url,
    })

    senders = list(set(m['message'].sender for m in missed_messages))
    if (missed_messages[0]['message'].recipient.type == Recipient.HUDDLE):
        display_recipient = get_display_recipient(missed_messages[0]['message'].recipient)
        # Make sure that this is a list of strings, not a string.
        assert not isinstance(display_recipient, str)
        other_recipients = [r['full_name'] for r in display_recipient
                            if r['id'] != user_profile.id]
        context.update({'group_pm': True})
        if len(other_recipients) == 2:
            huddle_display_name = " and ".join(other_recipients)
            context.update({'huddle_display_name': huddle_display_name})
        elif len(other_recipients) == 3:
            huddle_display_name = "%s, %s, and %s" % (
                other_recipients[0], other_recipients[1], other_recipients[2])
            context.update({'huddle_display_name': huddle_display_name})
        else:
            huddle_display_name = "%s, and %s others" % (
                ', '.join(other_recipients[:2]), len(other_recipients) - 2)
            context.update({'huddle_display_name': huddle_display_name})
    elif (missed_messages[0]['message'].recipient.type == Recipient.PERSONAL):
        context.update({'private_message': True})
    elif (context['mention'] or context['stream_email_notify']):
        # Keep only the senders who actually mentioned the user
        if context['mention']:
            senders = list(set(m['message'].sender for m in missed_messages
                           if m['trigger'] == 'mentioned'))
            # TODO: When we add wildcard mentions that send emails, we
            # should make sure the right logic applies here.

        message = missed_messages[0]['message']
        stream = Stream.objects.only('id', 'name').get(id=message.recipient.type_id)
        stream_header = "%s > %s" % (stream.name, message.topic_name())
        context.update({
            'stream_header': stream_header,
        })
    else:
        raise AssertionError("Invalid messages!")

    # If message content is disabled, then flush all information we pass to email.
    if not message_content_allowed_in_missedmessage_emails(user_profile):
        context.update({
            'reply_to_zulip': False,
            'messages': [],
            'sender_str': "",
            'realm_str': user_profile.realm.name,
            'huddle_display_name': "",
        })
    else:
        context.update({
            'messages': build_message_list(user_profile, list(m['message'] for m in missed_messages)),
            'sender_str': ", ".join(sender.full_name for sender in senders),
            'realm_str': user_profile.realm.name,
        })

    from_name = "Zulip missed messages"  # type: str
    from_address = FromAddress.NOREPLY
    if len(senders) == 1 and settings.SEND_MISSED_MESSAGE_EMAILS_AS_USER:
        # If this setting is enabled, you can reply to the Zulip
        # missed message emails directly back to the original sender.
        # However, one must ensure the Zulip server is in the SPF
        # record for the domain, or there will be spam/deliverability
        # problems.
        sender = senders[0]
        from_name, from_address = (sender.full_name, sender.email)
        context.update({
            'reply_to_zulip': False,
        })

    email_dict = {
        'template_prefix': 'zerver/emails/missed_message',
        'to_user_ids': [user_profile.id],
        'from_name': from_name,
        'from_address': from_address,
        'reply_to_email': formataddr((reply_to_name, reply_to_address)),
        'context': context}
    queue_json_publish("email_senders", email_dict)

    user_profile.last_reminder = timezone_now()
    user_profile.save(update_fields=['last_reminder'])

def handle_missedmessage_emails(user_profile_id: int,
                                missed_email_events: Iterable[Dict[str, Any]]) -> None:
    message_ids = {event.get('message_id'): event.get('trigger') for event in missed_email_events}

    user_profile = get_user_profile_by_id(user_profile_id)
    if not receives_offline_email_notifications(user_profile):
        return

    # Note: This query structure automatically filters out any
    # messages that were permanently deleted, since those would now be
    # in the ArchivedMessage table, not the Message table.
    messages = Message.objects.filter(usermessage__user_profile_id=user_profile,
                                      id__in=message_ids,
                                      usermessage__flags=~UserMessage.flags.read)

    # Cancel missed-message emails for deleted messages
    messages = [um for um in messages if um.content != "(deleted)"]

    if not messages:
        return

    # We bucket messages by tuples that identify similar messages.
    # For streams it's recipient_id and topic.
    # For PMs it's recipient id and sender.
    messages_by_bucket = defaultdict(list)  # type: Dict[Tuple[int, str], List[Message]]
    for msg in messages:
        if msg.recipient.type == Recipient.PERSONAL:
            # For PM's group using (recipient, sender).
            messages_by_bucket[(msg.recipient_id, msg.sender_id)].append(msg)
        else:
            messages_by_bucket[(msg.recipient_id, msg.topic_name())].append(msg)

    message_count_by_bucket = {
        bucket_tup: len(msgs)
        for bucket_tup, msgs in messages_by_bucket.items()
    }

    for msg_list in messages_by_bucket.values():
        msg = min(msg_list, key=lambda msg: msg.pub_date)
        if msg.is_stream_message():
            context_messages = get_context_for_message(msg)
            filtered_context_messages = bulk_access_messages(user_profile, context_messages)
            msg_list.extend(filtered_context_messages)

    # Sort emails by least recently-active discussion.
    bucket_tups = []  # type: List[Tuple[Tuple[int, str], int]]
    for bucket_tup, msg_list in messages_by_bucket.items():
        max_message_id = max(msg_list, key=lambda msg: msg.id).id
        bucket_tups.append((bucket_tup, max_message_id))

    bucket_tups = sorted(bucket_tups, key=lambda x: x[1])

    # Send an email per bucket.
    for bucket_tup, ignored_max_id in bucket_tups:
        unique_messages = {}
        for m in messages_by_bucket[bucket_tup]:
            unique_messages[m.id] = dict(
                message=m,
                trigger=message_ids.get(m.id)
            )
        do_send_missedmessage_events_reply_in_zulip(
            user_profile,
            list(unique_messages.values()),
            message_count_by_bucket[bucket_tup],
        )

def log_digest_event(msg: str) -> None:
    import logging
    import time
    logging.Formatter.converter = time.gmtime
    logging.basicConfig(filename=settings.DIGEST_LOG_PATH, level=logging.INFO)
    logging.info(msg)

def followup_day2_email_delay(user: UserProfile) -> timedelta:
    days_to_delay = 2
    user_tz = user.timezone
    if user_tz == '':
        user_tz = 'UTC'
    signup_day = user.date_joined.astimezone(pytz.timezone(user_tz)).isoweekday()
    if signup_day == 5:
        # If the day is Friday then delay should be till Monday
        days_to_delay = 3
    elif signup_day == 4:
        # If the day is Thursday then delay should be till Friday
        days_to_delay = 1

    # The delay should be 1 hour before the above calculated delay as
    # our goal is to maximize the chance that this email is near the top
    # of the user's inbox when the user sits down to deal with their inbox,
    # or comes in while they are dealing with their inbox.
    return timedelta(days=days_to_delay, hours=-1)

def enqueue_welcome_emails(user: UserProfile, realm_creation: bool=False) -> None:
    from zerver.context_processors import common_context
    if settings.WELCOME_EMAIL_SENDER is not None:
        # line break to avoid triggering lint rule
        from_name = settings.WELCOME_EMAIL_SENDER['name']
        from_address = settings.WELCOME_EMAIL_SENDER['email']
    else:
        from_name = None
        from_address = FromAddress.SUPPORT

    other_account_count = UserProfile.objects.filter(
        delivery_email__iexact=user.delivery_email).exclude(id=user.id).count()
    unsubscribe_link = one_click_unsubscribe_link(user, "welcome")
    context = common_context(user)
    context.update({
        'unsubscribe_link': unsubscribe_link,
        'keyboard_shortcuts_link': user.realm.uri + '/help/keyboard-shortcuts',
        'realm_name': user.realm.name,
        'realm_creation': realm_creation,
        'email': user.email,
        'is_realm_admin': user.is_realm_admin,
    })
    if user.is_realm_admin:
        context['getting_started_link'] = (user.realm.uri +
                                           '/help/getting-your-organization-started-with-zulip')
    else:
        context['getting_started_link'] = "https://zulipchat.com"

    from zproject.backends import email_belongs_to_ldap

    if email_belongs_to_ldap(user.realm, user.email):
        context["ldap"] = True
        if settings.LDAP_APPEND_DOMAIN:
            for backend in get_backends():
                if isinstance(backend, LDAPBackend):
                    context["ldap_username"] = backend.django_to_ldap_username(user.email)
        elif not settings.LDAP_EMAIL_ATTR:
            context["ldap_username"] = user.email

    send_future_email(
        "zerver/emails/followup_day1", user.realm, to_user_ids=[user.id], from_name=from_name,
        from_address=from_address, context=context)

    if other_account_count == 0:
        send_future_email(
            "zerver/emails/followup_day2", user.realm, to_user_ids=[user.id], from_name=from_name,
            from_address=from_address, context=context, delay=followup_day2_email_delay(user))

def convert_html_to_markdown(html: str) -> str:
    parser = html2text.HTML2Text()
    markdown = parser.handle(html).strip()

    # We want images to get linked and inline previewed, but html2text will turn
    # them into links of the form `![](http://foo.com/image.png)`, which is
    # ugly. Run a regex over the resulting description, turning links of the
    # form `![](http://foo.com/image.png?12345)` into
    # `[image.png](http://foo.com/image.png)`.
    return re.sub("!\\[\\]\\((\\S*)/(\\S*)\\?(\\S*)\\)",
                  "[\\2](\\1/\\2)", markdown)