From 4e7fce60eefe448768c041c45664668df6c2b78f Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Thu, 14 Sep 2017 10:47:22 -0700 Subject: [PATCH] Add possible_mentions() to speed up rendering. We now triage message content for possible mentions before going to the cache/DB to get name info. This will create an extra data hop for messages with mentions, but it will save a fairly expensive cache lookup for most messages. (This will be especially helpful for large realms.) [Note that we need a subsequent commit to actually make the speedup happen here, since avatars also cause us to look up all users in the realm.] --- zerver/lib/bugdown/__init__.py | 43 ++++++++++++++++++++++++++++++++-- zerver/lib/mention.py | 23 +++++++++++++++++- zerver/tests/test_bugdown.py | 20 +++++++++++++++- 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py index 32c754b52d..a4a0d0f5a4 100644 --- a/zerver/lib/bugdown/__init__.py +++ b/zerver/lib/bugdown/__init__.py @@ -3,6 +3,7 @@ import subprocess # Zulip's main markdown implementation. See docs/markdown.md for # detailed documentation on our markdown syntax. from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Text, Tuple, TypeVar, Union +from mypy_extensions import TypedDict from typing.re import Match import markdown @@ -16,6 +17,7 @@ import html import twitter import platform import time +import functools import httplib2 import itertools import ujson @@ -29,11 +31,13 @@ import requests from django.core import mail from django.conf import settings +from django.db.models import Q from markdown.extensions import codehilite from zerver.lib.bugdown import fenced_code from zerver.lib.bugdown.fenced_code import FENCE_RE from zerver.lib.camo import get_camo_url +from zerver.lib.mention import possible_mentions from zerver.lib.timeout import timeout, TimeoutExpired from zerver.lib.cache import ( cache_with_key, cache_get_many, cache_set_many, NotFoundInCache) @@ -56,6 +60,12 @@ from zerver.lib.tex import render_tex import six from six.moves import range, html_parser +FullNameInfo = TypedDict('FullNameInfo', { + 'id': int, + 'email': Text, + 'full_name': Text, +}) + # Format version of the bugdown rendering; stored along with rendered # messages so that we can efficiently determine what needs to be re-rendered version = 1 @@ -1118,7 +1128,7 @@ class UserMentionPattern(markdown.inlinepatterns.Pattern): name = match wildcard = mention.user_mention_matches_wildcard(name) - user = db_data['full_names'].get(name.lower(), None) + user = db_data['full_name_info'].get(name.lower(), None) if wildcard: current_message.mentions_wildcard = True @@ -1465,6 +1475,32 @@ def log_bugdown_error(msg): could cause an infinite exception loop.""" logging.getLogger('').error(msg) +def get_full_name_info(realm_id, full_names): + # type: (int, Set[Text]) -> Dict[Text, FullNameInfo] + if not full_names: + return dict() + + q_list = { + Q(full_name__iexact=full_name) + for full_name in full_names + } + + rows = UserProfile.objects.filter( + realm_id=realm_id + ).filter( + functools.reduce(lambda a, b: a | b, q_list), + ).values( + 'id', + 'full_name', + 'email', + ) + + dct = { + row['full_name'].lower(): row + for row in rows + } + return dct + def do_convert(content, message=None, message_realm=None, possible_words=None, sent_by_bot=False): # type: (Text, Optional[Message], Optional[Realm], Optional[Set[Text]], Optional[bool]) -> Text """Convert Markdown to HTML, with Zulip-specific settings and hacks.""" @@ -1511,9 +1547,12 @@ def do_convert(content, message=None, message_realm=None, possible_words=None, s if possible_words is None: possible_words = set() # Set[Text] + full_names = possible_mentions(content) + full_name_info = get_full_name_info(message_realm.id, full_names) + db_data = {'possible_words': possible_words, - 'full_names': dict((user['full_name'].lower(), user) for user in realm_users), 'by_email': dict((user['email'].lower(), user) for user in realm_users), + 'full_name_info': full_name_info, 'emoji': message_realm.get_emoji(), 'sent_by_bot': sent_by_bot, 'stream_names': dict((stream['name'], stream) for stream in realm_streams)} diff --git a/zerver/lib/mention.py b/zerver/lib/mention.py index 80e19e4e1f..ec74df33f7 100644 --- a/zerver/lib/mention.py +++ b/zerver/lib/mention.py @@ -1,6 +1,9 @@ from __future__ import absolute_import -from typing import Text +from typing import Optional, Set, Text + +import re + # Match multi-word string between @** ** or match any one-word # sequences after @ find_mentions = r'(? bool return mention in wildcards + +def extract_name(s): + # type: (Text) -> Optional[Text] + if s.startswith("**") and s.endswith("**"): + name = s[2:-2] + if name in wildcards: + return None + return name + + # We don't care about @all or @everyone + return None + +def possible_mentions(content): + # type: (Text) -> Set[Text] + matches = re.findall(find_mentions, content) + names = {extract_name(match) for match in matches} + names = {name for name in names if name} + return names diff --git a/zerver/tests/test_bugdown.py b/zerver/tests/test_bugdown.py index 9ffc6b8aad..76ce6bc1a6 100644 --- a/zerver/tests/test_bugdown.py +++ b/zerver/tests/test_bugdown.py @@ -13,6 +13,7 @@ from zerver.lib.actions import ( from zerver.lib.alert_words import alert_words_in_realm from zerver.lib.camo import get_camo_url from zerver.lib.emoji import get_emoji_url +from zerver.lib.mention import possible_mentions from zerver.lib.message import render_markdown from zerver.lib.request import ( JsonableError, @@ -44,7 +45,7 @@ import six from six.moves import urllib from zerver.lib.str_utils import NonBinaryStr -from typing import Any, AnyStr, Dict, List, Optional, Tuple, Text +from typing import Any, AnyStr, Dict, List, Optional, Set, Tuple, Text class FencedBlockPreprocessorTest(TestCase): def test_simple_quoting(self): @@ -740,6 +741,22 @@ class BugdownTest(ZulipTestCase): '@King Hamlet

' % (self.example_email("hamlet"), user_id)) self.assertEqual(msg.mentions_user_ids, set([user_profile.id])) + def test_possible_mentions(self): + # type: () -> None + def assert_mentions(content, names): + # type: (Text, Set[Text]) -> None + self.assertEqual(possible_mentions(content), names) + + assert_mentions('', set()) + assert_mentions('boring', set()) + assert_mentions('@all', set()) + assert_mentions('smush@**steve**smush', set()) + + assert_mentions( + 'Hello @**King Hamlet** and @**Cordelia Lear**\n@**Foo van Barson** @**all**', + {'King Hamlet', 'Cordelia Lear', 'Foo van Barson'} + ) + def test_mention_multiple(self): # type: () -> None sender_user_profile = self.example_user('othello') @@ -748,6 +765,7 @@ class BugdownTest(ZulipTestCase): msg = Message(sender=sender_user_profile, sending_client=get_client("test")) content = "@**King Hamlet** and @**Cordelia Lear**, check this out" + self.assertEqual(render_markdown(msg, content), '

' '