Reduce queries needed for sending messages.

In do_send_messages, we only produce one dictionary for the event queues, instead of different flavors for text vs. html. This prevents two unnecessary queries to the database. It also means we only put one dictionary on the "message" event queue instead of two, albeit a wider one that has some values that won't be sent to the actual clients. This wider dictionary from MessageDict.wide_dict is also used for the `feedback_messages` queue and service bot queues. Since the extra fields are possibly useful down the road, and they'll just be ignored for now, we don't bother to remove them. Also, those queue processors won't have access to `content_type`, which they shouldn't need. Fixes #6947
2017-10-20 12:34:05 -07:00 · 2017-10-20 12:34:05 -07:00 · 635675fe48
parent 9b6a4d0b16
commit 635675fe48
8 changed files with 73 additions and 54 deletions
--- a/zerver/lib/actions.py
+++ b/zerver/lib/actions.py
@ -28,7 +28,6 @@ from zerver.lib.hotspots import get_next_hotspots
 from zerver.lib.message import (
    access_message,
    MessageDict,
-    message_to_dict,
    render_markdown,
 )
 from zerver.lib.realm_icon import realm_icon_url
@ -1098,12 +1097,11 @@ def do_send_messages(messages_maybe_none):
    for message in messages:
        # Deliver events to the real-time push system, as well as
        # enqueuing any additional processing triggered by the message.
-        message_dict_markdown = message_to_dict(message['message'], apply_markdown=True)
-        message_dict_no_markdown = message_to_dict(message['message'], apply_markdown=False)
+        wide_message_dict = MessageDict.wide_dict(message['message'])

        user_flags = user_message_flags.get(message['message'].id, {})
        sender = message['message'].sender
-        message_type = message_dict_no_markdown['type']
+        message_type = wide_message_dict['type']

        presence_idle_user_ids = get_active_presence_idle_user_ids(
            realm=sender.realm,
@ -1116,8 +1114,7 @@ def do_send_messages(messages_maybe_none):
        event = dict(
            type='message',
            message=message['message'].id,
-            message_dict_markdown=message_dict_markdown,
-            message_dict_no_markdown=message_dict_no_markdown,
+            message_dict=wide_message_dict,
            presence_idle_user_ids=presence_idle_user_ids,
        )

@ -1166,7 +1163,7 @@ def do_send_messages(messages_maybe_none):
            if feedback_bot_id in message['active_user_ids']:
                queue_json_publish(
                    'feedback_messages',
-                    message_to_dict(message['message'], apply_markdown=False),
+                    wide_message_dict,
                    lambda x: None
                )

@ -1181,7 +1178,7 @@ def do_send_messages(messages_maybe_none):
                queue_json_publish(
                    queue_name,
                    {
-                        "message": message_to_dict(message['message'], apply_markdown=False),
+                        "message": wide_message_dict,
                        "trigger": event['trigger'],
                        "user_profile_id": event["user_profile_id"],
                        "failed_tries": 0,
--- a/zerver/lib/message.py
+++ b/zerver/lib/message.py
@ -74,30 +74,41 @@ def stringify_message_dict(message_dict):
    # type: (Dict[str, Any]) -> binary_type
    return zlib.compress(force_bytes(ujson.dumps(message_dict)))

-def message_to_dict(message, apply_markdown):
-    # type: (Message, bool) -> Dict[str, Any]
-    json = message_to_dict_json(message)
-    obj = extract_message_dict(json)
-
-    '''
-    In this codepath we do net yet optimize for clients
-    that can compute their own gravatar URLs.
-    '''
-    client_gravatar = False
-
-    MessageDict.post_process_dicts(
-        [obj],
-        apply_markdown=apply_markdown,
-        client_gravatar=client_gravatar,
-    )
-    return obj
-
@cache_with_key(to_dict_cache_key, timeout=3600*24)
 def message_to_dict_json(message):
    # type: (Message) -> binary_type
    return MessageDict.to_dict_uncached(message)

 class MessageDict(object):
+    @staticmethod
+    def wide_dict(message):
+        # type: (Message) -> Dict[str, Any]
+        '''
+        The next two lines get the cachable field related
+        to our message object, with the side effect of
+        populating the cache.
+        '''
+        json = message_to_dict_json(message)
+        obj = extract_message_dict(json)
+
+        '''
+        In this codepath we do net yet optimize for clients
+        that can compute their own gravatar URLs.
+        '''
+        client_gravatar = False
+
+        '''
+        The steps below are similar to what we do in
+        post_process_dicts(), except we don't call finalize_payload(),
+        since that step happens later in the queue
+        processor.
+        '''
+        MessageDict.bulk_hydrate_sender_info([obj])
+        MessageDict.hydrate_recipient_info(obj)
+        MessageDict.set_sender_avatar(obj, client_gravatar)
+
+        return obj
+
    @staticmethod
    def post_process_dicts(objs, apply_markdown, client_gravatar):
        # type: (List[Dict[str, Any]], bool, bool) -> None
--- a/zerver/tests/test_events.py
+++ b/zerver/tests/test_events.py
@ -2109,19 +2109,24 @@ class ClientDescriptorsTest(ZulipTestCase):
        sender_id = hamlet.id

        message_event = dict(
-            message_dict_markdown=dict(
-                id=999,
-                content='<b>hello</b>',
-                sender_id=sender_id,
-                type='stream',
-                client='website',
-            ),
-            message_dict_no_markdown=dict(
+            message_dict=dict(
                id=999,
                content='**hello**',
+                rendered_content='<b>hello</b>',
                sender_id=sender_id,
                type='stream',
                client='website',
+
+                # NOTE: Some of these fields are clutter, but some
+                #       will be useful when we let clients specify
+                #       that they can compute their own gravatar URLs.
+                sender_realm_id=None,
+                sender_avatar_source=None,
+                sender_avatar_version=None,
+                sender_is_mirror_dummy=None,
+                raw_display_recipient=None,
+                recipient_type=None,
+                recipient_type_id=None,
            ),
        )

@ -2142,6 +2147,7 @@ class ClientDescriptorsTest(ZulipTestCase):
                    sender_id=sender_id,
                    id=999,
                    content='<b>hello</b>',
+                    content_type='text/html',
                    client='website',
                ),
                flags=['starred'],
@ -2156,6 +2162,7 @@ class ClientDescriptorsTest(ZulipTestCase):
                    sender_id=sender_id,
                    id=999,
                    content='**hello**',
+                    content_type='text/x-markdown',
                    client='website',
                ),
                flags=['has_alert_word'],
--- a/zerver/tests/test_messages.py
+++ b/zerver/tests/test_messages.py
@ -21,7 +21,6 @@ from zerver.lib.actions import (

 from zerver.lib.message import (
    MessageDict,
-    message_to_dict,
    sew_messages_and_reactions,
 )

@ -560,12 +559,7 @@ class StreamMessagesTest(ZulipTestCase):
        with queries_captured() as queries:
            send_message()

-        '''
-        Part of the reason we have so many queries is that we duplicate
-        a lot of code to generate messages with markdown and without
-        markdown.
-        '''
-        self.assert_length(queries, 15)
+        self.assert_length(queries, 13)

    def test_stream_message_dict(self):
        # type: () -> None
@ -1351,7 +1345,9 @@ class EditMessageTest(ZulipTestCase):
    def check_message(self, msg_id, subject=None, content=None):
        # type: (int, Optional[Text], Optional[Text]) -> Message
        msg = Message.objects.get(id=msg_id)
-        cached = message_to_dict(msg, apply_markdown=False)
+        cached = MessageDict.wide_dict(msg)
+        MessageDict.finalize_payload(cached, apply_markdown=False)
+
        uncached = MessageDict.to_dict_uncached_helper(msg)
        MessageDict.post_process_dicts([uncached], apply_markdown=False, client_gravatar=False)
        self.assertEqual(cached, uncached)
--- a/zerver/tests/test_narrow.py
+++ b/zerver/tests/test_narrow.py
@ -14,7 +14,7 @@ from zerver.models import (
    Reaction, UserMessage
 )
 from zerver.lib.message import (
-    message_to_dict,
+    MessageDict,
 )
 from zerver.lib.narrow import (
    build_narrow_filter,
@ -1219,7 +1219,8 @@ class GetOldMessagesTest(ZulipTestCase):
        m = self.get_last_message()
        m.rendered_content = m.rendered_content_version = None
        m.content = 'test content'
-        d = message_to_dict(m, True)
+        d = MessageDict.wide_dict(m)
+        MessageDict.finalize_payload(d, apply_markdown=True)
        self.assertEqual(d['content'], '<p>test content</p>')

    def common_check_get_messages_query(self, query_params, expected):
--- a/zerver/tests/test_signup.py
+++ b/zerver/tests/test_signup.py
@ -324,7 +324,7 @@ class LoginTest(ZulipTestCase):
        with queries_captured() as queries:
            self.register(self.nonreg_email('test'), "test")
        # Ensure the number of queries we make is not O(streams)
-        self.assert_length(queries, 67)
+        self.assert_length(queries, 65)
        user_profile = self.nonreg_user('test')
        self.assertEqual(get_session_dict_user(self.client.session), user_profile.id)
        self.assertFalse(user_profile.enable_stream_desktop_notifications)
--- a/zerver/tests/test_subs.py
+++ b/zerver/tests/test_subs.py
@ -1629,7 +1629,7 @@ class SubscriptionAPITest(ZulipTestCase):
                    streams_to_sub,
                    dict(principals=ujson.dumps([user1.email, user2.email])),
                )
-        self.assert_length(queries, 42)
+        self.assert_length(queries, 40)

        self.assert_length(events, 7)
        for ev in [x for x in events if x['event']['type'] not in ('message', 'stream')]:
--- a/zerver/tornado/event_queue.py
+++ b/zerver/tornado/event_queue.py
@ -28,6 +28,7 @@ from zerver.tornado.handlers import clear_handler_by_id, get_handler_by_id, \
    finish_handler, handler_stats_string
 from zerver.lib.utils import statsd
 from zerver.middleware import async_request_restart
+from zerver.lib.message import MessageDict
 from zerver.lib.narrow import build_narrow_filter
 from zerver.lib.queue import queue_json_publish
 from zerver.lib.request import JsonableError
@ -783,12 +784,18 @@ def process_message_event(event_template, users):
    send_to_clients = get_client_info_for_message_event(event_template, users)

    presence_idle_user_ids = set(event_template.get('presence_idle_user_ids', []))
-    message_dict_markdown = event_template['message_dict_markdown']  # type: Dict[str, Any]
-    message_dict_no_markdown = event_template['message_dict_no_markdown']  # type: Dict[str, Any]
-    sender_id = message_dict_markdown['sender_id']  # type: int
-    message_id = message_dict_markdown['id']  # type: int
-    message_type = message_dict_markdown['type']  # type: str
-    sending_client = message_dict_markdown['client']  # type: Text
+    message_dict = event_template['message_dict']  # type: Dict[str, Any]
+
+    sender_id = message_dict['sender_id']  # type: int
+    message_id = message_dict['id']  # type: int
+    message_type = message_dict['type']  # type: str
+    sending_client = message_dict['client']  # type: Text
+
+    message_dict_html = copy.deepcopy(message_dict)
+    MessageDict.finalize_payload(message_dict_html, apply_markdown=True)
+
+    message_dict_text = copy.deepcopy(message_dict)
+    MessageDict.finalize_payload(message_dict_text, apply_markdown=False)

    # Extra user-specific data to include
    extra_user_data = {}  # type: Dict[int, Any]
@ -828,9 +835,9 @@ def process_message_event(event_template, users):
            continue

        if client.apply_markdown:
-            message_dict = message_dict_markdown
+            message_dict = message_dict_html
        else:
-            message_dict = message_dict_no_markdown
+            message_dict = message_dict_text

        # Make sure Zephyr mirroring bots know whether stream is invite-only
        if "mirror" in client.client_type_name and event_template.get("invite_only"):