Cache only one row per message.

Before this change, we populated two cache entries for each message that we sent. The entries were largely redundant, with the only difference being whether we sent the content as raw markdown or as the rendered HTML. This commit makes it so we only have one cache entry per message, and it includes both content and rendered_content. One legacy source on confusion here is that `content` changes meaning when you're on the front end. Here is the situation going forward: database: content = raw rendered_contented = rendered cache entry: content = raw rendered_contented = rendered payload for the frontend: content = raw (for apply_markdown=False) content = rendered (for apply_markdown=True)
2017-10-20 11:29:49 -07:00 · 2017-10-20 11:29:49 -07:00 · df93a99b50
parent 0cef7c9fd5
commit df93a99b50
8 changed files with 86 additions and 86 deletions
--- a/zerver/lib/actions.py
+++ b/zerver/lib/actions.py
@ -20,7 +20,6 @@ from zerver.lib.addressee import (
 )
 from zerver.lib.cache import (
    delete_user_profile_caches,
-    to_dict_cache_key,
    to_dict_cache_key_id,
 )
 from zerver.lib.context_managers import lockfile
@ -2625,9 +2624,7 @@ def do_rename_stream(stream, new_name, log=True):
    # clearer than trying to set them. display_recipient is the out of
    # date field in all cases.
    cache_delete_many(
-        to_dict_cache_key_id(message.id, True) for message in messages)
-    cache_delete_many(
-        to_dict_cache_key_id(message.id, False) for message in messages)
+        to_dict_cache_key_id(message.id) for message in messages)
    new_email = encode_email_address(stream)

    # We will tell our users to essentially
@ -3193,10 +3190,10 @@ def update_to_dict_cache(changed_messages):
    message_ids = []
    for changed_message in changed_messages:
        message_ids.append(changed_message.id)
-        items_for_remote_cache[to_dict_cache_key(changed_message, True)] = \
-            (MessageDict.to_dict_uncached(changed_message, apply_markdown=True),)
-        items_for_remote_cache[to_dict_cache_key(changed_message, False)] = \
-            (MessageDict.to_dict_uncached(changed_message, apply_markdown=False),)
+        key = to_dict_cache_key_id(changed_message.id)
+        value = MessageDict.to_dict_uncached(changed_message)
+        items_for_remote_cache[key] = (value,)
+
    cache_set_many(items_for_remote_cache)
    return message_ids

--- a/zerver/lib/cache.py
+++ b/zerver/lib/cache.py
@ -449,17 +449,15 @@ def flush_stream(sender, **kwargs):
           Q(default_events_register_stream=stream)).exists():
        cache_delete(bot_dicts_in_realm_cache_key(stream.realm))

-# TODO: Rename to_dict_cache_key_id and to_dict_cache_key
-def to_dict_cache_key_id(message_id, apply_markdown):
-    # type: (int, bool) -> Text
-    return u'message_dict:%d:%d' % (message_id, apply_markdown)
+def to_dict_cache_key_id(message_id):
+    # type: (int) -> Text
+    return u'message_dict:%d' % (message_id,)

-def to_dict_cache_key(message, apply_markdown):
-    # type: (Message, bool) -> Text
-    return to_dict_cache_key_id(message.id, apply_markdown)
+def to_dict_cache_key(message):
+    # type: (Message) -> Text
+    return to_dict_cache_key_id(message.id)

 def flush_message(sender, **kwargs):
    # type: (Any, **Any) -> None
    message = kwargs['instance']
-    cache_delete(to_dict_cache_key(message, False))
-    cache_delete(to_dict_cache_key(message, True))
+    cache_delete(to_dict_cache_key_id(message.id))
--- a/zerver/lib/cache_helpers.py
+++ b/zerver/lib/cache_helpers.py
@ -15,6 +15,7 @@ from zerver.lib.cache import cache_with_key, cache_set, \
    user_profile_by_id_cache_key, \
    user_profile_cache_key, get_remote_cache_time, get_remote_cache_requests, \
    cache_set_many, to_dict_cache_key_id
+from zerver.lib.message import MessageDict
 from importlib import import_module
 from django.contrib.sessions.models import Session
 import logging
@ -33,7 +34,13 @@ def message_fetch_objects():

 def message_cache_items(items_for_remote_cache, message):
    # type: (Dict[Text, Tuple[binary_type]], Message) -> None
-    items_for_remote_cache[to_dict_cache_key_id(message.id, True)] = (message.to_dict_uncached(True),)
+    '''
+    Note: this code is untested, and the caller has been
+    commented out for a while.
+    '''
+    key = to_dict_cache_key_id(message.id)
+    value = MessageDict.to_dict_uncached(message)
+    items_for_remote_cache[key] = (value,)

 def user_cache_items(items_for_remote_cache, user_profile):
    # type: (Dict[Text, Tuple[UserProfile]], UserProfile) -> None
--- a/zerver/lib/message.py
+++ b/zerver/lib/message.py
@ -76,7 +76,7 @@ def stringify_message_dict(message_dict):

 def message_to_dict(message, apply_markdown):
    # type: (Message, bool) -> Dict[str, Any]
-    json = message_to_dict_json(message, apply_markdown)
+    json = message_to_dict_json(message)
    obj = extract_message_dict(json)

    '''
@ -87,25 +87,33 @@ def message_to_dict(message, apply_markdown):

    MessageDict.post_process_dicts(
        [obj],
+        apply_markdown=apply_markdown,
        client_gravatar=client_gravatar,
    )
    return obj

@cache_with_key(to_dict_cache_key, timeout=3600*24)
-def message_to_dict_json(message, apply_markdown):
-    # type: (Message, bool) -> binary_type
-    return MessageDict.to_dict_uncached(message, apply_markdown)
+def message_to_dict_json(message):
+    # type: (Message) -> binary_type
+    return MessageDict.to_dict_uncached(message)

 class MessageDict(object):
    @staticmethod
-    def post_process_dicts(objs, client_gravatar):
-        # type: (List[Dict[str, Any]], bool) -> None
+    def post_process_dicts(objs, apply_markdown, client_gravatar):
+        # type: (List[Dict[str, Any]], bool, bool) -> None
        MessageDict.bulk_hydrate_sender_info(objs)

        for obj in objs:
            MessageDict.hydrate_recipient_info(obj)
            MessageDict.set_sender_avatar(obj, client_gravatar)

+            if apply_markdown:
+                obj['content_type'] = 'text/html'
+                obj['content'] = obj['rendered_content']
+            else:
+                obj['content_type'] = 'text/x-markdown'
+
+            del obj['rendered_content']
            del obj['sender_realm_id']
            del obj['sender_avatar_source']
            del obj['sender_avatar_version']
@ -116,16 +124,15 @@ class MessageDict(object):
            del obj['sender_is_mirror_dummy']

    @staticmethod
-    def to_dict_uncached(message, apply_markdown):
-        # type: (Message, bool) -> binary_type
-        dct = MessageDict.to_dict_uncached_helper(message, apply_markdown)
+    def to_dict_uncached(message):
+        # type: (Message) -> binary_type
+        dct = MessageDict.to_dict_uncached_helper(message)
        return stringify_message_dict(dct)

    @staticmethod
-    def to_dict_uncached_helper(message, apply_markdown):
-        # type: (Message, bool) -> Dict[str, Any]
+    def to_dict_uncached_helper(message):
+        # type: (Message) -> Dict[str, Any]
        return MessageDict.build_message_dict(
-            apply_markdown = apply_markdown,
            message = message,
            message_id = message.id,
            last_edit_time = message.last_edit_time,
@ -175,14 +182,13 @@ class MessageDict(object):
        return sew_messages_and_reactions(messages, reactions)

    @staticmethod
-    def build_dict_from_raw_db_row(row, apply_markdown):
-        # type: (Dict[str, Any], bool) -> Dict[str, Any]
+    def build_dict_from_raw_db_row(row):
+        # type: (Dict[str, Any]) -> Dict[str, Any]
        '''
        row is a row from a .values() call, and it needs to have
        all the relevant fields populated
        '''
        return MessageDict.build_message_dict(
-            apply_markdown = apply_markdown,
            message = None,
            message_id = row['id'],
            last_edit_time = row['last_edit_time'],
@ -203,7 +209,6 @@ class MessageDict(object):

    @staticmethod
    def build_message_dict(
-            apply_markdown,
            message,
            message_id,
            last_edit_time,
@ -221,11 +226,12 @@ class MessageDict(object):
            recipient_type_id,
            reactions
    ):
-        # type: (bool, Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, int, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any]
+        # type: (Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, int, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any]

        obj = dict(
            id                = message_id,
            sender_id         = sender_id,
+            content           = content,
            recipient_type_id = recipient_type_id,
            recipient_type    = recipient_type,
            recipient_id      = recipient_id,
@ -248,38 +254,32 @@ class MessageDict(object):
            assert edit_history is not None
            obj['edit_history'] = ujson.loads(edit_history)

-        if apply_markdown:
-            if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version):
-                if message is None:
-                    # We really shouldn't be rendering objects in this method, but there is
-                    # a scenario where we upgrade the version of bugdown and fail to run
-                    # management commands to re-render historical messages, and then we
-                    # need to have side effects.  This method is optimized to not need full
-                    # blown ORM objects, but the bugdown renderer is unfortunately highly
-                    # coupled to Message, and we also need to persist the new rendered content.
-                    # If we don't have a message object passed in, we get one here.  The cost
-                    # of going to the DB here should be overshadowed by the cost of rendering
-                    # and updating the row.
-                    # TODO: see #1379 to eliminate bugdown dependencies
-                    message = Message.objects.select_related().get(id=message_id)
+        if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version):
+            if message is None:
+                # We really shouldn't be rendering objects in this method, but there is
+                # a scenario where we upgrade the version of bugdown and fail to run
+                # management commands to re-render historical messages, and then we
+                # need to have side effects.  This method is optimized to not need full
+                # blown ORM objects, but the bugdown renderer is unfortunately highly
+                # coupled to Message, and we also need to persist the new rendered content.
+                # If we don't have a message object passed in, we get one here.  The cost
+                # of going to the DB here should be overshadowed by the cost of rendering
+                # and updating the row.
+                # TODO: see #1379 to eliminate bugdown dependencies
+                message = Message.objects.select_related().get(id=message_id)

-                assert message is not None  # Hint for mypy.
-                # It's unfortunate that we need to have side effects on the message
-                # in some cases.
-                rendered_content = render_markdown(message, content, realm=message.get_realm())
-                message.rendered_content = rendered_content
-                message.rendered_content_version = bugdown.version
-                message.save_rendered_content()
+            assert message is not None  # Hint for mypy.
+            # It's unfortunate that we need to have side effects on the message
+            # in some cases.
+            rendered_content = render_markdown(message, content, realm=message.get_realm())
+            message.rendered_content = rendered_content
+            message.rendered_content_version = bugdown.version
+            message.save_rendered_content()

-            if rendered_content is not None:
-                obj['content'] = rendered_content
-            else:
-                obj['content'] = u'<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
-
-            obj['content_type'] = 'text/html'
+        if rendered_content is not None:
+            obj['rendered_content'] = rendered_content
        else:
-            obj['content'] = content
-            obj['content_type'] = 'text/x-markdown'
+            obj['rendered_content'] = u'<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'

        if rendered_content is not None:
            obj['is_me_message'] = Message.is_status_message(content, rendered_content)
--- a/zerver/management/commands/merge_streams.py
+++ b/zerver/management/commands/merge_streams.py
@ -13,9 +13,7 @@ def bulk_delete_cache_keys(message_ids_to_clear: List[int]) -> None:
    while len(message_ids_to_clear) > 0:
        batch = message_ids_to_clear[0:5000]

-        keys_to_delete = [to_dict_cache_key_id(message_id, True) for message_id in batch]
-        cache_delete_many(keys_to_delete)
-        keys_to_delete = [to_dict_cache_key_id(message_id, False) for message_id in batch]
+        keys_to_delete = [to_dict_cache_key_id(message_id) for message_id in batch]
        cache_delete_many(keys_to_delete)

        message_ids_to_clear = message_ids_to_clear[5000:]
--- a/zerver/tests/test_messages.py
+++ b/zerver/tests/test_messages.py
@ -575,8 +575,8 @@ class StreamMessagesTest(ZulipTestCase):
                          content="whatever", subject="my topic")
        message = most_recent_message(user_profile)
        row = MessageDict.get_raw_db_rows([message.id])[0]
-        dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
-        MessageDict.post_process_dicts([dct], client_gravatar=False)
+        dct = MessageDict.build_dict_from_raw_db_row(row)
+        MessageDict.post_process_dicts([dct], apply_markdown=True, client_gravatar=False)
        self.assertEqual(dct['display_recipient'], 'Denmark')

        stream = get_stream('Denmark', user_profile.realm)
@ -760,6 +760,8 @@ class MessageDictTest(ZulipTestCase):
                    recipient=recipient,
                    subject='whatever',
                    content='whatever %d' % i,
+                    rendered_content='DOES NOT MATTER',
+                    rendered_content_version=bugdown.version,
                    pub_date=timezone_now(),
                    sending_client=sending_client,
                    last_edit_time=timezone_now(),
@ -780,10 +782,10 @@ class MessageDictTest(ZulipTestCase):
            rows = list(MessageDict.get_raw_db_rows(ids))

            objs = [
-                MessageDict.build_dict_from_raw_db_row(row, False)
+                MessageDict.build_dict_from_raw_db_row(row)
                for row in rows
            ]
-            MessageDict.post_process_dicts(objs, client_gravatar=False)
+            MessageDict.post_process_dicts(objs, apply_markdown=False, client_gravatar=False)

        delay = time.time() - t
        # Make sure we don't take longer than 1.5ms per message to
@ -816,9 +818,9 @@ class MessageDictTest(ZulipTestCase):
        # An important part of this test is to get the message through this exact code path,
        # because there is an ugly hack we need to cover.  So don't just say "row = message".
        row = MessageDict.get_raw_db_rows([message.id])[0]
-        dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
+        dct = MessageDict.build_dict_from_raw_db_row(row)
        expected_content = '<p>hello <strong>world</strong></p>'
-        self.assertEqual(dct['content'], expected_content)
+        self.assertEqual(dct['rendered_content'], expected_content)
        message = Message.objects.get(id=message.id)
        self.assertEqual(message.rendered_content, expected_content)
        self.assertEqual(message.rendered_content_version, bugdown.version)
@ -847,9 +849,9 @@ class MessageDictTest(ZulipTestCase):
        # An important part of this test is to get the message through this exact code path,
        # because there is an ugly hack we need to cover.  So don't just say "row = message".
        row = MessageDict.get_raw_db_rows([message.id])[0]
-        dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
+        dct = MessageDict.build_dict_from_raw_db_row(row)
        error_content = '<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
-        self.assertEqual(dct['content'], error_content)
+        self.assertEqual(dct['rendered_content'], error_content)

    def test_reaction(self):
        # type: () -> None
@ -873,8 +875,7 @@ class MessageDictTest(ZulipTestCase):
            message=message, user_profile=sender,
            emoji_name='simple_smile')
        row = MessageDict.get_raw_db_rows([message.id])[0]
-        msg_dict = MessageDict.build_dict_from_raw_db_row(
-            row, apply_markdown=True)
+        msg_dict = MessageDict.build_dict_from_raw_db_row(row)
        self.assertEqual(msg_dict['reactions'][0]['emoji_name'],
                         reaction.emoji_name)
        self.assertEqual(msg_dict['reactions'][0]['user']['id'],
@ -1350,9 +1351,9 @@ class EditMessageTest(ZulipTestCase):
    def check_message(self, msg_id, subject=None, content=None):
        # type: (int, Optional[Text], Optional[Text]) -> Message
        msg = Message.objects.get(id=msg_id)
-        cached = message_to_dict(msg, False)
-        uncached = MessageDict.to_dict_uncached_helper(msg, False)
-        MessageDict.post_process_dicts([uncached], client_gravatar=False)
+        cached = message_to_dict(msg, apply_markdown=False)
+        uncached = MessageDict.to_dict_uncached_helper(msg)
+        MessageDict.post_process_dicts([uncached], apply_markdown=False, client_gravatar=False)
        self.assertEqual(cached, uncached)
        if subject:
            self.assertEqual(msg.topic_name(), subject)
--- a/zerver/tests/test_narrow.py
+++ b/zerver/tests/test_narrow.py
@ -14,7 +14,7 @@ from zerver.models import (
    Reaction, UserMessage
 )
 from zerver.lib.message import (
-    MessageDict,
+    message_to_dict,
 )
 from zerver.lib.narrow import (
    build_narrow_filter,
@ -1219,8 +1219,7 @@ class GetOldMessagesTest(ZulipTestCase):
        m = self.get_last_message()
        m.rendered_content = m.rendered_content_version = None
        m.content = 'test content'
-        # Use to_dict_uncached_helper directly to avoid having to deal with remote cache
-        d = MessageDict.to_dict_uncached_helper(m, True)
+        d = message_to_dict(m, True)
        self.assertEqual(d['content'], '<p>test content</p>')

    def common_check_get_messages_query(self, query_params, expected):
--- a/zerver/views/messages.py
+++ b/zerver/views/messages.py
@ -737,10 +737,10 @@ def get_messages_backend(request, user_profile,
                search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                              content_matches, subject_matches)

-    cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown)
+    cache_transformer = MessageDict.build_dict_from_raw_db_row
    id_fetcher = lambda row: row['id']

-    message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),
+    message_dicts = generic_bulk_cached_fetch(to_dict_cache_key_id,
                                              MessageDict.get_raw_db_rows,
                                              message_ids,
                                              id_fetcher=id_fetcher,
@ -760,7 +760,7 @@ def get_messages_backend(request, user_profile,
            del msg_dict["edit_history"]
        message_list.append(msg_dict)

-    MessageDict.post_process_dicts(message_list, client_gravatar)
+    MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)

    statsd.incr('loaded_old_messages', len(message_list))
    ret = {'messages': message_list,