From df93a99b505c39771bc46dc7809351ec26eebb3f Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Fri, 20 Oct 2017 11:29:49 -0700 Subject: [PATCH] Cache only one row per message. Before this change, we populated two cache entries for each message that we sent. The entries were largely redundant, with the only difference being whether we sent the content as raw markdown or as the rendered HTML. This commit makes it so we only have one cache entry per message, and it includes both content and rendered_content. One legacy source on confusion here is that `content` changes meaning when you're on the front end. Here is the situation going forward: database: content = raw rendered_contented = rendered cache entry: content = raw rendered_contented = rendered payload for the frontend: content = raw (for apply_markdown=False) content = rendered (for apply_markdown=True) --- zerver/lib/actions.py | 13 ++- zerver/lib/cache.py | 16 ++-- zerver/lib/cache_helpers.py | 9 +- zerver/lib/message.py | 92 ++++++++++----------- zerver/management/commands/merge_streams.py | 4 +- zerver/tests/test_messages.py | 27 +++--- zerver/tests/test_narrow.py | 5 +- zerver/views/messages.py | 6 +- 8 files changed, 86 insertions(+), 86 deletions(-) diff --git a/zerver/lib/actions.py b/zerver/lib/actions.py index 37ee732e73..a84a396d20 100644 --- a/zerver/lib/actions.py +++ b/zerver/lib/actions.py @@ -20,7 +20,6 @@ from zerver.lib.addressee import ( ) from zerver.lib.cache import ( delete_user_profile_caches, - to_dict_cache_key, to_dict_cache_key_id, ) from zerver.lib.context_managers import lockfile @@ -2625,9 +2624,7 @@ def do_rename_stream(stream, new_name, log=True): # clearer than trying to set them. display_recipient is the out of # date field in all cases. cache_delete_many( - to_dict_cache_key_id(message.id, True) for message in messages) - cache_delete_many( - to_dict_cache_key_id(message.id, False) for message in messages) + to_dict_cache_key_id(message.id) for message in messages) new_email = encode_email_address(stream) # We will tell our users to essentially @@ -3193,10 +3190,10 @@ def update_to_dict_cache(changed_messages): message_ids = [] for changed_message in changed_messages: message_ids.append(changed_message.id) - items_for_remote_cache[to_dict_cache_key(changed_message, True)] = \ - (MessageDict.to_dict_uncached(changed_message, apply_markdown=True),) - items_for_remote_cache[to_dict_cache_key(changed_message, False)] = \ - (MessageDict.to_dict_uncached(changed_message, apply_markdown=False),) + key = to_dict_cache_key_id(changed_message.id) + value = MessageDict.to_dict_uncached(changed_message) + items_for_remote_cache[key] = (value,) + cache_set_many(items_for_remote_cache) return message_ids diff --git a/zerver/lib/cache.py b/zerver/lib/cache.py index 44e86d8e74..4867e2fc20 100644 --- a/zerver/lib/cache.py +++ b/zerver/lib/cache.py @@ -449,17 +449,15 @@ def flush_stream(sender, **kwargs): Q(default_events_register_stream=stream)).exists(): cache_delete(bot_dicts_in_realm_cache_key(stream.realm)) -# TODO: Rename to_dict_cache_key_id and to_dict_cache_key -def to_dict_cache_key_id(message_id, apply_markdown): - # type: (int, bool) -> Text - return u'message_dict:%d:%d' % (message_id, apply_markdown) +def to_dict_cache_key_id(message_id): + # type: (int) -> Text + return u'message_dict:%d' % (message_id,) -def to_dict_cache_key(message, apply_markdown): - # type: (Message, bool) -> Text - return to_dict_cache_key_id(message.id, apply_markdown) +def to_dict_cache_key(message): + # type: (Message) -> Text + return to_dict_cache_key_id(message.id) def flush_message(sender, **kwargs): # type: (Any, **Any) -> None message = kwargs['instance'] - cache_delete(to_dict_cache_key(message, False)) - cache_delete(to_dict_cache_key(message, True)) + cache_delete(to_dict_cache_key_id(message.id)) diff --git a/zerver/lib/cache_helpers.py b/zerver/lib/cache_helpers.py index cede3ade6f..4ea12c2ebe 100644 --- a/zerver/lib/cache_helpers.py +++ b/zerver/lib/cache_helpers.py @@ -15,6 +15,7 @@ from zerver.lib.cache import cache_with_key, cache_set, \ user_profile_by_id_cache_key, \ user_profile_cache_key, get_remote_cache_time, get_remote_cache_requests, \ cache_set_many, to_dict_cache_key_id +from zerver.lib.message import MessageDict from importlib import import_module from django.contrib.sessions.models import Session import logging @@ -33,7 +34,13 @@ def message_fetch_objects(): def message_cache_items(items_for_remote_cache, message): # type: (Dict[Text, Tuple[binary_type]], Message) -> None - items_for_remote_cache[to_dict_cache_key_id(message.id, True)] = (message.to_dict_uncached(True),) + ''' + Note: this code is untested, and the caller has been + commented out for a while. + ''' + key = to_dict_cache_key_id(message.id) + value = MessageDict.to_dict_uncached(message) + items_for_remote_cache[key] = (value,) def user_cache_items(items_for_remote_cache, user_profile): # type: (Dict[Text, Tuple[UserProfile]], UserProfile) -> None diff --git a/zerver/lib/message.py b/zerver/lib/message.py index 3432c4727c..0ac75078c9 100644 --- a/zerver/lib/message.py +++ b/zerver/lib/message.py @@ -76,7 +76,7 @@ def stringify_message_dict(message_dict): def message_to_dict(message, apply_markdown): # type: (Message, bool) -> Dict[str, Any] - json = message_to_dict_json(message, apply_markdown) + json = message_to_dict_json(message) obj = extract_message_dict(json) ''' @@ -87,25 +87,33 @@ def message_to_dict(message, apply_markdown): MessageDict.post_process_dicts( [obj], + apply_markdown=apply_markdown, client_gravatar=client_gravatar, ) return obj @cache_with_key(to_dict_cache_key, timeout=3600*24) -def message_to_dict_json(message, apply_markdown): - # type: (Message, bool) -> binary_type - return MessageDict.to_dict_uncached(message, apply_markdown) +def message_to_dict_json(message): + # type: (Message) -> binary_type + return MessageDict.to_dict_uncached(message) class MessageDict(object): @staticmethod - def post_process_dicts(objs, client_gravatar): - # type: (List[Dict[str, Any]], bool) -> None + def post_process_dicts(objs, apply_markdown, client_gravatar): + # type: (List[Dict[str, Any]], bool, bool) -> None MessageDict.bulk_hydrate_sender_info(objs) for obj in objs: MessageDict.hydrate_recipient_info(obj) MessageDict.set_sender_avatar(obj, client_gravatar) + if apply_markdown: + obj['content_type'] = 'text/html' + obj['content'] = obj['rendered_content'] + else: + obj['content_type'] = 'text/x-markdown' + + del obj['rendered_content'] del obj['sender_realm_id'] del obj['sender_avatar_source'] del obj['sender_avatar_version'] @@ -116,16 +124,15 @@ class MessageDict(object): del obj['sender_is_mirror_dummy'] @staticmethod - def to_dict_uncached(message, apply_markdown): - # type: (Message, bool) -> binary_type - dct = MessageDict.to_dict_uncached_helper(message, apply_markdown) + def to_dict_uncached(message): + # type: (Message) -> binary_type + dct = MessageDict.to_dict_uncached_helper(message) return stringify_message_dict(dct) @staticmethod - def to_dict_uncached_helper(message, apply_markdown): - # type: (Message, bool) -> Dict[str, Any] + def to_dict_uncached_helper(message): + # type: (Message) -> Dict[str, Any] return MessageDict.build_message_dict( - apply_markdown = apply_markdown, message = message, message_id = message.id, last_edit_time = message.last_edit_time, @@ -175,14 +182,13 @@ class MessageDict(object): return sew_messages_and_reactions(messages, reactions) @staticmethod - def build_dict_from_raw_db_row(row, apply_markdown): - # type: (Dict[str, Any], bool) -> Dict[str, Any] + def build_dict_from_raw_db_row(row): + # type: (Dict[str, Any]) -> Dict[str, Any] ''' row is a row from a .values() call, and it needs to have all the relevant fields populated ''' return MessageDict.build_message_dict( - apply_markdown = apply_markdown, message = None, message_id = row['id'], last_edit_time = row['last_edit_time'], @@ -203,7 +209,6 @@ class MessageDict(object): @staticmethod def build_message_dict( - apply_markdown, message, message_id, last_edit_time, @@ -221,11 +226,12 @@ class MessageDict(object): recipient_type_id, reactions ): - # type: (bool, Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, int, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any] + # type: (Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, int, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any] obj = dict( id = message_id, sender_id = sender_id, + content = content, recipient_type_id = recipient_type_id, recipient_type = recipient_type, recipient_id = recipient_id, @@ -248,38 +254,32 @@ class MessageDict(object): assert edit_history is not None obj['edit_history'] = ujson.loads(edit_history) - if apply_markdown: - if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version): - if message is None: - # We really shouldn't be rendering objects in this method, but there is - # a scenario where we upgrade the version of bugdown and fail to run - # management commands to re-render historical messages, and then we - # need to have side effects. This method is optimized to not need full - # blown ORM objects, but the bugdown renderer is unfortunately highly - # coupled to Message, and we also need to persist the new rendered content. - # If we don't have a message object passed in, we get one here. The cost - # of going to the DB here should be overshadowed by the cost of rendering - # and updating the row. - # TODO: see #1379 to eliminate bugdown dependencies - message = Message.objects.select_related().get(id=message_id) + if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version): + if message is None: + # We really shouldn't be rendering objects in this method, but there is + # a scenario where we upgrade the version of bugdown and fail to run + # management commands to re-render historical messages, and then we + # need to have side effects. This method is optimized to not need full + # blown ORM objects, but the bugdown renderer is unfortunately highly + # coupled to Message, and we also need to persist the new rendered content. + # If we don't have a message object passed in, we get one here. The cost + # of going to the DB here should be overshadowed by the cost of rendering + # and updating the row. + # TODO: see #1379 to eliminate bugdown dependencies + message = Message.objects.select_related().get(id=message_id) - assert message is not None # Hint for mypy. - # It's unfortunate that we need to have side effects on the message - # in some cases. - rendered_content = render_markdown(message, content, realm=message.get_realm()) - message.rendered_content = rendered_content - message.rendered_content_version = bugdown.version - message.save_rendered_content() + assert message is not None # Hint for mypy. + # It's unfortunate that we need to have side effects on the message + # in some cases. + rendered_content = render_markdown(message, content, realm=message.get_realm()) + message.rendered_content = rendered_content + message.rendered_content_version = bugdown.version + message.save_rendered_content() - if rendered_content is not None: - obj['content'] = rendered_content - else: - obj['content'] = u'

[Zulip note: Sorry, we could not understand the formatting of your message]

' - - obj['content_type'] = 'text/html' + if rendered_content is not None: + obj['rendered_content'] = rendered_content else: - obj['content'] = content - obj['content_type'] = 'text/x-markdown' + obj['rendered_content'] = u'

[Zulip note: Sorry, we could not understand the formatting of your message]

' if rendered_content is not None: obj['is_me_message'] = Message.is_status_message(content, rendered_content) diff --git a/zerver/management/commands/merge_streams.py b/zerver/management/commands/merge_streams.py index ff04b016d0..202085b519 100644 --- a/zerver/management/commands/merge_streams.py +++ b/zerver/management/commands/merge_streams.py @@ -13,9 +13,7 @@ def bulk_delete_cache_keys(message_ids_to_clear: List[int]) -> None: while len(message_ids_to_clear) > 0: batch = message_ids_to_clear[0:5000] - keys_to_delete = [to_dict_cache_key_id(message_id, True) for message_id in batch] - cache_delete_many(keys_to_delete) - keys_to_delete = [to_dict_cache_key_id(message_id, False) for message_id in batch] + keys_to_delete = [to_dict_cache_key_id(message_id) for message_id in batch] cache_delete_many(keys_to_delete) message_ids_to_clear = message_ids_to_clear[5000:] diff --git a/zerver/tests/test_messages.py b/zerver/tests/test_messages.py index 64121a4315..e3b44547a9 100644 --- a/zerver/tests/test_messages.py +++ b/zerver/tests/test_messages.py @@ -575,8 +575,8 @@ class StreamMessagesTest(ZulipTestCase): content="whatever", subject="my topic") message = most_recent_message(user_profile) row = MessageDict.get_raw_db_rows([message.id])[0] - dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True) - MessageDict.post_process_dicts([dct], client_gravatar=False) + dct = MessageDict.build_dict_from_raw_db_row(row) + MessageDict.post_process_dicts([dct], apply_markdown=True, client_gravatar=False) self.assertEqual(dct['display_recipient'], 'Denmark') stream = get_stream('Denmark', user_profile.realm) @@ -760,6 +760,8 @@ class MessageDictTest(ZulipTestCase): recipient=recipient, subject='whatever', content='whatever %d' % i, + rendered_content='DOES NOT MATTER', + rendered_content_version=bugdown.version, pub_date=timezone_now(), sending_client=sending_client, last_edit_time=timezone_now(), @@ -780,10 +782,10 @@ class MessageDictTest(ZulipTestCase): rows = list(MessageDict.get_raw_db_rows(ids)) objs = [ - MessageDict.build_dict_from_raw_db_row(row, False) + MessageDict.build_dict_from_raw_db_row(row) for row in rows ] - MessageDict.post_process_dicts(objs, client_gravatar=False) + MessageDict.post_process_dicts(objs, apply_markdown=False, client_gravatar=False) delay = time.time() - t # Make sure we don't take longer than 1.5ms per message to @@ -816,9 +818,9 @@ class MessageDictTest(ZulipTestCase): # An important part of this test is to get the message through this exact code path, # because there is an ugly hack we need to cover. So don't just say "row = message". row = MessageDict.get_raw_db_rows([message.id])[0] - dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True) + dct = MessageDict.build_dict_from_raw_db_row(row) expected_content = '

hello world

' - self.assertEqual(dct['content'], expected_content) + self.assertEqual(dct['rendered_content'], expected_content) message = Message.objects.get(id=message.id) self.assertEqual(message.rendered_content, expected_content) self.assertEqual(message.rendered_content_version, bugdown.version) @@ -847,9 +849,9 @@ class MessageDictTest(ZulipTestCase): # An important part of this test is to get the message through this exact code path, # because there is an ugly hack we need to cover. So don't just say "row = message". row = MessageDict.get_raw_db_rows([message.id])[0] - dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True) + dct = MessageDict.build_dict_from_raw_db_row(row) error_content = '

[Zulip note: Sorry, we could not understand the formatting of your message]

' - self.assertEqual(dct['content'], error_content) + self.assertEqual(dct['rendered_content'], error_content) def test_reaction(self): # type: () -> None @@ -873,8 +875,7 @@ class MessageDictTest(ZulipTestCase): message=message, user_profile=sender, emoji_name='simple_smile') row = MessageDict.get_raw_db_rows([message.id])[0] - msg_dict = MessageDict.build_dict_from_raw_db_row( - row, apply_markdown=True) + msg_dict = MessageDict.build_dict_from_raw_db_row(row) self.assertEqual(msg_dict['reactions'][0]['emoji_name'], reaction.emoji_name) self.assertEqual(msg_dict['reactions'][0]['user']['id'], @@ -1350,9 +1351,9 @@ class EditMessageTest(ZulipTestCase): def check_message(self, msg_id, subject=None, content=None): # type: (int, Optional[Text], Optional[Text]) -> Message msg = Message.objects.get(id=msg_id) - cached = message_to_dict(msg, False) - uncached = MessageDict.to_dict_uncached_helper(msg, False) - MessageDict.post_process_dicts([uncached], client_gravatar=False) + cached = message_to_dict(msg, apply_markdown=False) + uncached = MessageDict.to_dict_uncached_helper(msg) + MessageDict.post_process_dicts([uncached], apply_markdown=False, client_gravatar=False) self.assertEqual(cached, uncached) if subject: self.assertEqual(msg.topic_name(), subject) diff --git a/zerver/tests/test_narrow.py b/zerver/tests/test_narrow.py index 7b4a3216e6..79e72e6345 100644 --- a/zerver/tests/test_narrow.py +++ b/zerver/tests/test_narrow.py @@ -14,7 +14,7 @@ from zerver.models import ( Reaction, UserMessage ) from zerver.lib.message import ( - MessageDict, + message_to_dict, ) from zerver.lib.narrow import ( build_narrow_filter, @@ -1219,8 +1219,7 @@ class GetOldMessagesTest(ZulipTestCase): m = self.get_last_message() m.rendered_content = m.rendered_content_version = None m.content = 'test content' - # Use to_dict_uncached_helper directly to avoid having to deal with remote cache - d = MessageDict.to_dict_uncached_helper(m, True) + d = message_to_dict(m, True) self.assertEqual(d['content'], '

test content

') def common_check_get_messages_query(self, query_params, expected): diff --git a/zerver/views/messages.py b/zerver/views/messages.py index aa0a11534d..f2e41b948f 100644 --- a/zerver/views/messages.py +++ b/zerver/views/messages.py @@ -737,10 +737,10 @@ def get_messages_backend(request, user_profile, search_fields[message_id] = get_search_fields(rendered_content, subject, content_matches, subject_matches) - cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown) + cache_transformer = MessageDict.build_dict_from_raw_db_row id_fetcher = lambda row: row['id'] - message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown), + message_dicts = generic_bulk_cached_fetch(to_dict_cache_key_id, MessageDict.get_raw_db_rows, message_ids, id_fetcher=id_fetcher, @@ -760,7 +760,7 @@ def get_messages_backend(request, user_profile, del msg_dict["edit_history"] message_list.append(msg_dict) - MessageDict.post_process_dicts(message_list, client_gravatar) + MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar) statsd.incr('loaded_old_messages', len(message_list)) ret = {'messages': message_list,