Cache only one row per message.

Before this change, we populated two cache entries for each
message that we sent.  The entries were largely redundant,
with the only difference being whether we sent the content
as raw markdown or as the rendered HTML.

This commit makes it so we only have one cache entry per
message, and it includes both content and rendered_content.

One legacy source on confusion here is that `content`
changes meaning when you're on the front end.  Here is the
situation going forward:

    database:
        content = raw
        rendered_contented = rendered

    cache entry:
        content = raw
        rendered_contented = rendered

    payload for the frontend:
        content = raw (for apply_markdown=False)
        content = rendered (for apply_markdown=True)
This commit is contained in:
Steve Howell 2017-10-20 11:29:49 -07:00 committed by Tim Abbott
parent 0cef7c9fd5
commit df93a99b50
8 changed files with 86 additions and 86 deletions

View File

@ -20,7 +20,6 @@ from zerver.lib.addressee import (
)
from zerver.lib.cache import (
delete_user_profile_caches,
to_dict_cache_key,
to_dict_cache_key_id,
)
from zerver.lib.context_managers import lockfile
@ -2625,9 +2624,7 @@ def do_rename_stream(stream, new_name, log=True):
# clearer than trying to set them. display_recipient is the out of
# date field in all cases.
cache_delete_many(
to_dict_cache_key_id(message.id, True) for message in messages)
cache_delete_many(
to_dict_cache_key_id(message.id, False) for message in messages)
to_dict_cache_key_id(message.id) for message in messages)
new_email = encode_email_address(stream)
# We will tell our users to essentially
@ -3193,10 +3190,10 @@ def update_to_dict_cache(changed_messages):
message_ids = []
for changed_message in changed_messages:
message_ids.append(changed_message.id)
items_for_remote_cache[to_dict_cache_key(changed_message, True)] = \
(MessageDict.to_dict_uncached(changed_message, apply_markdown=True),)
items_for_remote_cache[to_dict_cache_key(changed_message, False)] = \
(MessageDict.to_dict_uncached(changed_message, apply_markdown=False),)
key = to_dict_cache_key_id(changed_message.id)
value = MessageDict.to_dict_uncached(changed_message)
items_for_remote_cache[key] = (value,)
cache_set_many(items_for_remote_cache)
return message_ids

View File

@ -449,17 +449,15 @@ def flush_stream(sender, **kwargs):
Q(default_events_register_stream=stream)).exists():
cache_delete(bot_dicts_in_realm_cache_key(stream.realm))
# TODO: Rename to_dict_cache_key_id and to_dict_cache_key
def to_dict_cache_key_id(message_id, apply_markdown):
# type: (int, bool) -> Text
return u'message_dict:%d:%d' % (message_id, apply_markdown)
def to_dict_cache_key_id(message_id):
# type: (int) -> Text
return u'message_dict:%d' % (message_id,)
def to_dict_cache_key(message, apply_markdown):
# type: (Message, bool) -> Text
return to_dict_cache_key_id(message.id, apply_markdown)
def to_dict_cache_key(message):
# type: (Message) -> Text
return to_dict_cache_key_id(message.id)
def flush_message(sender, **kwargs):
# type: (Any, **Any) -> None
message = kwargs['instance']
cache_delete(to_dict_cache_key(message, False))
cache_delete(to_dict_cache_key(message, True))
cache_delete(to_dict_cache_key_id(message.id))

View File

@ -15,6 +15,7 @@ from zerver.lib.cache import cache_with_key, cache_set, \
user_profile_by_id_cache_key, \
user_profile_cache_key, get_remote_cache_time, get_remote_cache_requests, \
cache_set_many, to_dict_cache_key_id
from zerver.lib.message import MessageDict
from importlib import import_module
from django.contrib.sessions.models import Session
import logging
@ -33,7 +34,13 @@ def message_fetch_objects():
def message_cache_items(items_for_remote_cache, message):
# type: (Dict[Text, Tuple[binary_type]], Message) -> None
items_for_remote_cache[to_dict_cache_key_id(message.id, True)] = (message.to_dict_uncached(True),)
'''
Note: this code is untested, and the caller has been
commented out for a while.
'''
key = to_dict_cache_key_id(message.id)
value = MessageDict.to_dict_uncached(message)
items_for_remote_cache[key] = (value,)
def user_cache_items(items_for_remote_cache, user_profile):
# type: (Dict[Text, Tuple[UserProfile]], UserProfile) -> None

View File

@ -76,7 +76,7 @@ def stringify_message_dict(message_dict):
def message_to_dict(message, apply_markdown):
# type: (Message, bool) -> Dict[str, Any]
json = message_to_dict_json(message, apply_markdown)
json = message_to_dict_json(message)
obj = extract_message_dict(json)
'''
@ -87,25 +87,33 @@ def message_to_dict(message, apply_markdown):
MessageDict.post_process_dicts(
[obj],
apply_markdown=apply_markdown,
client_gravatar=client_gravatar,
)
return obj
@cache_with_key(to_dict_cache_key, timeout=3600*24)
def message_to_dict_json(message, apply_markdown):
# type: (Message, bool) -> binary_type
return MessageDict.to_dict_uncached(message, apply_markdown)
def message_to_dict_json(message):
# type: (Message) -> binary_type
return MessageDict.to_dict_uncached(message)
class MessageDict(object):
@staticmethod
def post_process_dicts(objs, client_gravatar):
# type: (List[Dict[str, Any]], bool) -> None
def post_process_dicts(objs, apply_markdown, client_gravatar):
# type: (List[Dict[str, Any]], bool, bool) -> None
MessageDict.bulk_hydrate_sender_info(objs)
for obj in objs:
MessageDict.hydrate_recipient_info(obj)
MessageDict.set_sender_avatar(obj, client_gravatar)
if apply_markdown:
obj['content_type'] = 'text/html'
obj['content'] = obj['rendered_content']
else:
obj['content_type'] = 'text/x-markdown'
del obj['rendered_content']
del obj['sender_realm_id']
del obj['sender_avatar_source']
del obj['sender_avatar_version']
@ -116,16 +124,15 @@ class MessageDict(object):
del obj['sender_is_mirror_dummy']
@staticmethod
def to_dict_uncached(message, apply_markdown):
# type: (Message, bool) -> binary_type
dct = MessageDict.to_dict_uncached_helper(message, apply_markdown)
def to_dict_uncached(message):
# type: (Message) -> binary_type
dct = MessageDict.to_dict_uncached_helper(message)
return stringify_message_dict(dct)
@staticmethod
def to_dict_uncached_helper(message, apply_markdown):
# type: (Message, bool) -> Dict[str, Any]
def to_dict_uncached_helper(message):
# type: (Message) -> Dict[str, Any]
return MessageDict.build_message_dict(
apply_markdown = apply_markdown,
message = message,
message_id = message.id,
last_edit_time = message.last_edit_time,
@ -175,14 +182,13 @@ class MessageDict(object):
return sew_messages_and_reactions(messages, reactions)
@staticmethod
def build_dict_from_raw_db_row(row, apply_markdown):
# type: (Dict[str, Any], bool) -> Dict[str, Any]
def build_dict_from_raw_db_row(row):
# type: (Dict[str, Any]) -> Dict[str, Any]
'''
row is a row from a .values() call, and it needs to have
all the relevant fields populated
'''
return MessageDict.build_message_dict(
apply_markdown = apply_markdown,
message = None,
message_id = row['id'],
last_edit_time = row['last_edit_time'],
@ -203,7 +209,6 @@ class MessageDict(object):
@staticmethod
def build_message_dict(
apply_markdown,
message,
message_id,
last_edit_time,
@ -221,11 +226,12 @@ class MessageDict(object):
recipient_type_id,
reactions
):
# type: (bool, Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, int, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any]
# type: (Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, int, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any]
obj = dict(
id = message_id,
sender_id = sender_id,
content = content,
recipient_type_id = recipient_type_id,
recipient_type = recipient_type,
recipient_id = recipient_id,
@ -248,38 +254,32 @@ class MessageDict(object):
assert edit_history is not None
obj['edit_history'] = ujson.loads(edit_history)
if apply_markdown:
if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version):
if message is None:
# We really shouldn't be rendering objects in this method, but there is
# a scenario where we upgrade the version of bugdown and fail to run
# management commands to re-render historical messages, and then we
# need to have side effects. This method is optimized to not need full
# blown ORM objects, but the bugdown renderer is unfortunately highly
# coupled to Message, and we also need to persist the new rendered content.
# If we don't have a message object passed in, we get one here. The cost
# of going to the DB here should be overshadowed by the cost of rendering
# and updating the row.
# TODO: see #1379 to eliminate bugdown dependencies
message = Message.objects.select_related().get(id=message_id)
if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version):
if message is None:
# We really shouldn't be rendering objects in this method, but there is
# a scenario where we upgrade the version of bugdown and fail to run
# management commands to re-render historical messages, and then we
# need to have side effects. This method is optimized to not need full
# blown ORM objects, but the bugdown renderer is unfortunately highly
# coupled to Message, and we also need to persist the new rendered content.
# If we don't have a message object passed in, we get one here. The cost
# of going to the DB here should be overshadowed by the cost of rendering
# and updating the row.
# TODO: see #1379 to eliminate bugdown dependencies
message = Message.objects.select_related().get(id=message_id)
assert message is not None # Hint for mypy.
# It's unfortunate that we need to have side effects on the message
# in some cases.
rendered_content = render_markdown(message, content, realm=message.get_realm())
message.rendered_content = rendered_content
message.rendered_content_version = bugdown.version
message.save_rendered_content()
assert message is not None # Hint for mypy.
# It's unfortunate that we need to have side effects on the message
# in some cases.
rendered_content = render_markdown(message, content, realm=message.get_realm())
message.rendered_content = rendered_content
message.rendered_content_version = bugdown.version
message.save_rendered_content()
if rendered_content is not None:
obj['content'] = rendered_content
else:
obj['content'] = u'<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
obj['content_type'] = 'text/html'
if rendered_content is not None:
obj['rendered_content'] = rendered_content
else:
obj['content'] = content
obj['content_type'] = 'text/x-markdown'
obj['rendered_content'] = u'<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
if rendered_content is not None:
obj['is_me_message'] = Message.is_status_message(content, rendered_content)

View File

@ -13,9 +13,7 @@ def bulk_delete_cache_keys(message_ids_to_clear: List[int]) -> None:
while len(message_ids_to_clear) > 0:
batch = message_ids_to_clear[0:5000]
keys_to_delete = [to_dict_cache_key_id(message_id, True) for message_id in batch]
cache_delete_many(keys_to_delete)
keys_to_delete = [to_dict_cache_key_id(message_id, False) for message_id in batch]
keys_to_delete = [to_dict_cache_key_id(message_id) for message_id in batch]
cache_delete_many(keys_to_delete)
message_ids_to_clear = message_ids_to_clear[5000:]

View File

@ -575,8 +575,8 @@ class StreamMessagesTest(ZulipTestCase):
content="whatever", subject="my topic")
message = most_recent_message(user_profile)
row = MessageDict.get_raw_db_rows([message.id])[0]
dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
MessageDict.post_process_dicts([dct], client_gravatar=False)
dct = MessageDict.build_dict_from_raw_db_row(row)
MessageDict.post_process_dicts([dct], apply_markdown=True, client_gravatar=False)
self.assertEqual(dct['display_recipient'], 'Denmark')
stream = get_stream('Denmark', user_profile.realm)
@ -760,6 +760,8 @@ class MessageDictTest(ZulipTestCase):
recipient=recipient,
subject='whatever',
content='whatever %d' % i,
rendered_content='DOES NOT MATTER',
rendered_content_version=bugdown.version,
pub_date=timezone_now(),
sending_client=sending_client,
last_edit_time=timezone_now(),
@ -780,10 +782,10 @@ class MessageDictTest(ZulipTestCase):
rows = list(MessageDict.get_raw_db_rows(ids))
objs = [
MessageDict.build_dict_from_raw_db_row(row, False)
MessageDict.build_dict_from_raw_db_row(row)
for row in rows
]
MessageDict.post_process_dicts(objs, client_gravatar=False)
MessageDict.post_process_dicts(objs, apply_markdown=False, client_gravatar=False)
delay = time.time() - t
# Make sure we don't take longer than 1.5ms per message to
@ -816,9 +818,9 @@ class MessageDictTest(ZulipTestCase):
# An important part of this test is to get the message through this exact code path,
# because there is an ugly hack we need to cover. So don't just say "row = message".
row = MessageDict.get_raw_db_rows([message.id])[0]
dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
dct = MessageDict.build_dict_from_raw_db_row(row)
expected_content = '<p>hello <strong>world</strong></p>'
self.assertEqual(dct['content'], expected_content)
self.assertEqual(dct['rendered_content'], expected_content)
message = Message.objects.get(id=message.id)
self.assertEqual(message.rendered_content, expected_content)
self.assertEqual(message.rendered_content_version, bugdown.version)
@ -847,9 +849,9 @@ class MessageDictTest(ZulipTestCase):
# An important part of this test is to get the message through this exact code path,
# because there is an ugly hack we need to cover. So don't just say "row = message".
row = MessageDict.get_raw_db_rows([message.id])[0]
dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
dct = MessageDict.build_dict_from_raw_db_row(row)
error_content = '<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
self.assertEqual(dct['content'], error_content)
self.assertEqual(dct['rendered_content'], error_content)
def test_reaction(self):
# type: () -> None
@ -873,8 +875,7 @@ class MessageDictTest(ZulipTestCase):
message=message, user_profile=sender,
emoji_name='simple_smile')
row = MessageDict.get_raw_db_rows([message.id])[0]
msg_dict = MessageDict.build_dict_from_raw_db_row(
row, apply_markdown=True)
msg_dict = MessageDict.build_dict_from_raw_db_row(row)
self.assertEqual(msg_dict['reactions'][0]['emoji_name'],
reaction.emoji_name)
self.assertEqual(msg_dict['reactions'][0]['user']['id'],
@ -1350,9 +1351,9 @@ class EditMessageTest(ZulipTestCase):
def check_message(self, msg_id, subject=None, content=None):
# type: (int, Optional[Text], Optional[Text]) -> Message
msg = Message.objects.get(id=msg_id)
cached = message_to_dict(msg, False)
uncached = MessageDict.to_dict_uncached_helper(msg, False)
MessageDict.post_process_dicts([uncached], client_gravatar=False)
cached = message_to_dict(msg, apply_markdown=False)
uncached = MessageDict.to_dict_uncached_helper(msg)
MessageDict.post_process_dicts([uncached], apply_markdown=False, client_gravatar=False)
self.assertEqual(cached, uncached)
if subject:
self.assertEqual(msg.topic_name(), subject)

View File

@ -14,7 +14,7 @@ from zerver.models import (
Reaction, UserMessage
)
from zerver.lib.message import (
MessageDict,
message_to_dict,
)
from zerver.lib.narrow import (
build_narrow_filter,
@ -1219,8 +1219,7 @@ class GetOldMessagesTest(ZulipTestCase):
m = self.get_last_message()
m.rendered_content = m.rendered_content_version = None
m.content = 'test content'
# Use to_dict_uncached_helper directly to avoid having to deal with remote cache
d = MessageDict.to_dict_uncached_helper(m, True)
d = message_to_dict(m, True)
self.assertEqual(d['content'], '<p>test content</p>')
def common_check_get_messages_query(self, query_params, expected):

View File

@ -737,10 +737,10 @@ def get_messages_backend(request, user_profile,
search_fields[message_id] = get_search_fields(rendered_content, subject,
content_matches, subject_matches)
cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown)
cache_transformer = MessageDict.build_dict_from_raw_db_row
id_fetcher = lambda row: row['id']
message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),
message_dicts = generic_bulk_cached_fetch(to_dict_cache_key_id,
MessageDict.get_raw_db_rows,
message_ids,
id_fetcher=id_fetcher,
@ -760,7 +760,7 @@ def get_messages_backend(request, user_profile,
del msg_dict["edit_history"]
message_list.append(msg_dict)
MessageDict.post_process_dicts(message_list, client_gravatar)
MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)
statsd.incr('loaded_old_messages', len(message_list))
ret = {'messages': message_list,