Remove sender names from the message cache.

This removes sender names from the message cache, since
they aren't guaranteed to be valid, and they're inexpensive
to add.

This commit will make the message cache entries smaller
by removing sender___full_name and sender__short_name
fields.

Then we add in the sender fields to the message payloads
by doing a query against the unique sender ids of the
messages we are processing.

This change leads to 2 extra database hops for most of
our message-related codepaths.  The reason there are 2 hops
instead of 1 is that we basically re-calculate way too
much data to get a no-markdown dictionary.
This commit is contained in:
Steve Howell 2017-10-10 11:10:57 -07:00 committed by Tim Abbott
parent 3910448b1d
commit 7c726a5e77
5 changed files with 43 additions and 14 deletions

View File

@ -21,6 +21,7 @@ from zerver.lib.topic_mutes import (
from zerver.models import (
get_display_recipient_by_id,
get_user_profile_by_id,
query_for_ids,
Message,
Realm,
Recipient,
@ -70,6 +71,8 @@ class MessageDict(object):
@staticmethod
def post_process_dicts(objs):
# type: (List[Dict[str, Any]]) -> None
MessageDict.bulk_hydrate_sender_info(objs)
for obj in objs:
MessageDict.hydrate_recipient_info(obj)
@ -97,8 +100,6 @@ class MessageDict(object):
sender_email = message.sender.email,
sender_realm_id = message.sender.realm_id,
sender_realm_str = message.sender.realm.string_id,
sender_full_name = message.sender.full_name,
sender_short_name = message.sender.short_name,
sender_avatar_source = message.sender.avatar_source,
sender_avatar_version = message.sender.avatar_version,
sender_is_mirror_dummy = message.sender.is_mirror_dummy,
@ -131,8 +132,6 @@ class MessageDict(object):
sender_email = row['sender__email'],
sender_realm_id = row['sender__realm__id'],
sender_realm_str = row['sender__realm__string_id'],
sender_full_name = row['sender__full_name'],
sender_short_name = row['sender__short_name'],
sender_avatar_source = row['sender__avatar_source'],
sender_avatar_version = row['sender__avatar_version'],
sender_is_mirror_dummy = row['sender__is_mirror_dummy'],
@ -159,8 +158,6 @@ class MessageDict(object):
sender_email,
sender_realm_id,
sender_realm_str,
sender_full_name,
sender_short_name,
sender_avatar_source,
sender_avatar_version,
sender_is_mirror_dummy,
@ -170,7 +167,7 @@ class MessageDict(object):
recipient_type_id,
reactions
):
# type: (bool, Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, Text, int, Text, Text, Text, Text, int, bool, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any]
# type: (bool, Optional[Message], int, Optional[datetime.datetime], Optional[Text], Text, Text, datetime.datetime, Optional[Text], Optional[int], int, Text, int, Text, Text, int, bool, Text, int, int, int, List[Dict[str, Any]]) -> Dict[str, Any]
avatar_url = avatar_url_from_dict(dict(
avatar_source=sender_avatar_source,
@ -182,8 +179,6 @@ class MessageDict(object):
obj = dict(
id = message_id,
sender_email = sender_email,
sender_full_name = sender_full_name,
sender_short_name = sender_short_name,
sender_realm_str = sender_realm_str,
sender_id = sender_id,
recipient_type_id = recipient_type_id,
@ -251,6 +246,37 @@ class MessageDict(object):
for reaction in reactions]
return obj
@staticmethod
def bulk_hydrate_sender_info(objs):
# type: (List[Dict[str, Any]]) -> None
sender_ids = list({
obj['sender_id']
for obj in objs
})
if not sender_ids:
return
query = UserProfile.objects.values(
'id',
'full_name',
'short_name',
)
rows = query_for_ids(query, sender_ids, 'id')
sender_dict = {
row['id']: row
for row in rows
}
for obj in objs:
sender_id = obj['sender_id']
user_row = sender_dict[sender_id]
obj['sender_full_name'] = user_row['full_name']
obj['sender_short_name'] = user_row['short_name']
@staticmethod
def hydrate_recipient_info(obj):
# type: (Dict[str, Any]) -> None

View File

@ -1148,8 +1148,6 @@ class Message(AbstractMessage):
'sender_id',
'sending_client__name',
'sender__email',
'sender__full_name',
'sender__short_name',
'sender__realm__id',
'sender__realm__string_id',
'sender__avatar_source',

View File

@ -576,7 +576,12 @@ class StreamMessagesTest(ZulipTestCase):
with queries_captured() as queries:
send_message()
self.assert_length(queries, 12)
'''
Part of the reason we have so many queries is that we duplicate
a lot of code to generate messages with markdown and without
markdown.
'''
self.assert_length(queries, 14)
def test_stream_message_dict(self):
# type: () -> None

View File

@ -305,7 +305,7 @@ class LoginTest(ZulipTestCase):
with queries_captured() as queries:
self.register(self.nonreg_email('test'), "test")
# Ensure the number of queries we make is not O(streams)
self.assert_length(queries, 66)
self.assert_length(queries, 68)
user_profile = self.nonreg_user('test')
self.assertEqual(get_session_dict_user(self.client.session), user_profile.id)
self.assertFalse(user_profile.enable_stream_desktop_notifications)

View File

@ -1628,7 +1628,7 @@ class SubscriptionAPITest(ZulipTestCase):
streams_to_sub,
dict(principals=ujson.dumps([user1.email, user2.email])),
)
self.assert_length(queries, 39)
self.assert_length(queries, 41)
self.assert_length(events, 7)
for ev in [x for x in events if x['event']['type'] not in ('message', 'stream')]: