Optimize get_recipient_info() for sending messages.

This commit makes get_recipient_info() faster by never creating
Django ORM objects.  We use the ORM to create a values query
instead, and then we iterate over the rows to create various
collections of ids.

In order to avoid lots of code duplication, this commit unifies
how we query UserProfile for PMs and streams.  Prior to this
commit we were getting "wide" UserProfile objects out of
our memcached cache.  Now we just go to the database with our
list of userids.  The new approach at worst adds one hop to the
database for PMs, which aren't really a performance bottleneck
(compared to streams).  And the new approach actually saves a
hop when both partners aren't in cache (plus we don't pay the
penalty of hitting the cache itself).

The performance improvement here is easy to measure for messages
to streams with many users, even with all the other activity
that goes on inside do_send_messages().  I took test_performance()
in test_messages.py, set num_extra_users to 3000, and consistently
measured a ~20% speedup in do_send_messages().

This commit also eliminates fetching of emails.  We probably
could have done that in a prior commit, but in this commit it
is very explicit that we don't need it.  While removing email
from the query is a no-brainer, it actually had a negigible
impact on performance.  Almost all the savings here comes from
not create UserProfile objects.
This commit is contained in:
Steve Howell 2017-09-09 07:14:06 -07:00 committed by Tim Abbott
parent d00c001b5f
commit d723be125a
2 changed files with 41 additions and 47 deletions

View File

@ -759,63 +759,57 @@ def get_recipient_info(recipient, sender_id):
# type: (Recipient, int) -> RecipientInfoResult
user_ids = get_recipient_user_ids(recipient, sender_id)
if recipient.type == Recipient.PERSONAL:
recipients = [get_user_profile_by_id(user_id) for user_id in user_ids]
elif (recipient.type == Recipient.STREAM or recipient.type == Recipient.HUDDLE):
query = UserProfile.objects.filter(
id__in=user_ids
)
fields = [
'id',
'email',
'enable_online_push_notifications',
'is_active',
'is_bot',
'bot_type',
'long_term_idle',
]
recipients = list(query.only(*fields))
else:
raise ValueError('Bad recipient type')
query = UserProfile.objects.filter(
id__in=user_ids
).values(
'id',
'enable_online_push_notifications',
'is_active',
'is_bot',
'bot_type',
'long_term_idle',
)
rows = list(query)
recipient_user_ids = {
user_profile.id
for user_profile in recipients
row['id']
for row in rows
}
def get_ids_for(f):
# type: (Callable[[Dict[str, Any]], bool]) -> Set[int]
return {
row['id']
for row in rows
if f(row)
}
def is_service_bot(row):
# type: (Dict[str, Any]) -> bool
return row['is_bot'] and (row['bot_type'] in UserProfile.SERVICE_BOT_TYPES)
# Only deliver the message to active user recipients
active_user_ids = {
user_profile.id
for user_profile in recipients
if user_profile.is_active
}
active_user_ids = get_ids_for(
lambda r: r['is_active']
)
push_notify_user_ids = {
user_profile.id
for user_profile in recipients
if user_profile.is_active and user_profile.enable_online_push_notifications
}
push_notify_user_ids = get_ids_for(
lambda r: r['is_active'] and r['enable_online_push_notifications']
)
# Service bots don't get UserMessage rows.
# is_service_bot is derived from is_bot and bot_type, and both of these fields
# should have been pre-fetched.
um_eligible_user_ids = {
user_profile.id
for user_profile in recipients
if user_profile.is_active and (not user_profile.is_service_bot)
}
um_eligible_user_ids = get_ids_for(
lambda r: r['is_active'] and (not is_service_bot(r))
)
long_term_idle_user_ids = {
user_profile.id
for user_profile in recipients
if user_profile.long_term_idle
}
long_term_idle_user_ids = get_ids_for(
lambda r: r['long_term_idle']
)
service_bot_tuples = [
(user_profile.id, user_profile.bot_type)
for user_profile in recipients
if user_profile.is_active and user_profile.is_service_bot
(row['id'], row['bot_type'])
for row in rows
if row['is_active'] and is_service_bot(row)
]
info = dict(

View File

@ -314,7 +314,7 @@ class LoginTest(ZulipTestCase):
with queries_captured() as queries:
self.register(self.nonreg_email('test'), "test")
# Ensure the number of queries we make is not O(streams)
self.assert_length(queries, 66)
self.assert_length(queries, 65)
user_profile = self.nonreg_user('test')
self.assertEqual(get_session_dict_user(self.client.session), user_profile.id)
self.assertFalse(user_profile.enable_stream_desktop_notifications)