mirror of https://github.com/zulip/zulip.git
Optimize get_recipient_info() for sending messages.
This commit makes get_recipient_info() faster by never creating Django ORM objects. We use the ORM to create a values query instead, and then we iterate over the rows to create various collections of ids. In order to avoid lots of code duplication, this commit unifies how we query UserProfile for PMs and streams. Prior to this commit we were getting "wide" UserProfile objects out of our memcached cache. Now we just go to the database with our list of userids. The new approach at worst adds one hop to the database for PMs, which aren't really a performance bottleneck (compared to streams). And the new approach actually saves a hop when both partners aren't in cache (plus we don't pay the penalty of hitting the cache itself). The performance improvement here is easy to measure for messages to streams with many users, even with all the other activity that goes on inside do_send_messages(). I took test_performance() in test_messages.py, set num_extra_users to 3000, and consistently measured a ~20% speedup in do_send_messages(). This commit also eliminates fetching of emails. We probably could have done that in a prior commit, but in this commit it is very explicit that we don't need it. While removing email from the query is a no-brainer, it actually had a negigible impact on performance. Almost all the savings here comes from not create UserProfile objects.
This commit is contained in:
parent
d00c001b5f
commit
d723be125a
|
@ -759,63 +759,57 @@ def get_recipient_info(recipient, sender_id):
|
|||
# type: (Recipient, int) -> RecipientInfoResult
|
||||
user_ids = get_recipient_user_ids(recipient, sender_id)
|
||||
|
||||
if recipient.type == Recipient.PERSONAL:
|
||||
recipients = [get_user_profile_by_id(user_id) for user_id in user_ids]
|
||||
elif (recipient.type == Recipient.STREAM or recipient.type == Recipient.HUDDLE):
|
||||
query = UserProfile.objects.filter(
|
||||
id__in=user_ids
|
||||
)
|
||||
|
||||
fields = [
|
||||
'id',
|
||||
'email',
|
||||
'enable_online_push_notifications',
|
||||
'is_active',
|
||||
'is_bot',
|
||||
'bot_type',
|
||||
'long_term_idle',
|
||||
]
|
||||
recipients = list(query.only(*fields))
|
||||
else:
|
||||
raise ValueError('Bad recipient type')
|
||||
query = UserProfile.objects.filter(
|
||||
id__in=user_ids
|
||||
).values(
|
||||
'id',
|
||||
'enable_online_push_notifications',
|
||||
'is_active',
|
||||
'is_bot',
|
||||
'bot_type',
|
||||
'long_term_idle',
|
||||
)
|
||||
rows = list(query)
|
||||
|
||||
recipient_user_ids = {
|
||||
user_profile.id
|
||||
for user_profile in recipients
|
||||
row['id']
|
||||
for row in rows
|
||||
}
|
||||
|
||||
def get_ids_for(f):
|
||||
# type: (Callable[[Dict[str, Any]], bool]) -> Set[int]
|
||||
return {
|
||||
row['id']
|
||||
for row in rows
|
||||
if f(row)
|
||||
}
|
||||
|
||||
def is_service_bot(row):
|
||||
# type: (Dict[str, Any]) -> bool
|
||||
return row['is_bot'] and (row['bot_type'] in UserProfile.SERVICE_BOT_TYPES)
|
||||
|
||||
# Only deliver the message to active user recipients
|
||||
active_user_ids = {
|
||||
user_profile.id
|
||||
for user_profile in recipients
|
||||
if user_profile.is_active
|
||||
}
|
||||
active_user_ids = get_ids_for(
|
||||
lambda r: r['is_active']
|
||||
)
|
||||
|
||||
push_notify_user_ids = {
|
||||
user_profile.id
|
||||
for user_profile in recipients
|
||||
if user_profile.is_active and user_profile.enable_online_push_notifications
|
||||
}
|
||||
push_notify_user_ids = get_ids_for(
|
||||
lambda r: r['is_active'] and r['enable_online_push_notifications']
|
||||
)
|
||||
|
||||
# Service bots don't get UserMessage rows.
|
||||
# is_service_bot is derived from is_bot and bot_type, and both of these fields
|
||||
# should have been pre-fetched.
|
||||
um_eligible_user_ids = {
|
||||
user_profile.id
|
||||
for user_profile in recipients
|
||||
if user_profile.is_active and (not user_profile.is_service_bot)
|
||||
}
|
||||
um_eligible_user_ids = get_ids_for(
|
||||
lambda r: r['is_active'] and (not is_service_bot(r))
|
||||
)
|
||||
|
||||
long_term_idle_user_ids = {
|
||||
user_profile.id
|
||||
for user_profile in recipients
|
||||
if user_profile.long_term_idle
|
||||
}
|
||||
long_term_idle_user_ids = get_ids_for(
|
||||
lambda r: r['long_term_idle']
|
||||
)
|
||||
|
||||
service_bot_tuples = [
|
||||
(user_profile.id, user_profile.bot_type)
|
||||
for user_profile in recipients
|
||||
if user_profile.is_active and user_profile.is_service_bot
|
||||
(row['id'], row['bot_type'])
|
||||
for row in rows
|
||||
if row['is_active'] and is_service_bot(row)
|
||||
]
|
||||
|
||||
info = dict(
|
||||
|
|
|
@ -314,7 +314,7 @@ class LoginTest(ZulipTestCase):
|
|||
with queries_captured() as queries:
|
||||
self.register(self.nonreg_email('test'), "test")
|
||||
# Ensure the number of queries we make is not O(streams)
|
||||
self.assert_length(queries, 66)
|
||||
self.assert_length(queries, 65)
|
||||
user_profile = self.nonreg_user('test')
|
||||
self.assertEqual(get_session_dict_user(self.client.session), user_profile.id)
|
||||
self.assertFalse(user_profile.enable_stream_desktop_notifications)
|
||||
|
|
Loading…
Reference in New Issue