narrow: Skip the realm_id limit on joins across to usermessage.

The unique index on `(user_id, message_id)` that is the
`zerver_usermessage` table is rather specific, and even the PostgreSQL
extended statistics are not enough for it to realize there is a
correlation between the `realm_id` in the message table and the
`user_id` in the usermessage table.  This means that adding the
`realm_id` limit when there is a join to `zerver_usermessage` flips
the query plan from a nested loop of unique usermessage index-only
scan, with an index scan of the messages pkey -- to a parallel hash
join of the messages limit with a index scan of just the user_id limit
on usermessages.  It thinks this is necessary because it thinks that
the `realm_id` limit may remove a large number of messages from the
usermessage set -- which is totally untrue.

Remove the `realm_id` limit if we have a usermessage join.
This commit is contained in:
Alex Vandiver 2023-09-21 19:52:56 +00:00 committed by Tim Abbott
parent 472ef3d890
commit 11a1cb9630
2 changed files with 17 additions and 16 deletions

View File

@ -912,13 +912,12 @@ def get_base_query_for_search(
realm_id: int, user_profile: Optional[UserProfile], need_message: bool, need_user_message: bool realm_id: int, user_profile: Optional[UserProfile], need_message: bool, need_user_message: bool
) -> Tuple[Select, ColumnElement[Integer]]: ) -> Tuple[Select, ColumnElement[Integer]]:
# Handle the simple case where user_message isn't involved first. # Handle the simple case where user_message isn't involved first.
realm_cond = column("realm_id", Integer) == literal(realm_id)
if not need_user_message: if not need_user_message:
assert need_message assert need_message
query = ( query = (
select(column("id", Integer).label("message_id")) select(column("id", Integer).label("message_id"))
.select_from(table("zerver_message")) .select_from(table("zerver_message"))
.where(realm_cond) .where(column("realm_id", Integer) == literal(realm_id))
) )
inner_msg_id_col = literal_column("zerver_message.id", Integer) inner_msg_id_col = literal_column("zerver_message.id", Integer)
@ -928,9 +927,11 @@ def get_base_query_for_search(
if need_message: if need_message:
query = ( query = (
select(column("message_id", Integer), column("flags", Integer)) select(column("message_id", Integer), column("flags", Integer))
.where(realm_cond) # We don't limit by realm_id despite the join to
.where(column("user_profile_id", Integer) == literal(user_profile.id)) # zerver_messages, since the user_profile_id limit in
.select_from( # usermessage is more selective, and the query planner
# can't know about that cross-table correlation.
.where(column("user_profile_id", Integer) == literal(user_profile.id)).select_from(
join( join(
table("zerver_usermessage"), table("zerver_usermessage"),
table("zerver_message"), table("zerver_message"),

View File

@ -3945,7 +3945,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
hamlet_email = self.example_user("hamlet").email hamlet_email = self.example_user("hamlet").email
othello_email = self.example_user("othello").email othello_email = self.example_user("othello").email
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -3957,7 +3957,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
sql, sql,
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -3969,7 +3969,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
sql, sql,
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -3981,13 +3981,13 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
sql, sql,
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["is", "starred"]]'}, sql {"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["is", "starred"]]'}, sql
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND sender_id = {othello_id} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND sender_id = {othello_id} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -4012,7 +4012,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
{"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["streams", "public"]]'}, sql {"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["streams", "public"]]'}, sql
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (recipient_id NOT IN ({public_streams_recipients})) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (recipient_id NOT IN ({public_streams_recipients})) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -4024,7 +4024,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
sql, sql,
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND upper(subject) = upper('blah') ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND upper(subject) = upper('blah') ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["topic", "blah"]]'}, sql {"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["topic", "blah"]]'}, sql
@ -4043,7 +4043,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
) )
# Narrow to direct messages with yourself # Narrow to direct messages with yourself
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND sender_id = {hamlet_id} AND recipient_id = {hamlet_recipient} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND sender_id = {hamlet_id} AND recipient_id = {hamlet_recipient} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -4055,7 +4055,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\
sql, sql,
) )
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND recipient_id = {scotland_recipient} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND recipient_id = {scotland_recipient} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
self.common_check_get_messages_query( self.common_check_get_messages_query(
{ {
@ -4079,7 +4079,7 @@ FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_cont
FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5\n\ FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5\n\
LIMIT ALL OFFSET 1)) AS topic_matches \n\ LIMIT ALL OFFSET 1)) AS topic_matches \n\
FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\ FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\
WHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n\ WHERE user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n\
LIMIT 10) AS anon_1 ORDER BY message_id ASC\ LIMIT 10) AS anon_1 ORDER BY message_id ASC\
""" """
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)
@ -4117,7 +4117,7 @@ FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_cont
FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', '"jumping" quickly'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5\n\ FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', '"jumping" quickly'), 'HighlightAll = TRUE, StartSel = <ts-match>, StopSel = </ts-match>'), '<ts-match>')) AS anon_5\n\
LIMIT ALL OFFSET 1)) AS topic_matches \n\ LIMIT ALL OFFSET 1)) AS topic_matches \n\
FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\ FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\
WHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (content ILIKE '%jumping%' OR subject ILIKE '%jumping%') AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', '"jumping" quickly')) ORDER BY message_id ASC \n\ WHERE user_profile_id = {hamlet_id} AND (content ILIKE '%jumping%' OR subject ILIKE '%jumping%') AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', '"jumping" quickly')) ORDER BY message_id ASC \n\
LIMIT 10) AS anon_1 ORDER BY message_id ASC\ LIMIT 10) AS anon_1 ORDER BY message_id ASC\
""" """
sql = sql_template.format(**query_ids) sql = sql_template.format(**query_ids)