mirror of https://github.com/zulip/zulip.git
migrations: Add indexes with realm_id.
This is designed to help PostgreSQL have better specificity and locality in its indexes. Subsequent commits will adjust the code to make sure that we use these indexes rather than the `realm_id`-less versions. We do not add a `realm_id` variation to the full-text index, since it is a GIN index; multi-column GIN indexes are not terribly performant, require the `btree_gin` extension for `int` types (which requires superuser privileges on PostgreSQL 12 and earlier), and cannot be consistently added concurrently on running instances. After all indexes have been made, we also run `CREATE STATISTICS` in order to give PostgreSQL the opportunity to realize that recipient and sender are highly correlated with message realm, allowing it to estimate that `(realm_id, recipient_id)` is likely as specific as matching a given `recipient_id`, instead of as likely as matching `realm_id` times matching a `recipient_id`. Finally, those statistics must be filled by `ANALYZE zerver_message`, which is run last.
This commit is contained in:
parent
067de6f948
commit
3518d31797
|
@ -0,0 +1,102 @@
|
||||||
|
import django.db.models.functions.text
|
||||||
|
from django.contrib.postgres.operations import AddIndexConcurrently
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
atomic = False
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("zerver", "0471_alter_realm_create_multiuse_invite_group"),
|
||||||
|
]
|
||||||
|
|
||||||
|
# The non-realm_id-prefixed versions will be removed in the next migration.
|
||||||
|
operations = [
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
models.F("recipient_id"),
|
||||||
|
models.F("id"),
|
||||||
|
name="zerver_message_realm_recipient_id",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
models.F("recipient_id"),
|
||||||
|
models.F("date_sent"),
|
||||||
|
name="zerver_message_realm_recipient_date_sent",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
models.F("sender_id"),
|
||||||
|
models.F("recipient_id"),
|
||||||
|
name="zerver_message_realm_sender_recipient",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"), models.F("date_sent"), name="zerver_message_realm_date_sent"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
django.db.models.functions.text.Upper("subject"),
|
||||||
|
models.OrderBy(models.F("id"), descending=True, nulls_last=True),
|
||||||
|
name="zerver_message_realm_upper_subject",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
models.F("recipient_id"),
|
||||||
|
django.db.models.functions.text.Upper("subject"),
|
||||||
|
models.OrderBy(models.F("id"), descending=True, nulls_last=True),
|
||||||
|
name="zerver_message_realm_recipient_upper_subject",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
models.F("recipient_id"),
|
||||||
|
models.F("subject"),
|
||||||
|
models.OrderBy(models.F("id"), descending=True, nulls_last=True),
|
||||||
|
name="zerver_message_realm_recipient_subject",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="message",
|
||||||
|
index=models.Index(
|
||||||
|
models.F("realm_id"),
|
||||||
|
models.OrderBy(models.F("id"), descending=True, nulls_last=True),
|
||||||
|
name="zerver_message_realm_id",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AddIndexConcurrently(
|
||||||
|
model_name="scheduledmessage",
|
||||||
|
index=models.Index(
|
||||||
|
condition=models.Q(("delivered", False)),
|
||||||
|
fields=["realm_id", "sender", "delivery_type", "scheduled_timestamp"],
|
||||||
|
name="zerver_realm_unsent_scheduled_messages_by_user",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="CREATE STATISTICS IF NOT EXISTS zerver_message_realm_recipient ON realm_id, recipient_id FROM zerver_message",
|
||||||
|
reverse_sql="DROP STATISTICS IF EXISTS zerver_message_realm_recipient",
|
||||||
|
),
|
||||||
|
migrations.RunSQL(
|
||||||
|
sql="CREATE STATISTICS IF NOT EXISTS zerver_message_realm_sender ON realm_id, sender_id FROM zerver_message",
|
||||||
|
reverse_sql="DROP STATISTICS IF EXISTS zerver_message_realm_sender",
|
||||||
|
),
|
||||||
|
migrations.RunSQL("ANALYZE zerver_message"),
|
||||||
|
]
|
|
@ -3116,6 +3116,74 @@ class Message(AbstractMessage):
|
||||||
F("id").desc(nulls_last=True),
|
F("id").desc(nulls_last=True),
|
||||||
name="zerver_message_recipient_subject",
|
name="zerver_message_recipient_subject",
|
||||||
),
|
),
|
||||||
|
# Indexes prefixed with realm_id
|
||||||
|
models.Index(
|
||||||
|
# For moving messages between streams or marking
|
||||||
|
# streams as read. The "id" at the end makes it easy
|
||||||
|
# to scan the resulting messages in order, and perform
|
||||||
|
# batching.
|
||||||
|
"realm_id",
|
||||||
|
"recipient_id",
|
||||||
|
"id",
|
||||||
|
name="zerver_message_realm_recipient_id",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# For generating digest emails and message archiving,
|
||||||
|
# which both group by stream.
|
||||||
|
"realm_id",
|
||||||
|
"recipient_id",
|
||||||
|
"date_sent",
|
||||||
|
name="zerver_message_realm_recipient_date_sent",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# For exports, which want to limit both sender and
|
||||||
|
# receiver. The prefix of this index (realm_id,
|
||||||
|
# sender_id) can be used for scrubbing users and/or
|
||||||
|
# deleting users' messages.
|
||||||
|
"realm_id",
|
||||||
|
"sender_id",
|
||||||
|
"recipient_id",
|
||||||
|
name="zerver_message_realm_sender_recipient",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# For analytics queries
|
||||||
|
"realm_id",
|
||||||
|
"date_sent",
|
||||||
|
name="zerver_message_realm_date_sent",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# For users searching by topic (but not stream), which
|
||||||
|
# is done case-insensitively
|
||||||
|
"realm_id",
|
||||||
|
Upper("subject"),
|
||||||
|
F("id").desc(nulls_last=True),
|
||||||
|
name="zerver_message_realm_upper_subject",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# Most stream/topic searches are case-insensitive by
|
||||||
|
# topic name (e.g. messages_for_topic). The "id" at
|
||||||
|
# the end makes it easy to scan the resulting messages
|
||||||
|
# in order, and perform batching.
|
||||||
|
"realm_id",
|
||||||
|
"recipient_id",
|
||||||
|
Upper("subject"),
|
||||||
|
F("id").desc(nulls_last=True),
|
||||||
|
name="zerver_message_realm_recipient_upper_subject",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# Only used by already_sent_mirrored_message_id
|
||||||
|
"realm_id",
|
||||||
|
"recipient_id",
|
||||||
|
"subject",
|
||||||
|
F("id").desc(nulls_last=True),
|
||||||
|
name="zerver_message_realm_recipient_subject",
|
||||||
|
),
|
||||||
|
models.Index(
|
||||||
|
# Only used by update_first_visible_message_id
|
||||||
|
"realm_id",
|
||||||
|
F("id").desc(nulls_last=True),
|
||||||
|
name="zerver_message_realm_id",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -4408,6 +4476,13 @@ class ScheduledMessage(models.Model):
|
||||||
delivered=False,
|
delivered=False,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
models.Index(
|
||||||
|
name="zerver_realm_unsent_scheduled_messages_by_user",
|
||||||
|
fields=["realm_id", "sender", "delivery_type", "scheduled_timestamp"],
|
||||||
|
condition=Q(
|
||||||
|
delivered=False,
|
||||||
|
),
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
# https://www.caktusgroup.com/blog/2016/02/02/writing-unit-tests-django-migrations/
|
# https://www.caktusgroup.com/blog/2016/02/02/writing-unit-tests-django-migrations/
|
||||||
# to get a tutorial on the framework that inspired this feature.
|
# to get a tutorial on the framework that inspired this feature.
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from unittest import skip
|
||||||
|
|
||||||
import orjson
|
import orjson
|
||||||
from django.db.migrations.state import StateApps
|
from django.db.migrations.state import StateApps
|
||||||
|
@ -28,6 +29,7 @@ from zerver.lib.test_helpers import use_db_models
|
||||||
# been tested for a migration being merged.
|
# been tested for a migration being merged.
|
||||||
|
|
||||||
|
|
||||||
|
@skip("Cannot be run because there is a non-atomic migration that has been merged after it")
|
||||||
class ScheduledEmailData(MigrationsTestCase):
|
class ScheduledEmailData(MigrationsTestCase):
|
||||||
migrate_from = "0467_rename_extradata_realmauditlog_extra_data_json"
|
migrate_from = "0467_rename_extradata_realmauditlog_extra_data_json"
|
||||||
migrate_to = "0468_rename_followup_day_email_templates"
|
migrate_to = "0468_rename_followup_day_email_templates"
|
||||||
|
|
Loading…
Reference in New Issue