models: Migration of UserMessage.id to bigint, part 1.

As part of adding support for more than 2B UserMessage rows in a Zulip
server, we need to change UserMessage.id (a field we don't access but
is needed by Django) from an int to a bigint.  This commit adds a series
of migrations which create a `bigint_id` column and populate it correctly.

This migration will take a long time to run; on chat.zulip.org (a
server with a lot of history), it took about 4 hours to complete.

How to migrate with minimal downtime:

1. Run `upgrade-zulip-from-git` through this commit.  It will install
migration 0238 and then more or less hang while applying migration
0239.  Once migration 0238 is completed, however, your server should
be able to be started back up safely while migration 0239 is running.

2. Run `/home/zulip/deployments/next/scripts/restart-server` in a
separate terminal to get Zulip running again.

3. When the `upgrade-zulip-from-git` command finishes, it will
automatically restart the Zulip server once more, leaving you in a
consistent state and ready to do part 2 of the migration.

A useful `manage.py shell` query for checking that the state is
consistent after this commit:

    assert UserMessage.objects.exclude(bigint_id=F("id")).count() == 0

Part of #13040.
This commit is contained in:
Mateusz Mandera 2019-08-24 01:34:36 +02:00 committed by Tim Abbott
parent 89aeefed76
commit b008515d63
3 changed files with 103 additions and 0 deletions

View File

@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.23 on 2019-08-22 22:02
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a nullable ``bigint_id`` column to both UserMessage tables.

    The column is added to ``archivedusermessage`` and ``usermessage``
    with ``null=True``; this migration does not populate it.
    """

    dependencies = [
        ("zerver", "0237_rename_zulip_realm_to_zulipinternal"),
    ]

    # One AddField per model, in the same order as before: the archive
    # table first, then the live table.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name="bigint_id",
            field=models.BigIntegerField(null=True),
        )
        for target_model in ("archivedusermessage", "usermessage")
    ]

View File

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.23 on 2019-08-21 21:43
from __future__ import unicode_literals
import time
from django.db import connection, migrations
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Min
BATCH_SIZE = 10000
def sql_copy_id_to_bigint_id(id_range_lower_bound: int, id_range_upper_bound: int) -> None:
    """Copy ``id`` into ``bigint_id`` for one range of zerver_usermessage rows.

    Both bounds are inclusive (the statement uses SQL ``BETWEEN``).  The
    bounds are interpolated directly into the statement text, so callers
    must pass plain integers.
    """
    statement = """
UPDATE zerver_usermessage
SET bigint_id = id
WHERE id BETWEEN {lower_bound} AND {upper_bound}
""".format(
        lower_bound=id_range_lower_bound,
        upper_bound=id_range_upper_bound,
    )
    with connection.cursor() as db_cursor:
        db_cursor.execute(statement)
def copy_id_to_bigid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    """Backfill ``bigint_id = id`` for existing UserMessage rows, in batches.

    Starts at the smallest id whose ``bigint_id`` is still NULL and walks
    up to the largest id currently in the table, issuing one UPDATE per
    inclusive id range and sleeping briefly between full batches.

    Args:
        apps: Historical app registry, used to look up the UserMessage model.
        schema_editor: Unused; present because RunPython passes it.
    """
    UserMessage = apps.get_model('zerver', 'UserMessage')
    if not UserMessage.objects.exists():
        # Nothing to do
        return

    # TODO: is the below lookup fast enough, considering there's no index on bigint_id?
    first_uncopied_id = UserMessage.objects.filter(
        bigint_id__isnull=True,
    ).aggregate(Min('id'))['id__min']
    if first_uncopied_id is None:
        # Min() over an empty queryset yields None: every existing row
        # already has bigint_id set (e.g. this step is being re-run), so
        # there is nothing left to backfill.
        return

    # Note: the below id can fall in a segment
    # where bigint_id = id already, but it's not a big problem
    # this will just do some redundant UPDATEs.
    last_id = UserMessage.objects.latest("id").id

    id_range_lower_bound = first_uncopied_id
    id_range_upper_bound = first_uncopied_id + BATCH_SIZE
    while id_range_upper_bound <= last_id:
        sql_copy_id_to_bigint_id(id_range_lower_bound, id_range_upper_bound)
        id_range_lower_bound = id_range_upper_bound + 1
        id_range_upper_bound = id_range_lower_bound + BATCH_SIZE
        # Pause between batches rather than issuing UPDATEs back to back.
        time.sleep(0.1)

    if last_id >= id_range_lower_bound:
        # Copy for the last (possibly single-row) batch.  The comparison
        # must be inclusive: when exactly one id remains, lower bound and
        # last_id are equal and that row still needs to be copied.
        sql_copy_id_to_bigint_id(id_range_lower_bound, last_id)
class Migration(migrations.Migration):
    """Populate ``zerver_usermessage.bigint_id`` and keep it equal to ``id``.

    Forward steps, in order:
      1. Create a BEFORE INSERT trigger that sets ``bigint_id = id`` on
         every newly inserted row.
      2. Backfill existing rows via ``copy_id_to_bigid``.
      3. Build a unique index on ``bigint_id``.

    Each step also declares its reverse, so the migration can be unapplied:
    the index and the trigger/function are dropped, and the backfill is a
    no-op in reverse.
    """

    # CREATE INDEX CONCURRENTLY (and DROP INDEX CONCURRENTLY) cannot run
    # inside a transaction block, so this migration must not be atomic.
    atomic = False

    dependencies = [
        ('zerver', '0238_usermessage_bigint_id'),
    ]

    operations = [
        migrations.RunSQL(
            sql="""
CREATE FUNCTION zerver_usermessage_bigint_id_to_id_trigger_function()
RETURNS trigger AS $$
BEGIN
NEW.bigint_id = NEW.id;
RETURN NEW;
END
$$ LANGUAGE 'plpgsql';
CREATE TRIGGER zerver_usermessage_bigint_id_to_id_trigger
BEFORE INSERT ON zerver_usermessage
FOR EACH ROW
EXECUTE PROCEDURE zerver_usermessage_bigint_id_to_id_trigger_function();
""",
            # Drop the trigger before the function it depends on.
            reverse_sql="""
DROP TRIGGER zerver_usermessage_bigint_id_to_id_trigger ON zerver_usermessage;
DROP FUNCTION zerver_usermessage_bigint_id_to_id_trigger_function();
""",
        ),
        # Nothing to undo for the backfill when reversing.
        migrations.RunPython(copy_id_to_bigid, reverse_code=migrations.RunPython.noop),
        migrations.RunSQL(
            sql="""
CREATE UNIQUE INDEX CONCURRENTLY zerver_usermessage_bigint_id_idx ON zerver_usermessage (bigint_id);
""",
            reverse_sql="""
DROP INDEX CONCURRENTLY zerver_usermessage_bigint_id_idx;
""",
        ),
    ]

View File

@ -1780,6 +1780,8 @@ class ArchivedReaction(AbstractReaction):
# UserMessage is the largest table in a Zulip installation, even
# though each row is only 4 integers.
class AbstractUserMessage(models.Model):
bigint_id = models.BigIntegerField(null=True) # type: Optional[int]
user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE) # type: UserProfile
# The order here is important! It's the order of fields in the bitfield.
ALL_FLAGS = [