From b008515d63841e1c0a16ad868d3d67be3bfc20ca Mon Sep 17 00:00:00 2001
From: Mateusz Mandera
Date: Sat, 24 Aug 2019 01:34:36 +0200
Subject: [PATCH] models: Migration of UserMessage.id to bigint, part 1.

As part of adding support for more than 2B UserMessage rows in a Zulip
server, we need to change UserMessage.id (a field we don't access, but
which Django requires) from an int to a bigint. This commit is a series
of migrations which create a `bigint_id` column and populate it
correctly.

This migration will take a long time to run; on chat.zulip.org (a
server with a lot of history), it took about 4 hours to complete.

How to migrate with minimal downtime:

1. Run `upgrade-zulip-from-git` through this commit. It will install
   migration 0238 and then more or less hang while applying migration
   0239. Once migration 0238 is completed, however, your server should
   be able to be started back up safely while migration 0239 is running.

2. Run `/home/zulip/deployments/next/scripts/restart-server` in a
   separate terminal to get Zulip running again.

3. When the `upgrade-zulip-from-git` command finishes, it will
   automatically re-restart the Zulip server, leaving you in a
   consistent state and ready to do part 2 of the migration.

To check that the database is in a consistent state after this commit,
run the following in a `manage.py shell`:

    assert UserMessage.objects.exclude(bigint_id=F("id")).count() == 0

Part of #13040.
---
 .../migrations/0238_usermessage_bigint_id.py  | 25 ++++++
 .../0239_usermessage_copy_id_to_bigint_id.py  | 84 +++++++++++++++++++++
 zerver/models.py                              |  2 +
 3 files changed, 111 insertions(+)
 create mode 100644 zerver/migrations/0238_usermessage_bigint_id.py
 create mode 100644 zerver/migrations/0239_usermessage_copy_id_to_bigint_id.py

diff --git a/zerver/migrations/0238_usermessage_bigint_id.py b/zerver/migrations/0238_usermessage_bigint_id.py
new file mode 100644
index 0000000000..e078c6d62f
--- /dev/null
+++ b/zerver/migrations/0238_usermessage_bigint_id.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.23 on 2019-08-22 22:02
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('zerver', '0237_rename_zulip_realm_to_zulipinternal'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='archivedusermessage',
+            name='bigint_id',
+            field=models.BigIntegerField(null=True),
+        ),
+        migrations.AddField(
+            model_name='usermessage',
+            name='bigint_id',
+            field=models.BigIntegerField(null=True),
+        ),
+    ]
diff --git a/zerver/migrations/0239_usermessage_copy_id_to_bigint_id.py b/zerver/migrations/0239_usermessage_copy_id_to_bigint_id.py
new file mode 100644
index 0000000000..98b3826165
--- /dev/null
+++ b/zerver/migrations/0239_usermessage_copy_id_to_bigint_id.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.23 on 2019-08-21 21:43
+from __future__ import unicode_literals
+
+import time
+
+from django.db import connection, migrations
+from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+from django.db.models import Min
+
+BATCH_SIZE = 10000
+
+def sql_copy_id_to_bigint_id(id_range_lower_bound: int, id_range_upper_bound: int) -> None:
+    """Set bigint_id = id for rows whose id lies in the given inclusive range."""
+    query = """
+            UPDATE zerver_usermessage
+            SET bigint_id = id
+            WHERE id BETWEEN {lower_bound} AND {upper_bound}
+    """
+    query = query.format(lower_bound=id_range_lower_bound, upper_bound=id_range_upper_bound)
+    with connection.cursor() as cursor:
+        cursor.execute(query)
+
+def copy_id_to_bigid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
+    """Backfill bigint_id from id for existing UserMessage rows, in batches."""
+    UserMessage = apps.get_model('zerver', 'UserMessage')
+    if not UserMessage.objects.exists():
+        # Nothing to do
+        return
+
+    # TODO: is the below lookup fast enough, considering there's no index on bigint_id?
+    first_uncopied_id = UserMessage.objects.filter(bigint_id__isnull=True
+                                                   ).aggregate(Min('id'))['id__min']
+    if first_uncopied_id is None:
+        # Every existing row already has bigint_id set; nothing to copy.
+        return
+
+    # Note: the below id can fall in a segment
+    # where bigint_id = id already, but it's not a big problem
+    # this will just do some redundant UPDATEs.
+    last_id = UserMessage.objects.latest("id").id
+
+    id_range_lower_bound = first_uncopied_id
+    id_range_upper_bound = first_uncopied_id + BATCH_SIZE
+    while id_range_upper_bound <= last_id:
+        sql_copy_id_to_bigint_id(id_range_lower_bound, id_range_upper_bound)
+        id_range_lower_bound = id_range_upper_bound + 1
+        id_range_upper_bound = id_range_lower_bound + BATCH_SIZE
+        time.sleep(0.1)
+
+    if last_id >= id_range_lower_bound:
+        # Copy for the last batch.  The comparison must be inclusive:
+        # when last_id == id_range_lower_bound, that single row still
+        # needs to be copied.
+        sql_copy_id_to_bigint_id(id_range_lower_bound, last_id)
+
+
+class Migration(migrations.Migration):
+    atomic = False
+    dependencies = [
+        ('zerver', '0238_usermessage_bigint_id'),
+    ]
+
+    operations = [
+        migrations.RunSQL("""
+        CREATE FUNCTION zerver_usermessage_bigint_id_to_id_trigger_function()
+        RETURNS trigger AS $$
+        BEGIN
+            NEW.bigint_id = NEW.id;
+            RETURN NEW;
+        END
+        $$ LANGUAGE 'plpgsql';
+
+        CREATE TRIGGER zerver_usermessage_bigint_id_to_id_trigger
+        BEFORE INSERT ON zerver_usermessage
+        FOR EACH ROW
+        EXECUTE PROCEDURE zerver_usermessage_bigint_id_to_id_trigger_function();
+        """),
+        migrations.RunPython(copy_id_to_bigid),
+        migrations.RunSQL("""
+        CREATE UNIQUE INDEX CONCURRENTLY zerver_usermessage_bigint_id_idx ON zerver_usermessage (bigint_id);
+        """)
+    ]
diff --git a/zerver/models.py b/zerver/models.py
index 0433b42e9f..63000fd944 100644
--- a/zerver/models.py
+++ b/zerver/models.py
@@ -1780,6 +1780,8 @@ class ArchivedReaction(AbstractReaction):
 # UserMessage is the largest table in a Zulip installation, even
 # though each row is only 4 integers.
 class AbstractUserMessage(models.Model):
+    bigint_id = models.BigIntegerField(null=True)  # type: Optional[int]
+
     user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE)  # type: UserProfile
     # The order here is important!  It's the order of fields in the bitfield.
     ALL_FLAGS = [