mirror of https://github.com/zulip/zulip.git
models: Migration of UserMessage.id to bigint, part 1.
As part of adding support for more than 2B UserMessage rows in a Zulip server, we need to change UserMessage.id (a field we don't access but that is needed by Django) from an int to a bigint. This commit is a series of migrations which create a `bigint_id` column and populate it correctly. This migration will take a long time to run; on chat.zulip.org (a server with a lot of history), it took about 4 hours to complete. How to migrate with minimal downtime: 1. Run `upgrade-zulip-from-git` through this commit. It will install migration 0238 and then more or less hang while applying migration 0239. Once migration 0238 is completed, however, your server should be able to be started back up safely while migration 0239 is running. 2. Run `/home/zulip/deployments/next/scripts/restart-server` in a separate terminal to get Zulip running again. 3. When the `upgrade-zulip-from-git` command finishes, it will automatically re-restart the Zulip server, leaving you in a consistent state and ready to do part 2 of the migration. A useful `manage.py shell` query for checking that the state after this commit is consistent: assert UserMessage.objects.exclude(bigint_id=F("id")).count() == 0 Part of #13040.
This commit is contained in:
parent
89aeefed76
commit
b008515d63
|
@ -0,0 +1,25 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.23 on 2019-08-22 22:02
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add a nullable ``bigint_id`` column to both user-message tables.

    The column is created empty (NULL) on ``archivedusermessage`` and
    ``usermessage``; this migration only changes the schema.
    """

    dependencies = [
        ('zerver', '0237_rename_zulip_realm_to_zulipinternal'),
    ]

    # One AddField per model, in the same order as the generated migration:
    # the archived table first, then the live table.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='bigint_id',
            field=models.BigIntegerField(null=True),
        )
        for target_model in ('archivedusermessage', 'usermessage')
    ]
|
|
@ -0,0 +1,76 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.23 on 2019-08-21 21:43
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
|
||||
from django.db import connection, migrations
|
||||
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
|
||||
from django.db.migrations.state import StateApps
|
||||
from django.db.models import Min
|
||||
|
||||
# Width of the id range covered by each batched UPDATE in copy_id_to_bigid:
# a batch's upper bound is its lower bound + BATCH_SIZE, and both ends are
# included by the SQL BETWEEN in sql_copy_id_to_bigint_id.
BATCH_SIZE = 10000
|
||||
|
||||
def sql_copy_id_to_bigint_id(id_range_lower_bound: int, id_range_upper_bound: int) -> None:
    """Copy ``id`` into ``bigint_id`` for one range of zerver_usermessage rows.

    Both bounds are inclusive (SQL ``BETWEEN``).  The statement is run
    directly on the default database connection.
    """
    statement = """
            UPDATE zerver_usermessage
            SET bigint_id = id
            WHERE id BETWEEN {lower_bound} AND {upper_bound}
    """.format(
        lower_bound=id_range_lower_bound,
        upper_bound=id_range_upper_bound,
    )
    with connection.cursor() as db_cursor:
        db_cursor.execute(statement)
|
||||
|
||||
def copy_id_to_bigid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    """Backfill ``UserMessage.bigint_id`` from ``id`` in batches.

    Starts at the smallest id whose ``bigint_id`` is still NULL and walks up
    to the largest id present when the function starts, issuing one UPDATE per
    batch with a short sleep between batches.  Because it starts from the
    first uncopied row, an interrupted run can be resumed by running it again.

    Args:
        apps: Historical app registry supplied by the migration framework.
        schema_editor: Unused; part of the RunPython callable signature.
    """
    UserMessage = apps.get_model('zerver', 'UserMessage')
    if not UserMessage.objects.exists():
        # Nothing to do
        return

    # TODO: is the below lookup fast enough, considering there's no index on bigint_id?
    first_uncopied_id = UserMessage.objects.filter(
        bigint_id__isnull=True
    ).aggregate(Min('id'))['id__min']
    if first_uncopied_id is None:
        # Min() over an empty queryset yields None: every existing row already
        # has bigint_id set (e.g. a re-run after the copy finished), so there
        # is nothing left to backfill.
        return

    # Note: the below id can fall in a segment
    # where bigint_id = id already, but it's not a big problem
    # this will just do some redundant UPDATEs.
    last_id = UserMessage.objects.latest("id").id

    id_range_lower_bound = first_uncopied_id
    id_range_upper_bound = first_uncopied_id + BATCH_SIZE
    while id_range_upper_bound <= last_id:
        sql_copy_id_to_bigint_id(id_range_lower_bound, id_range_upper_bound)
        id_range_lower_bound = id_range_upper_bound + 1
        id_range_upper_bound = id_range_lower_bound + BATCH_SIZE
        # Brief pause between batches before issuing the next UPDATE.
        time.sleep(0.1)

    if last_id >= id_range_lower_bound:
        # Copy for the last batch.  The comparison is inclusive so that a
        # remaining range of exactly one id (lower bound == last_id) is
        # still copied.
        sql_copy_id_to_bigint_id(id_range_lower_bound, last_id)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Populate ``UserMessage.bigint_id`` and index it.

    Steps, in order:
      1. Install a BEFORE INSERT trigger so newly inserted rows get
         ``bigint_id = id``.
      2. Backfill existing rows in batches (``copy_id_to_bigid``).
      3. Build a unique index on ``bigint_id`` concurrently.

    The trigger/function step is written so it can be re-executed: because
    the migration is non-atomic, a failure in a later step leaves the trigger
    committed, and a retry must not abort on "already exists".  Reverse
    operations are provided so the migration can be unapplied.
    """

    # CREATE INDEX CONCURRENTLY cannot run inside a transaction block, so this
    # migration must not be wrapped in one.
    atomic = False

    dependencies = [
        ('zerver', '0238_usermessage_bigint_id'),
    ]

    operations = [
        migrations.RunSQL(
            """
        CREATE OR REPLACE FUNCTION zerver_usermessage_bigint_id_to_id_trigger_function()
        RETURNS trigger AS $$
        BEGIN
            NEW.bigint_id = NEW.id;
            RETURN NEW;
        END
        $$ LANGUAGE 'plpgsql';

        DROP TRIGGER IF EXISTS zerver_usermessage_bigint_id_to_id_trigger ON zerver_usermessage;
        CREATE TRIGGER zerver_usermessage_bigint_id_to_id_trigger
            BEFORE INSERT ON zerver_usermessage
            FOR EACH ROW
            EXECUTE PROCEDURE zerver_usermessage_bigint_id_to_id_trigger_function();
        """,
            reverse_sql="""
        DROP TRIGGER IF EXISTS zerver_usermessage_bigint_id_to_id_trigger ON zerver_usermessage;
        DROP FUNCTION IF EXISTS zerver_usermessage_bigint_id_to_id_trigger_function();
        """,
        ),
        # Nothing to undo for the backfill: the copied values live in the
        # bigint_id column itself.
        migrations.RunPython(copy_id_to_bigid, reverse_code=migrations.RunPython.noop),
        # Kept as a single-statement script: CONCURRENTLY index operations
        # cannot be combined with other statements in one transaction.
        migrations.RunSQL(
            """
        CREATE UNIQUE INDEX CONCURRENTLY zerver_usermessage_bigint_id_idx ON zerver_usermessage (bigint_id);
        """,
            reverse_sql="""
        DROP INDEX CONCURRENTLY IF EXISTS zerver_usermessage_bigint_id_idx;
        """,
        ),
    ]
|
|
@ -1780,6 +1780,8 @@ class ArchivedReaction(AbstractReaction):
|
|||
# UserMessage is the largest table in a Zulip installation, even
|
||||
# though each row is only 4 integers.
|
||||
class AbstractUserMessage(models.Model):
|
||||
bigint_id = models.BigIntegerField(null=True) # type: Optional[int]
|
||||
|
||||
user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE) # type: UserProfile
|
||||
# The order here is important! It's the order of fields in the bitfield.
|
||||
ALL_FLAGS = [
|
||||
|
|
Loading…
Reference in New Issue