# zulip/zerver/migrations/0444_userpresence_fill_data.py

from django.db import connection, migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps


def fill_new_columns(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Backfill the new UserPresence table from the legacy presence rows.

    For each (realm_id, user_profile_id) pair in zerver_userpresenceold,
    the newest timestamp among rows whose status is 1 or 2 is stored as
    both last_connected_time and last_active_time on a new UserPresence
    row.  The schema_editor argument is not used.
    """
    UserPresence = apps.get_model("zerver", "UserPresence")

    # Why a single timestamp feeds both columns: ideally the legacy
    # IDLE/ACTIVE distinction would survive the migration, but the old
    # data structure only kept the status of the *last* update for each
    # (user, client) pair.  Whether the user last had the other status
    # with that client minutes ago or months ago simply cannot be
    # recovered.
    #
    # The only sane option is therefore to treat the most recent
    # presence update as a PRESENCE_ACTIVE_STATUS event.  Some users who
    # are currently idle end up recorded as having been active at their
    # last idle moment before this migration; that inaccuracy is
    # unlikely to matter in practice and is an unavoidable consequence
    # of the legacy data model.
    newest_timestamp_query = "SELECT realm_id, user_profile_id, MAX(timestamp) FROM zerver_userpresenceold WHERE status IN (1, 2) GROUP BY realm_id, user_profile_id"
    with connection.cursor() as cursor:
        cursor.execute(newest_timestamp_query)
        legacy_rows = cursor.fetchall()

    # Each fetched row is (realm_id, user_profile_id, newest timestamp),
    # in the order of the SELECT list above.
    backfilled_presences = [
        UserPresence(
            user_profile_id=user_profile_id,
            realm_id=realm_id,
            last_connected_time=newest_timestamp,
            last_active_time=newest_timestamp,
        )
        for realm_id, user_profile_id, newest_timestamp in legacy_rows
    ]

    UserPresence.objects.bulk_create(
        backfilled_presences,
        # Very large servers would otherwise produce enormous individual
        # network requests, so cap how many rows go out per request.
        batch_size=10000,
        # UserPresence rows may already exist if the UserPresence worker
        # has started, or if a user changed their invisible status while
        # migrations were running.  Such rows will generally be newer
        # than anything computed here, so skipping the conflicting
        # inserts and backfilling only users without more current data
        # is the right resolution.
        ignore_conflicts=True,
    )


def clear_new_columns(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Reverse of fill_new_columns: delete every row of the new UserPresence model.

    The schema_editor argument is not used.
    """
    apps.get_model("zerver", "UserPresence").objects.all().delete()


class Migration(migrations.Migration):
    """
    Backfills the new UserPresence model from the legacy
    zerver_userpresenceold table.
    """

    # Run as a non-atomic migration, i.e. Django does not wrap the whole
    # migration in a single transaction.
    # NOTE(review): the motivation is not recorded in this file; confirm
    # before changing.
    atomic = False

    dependencies = [("zerver", "0443_userpresence_new_table_schema")]

    operations = [
        # Forward: fill_new_columns populates UserPresence.
        # Reverse: clear_new_columns empties it again.
        migrations.RunPython(
            fill_new_columns,
            reverse_code=clear_new_columns,
        ),
    ]
presence: Rewrite the backend data model. This implements the core of the planned rewrite of the backend data model for UserPresence, moving it to one that supports much more efficient queries and is more correct around handling of multiple clients. The main loss of functionality is that we no longer track which Client sent presence data (so we will no longer be able to say using UserPresence "the user was last online on their desktop 15 minutes ago, but was online with their phone 3 minutes ago"). If we consider that information important for the occasional investigation query, we can already construct that answer from UserActivity data. It's not worth making Presence much more expensive/complex to support it. For slim_presence clients, this sends the same data format we sent before, albeit with less complexity involved in constructing it. Note that at present we always send both last_active_time and last_connected_time; we may revisit that in the future. This commit doesn't include the finalizing migration, which drops the UserPresenceOld table. The way to deploy is to start the backfill migration with the server down and then start the server without the user_presence queue worker, to let the migration finish without new data interfering with it. Once the migration is done, the queue worker can be started, and the presence data will catch up to the current state as the queue worker goes over the queued-up events and updates the UserPresence table. Co-authored-by: Mateusz Mandera <mateusz.mandera@zulip.com> 2020-06-11 16:03:47 +02:00			`from django.db import connection, migrations`
			`from django.db.backends.base.schema import BaseDatabaseSchemaEditor`
			`from django.db.migrations.state import StateApps`


			`def fill_new_columns(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:`
			`UserPresence = apps.get_model("zerver", "UserPresence")`

			`# In theory, we'd like to preserve the distinction between the`
			`# IDLE and ACTIVE statuses in legacy data. However, there is no`
			`# correct way to do so; the previous data structure only stored`
			`# the current IDLE/ACTIVE status of the last update for each`
			`# (user, client) pair. There's no way to know whether the last`
			`# time the user had the other status with that client was minutes`
			`# or months beforehand.`
			`#`
			`# So the only sane thing we can do with this migration is to treat`
			`# the last presence update as having been a PRESENCE_ACTIVE_STATUS`
			`# event. This will result in some currently-idle users being`
			`# incorrectly recorded as having been active at the last moment`
			`# that they were idle before this migration. This error is`
			`# unlikely to be significant in practice, and in any case is an`
			`# unavoidable flaw caused by the legacy previous data model.`
			`with connection.cursor() as cursor:`
			`cursor.execute(`
			`"SELECT realm_id, user_profile_id, MAX(timestamp) FROM zerver_userpresenceold WHERE status IN (1, 2) GROUP BY realm_id, user_profile_id"`
			`)`
			`latest_presence_per_user = cursor.fetchall()`

			`UserPresence.objects.bulk_create(`
			`[`
			`UserPresence(`
			`user_profile_id=presence_row[1],`
			`realm_id=presence_row[0],`
			`last_connected_time=presence_row[2],`
			`last_active_time=presence_row[2],`
			`)`
			`for presence_row in latest_presence_per_user`
			`],`
			`# Limit the size of individual network requests for very large`
			`# servers.`
			`batch_size=10000,`
			`# If the UserPresence worker has already started, or a user`
			`# has changed their invisible status while migrations are`
			`# running, then some UserPresence rows may exist. Those will`
			`# generally be newer than what we have here, so ignoring`
			`# conflicts so we can complete backfilling users who don't`
			`# have more current data is the right resolution.`
			`ignore_conflicts=True,`
			`)`


			`def clear_new_columns(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:`
			`UserPresence = apps.get_model("zerver", "UserPresence")`
			`UserPresence.objects.all().delete()`


			`class Migration(migrations.Migration):`
			`"""`
			`Ports data from the UserPresence model into the new one.`
			`"""`

			`atomic = False`

			`dependencies = [`
			`("zerver", "0443_userpresence_new_table_schema"),`
			`]`

			`operations = [migrations.RunPython(fill_new_columns, reverse_code=clear_new_columns)]`