migrations: Fix migration 0576 local backfill path and repeat.

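Unfortunately, because this migration has already been run on many installations, we need to ship a second copy of it as a new migration. Repeating it is a no-op for attachments that were already backfilled, since only attachments without an ImageAttachment row are processed. (cherry picked from commit 66537c8bf8)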
2024-10-30 09:58:57 -07:00 · 2024-10-30 09:58:57 -07:00 · bad373bbfa
parent 71ff784e14
commit bad373bbfa
2 changed files with 133 additions and 1 deletions
--- a/zerver/migrations/0576_backfill_imageattachment.py
+++ b/zerver/migrations/0576_backfill_imageattachment.py
@ -77,7 +77,8 @@ def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaE
                source: pyvips.Source = pyvips.SourceCustom()
                source.on_read(partial(s3_read, metadata["Body"]))
            else:
-                attachment_path = os.path.join(settings.LOCAL_UPLOADS_DIR, attachment.path_id)
+                assert settings.LOCAL_FILES_DIR is not None
                attachment_path = os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)
                if not os.path.exists(attachment_path):
                    print(f"{attachment.path_id}: Missing!")
                    continue
--- a/zerver/migrations/0622_backfill_imageattachment_again.py
+++ b/zerver/migrations/0622_backfill_imageattachment_again.py
@ -0,0 +1,131 @@
 # Duplicate of database migration 0576, because the original used the
 # wrong path for servers using the local file upload backend, and
 # many servers had already upgraded to 9.2 where it was backported.
 import os
 from functools import reduce
 from operator import or_
 import boto3
 import pyvips
 from botocore.client import Config
 from botocore.exceptions import ClientError
 from botocore.response import StreamingBody
 from django.conf import settings
 from django.db import migrations
 from django.db.backends.base.schema import BaseDatabaseSchemaEditor
 from django.db.migrations.state import StateApps
 from django.db.models import Exists, OuterRef, Q
 from zerver.lib.partial import partial
 def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Create ImageAttachment rows for image-like Attachments that lack one.

    Attachment rows are visited in ascending id order, in batches of
    up to 1000.  Only rows whose file name ends in one of
    IMAGE_EXTENSIONS, and for which no ImageAttachment with the same
    path_id exists, are considered.  Each file is opened with pyvips
    -- streamed from S3 when settings.LOCAL_UPLOADS_DIR is None, or
    read from under settings.LOCAL_FILES_DIR otherwise -- to obtain
    its dimensions and page count.

    Files which cannot be found are reported on stdout and skipped;
    files which pyvips fails to parse are skipped silently.
    """
    ImageAttachment = apps.get_model("zerver", "ImageAttachment")
    Attachment = apps.get_model("zerver", "Attachment")

    # The storage backend is selected the same way for the whole run.
    uses_s3 = settings.LOCAL_UPLOADS_DIR is None
    if uses_s3:
        s3_config = Config(
            signature_version=None,
            s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
        )
        s3_resource = boto3.resource(
            "s3",
            aws_access_key_id=settings.S3_KEY,
            aws_secret_access_key=settings.S3_SECRET_KEY,
            region_name=settings.S3_REGION,
            endpoint_url=settings.S3_ENDPOINT_URL,
            config=s3_config,
        )
        upload_bucket = s3_resource.Bucket(settings.S3_AUTH_UPLOADS_BUCKET)

    def read_from_s3_body(streamingbody: StreamingBody, size: int) -> bytes:
        # Read callback handed to pyvips: return up to `size` bytes
        # from the S3 response body.
        return streamingbody.read(amt=size)

    # Historical attachments do not have a mime_type value, so we used
    # to rely on the file extension.  We replicate that when
    # backfilling.  This is the value from zerver.lib.markdown:
    IMAGE_EXTENSIONS = [".bmp", ".gif", ".jpe", ".jpeg", ".jpg", ".png", ".webp"]
    image_name_filter = reduce(
        or_,
        (Q(file_name__endswith=suffix) for suffix in IMAGE_EXTENSIONS),
        Q(),
    )

    # Id of the last Attachment seen in the previous batch; it becomes
    # None once a batch comes back empty, which ends the loop.
    last_id: int | None = 0
    while last_id is not None:
        matching_imageattachment = Exists(
            ImageAttachment.objects.filter(path_id=OuterRef("path_id"))
        )
        batch = (
            Attachment.objects.alias(has_imageattachment=matching_imageattachment)
            .filter(image_name_filter, has_imageattachment=False, id__gt=last_id)
            .order_by("id")[:1000]
        )

        last_id = None
        for attachment in batch:
            last_id = attachment.id

            if uses_s3:
                try:
                    metadata = upload_bucket.Object(attachment.path_id).get()
                except ClientError:
                    print(f"{attachment.path_id}: Missing!")
                    continue

                # We use the streaming body to only pull down as much
                # of the image as we need to examine the headers --
                # generally about 40k
                source: pyvips.Source = pyvips.SourceCustom()
                source.on_read(partial(read_from_s3_body, metadata["Body"]))
            else:
                assert settings.LOCAL_FILES_DIR is not None
                local_path = os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)
                if not os.path.exists(local_path):
                    print(f"{attachment.path_id}: Missing!")
                    continue
                source = pyvips.Source.new_from_file(local_path)

            try:
                image = pyvips.Image.new_from_source(source, "", access="sequential")

                # "original_width_px" and "original_height_px" here are
                # _as rendered_, after applying the orientation
                # information which the image may contain; orientation
                # values 5 through 8 are recorded with width and height
                # exchanged.
                swap_dimensions = (
                    "orientation" in image.get_fields() and 5 <= image.get("orientation") <= 8
                )
                if swap_dimensions:
                    width, height = image.height, image.width
                else:
                    width, height = image.width, image.height

                ImageAttachment.objects.create(
                    realm_id=attachment.realm_id,
                    path_id=attachment.path_id,
                    original_width_px=width,
                    original_height_px=height,
                    frames=image.get_n_pages(),
                    thumbnail_metadata=[],
                )
            except pyvips.Error:
                # pyvips could not handle this file; move on to the
                # next attachment without recording anything.
                continue
 class Migration(migrations.Migration):
    # Declared non-atomic; the backfill function is the only operation.
    atomic = False

    # Because this will be backported to 9.x, we only depend on the
    # last migration in 9.x.
    dependencies = [("zerver", "0576_backfill_imageattachment")]

    # Reversing is a no-op: rows created by the backfill are left alone.
    operations = [
        migrations.RunPython(
            backfill_imageattachment,
            reverse_code=migrations.RunPython.noop,
            elidable=True,
        ),
    ]