diff --git a/zerver/migrations/0576_backfill_imageattachment.py b/zerver/migrations/0576_backfill_imageattachment.py
index 80d2d8116f..9bcfaac358 100644
--- a/zerver/migrations/0576_backfill_imageattachment.py
+++ b/zerver/migrations/0576_backfill_imageattachment.py
@@ -77,7 +77,8 @@ def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaE
                 source: pyvips.Source = pyvips.SourceCustom()
                 source.on_read(partial(s3_read, metadata["Body"]))
             else:
-                attachment_path = os.path.join(settings.LOCAL_UPLOADS_DIR, attachment.path_id)
+                assert settings.LOCAL_FILES_DIR is not None
+                attachment_path = os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)
                 if not os.path.exists(attachment_path):
                     print(f"{attachment.path_id}: Missing!")
                     continue
diff --git a/zerver/migrations/0622_backfill_imageattachment_again.py b/zerver/migrations/0622_backfill_imageattachment_again.py
new file mode 100644
index 0000000000..fcc6f3dbce
--- /dev/null
+++ b/zerver/migrations/0622_backfill_imageattachment_again.py
@@ -0,0 +1,145 @@
+# Duplicate of database migration 0576, because the original used the
+# wrong path for servers using the local file upload backend, and
+# many servers had already upgraded to 9.2 where it was backported.
+
+import os
+from functools import reduce
+from operator import or_
+
+import boto3
+import pyvips
+from botocore.client import Config
+from botocore.exceptions import ClientError
+from botocore.response import StreamingBody
+from django.conf import settings
+from django.db import migrations
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+from django.db.models import Exists, OuterRef, Q
+
+from zerver.lib.partial import partial
+
+
+def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+    """Create the ImageAttachment rows that image-like Attachments are missing.
+
+    Attachments are walked in id order, 1000 at a time, restricted to
+    file names ending in a known image extension which have no
+    ImageAttachment with the same path_id.  Each file is opened with
+    pyvips, from S3 or from local disk, to read its dimensions and
+    frame count.  Files which cannot be fetched are reported on
+    stdout; files which pyvips cannot parse are skipped silently.
+    """
+    ImageAttachment = apps.get_model("zerver", "ImageAttachment")
+    Attachment = apps.get_model("zerver", "Attachment")
+
+    if settings.LOCAL_UPLOADS_DIR is None:
+        upload_bucket = boto3.resource(
+            "s3",
+            aws_access_key_id=settings.S3_KEY,
+            aws_secret_access_key=settings.S3_SECRET_KEY,
+            region_name=settings.S3_REGION,
+            endpoint_url=settings.S3_ENDPOINT_URL,
+            config=Config(
+                signature_version=None,
+                s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
+            ),
+        ).Bucket(settings.S3_AUTH_UPLOADS_BUCKET)
+
+    # Defined once, outside the batch loops, rather than per attachment.
+    def s3_read(streamingbody: StreamingBody, size: int) -> bytes:
+        return streamingbody.read(amt=size)
+
+    # Historical attachments do not have a mime_type value, so we used
+    # to rely on the file extension.  We replicate that when
+    # backfilling.  This is the value from zerver.lib.markdown:
+    IMAGE_EXTENSIONS = [".bmp", ".gif", ".jpe", ".jpeg", ".jpg", ".png", ".webp"]
+
+    # NOTE(review): "endswith" is a case-sensitive lookup, so file
+    # names like "photo.JPG" are not selected -- confirm that this is
+    # intended.
+    extension_limits = Q()
+    extension_limits = reduce(
+        or_,
+        [Q(file_name__endswith=extension) for extension in IMAGE_EXTENSIONS],
+        extension_limits,
+    )
+
+    # min_id is the pagination cursor; it is reset to None before each
+    # batch so that an empty batch can be detected and end the loop.
+    min_id: int | None = 0
+    while True:
+        attachments = (
+            Attachment.objects.alias(
+                has_imageattachment=Exists(
+                    ImageAttachment.objects.filter(path_id=OuterRef("path_id"))
+                )
+            )
+            .filter(extension_limits, has_imageattachment=False, id__gt=min_id)
+            .order_by("id")
+        )[:1000]
+
+        min_id = None
+        for attachment in attachments:
+            min_id = attachment.id
+
+            if settings.LOCAL_UPLOADS_DIR is None:
+                try:
+                    metadata = upload_bucket.Object(attachment.path_id).get()
+                except ClientError:
+                    print(f"{attachment.path_id}: Missing!")
+                    continue
+
+                # We use the streaming body to only pull down as much
+                # of the image as we need to examine the headers --
+                # generally about 40k
+                source: pyvips.Source = pyvips.SourceCustom()
+                source.on_read(partial(s3_read, metadata["Body"]))
+            else:
+                assert settings.LOCAL_FILES_DIR is not None
+                attachment_path = os.path.join(settings.LOCAL_FILES_DIR, attachment.path_id)
+                if not os.path.exists(attachment_path):
+                    print(f"{attachment.path_id}: Missing!")
+                    continue
+                source = pyvips.Source.new_from_file(attachment_path)
+
+            try:
+                image = pyvips.Image.new_from_source(source, "", access="sequential")
+
+                # "original_width_px" and "original_height_px" here are
+                # _as rendered_, after applying the orientation
+                # information which the image may contain.
+                if "orientation" in image.get_fields() and 5 <= image.get("orientation") <= 8:
+                    (width, height) = (image.height, image.width)
+                else:
+                    (width, height) = (image.width, image.height)
+                frames = image.get_n_pages()
+            except pyvips.Error:
+                # pyvips could not read this file; skip it without creating a row.
+                continue
+
+            ImageAttachment.objects.create(
+                realm_id=attachment.realm_id,
+                path_id=attachment.path_id,
+                original_width_px=width,
+                original_height_px=height,
+                frames=frames,
+                thumbnail_metadata=[],
+            )
+
+        if min_id is None:
+            break
+
+
+class Migration(migrations.Migration):
+    atomic = False
+    dependencies = [
+        # Because this will be backported to 9.x, we only depend on the last migration in 9.x
+        ("zerver", "0576_backfill_imageattachment"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            backfill_imageattachment, reverse_code=migrations.RunPython.noop, elidable=True
+        )
+    ]
diff --git a/zerver/migrations/0623_merge_20241030_1835.py b/zerver/migrations/0623_merge_20241030_1835.py
new file mode 100644
index 0000000000..1a4bfe0b9f
--- /dev/null
+++ b/zerver/migrations/0623_merge_20241030_1835.py
@@ -0,0 +1,12 @@
+# Generated by Django 5.0.9 on 2024-10-30 18:35
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("zerver", "0621_remove_realm_edit_topic_policy"),
+        ("zerver", "0622_backfill_imageattachment_again"),
+    ]
+
+    operations = []