diff --git a/tools/semgrep-py.yml b/tools/semgrep-py.yml index aeaced0be6..b7455550a7 100644 --- a/tools/semgrep-py.yml +++ b/tools/semgrep-py.yml @@ -54,6 +54,7 @@ rules: - id: dont-import-models-in-migrations patterns: + - pattern-not: from zerver.lib.partial import partial - pattern-not: from zerver.lib.mime_types import $X - pattern-not: from zerver.lib.redis_utils import get_redis_client - pattern-not: from zerver.lib.utils import generate_api_key diff --git a/zerver/migrations/0576_backfill_imageattachment.py b/zerver/migrations/0576_backfill_imageattachment.py new file mode 100644 index 0000000000..80d2d8116f --- /dev/null +++ b/zerver/migrations/0576_backfill_imageattachment.py @@ -0,0 +1,126 @@ +import os +from functools import reduce +from operator import or_ + +import boto3 +import pyvips +from botocore.client import Config +from botocore.exceptions import ClientError +from botocore.response import StreamingBody +from django.conf import settings +from django.db import migrations +from django.db.backends.base.schema import BaseDatabaseSchemaEditor +from django.db.migrations.state import StateApps +from django.db.models import Exists, OuterRef, Q + +from zerver.lib.partial import partial + + +def backfill_imageattachment(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None: + ImageAttachment = apps.get_model("zerver", "ImageAttachment") + Attachment = apps.get_model("zerver", "Attachment") + + if settings.LOCAL_UPLOADS_DIR is None: + upload_bucket = boto3.resource( + "s3", + aws_access_key_id=settings.S3_KEY, + aws_secret_access_key=settings.S3_SECRET_KEY, + region_name=settings.S3_REGION, + endpoint_url=settings.S3_ENDPOINT_URL, + config=Config( + signature_version=None, + s3={"addressing_style": settings.S3_ADDRESSING_STYLE}, + ), + ).Bucket(settings.S3_AUTH_UPLOADS_BUCKET) + + # Historical attachments do not have a mime_type value, so we used + # to rely on the file extension. We replicate that when + # backfilling. This is the value from zerver.lib.markdown: + IMAGE_EXTENSIONS = [".bmp", ".gif", ".jpe", ".jpeg", ".jpg", ".png", ".webp"] + + extension_limits = Q() + extension_limits = reduce( + or_, + [Q(file_name__endswith=extension) for extension in IMAGE_EXTENSIONS], + extension_limits, + ) + + min_id: int | None = 0 + while True: + attachments = ( + Attachment.objects.alias( + has_imageattachment=Exists( + ImageAttachment.objects.filter(path_id=OuterRef("path_id")) + ) + ) + .filter(extension_limits, has_imageattachment=False, id__gt=min_id) + .order_by("id") + )[:1000] + + min_id = None + for attachment in attachments: + min_id = attachment.id + + if settings.LOCAL_UPLOADS_DIR is None: + try: + metadata = upload_bucket.Object(attachment.path_id).get() + except ClientError: + print(f"{attachment.path_id}: Missing!") + continue + + def s3_read(streamingbody: StreamingBody, size: int) -> bytes: + return streamingbody.read(amt=size) + + # We use the streaming body to only pull down as much + # of the image as we need to examine the headers -- + # generally about 40k + source: pyvips.Source = pyvips.SourceCustom() + source.on_read(partial(s3_read, metadata["Body"])) + else: + attachment_path = os.path.join(settings.LOCAL_UPLOADS_DIR, attachment.path_id) + if not os.path.exists(attachment_path): + print(f"{attachment.path_id}: Missing!") + continue + source = pyvips.Source.new_from_file(attachment_path) + try: + image = pyvips.Image.new_from_source(source, "", access="sequential") + + # "original_width_px" and "original_height_px" here are + # _as rendered_, after applying the orientation + # information which the image may contain. + if ( + "orientation" in image.get_fields() + and image.get("orientation") >= 5 + and image.get("orientation") <= 8 + ): + (width, height) = (image.height, image.width) + else: + (width, height) = (image.width, image.height) + + ImageAttachment.objects.create( + realm_id=attachment.realm_id, + path_id=attachment.path_id, + original_width_px=width, + original_height_px=height, + frames=image.get_n_pages(), + thumbnail_metadata=[], + ) + except pyvips.Error: + pass + + if min_id is None: + break + + +class Migration(migrations.Migration): + atomic = False + dependencies = [ + # Because this will be backported to 9.x, we only depend on the last migration in 9.x + ("zerver", "0558_realmuserdefault_web_animate_image_previews_and_more"), + ] + + operations = [ + migrations.RunPython( + backfill_imageattachment, reverse_code=migrations.RunPython.noop, elidable=True + ) + ] diff --git a/zerver/migrations/0577_merge_20240829_0153.py b/zerver/migrations/0577_merge_20240829_0153.py new file mode 100644 index 0000000000..5f4b255e09 --- /dev/null +++ b/zerver/migrations/0577_merge_20240829_0153.py @@ -0,0 +1,10 @@ +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("zerver", "0575_alter_directmessagegroup_group_size"), + ("zerver", "0576_backfill_imageattachment"), + ] + + operations = []