migrations: Re-thumbnail and rename existing emoji.

2024-06-17 21:33:21 +00:00 · 2024-06-17 21:33:21 +00:00 · 4df9b03451
parent 0385e5bab9
commit 4df9b03451
3 changed files with 319 additions and 1 deletions
--- a/static/images/bad-emoji.png
+++ b/static/images/bad-emoji.png
--- a/zerver/lib/transfer.py
+++ b/zerver/lib/transfer.py
@ -117,6 +117,7 @@ def _transfer_emoji_to_s3(realm_emoji: RealmEmoji) -> None:
    content_type = guess_type(emoji_path)[0]
    emoji_path = os.path.join(settings.LOCAL_AVATARS_DIR, emoji_path) + ".original"
    if content_type is None:  # nocoverage
+        # This should not be possible after zerver/migrations/0553_copy_emoji_images.py
        logging.error("Emoji %d has no recognizable file extension", realm_emoji.id)
        return
    try:
@ -127,7 +128,8 @@ def _transfer_emoji_to_s3(realm_emoji: RealmEmoji) -> None:
            logging.info("Uploaded emoji file in path %s", emoji_path)
    except FileNotFoundError:  # nocoverage
        logging.error("Emoji %d could not be loaded from local disk", realm_emoji.id)
-    except BadImageError as e:
+    except BadImageError as e:  # nocoverage
+        # This should not be possible after zerver/migrations/0553_copy_emoji_images.py
        logging.error("Emoji %d is invalid: %s", realm_emoji.id, e)


--- a/zerver/migrations/0553_copy_emoji_images.py
+++ b/zerver/migrations/0553_copy_emoji_images.py
@ -0,0 +1,316 @@
+import contextlib
+import hashlib
+import logging
+import os
+from typing import Any, Iterator, Optional, Tuple
+
+import boto3
+import botocore
+import pyvips
+from botocore.client import Config
+from django.conf import settings
+from django.db import migrations
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+
+from zerver.lib.mime_types import guess_extension, guess_type
+
+# From zerver.lib.thumbnail
+# Default width and height (in pixels) used for resize_emoji's `size`.
+DEFAULT_EMOJI_SIZE = 64
+# libvips_check_image rejects images whose width * height exceeds this.
+IMAGE_BOMB_TOTAL_PIXELS = 90000000
+# resize_emoji rejects any image (not only GIFs) with more bytes than this.
+# NOTE(review): the expression is 128 MiB (134,217,728 bytes), although it
+# was previously annotated as "128 kb" -- confirm which limit is intended.
+MAX_EMOJI_GIF_FILE_SIZE_BYTES = 128 * 1024 * 1024  # 128 MiB
+
+# This is the intersection of INLINE_MIME_TYPES and THUMBNAIL_ACCEPT_IMAGE_TYPES
+VALID_EMOJI_CONTENT_TYPE = frozenset(
+    [
+        "image/avif",
+        "image/gif",
+        "image/jpeg",
+        "image/png",
+        "image/webp",
+    ]
+)
+
+
+class SkipImageError(Exception):
+    """Raised when an emoji image cannot be read, validated, or re-thumbnailed.
+
+    thumbnail_local_emoji and thumbnail_s3 catch this, log a warning, and
+    replace the emoji with the bad-emoji.png placeholder.
+    """
+
+    pass
+
+
+# From zerver.lib.thumbnail, with minor exception changes
+@contextlib.contextmanager
+def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]:
+    """Load image_data with libvips and yield the image if it is acceptable.
+
+    Raises SkipImageError when the bytes cannot be loaded, when
+    width * height exceeds IMAGE_BOMB_TOTAL_PIXELS, or when the body of
+    the `with` block raises pyvips.Error.
+    """
+    try:
+        loaded = pyvips.Image.new_from_buffer(image_data, "")
+    except pyvips.Error as load_error:
+        raise SkipImageError(f"Cannot process image: {load_error}")
+
+    pixel_count = loaded.width * loaded.height
+    if pixel_count > IMAGE_BOMB_TOTAL_PIXELS:
+        raise SkipImageError(f"Image too big: {loaded.height} * {loaded.width}")
+
+    try:
+        yield loaded
+    except pyvips.Error as processing_error:
+        # Errors raised while the caller works with the image are also
+        # reported as a skip.
+        raise SkipImageError(f"Bad image data? {processing_error}")
+
+
+# From zerver.lib.thumbnail, with minor exception changes
+def resize_emoji(
+    image_data: bytes, emoji_file_name: str, size: int = DEFAULT_EMOJI_SIZE
+) -> Tuple[bytes, Optional[bytes]]:
+    """Thumbnail an emoji image to size x size.
+
+    Returns a pair:
+      1) the resized emoji, encoded using the extension of emoji_file_name;
+      2) for multi-page (animated) images, a PNG still made from the
+         default (first-frame) load of the image; otherwise None.
+
+    Raises SkipImageError when the input has more than
+    MAX_EMOJI_GIF_FILE_SIZE_BYTES bytes, or when libvips_check_image
+    rejects it.
+    """
+    byte_count = len(image_data)
+    if byte_count > MAX_EMOJI_GIF_FILE_SIZE_BYTES:
+        raise SkipImageError(f"Image has too many bytes: {byte_count}")
+
+    # libvips reads save options from square brackets in the target
+    # name.  The extension is derived from the content-type, which
+    # leaves little attacker control, but we still insist that it has
+    # no bracketed part.
+    _, write_file_ext = os.path.splitext(emoji_file_name)
+    assert "[" not in write_file_ext
+
+    with libvips_check_image(image_data) as source_image:
+        single_page = source_image.get_n_pages() == 1
+
+        # Both the static result and the animated still start from the
+        # same centre-cropped thumbnail of the default load.
+        cropped = pyvips.Image.thumbnail_buffer(
+            image_data,
+            size,
+            height=size,
+            crop=pyvips.Interesting.CENTRE,
+        )
+        if single_page:
+            return (cropped.write_to_buffer(write_file_ext), None)
+
+        first_still = cropped.write_to_buffer(".png")
+
+        # "n=-1" is a loader option meaning "load every frame", rather
+        # than the default of only the first one.
+        animated = pyvips.Image.thumbnail_buffer(
+            image_data,
+            size,
+            height=size,
+            option_string="n=-1",
+        )
+        if animated.width != animated.get("page-height"):
+            # Non-square frames: pad each one out to size x size with a
+            # transparent background, then stitch them back together.
+            if not animated.hasalpha():
+                animated = animated.addalpha()
+            padded = []
+            for frame in animated.pagesplit():
+                padded.append(
+                    frame.gravity(
+                        pyvips.CompassDirection.CENTRE,
+                        size,
+                        size,
+                        extend=pyvips.Extend.BACKGROUND,
+                        background=[0, 0, 0, 0],
+                    )
+                )
+            animated = padded[0].pagejoin(padded[1:])
+        return (animated.write_to_buffer(write_file_ext), first_still)
+
+
+# From zerver.lib.emoji
+def get_emoji_file_name(content_type: str, emoji_id: int) -> str:
+    """Return the salted-hash file name, with extension, for an emoji.
+
+    The name is the first 8 hex characters of
+    sha256(AVATAR_SALT + ":" + emoji_id) followed by the extension
+    guessed from content_type.
+    """
+    extension = guess_extension(content_type, strict=False)
+    # Callers are expected to pass only content types that have a known
+    # extension.
+    assert extension is not None
+
+    # The hash is salted with a server-side secret so that clients
+    # cannot enumerate names and names do not collide on the server.
+    # Realm imports may pass a synthetic emoji_id; that is fine as long
+    # as it starts at 1, since later emoji then cannot collide short of
+    # a genuine hash collision.
+    #
+    # Eight hex characters is enough entropy to make collisions
+    # vanishingly unlikely.  In the event of a collision, the id will
+    # advance and a manual retry will succeed.
+    salted_id = b":".join([settings.AVATAR_SALT.encode(), str(emoji_id).encode()])
+    digest = hashlib.sha256(salted_id).hexdigest()
+    return f"{digest[:8]}{extension}"
+
+
+def thumbnail_local_emoji(apps: StateApps) -> None:
+    """Re-thumbnail and rename every custom emoji stored under LOCAL_AVATARS_DIR.
+
+    For each emoji yielded by thumbnail_iterator, the new salted file
+    name is computed from the content-type guessed from the old file
+    name.  The ".original" file is hard-linked to the new name, the
+    thumbnails are regenerated with resize_emoji, and emoji.file_name
+    is saved.  Emoji whose name is already correct are left alone.
+
+    Any SkipImageError along the way is logged; the emoji is then
+    deactivated and pointed at a copy of static/images/bad-emoji.png.
+    """
+    assert settings.LOCAL_AVATARS_DIR is not None
+    for total_processed, emoji in enumerate(thumbnail_iterator(apps)):
+        if total_processed % 100 == 0:
+            print(f"Processed {total_processed} custom emoji")
+
+        # These are computed before the try block so that the
+        # SkipImageError handler below always writes the placeholder
+        # into *this* emoji's realm directory, even when validation
+        # fails before any file has been touched.
+        old_file_name = emoji.file_name
+        base_path = os.path.join(
+            settings.LOCAL_AVATARS_DIR, str(emoji.realm_id), "emoji/images"
+        )
+        try:
+            content_type = guess_type(old_file_name)[0]
+            if content_type not in VALID_EMOJI_CONTENT_TYPE:
+                raise SkipImageError(f"Invalid content-type: {content_type}")
+
+            new_file_name = get_emoji_file_name(content_type, emoji.id)
+            if old_file_name == new_file_name:
+                continue
+            print(f"{base_path}/{old_file_name} -> {base_path}/{new_file_name}")
+            try:
+                # Remove any leftover target first, since os.link fails
+                # if the destination already exists.
+                if os.path.exists(f"{base_path}/{new_file_name}.original"):
+                    os.unlink(f"{base_path}/{new_file_name}.original")
+                os.link(
+                    f"{base_path}/{old_file_name}.original", f"{base_path}/{new_file_name}.original"
+                )
+                with open(f"{base_path}/{new_file_name}.original", "rb") as fh:
+                    original_bytes = fh.read()
+            except OSError as e:
+                raise SkipImageError(f"Failed to read original file: {e}") from e
+
+            animated, still = resize_emoji(original_bytes, new_file_name)
+            try:
+                with open(f"{base_path}/{new_file_name}", "wb") as fh:
+                    fh.write(animated)
+
+                if still is not None:
+                    # The still/ directory may not exist yet for this realm.
+                    os.makedirs(f"{base_path}/still", exist_ok=True)
+                    filename_no_extension = os.path.splitext(new_file_name)[0]
+                    with open(f"{base_path}/still/{filename_no_extension}.png", "wb") as fh:
+                        fh.write(still)
+            except OSError as e:
+                raise SkipImageError(f"Failed to write new file: {e}") from e
+
+            emoji.file_name = new_file_name
+            emoji.save(update_fields=["file_name"])
+        except SkipImageError as e:
+            logging.warning(
+                "Failed to re-thumbnail %s/emoji/images/%s: %s", emoji.realm_id, emoji.file_name, e
+            )
+            # The placeholder is a PNG, so its name uses the PNG extension.
+            new_file_name = get_emoji_file_name("image/png", emoji.id)
+            try:
+                with open(f"{settings.DEPLOY_ROOT}/static/images/bad-emoji.png", "rb") as f:
+                    with open(f"{base_path}/{new_file_name}", "wb") as new_f:
+                        new_f.write(f.read())
+                emoji.deactivated = True
+                emoji.is_animated = False
+                emoji.file_name = new_file_name
+                emoji.save(update_fields=["file_name", "is_animated", "deactivated"])
+            except Exception as e:
+                # Best effort: log and move on to the next emoji.
+                logging.error("Failed to deactivate and replace with known-good image: %s", e)
+
+
+def thumbnail_s3(apps: StateApps) -> None:
+    """Re-thumbnail and rename every custom emoji stored in the S3 avatar bucket.
+
+    For each emoji yielded by thumbnail_iterator, the ".original" object
+    is fetched, copied to the new salted name with refreshed metadata,
+    re-thumbnailed with resize_emoji, and uploaded; emoji.file_name is
+    then saved.  Emoji whose name is already correct are left alone.
+
+    Any SkipImageError along the way is logged; the emoji is then
+    deactivated and pointed at an uploaded copy of
+    static/images/bad-emoji.png.
+    """
+    avatar_bucket = boto3.resource(
+        "s3",
+        aws_access_key_id=settings.S3_KEY,
+        aws_secret_access_key=settings.S3_SECRET_KEY,
+        region_name=settings.S3_REGION,
+        endpoint_url=settings.S3_ENDPOINT_URL,
+        config=Config(
+            signature_version=None,
+            s3={"addressing_style": settings.S3_ADDRESSING_STYLE},
+        ),
+    ).Bucket(settings.S3_AVATAR_BUCKET)
+    for total_processed, emoji in enumerate(thumbnail_iterator(apps)):
+        if total_processed % 100 == 0:
+            print(f"Processed {total_processed} custom emoji")
+
+        # Computed before the try block so the SkipImageError handler
+        # below can always rely on them.
+        old_file_name = emoji.file_name
+        base_path = os.path.join(str(emoji.realm_id), "emoji/images")
+        try:
+            try:
+                old_data = avatar_bucket.Object(f"{base_path}/{old_file_name}.original").get()
+                original_bytes = old_data["Body"].read()
+            except botocore.exceptions.ClientError as e:
+                raise SkipImageError(f"Failed to read original file: {e}") from e
+
+            content_type = old_data["ContentType"]
+            if content_type not in VALID_EMOJI_CONTENT_TYPE:
+                raise SkipImageError(f"Invalid content-type: {content_type}")
+
+            metadata = old_data["Metadata"]
+            # Make sure this metadata is up-to-date, while we're
+            # in here; some early emoji are missing it
+            metadata["realm_id"] = str(emoji.realm_id)
+            if emoji.author_id:
+                metadata["user_profile_id"] = str(emoji.author_id)
+
+            new_file_name = get_emoji_file_name(content_type, emoji.id)
+            if old_file_name == new_file_name:
+                continue
+
+            print(f"{base_path}/{old_file_name} -> {base_path}/{new_file_name}")
+            # NOTE(review): a ClientError raised by this copy is not
+            # converted to SkipImageError and will abort the migration --
+            # confirm that is intended.
+            avatar_bucket.Object(f"{base_path}/{new_file_name}.original").copy_from(
+                CopySource=f"{settings.S3_AVATAR_BUCKET}/{base_path}/{old_file_name}.original",
+                MetadataDirective="REPLACE",
+                Metadata=metadata,
+                ContentType=content_type,
+                CacheControl="public, max-age=31536000, immutable",
+            )
+
+            animated, still = resize_emoji(original_bytes, new_file_name)
+            try:
+                avatar_bucket.Object(f"{base_path}/{new_file_name}").put(
+                    Metadata=metadata,
+                    ContentType=content_type,
+                    CacheControl="public, max-age=31536000, immutable",
+                    Body=animated,
+                )
+                if still is not None:
+                    filename_no_extension = os.path.splitext(new_file_name)[0]
+                    avatar_bucket.Object(f"{base_path}/still/{filename_no_extension}.png").put(
+                        Metadata=metadata,
+                        ContentType="image/png",
+                        CacheControl="public, max-age=31536000, immutable",
+                        Body=still,
+                    )
+            except botocore.exceptions.ClientError as e:
+                raise SkipImageError(f"Failed to upload new file: {e}") from e
+
+            emoji.file_name = new_file_name
+            emoji.save(update_fields=["file_name"])
+        except SkipImageError as e:
+            logging.warning(
+                "Failed to re-thumbnail %s/emoji/images/%s: %s", emoji.realm_id, emoji.file_name, e
+            )
+            # The placeholder is a PNG, so its name uses the PNG extension.
+            new_file_name = get_emoji_file_name("image/png", emoji.id)
+            try:
+                # Mirror the success path: only record an author when
+                # there is one, rather than storing the string "None".
+                fallback_metadata = {"realm_id": str(emoji.realm_id)}
+                if emoji.author_id:
+                    fallback_metadata["user_profile_id"] = str(emoji.author_id)
+                with open(f"{settings.DEPLOY_ROOT}/static/images/bad-emoji.png", "rb") as f:
+                    avatar_bucket.Object(f"{base_path}/{new_file_name}").put(
+                        Metadata=fallback_metadata,
+                        ContentType="image/png",
+                        CacheControl="public, max-age=31536000, immutable",
+                        Body=f.read(),
+                    )
+                emoji.deactivated = True
+                emoji.is_animated = False
+                emoji.file_name = new_file_name
+                emoji.save(update_fields=["file_name", "is_animated", "deactivated"])
+            except Exception as e:
+                # Best effort: log and move on to the next emoji.
+                logging.error("Failed to deactivate and replace with known-good image: %s", e)
+
+
+def thumbnail_iterator(apps: StateApps) -> Iterator[Any]:
+    """Yield every RealmEmoji row, realm by realm, each ordered by id.
+
+    Only realms that have at least one emoji are visited.
+    """
+    realm_model = apps.get_model("zerver", "Realm")
+    emoji_model = apps.get_model("zerver", "RealmEmoji")
+    realms_with_emoji = (
+        realm_model.objects.filter(realmemoji__isnull=False).distinct().order_by("id")
+    )
+    for realm in realms_with_emoji:
+        for realm_emoji in emoji_model.objects.filter(realm=realm).order_by("id"):
+            yield realm_emoji
+
+
+def thumbnail_emoji(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+    """RunPython entry point: pick the S3 or local-disk implementation.
+
+    The local-disk path is used whenever settings.LOCAL_AVATARS_DIR is
+    set; otherwise emoji are processed in S3.
+    """
+    if settings.LOCAL_AVATARS_DIR is None:
+        thumbnail_s3(apps)
+        return
+    thumbnail_local_emoji(apps)
+
+
+class Migration(migrations.Migration):
+    # Data migration that re-thumbnails and renames existing custom emoji.
+
+    # Not wrapped in a single transaction; presumably so that emoji
+    # already saved stay saved if a later one aborts the run -- confirm.
+    atomic = False
+    # NOTE(review): `elidable` is normally an option on operations (it is
+    # also passed to RunPython below); confirm this class-level attribute
+    # has any effect.
+    elidable = True
+
+    dependencies = [
+        ("zerver", "0552_remove_realm_private_message_policy"),
+    ]
+
+    # No reverse function is supplied to RunPython.
+    operations = [migrations.RunPython(thumbnail_emoji, elidable=True)]