thumbnailing: Switch to libvips, from PIL/pillow.

This is done in as much of a drop-in fashion as possible. Note that libvips does not support animated PNGs[^1], and as such this conversion removes support for them as emoji; however, libvips includes support for webp images, which future commits will take advantage of. This removes the MAX_EMOJI_GIF_SIZE limit, since that existed to work around bugs in Pillow. MAX_EMOJI_GIF_FILE_SIZE_BYTES is fixed to actually be 128KiB (not 128MiB, as it actually was), and is counted _after_ resizing, since the point is to limit the amount of data transfer to clients. [^1]: https://github.com/libvips/libvips/discussions/2000
2024-06-12 18:19:15 +00:00 · 2024-06-12 18:19:15 +00:00 · b14a33c659
parent 9fb03cb2c7
commit b14a33c659
8 changed files with 157 additions and 152 deletions
--- a/zerver/actions/realm_emoji.py
+++ b/zerver/actions/realm_emoji.py
@ -8,7 +8,6 @@ from django.utils.translation import gettext as _
 from zerver.lib.emoji import get_emoji_file_name
 from zerver.lib.exceptions import JsonableError
 from zerver.lib.pysa import mark_sanitized
 from zerver.lib.upload import upload_emoji_image
 from zerver.models import Realm, RealmAuditLog, RealmEmoji, UserProfile
 from zerver.models.realm_emoji import EmojiInfo, get_all_custom_emoji_for_realm
@ -34,10 +33,6 @@ def check_add_realm_emoji(
    emoji_file_name = get_emoji_file_name(image_file.name, realm_emoji.id)
    # The only user-controlled portion of 'emoji_file_name' is an extension,
    # which cannot contain '..' or '/' or '\', making it difficult to exploit
    emoji_file_name = mark_sanitized(emoji_file_name)
    emoji_uploaded_successfully = False
    is_animated = False
    try:
--- a/zerver/lib/emoji.py
+++ b/zerver/lib/emoji.py
@ -149,5 +149,11 @@ def get_emoji_url(emoji_file_name: str, realm_id: int, still: bool = False) -> s
 def get_emoji_file_name(emoji_file_name: str, emoji_id: int) -> str:
-    _, image_ext = os.path.splitext(emoji_file_name)
+    image_ext = os.path.splitext(emoji_file_name)[1]
    if not re.match(r"\.\w+$", image_ext):
        # Because the extension from the uploaded filename is
        # user-provided, preserved in the output filename, and libvips
        # uses `[...]` after the extension for options, we validate
        # the simple file extension.
        raise JsonableError(_("Bad file name!"))  # nocoverage
    return "".join((str(emoji_id), image_ext))
--- a/zerver/lib/thumbnail.py
+++ b/zerver/lib/thumbnail.py
@ -1,11 +1,12 @@
-import io
+import logging
-from typing import Optional, Tuple
+import os
 from contextlib import contextmanager
 from typing import Iterator, Optional, Tuple
 from urllib.parse import urljoin
 import pyvips
 from django.utils.http import url_has_allowed_host_and_scheme
 from django.utils.translation import gettext as _
 from PIL import GifImagePlugin, Image, ImageOps, PngImagePlugin
 from PIL.Image import DecompressionBombError
 from zerver.lib.camo import get_camo_url
 from zerver.lib.exceptions import ErrorCode, JsonableError
@ -14,11 +15,11 @@ DEFAULT_AVATAR_SIZE = 100
 MEDIUM_AVATAR_SIZE = 500
 DEFAULT_EMOJI_SIZE = 64
-# These sizes were selected based on looking at the maximum common
+# We refuse to deal with any image whose total pixelcount exceeds this.
-# sizes in a library of animated custom emoji, balanced against the
+IMAGE_BOMB_TOTAL_PIXELS = 90000000
-# network cost of very large emoji images.
+
-MAX_EMOJI_GIF_SIZE = 128
+# Reject emoji which, after resizing, have stills larger than this
-MAX_EMOJI_GIF_FILE_SIZE_BYTES = 128 * 1024 * 1024  # 128 kb
+MAX_EMOJI_GIF_FILE_SIZE_BYTES = 128 * 1024  # 128 kb
 class BadImageError(JsonableError):
@ -39,122 +40,114 @@ def generate_thumbnail_url(path: str, size: str = "0x0") -> str:
    return get_camo_url(path)
-def resize_avatar(image_data: bytes, size: int = DEFAULT_AVATAR_SIZE) -> bytes:
+@contextmanager
 def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]:
    # The primary goal of this is to verify that the image is valid,
    # and raise BadImageError otherwise.  The yielded `source_image`
    # may be ignored, since calling `thumbnail_buffer` is faster than
    # calling `thumbnail_image` on a pyvips.Image, since the latter
    # cannot make use of shrink-on-load optimizations:
    # https://www.libvips.org/API/current/libvips-resample.html#vips-thumbnail-image
    try:
-        im = Image.open(io.BytesIO(image_data))
+        source_image = pyvips.Image.new_from_buffer(image_data, "")
-        im = ImageOps.exif_transpose(im)
+    except pyvips.Error:
        im = ImageOps.fit(im, (size, size), Image.Resampling.LANCZOS)
    except OSError:
        raise BadImageError(_("Could not decode image; did you upload an image file?"))
-    except DecompressionBombError:
+
    if source_image.width * source_image.height > IMAGE_BOMB_TOTAL_PIXELS:
        raise BadImageError(_("Image size exceeds limit."))
-    out = io.BytesIO()
+
-    if im.mode == "CMYK":
+    try:
-        im = im.convert("RGB")
+        yield source_image
-    im.save(out, format="png")
+    except pyvips.Error as e:  # nocoverage
-    return out.getvalue()
+        logging.exception(e)
        raise BadImageError(_("Bad image!"))
 def resize_avatar(image_data: bytes, size: int = DEFAULT_AVATAR_SIZE) -> bytes:
    # This will scale up, if necessary, and will scale the smallest
    # dimension to fit.  That is, a 1x1000 image will end up with the
    # one middle pixel enlarged to fill the full square.
    with libvips_check_image(image_data):
        return pyvips.Image.thumbnail_buffer(
            image_data,
            size,
            height=size,
            crop=pyvips.Interesting.CENTRE,
        ).write_to_buffer(".png")
 def resize_logo(image_data: bytes) -> bytes:
-    try:
+    # This will only scale the image down, and will resize it to
-        im = Image.open(io.BytesIO(image_data))
+    # preserve aspect ratio and be contained within 8*AVATAR by AVATAR
-        im = ImageOps.exif_transpose(im)
+    # pixels; it does not add any padding to make it exactly that
-        im.thumbnail((8 * DEFAULT_AVATAR_SIZE, DEFAULT_AVATAR_SIZE), Image.Resampling.LANCZOS)
+    # size.  A 1000x10 pixel image will end up as 800x8; a 10x10 will
-    except OSError:
+    # end up 10x10.
-        raise BadImageError(_("Could not decode image; did you upload an image file?"))
+    with libvips_check_image(image_data):
-    except DecompressionBombError:
+        return pyvips.Image.thumbnail_buffer(
-        raise BadImageError(_("Image size exceeds limit."))
+            image_data,
-    out = io.BytesIO()
+            8 * DEFAULT_AVATAR_SIZE,
-    if im.mode == "CMYK":
+            height=DEFAULT_AVATAR_SIZE,
-        im = im.convert("RGB")
+            size=pyvips.Size.DOWN,
-    im.save(out, format="png")
+        ).write_to_buffer(".png")
    return out.getvalue()
 def resize_animated(im: Image.Image, size: int = DEFAULT_EMOJI_SIZE) -> bytes:
    assert im.n_frames > 1
    frames = []
    duration_info = []
    disposals = []
    # If 'loop' info is not set then loop for infinite number of times.
    loop = im.info.get("loop", 0)
    for frame_num in range(im.n_frames):
        im.seek(frame_num)
        new_frame = im.copy()
        new_frame.paste(im, (0, 0), im.convert("RGBA"))
        new_frame = ImageOps.pad(new_frame, (size, size), Image.Resampling.LANCZOS)
        frames.append(new_frame)
        if im.info.get("duration") is None:  # nocoverage
            raise BadImageError(_("Corrupt animated image."))
        duration_info.append(im.info["duration"])
        if isinstance(im, GifImagePlugin.GifImageFile):
            disposals.append(
                im.disposal_method  # type: ignore[attr-defined]  # private member missing from stubs
            )
        elif isinstance(im, PngImagePlugin.PngImageFile):
            disposals.append(im.info.get("disposal", PngImagePlugin.Disposal.OP_NONE))
        else:  # nocoverage
            raise BadImageError(_("Unknown animated image format."))
    out = io.BytesIO()
    frames[0].save(
        out,
        save_all=True,
        optimize=False,
        format=im.format,
        append_images=frames[1:],
        duration=duration_info,
        disposal=disposals,
        loop=loop,
    )
    return out.getvalue()
 def resize_emoji(
-    image_data: bytes, size: int = DEFAULT_EMOJI_SIZE
+    image_data: bytes, emoji_file_name: str, size: int = DEFAULT_EMOJI_SIZE
 ) -> Tuple[bytes, bool, Optional[bytes]]:
    if len(image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES:
        raise BadImageError(_("Image size exceeds limit."))
    # Square brackets are used for providing options to libvips' save
    # operation; these should have been filtered out earlier, so we
    # assert none are found here, for safety.
    write_file_ext = os.path.splitext(emoji_file_name)[1]
    assert "[" not in write_file_ext
    # This function returns three values:
    # 1) Emoji image data.
-    # 2) If emoji is gif i.e. animated.
+    # 2) If the emoji is animated.
-    # 3) If is animated then return still image data i.e. first frame of gif.
+    # 3) If it is animated, the still image data i.e. first frame of gif.
-
+    with libvips_check_image(image_data) as source_image:
-    try:
+        if source_image.get_n_pages() == 1:
-        im = Image.open(io.BytesIO(image_data))
+            return (
-        image_format = im.format
+                pyvips.Image.thumbnail_buffer(
-        if getattr(im, "n_frames", 1) > 1:
+                    image_data,
-            # There are a number of bugs in Pillow which cause results
+                    size,
-            # in resized images being broken. To work around this we
+                    height=size,
-            # only resize under certain conditions to minimize the
+                    crop=pyvips.Interesting.CENTRE,
-            # chance of creating ugly images.
+                ).write_to_buffer(write_file_ext),
-            should_resize = (
+                False,
-                im.size[0] != im.size[1]  # not square
+                None,
                or im.size[0] > MAX_EMOJI_GIF_SIZE  # dimensions too large
                or len(image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES  # filesize too large
            )
        first_still = pyvips.Image.thumbnail_buffer(
            image_data,
            size,
            height=size,
            crop=pyvips.Interesting.CENTRE,
        ).write_to_buffer(".png")
-            # Generate a still image from the first frame.  Since
+        animated = pyvips.Image.thumbnail_buffer(
-            # we're converting the format to PNG anyway, we resize unconditionally.
+            image_data,
-            still_image = im.copy()
+            size,
-            still_image.seek(0)
+            height=size,
-            still_image = ImageOps.exif_transpose(still_image)
+            # This is passed to the loader, and means "load all
-            still_image = ImageOps.fit(still_image, (size, size), Image.Resampling.LANCZOS)
+            # frames", instead of the default of just the first
-            out = io.BytesIO()
+            option_string="n=-1",
-            still_image.save(out, format="PNG")
+        )
-            still_image_data = out.getvalue()
+        if animated.width != animated.get("page-height"):
-
+            # If the image is non-square, we have to iterate the
-            if should_resize:
+            # frames to add padding to make it so
-                image_data = resize_animated(im, size)
+            if not animated.hasalpha():
-
+                animated = animated.addalpha()
-            return image_data, True, still_image_data
+            frames = [
-        else:
+                frame.gravity(
-            # Note that this is essentially duplicated in the
+                    pyvips.CompassDirection.CENTRE,
-            # still_image code path, above.
+                    size,
-            im = ImageOps.exif_transpose(im)
+                    size,
-            im = ImageOps.fit(im, (size, size), Image.Resampling.LANCZOS)
+                    extend=pyvips.Extend.BACKGROUND,
-            out = io.BytesIO()
+                    background=[0, 0, 0, 0],
-            im.save(out, format=image_format)
+                )
-            return out.getvalue(), False, None
+                for frame in animated.pagesplit()
-    except OSError:
+            ]
-        raise BadImageError(_("Could not decode image; did you upload an image file?"))
+            animated = frames[0].pagejoin(frames[1:])
-    except DecompressionBombError:
+        return (animated.write_to_buffer(write_file_ext), True, first_still)
        raise BadImageError(_("Image size exceeds limit."))
--- a/zerver/lib/upload/init.py
+++ b/zerver/lib/upload/init.py
@ -13,7 +13,13 @@ from zerver.lib.avatar_hash import user_avatar_path
 from zerver.lib.exceptions import ErrorCode, JsonableError
 from zerver.lib.mime_types import guess_type
 from zerver.lib.outgoing_http import OutgoingSession
-from zerver.lib.thumbnail import MEDIUM_AVATAR_SIZE, resize_avatar, resize_emoji
+from zerver.lib.thumbnail import (
    MAX_EMOJI_GIF_FILE_SIZE_BYTES,
    MEDIUM_AVATAR_SIZE,
    BadImageError,
    resize_avatar,
    resize_emoji,
 )
 from zerver.lib.upload.base import ZulipUploadBackend
 from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
@ -252,7 +258,11 @@ def upload_emoji_image(
    backend.upload_single_emoji_image(
        f"{emoji_path}.original", content_type, user_profile, image_data
    )
-    resized_image_data, is_animated, still_image_data = resize_emoji(image_data)
+    resized_image_data, is_animated, still_image_data = resize_emoji(image_data, emoji_file_name)
    if is_animated and len(still_image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES:  # nocoverage
        raise BadImageError(_("Image size exceeds limit"))
    if not is_animated and len(image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES:  # nocoverage
        raise BadImageError(_("Image size exceeds limit"))
    backend.upload_single_emoji_image(emoji_path, content_type, user_profile, resized_image_data)
    if is_animated:
        assert still_image_data is not None
--- a/zerver/tests/images/animated_large_img.png
+++ b/zerver/tests/images/animated_large_img.png
--- a/zerver/tests/test_transfer.py
+++ b/zerver/tests/test_transfer.py
@ -103,7 +103,7 @@ class TransferUploadsToS3Test(ZulipTestCase):
        resized_key = bucket.Object(emoji_path)
        image_data = read_test_image_file("img.png")
-        resized_image_data, is_animated, still_image_data = resize_emoji(image_data)
+        resized_image_data, is_animated, still_image_data = resize_emoji(image_data, "img.png")
        self.assertEqual(is_animated, False)
        self.assertEqual(still_image_data, None)
@ -135,7 +135,9 @@ class TransferUploadsToS3Test(ZulipTestCase):
        )
        image_data = read_test_image_file("animated_img.gif")
-        resized_image_data, is_animated, still_image_data = resize_emoji(image_data)
+        resized_image_data, is_animated, still_image_data = resize_emoji(
            image_data, "animated_img.gif"
        )
        self.assertEqual(is_animated, True)
        self.assertEqual(type(still_image_data), bytes)
--- a/zerver/tests/test_upload.py
+++ b/zerver/tests/test_upload.py
@ -1392,7 +1392,7 @@ class EmojiTest(UploadSerializeMixin, ZulipTestCase):
        # Test unequal width and height of animated GIF image
        animated_unequal_img_data = read_test_image_file("animated_unequal_img.gif")
        resized_img_data, is_animated, still_img_data = resize_emoji(
-            animated_unequal_img_data, size=50
+            animated_unequal_img_data, "animated_unequal_img.gif", size=50
        )
        im = Image.open(io.BytesIO(resized_img_data))
        self.assertEqual((50, 50), im.size)
@ -1404,37 +1404,34 @@ class EmojiTest(UploadSerializeMixin, ZulipTestCase):
        # Test corrupt image exception
        corrupted_img_data = read_test_image_file("corrupt.gif")
        with self.assertRaises(BadImageError):
-            resize_emoji(corrupted_img_data)
+            resize_emoji(corrupted_img_data, "corrupt.gif")
-        def test_resize(size: int = 50) -> None:
+        animated_large_img_data = read_test_image_file("animated_large_img.gif")
-            resized_img_data, is_animated, still_img_data = resize_emoji(
+        resized_img_data, is_animated, still_img_data = resize_emoji(
-                animated_large_img_data, size=50
+            animated_large_img_data, "animated_large_img.gif", size=50
-            )
+        )
-            im = Image.open(io.BytesIO(resized_img_data))
+        im = Image.open(io.BytesIO(resized_img_data))
-            self.assertEqual((size, size), im.size)
+        self.assertEqual((50, 50), im.size)
-            self.assertTrue(is_animated)
+        self.assertTrue(is_animated)
-            assert still_img_data
+        assert still_img_data
-            still_image = Image.open(io.BytesIO(still_img_data))
+        still_image = Image.open(io.BytesIO(still_img_data))
-            self.assertEqual((50, 50), still_image.size)
+        self.assertEqual((50, 50), still_image.size)
-        for img_format in ("gif", "png"):
+        # Test an image file with too many bytes is not resized
-            animated_large_img_data = read_test_image_file(f"animated_large_img.{img_format}")
+        with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_FILE_SIZE_BYTES", 1024):
            with self.assertRaises(BadImageError):
                resize_emoji(animated_large_img_data, "animated_large_img.gif", size=50)
-            # Test an image larger than max is resized
+        # Test an image file with too many pixels is not resized
-            with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_SIZE", 128):
+        with patch("zerver.lib.thumbnail.IMAGE_BOMB_TOTAL_PIXELS", 100):
-                test_resize()
+            with self.assertRaises(BadImageError):
-
+                resize_emoji(animated_large_img_data, "animated_large_img.gif", size=50)
            # Test an image file larger than max is resized
            with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_FILE_SIZE_BYTES", 3 * 1024 * 1024):
                test_resize()
            # Test an image smaller than max and smaller than file size max is not resized
            with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_SIZE", 512):
                test_resize(size=256)
        # Test a non-animated GIF image which does need to be resized
        still_large_img_data = read_test_image_file("still_large_img.gif")
-        resized_img_data, is_animated, no_still_data = resize_emoji(still_large_img_data, size=50)
+        resized_img_data, is_animated, no_still_data = resize_emoji(
            still_large_img_data, "still_large_img.gif", size=50
        )
        im = Image.open(io.BytesIO(resized_img_data))
        self.assertEqual((50, 50), im.size)
        self.assertFalse(is_animated)
@ -1442,7 +1439,9 @@ class EmojiTest(UploadSerializeMixin, ZulipTestCase):
        # Test a non-animated and non-animatable image format which needs to be resized
        still_large_img_data = read_test_image_file("img.jpg")
-        resized_img_data, is_animated, no_still_data = resize_emoji(still_large_img_data, size=50)
+        resized_img_data, is_animated, no_still_data = resize_emoji(
            still_large_img_data, "img.jpg", size=50
        )
        im = Image.open(io.BytesIO(resized_img_data))
        self.assertEqual((50, 50), im.size)
        self.assertFalse(is_animated)
--- a/zilencer/management/commands/populate_db.py
+++ b/zilencer/management/commands/populate_db.py
@ -913,7 +913,7 @@ class Command(ZulipBaseCommand):
        # Create a test realm emoji.
        IMAGE_FILE_PATH = static_path("images/test-images/checkbox.png")
        with open(IMAGE_FILE_PATH, "rb") as fp:
-            check_add_realm_emoji(zulip_realm, "green_tick", iago, File(fp))
+            check_add_realm_emoji(zulip_realm, "green_tick", iago, File(fp, name="checkbox.png"))
        if not options["test_suite"]:
            # Populate users with some bar data