thumbnailing: Switch to libvips, from PIL/pillow.

This is done in as much of a drop-in fashion as possible. Note that libvips does not support animated PNGs[^1], and as such this conversion removes support for them as emoji; however, libvips includes support for webp images, which future commits will take advantage of. This removes the MAX_EMOJI_GIF_SIZE limit, since that existed to work around bugs in Pillow. MAX_EMOJI_GIF_FILE_SIZE_BYTES is fixed to actually be 128KiB (not 128MiB, as it actually was), and is counted _after_ resizing, since the point is to limit the amount of data transfer to clients. [^1]: https://github.com/libvips/libvips/discussions/2000
2024-06-12 18:19:15 +00:00 · 2024-06-12 18:19:15 +00:00 · b14a33c659
parent 9fb03cb2c7
commit b14a33c659
8 changed files with 157 additions and 152 deletions
--- a/zerver/actions/realm_emoji.py
+++ b/zerver/actions/realm_emoji.py
@ -8,7 +8,6 @@ from django.utils.translation import gettext as _

 from zerver.lib.emoji import get_emoji_file_name
 from zerver.lib.exceptions import JsonableError
-from zerver.lib.pysa import mark_sanitized
 from zerver.lib.upload import upload_emoji_image
 from zerver.models import Realm, RealmAuditLog, RealmEmoji, UserProfile
 from zerver.models.realm_emoji import EmojiInfo, get_all_custom_emoji_for_realm
@ -34,10 +33,6 @@ def check_add_realm_emoji(

    emoji_file_name = get_emoji_file_name(image_file.name, realm_emoji.id)

-    # The only user-controlled portion of 'emoji_file_name' is an extension,
-    # which cannot contain '..' or '/' or '\', making it difficult to exploit
-    emoji_file_name = mark_sanitized(emoji_file_name)
-
    emoji_uploaded_successfully = False
    is_animated = False
    try:
--- a/zerver/lib/emoji.py
+++ b/zerver/lib/emoji.py
@ -149,5 +149,11 @@ def get_emoji_url(emoji_file_name: str, realm_id: int, still: bool = False) -> s


 def get_emoji_file_name(emoji_file_name: str, emoji_id: int) -> str:
-    _, image_ext = os.path.splitext(emoji_file_name)
+    image_ext = os.path.splitext(emoji_file_name)[1]
+    if not re.match(r"\.\w+$", image_ext):
+        # Because the extension from the uploaded filename is
+        # user-provided, preserved in the output filename, and libvips
+        # uses `[...]` after the extension for options, we validate
+        # the simple file extension.
+        raise JsonableError(_("Bad file name!"))  # nocoverage
    return "".join((str(emoji_id), image_ext))
--- a/zerver/lib/thumbnail.py
+++ b/zerver/lib/thumbnail.py
@ -1,11 +1,12 @@
-import io
-from typing import Optional, Tuple
+import logging
+import os
+from contextlib import contextmanager
+from typing import Iterator, Optional, Tuple
 from urllib.parse import urljoin

+import pyvips
 from django.utils.http import url_has_allowed_host_and_scheme
 from django.utils.translation import gettext as _
-from PIL import GifImagePlugin, Image, ImageOps, PngImagePlugin
-from PIL.Image import DecompressionBombError

 from zerver.lib.camo import get_camo_url
 from zerver.lib.exceptions import ErrorCode, JsonableError
@ -14,11 +15,11 @@ DEFAULT_AVATAR_SIZE = 100
 MEDIUM_AVATAR_SIZE = 500
 DEFAULT_EMOJI_SIZE = 64

-# These sizes were selected based on looking at the maximum common
-# sizes in a library of animated custom emoji, balanced against the
-# network cost of very large emoji images.
-MAX_EMOJI_GIF_SIZE = 128
-MAX_EMOJI_GIF_FILE_SIZE_BYTES = 128 * 1024 * 1024  # 128 kb
+# We refuse to deal with any image whose total pixelcount exceeds this.
+IMAGE_BOMB_TOTAL_PIXELS = 90000000
+
+# Reject emoji which, after resizing, have stills larger than this
+MAX_EMOJI_GIF_FILE_SIZE_BYTES = 128 * 1024  # 128 kb


 class BadImageError(JsonableError):
@ -39,122 +40,114 @@ def generate_thumbnail_url(path: str, size: str = "0x0") -> str:
    return get_camo_url(path)


-def resize_avatar(image_data: bytes, size: int = DEFAULT_AVATAR_SIZE) -> bytes:
+@contextmanager
+def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]:
+    # The primary goal of this is to verify that the image is valid,
+    # and raise BadImageError otherwise.  The yielded `source_image`
+    # may be ignored, since calling `thumbnail_buffer` is faster than
+    # calling `thumbnail_image` on a pyvips.Image, since the latter
+    # cannot make use of shrink-on-load optimizations:
+    # https://www.libvips.org/API/current/libvips-resample.html#vips-thumbnail-image
    try:
-        im = Image.open(io.BytesIO(image_data))
-        im = ImageOps.exif_transpose(im)
-        im = ImageOps.fit(im, (size, size), Image.Resampling.LANCZOS)
-    except OSError:
+        source_image = pyvips.Image.new_from_buffer(image_data, "")
+    except pyvips.Error:
        raise BadImageError(_("Could not decode image; did you upload an image file?"))
-    except DecompressionBombError:
+
+    if source_image.width * source_image.height > IMAGE_BOMB_TOTAL_PIXELS:
        raise BadImageError(_("Image size exceeds limit."))
-    out = io.BytesIO()
-    if im.mode == "CMYK":
-        im = im.convert("RGB")
-    im.save(out, format="png")
-    return out.getvalue()
+
+    try:
+        yield source_image
+    except pyvips.Error as e:  # nocoverage
+        logging.exception(e)
+        raise BadImageError(_("Bad image!"))
+
+
+def resize_avatar(image_data: bytes, size: int = DEFAULT_AVATAR_SIZE) -> bytes:
+    # This will scale up, if necessary, and will scale the smallest
+    # dimension to fit.  That is, a 1x1000 image will end up with the
+    # one middle pixel enlarged to fill the full square.
+    with libvips_check_image(image_data):
+        return pyvips.Image.thumbnail_buffer(
+            image_data,
+            size,
+            height=size,
+            crop=pyvips.Interesting.CENTRE,
+        ).write_to_buffer(".png")


 def resize_logo(image_data: bytes) -> bytes:
-    try:
-        im = Image.open(io.BytesIO(image_data))
-        im = ImageOps.exif_transpose(im)
-        im.thumbnail((8 * DEFAULT_AVATAR_SIZE, DEFAULT_AVATAR_SIZE), Image.Resampling.LANCZOS)
-    except OSError:
-        raise BadImageError(_("Could not decode image; did you upload an image file?"))
-    except DecompressionBombError:
-        raise BadImageError(_("Image size exceeds limit."))
-    out = io.BytesIO()
-    if im.mode == "CMYK":
-        im = im.convert("RGB")
-    im.save(out, format="png")
-    return out.getvalue()
-
-
-def resize_animated(im: Image.Image, size: int = DEFAULT_EMOJI_SIZE) -> bytes:
-    assert im.n_frames > 1
-    frames = []
-    duration_info = []
-    disposals = []
-    # If 'loop' info is not set then loop for infinite number of times.
-    loop = im.info.get("loop", 0)
-    for frame_num in range(im.n_frames):
-        im.seek(frame_num)
-        new_frame = im.copy()
-        new_frame.paste(im, (0, 0), im.convert("RGBA"))
-        new_frame = ImageOps.pad(new_frame, (size, size), Image.Resampling.LANCZOS)
-        frames.append(new_frame)
-        if im.info.get("duration") is None:  # nocoverage
-            raise BadImageError(_("Corrupt animated image."))
-        duration_info.append(im.info["duration"])
-        if isinstance(im, GifImagePlugin.GifImageFile):
-            disposals.append(
-                im.disposal_method  # type: ignore[attr-defined]  # private member missing from stubs
-            )
-        elif isinstance(im, PngImagePlugin.PngImageFile):
-            disposals.append(im.info.get("disposal", PngImagePlugin.Disposal.OP_NONE))
-        else:  # nocoverage
-            raise BadImageError(_("Unknown animated image format."))
-    out = io.BytesIO()
-    frames[0].save(
-        out,
-        save_all=True,
-        optimize=False,
-        format=im.format,
-        append_images=frames[1:],
-        duration=duration_info,
-        disposal=disposals,
-        loop=loop,
-    )
-
-    return out.getvalue()
+    # This will only scale the image down, and will resize it to
+    # preserve aspect ratio and be contained within 8*AVATAR by AVATAR
+    # pixels; it does not add any padding to make it exactly that
+    # size.  A 1000x10 pixel image will end up as 800x8; a 10x10 will
+    # end up 10x10.
+    with libvips_check_image(image_data):
+        return pyvips.Image.thumbnail_buffer(
+            image_data,
+            8 * DEFAULT_AVATAR_SIZE,
+            height=DEFAULT_AVATAR_SIZE,
+            size=pyvips.Size.DOWN,
+        ).write_to_buffer(".png")


 def resize_emoji(
-    image_data: bytes, size: int = DEFAULT_EMOJI_SIZE
+    image_data: bytes, emoji_file_name: str, size: int = DEFAULT_EMOJI_SIZE
 ) -> Tuple[bytes, bool, Optional[bytes]]:
+    if len(image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES:
+        raise BadImageError(_("Image size exceeds limit."))
+
+    # Square brackets are used for providing options to libvips' save
+    # operation; these should have been filtered out earlier, so we
+    # assert none are found here, for safety.
+    write_file_ext = os.path.splitext(emoji_file_name)[1]
+    assert "[" not in write_file_ext
+
    # This function returns three values:
    # 1) Emoji image data.
-    # 2) If emoji is gif i.e. animated.
-    # 3) If is animated then return still image data i.e. first frame of gif.
-
-    try:
-        im = Image.open(io.BytesIO(image_data))
-        image_format = im.format
-        if getattr(im, "n_frames", 1) > 1:
-            # There are a number of bugs in Pillow which cause results
-            # in resized images being broken. To work around this we
-            # only resize under certain conditions to minimize the
-            # chance of creating ugly images.
-            should_resize = (
-                im.size[0] != im.size[1]  # not square
-                or im.size[0] > MAX_EMOJI_GIF_SIZE  # dimensions too large
-                or len(image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES  # filesize too large
+    # 2) If the emoji is animated.
+    # 3) If it is animated, the still image data i.e. first frame of gif.
+    with libvips_check_image(image_data) as source_image:
+        if source_image.get_n_pages() == 1:
+            return (
+                pyvips.Image.thumbnail_buffer(
+                    image_data,
+                    size,
+                    height=size,
+                    crop=pyvips.Interesting.CENTRE,
+                ).write_to_buffer(write_file_ext),
+                False,
+                None,
            )
+        first_still = pyvips.Image.thumbnail_buffer(
+            image_data,
+            size,
+            height=size,
+            crop=pyvips.Interesting.CENTRE,
+        ).write_to_buffer(".png")

-            # Generate a still image from the first frame.  Since
-            # we're converting the format to PNG anyway, we resize unconditionally.
-            still_image = im.copy()
-            still_image.seek(0)
-            still_image = ImageOps.exif_transpose(still_image)
-            still_image = ImageOps.fit(still_image, (size, size), Image.Resampling.LANCZOS)
-            out = io.BytesIO()
-            still_image.save(out, format="PNG")
-            still_image_data = out.getvalue()
-
-            if should_resize:
-                image_data = resize_animated(im, size)
-
-            return image_data, True, still_image_data
-        else:
-            # Note that this is essentially duplicated in the
-            # still_image code path, above.
-            im = ImageOps.exif_transpose(im)
-            im = ImageOps.fit(im, (size, size), Image.Resampling.LANCZOS)
-            out = io.BytesIO()
-            im.save(out, format=image_format)
-            return out.getvalue(), False, None
-    except OSError:
-        raise BadImageError(_("Could not decode image; did you upload an image file?"))
-    except DecompressionBombError:
-        raise BadImageError(_("Image size exceeds limit."))
+        animated = pyvips.Image.thumbnail_buffer(
+            image_data,
+            size,
+            height=size,
+            # This is passed to the loader, and means "load all
+            # frames", instead of the default of just the first
+            option_string="n=-1",
+        )
+        if animated.width != animated.get("page-height"):
+            # If the image is non-square, we have to iterate the
+            # frames to add padding to make it so
+            if not animated.hasalpha():
+                animated = animated.addalpha()
+            frames = [
+                frame.gravity(
+                    pyvips.CompassDirection.CENTRE,
+                    size,
+                    size,
+                    extend=pyvips.Extend.BACKGROUND,
+                    background=[0, 0, 0, 0],
+                )
+                for frame in animated.pagesplit()
+            ]
+            animated = frames[0].pagejoin(frames[1:])
+        return (animated.write_to_buffer(write_file_ext), True, first_still)
--- a/zerver/lib/upload/init.py
+++ b/zerver/lib/upload/init.py
@ -13,7 +13,13 @@ from zerver.lib.avatar_hash import user_avatar_path
 from zerver.lib.exceptions import ErrorCode, JsonableError
 from zerver.lib.mime_types import guess_type
 from zerver.lib.outgoing_http import OutgoingSession
-from zerver.lib.thumbnail import MEDIUM_AVATAR_SIZE, resize_avatar, resize_emoji
+from zerver.lib.thumbnail import (
+    MAX_EMOJI_GIF_FILE_SIZE_BYTES,
+    MEDIUM_AVATAR_SIZE,
+    BadImageError,
+    resize_avatar,
+    resize_emoji,
+)
 from zerver.lib.upload.base import ZulipUploadBackend
 from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile

@ -252,7 +258,11 @@ def upload_emoji_image(
    backend.upload_single_emoji_image(
        f"{emoji_path}.original", content_type, user_profile, image_data
    )
-    resized_image_data, is_animated, still_image_data = resize_emoji(image_data)
+    resized_image_data, is_animated, still_image_data = resize_emoji(image_data, emoji_file_name)
+    if is_animated and len(still_image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES:  # nocoverage
+        raise BadImageError(_("Image size exceeds limit"))
+    if not is_animated and len(image_data) > MAX_EMOJI_GIF_FILE_SIZE_BYTES:  # nocoverage
+        raise BadImageError(_("Image size exceeds limit"))
    backend.upload_single_emoji_image(emoji_path, content_type, user_profile, resized_image_data)
    if is_animated:
        assert still_image_data is not None
--- a/zerver/tests/images/animated_large_img.png
+++ b/zerver/tests/images/animated_large_img.png
--- a/zerver/tests/test_transfer.py
+++ b/zerver/tests/test_transfer.py
@ -103,7 +103,7 @@ class TransferUploadsToS3Test(ZulipTestCase):
        resized_key = bucket.Object(emoji_path)

        image_data = read_test_image_file("img.png")
-        resized_image_data, is_animated, still_image_data = resize_emoji(image_data)
+        resized_image_data, is_animated, still_image_data = resize_emoji(image_data, "img.png")

        self.assertEqual(is_animated, False)
        self.assertEqual(still_image_data, None)
@ -135,7 +135,9 @@ class TransferUploadsToS3Test(ZulipTestCase):
        )

        image_data = read_test_image_file("animated_img.gif")
-        resized_image_data, is_animated, still_image_data = resize_emoji(image_data)
+        resized_image_data, is_animated, still_image_data = resize_emoji(
+            image_data, "animated_img.gif"
+        )

        self.assertEqual(is_animated, True)
        self.assertEqual(type(still_image_data), bytes)
--- a/zerver/tests/test_upload.py
+++ b/zerver/tests/test_upload.py
@ -1392,7 +1392,7 @@ class EmojiTest(UploadSerializeMixin, ZulipTestCase):
        # Test unequal width and height of animated GIF image
        animated_unequal_img_data = read_test_image_file("animated_unequal_img.gif")
        resized_img_data, is_animated, still_img_data = resize_emoji(
-            animated_unequal_img_data, size=50
+            animated_unequal_img_data, "animated_unequal_img.gif", size=50
        )
        im = Image.open(io.BytesIO(resized_img_data))
        self.assertEqual((50, 50), im.size)
@ -1404,37 +1404,34 @@ class EmojiTest(UploadSerializeMixin, ZulipTestCase):
        # Test corrupt image exception
        corrupted_img_data = read_test_image_file("corrupt.gif")
        with self.assertRaises(BadImageError):
-            resize_emoji(corrupted_img_data)
+            resize_emoji(corrupted_img_data, "corrupt.gif")

-        def test_resize(size: int = 50) -> None:
+        animated_large_img_data = read_test_image_file("animated_large_img.gif")
        resized_img_data, is_animated, still_img_data = resize_emoji(
-                animated_large_img_data, size=50
+            animated_large_img_data, "animated_large_img.gif", size=50
        )
        im = Image.open(io.BytesIO(resized_img_data))
-            self.assertEqual((size, size), im.size)
+        self.assertEqual((50, 50), im.size)
        self.assertTrue(is_animated)
        assert still_img_data
        still_image = Image.open(io.BytesIO(still_img_data))
        self.assertEqual((50, 50), still_image.size)

-        for img_format in ("gif", "png"):
-            animated_large_img_data = read_test_image_file(f"animated_large_img.{img_format}")
+        # Test an image file with too many bytes is not resized
+        with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_FILE_SIZE_BYTES", 1024):
+            with self.assertRaises(BadImageError):
+                resize_emoji(animated_large_img_data, "animated_large_img.gif", size=50)

-            # Test an image larger than max is resized
-            with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_SIZE", 128):
-                test_resize()
-
-            # Test an image file larger than max is resized
-            with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_FILE_SIZE_BYTES", 3 * 1024 * 1024):
-                test_resize()
-
-            # Test an image smaller than max and smaller than file size max is not resized
-            with patch("zerver.lib.thumbnail.MAX_EMOJI_GIF_SIZE", 512):
-                test_resize(size=256)
+        # Test an image file with too many pixels is not resized
+        with patch("zerver.lib.thumbnail.IMAGE_BOMB_TOTAL_PIXELS", 100):
+            with self.assertRaises(BadImageError):
+                resize_emoji(animated_large_img_data, "animated_large_img.gif", size=50)

        # Test a non-animated GIF image which does need to be resized
        still_large_img_data = read_test_image_file("still_large_img.gif")
-        resized_img_data, is_animated, no_still_data = resize_emoji(still_large_img_data, size=50)
+        resized_img_data, is_animated, no_still_data = resize_emoji(
+            still_large_img_data, "still_large_img.gif", size=50
+        )
        im = Image.open(io.BytesIO(resized_img_data))
        self.assertEqual((50, 50), im.size)
        self.assertFalse(is_animated)
@ -1442,7 +1439,9 @@ class EmojiTest(UploadSerializeMixin, ZulipTestCase):

        # Test a non-animated and non-animatable image format which needs to be resized
        still_large_img_data = read_test_image_file("img.jpg")
-        resized_img_data, is_animated, no_still_data = resize_emoji(still_large_img_data, size=50)
+        resized_img_data, is_animated, no_still_data = resize_emoji(
+            still_large_img_data, "img.jpg", size=50
+        )
        im = Image.open(io.BytesIO(resized_img_data))
        self.assertEqual((50, 50), im.size)
        self.assertFalse(is_animated)
--- a/zilencer/management/commands/populate_db.py
+++ b/zilencer/management/commands/populate_db.py
@ -913,7 +913,7 @@ class Command(ZulipBaseCommand):
        # Create a test realm emoji.
        IMAGE_FILE_PATH = static_path("images/test-images/checkbox.png")
        with open(IMAGE_FILE_PATH, "rb") as fp:
-            check_add_realm_emoji(zulip_realm, "green_tick", iago, File(fp))
+            check_add_realm_emoji(zulip_realm, "green_tick", iago, File(fp, name="checkbox.png"))

        if not options["test_suite"]:
            # Populate users with some bar data