emoji: Derive the file extension from a limited set of content-types.

We thumbnail and serve emoji with the same format as they were uploaded. However, we preserved the original extension, which might mismatch with the provided content-type. Limit the content-type to a subset which is both (a) an image format we can thumbnail, and (b) a media format which is widely-enough supported that we are willing to provide it to all browsers. This prevents uploading a `.tiff` emoji, for instance. Based on this limited content-type, we then reverse to find the reasonable extension to use when storing it. This is particularly important because the local file storage uses the file extension to choose what content-type to re-serve the emoji as. This does nothing for existing emoji, which may have odd or missing file extensions.
2024-07-11 19:10:17 +00:00 · 2024-07-11 19:10:17 +00:00 · f6b99171ce
parent 62a0611ddb
commit f6b99171ce
9 changed files with 114 additions and 14 deletions
--- a/zerver/actions/realm_emoji.py
+++ b/zerver/actions/realm_emoji.py
@ -8,7 +8,9 @@ from django.utils.translation import gettext as _
 from zerver.lib.emoji import get_emoji_file_name
 from zerver.lib.exceptions import JsonableError
 from zerver.lib.thumbnail import THUMBNAIL_ACCEPT_IMAGE_TYPES, BadImageError
 from zerver.lib.upload import upload_emoji_image
 from zerver.lib.upload.base import INLINE_MIME_TYPES
 from zerver.models import Realm, RealmAuditLog, RealmEmoji, UserProfile
 from zerver.models.realm_emoji import EmojiInfo, get_all_custom_emoji_for_realm
 from zerver.models.users import active_user_ids
@ -31,7 +33,13 @@ def check_add_realm_emoji(
        # Match the string in upload_emoji.
        raise JsonableError(_("A custom emoji with this name already exists."))
-    emoji_file_name = get_emoji_file_name(image_file.name, realm_emoji.id)
+    # This mirrors the check in upload_emoji_image because we want to
    # have some reasonable guarantees on the content-type before
    # deriving the file extension from it.
    if content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES or content_type not in INLINE_MIME_TYPES:
        raise BadImageError(_("Invalid image format"))
    emoji_file_name = get_emoji_file_name(content_type, realm_emoji.id)
    emoji_uploaded_successfully = False
    is_animated = False
--- a/zerver/lib/emoji.py
+++ b/zerver/lib/emoji.py
@ -7,6 +7,7 @@ from django.contrib.staticfiles.storage import staticfiles_storage
 from django.utils.translation import gettext as _
 from zerver.lib.exceptions import JsonableError
 from zerver.lib.mime_types import guess_extension
 from zerver.lib.storage import static_path
 from zerver.lib.upload import upload_backend
 from zerver.models import Reaction, Realm, RealmEmoji, UserProfile
@ -148,12 +149,9 @@ def get_emoji_url(emoji_file_name: str, realm_id: int, still: bool = False) -> s
    return upload_backend.get_emoji_url(emoji_file_name, realm_id, still)
-def get_emoji_file_name(emoji_file_name: str, emoji_id: int) -> str:
+def get_emoji_file_name(content_type: str, emoji_id: int) -> str:
-    image_ext = os.path.splitext(emoji_file_name)[1]
+    image_ext = guess_extension(content_type, strict=False)
-    if not re.match(r"\.\w+$", image_ext):
+    # The only callsite of this pre-limits the content_type to a
-        # Because the extension from the uploaded filename is
+    # reasonable set that we know have extensions.
-        # user-provided, preserved in the output filename, and libvips
+    assert image_ext is not None
        # uses `[...]` after the extension for options, we validate
        # the simple file extension.
        raise JsonableError(_("Bad file name!"))  # nocoverage
    return "".join((str(emoji_id), image_ext))
--- a/zerver/lib/mime_types.py
+++ b/zerver/lib/mime_types.py
@ -1,5 +1,6 @@
 import sys
 from mimetypes import add_type
 from mimetypes import guess_extension as guess_extension
 from mimetypes import guess_type as guess_type
 add_type("audio/flac", ".flac")
--- a/zerver/lib/thumbnail.py
+++ b/zerver/lib/thumbnail.py
@ -137,8 +137,9 @@ def resize_emoji(
        raise BadImageError(_("Image size exceeds limit."))
    # Square brackets are used for providing options to libvips' save
-    # operation; these should have been filtered out earlier, so we
+    # operation; the extension on the filename comes from reversing
-    # assert none are found here, for safety.
+    # the content-type, which removes most of the attacker control of
    # this string, but assert it has no bracketed pieces for safety.
    write_file_ext = os.path.splitext(emoji_file_name)[1]
    assert "[" not in write_file_ext
--- a/zerver/lib/upload/init.py
+++ b/zerver/lib/upload/init.py
@ -23,7 +23,7 @@ from zerver.lib.thumbnail import (
    resize_avatar,
    resize_emoji,
 )
-from zerver.lib.upload.base import ZulipUploadBackend
+from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
 from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
 from zerver.models.users import is_cross_realm_bot_email
@ -340,6 +340,14 @@ def upload_emoji_image(
    if backend is None:
        backend = upload_backend
    # Emoji are served in the format that they are uploaded, so must
    # be _both_ an image format that we're willing to thumbnail, _and_
    # a format which is widespread enough that we're willing to inline
    # it.  The latter contains non-image formats, but the former
    # limits to only images.
    if content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES or content_type not in INLINE_MIME_TYPES:
        raise BadImageError(_("Invalid image format"))
    emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
        realm_id=user_profile.realm_id,
        emoji_file_name=emoji_file_name,
--- a/zerver/tests/test_audit_log.py
+++ b/zerver/tests/test_audit_log.py
@ -1039,7 +1039,9 @@ class TestRealmAuditLog(ZulipTestCase):
        added_emoji = EmojiInfo(
            id=str(realm_emoji.id),
            name="test_emoji",
-            source_url=get_emoji_url(get_emoji_file_name("img.png", realm_emoji.id), user.realm_id),
+            source_url=get_emoji_url(
                get_emoji_file_name("image/png", realm_emoji.id), user.realm_id
            ),
            deactivated=False,
            author_id=user.id,
            still_url=None,
@ -1067,7 +1069,9 @@ class TestRealmAuditLog(ZulipTestCase):
        deactivated_emoji = EmojiInfo(
            id=str(realm_emoji.id),
            name="test_emoji",
-            source_url=get_emoji_url(get_emoji_file_name("img.png", realm_emoji.id), user.realm_id),
+            source_url=get_emoji_url(
                get_emoji_file_name("image/png", realm_emoji.id), user.realm_id
            ),
            deactivated=True,
            author_id=user.id,
            still_url=None,
--- a/zerver/tests/test_realm_emoji.py
+++ b/zerver/tests/test_realm_emoji.py
@ -319,6 +319,12 @@ class RealmEmojiTest(ZulipTestCase):
                result = self.client_post("/json/realm/emoji/my_emoji", {"file": fp})
        self.assert_json_error(result, "Uploaded file is larger than the allowed limit of 0 MiB")
    def test_emoji_upload_file_format_error(self) -> None:
        self.login("iago")
        with get_test_image_file("img.tif") as fp:
            result = self.client_post("/json/realm/emoji/my_emoji", {"file": fp})
        self.assert_json_error(result, "Invalid image format")
    def test_upload_already_existed_emoji(self) -> None:
        self.login("iago")
        with get_test_image_file("img.png") as fp1:
--- a/zerver/tests/test_upload.py
+++ b/zerver/tests/test_upload.py
@ -3,6 +3,7 @@ import re
 import time
 from io import StringIO
 from unittest import mock
 from unittest.mock import patch
 from urllib.parse import quote
 import orjson
@ -1753,6 +1754,54 @@ class RealmNightLogoTest(RealmLogoTest):
    night = True
 class EmojiTest(UploadSerializeMixin, ZulipTestCase):
    def test_upload_emoji(self) -> None:
        self.login("iago")
        with get_test_image_file("img.png") as f:
            result = self.client_post("/json/realm/emoji/new", {"f1": f})
            self.assert_json_success(result)
    def test_non_image(self) -> None:
        """Non-image is not resized"""
        self.login("iago")
        with get_test_image_file("text.txt") as f:
            with patch("zerver.lib.upload.resize_emoji", return_value=(b"a", None)) as resize_mock:
                result = self.client_post("/json/realm/emoji/new", {"f1": f})
                self.assert_json_error(result, "Invalid image format")
                resize_mock.assert_not_called()
    def test_upsupported_format(self) -> None:
        """Invalid format is not resized"""
        self.login("iago")
        with get_test_image_file("img.bmp") as f:
            with patch("zerver.lib.upload.resize_emoji", return_value=(b"a", None)) as resize_mock:
                result = self.client_post("/json/realm/emoji/new", {"f1": f})
                self.assert_json_error(result, "Invalid image format")
                resize_mock.assert_not_called()
    def test_upload_big_after_animated_resize(self) -> None:
        """A big animated image is fine as long as the still is small"""
        self.login("iago")
        with get_test_image_file("animated_img.gif") as f:
            with patch(
                "zerver.lib.upload.resize_emoji", return_value=(b"a" * (200 * 1024), b"aaa")
            ) as resize_mock:
                result = self.client_post("/json/realm/emoji/new", {"f1": f})
                self.assert_json_success(result)
                resize_mock.assert_called_once()
    def test_upload_too_big_after_animated_resize_still(self) -> None:
        """Still of animated image is too big after resizing"""
        self.login("iago")
        with get_test_image_file("animated_img.gif") as f:
            with patch(
                "zerver.lib.upload.resize_emoji", return_value=(b"aaa", b"a" * (200 * 1024))
            ) as resize_mock:
                result = self.client_post("/json/realm/emoji/new", {"f1": f})
                self.assert_json_error(result, "Image size exceeds limit")
                resize_mock.assert_called_once()
 class SanitizeNameTests(ZulipTestCase):
    def test_file_name(self) -> None:
        self.assertEqual(sanitize_name("test.txt"), "test.txt")
--- a/zerver/tests/test_upload_s3.py
+++ b/zerver/tests/test_upload_s3.py
@ -23,7 +23,9 @@ from zerver.lib.thumbnail import (
    DEFAULT_AVATAR_SIZE,
    DEFAULT_EMOJI_SIZE,
    MEDIUM_AVATAR_SIZE,
    BadImageError,
    resize_avatar,
    resize_emoji,
 )
 from zerver.lib.upload import (
    all_message_attachments,
@ -492,6 +494,29 @@ class S3Test(ZulipTestCase):
        self.assertEqual(DEFAULT_EMOJI_SIZE, resized_image.height)
        self.assertEqual(DEFAULT_EMOJI_SIZE, resized_image.width)
    @use_s3_backend
    def test_upload_emoji_non_image(self) -> None:
        create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
        user_profile = self.example_user("hamlet")
        emoji_name = "emoji.png"
        with get_test_image_file("text.txt") as image_file:
            with patch("zerver.lib.upload.resize_emoji", side_effect=resize_emoji) as resize_mock:
                with self.assertRaises(BadImageError):
                    # We trust the content-type and fail when we try to load the image
                    zerver.lib.upload.upload_emoji_image(
                        image_file, emoji_name, user_profile, "image/png"
                    )
                resize_mock.assert_called_once()
            with patch("zerver.lib.upload.resize_emoji", side_effect=resize_emoji) as resize_mock:
                with self.assertRaises(BadImageError):
                    # We trust the content-type and abort before trying to load
                    zerver.lib.upload.upload_emoji_image(
                        image_file, emoji_name, user_profile, "text/plain"
                    )
                resize_mock.assert_not_called()
    @use_s3_backend
    def test_tarball_upload_and_deletion(self) -> None:
        bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]