From f6b99171ce374138d29dd93520b5bec4c4f10e51 Mon Sep 17 00:00:00 2001
From: Alex Vandiver <alexmv@zulip.com>
Date: Thu, 11 Jul 2024 19:10:17 +0000
Subject: [PATCH] emoji: Derive the file extension from a limited set of
 content-types.

We thumbnail and serve emoji with the same format as they were
uploaded.  However, we preserved the original extension, which might
mismatch with the provided content-type.

Limit the content-type to a subset which is both (a) an image format
we can thumbnail, and (b) a media format which is widely-enough
supported that we are willing to provide it to all browsers.  This
prevents uploading a `.tiff` emoji, for instance.

Based on this limited content-type, we then reverse to find the
reasonable extension to use when storing it.  This is particularly
important because the local file storage uses the file extension to
choose what content-type to re-serve the emoji as.

This does nothing for existing emoji, which may have odd or missing
file extensions.
---
 zerver/actions/realm_emoji.py    | 10 ++++++-
 zerver/lib/emoji.py              | 14 ++++-----
 zerver/lib/mime_types.py         |  1 +
 zerver/lib/thumbnail.py          |  5 ++--
 zerver/lib/upload/__init__.py    | 10 ++++++-
 zerver/tests/test_audit_log.py   |  8 ++++--
 zerver/tests/test_realm_emoji.py |  6 ++++
 zerver/tests/test_upload.py      | 49 ++++++++++++++++++++++++++++++++
 zerver/tests/test_upload_s3.py   | 25 ++++++++++++++++
 9 files changed, 114 insertions(+), 14 deletions(-)

diff --git a/zerver/actions/realm_emoji.py b/zerver/actions/realm_emoji.py
index e860e1f837..58ba87261a 100644
--- a/zerver/actions/realm_emoji.py
+++ b/zerver/actions/realm_emoji.py
@@ -8,7 +8,9 @@ from django.utils.translation import gettext as _
 
 from zerver.lib.emoji import get_emoji_file_name
 from zerver.lib.exceptions import JsonableError
+from zerver.lib.thumbnail import THUMBNAIL_ACCEPT_IMAGE_TYPES, BadImageError
 from zerver.lib.upload import upload_emoji_image
+from zerver.lib.upload.base import INLINE_MIME_TYPES
 from zerver.models import Realm, RealmAuditLog, RealmEmoji, UserProfile
 from zerver.models.realm_emoji import EmojiInfo, get_all_custom_emoji_for_realm
 from zerver.models.users import active_user_ids
@@ -31,7 +33,13 @@ def check_add_realm_emoji(
         # Match the string in upload_emoji.
         raise JsonableError(_("A custom emoji with this name already exists."))
 
-    emoji_file_name = get_emoji_file_name(image_file.name, realm_emoji.id)
+    # This mirrors the check in upload_emoji_image because we want to
+    # have some reasonable guarantees on the content-type before
+    # deriving the file extension from it.
+    if content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES or content_type not in INLINE_MIME_TYPES:
+        raise BadImageError(_("Invalid image format"))
+
+    emoji_file_name = get_emoji_file_name(content_type, realm_emoji.id)
 
     emoji_uploaded_successfully = False
     is_animated = False
diff --git a/zerver/lib/emoji.py b/zerver/lib/emoji.py
index fefb71906f..0fd7f70c9c 100644
--- a/zerver/lib/emoji.py
+++ b/zerver/lib/emoji.py
@@ -7,6 +7,7 @@ from django.contrib.staticfiles.storage import staticfiles_storage
 from django.utils.translation import gettext as _
 
 from zerver.lib.exceptions import JsonableError
+from zerver.lib.mime_types import guess_extension
 from zerver.lib.storage import static_path
 from zerver.lib.upload import upload_backend
 from zerver.models import Reaction, Realm, RealmEmoji, UserProfile
@@ -148,12 +149,9 @@ def get_emoji_url(emoji_file_name: str, realm_id: int, still: bool = False) -> s
     return upload_backend.get_emoji_url(emoji_file_name, realm_id, still)
 
 
-def get_emoji_file_name(emoji_file_name: str, emoji_id: int) -> str:
-    image_ext = os.path.splitext(emoji_file_name)[1]
-    if not re.match(r"\.\w+$", image_ext):
-        # Because the extension from the uploaded filename is
-        # user-provided, preserved in the output filename, and libvips
-        # uses `[...]` after the extension for options, we validate
-        # the simple file extension.
-        raise JsonableError(_("Bad file name!"))  # nocoverage
+def get_emoji_file_name(content_type: str, emoji_id: int) -> str:
+    image_ext = guess_extension(content_type, strict=False)
+    # The only callsite of this pre-limits the content_type to a
+    # reasonable set that we know have extensions.
+    assert image_ext is not None
     return "".join((str(emoji_id), image_ext))
diff --git a/zerver/lib/mime_types.py b/zerver/lib/mime_types.py
index 208123eba5..1e8208cc7c 100644
--- a/zerver/lib/mime_types.py
+++ b/zerver/lib/mime_types.py
@@ -1,5 +1,6 @@
 import sys
 from mimetypes import add_type
+from mimetypes import guess_extension as guess_extension
 from mimetypes import guess_type as guess_type
 
 add_type("audio/flac", ".flac")
diff --git a/zerver/lib/thumbnail.py b/zerver/lib/thumbnail.py
index e8d557f7bb..a465de7d52 100644
--- a/zerver/lib/thumbnail.py
+++ b/zerver/lib/thumbnail.py
@@ -137,8 +137,9 @@ def resize_emoji(
         raise BadImageError(_("Image size exceeds limit."))
 
     # Square brackets are used for providing options to libvips' save
-    # operation; these should have been filtered out earlier, so we
-    # assert none are found here, for safety.
+    # operation; the extension on the filename comes from reversing
+    # the content-type, which removes most of the attacker control of
+    # this string, but assert it has no bracketed pieces for safety.
     write_file_ext = os.path.splitext(emoji_file_name)[1]
     assert "[" not in write_file_ext
 
diff --git a/zerver/lib/upload/__init__.py b/zerver/lib/upload/__init__.py
index 2004a5295d..69ecde95ed 100644
--- a/zerver/lib/upload/__init__.py
+++ b/zerver/lib/upload/__init__.py
@@ -23,7 +23,7 @@ from zerver.lib.thumbnail import (
     resize_avatar,
     resize_emoji,
 )
-from zerver.lib.upload.base import ZulipUploadBackend
+from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
 from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
 from zerver.models.users import is_cross_realm_bot_email
 
@@ -340,6 +340,14 @@ def upload_emoji_image(
     if backend is None:
         backend = upload_backend
 
+    # Emoji are served in the format that they are uploaded, so must
+    # be _both_ an image format that we're willing to thumbnail, _and_
+    # a format which is widespread enough that we're willing to inline
+    # it.  The latter contains non-image formats, but the former
+    # limits to only images.
+    if content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES or content_type not in INLINE_MIME_TYPES:
+        raise BadImageError(_("Invalid image format"))
+
     emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
         realm_id=user_profile.realm_id,
         emoji_file_name=emoji_file_name,
diff --git a/zerver/tests/test_audit_log.py b/zerver/tests/test_audit_log.py
index 1f92f3a7e5..ac493f9c24 100644
--- a/zerver/tests/test_audit_log.py
+++ b/zerver/tests/test_audit_log.py
@@ -1039,7 +1039,9 @@ class TestRealmAuditLog(ZulipTestCase):
         added_emoji = EmojiInfo(
             id=str(realm_emoji.id),
             name="test_emoji",
-            source_url=get_emoji_url(get_emoji_file_name("img.png", realm_emoji.id), user.realm_id),
+            source_url=get_emoji_url(
+                get_emoji_file_name("image/png", realm_emoji.id), user.realm_id
+            ),
             deactivated=False,
             author_id=user.id,
             still_url=None,
@@ -1067,7 +1069,9 @@ class TestRealmAuditLog(ZulipTestCase):
         deactivated_emoji = EmojiInfo(
             id=str(realm_emoji.id),
             name="test_emoji",
-            source_url=get_emoji_url(get_emoji_file_name("img.png", realm_emoji.id), user.realm_id),
+            source_url=get_emoji_url(
+                get_emoji_file_name("image/png", realm_emoji.id), user.realm_id
+            ),
             deactivated=True,
             author_id=user.id,
             still_url=None,
diff --git a/zerver/tests/test_realm_emoji.py b/zerver/tests/test_realm_emoji.py
index e60f84401d..3f8c613bae 100644
--- a/zerver/tests/test_realm_emoji.py
+++ b/zerver/tests/test_realm_emoji.py
@@ -319,6 +319,12 @@ class RealmEmojiTest(ZulipTestCase):
                 result = self.client_post("/json/realm/emoji/my_emoji", {"file": fp})
         self.assert_json_error(result, "Uploaded file is larger than the allowed limit of 0 MiB")
 
+    def test_emoji_upload_file_format_error(self) -> None:
+        self.login("iago")
+        with get_test_image_file("img.tif") as fp:
+            result = self.client_post("/json/realm/emoji/my_emoji", {"file": fp})
+        self.assert_json_error(result, "Invalid image format")
+
     def test_upload_already_existed_emoji(self) -> None:
         self.login("iago")
         with get_test_image_file("img.png") as fp1:
diff --git a/zerver/tests/test_upload.py b/zerver/tests/test_upload.py
index f3723157d6..001533a1d5 100644
--- a/zerver/tests/test_upload.py
+++ b/zerver/tests/test_upload.py
@@ -3,6 +3,7 @@ import re
 import time
 from io import StringIO
 from unittest import mock
+from unittest.mock import patch
 from urllib.parse import quote
 
 import orjson
@@ -1753,6 +1754,54 @@ class RealmNightLogoTest(RealmLogoTest):
     night = True
 
 
+class EmojiTest(UploadSerializeMixin, ZulipTestCase):
+    def test_upload_emoji(self) -> None:
+        self.login("iago")
+        with get_test_image_file("img.png") as f:
+            result = self.client_post("/json/realm/emoji/new", {"f1": f})
+            self.assert_json_success(result)
+
+    def test_non_image(self) -> None:
+        """Non-image is not resized"""
+        self.login("iago")
+        with get_test_image_file("text.txt") as f:
+            with patch("zerver.lib.upload.resize_emoji", return_value=(b"a", None)) as resize_mock:
+                result = self.client_post("/json/realm/emoji/new", {"f1": f})
+                self.assert_json_error(result, "Invalid image format")
+                resize_mock.assert_not_called()
+
+    def test_upsupported_format(self) -> None:
+        """Invalid format is not resized"""
+        self.login("iago")
+        with get_test_image_file("img.bmp") as f:
+            with patch("zerver.lib.upload.resize_emoji", return_value=(b"a", None)) as resize_mock:
+                result = self.client_post("/json/realm/emoji/new", {"f1": f})
+                self.assert_json_error(result, "Invalid image format")
+                resize_mock.assert_not_called()
+
+    def test_upload_big_after_animated_resize(self) -> None:
+        """A big animated image is fine as long as the still is small"""
+        self.login("iago")
+        with get_test_image_file("animated_img.gif") as f:
+            with patch(
+                "zerver.lib.upload.resize_emoji", return_value=(b"a" * (200 * 1024), b"aaa")
+            ) as resize_mock:
+                result = self.client_post("/json/realm/emoji/new", {"f1": f})
+                self.assert_json_success(result)
+                resize_mock.assert_called_once()
+
+    def test_upload_too_big_after_animated_resize_still(self) -> None:
+        """Still of animated image is too big after resizing"""
+        self.login("iago")
+        with get_test_image_file("animated_img.gif") as f:
+            with patch(
+                "zerver.lib.upload.resize_emoji", return_value=(b"aaa", b"a" * (200 * 1024))
+            ) as resize_mock:
+                result = self.client_post("/json/realm/emoji/new", {"f1": f})
+                self.assert_json_error(result, "Image size exceeds limit")
+                resize_mock.assert_called_once()
+
+
 class SanitizeNameTests(ZulipTestCase):
     def test_file_name(self) -> None:
         self.assertEqual(sanitize_name("test.txt"), "test.txt")
diff --git a/zerver/tests/test_upload_s3.py b/zerver/tests/test_upload_s3.py
index e45ead1aed..c5cc6e8bc7 100644
--- a/zerver/tests/test_upload_s3.py
+++ b/zerver/tests/test_upload_s3.py
@@ -23,7 +23,9 @@ from zerver.lib.thumbnail import (
     DEFAULT_AVATAR_SIZE,
     DEFAULT_EMOJI_SIZE,
     MEDIUM_AVATAR_SIZE,
+    BadImageError,
     resize_avatar,
+    resize_emoji,
 )
 from zerver.lib.upload import (
     all_message_attachments,
@@ -492,6 +494,29 @@ class S3Test(ZulipTestCase):
         self.assertEqual(DEFAULT_EMOJI_SIZE, resized_image.height)
         self.assertEqual(DEFAULT_EMOJI_SIZE, resized_image.width)
 
+    @use_s3_backend
+    def test_upload_emoji_non_image(self) -> None:
+        create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]
+
+        user_profile = self.example_user("hamlet")
+        emoji_name = "emoji.png"
+        with get_test_image_file("text.txt") as image_file:
+            with patch("zerver.lib.upload.resize_emoji", side_effect=resize_emoji) as resize_mock:
+                with self.assertRaises(BadImageError):
+                    # We trust the content-type and fail when we try to load the image
+                    zerver.lib.upload.upload_emoji_image(
+                        image_file, emoji_name, user_profile, "image/png"
+                    )
+                resize_mock.assert_called_once()
+
+            with patch("zerver.lib.upload.resize_emoji", side_effect=resize_emoji) as resize_mock:
+                with self.assertRaises(BadImageError):
+                    # We trust the content-type and abort before trying to load
+                    zerver.lib.upload.upload_emoji_image(
+                        image_file, emoji_name, user_profile, "text/plain"
+                    )
+                resize_mock.assert_not_called()
+
     @use_s3_backend
     def test_tarball_upload_and_deletion(self) -> None:
         bucket = create_s3_buckets(settings.S3_AVATAR_BUCKET)[0]