diff --git a/zerver/lib/thumbnail.py b/zerver/lib/thumbnail.py index 0acdf9aaf6..4bba1ee9dd 100644 --- a/zerver/lib/thumbnail.py +++ b/zerver/lib/thumbnail.py @@ -138,7 +138,7 @@ class BadImageError(JsonableError): @contextmanager -def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]: +def libvips_check_image(image_data: bytes | pyvips.Source) -> Iterator[pyvips.Image]: # The primary goal of this is to verify that the image is valid, # and raise BadImageError otherwise. The yielded `source_image` # may be ignored, since calling `thumbnail_buffer` is faster than @@ -146,7 +146,10 @@ def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]: # cannot make use of shrink-on-load optimizations: # https://www.libvips.org/API/current/libvips-resample.html#vips-thumbnail-image try: - source_image = pyvips.Image.new_from_buffer(image_data, "") + if isinstance(image_data, bytes): + source_image = pyvips.Image.new_from_buffer(image_data, "") + else: + source_image = pyvips.Image.new_from_source(image_data, "", access="sequential") except pyvips.Error: raise BadImageError(_("Could not decode image; did you upload an image file?")) @@ -275,7 +278,9 @@ def missing_thumbnails(image_attachment: ImageAttachment) -> list[ThumbnailForma return needed_thumbnails -def maybe_thumbnail(attachment: AbstractAttachment, content: bytes) -> ImageAttachment | None: +def maybe_thumbnail( + attachment: AbstractAttachment, content: bytes | pyvips.Source +) -> ImageAttachment | None: if attachment.content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES: # If it doesn't self-report as an image file that we might want # to thumbnail, don't parse the bytes at all. diff --git a/zerver/lib/upload/__init__.py b/zerver/lib/upload/__init__.py index d108c8b428..670ac6483f 100644 --- a/zerver/lib/upload/__init__.py +++ b/zerver/lib/upload/__init__.py @@ -8,6 +8,7 @@ from datetime import datetime from typing import IO, Any, BinaryIO from urllib.parse import unquote, urljoin +import pyvips from django.conf import settings from django.core.files.uploadedfile import UploadedFile from django.db import transaction @@ -26,7 +27,7 @@ from zerver.lib.thumbnail import ( resize_avatar, resize_emoji, ) -from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend +from zerver.lib.upload.base import INLINE_MIME_TYPES, StreamingSourceWithSize, ZulipUploadBackend from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile from zerver.models.users import is_cross_realm_bot_email @@ -48,22 +49,28 @@ def create_attachment( file_name: str, path_id: str, content_type: str, - file_data: bytes, + file_data: bytes | StreamingSourceWithSize, user_profile: UserProfile, realm: Realm, ) -> None: assert (user_profile.realm_id == realm.id) or is_cross_realm_bot_email( user_profile.delivery_email ) + if isinstance(file_data, bytes): + file_size = len(file_data) + file_real_data: bytes | pyvips.Source = file_data + else: + file_size = file_data.size + file_real_data = file_data.source attachment = Attachment.objects.create( file_name=file_name, path_id=path_id, owner=user_profile, realm=realm, - size=len(file_data), + size=file_size, content_type=content_type, ) - maybe_thumbnail(attachment, file_data) + maybe_thumbnail(attachment, file_real_data) from zerver.actions.uploads import notify_attachment_update notify_attachment_update(user_profile, "add", attachment.to_dict()) @@ -194,6 +201,10 @@ def upload_message_attachment_from_request( ) +def attachment_vips_source(path_id: str) -> StreamingSourceWithSize: + return upload_backend.attachment_vips_source(path_id) + + def save_attachment_contents(path_id: str, filehandle: BinaryIO) -> None: return upload_backend.save_attachment_contents(path_id, filehandle) diff --git a/zerver/lib/upload/base.py b/zerver/lib/upload/base.py index c0252f1964..021f19420a 100644 --- a/zerver/lib/upload/base.py +++ b/zerver/lib/upload/base.py @@ -1,8 +1,11 @@ import os from collections.abc import Callable, Iterator +from dataclasses import dataclass from datetime import datetime from typing import IO, Any, BinaryIO +import pyvips + from zerver.models import Realm, UserProfile INLINE_MIME_TYPES = [ @@ -27,6 +30,12 @@ INLINE_MIME_TYPES = [ ] +@dataclass +class StreamingSourceWithSize: + size: int + source: pyvips.Source + + class ZulipUploadBackend: # Message attachment uploads def get_public_upload_root_url(self) -> str: @@ -48,6 +57,9 @@ class ZulipUploadBackend: def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None: raise NotImplementedError + def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize: + raise NotImplementedError + def delete_message_attachment(self, path_id: str) -> bool: raise NotImplementedError diff --git a/zerver/lib/upload/local.py b/zerver/lib/upload/local.py index df226ac602..a25b320788 100644 --- a/zerver/lib/upload/local.py +++ b/zerver/lib/upload/local.py @@ -7,13 +7,14 @@ from collections.abc import Callable, Iterator from datetime import datetime from typing import IO, Any, BinaryIO, Literal +import pyvips from django.conf import settings from typing_extensions import override from zerver.lib.mime_types import guess_type from zerver.lib.thumbnail import resize_avatar, resize_logo from zerver.lib.timestamp import timestamp_to_datetime -from zerver.lib.upload.base import ZulipUploadBackend +from zerver.lib.upload.base import StreamingSourceWithSize, ZulipUploadBackend from zerver.lib.utils import assert_is_not_none from zerver.models import Realm, RealmEmoji, UserProfile @@ -100,6 +101,13 @@ class LocalUploadBackend(ZulipUploadBackend): def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None: filehandle.write(read_local_file("files", path_id)) + @override + def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize: + file_path = os.path.join(assert_is_not_none(settings.LOCAL_UPLOADS_DIR), "files", path_id) + assert_is_local_storage_path("files", file_path) + source = pyvips.Source.new_from_file(file_path) + return StreamingSourceWithSize(size=os.path.getsize(file_path), source=source) + @override def delete_message_attachment(self, path_id: str) -> bool: return delete_local_file("files", path_id) diff --git a/zerver/lib/upload/s3.py b/zerver/lib/upload/s3.py index 5cb28a590b..c9e912cce7 100644 --- a/zerver/lib/upload/s3.py +++ b/zerver/lib/upload/s3.py @@ -8,14 +8,17 @@ from urllib.parse import urljoin, urlsplit, urlunsplit import boto3 import botocore +import pyvips from botocore.client import Config +from botocore.response import StreamingBody from django.conf import settings from django.utils.http import content_disposition_header from mypy_boto3_s3.service_resource import Bucket from typing_extensions import override +from zerver.lib.partial import partial from zerver.lib.thumbnail import resize_avatar, resize_logo -from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend +from zerver.lib.upload.base import INLINE_MIME_TYPES, StreamingSourceWithSize, ZulipUploadBackend from zerver.models import Realm, RealmEmoji, UserProfile # Duration that the signed upload URLs that we redirect to when @@ -236,6 +239,17 @@ class S3UploadBackend(ZulipUploadBackend): for chunk in self.uploads_bucket.Object(path_id).get()["Body"]: filehandle.write(chunk) + @override + def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize: + metadata = self.uploads_bucket.Object(path_id).get() + + def s3_read(streamingbody: StreamingBody, size: int) -> bytes: + return streamingbody.read(amt=size) + + source: pyvips.Source = pyvips.SourceCustom() + source.on_read(partial(s3_read, metadata["Body"])) + return StreamingSourceWithSize(size=metadata["ContentLength"], source=source) + @override def delete_message_attachment(self, path_id: str) -> bool: return self.delete_file_from_s3(path_id, self.uploads_bucket) diff --git a/zerver/tests/test_thumbnail.py b/zerver/tests/test_thumbnail.py index b972381ace..eadc7d5532 100644 --- a/zerver/tests/test_thumbnail.py +++ b/zerver/tests/test_thumbnail.py @@ -26,7 +26,13 @@ from zerver.lib.thumbnail import ( resize_emoji, split_thumbnail_path, ) -from zerver.lib.upload import all_message_attachments, save_attachment_contents +from zerver.lib.upload import ( + all_message_attachments, + attachment_vips_source, + create_attachment, + save_attachment_contents, + upload_backend, +) from zerver.models import Attachment, ImageAttachment from zerver.views.upload import closest_thumbnail_format from zerver.worker.thumbnail import ensure_thumbnails @@ -562,6 +568,19 @@ class TestStoreThumbnail(ZulipTestCase): with self.thumbnail_formats(still_webp, anim_webp, still_jpeg): self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg]) + def test_maybe_thumbnail_from_stream(self) -> None: + # If we put the file in place directly (e.g. simulating a + # chunked upload), and then use the streaming source to + # create the attachment, we still thumbnail correctly. + hamlet = self.example_user("hamlet") + path_id = upload_backend.generate_message_upload_path(str(hamlet.realm.id), "img.png") + upload_backend.upload_message_attachment( + path_id, "img.png", "image/png", read_test_image_file("img.png"), hamlet + ) + source = attachment_vips_source(path_id) + create_attachment("img.png", path_id, "image/png", source, hamlet, hamlet.realm) + self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists()) + class TestThumbnailRetrieval(ZulipTestCase): def test_get_thumbnail(self) -> None: diff --git a/zerver/tests/test_upload_local.py b/zerver/tests/test_upload_local.py index 85cabc57f8..36bd825a5c 100644 --- a/zerver/tests/test_upload_local.py +++ b/zerver/tests/test_upload_local.py @@ -13,6 +13,7 @@ from zerver.lib.test_helpers import get_test_image_file, read_test_image_file from zerver.lib.thumbnail import DEFAULT_EMOJI_SIZE, MEDIUM_AVATAR_SIZE, resize_avatar from zerver.lib.upload import ( all_message_attachments, + attachment_vips_source, delete_export_tarball, delete_message_attachment, delete_message_attachments, @@ -21,6 +22,7 @@ from zerver.lib.upload import ( upload_export_tarball, upload_message_attachment, ) +from zerver.lib.upload.base import StreamingSourceWithSize from zerver.lib.upload.local import write_local_file from zerver.models import Attachment, RealmEmoji from zerver.models.realms import get_realm @@ -52,6 +54,20 @@ class LocalStorageTest(UploadSerializeMixin, ZulipTestCase): save_attachment_contents(path_id, output) self.assertEqual(output.getvalue(), b"zulip!") + def test_attachment_vips_source(self) -> None: + user_profile = self.example_user("hamlet") + url = upload_message_attachment( + "img.png", "image/png", read_test_image_file("img.png"), user_profile + )[0] + path_id = re.sub(r"/user_uploads/", "", url) + + source = attachment_vips_source(path_id) + self.assertIsInstance(source, StreamingSourceWithSize) + self.assertEqual(source.size, len(read_test_image_file("img.png"))) + image = pyvips.Image.new_from_source(source.source, "", access="sequential") + self.assertEqual(128, image.height) + self.assertEqual(128, image.width) + def test_upload_message_attachment_local_cross_realm_path(self) -> None: """ Verifies that the path of a file uploaded by a cross-realm bot to another diff --git a/zerver/tests/test_upload_s3.py b/zerver/tests/test_upload_s3.py index 98a9d7685d..3a33763b4f 100644 --- a/zerver/tests/test_upload_s3.py +++ b/zerver/tests/test_upload_s3.py @@ -30,6 +30,7 @@ from zerver.lib.thumbnail import ( ) from zerver.lib.upload import ( all_message_attachments, + attachment_vips_source, delete_export_tarball, delete_message_attachment, delete_message_attachments, @@ -37,6 +38,7 @@ from zerver.lib.upload import ( upload_export_tarball, upload_message_attachment, ) +from zerver.lib.upload.base import StreamingSourceWithSize from zerver.lib.upload.s3 import S3UploadBackend from zerver.models import Attachment, RealmEmoji, UserProfile from zerver.models.realms import get_realm @@ -75,6 +77,22 @@ class S3Test(ZulipTestCase): save_attachment_contents(path_id, output) self.assertEqual(output.getvalue(), b"zulip!") + @use_s3_backend + def test_attachment_vips_source(self) -> None: + create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET) + user_profile = self.example_user("hamlet") + url = upload_message_attachment( + "img.png", "image/png", read_test_image_file("img.png"), user_profile + )[0] + path_id = re.sub(r"/user_uploads/", "", url) + + source = attachment_vips_source(path_id) + self.assertIsInstance(source, StreamingSourceWithSize) + self.assertEqual(source.size, len(read_test_image_file("img.png"))) + image = pyvips.Image.new_from_source(source.source, "", access="sequential") + self.assertEqual(128, image.height) + self.assertEqual(128, image.width) + @use_s3_backend def test_upload_message_attachment_s3_cross_realm_path(self) -> None: """