mirror of https://github.com/zulip/zulip.git
thumbnail: Support checking for images from streaming sources.
We may not always have trivial access to all of the bytes of the uploaded file -- for instance, if the file was uploaded previously, or by some other process. Downloading the entire image just to check its headers is an inefficient use of time and bandwidth. Adjust `maybe_thumbnail` and its dependencies to optionally accept a `pyvips.Source`, which supports streaming data from S3 or disk. This allows creating the ImageAttachment row, if deemed appropriate, based on only a few KB of data rather than the entire image.
This commit is contained in:
parent
758aa36cbe
commit
9a1f78db22
|
@ -138,7 +138,7 @@ class BadImageError(JsonableError):
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]:
|
def libvips_check_image(image_data: bytes | pyvips.Source) -> Iterator[pyvips.Image]:
|
||||||
# The primary goal of this is to verify that the image is valid,
|
# The primary goal of this is to verify that the image is valid,
|
||||||
# and raise BadImageError otherwise. The yielded `source_image`
|
# and raise BadImageError otherwise. The yielded `source_image`
|
||||||
# may be ignored, since calling `thumbnail_buffer` is faster than
|
# may be ignored, since calling `thumbnail_buffer` is faster than
|
||||||
|
@ -146,7 +146,10 @@ def libvips_check_image(image_data: bytes) -> Iterator[pyvips.Image]:
|
||||||
# cannot make use of shrink-on-load optimizations:
|
# cannot make use of shrink-on-load optimizations:
|
||||||
# https://www.libvips.org/API/current/libvips-resample.html#vips-thumbnail-image
|
# https://www.libvips.org/API/current/libvips-resample.html#vips-thumbnail-image
|
||||||
try:
|
try:
|
||||||
source_image = pyvips.Image.new_from_buffer(image_data, "")
|
if isinstance(image_data, bytes):
|
||||||
|
source_image = pyvips.Image.new_from_buffer(image_data, "")
|
||||||
|
else:
|
||||||
|
source_image = pyvips.Image.new_from_source(image_data, "", access="sequential")
|
||||||
except pyvips.Error:
|
except pyvips.Error:
|
||||||
raise BadImageError(_("Could not decode image; did you upload an image file?"))
|
raise BadImageError(_("Could not decode image; did you upload an image file?"))
|
||||||
|
|
||||||
|
@ -275,7 +278,9 @@ def missing_thumbnails(image_attachment: ImageAttachment) -> list[ThumbnailForma
|
||||||
return needed_thumbnails
|
return needed_thumbnails
|
||||||
|
|
||||||
|
|
||||||
def maybe_thumbnail(attachment: AbstractAttachment, content: bytes) -> ImageAttachment | None:
|
def maybe_thumbnail(
|
||||||
|
attachment: AbstractAttachment, content: bytes | pyvips.Source
|
||||||
|
) -> ImageAttachment | None:
|
||||||
if attachment.content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES:
|
if attachment.content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES:
|
||||||
# If it doesn't self-report as an image file that we might want
|
# If it doesn't self-report as an image file that we might want
|
||||||
# to thumbnail, don't parse the bytes at all.
|
# to thumbnail, don't parse the bytes at all.
|
||||||
|
|
|
@ -8,6 +8,7 @@ from datetime import datetime
|
||||||
from typing import IO, Any, BinaryIO
|
from typing import IO, Any, BinaryIO
|
||||||
from urllib.parse import unquote, urljoin
|
from urllib.parse import unquote, urljoin
|
||||||
|
|
||||||
|
import pyvips
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.files.uploadedfile import UploadedFile
|
from django.core.files.uploadedfile import UploadedFile
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
@ -26,7 +27,7 @@ from zerver.lib.thumbnail import (
|
||||||
resize_avatar,
|
resize_avatar,
|
||||||
resize_emoji,
|
resize_emoji,
|
||||||
)
|
)
|
||||||
from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
|
from zerver.lib.upload.base import INLINE_MIME_TYPES, StreamingSourceWithSize, ZulipUploadBackend
|
||||||
from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
|
from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
|
||||||
from zerver.models.users import is_cross_realm_bot_email
|
from zerver.models.users import is_cross_realm_bot_email
|
||||||
|
|
||||||
|
@ -48,22 +49,28 @@ def create_attachment(
|
||||||
file_name: str,
|
file_name: str,
|
||||||
path_id: str,
|
path_id: str,
|
||||||
content_type: str,
|
content_type: str,
|
||||||
file_data: bytes,
|
file_data: bytes | StreamingSourceWithSize,
|
||||||
user_profile: UserProfile,
|
user_profile: UserProfile,
|
||||||
realm: Realm,
|
realm: Realm,
|
||||||
) -> None:
|
) -> None:
|
||||||
assert (user_profile.realm_id == realm.id) or is_cross_realm_bot_email(
|
assert (user_profile.realm_id == realm.id) or is_cross_realm_bot_email(
|
||||||
user_profile.delivery_email
|
user_profile.delivery_email
|
||||||
)
|
)
|
||||||
|
if isinstance(file_data, bytes):
|
||||||
|
file_size = len(file_data)
|
||||||
|
file_real_data: bytes | pyvips.Source = file_data
|
||||||
|
else:
|
||||||
|
file_size = file_data.size
|
||||||
|
file_real_data = file_data.source
|
||||||
attachment = Attachment.objects.create(
|
attachment = Attachment.objects.create(
|
||||||
file_name=file_name,
|
file_name=file_name,
|
||||||
path_id=path_id,
|
path_id=path_id,
|
||||||
owner=user_profile,
|
owner=user_profile,
|
||||||
realm=realm,
|
realm=realm,
|
||||||
size=len(file_data),
|
size=file_size,
|
||||||
content_type=content_type,
|
content_type=content_type,
|
||||||
)
|
)
|
||||||
maybe_thumbnail(attachment, file_data)
|
maybe_thumbnail(attachment, file_real_data)
|
||||||
from zerver.actions.uploads import notify_attachment_update
|
from zerver.actions.uploads import notify_attachment_update
|
||||||
|
|
||||||
notify_attachment_update(user_profile, "add", attachment.to_dict())
|
notify_attachment_update(user_profile, "add", attachment.to_dict())
|
||||||
|
@ -194,6 +201,10 @@ def upload_message_attachment_from_request(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def attachment_vips_source(path_id: str) -> StreamingSourceWithSize:
|
||||||
|
return upload_backend.attachment_vips_source(path_id)
|
||||||
|
|
||||||
|
|
||||||
def save_attachment_contents(path_id: str, filehandle: BinaryIO) -> None:
|
def save_attachment_contents(path_id: str, filehandle: BinaryIO) -> None:
|
||||||
return upload_backend.save_attachment_contents(path_id, filehandle)
|
return upload_backend.save_attachment_contents(path_id, filehandle)
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
import os
|
import os
|
||||||
from collections.abc import Callable, Iterator
|
from collections.abc import Callable, Iterator
|
||||||
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import IO, Any, BinaryIO
|
from typing import IO, Any, BinaryIO
|
||||||
|
|
||||||
|
import pyvips
|
||||||
|
|
||||||
from zerver.models import Realm, UserProfile
|
from zerver.models import Realm, UserProfile
|
||||||
|
|
||||||
INLINE_MIME_TYPES = [
|
INLINE_MIME_TYPES = [
|
||||||
|
@ -27,6 +30,12 @@ INLINE_MIME_TYPES = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class StreamingSourceWithSize:
|
||||||
|
size: int
|
||||||
|
source: pyvips.Source
|
||||||
|
|
||||||
|
|
||||||
class ZulipUploadBackend:
|
class ZulipUploadBackend:
|
||||||
# Message attachment uploads
|
# Message attachment uploads
|
||||||
def get_public_upload_root_url(self) -> str:
|
def get_public_upload_root_url(self) -> str:
|
||||||
|
@ -48,6 +57,9 @@ class ZulipUploadBackend:
|
||||||
def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None:
|
def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def delete_message_attachment(self, path_id: str) -> bool:
|
def delete_message_attachment(self, path_id: str) -> bool:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
|
@ -7,13 +7,14 @@ from collections.abc import Callable, Iterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import IO, Any, BinaryIO, Literal
|
from typing import IO, Any, BinaryIO, Literal
|
||||||
|
|
||||||
|
import pyvips
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
from zerver.lib.mime_types import guess_type
|
from zerver.lib.mime_types import guess_type
|
||||||
from zerver.lib.thumbnail import resize_avatar, resize_logo
|
from zerver.lib.thumbnail import resize_avatar, resize_logo
|
||||||
from zerver.lib.timestamp import timestamp_to_datetime
|
from zerver.lib.timestamp import timestamp_to_datetime
|
||||||
from zerver.lib.upload.base import ZulipUploadBackend
|
from zerver.lib.upload.base import StreamingSourceWithSize, ZulipUploadBackend
|
||||||
from zerver.lib.utils import assert_is_not_none
|
from zerver.lib.utils import assert_is_not_none
|
||||||
from zerver.models import Realm, RealmEmoji, UserProfile
|
from zerver.models import Realm, RealmEmoji, UserProfile
|
||||||
|
|
||||||
|
@ -100,6 +101,13 @@ class LocalUploadBackend(ZulipUploadBackend):
|
||||||
def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None:
|
def save_attachment_contents(self, path_id: str, filehandle: BinaryIO) -> None:
|
||||||
filehandle.write(read_local_file("files", path_id))
|
filehandle.write(read_local_file("files", path_id))
|
||||||
|
|
||||||
|
@override
|
||||||
|
def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize:
|
||||||
|
file_path = os.path.join(assert_is_not_none(settings.LOCAL_UPLOADS_DIR), "files", path_id)
|
||||||
|
assert_is_local_storage_path("files", file_path)
|
||||||
|
source = pyvips.Source.new_from_file(file_path)
|
||||||
|
return StreamingSourceWithSize(size=os.path.getsize(file_path), source=source)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def delete_message_attachment(self, path_id: str) -> bool:
|
def delete_message_attachment(self, path_id: str) -> bool:
|
||||||
return delete_local_file("files", path_id)
|
return delete_local_file("files", path_id)
|
||||||
|
|
|
@ -8,14 +8,17 @@ from urllib.parse import urljoin, urlsplit, urlunsplit
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
import botocore
|
import botocore
|
||||||
|
import pyvips
|
||||||
from botocore.client import Config
|
from botocore.client import Config
|
||||||
|
from botocore.response import StreamingBody
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils.http import content_disposition_header
|
from django.utils.http import content_disposition_header
|
||||||
from mypy_boto3_s3.service_resource import Bucket
|
from mypy_boto3_s3.service_resource import Bucket
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
|
from zerver.lib.partial import partial
|
||||||
from zerver.lib.thumbnail import resize_avatar, resize_logo
|
from zerver.lib.thumbnail import resize_avatar, resize_logo
|
||||||
from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
|
from zerver.lib.upload.base import INLINE_MIME_TYPES, StreamingSourceWithSize, ZulipUploadBackend
|
||||||
from zerver.models import Realm, RealmEmoji, UserProfile
|
from zerver.models import Realm, RealmEmoji, UserProfile
|
||||||
|
|
||||||
# Duration that the signed upload URLs that we redirect to when
|
# Duration that the signed upload URLs that we redirect to when
|
||||||
|
@ -236,6 +239,17 @@ class S3UploadBackend(ZulipUploadBackend):
|
||||||
for chunk in self.uploads_bucket.Object(path_id).get()["Body"]:
|
for chunk in self.uploads_bucket.Object(path_id).get()["Body"]:
|
||||||
filehandle.write(chunk)
|
filehandle.write(chunk)
|
||||||
|
|
||||||
|
@override
|
||||||
|
def attachment_vips_source(self, path_id: str) -> StreamingSourceWithSize:
|
||||||
|
metadata = self.uploads_bucket.Object(path_id).get()
|
||||||
|
|
||||||
|
def s3_read(streamingbody: StreamingBody, size: int) -> bytes:
|
||||||
|
return streamingbody.read(amt=size)
|
||||||
|
|
||||||
|
source: pyvips.Source = pyvips.SourceCustom()
|
||||||
|
source.on_read(partial(s3_read, metadata["Body"]))
|
||||||
|
return StreamingSourceWithSize(size=metadata["ContentLength"], source=source)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def delete_message_attachment(self, path_id: str) -> bool:
|
def delete_message_attachment(self, path_id: str) -> bool:
|
||||||
return self.delete_file_from_s3(path_id, self.uploads_bucket)
|
return self.delete_file_from_s3(path_id, self.uploads_bucket)
|
||||||
|
|
|
@ -26,7 +26,13 @@ from zerver.lib.thumbnail import (
|
||||||
resize_emoji,
|
resize_emoji,
|
||||||
split_thumbnail_path,
|
split_thumbnail_path,
|
||||||
)
|
)
|
||||||
from zerver.lib.upload import all_message_attachments, save_attachment_contents
|
from zerver.lib.upload import (
|
||||||
|
all_message_attachments,
|
||||||
|
attachment_vips_source,
|
||||||
|
create_attachment,
|
||||||
|
save_attachment_contents,
|
||||||
|
upload_backend,
|
||||||
|
)
|
||||||
from zerver.models import Attachment, ImageAttachment
|
from zerver.models import Attachment, ImageAttachment
|
||||||
from zerver.views.upload import closest_thumbnail_format
|
from zerver.views.upload import closest_thumbnail_format
|
||||||
from zerver.worker.thumbnail import ensure_thumbnails
|
from zerver.worker.thumbnail import ensure_thumbnails
|
||||||
|
@ -562,6 +568,19 @@ class TestStoreThumbnail(ZulipTestCase):
|
||||||
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
|
with self.thumbnail_formats(still_webp, anim_webp, still_jpeg):
|
||||||
self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg])
|
self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg])
|
||||||
|
|
||||||
|
def test_maybe_thumbnail_from_stream(self) -> None:
|
||||||
|
# If we put the file in place directly (e.g. simulating a
|
||||||
|
# chunked upload), and then use the streaming source to
|
||||||
|
# create the attachment, we still thumbnail correctly.
|
||||||
|
hamlet = self.example_user("hamlet")
|
||||||
|
path_id = upload_backend.generate_message_upload_path(str(hamlet.realm.id), "img.png")
|
||||||
|
upload_backend.upload_message_attachment(
|
||||||
|
path_id, "img.png", "image/png", read_test_image_file("img.png"), hamlet
|
||||||
|
)
|
||||||
|
source = attachment_vips_source(path_id)
|
||||||
|
create_attachment("img.png", path_id, "image/png", source, hamlet, hamlet.realm)
|
||||||
|
self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists())
|
||||||
|
|
||||||
|
|
||||||
class TestThumbnailRetrieval(ZulipTestCase):
|
class TestThumbnailRetrieval(ZulipTestCase):
|
||||||
def test_get_thumbnail(self) -> None:
|
def test_get_thumbnail(self) -> None:
|
||||||
|
|
|
@ -13,6 +13,7 @@ from zerver.lib.test_helpers import get_test_image_file, read_test_image_file
|
||||||
from zerver.lib.thumbnail import DEFAULT_EMOJI_SIZE, MEDIUM_AVATAR_SIZE, resize_avatar
|
from zerver.lib.thumbnail import DEFAULT_EMOJI_SIZE, MEDIUM_AVATAR_SIZE, resize_avatar
|
||||||
from zerver.lib.upload import (
|
from zerver.lib.upload import (
|
||||||
all_message_attachments,
|
all_message_attachments,
|
||||||
|
attachment_vips_source,
|
||||||
delete_export_tarball,
|
delete_export_tarball,
|
||||||
delete_message_attachment,
|
delete_message_attachment,
|
||||||
delete_message_attachments,
|
delete_message_attachments,
|
||||||
|
@ -21,6 +22,7 @@ from zerver.lib.upload import (
|
||||||
upload_export_tarball,
|
upload_export_tarball,
|
||||||
upload_message_attachment,
|
upload_message_attachment,
|
||||||
)
|
)
|
||||||
|
from zerver.lib.upload.base import StreamingSourceWithSize
|
||||||
from zerver.lib.upload.local import write_local_file
|
from zerver.lib.upload.local import write_local_file
|
||||||
from zerver.models import Attachment, RealmEmoji
|
from zerver.models import Attachment, RealmEmoji
|
||||||
from zerver.models.realms import get_realm
|
from zerver.models.realms import get_realm
|
||||||
|
@ -52,6 +54,20 @@ class LocalStorageTest(UploadSerializeMixin, ZulipTestCase):
|
||||||
save_attachment_contents(path_id, output)
|
save_attachment_contents(path_id, output)
|
||||||
self.assertEqual(output.getvalue(), b"zulip!")
|
self.assertEqual(output.getvalue(), b"zulip!")
|
||||||
|
|
||||||
|
def test_attachment_vips_source(self) -> None:
|
||||||
|
user_profile = self.example_user("hamlet")
|
||||||
|
url = upload_message_attachment(
|
||||||
|
"img.png", "image/png", read_test_image_file("img.png"), user_profile
|
||||||
|
)[0]
|
||||||
|
path_id = re.sub(r"/user_uploads/", "", url)
|
||||||
|
|
||||||
|
source = attachment_vips_source(path_id)
|
||||||
|
self.assertIsInstance(source, StreamingSourceWithSize)
|
||||||
|
self.assertEqual(source.size, len(read_test_image_file("img.png")))
|
||||||
|
image = pyvips.Image.new_from_source(source.source, "", access="sequential")
|
||||||
|
self.assertEqual(128, image.height)
|
||||||
|
self.assertEqual(128, image.width)
|
||||||
|
|
||||||
def test_upload_message_attachment_local_cross_realm_path(self) -> None:
|
def test_upload_message_attachment_local_cross_realm_path(self) -> None:
|
||||||
"""
|
"""
|
||||||
Verifies that the path of a file uploaded by a cross-realm bot to another
|
Verifies that the path of a file uploaded by a cross-realm bot to another
|
||||||
|
|
|
@ -30,6 +30,7 @@ from zerver.lib.thumbnail import (
|
||||||
)
|
)
|
||||||
from zerver.lib.upload import (
|
from zerver.lib.upload import (
|
||||||
all_message_attachments,
|
all_message_attachments,
|
||||||
|
attachment_vips_source,
|
||||||
delete_export_tarball,
|
delete_export_tarball,
|
||||||
delete_message_attachment,
|
delete_message_attachment,
|
||||||
delete_message_attachments,
|
delete_message_attachments,
|
||||||
|
@ -37,6 +38,7 @@ from zerver.lib.upload import (
|
||||||
upload_export_tarball,
|
upload_export_tarball,
|
||||||
upload_message_attachment,
|
upload_message_attachment,
|
||||||
)
|
)
|
||||||
|
from zerver.lib.upload.base import StreamingSourceWithSize
|
||||||
from zerver.lib.upload.s3 import S3UploadBackend
|
from zerver.lib.upload.s3 import S3UploadBackend
|
||||||
from zerver.models import Attachment, RealmEmoji, UserProfile
|
from zerver.models import Attachment, RealmEmoji, UserProfile
|
||||||
from zerver.models.realms import get_realm
|
from zerver.models.realms import get_realm
|
||||||
|
@ -75,6 +77,22 @@ class S3Test(ZulipTestCase):
|
||||||
save_attachment_contents(path_id, output)
|
save_attachment_contents(path_id, output)
|
||||||
self.assertEqual(output.getvalue(), b"zulip!")
|
self.assertEqual(output.getvalue(), b"zulip!")
|
||||||
|
|
||||||
|
@use_s3_backend
|
||||||
|
def test_attachment_vips_source(self) -> None:
|
||||||
|
create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)
|
||||||
|
user_profile = self.example_user("hamlet")
|
||||||
|
url = upload_message_attachment(
|
||||||
|
"img.png", "image/png", read_test_image_file("img.png"), user_profile
|
||||||
|
)[0]
|
||||||
|
path_id = re.sub(r"/user_uploads/", "", url)
|
||||||
|
|
||||||
|
source = attachment_vips_source(path_id)
|
||||||
|
self.assertIsInstance(source, StreamingSourceWithSize)
|
||||||
|
self.assertEqual(source.size, len(read_test_image_file("img.png")))
|
||||||
|
image = pyvips.Image.new_from_source(source.source, "", access="sequential")
|
||||||
|
self.assertEqual(128, image.height)
|
||||||
|
self.assertEqual(128, image.width)
|
||||||
|
|
||||||
@use_s3_backend
|
@use_s3_backend
|
||||||
def test_upload_message_attachment_s3_cross_realm_path(self) -> None:
|
def test_upload_message_attachment_s3_cross_realm_path(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue