upload: Generate thumbnails when images are uploaded.

A new table is created to track which path_id attachments are images,
and for those their metadata, and which thumbnails have been created.
Using path_id as the effective primary key lets us ignore if the
attachment is archived or not, saving some foreign key messes.

A new worker is added to observe events when rows are added to this
table, and to generate and store thumbnails for those images in
differing sizes and formats.
This commit is contained in:
Alex Vandiver 2024-06-20 21:58:27 +00:00 committed by Tim Abbott
parent 7aa5bb233d
commit 2e38f426f4
21 changed files with 788 additions and 50 deletions

View File

@ -375,6 +375,13 @@ define service {
check_command check_rabbitmq_consumers!outgoing_webhooks
}
define service {
use rabbitmq-consumer-service
service_description Check RabbitMQ thumbnail consumers
check_command check_rabbitmq_consumers!thumbnail
}
define service {
use rabbitmq-consumer-service
service_description Check RabbitMQ user_activity consumers

View File

@ -141,6 +141,7 @@ class zulip::app_frontend_base {
'missedmessage_emails',
'missedmessage_mobile_notifications',
'outgoing_webhooks',
'thumbnail',
'user_activity',
'user_activity_interval',
'user_presence',

View File

@ -22,6 +22,7 @@ normal_queues = [
"missedmessage_emails",
"missedmessage_mobile_notifications",
"outgoing_webhooks",
"thumbnail",
"user_activity",
"user_activity_interval",
"user_presence",

View File

@ -3,8 +3,16 @@ from typing import Any
from zerver.lib.attachments import get_old_unclaimed_attachments, validate_attachment_request
from zerver.lib.markdown import MessageRenderingResult
from zerver.lib.upload import claim_attachment, delete_message_attachments
from zerver.models import Attachment, Message, ScheduledMessage, Stream, UserProfile
from zerver.lib.thumbnail import StoredThumbnailFormat
from zerver.lib.upload import claim_attachment, delete_message_attachments, get_image_thumbnail_path
from zerver.models import (
Attachment,
ImageAttachment,
Message,
ScheduledMessage,
Stream,
UserProfile,
)
from zerver.tornado.django_api import send_event_on_commit
@ -77,6 +85,12 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
storage_paths = []
for attachment in old_unclaimed_attachments:
storage_paths.append(attachment.path_id)
image_row = ImageAttachment.objects.filter(path_id=attachment.path_id).first()
if image_row:
for existing_thumbnail in image_row.thumbnail_metadata:
thumb = StoredThumbnailFormat(**existing_thumbnail)
storage_paths.append(get_image_thumbnail_path(image_row, thumb))
image_row.delete()
already_removed.add(attachment.path_id)
attachment.delete()
if len(storage_paths) >= DELETE_BATCH_SIZE:
@ -85,6 +99,12 @@ def do_delete_old_unclaimed_attachments(weeks_ago: int) -> None:
for archived_attachment in old_unclaimed_archived_attachments:
if archived_attachment.path_id not in already_removed:
storage_paths.append(archived_attachment.path_id)
image_row = ImageAttachment.objects.filter(path_id=archived_attachment.path_id).first()
if image_row: # nocoverage
for existing_thumbnail in image_row.thumbnail_metadata:
thumb = StoredThumbnailFormat(**existing_thumbnail)
storage_paths.append(get_image_thumbnail_path(image_row, thumb))
image_row.delete()
archived_attachment.delete()
if len(storage_paths) >= DELETE_BATCH_SIZE:
delete_message_attachments(storage_paths)

View File

@ -136,6 +136,7 @@ ALL_ZULIP_TABLES = {
"zerver_emailchangestatus",
"zerver_groupgroupmembership",
"zerver_huddle",
"zerver_imageattachment",
"zerver_message",
"zerver_missedmessageemailaddress",
"zerver_multiuseinvite",

View File

@ -557,7 +557,11 @@ def use_s3_backend(method: Callable[P, None]) -> Callable[P, None]:
@override_settings(LOCAL_AVATARS_DIR=None)
@override_settings(LOCAL_FILES_DIR=None)
def new_method(*args: P.args, **kwargs: P.kwargs) -> None:
with mock.patch("zerver.lib.upload.upload_backend", S3UploadBackend()):
backend = S3UploadBackend()
with (
mock.patch("zerver.lib.upload.upload_backend", backend),
mock.patch("zerver.worker.thumbnail.upload_backend", backend),
):
return method(*args, **kwargs)
return new_method

View File

@ -1,15 +1,21 @@
import logging
import os
import re
from collections.abc import Iterator
from contextlib import contextmanager
from dataclasses import dataclass
from typing import TypeVar
from urllib.parse import urljoin
import pyvips
from django.utils.http import url_has_allowed_host_and_scheme
from django.utils.translation import gettext as _
from typing_extensions import override
from zerver.lib.camo import get_camo_url
from zerver.lib.exceptions import ErrorCode, JsonableError
from zerver.lib.queue import queue_event_on_commit
from zerver.models import AbstractAttachment, ImageAttachment
DEFAULT_AVATAR_SIZE = 100
MEDIUM_AVATAR_SIZE = 500
@ -21,6 +27,71 @@ IMAGE_BOMB_TOTAL_PIXELS = 90000000
# Reject emoji which, after resizing, have stills larger than this
MAX_EMOJI_GIF_FILE_SIZE_BYTES = 128 * 1024 # 128 kb
T = TypeVar("T", bound="BaseThumbnailFormat")
@dataclass(frozen=True)
class BaseThumbnailFormat:
extension: str
max_width: int
max_height: int
animated: bool
@override
def __eq__(self, other: object) -> bool:
if not isinstance(other, BaseThumbnailFormat):
return False
return str(self) == str(other)
@override
def __str__(self) -> str:
animated = "-anim" if self.animated else ""
return f"{self.max_width}x{self.max_height}{animated}.{self.extension}"
@classmethod
def from_string(cls: type[T], format_string: str) -> T | None:
format_parts = re.match(r"(\d+)x(\d+)(-anim)?\.(\w+)$", format_string)
if format_parts is None:
return None
return cls(
max_width=int(format_parts[1]),
max_height=int(format_parts[2]),
animated=format_parts[3] is not None,
extension=format_parts[4],
)
@dataclass(frozen=True, eq=False)
class ThumbnailFormat(BaseThumbnailFormat):
opts: str | None = ""
# Note that this is serialized into a JSONB column in the database,
# and as such fields cannot be removed without a migration.
@dataclass(frozen=True, eq=False)
class StoredThumbnailFormat(BaseThumbnailFormat):
content_type: str
width: int
height: int
byte_size: int
# Formats that we generate; the first animated and non-animated
# options on this list are the ones which are written into
# rendered_content.
THUMBNAIL_OUTPUT_FORMATS = [
# For now, we generate relatively large default "thumbnails", so
# that clients that do not understand the thumbnailing protocol
# (e.g. mobile) get something which does not look pixelated.
ThumbnailFormat("webp", 840, 560, animated=True),
ThumbnailFormat("webp", 840, 560, animated=False),
# 300x200 is the size preferred by the web client.
ThumbnailFormat("webp", 300, 200, animated=True),
ThumbnailFormat("webp", 300, 200, animated=False),
ThumbnailFormat("jpg", 300, 200, animated=False),
]
# These are the image content-types which the server supports parsing
# and thumbnailing; these do not need to supported on all browsers,
@ -186,3 +257,51 @@ def resize_emoji(
]
animated = frames[0].pagejoin(frames[1:])
return (animated.write_to_buffer(write_file_ext), first_still)
def missing_thumbnails(image_attachment: ImageAttachment) -> list[ThumbnailFormat]:
seen_thumbnails: set[StoredThumbnailFormat] = set()
for existing_thumbnail in image_attachment.thumbnail_metadata:
seen_thumbnails.add(StoredThumbnailFormat(**existing_thumbnail))
# We use the shared `__eq__` method from BaseThumbnailFormat to
# compare between the StoredThumbnailFormat values pulled from the
# database, and the ThumbnailFormat values in
# THUMBNAIL_OUTPUT_FORMATS.
needed_thumbnails = [
thumbnail_format
for thumbnail_format in THUMBNAIL_OUTPUT_FORMATS
if thumbnail_format not in seen_thumbnails
]
if image_attachment.frames == 1:
# We do not generate -anim versions if the source is still
needed_thumbnails = [
thumbnail_format
for thumbnail_format in needed_thumbnails
if not thumbnail_format.animated
]
return needed_thumbnails
def maybe_thumbnail(attachment: AbstractAttachment, content: bytes) -> ImageAttachment | None:
if attachment.content_type not in THUMBNAIL_ACCEPT_IMAGE_TYPES:
# If it doesn't self-report as an image file that we might want
# to thumbnail, don't parse the bytes at all.
return None
try:
# This only attempts to read the header, not the full image content
with libvips_check_image(content) as image:
image_row = ImageAttachment.objects.create(
realm_id=attachment.realm_id,
path_id=attachment.path_id,
original_width_px=image.width,
original_height_px=image.height,
frames=image.get_n_pages(),
thumbnail_metadata=[],
)
queue_event_on_commit("thumbnail", {"id": image_row.id})
return image_row
except BadImageError:
return None

View File

@ -10,6 +10,7 @@ from urllib.parse import unquote, urljoin
from django.conf import settings
from django.core.files.uploadedfile import UploadedFile
from django.db import transaction
from django.utils.translation import gettext as _
from zerver.lib.avatar_hash import user_avatar_base_path_from_ids, user_avatar_path
@ -21,11 +22,21 @@ from zerver.lib.thumbnail import (
MEDIUM_AVATAR_SIZE,
THUMBNAIL_ACCEPT_IMAGE_TYPES,
BadImageError,
BaseThumbnailFormat,
maybe_thumbnail,
resize_avatar,
resize_emoji,
)
from zerver.lib.upload.base import INLINE_MIME_TYPES, ZulipUploadBackend
from zerver.models import Attachment, Message, Realm, RealmEmoji, ScheduledMessage, UserProfile
from zerver.models import (
Attachment,
ImageAttachment,
Message,
Realm,
RealmEmoji,
ScheduledMessage,
UserProfile,
)
from zerver.models.users import is_cross_realm_bot_email
@ -61,6 +72,7 @@ def create_attachment(
size=len(file_data),
content_type=content_type,
)
maybe_thumbnail(attachment, file_data)
from zerver.actions.uploads import notify_attachment_update
notify_attachment_update(user_profile, "add", attachment.to_dict())
@ -124,6 +136,22 @@ def sanitize_name(value: str) -> str:
return value
def get_image_thumbnail_path(
image_attachment: ImageAttachment,
thumbnail_format: BaseThumbnailFormat,
) -> str:
return f"thumbnail/{image_attachment.path_id}/{thumbnail_format!s}"
def split_thumbnail_path(file_path: str) -> tuple[str, BaseThumbnailFormat]:
assert file_path.startswith("thumbnail/")
path_parts = file_path.split("/")
thumbnail_format = BaseThumbnailFormat.from_string(path_parts.pop())
assert thumbnail_format is not None
path_id = "/".join(path_parts[1:])
return path_id, thumbnail_format
def upload_message_attachment(
uploaded_file_name: str,
content_type: str,
@ -136,6 +164,7 @@ def upload_message_attachment(
path_id = upload_backend.generate_message_upload_path(
str(target_realm.id), sanitize_name(uploaded_file_name)
)
with transaction.atomic():
upload_backend.upload_message_attachment(
path_id,
content_type,
@ -196,8 +225,8 @@ def delete_message_attachments(path_ids: list[str]) -> None:
return upload_backend.delete_message_attachments(path_ids)
def all_message_attachments() -> Iterator[tuple[str, datetime]]:
return upload_backend.all_message_attachments()
def all_message_attachments(include_thumbnails: bool = False) -> Iterator[tuple[str, datetime]]:
return upload_backend.all_message_attachments(include_thumbnails)
# Avatar image uploads

View File

@ -40,7 +40,7 @@ class ZulipUploadBackend:
path_id: str,
content_type: str,
file_data: bytes,
user_profile: UserProfile,
user_profile: UserProfile | None,
) -> None:
raise NotImplementedError
@ -54,7 +54,9 @@ class ZulipUploadBackend:
for path_id in path_ids:
self.delete_message_attachment(path_id)
def all_message_attachments(self) -> Iterator[tuple[str, datetime]]:
def all_message_attachments(
self, include_thumbnails: bool = False
) -> Iterator[tuple[str, datetime]]:
raise NotImplementedError
# Avatar image uploads

View File

@ -91,7 +91,7 @@ class LocalUploadBackend(ZulipUploadBackend):
path_id: str,
content_type: str,
file_data: bytes,
user_profile: UserProfile,
user_profile: UserProfile | None,
) -> None:
write_local_file("files", path_id, file_data)
@ -104,9 +104,14 @@ class LocalUploadBackend(ZulipUploadBackend):
return delete_local_file("files", path_id)
@override
def all_message_attachments(self) -> Iterator[tuple[str, datetime]]:
def all_message_attachments(
self, include_thumbnails: bool = False
) -> Iterator[tuple[str, datetime]]:
assert settings.LOCAL_UPLOADS_DIR is not None
for dirname, _, files in os.walk(settings.LOCAL_UPLOADS_DIR + "/files"):
top = settings.LOCAL_UPLOADS_DIR + "/files"
for dirname, subdirnames, files in os.walk(top):
if not include_thumbnails and dirname == top and "thumbnail" in subdirnames:
subdirnames.remove("thumbnail")
for f in files:
fullpath = os.path.join(dirname, f)
yield (

View File

@ -64,7 +64,7 @@ def upload_image_to_s3(
bucket: Bucket,
file_name: str,
content_type: str | None,
user_profile: UserProfile,
user_profile: UserProfile | None,
contents: bytes,
*,
storage_class: Literal[
@ -79,10 +79,10 @@ def upload_image_to_s3(
extra_metadata: dict[str, str] | None = None,
) -> None:
key = bucket.Object(file_name)
metadata = {
"user_profile_id": str(user_profile.id),
"realm_id": str(user_profile.realm_id),
}
metadata: dict[str, str] = {}
if user_profile:
metadata["user_profile_id"] = str(user_profile.id)
metadata["realm_id"] = str(user_profile.realm_id)
if extra_metadata is not None:
metadata.update(extra_metadata)
@ -213,7 +213,7 @@ class S3UploadBackend(ZulipUploadBackend):
path_id: str,
content_type: str,
file_data: bytes,
user_profile: UserProfile,
user_profile: UserProfile | None,
) -> None:
upload_image_to_s3(
self.uploads_bucket,
@ -240,7 +240,9 @@ class S3UploadBackend(ZulipUploadBackend):
)
@override
def all_message_attachments(self) -> Iterator[tuple[str, datetime]]:
def all_message_attachments(
self, include_thumbnails: bool = False
) -> Iterator[tuple[str, datetime]]:
client = self.uploads_bucket.meta.client
paginator = client.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(Bucket=self.uploads_bucket.name)
@ -248,6 +250,8 @@ class S3UploadBackend(ZulipUploadBackend):
for page in page_iterator:
if page["KeyCount"] > 0:
for item in page["Contents"]:
if not include_thumbnails and item["Key"].startswith("thumbnail/"):
continue
yield (
item["Key"],
item["LastModified"],

View File

@ -9,7 +9,11 @@ from typing_extensions import override
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.lib.attachments import get_old_unclaimed_attachments
from zerver.lib.management import ZulipBaseCommand, abort_unless_locked
from zerver.lib.upload import all_message_attachments, delete_message_attachments
from zerver.lib.upload import (
all_message_attachments,
delete_message_attachments,
split_thumbnail_path,
)
from zerver.models import ArchivedAttachment, Attachment
@ -76,7 +80,11 @@ class Command(ZulipBaseCommand):
cutoff = timezone_now() - timedelta(minutes=5)
print(f"Removing extra files in storage black-end older than {cutoff.isoformat()}")
to_delete = []
for path_id, modified_at in all_message_attachments():
for file_path, modified_at in all_message_attachments(include_thumbnails=True):
if file_path.startswith("thumbnail/"):
path_id = split_thumbnail_path(file_path)[0]
else:
path_id = file_path
if Attachment.objects.filter(path_id=path_id).exists():
continue
if ArchivedAttachment.objects.filter(path_id=path_id).exists():
@ -86,10 +94,10 @@ class Command(ZulipBaseCommand):
# make the database entry, so must give some leeway to
# recently-added files which do not have DB rows.
continue
print(f"* {path_id} modified at {modified_at}")
print(f"* {file_path} modified at {modified_at}")
if dry_run:
continue
to_delete.append(path_id)
to_delete.append(file_path)
if len(to_delete) > 1000:
delete_message_attachments(to_delete)
to_delete = []

View File

@ -0,0 +1,33 @@
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("zerver", "0553_copy_emoji_images"),
]
operations = [
migrations.CreateModel(
name="ImageAttachment",
fields=[
(
"id",
models.BigAutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
("path_id", models.TextField(db_index=True, unique=True)),
("original_width_px", models.IntegerField()),
("original_height_px", models.IntegerField()),
("frames", models.IntegerField()),
("thumbnail_metadata", models.JSONField(default=list)),
(
"realm",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE, to="zerver.realm"
),
),
],
),
]

View File

@ -25,6 +25,7 @@ from zerver.models.messages import ArchivedSubMessage as ArchivedSubMessage
from zerver.models.messages import ArchivedUserMessage as ArchivedUserMessage
from zerver.models.messages import ArchiveTransaction as ArchiveTransaction
from zerver.models.messages import Attachment as Attachment
from zerver.models.messages import ImageAttachment as ImageAttachment
from zerver.models.messages import Message as Message
from zerver.models.messages import OnboardingUserMessage as OnboardingUserMessage
from zerver.models.messages import Reaction as Reaction

View File

@ -665,6 +665,18 @@ class ArchivedUserMessage(AbstractUserMessage):
return f"{recipient_string} / {self.user_profile.email} ({self.flags_list()})"
class ImageAttachment(models.Model):
realm = models.ForeignKey(Realm, on_delete=CASCADE)
path_id = models.TextField(db_index=True, unique=True)
original_width_px = models.IntegerField()
original_height_px = models.IntegerField()
frames = models.IntegerField()
# Contains a list of zerver.lib.thumbnail.StoredThumbnailFormat objects, serialized
thumbnail_metadata = models.JSONField(default=list, null=False)
class AbstractAttachment(models.Model):
file_name = models.TextField(db_index=True)

Binary file not shown.

After

Width:  |  Height:  |  Size: 500 B

View File

@ -1,7 +1,6 @@
import os
import re
from datetime import datetime, timedelta
from io import StringIO
from unittest.mock import patch
import time_machine
@ -13,6 +12,7 @@ from zerver.actions.scheduled_messages import check_schedule_message, delete_sch
from zerver.actions.uploads import do_delete_old_unclaimed_attachments
from zerver.lib.retention import clean_archived_data
from zerver.lib.test_classes import UploadSerializeMixin, ZulipTestCase
from zerver.lib.test_helpers import get_test_image_file
from zerver.models import ArchivedAttachment, Attachment, Message, UserProfile
from zerver.models.clients import get_client
@ -28,8 +28,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
self.login_user(uploader)
with time_machine.travel(when, tick=False):
file_obj = StringIO("zulip!")
file_obj.name = filename
file_obj = get_test_image_file(filename)
response = self.assert_json_success(
self.client_post("/json/user_uploads", {"file": file_obj})
)
@ -55,8 +54,62 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
ArchivedAttachment.objects.filter(id=attachment.id).exists(), has_archived_attachment
)
def test_delete_unused_thumbnails(self) -> None:
assert settings.LOCAL_FILES_DIR
with self.captureOnCommitCallbacks(execute=True):
unused_attachment = self.make_attachment("img.png")
self.assert_exists(
unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
)
# It also has thumbnails
self.assertTrue(
os.path.isdir(
os.path.join(settings.LOCAL_FILES_DIR, "thumbnail", unused_attachment.path_id)
)
)
self.assertGreater(
len(
os.listdir(
os.path.join(settings.LOCAL_FILES_DIR, "thumbnail", unused_attachment.path_id)
)
),
0,
)
# If we have 3 weeks of grace, nothing happens
do_delete_old_unclaimed_attachments(3)
self.assert_exists(
unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
)
self.assertTrue(
os.path.isdir(
os.path.join(settings.LOCAL_FILES_DIR, "thumbnail", unused_attachment.path_id)
)
)
self.assertGreater(
len(
os.listdir(
os.path.join(settings.LOCAL_FILES_DIR, "thumbnail", unused_attachment.path_id)
)
),
0,
)
# If we have 1 weeks of grace, the Attachment is deleted, and so is the file on disk
do_delete_old_unclaimed_attachments(1)
self.assert_exists(
unused_attachment, has_file=False, has_attachment=False, has_archived_attachment=False
)
self.assertFalse(
os.path.exists(
os.path.join(settings.LOCAL_FILES_DIR, "thumbnail", unused_attachment.path_id)
)
)
def test_delete_unused_upload(self) -> None:
unused_attachment = self.make_attachment("unused.txt")
unused_attachment = self.make_attachment("text.txt")
self.assert_exists(
unused_attachment, has_file=True, has_attachment=True, has_archived_attachment=False
)
@ -75,7 +128,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
def test_delete_used_upload(self) -> None:
hamlet = self.example_user("hamlet")
attachment = self.make_attachment("used.txt")
attachment = self.make_attachment("text.txt")
# Send message referencing that message
self.subscribe(hamlet, "Denmark")
@ -90,7 +143,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
def test_delete_upload_archived_message(self) -> None:
hamlet = self.example_user("hamlet")
attachment = self.make_attachment("used.txt")
attachment = self.make_attachment("text.txt")
# Send message referencing that message
self.subscribe(hamlet, "Denmark")
@ -127,7 +180,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
def test_delete_one_message(self) -> None:
hamlet = self.example_user("hamlet")
attachment = self.make_attachment("used.txt")
attachment = self.make_attachment("text.txt")
# Send message referencing that message
self.subscribe(hamlet, "Denmark")
@ -183,7 +236,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
def test_delete_with_scheduled_messages(self) -> None:
hamlet = self.example_user("hamlet")
attachment = self.make_attachment("used.txt")
attachment = self.make_attachment("text.txt")
# Schedule a future send with the attachment
self.subscribe(hamlet, "Denmark")
@ -222,7 +275,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
def test_delete_with_scheduled_message_and_archive(self) -> None:
hamlet = self.example_user("hamlet")
attachment = self.make_attachment("used.txt")
attachment = self.make_attachment("text.txt")
# Schedule a message, and also send one now
self.subscribe(hamlet, "Denmark")
@ -286,7 +339,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
# the process of archiving prunes Attachments which have no
# references.
hamlet = self.example_user("hamlet")
attachment = self.make_attachment("used.txt")
attachment = self.make_attachment("text.txt")
# Schedule a message, and also send one now
self.subscribe(hamlet, "Denmark")
@ -340,7 +393,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
)
def test_delete_batch_size(self) -> None:
attachments = [self.make_attachment("unused.txt") for _ in range(10)]
attachments = [self.make_attachment("text.txt") for _ in range(10)]
with (
patch("zerver.actions.uploads.DELETE_BATCH_SIZE", 6),
@ -361,7 +414,7 @@ class UnclaimedAttachmentTest(UploadSerializeMixin, ZulipTestCase):
def test_delete_batch_size_archived(self) -> None:
hamlet = self.example_user("hamlet")
attachments = [self.make_attachment("unused.txt") for _ in range(20)]
attachments = [self.make_attachment("text.txt") for _ in range(20)]
# Send message referencing 10/20 of those attachments
self.subscribe(hamlet, "Denmark")

View File

@ -1,13 +1,30 @@
import re
from dataclasses import asdict
from io import StringIO
from unittest.mock import patch
import orjson
import pyvips
from django.conf import settings
from django.test import override_settings
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import ratelimit_rule, read_test_image_file
from zerver.lib.thumbnail import BadImageError, resize_emoji
from zerver.lib.test_helpers import get_test_image_file, ratelimit_rule, read_test_image_file
from zerver.lib.thumbnail import (
BadImageError,
BaseThumbnailFormat,
StoredThumbnailFormat,
ThumbnailFormat,
missing_thumbnails,
resize_emoji,
)
from zerver.lib.upload import (
all_message_attachments,
get_image_thumbnail_path,
split_thumbnail_path,
)
from zerver.models import Attachment, ImageAttachment
from zerver.worker.thumbnail import ensure_thumbnails
class ThumbnailRedirectEndpointTest(ZulipTestCase):
@ -180,3 +197,275 @@ class ThumbnailEmojiTest(ZulipTestCase):
non_img_data = read_test_image_file("text.txt")
with self.assertRaises(BadImageError):
resize_emoji(non_img_data, "text.png", size=50)
class ThumbnailClassesTest(ZulipTestCase):
def test_class_equivalence(self) -> None:
self.assertNotEqual(
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90"),
"150x100-anim.webp",
)
self.assertEqual(
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90"),
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=10"),
)
self.assertEqual(
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90"),
BaseThumbnailFormat("webp", 150, 100, animated=True),
)
self.assertNotEqual(
ThumbnailFormat("jpeg", 150, 100, animated=True, opts="Q=90"),
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90"),
)
self.assertNotEqual(
ThumbnailFormat("webp", 300, 100, animated=True, opts="Q=90"),
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90"),
)
self.assertNotEqual(
ThumbnailFormat("webp", 150, 100, animated=False, opts="Q=90"),
ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90"),
)
# We can compare stored thumbnails, with much more metadata,
# to the thumbnail formats that spec how they are generated
self.assertEqual(
ThumbnailFormat("webp", 150, 100, animated=False, opts="Q=90"),
StoredThumbnailFormat(
"webp",
150,
100,
animated=False,
content_type="image/webp",
width=120,
height=100,
byte_size=123,
),
)
# But differences in the base four properties mean they are not equal
self.assertNotEqual(
ThumbnailFormat("webp", 150, 100, animated=False, opts="Q=90"),
StoredThumbnailFormat(
"webp",
150,
100,
animated=True, # Note this change
content_type="image/webp",
width=120,
height=100,
byte_size=123,
),
)
def test_stringification(self) -> None:
# These formats need to be stable, since they are written into URLs in the messages.
self.assertEqual(
str(ThumbnailFormat("webp", 150, 100, animated=False)),
"150x100.webp",
)
self.assertEqual(
str(ThumbnailFormat("webp", 150, 100, animated=True)),
"150x100-anim.webp",
)
# And they should round-trip into BaseThumbnailFormat, losing the opts= which we do not serialize
thumb_format = ThumbnailFormat("webp", 150, 100, animated=True, opts="Q=90")
self.assertEqual(thumb_format.extension, "webp")
self.assertEqual(thumb_format.max_width, 150)
self.assertEqual(thumb_format.max_height, 100)
self.assertEqual(thumb_format.animated, True)
round_trip = BaseThumbnailFormat.from_string(str(thumb_format))
assert round_trip is not None
self.assertEqual(thumb_format, round_trip)
self.assertEqual(round_trip.extension, "webp")
self.assertEqual(round_trip.max_width, 150)
self.assertEqual(round_trip.max_height, 100)
self.assertEqual(round_trip.animated, True)
self.assertIsNone(BaseThumbnailFormat.from_string("bad.webp"))
class TestStoreThumbnail(ZulipTestCase):
@patch(
"zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS",
[ThumbnailFormat("webp", 100, 75, animated=True)],
)
def test_upload_image(self) -> None:
assert settings.LOCAL_FILES_DIR
self.login_user(self.example_user("hamlet"))
with self.captureOnCommitCallbacks(execute=True):
with get_test_image_file("animated_unequal_img.gif") as image_file:
response = self.assert_json_success(
self.client_post("/json/user_uploads", {"file": image_file})
)
path_id = re.sub(r"/user_uploads/", "", response["url"])
self.assertEqual(Attachment.objects.filter(path_id=path_id).count(), 1)
image_attachment = ImageAttachment.objects.get(path_id=path_id)
self.assertEqual(image_attachment.original_height_px, 56)
self.assertEqual(image_attachment.original_width_px, 128)
self.assertEqual(image_attachment.frames, 3)
self.assertEqual(image_attachment.thumbnail_metadata, [])
self.assertEqual(
[r[0] for r in all_message_attachments(include_thumbnails=True)],
[path_id],
)
# The worker triggers when we exit this block and call the pending callbacks
image_attachment = ImageAttachment.objects.get(path_id=path_id)
self.assert_length(image_attachment.thumbnail_metadata, 1)
generated_thumbnail = StoredThumbnailFormat(**image_attachment.thumbnail_metadata[0])
self.assertEqual(str(generated_thumbnail), "100x75-anim.webp")
self.assertEqual(generated_thumbnail.animated, True)
self.assertEqual(generated_thumbnail.width, 100)
self.assertEqual(generated_thumbnail.height, 44)
self.assertEqual(generated_thumbnail.content_type, "image/webp")
self.assertGreater(generated_thumbnail.byte_size, 200)
self.assertLess(generated_thumbnail.byte_size, 2 * 1024)
self.assertEqual(
get_image_thumbnail_path(image_attachment, generated_thumbnail),
f"thumbnail/{path_id}/100x75-anim.webp",
)
parsed_path = split_thumbnail_path(f"thumbnail/{path_id}/100x75-anim.webp")
self.assertEqual(parsed_path[0], path_id)
self.assertIsInstance(parsed_path[1], BaseThumbnailFormat)
self.assertEqual(str(parsed_path[1]), str(generated_thumbnail))
self.assertEqual(
sorted([r[0] for r in all_message_attachments(include_thumbnails=True)]),
sorted([path_id, f"thumbnail/{path_id}/100x75-anim.webp"]),
)
self.assertEqual(ensure_thumbnails(image_attachment), 0)
bigger_thumb_format = ThumbnailFormat("webp", 150, 100, opts="Q=90", animated=False)
with patch("zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", [bigger_thumb_format]):
self.assertEqual(ensure_thumbnails(image_attachment), 1)
self.assert_length(image_attachment.thumbnail_metadata, 2)
bigger_thumbnail = StoredThumbnailFormat(**image_attachment.thumbnail_metadata[1])
self.assertEqual(str(bigger_thumbnail), "150x100.webp")
self.assertEqual(bigger_thumbnail.animated, False)
# We don't scale up, so these are the original dimensions
self.assertEqual(bigger_thumbnail.width, 128)
self.assertEqual(bigger_thumbnail.height, 56)
self.assertEqual(bigger_thumbnail.content_type, "image/webp")
self.assertGreater(bigger_thumbnail.byte_size, 200)
self.assertLess(bigger_thumbnail.byte_size, 2 * 1024)
self.assertEqual(
sorted([r[0] for r in all_message_attachments(include_thumbnails=True)]),
sorted(
[
path_id,
f"thumbnail/{path_id}/100x75-anim.webp",
f"thumbnail/{path_id}/150x100.webp",
]
),
)
@patch(
"zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS",
[ThumbnailFormat("webp", 100, 75, animated=False)],
)
def test_bad_upload(self) -> None:
assert settings.LOCAL_FILES_DIR
hamlet = self.example_user("hamlet")
self.login_user(hamlet)
with self.captureOnCommitCallbacks(execute=True):
with get_test_image_file("truncated.gif") as image_file:
response = self.assert_json_success(
self.client_post("/json/user_uploads", {"file": image_file})
)
path_id = re.sub(r"/user_uploads/", "", response["url"])
self.assertEqual(Attachment.objects.filter(path_id=path_id).count(), 1)
# This doesn't generate an ImageAttachment row because it's corrupted
self.assertEqual(ImageAttachment.objects.filter(path_id=path_id).count(), 0)
# Fake making one, based on if just part of the file is readable
image_attachment = ImageAttachment.objects.create(
realm_id=hamlet.realm_id,
path_id=path_id,
original_height_px=128,
original_width_px=128,
frames=1,
thumbnail_metadata=[],
)
self.assert_length(missing_thumbnails(image_attachment), 1)
with self.assertLogs("zerver.worker.thumbnail", level="ERROR") as error_log:
self.assertEqual(ensure_thumbnails(image_attachment), 0)
libvips_version = (pyvips.version(0), pyvips.version(1))
# This error message changed
if libvips_version < (8, 13): # nocoverage # branch varies with version
expected_message = "gifload_buffer: Insufficient data to do anything"
else: # nocoverage # branch varies with version
expected_message = "gifload_buffer: no frames in GIF"
self.assertTrue(expected_message in error_log.output[0])
# It should have now been removed
self.assertEqual(ImageAttachment.objects.filter(path_id=path_id).count(), 0)
def test_missing_thumbnails(self) -> None:
image_attachment = ImageAttachment(
path_id="example",
original_width_px=150,
original_height_px=100,
frames=1,
thumbnail_metadata=[],
)
with patch("zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", []):
self.assertEqual(missing_thumbnails(image_attachment), [])
still_webp = ThumbnailFormat("webp", 100, 75, animated=False, opts="Q=90")
with patch("zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", [still_webp]):
self.assertEqual(missing_thumbnails(image_attachment), [still_webp])
anim_webp = ThumbnailFormat("webp", 100, 75, animated=True, opts="Q=90")
with patch("zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", [still_webp, anim_webp]):
# It's not animated, so the animated format doesn't appear at all
self.assertEqual(missing_thumbnails(image_attachment), [still_webp])
still_jpeg = ThumbnailFormat("jpeg", 100, 75, animated=False, opts="Q=90")
with patch(
"zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", [still_webp, anim_webp, still_jpeg]
):
# But other still formats do
self.assertEqual(missing_thumbnails(image_attachment), [still_webp, still_jpeg])
# If we have a rendered 150x100.webp, then we're not missing it
rendered_still_webp = StoredThumbnailFormat(
"webp",
100,
75,
animated=False,
width=150,
height=50,
content_type="image/webp",
byte_size=1234,
)
image_attachment.thumbnail_metadata = [asdict(rendered_still_webp)]
with patch(
"zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", [still_webp, anim_webp, still_jpeg]
):
self.assertEqual(missing_thumbnails(image_attachment), [still_jpeg])
# If we have the still, and it's animated, we do still need the animated
image_attachment.frames = 10
with patch(
"zerver.lib.thumbnail.THUMBNAIL_OUTPUT_FORMATS", [still_webp, anim_webp, still_jpeg]
):
self.assertEqual(missing_thumbnails(image_attachment), [anim_webp, still_jpeg])

View File

@ -114,6 +114,15 @@ class LocalStorageTest(UploadSerializeMixin, ZulipTestCase):
found_files = [r[0] for r in all_message_attachments()]
self.assertEqual(sorted(found_files), ["bar/baz", "bar/troz", "foo", "test/other/file"])
write_local_file("files", "thumbnail/thing", b"content")
found_files = [r[0] for r in all_message_attachments()]
self.assertEqual(sorted(found_files), ["bar/baz", "bar/troz", "foo", "test/other/file"])
found_files = [r[0] for r in all_message_attachments(include_thumbnails=True)]
self.assertEqual(
sorted(found_files),
["bar/baz", "bar/troz", "foo", "test/other/file", "thumbnail/thing"],
)
def test_avatar_url(self) -> None:
self.login("hamlet")
with get_test_image_file("img.png") as image_file:

View File

@ -22,6 +22,7 @@ from zerver.lib.test_helpers import (
from zerver.lib.thumbnail import (
DEFAULT_AVATAR_SIZE,
MEDIUM_AVATAR_SIZE,
THUMBNAIL_OUTPUT_FORMATS,
BadImageError,
resize_avatar,
resize_emoji,
@ -147,9 +148,25 @@ class S3Test(ZulipTestCase):
for n in range(1, 5):
url = upload_message_attachment("dummy.txt", "text/plain", b"zulip!", user_profile)
path_ids.append(re.sub(r"/user_uploads/", "", url))
# Put an image in, which gets thumbnailed
with self.captureOnCommitCallbacks(execute=True):
url = upload_message_attachment(
"img.png", "image/png", read_test_image_file("img.png"), user_profile
)
image_path_id = re.sub(r"/user_uploads/", "", url)
path_ids.append(image_path_id)
found_paths = [r[0] for r in all_message_attachments()]
self.assertEqual(sorted(found_paths), sorted(path_ids))
found_paths = [r[0] for r in all_message_attachments(include_thumbnails=True)]
for thumbnail_format in THUMBNAIL_OUTPUT_FORMATS:
if thumbnail_format.animated:
continue
path_ids.append(f"thumbnail/{image_path_id}/{thumbnail_format!s}")
self.assertEqual(sorted(found_paths), sorted(path_ids))
@use_s3_backend
def test_user_uploads_authed(self) -> None:
"""

123
zerver/worker/thumbnail.py Normal file
View File

@ -0,0 +1,123 @@
import logging
import time
from dataclasses import asdict
from io import BytesIO
from typing import Any
import pyvips
from django.db import transaction
from typing_extensions import override
from zerver.lib.mime_types import guess_type
from zerver.lib.thumbnail import StoredThumbnailFormat, missing_thumbnails
from zerver.lib.upload import get_image_thumbnail_path, save_attachment_contents, upload_backend
from zerver.models import ImageAttachment
from zerver.worker.base import QueueProcessingWorker, assign_queue
logger = logging.getLogger(__name__)
@assign_queue("thumbnail")
class ThumbnailWorker(QueueProcessingWorker):
@override
def consume(self, event: dict[str, Any]) -> None:
start = time.time()
with transaction.atomic(savepoint=False):
try:
row = ImageAttachment.objects.select_for_update().get(id=event["id"])
except ImageAttachment.DoesNotExist: # nocoverage
logger.info("ImageAttachment row %d missing", event["id"])
return
uploaded_thumbnails = ensure_thumbnails(row)
end = time.time()
logger.info(
"Processed %d thumbnails (%dms)",
uploaded_thumbnails,
(end - start) * 1000,
)
def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
needed_thumbnails = missing_thumbnails(image_attachment)
if not needed_thumbnails:
return 0
written_images = 0
image_bytes = BytesIO()
save_attachment_contents(image_attachment.path_id, image_bytes)
try:
# TODO: We could save some computational time by using the same
# bytes if multiple resolutions are larger than the source
# image. That is, if the input is 10x10, a 100x100.jpg is
# going to be the same as a 200x200.jpg, since those set the
# max dimensions, and we do not scale up.
for thumbnail_format in needed_thumbnails:
# This will scale to fit within the given dimensions; it
# may be smaller one one or more of them.
logger.info(
"Resizing to %d x %d, from %d x %d",
thumbnail_format.max_width,
thumbnail_format.max_height,
image_attachment.original_width_px,
image_attachment.original_height_px,
)
load_opts = ""
if image_attachment.frames > 1:
# If the original has multiple frames, we want to load
# one of them if we're outputting to a static format,
# otherwise we load them all.
if thumbnail_format.animated:
load_opts = "n=-1"
else:
load_opts = "n=1"
resized = pyvips.Image.thumbnail_buffer(
image_bytes.getbuffer(),
thumbnail_format.max_width,
height=thumbnail_format.max_height,
option_string=load_opts,
size=pyvips.Size.DOWN,
)
thumbnailed_bytes = resized.write_to_buffer(
f".{thumbnail_format.extension}[{thumbnail_format.opts}]"
)
content_type = guess_type(f"image.{thumbnail_format.extension}")[0]
assert content_type is not None
thumbnail_path = get_image_thumbnail_path(image_attachment, thumbnail_format)
logger.info("Uploading %d bytes to %s", len(thumbnailed_bytes), thumbnail_path)
upload_backend.upload_message_attachment(
thumbnail_path,
content_type,
thumbnailed_bytes,
None,
)
height = resized.get("page-height") if thumbnail_format.animated else resized.height
image_attachment.thumbnail_metadata.append(
asdict(
StoredThumbnailFormat(
extension=thumbnail_format.extension,
content_type=content_type,
max_width=thumbnail_format.max_width,
max_height=thumbnail_format.max_height,
animated=thumbnail_format.animated,
width=resized.width,
height=height,
byte_size=len(thumbnailed_bytes),
)
)
)
written_images += 1
except pyvips.Error as e:
logger.exception(e)
if written_images == 0 and len(image_attachment.thumbnail_metadata) == 0:
# We have never thumbnailed this -- it most likely had
# bad data. Remove the ImageAttachment row, since it is
# not valid for thumbnailing.
image_attachment.delete()
return 0
image_attachment.save(update_fields=["thumbnail_metadata"])
return written_images