import logging
import time
from dataclasses import asdict
from io import BytesIO
from typing import Any

import pyvips
from django.db import transaction
from typing_extensions import override

from zerver.actions.message_edit import do_update_embedded_data
from zerver.lib.mime_types import guess_type
from zerver.lib.thumbnail import (
    MarkdownImageMetadata,
    StoredThumbnailFormat,
    get_default_thumbnail_url,
    get_image_thumbnail_path,
    missing_thumbnails,
    rewrite_thumbnailed_images,
)
from zerver.lib.upload import save_attachment_contents, upload_backend
from zerver.models import ArchivedMessage, ImageAttachment, Message
from zerver.worker.base import QueueProcessingWorker, assign_queue

logger = logging.getLogger(__name__)


@assign_queue("thumbnail")
class ThumbnailWorker(QueueProcessingWorker):
    @override
    def consume(self, event: dict[str, Any]) -> None:
        start = time.time()
        with transaction.atomic(savepoint=False):
            try:
                # This lock prevents us from racing with the on-demand
                # rendering that can be triggered if a request is made
                # directly to a thumbnail URL we have not made yet.
                # This may mean that we generate 0 thumbnail images
                # once we get the lock.
                row = ImageAttachment.objects.select_for_update().get(id=event["id"])
            except ImageAttachment.DoesNotExist:  # nocoverage
                logger.info("ImageAttachment row %d missing", event["id"])
                return
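            # ensure_thumbnails returns the number of thumbnail images it
            # actually uploaded; this may be zero if the on-demand path
            # already generated everything we needed.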
            uploaded_thumbnails = ensure_thumbnails(row)

        end = time.time()
        logger.info(
            "Processed %d thumbnails (%dms)",
            uploaded_thumbnails,
            (end - start) * 1000,
        )


def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
    needed_thumbnails = missing_thumbnails(image_attachment)

    if not needed_thumbnails:
        return 0

    written_images = 0
    image_bytes = BytesIO()
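    # Fetch the original upload from the storage backend into memory; all
    # thumbnails are generated from this single copy.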
    save_attachment_contents(image_attachment.path_id, image_bytes)
    try:
        # TODO: We could save some computational time by using the same
        # bytes if multiple resolutions are larger than the source
        # image.  That is, if the input is 10x10, a 100x100.jpg is
        # going to be the same as a 200x200.jpg, since those set the
        # max dimensions, and we do not scale up.
        for thumbnail_format in needed_thumbnails:
            # This will scale to fit within the given dimensions; it
            # may be smaller on one or more of them.
            logger.info(
                "Resizing to %d x %d, from %d x %d",
                thumbnail_format.max_width,
                thumbnail_format.max_height,
                image_attachment.original_width_px,
                image_attachment.original_height_px,
            )
            load_opts = ""
            if image_attachment.frames > 1:
                # If the original has multiple frames, we only want to
                # load one of them if we're outputting to a static
                # format; otherwise we load them all.
                if thumbnail_format.animated:
                    load_opts = "n=-1"
                else:
                    load_opts = "n=1"
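            # option_string is passed through to the image loader ("n=-1"
            # loads every frame, "n=1" just the first), and
            # size=pyvips.Size.DOWN ensures we only ever shrink, never
            # enlarge, the original.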
            resized = pyvips.Image.thumbnail_buffer(
                image_bytes.getbuffer(),
                thumbnail_format.max_width,
                height=thumbnail_format.max_height,
                option_string=load_opts,
                size=pyvips.Size.DOWN,
            )
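            # The output format and any encoder options come from the vips
            # suffix string, e.g. ".webp[...]".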
            thumbnailed_bytes = resized.write_to_buffer(
                f".{thumbnail_format.extension}[{thumbnail_format.opts}]"
            )
            content_type = guess_type(f"image.{thumbnail_format.extension}")[0]
            assert content_type is not None
            thumbnail_path = get_image_thumbnail_path(image_attachment, thumbnail_format)
            logger.info("Uploading %d bytes to %s", len(thumbnailed_bytes), thumbnail_path)
            upload_backend.upload_message_attachment(
                thumbnail_path,
                content_type,
                thumbnailed_bytes,
                None,
            )
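            # For animated output, vips reports the combined height of all
            # frames stacked vertically; "page-height" is the height of a
            # single frame, which is what we want to record.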
            height = resized.get("page-height") if thumbnail_format.animated else resized.height
            image_attachment.thumbnail_metadata.append(
                asdict(
                    StoredThumbnailFormat(
                        extension=thumbnail_format.extension,
                        content_type=content_type,
                        max_width=thumbnail_format.max_width,
                        max_height=thumbnail_format.max_height,
                        animated=thumbnail_format.animated,
                        width=resized.width,
                        height=height,
                        byte_size=len(thumbnailed_bytes),
                    )
                )
            )
            written_images += 1

    except pyvips.Error as e:
        logger.exception(e)

        if written_images == 0 and len(image_attachment.thumbnail_metadata) == 0:
            # We have never thumbnailed this -- it most likely had
            # bad data.  Remove the ImageAttachment row, since it is
            # not valid for thumbnailing.
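            # Passing image_data=None makes update_message_rendered_content
            # drop the thumbnail references for this path_id, rather than
            # rewriting them to a thumbnail URL.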
            update_message_rendered_content(
                image_attachment.realm_id, image_attachment.path_id, None
            )
            image_attachment.delete()
            return 0
        else:  # nocoverage
            # TODO: Clean up any dangling thumbnails we may have
            # produced?  It seems unlikely that we'd fail on one size
            # but not another, but anything's possible.
            pass

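    # Persist the metadata for the thumbnails we just wrote, then point any
    # messages referencing this image at its default thumbnail.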
    image_attachment.save(update_fields=["thumbnail_metadata"])
    url, is_animated = get_default_thumbnail_url(image_attachment)
    update_message_rendered_content(
        image_attachment.realm_id,
        image_attachment.path_id,
        MarkdownImageMetadata(
            url=url,
            is_animated=is_animated,
            original_width_px=image_attachment.original_width_px,
            original_height_px=image_attachment.original_height_px,
        ),
    )
    return written_images


def update_message_rendered_content(
    realm_id: int, path_id: str, image_data: MarkdownImageMetadata | None
) -> None:
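    # Rewrite the cached rendered_content of every live and archived message
    # that references this attachment.  image_data is None when the image
    # turned out not to be thumbnailable.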
    for message_class in [Message, ArchivedMessage]:
        messages_with_image = (
            message_class.objects.filter(  # type: ignore[attr-defined] # TODO: ?
                realm_id=realm_id, attachment__path_id=path_id
            )
            .select_for_update()
            .order_by("id")
        )
        for message in messages_with_image:
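            # If image_data is set, rewrite this attachment's references to
            # use the new thumbnail metadata; if it is None, pass the
            # path_id in the removal set instead.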
            rendered_content = rewrite_thumbnailed_images(
                message.rendered_content,
                {} if image_data is None else {path_id: image_data},
                {path_id} if image_data is None else set(),
            )
            if rendered_content is None:
                # There were no updates -- for instance, if we re-run
                # ensure_thumbnails on an ImageAttachment we already
                # ran it on once.  Do not bother to send a no-op
                # update to clients.
                continue
            if isinstance(message, Message):
                # Perform a silent update push to the clients
                do_update_embedded_data(message.sender, message, rendered_content)
            else:
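                # Archived messages have no connected clients to notify, so
                # just update the stored rendered_content directly.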
                message.rendered_content = rendered_content
                message.save(update_fields=["rendered_content"])