zulip/zerver/worker/thumbnail.py

import logging
import time
from dataclasses import asdict
from io import BytesIO
from typing import Any

import pyvips
from django.db import transaction
from typing_extensions import override

from zerver.actions.message_edit import do_update_embedded_data
from zerver.lib.mime_types import guess_type
from zerver.lib.thumbnail import (
    MarkdownImageMetadata,
    StoredThumbnailFormat,
    get_default_thumbnail_url,
    get_image_thumbnail_path,
    missing_thumbnails,
    rewrite_thumbnailed_images,
)
from zerver.lib.upload import save_attachment_contents, upload_backend
from zerver.models import ArchivedMessage, ImageAttachment, Message
from zerver.worker.base import QueueProcessingWorker, assign_queue

logger = logging.getLogger(__name__)
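

# Descriptive note: this worker consumes the "thumbnail" queue.  For each
# enqueued ImageAttachment id, it generates whichever thumbnail formats are
# still missing and then rewrites the rendered content of any messages that
# reference the image.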
@assign_queue("thumbnail")
class ThumbnailWorker(QueueProcessingWorker):
    @override
    def consume(self, event: dict[str, Any]) -> None:
        start = time.time()
        with transaction.atomic(savepoint=False):
            try:
                # This lock prevents us from racing with the on-demand
                # rendering that can be triggered if a request is made
                # directly to a thumbnail URL we have not generated yet.
                # This may mean that we generate zero thumbnail images
                # once we get the lock.
                row = ImageAttachment.objects.select_for_update().get(id=event["id"])
            except ImageAttachment.DoesNotExist:  # nocoverage
                logger.info("ImageAttachment row %d missing", event["id"])
                return
            uploaded_thumbnails = ensure_thumbnails(row)
        end = time.time()
        logger.info(
            "Processed %d thumbnails (%dms)",
            uploaded_thumbnails,
            (end - start) * 1000,
        )


def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
    needed_thumbnails = missing_thumbnails(image_attachment)
    if not needed_thumbnails:
        return 0

    written_images = 0
    image_bytes = BytesIO()
    save_attachment_contents(image_attachment.path_id, image_bytes)
    try:
        # TODO: We could save some computational time by using the same
        # bytes if multiple resolutions are larger than the source
        # image.  That is, if the input is 10x10, a 100x100.jpg is
        # going to be the same as a 200x200.jpg, since those set the
        # max dimensions, and we do not scale up.
        for thumbnail_format in needed_thumbnails:
            # This will scale to fit within the given dimensions; it
            # may be smaller on one or more of them.
            logger.info(
                "Resizing to %d x %d, from %d x %d",
                thumbnail_format.max_width,
                thumbnail_format.max_height,
                image_attachment.original_width_px,
                image_attachment.original_height_px,
            )
            load_opts = ""
            if image_attachment.frames > 1:
                # If the original has multiple frames, we want to load
                # only one of them if we're outputting to a static
                # format; otherwise, we load them all.
                if thumbnail_format.animated:
                    load_opts = "n=-1"
                else:
                    load_opts = "n=1"
            resized = pyvips.Image.thumbnail_buffer(
                image_bytes.getbuffer(),
                thumbnail_format.max_width,
                height=thumbnail_format.max_height,
                option_string=load_opts,
                size=pyvips.Size.DOWN,
            )
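            # The output format is selected by the extension in the save
            # string (e.g. ".webp"), with saver options in brackets; this is
            # pyvips' usual ".ext[key=value,...]" convention.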
            thumbnailed_bytes = resized.write_to_buffer(
                f".{thumbnail_format.extension}[{thumbnail_format.opts}]"
            )
            content_type = guess_type(f"image.{thumbnail_format.extension}")[0]
            assert content_type is not None
            thumbnail_path = get_image_thumbnail_path(image_attachment, thumbnail_format)
            logger.info("Uploading %d bytes to %s", len(thumbnailed_bytes), thumbnail_path)
            upload_backend.upload_message_attachment(
                thumbnail_path,
                content_type,
                thumbnailed_bytes,
                None,
            )
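            # For animated output, vips stores the frames as one tall strip,
            # so resized.height would be (frame height * frame count); the
            # "page-height" metadata field is the height of a single frame.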
            height = resized.get("page-height") if thumbnail_format.animated else resized.height
            image_attachment.thumbnail_metadata.append(
                asdict(
                    StoredThumbnailFormat(
                        extension=thumbnail_format.extension,
                        content_type=content_type,
                        max_width=thumbnail_format.max_width,
                        max_height=thumbnail_format.max_height,
                        animated=thumbnail_format.animated,
                        width=resized.width,
                        height=height,
                        byte_size=len(thumbnailed_bytes),
                    )
                )
            )
            written_images += 1
    except pyvips.Error as e:
        logger.exception(e)

        if written_images == 0 and len(image_attachment.thumbnail_metadata) == 0:
            # We have never thumbnailed this -- it most likely had
            # bad data.  Remove the ImageAttachment row, since it is
            # not valid for thumbnailing.
            update_message_rendered_content(
                image_attachment.realm_id, image_attachment.path_id, None
            )
            image_attachment.delete()
            return 0
        else:  # nocoverage
            # TODO: Clean up any dangling thumbnails we may have
            # produced?  It seems unlikely that we'd fail on one size
            # but not another, but anything's possible.
            pass

    image_attachment.save(update_fields=["thumbnail_metadata"])
    url, is_animated = get_default_thumbnail_url(image_attachment)
    update_message_rendered_content(
        image_attachment.realm_id,
        image_attachment.path_id,
        MarkdownImageMetadata(
            url=url,
            is_animated=is_animated,
            original_width_px=image_attachment.original_width_px,
            original_height_px=image_attachment.original_height_px,
        ),
    )
    return written_images
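

# Update the rendered_content of every message (live or archived) that
# references path_id.  Callers pass image_data for a freshly thumbnailed
# image, or None when thumbnailing failed and the ImageAttachment row is
# being deleted.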
def update_message_rendered_content(
    realm_id: int, path_id: str, image_data: MarkdownImageMetadata | None
) -> None:
    for message_class in [Message, ArchivedMessage]:
        messages_with_image = (
            message_class.objects.filter(  # type: ignore[attr-defined]  # TODO: ?
                realm_id=realm_id, attachment__path_id=path_id
            )
            .select_for_update()
            .order_by("id")
        )
        for message in messages_with_image:
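            # rewrite_thumbnailed_images takes the current rendered HTML, a
            # map of path_id -> metadata for images whose thumbnails now
            # exist, and a set of path_ids whose thumbnailing failed; its
            # first return value is the updated HTML, or None if nothing
            # changed.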
            rendered_content = rewrite_thumbnailed_images(
                message.rendered_content,
                {} if image_data is None else {path_id: image_data},
                {path_id} if image_data is None else set(),
            )[0]
            if rendered_content is None:
                # There were no updates -- for instance, if we re-run
                # ensure_thumbnails on an ImageAttachment we already
                # ran it on once.  Do not bother to send no-op updates
                # to clients.
                continue

            if isinstance(message, Message):
                # Perform a silent update push to the clients
                do_update_embedded_data(message.sender, message, rendered_content)
            else:
                message.rendered_content = rendered_content
                message.save(update_fields=["rendered_content"])