From b42863be4b2ccf7ca697e2bdea4fc5fdf2b5c26d Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Fri, 21 Jun 2024 19:02:36 +0000 Subject: [PATCH] markdown: Show thumbnails for uploaded images. Fixes: #16210. --- web/src/lightbox.ts | 2 + zerver/actions/message_edit.py | 37 +- zerver/lib/markdown/__init__.py | 50 ++- zerver/lib/thumbnail.py | 103 +++++- .../tests/fixtures/markdown_test_cases.json | 18 +- zerver/tests/test_events.py | 27 +- zerver/tests/test_markdown.py | 173 +++++---- zerver/tests/test_markdown_thumbnail.py | 334 ++++++++++++++++++ zerver/worker/embed_links.py | 2 +- zerver/worker/thumbnail.py | 56 ++- zproject/default_settings.py | 1 - 11 files changed, 654 insertions(+), 149 deletions(-) create mode 100644 zerver/tests/test_markdown_thumbnail.py diff --git a/web/src/lightbox.ts b/web/src/lightbox.ts index 93a512011e..94cbc291d2 100644 --- a/web/src/lightbox.ts +++ b/web/src/lightbox.ts @@ -509,6 +509,8 @@ export function parse_media_data(media: HTMLElement): Payload { type = "image"; if ($media.attr("data-src-fullsize")) { source = $media.attr("data-src-fullsize"); + } else if ($media.attr("src")?.startsWith("/user_uploads/thumbnail/")) { + source = url; } else { source = preview_src; } diff --git a/zerver/actions/message_edit.py b/zerver/actions/message_edit.py index 3597e33ce6..bdcd7ee2b0 100644 --- a/zerver/actions/message_edit.py +++ b/zerver/actions/message_edit.py @@ -366,33 +366,30 @@ def update_user_message_flags( def do_update_embedded_data( user_profile: UserProfile, message: Message, - content: str | None, - rendering_result: MessageRenderingResult, + rendered_content: str | MessageRenderingResult, ) -> None: - timestamp = timezone_now() + ums = UserMessage.objects.filter(message=message.id) + update_fields = ["rendered_content"] + if isinstance(rendered_content, MessageRenderingResult): + update_user_message_flags(rendered_content, ums) + message.rendered_content = rendered_content.rendered_content + message.rendered_content_version = markdown_version + update_fields.append("rendered_content_version") + else: + message.rendered_content = rendered_content + message.save(update_fields=update_fields) + + update_message_cache([message]) event: dict[str, Any] = { "type": "update_message", "user_id": None, - "edit_timestamp": datetime_to_timestamp(timestamp), + "edit_timestamp": datetime_to_timestamp(timezone_now()), "message_id": message.id, + "message_ids": [message.id], + "content": message.content, + "rendered_content": message.rendered_content, "rendering_only": True, } - changed_messages = [message] - rendered_content: str | None = None - - ums = UserMessage.objects.filter(message=message.id) - - if content is not None: - update_user_message_flags(rendering_result, ums) - rendered_content = rendering_result.rendered_content - message.rendered_content = rendered_content - message.rendered_content_version = markdown_version - event["content"] = content - event["rendered_content"] = rendered_content - - message.save(update_fields=["content", "rendered_content"]) - - event["message_ids"] = update_message_cache(changed_messages) def user_info(um: UserMessage) -> dict[str, Any]: return { diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index 8d5309c2bb..cc64147ae7 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -13,7 +13,7 @@ from datetime import datetime, timezone from functools import lru_cache from re import Match, Pattern from typing import Any, Generic, Optional, TypeAlias, TypedDict, TypeVar, cast -from urllib.parse import parse_qs, quote, urlencode, urljoin, urlsplit, urlunsplit +from urllib.parse import parse_qs, quote, urljoin, urlsplit, urlunsplit from xml.etree.ElementTree import Element, SubElement import ahocorasick @@ -55,7 +55,7 @@ from zerver.lib.mention import ( from zerver.lib.outgoing_http import OutgoingSession from zerver.lib.subdomains import is_static_or_current_realm_url from zerver.lib.tex import render_tex -from zerver.lib.thumbnail import user_uploads_or_external +from zerver.lib.thumbnail import get_user_upload_previews, rewrite_thumbnailed_images from zerver.lib.timeout import unsafe_timeout from zerver.lib.timezone import common_timezones from zerver.lib.types import LinkifierDict @@ -125,6 +125,7 @@ class DbData: sent_by_bot: bool stream_names: dict[str, int] translate_emoticons: bool + user_upload_previews: dict[str, tuple[str, bool] | None] # Format version of the Markdown rendering; stored along with rendered @@ -615,18 +616,21 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): if data_id is not None: a.set("data-id", data_id) img = SubElement(a, "img") - if ( - settings.THUMBNAIL_IMAGES - and (not already_thumbnailed) - and user_uploads_or_external(image_url) - ): - # We strip leading '/' from relative URLs here to ensure - # consistency in what gets passed to /thumbnail - image_url = image_url.lstrip("/") - img.set("src", "/thumbnail?" + urlencode({"url": image_url, "size": "thumbnail"})) - img.set( - "data-src-fullsize", "/thumbnail?" + urlencode({"url": image_url, "size": "full"}) - ) + if image_url.startswith("/user_uploads/") and self.zmd.zulip_db_data: + path_id = image_url[len("/user_uploads/") :] + + # We should have pulled the preview data for this image + # (even if that's "no preview yet") from the database + # before rendering; is_image should have enforced that. + assert path_id in self.zmd.zulip_db_data.user_upload_previews + + # Insert a placeholder image spinner. We post-process + # this content (see rewrite_thumbnailed_images in + # zerver.lib.thumbnail), looking specifically for this + # tag, and may re-write it into the thumbnail URL if it + # already exists when the message is sent. + img.set("class", "image-loading-placeholder") + img.set("src", "/static/images/loading/loader-black.svg") else: img.set("src", image_url) @@ -724,6 +728,13 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): if parsed_url.netloc == "pasteboard.co": return False + # Check against the previews we generated -- if we didn't have + # a row for the ImageAttachment, then its header didn't parse + # as a valid image type which libvips handles. + if url.startswith("/user_uploads/") and self.zmd.zulip_db_data: + path_id = url[len("/user_uploads/") :] + return path_id in self.zmd.zulip_db_data.user_upload_previews + return any(parsed_url.path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS) def corrected_image_source(self, url: str) -> str | None: @@ -2623,6 +2634,7 @@ def do_convert( _md_engine.url_embed_data = url_embed_data # Pre-fetch data from the DB that is used in the Markdown thread + user_upload_previews = None if message_realm is not None: # Here we fetch the data structures needed to render # mentions/stream mentions from the database, but only @@ -2645,6 +2657,7 @@ def do_convert( else: active_realm_emoji = {} + user_upload_previews = get_user_upload_previews(message_realm.id, content) _md_engine.zulip_db_data = DbData( realm_alert_words_automaton=realm_alert_words_automaton, mention_data=mention_data, @@ -2653,6 +2666,7 @@ def do_convert( sent_by_bot=sent_by_bot, stream_names=stream_name_info, translate_emoticons=translate_emoticons, + user_upload_previews=user_upload_previews, ) try: @@ -2663,6 +2677,14 @@ def do_convert( # infinite-loop). rendering_result.rendered_content = unsafe_timeout(5, lambda: _md_engine.convert(content)) + # Post-process the result with the rendered image previews: + if user_upload_previews is not None: + content_with_thumbnails = rewrite_thumbnailed_images( + rendering_result.rendered_content, user_upload_previews + ) + if content_with_thumbnails is not None: + rendering_result.rendered_content = content_with_thumbnails + # Throw an exception if the content is huge; this protects the # rest of the codebase from any bugs where we end up rendering # something huge. diff --git a/zerver/lib/thumbnail.py b/zerver/lib/thumbnail.py index 89699089a9..a599e0323a 100644 --- a/zerver/lib/thumbnail.py +++ b/zerver/lib/thumbnail.py @@ -8,6 +8,7 @@ from typing import TypeVar from urllib.parse import urljoin import pyvips +from bs4 import BeautifulSoup from django.utils.http import url_has_allowed_host_and_scheme from django.utils.translation import gettext as _ from typing_extensions import override @@ -136,12 +137,6 @@ class BadImageError(JsonableError): code = ErrorCode.BAD_IMAGE -def user_uploads_or_external(url: str) -> bool: - return not url_has_allowed_host_and_scheme(url, allowed_hosts=None) or url.startswith( - "/user_uploads/" - ) - - def generate_thumbnail_url(path: str, size: str = "0x0") -> str: path = urljoin("/", path) @@ -324,3 +319,99 @@ def split_thumbnail_path(file_path: str) -> tuple[str, BaseThumbnailFormat]: assert thumbnail_format is not None path_id = "/".join(path_parts[1:]) return path_id, thumbnail_format + + +def get_user_upload_previews(realm_id: int, content: str) -> dict[str, tuple[str, bool] | None]: + matches = re.findall(r"/user_uploads/(\d+/[/\w.-]+)", content) + upload_preview_data: dict[str, tuple[str, bool] | None] = {} + for image_attachment in ImageAttachment.objects.filter(realm_id=realm_id, path_id__in=matches): + if image_attachment.thumbnail_metadata == []: + # Image exists, and header of it parsed as a valid image, + # but has not been thumbnailed yet; we will render a + # spinner. + upload_preview_data[image_attachment.path_id] = None + else: + upload_preview_data[image_attachment.path_id] = get_default_thumbnail_url( + image_attachment + ) + return upload_preview_data + + +def get_default_thumbnail_url(image_attachment: ImageAttachment) -> tuple[str, bool]: + # For "dumb" clients which cannot rewrite it into their + # preferred format and size, we choose the first one in + # THUMBNAIL_OUTPUT_FORMATS which matches the animated/not + # nature of the source image. + found_format: ThumbnailFormat | None = None + for thumbnail_format in THUMBNAIL_OUTPUT_FORMATS: + if thumbnail_format.animated == (image_attachment.frames > 1): + found_format = thumbnail_format + break + if found_format is None: + # No animated thumbnail formats exist somehow, and the + # image is animated? Just take the first thumbnail + # format. + found_format = THUMBNAIL_OUTPUT_FORMATS[0] + return ( + "/user_uploads/" + get_image_thumbnail_path(image_attachment, found_format), + found_format.animated, + ) + + +def rewrite_thumbnailed_images( + rendered_content: str, + images: dict[str, tuple[str, bool] | None], + to_delete: set[str] | None = None, +) -> str | None: + if not images and not to_delete: + return None + + parsed_message = BeautifulSoup(rendered_content, "html.parser") + + changed = False + for inline_image_div in parsed_message.find_all("div", class_="message_inline_image"): + image_link = inline_image_div.find("a") + if ( + image_link is None + or image_link["href"] is None + or not image_link["href"].startswith("/user_uploads/") + ): + # This is not an inline image generated by the markdown + # processor for a locally-uploaded image. + continue + image_tag = image_link.find("img", class_="image-loading-placeholder") + if image_tag is None: + # The placeholder was already replaced -- for instance, + # this is expected if multiple images are included in the + # same message. The second time this is run, for the + # second image, the first image will have no placeholder. + continue + + path_id = image_link["href"][len("/user_uploads/") :] + if to_delete and path_id in to_delete: + # This was not a valid thumbnail target, for some reason. + # Trim out the whole "message_inline_image" element, since + # it's not going be renderable by clients either. + inline_image_div.decompose() + changed = True + continue + + image_data = images.get(path_id) + if image_data is None: + # Has not been thumbnailed yet; leave it as a spinner. + # This happens routinely when a message contained multiple + # unthumbnailed images, and only one of those images just + # completed thumbnailing. + pass + else: + changed = True + del image_tag["class"] + image_tag["src"], is_animated = image_data + if is_animated: + image_tag["data-animated"] = "true" + + if changed: + # The formatter="html5" means we do not produce self-closing tags + return parsed_message.encode(formatter="html5").decode().strip() + else: + return None diff --git a/zerver/tests/fixtures/markdown_test_cases.json b/zerver/tests/fixtures/markdown_test_cases.json index f418b2f1e8..2e2b3c6fd1 100644 --- a/zerver/tests/fixtures/markdown_test_cases.json +++ b/zerver/tests/fixtures/markdown_test_cases.json @@ -408,7 +408,7 @@ { "name": "inline_image", "input": "Google logo today: https://www.google.com/images/srpr/logo4w.png\nKinda boring", - "expected_output": "

Google logo today: https://www.google.com/images/srpr/logo4w.png
\nKinda boring

\n
", + "expected_output": "

Google logo today: https://www.google.com/images/srpr/logo4w.png
\nKinda boring

\n
", "backend_only_rendering": true, "text_content": "Google logo today: https:\/\/www.google.com\/images\/srpr\/logo4w.png\nKinda boring\n" }, @@ -422,34 +422,34 @@ { "name": "two_inline_images", "input": "Google logo today: https://www.google.com/images/srpr/logo4w.png\nKinda boring\nZulip logo: https://zulip.com/static/images/logo/zulip-icon-128x128.png", - "expected_output": "

Google logo today: https:\/\/www.google.com\/images\/srpr\/logo4w.png<\/a>
\nKinda boring
\nZulip logo:
https:\/\/zulip.com\/static\/images\/logo\/zulip-icon-128x128.png<\/a><\/p>\n

<\/a><\/div>
<\/a><\/div>", + "expected_output": "

Google logo today: https:\/\/www.google.com\/images\/srpr\/logo4w.png<\/a>
\nKinda boring
\nZulip logo:
https:\/\/zulip.com\/static\/images\/logo\/zulip-icon-128x128.png<\/a><\/p>\n

<\/a><\/div>
<\/a><\/div>", "backend_only_rendering": true, "text_content": "Google logo today: https:\/\/www.google.com\/images\/srpr\/logo4w.png\nKinda boring\nZulip logo: https:\/\/zulip.com\/static\/images\/logo\/zulip-icon-128x128.png\n" }, { "name": "deduplicate_inline_previews", "input": "Google logo today: https://www.google.com/images/srpr/logo4w.png\nKinda boringGoogle logo today: https://www.google.com/images/srpr/logo4w.png\nKinda boring", - "expected_output": "

Google logo today: https://www.google.com/images/srpr/logo4w.png
\nKinda boringGoogle logo today: https://www.google.com/images/srpr/logo4w.png
\nKinda boring

\n
", + "expected_output": "

Google logo today: https://www.google.com/images/srpr/logo4w.png
\nKinda boringGoogle logo today: https://www.google.com/images/srpr/logo4w.png
\nKinda boring

\n
", "backend_only_rendering": true, "text_content": "Google logo today: https:\/\/www.google.com\/images\/srpr\/logo4w.png\nKinda boringGoogle logo today: https:\/\/www.google.com\/images\/srpr\/logo4w.png\nKinda boring\n" }, { "name": "bulleted_list_inlining", "input": "* Google?\n* Google. https://www.google.com/images/srpr/logo4w.png\n* Google!", - "expected_output": "", + "expected_output": "", "backend_only_rendering": true, "text_content": "\nGoogle?\nGoogle. https://www.google.com/images/srpr/logo4w.png\nGoogle!\n" }, { "name": "only_inline_image", "input": "https://www.google.com/images/srpr/logo4w.png", - "expected_output": "
", + "expected_output": "
", "backend_only_rendering": true }, { "name": "only_named_inline_image", "input": "[Google link](https://www.google.com/images/srpr/logo4w.png)", - "expected_output": "

Google link

\n
", + "expected_output": "

Google link

\n
", "backend_only_rendering": true, "text_content": "Google link\n" }, @@ -993,9 +993,9 @@ { "name": "spoiler_with_inline_image", "input": "```spoiler header\nContent http://example.com/image.png\n```", - "expected_output": "
\n

header

\n
", - "marked_expected_output": "
\n

header

\n
", - "text_content": "header (…)\n" + "expected_output": "
\n

header

\n
", + "marked_expected_output": "
\n

header

\n
", + "text_content": "header (…)\n" }, { "name": "embedded_link_inside_Bold", diff --git a/zerver/tests/test_events.py b/zerver/tests/test_events.py index f6d8b7356d..05068f1bd9 100644 --- a/zerver/tests/test_events.py +++ b/zerver/tests/test_events.py @@ -211,12 +211,14 @@ from zerver.lib.test_helpers import ( create_dummy_file, get_subscription, get_test_image_file, + read_test_image_file, reset_email_visibility_to_everyone_in_zulip_realm, stdout_suppressed, ) from zerver.lib.timestamp import convert_to_UTC, datetime_to_timestamp from zerver.lib.topic import TOPIC_NAME from zerver.lib.types import ProfileDataElementUpdateDict +from zerver.lib.upload import upload_message_attachment from zerver.lib.user_groups import ( AnonymousSettingGroupDict, get_group_setting_value_for_api, @@ -225,6 +227,7 @@ from zerver.lib.user_groups import ( from zerver.models import ( Attachment, CustomProfileField, + ImageAttachment, Message, MultiuseInvite, NamedUserGroup, @@ -258,6 +261,7 @@ from zerver.tornado.event_queue import ( send_web_reload_client_events, ) from zerver.views.realm_playgrounds import access_playground_by_id +from zerver.worker.thumbnail import ensure_thumbnails class BaseAction(ZulipTestCase): @@ -935,7 +939,7 @@ class NormalActionsTest(BaseAction): content = "embed_content" rendering_result = render_message_markdown(message, content) with self.verify_action(state_change_expected=False) as events: - do_update_embedded_data(self.user_profile, message, content, rendering_result) + do_update_embedded_data(self.user_profile, message, rendering_result) check_update_message( "events[0]", events[0], @@ -1028,6 +1032,27 @@ class NormalActionsTest(BaseAction): is_embedded_update_only=False, ) + def test_thumbnail_event(self) -> None: + iago = self.example_user("iago") + url = upload_message_attachment( + "img.png", "image/png", read_test_image_file("img.png"), self.example_user("iago") + ) + path_id = url[len("/user_upload/") + 1 :] + self.send_stream_message(iago, "Verona", f"[img.png]({url})") + + # Generating a thumbnail for an image sends a message update event + with self.verify_action(state_change_expected=False) as events: + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) + check_update_message( + "events[0]", + events[0], + is_stream_message=False, + has_content=False, + has_topic=False, + has_new_stream_id=False, + is_embedded_update_only=True, + ) + def test_update_message_flags(self) -> None: # Test message flag update events message = self.send_personal_message( diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index 8c1bb653f3..3a4f65c375 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -713,51 +713,13 @@ class MarkdownEmbedsTest(ZulipTestCase): f"""

http://www.youtube.com/watch_videos?video_ids=nOJgD4fcZhI,i96UO8-GFvw

\n
""", ) - @override_settings(THUMBNAIL_IMAGES=True) - def test_inline_image_thumbnail_url(self) -> None: - realm = get_realm("zephyr") - msg = "[foobar](/user_uploads/{realm_id}/50/w2G6ok9kr8AMCQCTNAUOFMln/IMG_0677.JPG)" - msg = msg.format(realm_id=realm.id) - thumbnail_img = '<' - thumbnail_img = thumbnail_img.format(realm_id=realm.id) - converted = markdown_convert_wrapper(msg) - self.assertIn(thumbnail_img, converted) - - msg = "https://www.google.com/images/srpr/logo4w.png" - thumbnail_img = '' - converted = markdown_convert_wrapper(msg) - self.assertIn(thumbnail_img, converted) - - msg = "www.google.com/images/srpr/logo4w.png" - thumbnail_img = '' - converted = markdown_convert_wrapper(msg) - self.assertIn(thumbnail_img, converted) - - msg = "https://www.google.com/images/srpr/logo4w.png" - thumbnail_img = f"""
""" - with self.settings(THUMBNAIL_IMAGES=False): - converted = markdown_convert_wrapper(msg) - self.assertIn(thumbnail_img, converted) - - # Any URL which is not an external link and doesn't start with - # /user_uploads/ is not thumbnailed - msg = "[foobar](/static/images/cute/turtle.png)" - thumbnail_img = '
' - converted = markdown_convert_wrapper(msg) - self.assertIn(thumbnail_img, converted) - - msg = "[foobar](/user_avatars/{realm_id}/emoji/images/50.png)" - msg = msg.format(realm_id=realm.id) - thumbnail_img = '
' - thumbnail_img = thumbnail_img.format(realm_id=realm.id) - converted = markdown_convert_wrapper(msg) - self.assertIn(thumbnail_img, converted) - - @override_settings(THUMBNAIL_IMAGES=True) def test_inline_image_preview(self) -> None: - with_preview = '
' - without_preview = '

http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg

' - content = "http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg" + url = "http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg" + camo_url = get_camo_url(url) + with_preview = ( + f'
' + ) + without_preview = f'

{url}

' sender_user_profile = self.example_user("othello") msg = Message( @@ -765,7 +727,7 @@ class MarkdownEmbedsTest(ZulipTestCase): sending_client=get_client("test"), realm=sender_user_profile.realm, ) - converted = render_message_markdown(msg, content) + converted = render_message_markdown(msg, url) self.assertEqual(converted.rendered_content, with_preview) realm = msg.get_realm() @@ -778,17 +740,9 @@ class MarkdownEmbedsTest(ZulipTestCase): sending_client=get_client("test"), realm=sender_user_profile.realm, ) - converted = render_message_markdown(msg, content) + converted = render_message_markdown(msg, url) self.assertEqual(converted.rendered_content, without_preview) - @override_settings(EXTERNAL_URI_SCHEME="https://") - def test_external_image_preview_use_camo(self) -> None: - content = "https://example.com/thing.jpeg" - - thumbnail_img = f"""
""" - converted = markdown_convert_wrapper(content) - self.assertIn(converted, thumbnail_img) - @override_settings(EXTERNAL_URI_SCHEME="https://") def test_static_image_preview_skip_camo(self) -> None: content = f"{ settings.STATIC_URL }/thing.jpeg" @@ -843,10 +797,13 @@ class MarkdownEmbedsTest(ZulipTestCase): soup = BeautifulSoup(converted, "html.parser") self.assert_length(soup(class_="message_inline_image"), 0) - @override_settings(THUMBNAIL_IMAGES=True) def test_inline_image_quoted_blocks(self) -> None: - content = "http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg" - expected = '
' + url = "http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg" + camo_url = get_camo_url(url) + content = f"{url}" + expected = ( + f'
' + ) sender_user_profile = self.example_user("othello") msg = Message( sender=sender_user_profile, @@ -856,8 +813,8 @@ class MarkdownEmbedsTest(ZulipTestCase): converted = render_message_markdown(msg, content) self.assertEqual(converted.rendered_content, expected) - content = ">http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg\n\nAwesome!" - expected = '
\n

http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg

\n
\n

Awesome!

' + content = f">{url}\n\nAwesome!" + expected = f'
\n

{url}

\n
\n

Awesome!

' sender_user_profile = self.example_user("othello") msg = Message( sender=sender_user_profile, @@ -867,8 +824,8 @@ class MarkdownEmbedsTest(ZulipTestCase): converted = render_message_markdown(msg, content) self.assertEqual(converted.rendered_content, expected) - content = ">* http://cdn.wallpapersafari.com/13/6/16eVjx.jpeg\n\nAwesome!" - expected = '
\n\n
\n

Awesome!

' + content = f">* {url}\n\nAwesome!" + expected = f'
\n\n
\n

Awesome!

' sender_user_profile = self.example_user("othello") msg = Message( sender=sender_user_profile, @@ -878,12 +835,23 @@ class MarkdownEmbedsTest(ZulipTestCase): converted = render_message_markdown(msg, content) self.assertEqual(converted.rendered_content, expected) - @override_settings(THUMBNAIL_IMAGES=True) def test_inline_image_preview_order(self) -> None: - realm = get_realm("zulip") - content = "http://imaging.nikon.com/lineup/dslr/df/img/sample/img_01.jpg\nhttp://imaging.nikon.com/lineup/dslr/df/img/sample/img_02.jpg\nhttp://imaging.nikon.com/lineup/dslr/df/img/sample/img_03.jpg" - expected = '

http://imaging.nikon.com/lineup/dslr/df/img/sample/img_01.jpg
\nhttp://imaging.nikon.com/lineup/dslr/df/img/sample/img_02.jpg
\nhttp://imaging.nikon.com/lineup/dslr/df/img/sample/img_03.jpg

\n
' - + urls = [ + "http://imaging.nikon.com/lineup/dslr/df/img/sample/img_01.jpg", + "http://imaging.nikon.com/lineup/dslr/df/img/sample/img_02.jpg", + "http://imaging.nikon.com/lineup/dslr/df/img/sample/img_03.jpg", + ] + content = "\n".join(urls) + expected = ( + "

" + f'{urls[0]}
\n' + f'{urls[1]}
\n' + f'{urls[2]}' + "

\n" + f'
' + f'
' + f'
' + ) sender_user_profile = self.example_user("othello") msg = Message( sender=sender_user_profile, @@ -893,10 +861,16 @@ class MarkdownEmbedsTest(ZulipTestCase): converted = render_message_markdown(msg, content) self.assertEqual(converted.rendered_content, expected) - content = "http://imaging.nikon.com/lineup/dslr/df/img/sample/img_01.jpg\n\n>http://imaging.nikon.com/lineup/dslr/df/img/sample/img_02.jpg\n\n* http://imaging.nikon.com/lineup/dslr/df/img/sample/img_03.jpg\n* https://www.google.com/images/srpr/logo4w.png" - expected = '
\n

http://imaging.nikon.com/lineup/dslr/df/img/sample/img_02.jpg

\n
\n
    \n
  • \n
  • \n
' - - sender_user_profile = self.example_user("othello") + urls.append("https://www.google.com/images/srpr/logo4w.png") + content = f"{urls[0]}\n\n" f">{urls[1]}\n\n" f"* {urls[2]}\n" f"* {urls[3]}" + expected = ( + f'
' + f'
\n

{urls[1]}

\n
\n' + "
    \n" + f'
  • \n' + f'
  • \n' + "
" + ) msg = Message( sender=sender_user_profile, sending_client=get_client("test"), @@ -905,24 +879,14 @@ class MarkdownEmbedsTest(ZulipTestCase): converted = render_message_markdown(msg, content) self.assertEqual(converted.rendered_content, expected) - content = "Test 1\n[21136101110_1dde1c1a7e_o.jpg](/user_uploads/{realm_id}/6d/F1PX6u16JA2P-nK45PyxHIYZ/21136101110_1dde1c1a7e_o.jpg) \n\nNext image\n[IMG_20161116_023910.jpg](/user_uploads/{realm_id}/69/sh7L06e7uH7NaX6d5WFfVYQp/IMG_20161116_023910.jpg) \n\nAnother screenshot\n[Screenshot-from-2016-06-01-16-22-42.png](/user_uploads/{realm_id}/70/_aZmIEWaN1iUaxwkDjkO7bpj/Screenshot-from-2016-06-01-16-22-42.png)" - content = content.format(realm_id=realm.id) - expected = '

Test 1
\n21136101110_1dde1c1a7e_o.jpg

\n

Next image
\nIMG_20161116_023910.jpg

\n

Another screenshot
\nScreenshot-from-2016-06-01-16-22-42.png

\n
' - expected = expected.format(realm_id=realm.id) - - msg = Message( - sender=sender_user_profile, - sending_client=get_client("test"), - realm=sender_user_profile.realm, - ) - converted = render_message_markdown(msg, content) - self.assertEqual(converted.rendered_content, expected) - - @override_settings(THUMBNAIL_IMAGES=True) def test_corrected_image_source(self) -> None: # testing only Wikipedia because linx.li URLs can be expected to expire content = "https://en.wikipedia.org/wiki/File:Wright_of_Derby,_The_Orrery.jpg" - expected = '
' + expected_url = ( + "https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg" + ) + camo_url = get_camo_url(expected_url) + expected = f'
' sender_user_profile = self.example_user("othello") msg = Message( @@ -934,7 +898,8 @@ class MarkdownEmbedsTest(ZulipTestCase): self.assertEqual(converted.rendered_content, expected) content = "https://en.wikipedia.org/static/images/icons/wikipedia.png" - expected = '
' + camo_url = get_camo_url(content) + expected = f'
' converted = render_message_markdown(msg, content) self.assertEqual(converted.rendered_content, expected) @@ -1042,13 +1007,19 @@ class MarkdownEmbedsTest(ZulipTestCase): @override_settings(THUMBNAIL_IMAGES=True) def test_inline_dropbox_negative(self) -> None: # Make sure we're not overzealous in our conversion: - msg = "Look at the new dropbox logo: https://www.dropbox.com/static/images/home_logo.png" + url = "https://www.dropbox.com/static/images/home_logo.png" + msg = f"Look at the new dropbox logo: {url}" with mock.patch("zerver.lib.markdown.fetch_open_graph_image", return_value=None): converted = markdown_convert_wrapper(msg) + camo_url = get_camo_url(url) self.assertEqual( converted, - '

Look at the new dropbox logo: https://www.dropbox.com/static/images/home_logo.png

\n
', + ( + f'

Look at the new dropbox logo: {url}

' + "\n" + f'
' + ), ) def test_inline_dropbox_bad(self) -> None: @@ -1063,21 +1034,35 @@ class MarkdownEmbedsTest(ZulipTestCase): @override_settings(THUMBNAIL_IMAGES=True) def test_inline_github_preview(self) -> None: - # Test photo album previews - msg = "Test: https://github.com/zulip/zulip/blob/main/static/images/logo/zulip-icon-128x128.png" + # Test github URL translation + url = "https://github.com/zulip/zulip/blob/main/static/images/logo/zulip-icon-128x128.png" + camo_url = get_camo_url( + "https://raw.githubusercontent.com/zulip/zulip/main/static/images/logo/zulip-icon-128x128.png" + ) + msg = f"Test: {url}" converted = markdown_convert_wrapper(msg) self.assertEqual( converted, - '

Test: https://github.com/zulip/zulip/blob/main/static/images/logo/zulip-icon-128x128.png

\n
', + ( + f'

Test: {url}

' + "\n" + f'
' + ), ) - msg = "Test: https://developer.github.com/assets/images/hero-circuit-bg.png" + url = "https://developer.github.com/assets/images/hero-circuit-bg.png" + camo_url = get_camo_url(url) + msg = f"Test: {url}" converted = markdown_convert_wrapper(msg) self.assertEqual( converted, - '

Test: https://developer.github.com/assets/images/hero-circuit-bg.png

\n
', + ( + f'

Test: {url}

' + "\n" + f'
' + ), ) def test_inline_youtube_preview(self) -> None: @@ -3168,7 +3153,7 @@ class MarkdownStreamMentionTests(ZulipTestCase): "

\n" '", ) diff --git a/zerver/tests/test_markdown_thumbnail.py b/zerver/tests/test_markdown_thumbnail.py new file mode 100644 index 0000000000..b352b1c0eb --- /dev/null +++ b/zerver/tests/test_markdown_thumbnail.py @@ -0,0 +1,334 @@ +import re +from unittest.mock import patch + +import pyvips + +from zerver.actions.message_delete import do_delete_messages +from zerver.lib.camo import get_camo_url +from zerver.lib.markdown import render_message_markdown +from zerver.lib.test_classes import ZulipTestCase +from zerver.lib.test_helpers import get_test_image_file, read_test_image_file +from zerver.lib.thumbnail import ThumbnailFormat +from zerver.lib.upload import upload_message_attachment +from zerver.models import ArchivedAttachment, ArchivedMessage, Attachment, ImageAttachment, Message +from zerver.models.clients import get_client +from zerver.worker.thumbnail import ensure_thumbnails + + +class MarkdownThumbnailTest(ZulipTestCase): + def upload_image(self, image_name: str) -> str: + self.login("othello") + with get_test_image_file(image_name) as image_file: + response = self.assert_json_success( + self.client_post("/json/user_uploads", {"file": image_file}) + ) + return re.sub(r"/user_uploads/", "", response["url"]) + + def upload_and_thumbnail_image(self, image_name: str) -> str: + with self.captureOnCommitCallbacks(execute=True): + # Running captureOnCommitCallbacks includes inserting into + # the Rabbitmq queue, which in testing means we + # immediately run the worker for it, producing the thumbnails. + return self.upload_image(image_name) + + def assert_message_content_is( + self, message_id: int, rendered_content: str, user_name: str = "othello" + ) -> None: + sender_user_profile = self.example_user(user_name) + result = self.assert_json_success( + self.api_get(sender_user_profile, f"/api/v1/messages/{message_id}") + ) + self.assertEqual(result["message"]["content"], rendered_content) + + def send_message_content( + self, content: str, do_thumbnail: bool = False, user_name: str = "othello" + ) -> int: + sender_user_profile = self.example_user(user_name) + return self.send_stream_message( + sender=sender_user_profile, + stream_name="Verona", + content=content, + skip_capture_on_commit_callbacks=not do_thumbnail, + ) + + def test_uploads_preview_order(self) -> None: + image_names = ["img.jpg", "img.png", "img.gif"] + path_ids = [self.upload_and_thumbnail_image(image_name) for image_name in image_names] + content = ( + f"Test 1\n[{image_names[0]}](/user_uploads/{path_ids[0]}) \n\n" + f"Next image\n[{image_names[1]}](/user_uploads/{path_ids[1]}) \n\n" + f"Another screenshot\n[{image_names[2]}](/user_uploads/{path_ids[2]})" + ) + + sender_user_profile = self.example_user("othello") + msg = Message( + sender=sender_user_profile, + sending_client=get_client("test"), + realm=sender_user_profile.realm, + ) + converted = render_message_markdown(msg, content) + self.assertEqual( + converted.rendered_content, + ( + "

Test 1
\n" + f'{image_names[0]}

\n' + f'' + "

Next image
\n" + f'{image_names[1]}

\n' + f'' + "

Another screenshot
\n" + f'{image_names[2]}

\n' + f'' + ), + ) + + def test_thumbnail_code_block(self) -> None: + url = "http://example.com/image.png" + path_id = self.upload_and_thumbnail_image("img.png") + # We have a path_id of an image in the message content, so we + # will prefetch the thumbnail metadata -- but not insert it. + + sender_user_profile = self.example_user("othello") + msg = Message( + sender=sender_user_profile, + sending_client=get_client("test"), + realm=sender_user_profile.realm, + ) + converted = render_message_markdown(msg, f"{url}\n```\n/user_uploads/{path_id}\n```") + self.assertEqual( + converted.rendered_content, + ( + f'
' + f'
/user_uploads/{path_id}\n'
+                "
" + ), + ) + + def test_thumbnail_after_send(self) -> None: + with self.captureOnCommitCallbacks(execute=True): + path_id = self.upload_image("img.png") + content = f"[image](/user_uploads/{path_id})" + expected = ( + f'

image

\n' + f'' + ) + + message_id = self.send_message_content(content) + self.assert_message_content_is(message_id, expected) + + # Exit the block and run thumbnailing + expected = ( + f'

image

\n' + f'' + ) + self.assert_message_content_is(message_id, expected) + + def test_thumbnail_escaping(self) -> None: + self.login("othello") + with self.captureOnCommitCallbacks(execute=True): + url = upload_message_attachment( + "I am 95% ± 5% certain!", + "image/png", + read_test_image_file("img.png"), + self.example_user("othello"), + ) + path_id = re.sub(r"/user_uploads/", "", url) + self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists()) + message_id = self.send_message_content(f"[I am 95% ± 5% certain!](/user_uploads/{path_id})") + expected = ( + f'

I am 95% ± 5% certain!

\n' + f'
' + ) + self.assert_message_content_is(message_id, expected) + + def test_thumbnail_repeated(self) -> None: + # We currently have no way to generate a thumbnailing event + # for the worker except during upload, meaning that we will + # never repeat a ImageAttachment thumbnailing. However, the + # code supports it, so test it. + + # Thumbnail with one set of sizes + with self.thumbnail_formats( + ThumbnailFormat("webp", 100, 75, animated=True), + ThumbnailFormat("webp", 100, 75, animated=False), + ): + path_id = self.upload_and_thumbnail_image("animated_img.gif") + content = f"[animated_img.gif](/user_uploads/{path_id})" + expected = ( + f'

animated_img.gif

\n' + f'' + ) + message_id = self.send_message_content(content, do_thumbnail=True) + self.assert_message_content_is(message_id, expected) + self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 2) + + # Re-thumbnail with a non-overlapping set of sizes + with self.thumbnail_formats(ThumbnailFormat("jpg", 100, 75, animated=False)): + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) + + # We generate a new size but leave the old ones + self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 3) + + # And the contents are not updated to the new size + self.assert_message_content_is(message_id, expected) + + def test_thumbnail_sequential_edits(self) -> None: + first_path_id = self.upload_image("img.png") + second_path_id = self.upload_image("img.jpg") + + message_id = self.send_message_content( + f"[first image](/user_uploads/{first_path_id})\n[second image](/user_uploads/{second_path_id})", + do_thumbnail=False, + ) + self.assert_message_content_is( + message_id, + ( + f'

first image
\n' + f'second image

\n' + f'' + f'' + ), + ) + + # Complete thumbnailing the second image first -- replacing only that spinner + ensure_thumbnails(ImageAttachment.objects.get(path_id=second_path_id)) + self.assert_message_content_is( + message_id, + ( + f'

first image
\n' + f'second image

\n' + f'' + f'' + ), + ) + + # Finish the other thumbnail + ensure_thumbnails(ImageAttachment.objects.get(path_id=first_path_id)) + self.assert_message_content_is( + message_id, + ( + f'

first image
\n' + f'second image

\n' + f'' + f'' + ), + ) + + def test_thumbnail_of_deleted(self) -> None: + sender_user_profile = self.example_user("othello") + path_id = self.upload_image("img.png") + message_id = self.send_message_content(f"[image](/user_uploads/{path_id})") + + # Delete the message + do_delete_messages( + sender_user_profile.realm, [Message.objects.get(id=message_id)], acting_user=None + ) + + # There is still an ImageAttachment row + self.assertFalse(Attachment.objects.filter(path_id=path_id).exists()) + self.assertTrue(ArchivedAttachment.objects.filter(path_id=path_id).exists()) + self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists()) + + # Completing rendering after it is deleted should work, and + # update the rendered content in the archived message + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) + expected = ( + f'

image

\n' + f'' + ) + self.assertEqual( + ArchivedMessage.objects.get(id=message_id).rendered_content, + expected, + ) + # See test_delete_unclaimed_attachments for tests of the + # archiving process itself, and how it interacts with + # thumbnails. + + def test_thumbnail_bad_image(self) -> None: + """Test what happens if the file looks fine, but resizing later fails""" + path_id = self.upload_image("img.png") + message_id = self.send_message_content(f"[image](/user_uploads/{path_id})") + self.assert_length(ImageAttachment.objects.get(path_id=path_id).thumbnail_metadata, 0) + + # If the image is found to be bad, we remove all trace of the preview + with ( + patch.object( + pyvips.Image, "thumbnail_buffer", side_effect=pyvips.Error("some bad error") + ) as thumb_mock, + self.assertLogs("zerver.worker.thumbnail", "ERROR") as thumbnail_logs, + ): + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) + thumb_mock.assert_called_once() + self.assert_length(thumbnail_logs.output, 1) + self.assertTrue( + thumbnail_logs.output[0].startswith("ERROR:zerver.worker.thumbnail:some bad error") + ) + self.assertFalse(ImageAttachment.objects.filter(path_id=path_id).exists()) + self.assert_message_content_is( + message_id, f'

image

' + ) + + def test_thumbnail_multiple_messages(self) -> None: + sender_user_profile = self.example_user("othello") + path_id = self.upload_image("img.png") + channel_message_id = self.send_message_content(f"A public [image](/user_uploads/{path_id})") + private_message_id = self.send_personal_message( + from_user=sender_user_profile, + to_user=self.example_user("hamlet"), + content=f"This [image](/user_uploads/{path_id}) is private", + ) + placeholder = ( + f'' + ) + self.assert_message_content_is( + channel_message_id, + f'

A public image

\n{placeholder}', + ) + + self.assert_message_content_is( + private_message_id, + f'

This image is private

\n{placeholder}', + ) + + with ( + patch.object( + pyvips.Image, "thumbnail_buffer", wraps=pyvips.Image.thumbnail_buffer + ) as thumb_mock, + self.thumbnail_formats( + ThumbnailFormat("webp", 100, 75, animated=False), + ThumbnailFormat("webp", 200, 150, animated=False), + ), + ): + ensure_thumbnails(ImageAttachment.objects.get(path_id=path_id)) + + # Called once per format + self.assertEqual(thumb_mock.call_count, 2) + + rendered_thumb = ( + f'' + ) + + self.assert_message_content_is( + channel_message_id, + f'

A public image

\n{rendered_thumb}', + ) + + self.assert_message_content_is( + private_message_id, + f'

This image is private

\n{rendered_thumb}', + ) diff --git a/zerver/worker/embed_links.py b/zerver/worker/embed_links.py index 29dbe79230..08922872a8 100644 --- a/zerver/worker/embed_links.py +++ b/zerver/worker/embed_links.py @@ -56,7 +56,7 @@ class FetchLinksEmbedData(QueueProcessingWorker): realm, url_embed_data=url_embed_data, ) - do_update_embedded_data(message.sender, message, message.content, rendering_result) + do_update_embedded_data(message.sender, message, rendering_result) @override def timer_expired( diff --git a/zerver/worker/thumbnail.py b/zerver/worker/thumbnail.py index d9bba14fcc..98462563f4 100644 --- a/zerver/worker/thumbnail.py +++ b/zerver/worker/thumbnail.py @@ -8,10 +8,17 @@ import pyvips from django.db import transaction from typing_extensions import override +from zerver.actions.message_edit import do_update_embedded_data from zerver.lib.mime_types import guess_type -from zerver.lib.thumbnail import StoredThumbnailFormat, get_image_thumbnail_path, missing_thumbnails +from zerver.lib.thumbnail import ( + StoredThumbnailFormat, + get_default_thumbnail_url, + get_image_thumbnail_path, + missing_thumbnails, + rewrite_thumbnailed_images, +) from zerver.lib.upload import save_attachment_contents, upload_backend -from zerver.models import ImageAttachment +from zerver.models import ArchivedMessage, ImageAttachment, Message from zerver.worker.base import QueueProcessingWorker, assign_queue logger = logging.getLogger(__name__) @@ -120,9 +127,52 @@ def ensure_thumbnails(image_attachment: ImageAttachment) -> int: # We have never thumbnailed this -- it most likely had # bad data. Remove the ImageAttachment row, since it is # not valid for thumbnailing. + update_message_rendered_content( + image_attachment.realm_id, image_attachment.path_id, None + ) image_attachment.delete() return 0 + else: # nocoverage + # TODO: Clean up any dangling thumbnails we may have + # produced? Seems unlikely that we'd fail on one size, + # but not another, but anything's possible. + pass image_attachment.save(update_fields=["thumbnail_metadata"]) - + update_message_rendered_content( + image_attachment.realm_id, + image_attachment.path_id, + get_default_thumbnail_url(image_attachment), + ) return written_images + + +def update_message_rendered_content( + realm_id: int, path_id: str, image_data: tuple[str, bool] | None +) -> None: + for message_class in [Message, ArchivedMessage]: + messages_with_image = ( + message_class.objects.filter( # type: ignore[attr-defined] # TODO: ? + realm_id=realm_id, attachment__path_id=path_id + ) + .select_for_update() + .order_by("id") + ) + for message in messages_with_image: + rendered_content = rewrite_thumbnailed_images( + message.rendered_content, + {} if image_data is None else {path_id: image_data}, + {path_id} if image_data is None else set(), + ) + if rendered_content is None: + # There were no updates -- for instance, if we re-run + # ensure_thumbnails on an ImageAttachment we already + # ran it on once. Do not bother to no-op update + # clients. + continue + if isinstance(message, Message): + # Perform a silent update push to the clients + do_update_embedded_data(message.sender, message, rendered_content) + else: + message.rendered_content = rendered_content + message.save(update_fields=["rendered_content"]) diff --git a/zproject/default_settings.py b/zproject/default_settings.py index 815f81f9f1..6085d71a1a 100644 --- a/zproject/default_settings.py +++ b/zproject/default_settings.py @@ -203,7 +203,6 @@ REDIS_PORT = 6379 REMOTE_POSTGRES_HOST = "" REMOTE_POSTGRES_PORT = "" REMOTE_POSTGRES_SSLMODE = "" -THUMBNAIL_IMAGES = False TORNADO_PORTS: list[int] = [] USING_TORNADO = True