mirror of https://github.com/zulip/zulip.git
thumbnail: Fix corrupted email notifications due to HTML5 entities.
BeautifulSoup with formatter="html5" unnecessarily escapes many characters with HTML5-specific entities that cannot be correctly parsed by lxml during generation of email notifications.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
07ce4f0bc0
commit
e3abd09e67
|
@ -8,6 +8,7 @@ from typing import TypeVar
|
|||
|
||||
import pyvips
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.formatter import EntitySubstitution, HTMLFormatter
|
||||
from django.utils.translation import gettext as _
|
||||
from typing_extensions import override
|
||||
|
||||
|
@ -396,6 +397,17 @@ def get_default_thumbnail_url(image_attachment: ImageAttachment) -> tuple[str, b
|
|||
)
|
||||
|
||||
|
||||
# Like HTMLFormatter.REGISTRY["html5"], this formatter avoids producing
|
||||
# self-closing tags, but it differs by avoiding unnecessary escaping with
|
||||
# HTML5-specific entities that cannot be parsed by lxml and libxml2
|
||||
# (https://bugs.launchpad.net/lxml/+bug/2031045).
|
||||
html_formatter = HTMLFormatter(
|
||||
entity_substitution=EntitySubstitution.substitute_xml, # not substitute_html
|
||||
void_element_close_prefix="",
|
||||
empty_attributes_are_booleans=True,
|
||||
)
|
||||
|
||||
|
||||
def rewrite_thumbnailed_images(
|
||||
rendered_content: str,
|
||||
images: dict[str, MarkdownImageMetadata | None],
|
||||
|
@ -453,7 +465,8 @@ def rewrite_thumbnailed_images(
|
|||
image_tag["data-animated"] = "true"
|
||||
|
||||
if changed:
|
||||
# The formatter="html5" means we do not produce self-closing tags
|
||||
return parsed_message.encode(formatter="html5").decode().strip(), remaining_thumbnails
|
||||
return parsed_message.encode(
|
||||
formatter=html_formatter
|
||||
).decode().strip(), remaining_thumbnails
|
||||
else:
|
||||
return None, remaining_thumbnails
|
||||
|
|
|
@ -150,8 +150,8 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
|||
self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists())
|
||||
message_id = self.send_message_content(f"[I am 95% ± 5% certain!](/user_uploads/{path_id})")
|
||||
expected = (
|
||||
f'<p><a href="/user_uploads/{path_id}">I am 95% ± 5% certain!</a></p>\n'
|
||||
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="I am 95% ± 5% certain!">'
|
||||
f'<p><a href="/user_uploads/{path_id}">I am 95% ± 5% certain!</a></p>\n'
|
||||
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="I am 95% ± 5% certain!">'
|
||||
f'<img data-original-dimensions="128x128" src="/user_uploads/thumbnail/{path_id}/840x560.webp"></a></div>'
|
||||
)
|
||||
self.assert_message_content_is(message_id, expected)
|
||||
|
|
Loading…
Reference in New Issue