mirror of https://github.com/zulip/zulip.git
thumbnail: Fix corrupted email notifications due to HTML5 entities.
BeautifulSoup with formatter="html5" unnecessarily escapes many characters with HTML5-specific entities that cannot be correctly parsed by lxml during generation of email notifications.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
07ce4f0bc0
commit
e3abd09e67
|
@ -8,6 +8,7 @@ from typing import TypeVar
|
||||||
|
|
||||||
import pyvips
|
import pyvips
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.formatter import EntitySubstitution, HTMLFormatter
|
||||||
from django.utils.translation import gettext as _
|
from django.utils.translation import gettext as _
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
|
@ -396,6 +397,17 @@ def get_default_thumbnail_url(image_attachment: ImageAttachment) -> tuple[str, b
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Like HTMLFormatter.REGISTRY["html5"], this formatter avoids producing
|
||||||
|
# self-closing tags, but it differs by avoiding unnecessary escaping with
|
||||||
|
# HTML5-specific entities that cannot be parsed by lxml and libxml2
|
||||||
|
# (https://bugs.launchpad.net/lxml/+bug/2031045).
|
||||||
|
html_formatter = HTMLFormatter(
|
||||||
|
entity_substitution=EntitySubstitution.substitute_xml, # not substitute_html
|
||||||
|
void_element_close_prefix="",
|
||||||
|
empty_attributes_are_booleans=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def rewrite_thumbnailed_images(
|
def rewrite_thumbnailed_images(
|
||||||
rendered_content: str,
|
rendered_content: str,
|
||||||
images: dict[str, MarkdownImageMetadata | None],
|
images: dict[str, MarkdownImageMetadata | None],
|
||||||
|
@ -453,7 +465,8 @@ def rewrite_thumbnailed_images(
|
||||||
image_tag["data-animated"] = "true"
|
image_tag["data-animated"] = "true"
|
||||||
|
|
||||||
if changed:
|
if changed:
|
||||||
# The formatter="html5" means we do not produce self-closing tags
|
return parsed_message.encode(
|
||||||
return parsed_message.encode(formatter="html5").decode().strip(), remaining_thumbnails
|
formatter=html_formatter
|
||||||
|
).decode().strip(), remaining_thumbnails
|
||||||
else:
|
else:
|
||||||
return None, remaining_thumbnails
|
return None, remaining_thumbnails
|
||||||
|
|
|
@ -150,8 +150,8 @@ class MarkdownThumbnailTest(ZulipTestCase):
|
||||||
self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists())
|
self.assertTrue(ImageAttachment.objects.filter(path_id=path_id).exists())
|
||||||
message_id = self.send_message_content(f"[I am 95% ± 5% certain!](/user_uploads/{path_id})")
|
message_id = self.send_message_content(f"[I am 95% ± 5% certain!](/user_uploads/{path_id})")
|
||||||
expected = (
|
expected = (
|
||||||
f'<p><a href="/user_uploads/{path_id}">I am 95% &plusmn; 5% certain!</a></p>\n'
|
f'<p><a href="/user_uploads/{path_id}">I am 95% ± 5% certain!</a></p>\n'
|
||||||
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="I am 95% &plusmn; 5% certain!">'
|
f'<div class="message_inline_image"><a href="/user_uploads/{path_id}" title="I am 95% ± 5% certain!">'
|
||||||
f'<img data-original-dimensions="128x128" src="/user_uploads/thumbnail/{path_id}/840x560.webp"></a></div>'
|
f'<img data-original-dimensions="128x128" src="/user_uploads/thumbnail/{path_id}/840x560.webp"></a></div>'
|
||||||
)
|
)
|
||||||
self.assert_message_content_is(message_id, expected)
|
self.assert_message_content_is(message_id, expected)
|
||||||
|
|
Loading…
Reference in New Issue