mirror of https://github.com/zulip/zulip.git
email_notifications: Handle empty rendered_messages.
The transforms called from `build_message_payload` use `lxml.html.fromstring` to parse (and stringify, and re-parse) the HTML generated by Markdown. However, this function fails if it is passed an empty document. "empty" is broader than just the empty string; it also includes any document made entirely out of control characters, spaces, unpaired surrogates, U+FFFE, or U+FFFF, and so forth. These documents would fail to parse, and raise a ParserError. Using `lxml.html.fragment_fromstring` handles these cases, but does by wrapping the contents in a <div> every time it is called. As such, replacing each `fromstring` with `fragment_fromstring` would nest another layer of `<div>`. Instead of each of the helper functions re-parsing, modifying, and stringifying the HTML, parse it once with `fragment_fromstring` and pass around the parsed document to each helper, which modifies it in-place. This adds one outer `<div>`, requiring minor changes to tests and the prepend-sender functions. The modification to add the sender is left using BeautifulSoup, as that sort of transform is much less readable, and more fiddly, in raw lxml. Partial fix for #19559.
This commit is contained in:
parent
42e3c4e6ec
commit
fe25517295
|
@ -44,10 +44,7 @@ from zerver.models import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def relative_to_full_url(base_url: str, content: str) -> str:
|
def relative_to_full_url(fragment: lxml.html.HtmlElement, base_url: str) -> None:
|
||||||
# Convert relative URLs to absolute URLs.
|
|
||||||
fragment = lxml.html.fromstring(content)
|
|
||||||
|
|
||||||
# We handle narrow URLs separately because of two reasons:
|
# We handle narrow URLs separately because of two reasons:
|
||||||
# 1: 'lxml' seems to be having an issue in dealing with URLs that begin
|
# 1: 'lxml' seems to be having an issue in dealing with URLs that begin
|
||||||
# `#` due to which it doesn't add a `/` before joining the base_url to
|
# `#` due to which it doesn't add a `/` before joining the base_url to
|
||||||
|
@ -81,15 +78,14 @@ def relative_to_full_url(base_url: str, content: str) -> str:
|
||||||
image_link = fragment.find("a").get("href")
|
image_link = fragment.find("a").get("href")
|
||||||
image_title = fragment.find("a").get("title")
|
image_title = fragment.find("a").get("title")
|
||||||
title_attr = {} if image_title is None else {"title": image_title}
|
title_attr = {} if image_title is None else {"title": image_title}
|
||||||
fragment = E.P(E.A(image_link, href=image_link, target="_blank", **title_attr))
|
fragment.clear()
|
||||||
|
fragment.tag = "p"
|
||||||
|
fragment.append(E.A(image_link, href=image_link, target="_blank", **title_attr))
|
||||||
|
|
||||||
fragment.make_links_absolute(base_url)
|
fragment.make_links_absolute(base_url)
|
||||||
content = lxml.html.tostring(fragment, encoding="unicode")
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
|
||||||
def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
|
def fix_emojis(fragment: lxml.html.HtmlElement, base_url: str, emojiset: str) -> None:
|
||||||
def make_emoji_img_elem(emoji_span_elem: lxml.html.HtmlElement) -> Dict[str, Any]:
|
def make_emoji_img_elem(emoji_span_elem: lxml.html.HtmlElement) -> Dict[str, Any]:
|
||||||
# Convert the emoji spans to img tags.
|
# Convert the emoji spans to img tags.
|
||||||
classes = emoji_span_elem.get("class")
|
classes = emoji_span_elem.get("class")
|
||||||
|
@ -106,7 +102,6 @@ def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
|
||||||
img_elem.tail = emoji_span_elem.tail
|
img_elem.tail = emoji_span_elem.tail
|
||||||
return img_elem
|
return img_elem
|
||||||
|
|
||||||
fragment = lxml.html.fromstring(content)
|
|
||||||
for elem in fragment.cssselect("span.emoji"):
|
for elem in fragment.cssselect("span.emoji"):
|
||||||
parent = elem.getparent()
|
parent = elem.getparent()
|
||||||
img_elem = make_emoji_img_elem(elem)
|
img_elem = make_emoji_img_elem(elem)
|
||||||
|
@ -116,14 +111,10 @@ def fix_emojis(content: str, base_url: str, emojiset: str) -> str:
|
||||||
del realm_emoji.attrib["class"]
|
del realm_emoji.attrib["class"]
|
||||||
realm_emoji.set("style", "height: 20px;")
|
realm_emoji.set("style", "height: 20px;")
|
||||||
|
|
||||||
content = lxml.html.tostring(fragment, encoding="unicode")
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
def fix_spoilers_in_html(fragment: lxml.html.HtmlElement, language: str) -> None:
|
||||||
def fix_spoilers_in_html(content: str, language: str) -> str:
|
|
||||||
with override_language(language):
|
with override_language(language):
|
||||||
spoiler_title: str = _("Open Zulip to see the spoiler content")
|
spoiler_title: str = _("Open Zulip to see the spoiler content")
|
||||||
fragment = lxml.html.fromstring(content)
|
|
||||||
spoilers = fragment.find_class("spoiler-block")
|
spoilers = fragment.find_class("spoiler-block")
|
||||||
for spoiler in spoilers:
|
for spoiler in spoilers:
|
||||||
header = spoiler.find_class("spoiler-header")[0]
|
header = spoiler.find_class("spoiler-header")[0]
|
||||||
|
@ -141,8 +132,6 @@ def fix_spoilers_in_html(content: str, language: str) -> str:
|
||||||
header_content.append(span_elem)
|
header_content.append(span_elem)
|
||||||
header.drop_tag()
|
header.drop_tag()
|
||||||
spoiler_content.drop_tree()
|
spoiler_content.drop_tree()
|
||||||
content = lxml.html.tostring(fragment, encoding="unicode")
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
|
||||||
def fix_spoilers_in_text(content: str, language: str) -> str:
|
def fix_spoilers_in_text(content: str, language: str) -> str:
|
||||||
|
@ -199,7 +188,9 @@ def build_message_list(
|
||||||
message_soup = BeautifulSoup(message_html, "html.parser")
|
message_soup = BeautifulSoup(message_html, "html.parser")
|
||||||
sender_name_soup = BeautifulSoup(f"<b>{sender}</b>: ", "html.parser")
|
sender_name_soup = BeautifulSoup(f"<b>{sender}</b>: ", "html.parser")
|
||||||
first_tag = message_soup.find()
|
first_tag = message_soup.find()
|
||||||
if first_tag.name == "p":
|
if first_tag and first_tag.name == "div":
|
||||||
|
first_tag = first_tag.find()
|
||||||
|
if first_tag and first_tag.name == "p":
|
||||||
first_tag.insert(0, sender_name_soup)
|
first_tag.insert(0, sender_name_soup)
|
||||||
else:
|
else:
|
||||||
message_soup.insert(0, sender_name_soup)
|
message_soup.insert(0, sender_name_soup)
|
||||||
|
@ -217,10 +208,11 @@ def build_message_list(
|
||||||
plain = fix_spoilers_in_text(plain, user.default_language)
|
plain = fix_spoilers_in_text(plain, user.default_language)
|
||||||
|
|
||||||
assert message.rendered_content is not None
|
assert message.rendered_content is not None
|
||||||
html = message.rendered_content
|
fragment = lxml.html.fragment_fromstring(message.rendered_content, create_parent=True)
|
||||||
html = relative_to_full_url(user.realm.uri, html)
|
relative_to_full_url(fragment, user.realm.uri)
|
||||||
html = fix_emojis(html, user.realm.uri, user.emojiset)
|
fix_emojis(fragment, user.realm.uri, user.emojiset)
|
||||||
html = fix_spoilers_in_html(html, user.default_language)
|
fix_spoilers_in_html(fragment, user.default_language)
|
||||||
|
html = lxml.html.tostring(fragment, encoding="unicode")
|
||||||
if sender:
|
if sender:
|
||||||
plain, html = prepend_sender_to_message(plain, html, sender)
|
plain, html = prepend_sender_to_message(plain, html, sender)
|
||||||
return {"plain": plain, "html": html}
|
return {"plain": plain, "html": html}
|
||||||
|
|
|
@ -6,6 +6,7 @@ from unittest import mock
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import ldap
|
import ldap
|
||||||
|
import lxml.html
|
||||||
import orjson
|
import orjson
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core import mail
|
from django.core import mail
|
||||||
|
@ -1098,14 +1099,14 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
self.assertIn("Iago: * 1\n *2\n\n--\nYou are receiving", mail.outbox[1].body)
|
self.assertIn("Iago: * 1\n *2\n\n--\nYou are receiving", mail.outbox[1].body)
|
||||||
# If message content does not starts with <p> tag sender name is appended before the <p> tag
|
# If message content does not starts with <p> tag sender name is appended before the <p> tag
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
" <b>Iago</b>: <ul>\n<li>1<br/>\n *2</li>\n</ul>\n",
|
" <b>Iago</b>: <div><ul>\n<li>1<br/>\n *2</li>\n</ul></div>\n",
|
||||||
mail.outbox[1].alternatives[0][0],
|
mail.outbox[1].alternatives[0][0],
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual("Hello\n\n--\n\nReply", mail.outbox[2].body[:16])
|
self.assertEqual("Hello\n\n--\n\nReply", mail.outbox[2].body[:16])
|
||||||
# Sender name is not appended to message for PM missed messages
|
# Sender name is not appended to message for PM missed messages
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
">\n \n <p>Hello</p>\n",
|
">\n \n <div><p>Hello</p></div>\n",
|
||||||
mail.outbox[2].alternatives[0][0],
|
mail.outbox[2].alternatives[0][0],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1243,6 +1244,11 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
self.assertEqual(email_subjects, valid_email_subjects)
|
self.assertEqual(email_subjects, valid_email_subjects)
|
||||||
|
|
||||||
def test_relative_to_full_url(self) -> None:
|
def test_relative_to_full_url(self) -> None:
|
||||||
|
def convert(test_data: str) -> str:
|
||||||
|
fragment = lxml.html.fromstring(test_data)
|
||||||
|
relative_to_full_url(fragment, "http://example.com")
|
||||||
|
return lxml.html.tostring(fragment, encoding="unicode")
|
||||||
|
|
||||||
zulip_realm = get_realm("zulip")
|
zulip_realm = get_realm("zulip")
|
||||||
zephyr_realm = get_realm("zephyr")
|
zephyr_realm = get_realm("zephyr")
|
||||||
# Run `relative_to_full_url()` function over test fixtures present in
|
# Run `relative_to_full_url()` function over test fixtures present in
|
||||||
|
@ -1254,7 +1260,7 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
test_fixtures[test["name"]] = test
|
test_fixtures[test["name"]] = test
|
||||||
for test_name in test_fixtures:
|
for test_name in test_fixtures:
|
||||||
test_data = test_fixtures[test_name]["expected_output"]
|
test_data = test_fixtures[test_name]["expected_output"]
|
||||||
output_data = relative_to_full_url("http://example.com", test_data)
|
output_data = convert(test_data)
|
||||||
if re.search(r"""(?<=\=['"])/(?=[^<]+>)""", output_data) is not None:
|
if re.search(r"""(?<=\=['"])/(?=[^<]+>)""", output_data) is not None:
|
||||||
raise AssertionError(
|
raise AssertionError(
|
||||||
"Relative URL present in email: "
|
"Relative URL present in email: "
|
||||||
|
@ -1268,14 +1274,14 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
|
|
||||||
# A path similar to our emoji path, but not in a link:
|
# A path similar to our emoji path, but not in a link:
|
||||||
test_data = "<p>Check out the file at: '/static/generated/emoji/images/emoji/'</p>"
|
test_data = "<p>Check out the file at: '/static/generated/emoji/images/emoji/'</p>"
|
||||||
actual_output = relative_to_full_url("http://example.com", test_data)
|
actual_output = convert(test_data)
|
||||||
expected_output = "<p>Check out the file at: '/static/generated/emoji/images/emoji/'</p>"
|
expected_output = "<p>Check out the file at: '/static/generated/emoji/images/emoji/'</p>"
|
||||||
self.assertEqual(actual_output, expected_output)
|
self.assertEqual(actual_output, expected_output)
|
||||||
|
|
||||||
# An uploaded file
|
# An uploaded file
|
||||||
test_data = '<a href="/user_uploads/{realm_id}/1f/some_random_value">/user_uploads/{realm_id}/1f/some_random_value</a>'
|
test_data = '<a href="/user_uploads/{realm_id}/1f/some_random_value">/user_uploads/{realm_id}/1f/some_random_value</a>'
|
||||||
test_data = test_data.format(realm_id=zephyr_realm.id)
|
test_data = test_data.format(realm_id=zephyr_realm.id)
|
||||||
actual_output = relative_to_full_url("http://example.com", test_data)
|
actual_output = convert(test_data)
|
||||||
expected_output = (
|
expected_output = (
|
||||||
'<a href="http://example.com/user_uploads/{realm_id}/1f/some_random_value">'
|
'<a href="http://example.com/user_uploads/{realm_id}/1f/some_random_value">'
|
||||||
+ "/user_uploads/{realm_id}/1f/some_random_value</a>"
|
+ "/user_uploads/{realm_id}/1f/some_random_value</a>"
|
||||||
|
@ -1285,7 +1291,7 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
|
|
||||||
# A profile picture like syntax, but not actually in an HTML tag
|
# A profile picture like syntax, but not actually in an HTML tag
|
||||||
test_data = '<p>Set src="/avatar/username@example.com?s=30"</p>'
|
test_data = '<p>Set src="/avatar/username@example.com?s=30"</p>'
|
||||||
actual_output = relative_to_full_url("http://example.com", test_data)
|
actual_output = convert(test_data)
|
||||||
expected_output = '<p>Set src="/avatar/username@example.com?s=30"</p>'
|
expected_output = '<p>Set src="/avatar/username@example.com?s=30"</p>'
|
||||||
self.assertEqual(actual_output, expected_output)
|
self.assertEqual(actual_output, expected_output)
|
||||||
|
|
||||||
|
@ -1294,7 +1300,7 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
'<p><a href="#narrow/stream/test/topic/test.20topic/near/142"'
|
'<p><a href="#narrow/stream/test/topic/test.20topic/near/142"'
|
||||||
+ 'title="#narrow/stream/test/topic/test.20topic/near/142">Conversation</a></p>'
|
+ 'title="#narrow/stream/test/topic/test.20topic/near/142">Conversation</a></p>'
|
||||||
)
|
)
|
||||||
actual_output = relative_to_full_url("http://example.com", test_data)
|
actual_output = convert(test_data)
|
||||||
expected_output = (
|
expected_output = (
|
||||||
'<p><a href="http://example.com/#narrow/stream/test/topic/test.20topic/near/142" '
|
'<p><a href="http://example.com/#narrow/stream/test/topic/test.20topic/near/142" '
|
||||||
+ 'title="http://example.com/#narrow/stream/test/topic/test.20topic/near/142">Conversation</a></p>'
|
+ 'title="http://example.com/#narrow/stream/test/topic/test.20topic/near/142">Conversation</a></p>'
|
||||||
|
@ -1309,7 +1315,7 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
+ 'target="_blank" title="avatar_103.jpeg"><img src="/user_uploads/{realm_id}/52/fG7GM9e3afz_qsiUcSce2tl_/avatar_103.jpeg"></a></div>'
|
+ 'target="_blank" title="avatar_103.jpeg"><img src="/user_uploads/{realm_id}/52/fG7GM9e3afz_qsiUcSce2tl_/avatar_103.jpeg"></a></div>'
|
||||||
)
|
)
|
||||||
test_data = test_data.format(realm_id=zulip_realm.id)
|
test_data = test_data.format(realm_id=zulip_realm.id)
|
||||||
actual_output = relative_to_full_url("http://example.com", test_data)
|
actual_output = convert(test_data)
|
||||||
expected_output = (
|
expected_output = (
|
||||||
'<div><p>See this <a href="http://example.com/user_uploads/{realm_id}/52/fG7GM9e3afz_qsiUcSce2tl_/avatar_103.jpeg" target="_blank" '
|
'<div><p>See this <a href="http://example.com/user_uploads/{realm_id}/52/fG7GM9e3afz_qsiUcSce2tl_/avatar_103.jpeg" target="_blank" '
|
||||||
+ 'title="avatar_103.jpeg">avatar_103.jpeg</a>.</p></div>'
|
+ 'title="avatar_103.jpeg">avatar_103.jpeg</a>.</p></div>'
|
||||||
|
@ -1325,7 +1331,7 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
+ '<img data-src-fullsize="/thumbnail/https%3A//www.google.com/images/srpr/logo4w.png?size=0x0" '
|
+ '<img data-src-fullsize="/thumbnail/https%3A//www.google.com/images/srpr/logo4w.png?size=0x0" '
|
||||||
+ 'src="/thumbnail/https%3A//www.google.com/images/srpr/logo4w.png?size=0x100"></a></div>'
|
+ 'src="/thumbnail/https%3A//www.google.com/images/srpr/logo4w.png?size=0x100"></a></div>'
|
||||||
)
|
)
|
||||||
actual_output = relative_to_full_url("http://example.com", test_data)
|
actual_output = convert(test_data)
|
||||||
expected_output = (
|
expected_output = (
|
||||||
'<p><a href="https://www.google.com/images/srpr/logo4w.png" '
|
'<p><a href="https://www.google.com/images/srpr/logo4w.png" '
|
||||||
+ 'target="_blank" title="https://www.google.com/images/srpr/logo4w.png">'
|
+ 'target="_blank" title="https://www.google.com/images/srpr/logo4w.png">'
|
||||||
|
@ -1335,7 +1341,9 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
|
|
||||||
def test_spoilers_in_html_emails(self) -> None:
|
def test_spoilers_in_html_emails(self) -> None:
|
||||||
test_data = '<div class="spoiler-block"><div class="spoiler-header">\n\n<p><a>header</a> text</p>\n</div><div class="spoiler-content" aria-hidden="true">\n\n<p>content</p>\n</div></div>\n\n<p>outside spoiler</p>'
|
test_data = '<div class="spoiler-block"><div class="spoiler-header">\n\n<p><a>header</a> text</p>\n</div><div class="spoiler-content" aria-hidden="true">\n\n<p>content</p>\n</div></div>\n\n<p>outside spoiler</p>'
|
||||||
actual_output = fix_spoilers_in_html(test_data, "en")
|
fragment = lxml.html.fromstring(test_data)
|
||||||
|
fix_spoilers_in_html(fragment, "en")
|
||||||
|
actual_output = lxml.html.tostring(fragment, encoding="unicode")
|
||||||
expected_output = '<div><div class="spoiler-block">\n\n<p><a>header</a> text <span class="spoiler-title" title="Open Zulip to see the spoiler content">(Open Zulip to see the spoiler content)</span></p>\n</div>\n\n<p>outside spoiler</p></div>'
|
expected_output = '<div><div class="spoiler-block">\n\n<p><a>header</a> text <span class="spoiler-title" title="Open Zulip to see the spoiler content">(Open Zulip to see the spoiler content)</span></p>\n</div>\n\n<p>outside spoiler</p></div>'
|
||||||
self.assertEqual(actual_output, expected_output)
|
self.assertEqual(actual_output, expected_output)
|
||||||
|
|
||||||
|
@ -1346,8 +1354,9 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
if "spoiler" in test["name"]:
|
if "spoiler" in test["name"]:
|
||||||
test_fixtures[test["name"]] = test
|
test_fixtures[test["name"]] = test
|
||||||
for test_name in test_fixtures:
|
for test_name in test_fixtures:
|
||||||
test_data = test_fixtures[test_name]["expected_output"]
|
fragment = lxml.html.fromstring(test_fixtures[test_name]["expected_output"])
|
||||||
output_data = fix_spoilers_in_html(test_data, "en")
|
fix_spoilers_in_html(fragment, "en")
|
||||||
|
output_data = lxml.html.tostring(fragment, encoding="unicode")
|
||||||
assert "spoiler-header" not in output_data
|
assert "spoiler-header" not in output_data
|
||||||
assert "spoiler-content" not in output_data
|
assert "spoiler-content" not in output_data
|
||||||
assert "spoiler-block" in output_data
|
assert "spoiler-block" in output_data
|
||||||
|
@ -1375,9 +1384,23 @@ class TestMissedMessages(ZulipTestCase):
|
||||||
'<p>See <span aria-label="cloud with lightning and rain" class="emoji emoji-26c8" role="img" title="cloud with lightning and rain">'
|
'<p>See <span aria-label="cloud with lightning and rain" class="emoji emoji-26c8" role="img" title="cloud with lightning and rain">'
|
||||||
+ ":cloud_with_lightning_and_rain:</span>.</p>"
|
+ ":cloud_with_lightning_and_rain:</span>.</p>"
|
||||||
)
|
)
|
||||||
actual_output = fix_emojis(test_data, "http://example.com", "google")
|
fragment = lxml.html.fromstring(test_data)
|
||||||
|
fix_emojis(fragment, "http://example.com", "google")
|
||||||
|
actual_output = lxml.html.tostring(fragment, encoding="unicode")
|
||||||
expected_output = (
|
expected_output = (
|
||||||
'<p>See <img alt=":cloud_with_lightning_and_rain:" src="http://example.com/static/generated/emoji/images-google-64/26c8.png" '
|
'<p>See <img alt=":cloud_with_lightning_and_rain:" src="http://example.com/static/generated/emoji/images-google-64/26c8.png" '
|
||||||
+ 'title="cloud with lightning and rain" style="height: 20px;">.</p>'
|
+ 'title="cloud with lightning and rain" style="height: 20px;">.</p>'
|
||||||
)
|
)
|
||||||
self.assertEqual(actual_output, expected_output)
|
self.assertEqual(actual_output, expected_output)
|
||||||
|
|
||||||
|
def test_empty_backticks_in_missed_message(self) -> None:
|
||||||
|
msg_id = self.send_personal_message(
|
||||||
|
self.example_user("othello"),
|
||||||
|
self.example_user("hamlet"),
|
||||||
|
"```\n```",
|
||||||
|
)
|
||||||
|
verify_body_include = ["view it in Zulip"]
|
||||||
|
email_subject = "PMs with Othello, the Moor of Venice"
|
||||||
|
self._test_cases(
|
||||||
|
msg_id, verify_body_include, email_subject, send_as_user=False, verify_html_body=True
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in New Issue