mirror of https://github.com/zulip/zulip.git
email_notifications: Prevent html2text from mangling Unicode.
By default, html2text mangles Unicode; its --unicode-snob option disables that behavior. If I have to get called a “snob” for wanting to correctly support non-English languages, then uh, I’ll take one for the team.
Reference: https://github.com/Alir3z4/html2text/blob/2024.2.26/html2text/config.py#L111-L150
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
fc50736f4e
commit
42e1517255
|
@ -440,7 +440,7 @@ def process_raw_message_batch(
|
||||||
)
|
)
|
||||||
|
|
||||||
# html2text is GPL licensed, so run it as a subprocess.
|
# html2text is GPL licensed, so run it as a subprocess.
|
||||||
content = subprocess.check_output(["html2text"], input=content, text=True)
|
content = subprocess.check_output(["html2text", "--unicode-snob"], input=content, text=True)
|
||||||
|
|
||||||
if len(content) > 10000: # nocoverage
|
if len(content) > 10000: # nocoverage
|
||||||
logging.info("skipping too-long message of length %s", len(content))
|
logging.info("skipping too-long message of length %s", len(content))
|
||||||
|
|
|
@ -938,7 +938,7 @@ def enqueue_welcome_emails(user: UserProfile, realm_creation: bool = False) -> N
|
||||||
def convert_html_to_markdown(html: str) -> str:
|
def convert_html_to_markdown(html: str) -> str:
|
||||||
# html2text is GPL licensed, so run it as a subprocess.
|
# html2text is GPL licensed, so run it as a subprocess.
|
||||||
markdown = subprocess.check_output(
|
markdown = subprocess.check_output(
|
||||||
[os.path.join(sys.prefix, "bin", "html2text")], input=html, text=True
|
[os.path.join(sys.prefix, "bin", "html2text"), "--unicode-snob"], input=html, text=True
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
# We want images to get linked and inline previewed, but html2text will turn
|
# We want images to get linked and inline previewed, but html2text will turn
|
||||||
|
|
|
@ -11,6 +11,7 @@ from django.utils.timezone import now as timezone_now
|
||||||
from django_auth_ldap.config import LDAPSearch
|
from django_auth_ldap.config import LDAPSearch
|
||||||
|
|
||||||
from zerver.lib.email_notifications import (
|
from zerver.lib.email_notifications import (
|
||||||
|
convert_html_to_markdown,
|
||||||
enqueue_welcome_emails,
|
enqueue_welcome_emails,
|
||||||
get_onboarding_email_schedule,
|
get_onboarding_email_schedule,
|
||||||
send_account_registered_email,
|
send_account_registered_email,
|
||||||
|
@ -671,3 +672,10 @@ class TestCustomWelcomeEmailSender(ZulipTestCase):
|
||||||
email_data = orjson.loads(scheduled_emails[0].data)
|
email_data = orjson.loads(scheduled_emails[0].data)
|
||||||
self.assertEqual(email_data["from_name"], name)
|
self.assertEqual(email_data["from_name"], name)
|
||||||
self.assertEqual(email_data["from_address"], email)
|
self.assertEqual(email_data["from_address"], email)
|
||||||
|
|
||||||
|
|
||||||
|
class TestHtmlToMarkdown(ZulipTestCase):
|
||||||
|
def test_html_to_markdown_unicode(self) -> None:
|
||||||
|
self.assertEqual(
|
||||||
|
convert_html_to_markdown("a rose is not a rosé"), "a rose is not a rosé"
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in New Issue