From 693b9596567da82c785cceb38948ff1e7d211bac Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Mon, 15 Apr 2024 21:37:58 +0000 Subject: [PATCH] markdown: Switch to directly URL-escaping CSS URLs. soupsieve is a heavy-weight dependency, and Tornado pulls it in by way of markdown rendering; since we are only using it for a very simple process, perform that manually. Per CSS spec[^1]: > In quoted url()s, only newlines and the character used to > quote the string need to be escaped. [^1]: https://drafts.csswg.org/css-values/#urls --- requirements/common.in | 3 --- requirements/dev.txt | 4 +--- requirements/prod.txt | 4 +--- zerver/lib/markdown/__init__.py | 8 ++++++-- zerver/tests/test_link_embed.py | 18 ++++++++---------- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/requirements/common.in b/requirements/common.in index f8adef3ae4..da0c20c711 100644 --- a/requirements/common.in +++ b/requirements/common.in @@ -186,9 +186,6 @@ django-cte # SCIM integration django-scim2 -# CSS manipulation -soupsieve - # Circuit-breaking for outgoing services circuitbreaker diff --git a/requirements/dev.txt b/requirements/dev.txt index 73c291b035..4b54a1985e 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -2894,9 +2894,7 @@ sortedcontainers==2.4.0 \ soupsieve==2.5 \ --hash=sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690 \ --hash=sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7 - # via - # -r requirements/common.in - # beautifulsoup4 + # via beautifulsoup4 sphinx==7.2.6 \ --hash=sha256:1e09160a40b956dc623c910118fa636da93bd3ca0b9876a7b3df90f07d691560 \ --hash=sha256:9a5160e1ea90688d5963ba09a2dcd8bdd526620edbb65c328728f1b2228d5ab5 diff --git a/requirements/prod.txt b/requirements/prod.txt index 6411f39995..1382feaa42 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -2240,9 +2240,7 @@ social-auth-core[azuread,openidconnect,saml]==4.5.3 \ soupsieve==2.5 \ --hash=sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690 \ --hash=sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7 - # via - # -r requirements/common.in - # beautifulsoup4 + # via beautifulsoup4 sqlalchemy==1.4.52 \ --hash=sha256:1296f2cdd6db09b98ceb3c93025f0da4835303b8ac46c15c2136e27ee4d18d94 \ --hash=sha256:1e135fff2e84103bc15c07edd8569612ce317d64bdb391f49ce57124a73f45c5 \ diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index 67e89c1935..fb23f19e6b 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -47,7 +47,6 @@ import uri_template from django.conf import settings from markdown.blockparser import BlockParser from markdown.extensions import codehilite, nl2br, sane_lists, tables -from soupsieve import escape as css_escape from tlds import tld_set from typing_extensions import Self, TypeAlias, override @@ -690,7 +689,12 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): img_link = get_camo_url(extracted_data.image) img = SubElement(container, "a") - img.set("style", "background-image: url(" + css_escape(img_link) + ")") + img.set( + "style", + 'background-image: url("' + + img_link.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\a ") + + '")', + ) img.set("href", link) img.set("class", "message_embed_image") diff --git a/zerver/tests/test_link_embed.py b/zerver/tests/test_link_embed.py index dac6d89484..a69de2ce49 100644 --- a/zerver/tests/test_link_embed.py +++ b/zerver/tests/test_link_embed.py @@ -534,7 +534,7 @@ class PreviewTestCase(ZulipTestCase): @override_settings(CAMO_URI="") def test_inline_url_embed_preview(self) -> None: - with_preview = '

http://test.org/

\n
Description text
' + with_preview = '

http://test.org/

\n
Description text
' without_preview = '

http://test.org/

' msg = self._send_message_with_test_org_url(sender=self.example_user("hamlet")) self.assertEqual(msg.rendered_content, with_preview) @@ -549,13 +549,11 @@ class PreviewTestCase(ZulipTestCase): self.assertEqual(msg.rendered_content, without_preview) def test_inline_url_embed_preview_with_camo(self) -> None: - camo_url = re.sub( - r"([^\w-])", r"\\\1", get_camo_url("http://ia.media-imdb.com/images/rock.jpg") - ) + camo_url = get_camo_url("http://ia.media-imdb.com/images/rock.jpg") with_preview = ( - '

http://test.org/

\n
http://test.org/

\n
Description text
' + + '")">
Description text
' ) msg = self._send_message_with_test_org_url(sender=self.example_user("hamlet")) self.assertEqual(msg.rendered_content, with_preview) @@ -575,7 +573,7 @@ class PreviewTestCase(ZulipTestCase): event = patched.call_args[0][1] # Swap the URL out for one with characters that need CSS escaping - html = re.sub(r"rock\.jpg", "rock).jpg", self.open_graph_html) + html = re.sub(r"rock\.jpg", r"rock.jpg\\", self.open_graph_html) self.create_mock_response(url, body=html) with self.settings(TEST_SUITE=False): with self.assertLogs(level="INFO") as info_logs: @@ -590,7 +588,7 @@ class PreviewTestCase(ZulipTestCase): '

http://test.org/

\n' '
Description' " text
" @@ -614,7 +612,7 @@ class PreviewTestCase(ZulipTestCase): @override_settings(CAMO_URI="") def test_inline_url_embed_preview_with_relative_image_url(self) -> None: - with_preview_relative = '

http://test.org/

\n
Description text
' + with_preview_relative = '

http://test.org/

\n
Description text
' # Try case where the Open Graph image is a relative URL. msg = self._send_message_with_test_org_url( sender=self.example_user("prospero"), relative_url=True @@ -832,7 +830,7 @@ class PreviewTestCase(ZulipTestCase): assert msg.rendered_content is not None self.assertIn(cached_data.title, msg.rendered_content) assert cached_data.image is not None - self.assertIn(re.sub(r"([^\w-])", r"\\\1", cached_data.image), msg.rendered_content) + self.assertIn(cached_data.image, msg.rendered_content) @responses.activate @override_settings(INLINE_URL_EMBED_PREVIEW=True)