mirror of https://github.com/zulip/zulip.git
url_preview: Only return image URLs that validate as URLs.
This commit is contained in:
parent
6afdf2410d
commit
e53f9fad29
|
@ -1,4 +1,5 @@
|
|||
from typing import Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from bs4.element import Tag
|
||||
|
||||
|
@ -48,5 +49,11 @@ class GenericParser(BaseParser):
|
|||
first_image = first_h1.find_next_sibling("img", src=True)
|
||||
if isinstance(first_image, Tag) and first_image["src"] != "":
|
||||
assert isinstance(first_image["src"], str)
|
||||
try:
|
||||
# We use urlparse and not URLValidator because we
|
||||
# need to support relative URLs.
|
||||
urlparse(first_image["src"])
|
||||
except ValueError:
|
||||
return None
|
||||
return first_image["src"]
|
||||
return None
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from typing import Dict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .base import BaseParser
|
||||
|
||||
|
@ -23,6 +24,14 @@ class OpenGraphParser(BaseParser):
|
|||
if not tag.has_attr("content"):
|
||||
continue
|
||||
|
||||
if og_property_name == "image":
|
||||
try:
|
||||
# We use urlparse and not URLValidator because we
|
||||
# need to support relative URLs.
|
||||
urlparse(tag["content"])
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
result[og_property_name] = tag["content"]
|
||||
|
||||
return result
|
||||
|
|
|
@ -290,6 +290,25 @@ class GenericParserTestCase(ZulipTestCase):
|
|||
self.assertEqual(result.get("description"), "Description text")
|
||||
self.assertEqual(result.get("image"), "http://test.com/test.jpg")
|
||||
|
||||
def test_extract_bad_image(self) -> None:
|
||||
html = b"""
|
||||
<html>
|
||||
<body>
|
||||
<h1>Main header</h1>
|
||||
<img data-src="Not an image">
|
||||
<img src="http://[bad url/test.jpg">
|
||||
<div>
|
||||
<p>Description text</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
parser = GenericParser(html, "text/html; charset=UTF-8")
|
||||
result = parser.extract_data()
|
||||
self.assertEqual(result.get("title"), "Main header")
|
||||
self.assertEqual(result.get("description"), "Description text")
|
||||
self.assertIsNone(result.get("image"))
|
||||
|
||||
def test_extract_description(self) -> None:
|
||||
html = b"""
|
||||
<html>
|
||||
|
@ -721,6 +740,44 @@ class PreviewTestCase(ZulipTestCase):
|
|||
msg.rendered_content,
|
||||
)
|
||||
|
||||
@responses.activate
|
||||
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
|
||||
def test_link_preview_open_graph_image_bad_url(self) -> None:
|
||||
user = self.example_user("hamlet")
|
||||
self.login_user(user)
|
||||
url = "http://test.org/foo.html"
|
||||
with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
|
||||
msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
|
||||
patched.assert_called_once()
|
||||
queue = patched.call_args[0][0]
|
||||
self.assertEqual(queue, "embed_links")
|
||||
event = patched.call_args[0][1]
|
||||
|
||||
# HTML with a bad og:image metadata
|
||||
html = "\n".join(
|
||||
line
|
||||
if "og:image" not in line
|
||||
else '<meta property="og:image" content="http://[bad url/" />'
|
||||
for line in self.open_graph_html.splitlines()
|
||||
)
|
||||
self.create_mock_response(url, body=html)
|
||||
with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
|
||||
with self.assertLogs(level="INFO") as info_logs:
|
||||
FetchLinksEmbedData().consume(event)
|
||||
cached_data = link_embed_data_from_cache(url)
|
||||
self.assertTrue(
|
||||
"INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: "
|
||||
in info_logs.output[0]
|
||||
)
|
||||
|
||||
self.assertIn("title", cached_data)
|
||||
self.assertNotIn("image", cached_data)
|
||||
msg = Message.objects.select_related("sender").get(id=msg_id)
|
||||
self.assertEqual(
|
||||
('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
|
||||
msg.rendered_content,
|
||||
)
|
||||
|
||||
@responses.activate
|
||||
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
|
||||
def test_link_preview_open_graph_image_missing_content(self) -> None:
|
||||
|
|
Loading…
Reference in New Issue