2021-10-21 23:20:56 +02:00
|
|
|
import re
|
2021-06-09 00:18:46 +02:00
|
|
|
from collections import OrderedDict
|
|
|
|
from typing import Any, Optional, Union
|
2020-05-26 07:16:25 +02:00
|
|
|
from unittest import mock
|
2021-06-09 00:18:46 +02:00
|
|
|
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
import responses
|
2016-10-27 12:06:44 +02:00
|
|
|
from django.test import override_settings
|
2019-05-02 18:58:39 +02:00
|
|
|
from django.utils.html import escape
|
2021-06-09 00:18:46 +02:00
|
|
|
from pyoembed.providers import get_provider
|
2020-06-11 00:54:34 +02:00
|
|
|
from requests.exceptions import ConnectionError
|
2023-10-12 19:43:45 +02:00
|
|
|
from typing_extensions import override
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2022-07-17 13:00:21 +02:00
|
|
|
from zerver.actions.message_delete import do_delete_messages
|
2022-04-14 07:18:18 +02:00
|
|
|
from zerver.lib.cache import cache_delete, cache_get, preview_url_cache_key
|
2021-03-23 10:34:55 +01:00
|
|
|
from zerver.lib.camo import get_camo_url
|
2021-07-16 22:11:10 +02:00
|
|
|
from zerver.lib.queue import queue_json_publish
|
2016-10-27 12:06:44 +02:00
|
|
|
from zerver.lib.test_classes import ZulipTestCase
|
2021-06-09 00:18:46 +02:00
|
|
|
from zerver.lib.test_helpers import mock_queue_publish
|
2019-12-12 09:39:41 +01:00
|
|
|
from zerver.lib.url_preview.oembed import get_oembed_data, strip_cdata
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.url_preview.parsers import GenericParser, OpenGraphParser
|
2022-04-14 21:57:20 +02:00
|
|
|
from zerver.lib.url_preview.preview import get_link_embed_data
|
2022-04-14 21:52:41 +02:00
|
|
|
from zerver.lib.url_preview.types import UrlEmbedData, UrlOEmbedData
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.models import Message, Realm, UserProfile
|
|
|
|
from zerver.worker.queue_processors import FetchLinksEmbedData
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
def reconstruct_url(url: str, maxwidth: int = 640, maxheight: int = 480) -> str:
    """Build the provider's oEmbed endpoint URL for ``url`` with size limits.

    Adapted from
    https://github.com/rafaelmartins/pyoembed/blob/master/pyoembed/__init__.py.
    The tests in this file register their mocked HTTP responses against the
    URL returned here.
    """
    endpoint = get_provider(str(url)).oembed_url(url)
    parts = urlsplit(endpoint)

    # Keep whatever parameters the endpoint already carries, then set the
    # size limits (overwriting any existing values for those two keys).
    params = OrderedDict(parse_qsl(parts.query))
    params.update(maxwidth=str(maxwidth), maxheight=str(maxheight))

    return urlunsplit(
        (parts.scheme, parts.netloc, parts.path, urlencode(params, True), parts.fragment)
    )
|
|
|
|
|
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
class OembedTestCase(ZulipTestCase):
    """Exercise get_oembed_data and strip_cdata against mocked oEmbed endpoints."""

    @responses.activate
    def test_present_provider(self) -> None:
        """A "rich" response comes back as UrlEmbedData carrying the title."""
        payload = {
            "type": "rich",
            "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "thumbnail_width": 640,
            "thumbnail_height": 426,
            "title": "NASA",
            "html": "<p>test</p>",
            "version": "1.0",
            "width": 658,
            "height": 400,
        }
        page_url = "http://instagram.com/p/BLtI2WdAymy"
        responses.add(responses.GET, reconstruct_url(page_url), json=payload, status=200)

        embed = get_oembed_data(page_url)
        assert embed is not None
        self.assertIsInstance(embed, UrlEmbedData)
        self.assertEqual(embed.title, payload["title"])

    @responses.activate
    def test_photo_provider(self) -> None:
        """A "photo" response comes back as UrlOEmbedData carrying the title."""
        payload = {
            "type": "photo",
            "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "thumbnail_width": 640,
            "thumbnail_height": 426,
            "title": "NASA",
            "html": "<p>test</p>",
            "version": "1.0",
            "width": 658,
            "height": 400,
        }
        # pyoembed.providers.imgur only works with http:// URLs, not https:// (!)
        page_url = "http://imgur.com/photo/158727223"
        responses.add(responses.GET, reconstruct_url(page_url), json=payload, status=200)

        embed = get_oembed_data(page_url)
        assert embed is not None
        self.assertIsInstance(embed, UrlOEmbedData)
        self.assertEqual(embed.title, payload["title"])

    @responses.activate
    def test_video_provider(self) -> None:
        """A "video" response comes back as UrlOEmbedData carrying the title."""
        payload = {
            "type": "video",
            "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "thumbnail_width": 640,
            "thumbnail_height": 426,
            "title": "NASA",
            "html": "<p>test</p>",
            "version": "1.0",
            "width": 658,
            "height": 400,
        }
        page_url = "http://blip.tv/video/158727223"
        responses.add(responses.GET, reconstruct_url(page_url), json=payload, status=200)

        embed = get_oembed_data(page_url)
        assert embed is not None
        self.assertIsInstance(embed, UrlOEmbedData)
        self.assertEqual(embed.title, payload["title"])

    @responses.activate
    def test_connect_error_request(self) -> None:
        """No data is returned when the request raises ConnectionError."""
        page_url = "http://instagram.com/p/BLtI2WdAymy"
        responses.add(responses.GET, reconstruct_url(page_url), body=ConnectionError())
        self.assertIsNone(get_oembed_data(page_url))

    @responses.activate
    def test_400_error_request(self) -> None:
        """No data is returned when the endpoint answers with HTTP 400."""
        page_url = "http://instagram.com/p/BLtI2WdAymy"
        responses.add(responses.GET, reconstruct_url(page_url), status=400)
        self.assertIsNone(get_oembed_data(page_url))

    @responses.activate
    def test_500_error_request(self) -> None:
        """No data is returned when the endpoint answers with HTTP 500."""
        page_url = "http://instagram.com/p/BLtI2WdAymy"
        responses.add(responses.GET, reconstruct_url(page_url), status=500)
        self.assertIsNone(get_oembed_data(page_url))

    @responses.activate
    def test_invalid_json_in_response(self) -> None:
        """No data is returned for the "{invalid json}" payload."""
        page_url = "http://instagram.com/p/BLtI2WdAymy"
        responses.add(
            responses.GET,
            reconstruct_url(page_url),
            json="{invalid json}",
            status=200,
        )
        self.assertIsNone(get_oembed_data(page_url))

    def test_oembed_html(self) -> None:
        """HTML without a CDATA wrapper passes through strip_cdata unchanged."""
        markup = '<iframe src="//www.instagram.com/embed.js"></iframe>'
        self.assertEqual(markup, strip_cdata(markup))

    def test_autodiscovered_oembed_xml_format_html(self) -> None:
        """strip_cdata unwraps a <![CDATA[...]]> section to its inner HTML."""
        inner_markup = '<iframe src="https://w.soundcloud.com/player"></iframe>'
        wrapped_markup = f"<![CDATA[{inner_markup}]]>"
        self.assertEqual(inner_markup, strip_cdata(wrapped_markup))
|
2019-05-02 18:58:39 +02:00
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
|
|
|
|
class OpenGraphParserTestCase(ZulipTestCase):
    """Check OpenGraphParser.extract_data on small hand-written pages."""

    def test_page_with_og(self) -> None:
        """og:title and og:description are read from the page's meta tags."""
        page = b"""<html>
<head>
<meta property="og:title" content="The Rock" />
<meta property="og:type" content="video.movie" />
<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
<meta property="og:description" content="The Rock film" />
</head>
</html>"""

        extracted = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(extracted.title, "The Rock")
        self.assertEqual(extracted.description, "The Rock film")

    def test_charset_in_header(self) -> None:
        """A Big5 page decodes correctly when the content type names the charset."""
        page = """<html>
<head>
<meta property="og:title" content="中文" />
</head>
</html>""".encode("big5")
        extracted = OpenGraphParser(page, "text/html; charset=Big5").extract_data()
        self.assertEqual(extracted.title, "中文")

    def test_charset_in_meta(self) -> None:
        """A Big5 page decodes correctly when only a meta tag names the charset."""
        page = """<html>
<head>
<meta content-type="text/html; charset=Big5" />
<meta property="og:title" content="中文" />
</head>
</html>""".encode("big5")
        extracted = OpenGraphParser(page, "text/html").extract_data()
        self.assertEqual(extracted.title, "中文")
|
2020-12-08 04:26:30 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
class GenericParserTestCase(ZulipTestCase):
    """Check GenericParser.extract_data on pages without Open Graph tags."""

    def test_parser(self) -> None:
        """Title comes from <title>; description from the paragraph text."""
        page = b"""
<html>
<head><title>Test title</title></head>
<body>
<h1>Main header</h1>
<p>Description text</p>
</body>
</html>
"""
        extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(extracted.title, "Test title")
        self.assertEqual(extracted.description, "Description text")

    def test_extract_image(self) -> None:
        """The <img src> URL is reported; title falls back to the <h1> text."""
        page = b"""
<html>
<body>
<h1>Main header</h1>
<img data-src="Not an image">
<img src="http://test.com/test.jpg">
<div>
<p>Description text</p>
</div>
</body>
</html>
"""
        extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(extracted.title, "Main header")
        self.assertEqual(extracted.description, "Description text")
        self.assertEqual(extracted.image, "http://test.com/test.jpg")

    def test_extract_bad_image(self) -> None:
        """An unparsable <img src> URL leaves the image unset."""
        page = b"""
<html>
<body>
<h1>Main header</h1>
<img data-src="Not an image">
<img src="http://[bad url/test.jpg">
<div>
<p>Description text</p>
</div>
</body>
</html>
"""
        extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(extracted.title, "Main header")
        self.assertEqual(extracted.description, "Description text")
        self.assertIsNone(extracted.image)

    def test_extract_description(self) -> None:
        """Description is taken from a nested <p>, a meta tag, or is None."""
        # Case 1: the only text is a paragraph nested inside two <div>s.
        nested_paragraph_page = b"""
<html>
<body>
<div>
<div>
<p>Description text</p>
</div>
</div>
</body>
</html>
"""
        extracted = GenericParser(
            nested_paragraph_page, "text/html; charset=UTF-8"
        ).extract_data()
        self.assertEqual(extracted.description, "Description text")

        # Case 2: the description is supplied by a <meta name="description"> tag.
        meta_description_page = b"""
<html>
<head><meta name="description" content="description 123"</head>
<body></body>
</html>
"""
        extracted = GenericParser(
            meta_description_page, "text/html; charset=UTF-8"
        ).extract_data()
        self.assertEqual(extracted.description, "description 123")

        # Case 3: an empty body offers nothing to use as a description.
        empty_page = b"<html><body></body></html>"
        extracted = GenericParser(empty_page, "text/html; charset=UTF-8").extract_data()
        self.assertIsNone(extracted.description)
|
2016-12-13 04:20:33 +01:00
|
|
|
|
|
|
|
|
|
|
|
class PreviewTestCase(ZulipTestCase):
|
2017-02-23 06:17:16 +01:00
|
|
|
open_graph_html = """
|
2016-12-13 04:20:33 +01:00
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<title>Test title</title>
|
|
|
|
<meta property="og:title" content="The Rock" />
|
|
|
|
<meta property="og:type" content="video.movie" />
|
|
|
|
<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
|
|
|
|
<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
|
2019-12-12 02:10:50 +01:00
|
|
|
<meta http-equiv="refresh" content="30" />
|
|
|
|
<meta property="notog:extra-text" content="Extra!" />
|
2016-12-13 04:20:33 +01:00
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<h1>Main header</h1>
|
|
|
|
<p>Description text</p>
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
"""
|
2017-02-23 06:17:16 +01:00
|
|
|
|
2023-10-12 19:43:45 +02:00
|
|
|
@override
def setUp(self) -> None:
    """Run the base setup, then switch on URL embed previews for every realm."""
    super().setUp()
    every_realm = Realm.objects.all()
    every_realm.update(inline_url_embed_preview=True)
|
|
|
|
|
2019-05-05 06:41:34 +02:00
|
|
|
@classmethod
def create_mock_response(
    cls,
    url: str,
    status: int = 200,
    relative_url: bool = False,
    content_type: str = "text/html",
    body: Optional[Union[str, ConnectionError]] = None,
) -> None:
    """Register a mocked GET response for ``url`` with the ``responses`` library.

    When ``body`` is omitted the class's ``open_graph_html`` page is served.
    With ``relative_url`` set, the "http://ia.media-imdb.com" prefix is removed
    from a string body, leaving the image reference relative.
    """
    payload = cls.open_graph_html if body is None else body
    # Only string bodies can be rewritten; an exception body is passed through.
    if isinstance(payload, str) and relative_url is True:
        payload = payload.replace("http://ia.media-imdb.com", "")
    responses.add(
        responses.GET,
        url,
        body=payload,
        status=status,
        content_type=content_type,
    )
|
|
|
|
|
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_edit_message_history(self) -> None:
    """Editing a message so that it contains a URL queues and renders a preview."""
    user = self.example_user("hamlet")
    self.login_user(user)
    msg_id = self.send_stream_message(user, "Denmark", topic_name="editing", content="original")

    url = "http://test.org/"
    self.create_mock_response(url)

    # Patch the publish call made by the edit path so the queued event can be
    # inspected here and replayed through the worker below.
    with mock_queue_publish("zerver.actions.message_edit.queue_json_publish") as patched:
        result = self.client_patch(
            "/json/messages/" + str(msg_id),
            {
                "content": url,
            },
        )
        self.assert_json_success(result)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        # assertIn shows both strings on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    embedded_link = f'<a href="{url}" title="The Rock">The Rock</a>'
    msg = Message.objects.select_related("sender").get(id=msg_id)
    assert msg.rendered_content is not None
    self.assertIn(embedded_link, msg.rendered_content)
|
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def _send_message_with_test_org_url(
    self, sender: UserProfile, queue_should_run: bool = True, relative_url: bool = False
) -> Message:
    """Send "http://test.org/" as a personal message and process its preview.

    Args:
        sender: User who sends the message (the recipient is always cordelia).
        queue_should_run: Whether an "embed_links" event is expected to be
            published. When False, the method asserts nothing was published
            and returns the message without running the worker.
        relative_url: Forwarded to create_mock_response for the mocked page.

    Returns:
        The message re-fetched from the database after any processing.
    """
    url = "http://test.org/"
    # Ensure the cache for this is empty
    cache_delete(preview_url_cache_key(url))
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_personal_message(
            sender,
            self.example_user("cordelia"),
            content=url,
        )
        if queue_should_run:
            patched.assert_called_once()
            queue = patched.call_args[0][0]
            self.assertEqual(queue, "embed_links")
            event = patched.call_args[0][1]
        else:
            patched.assert_not_called()
            # If nothing was put in the queue, we don't need to run the
            # queue processor or any of the following code.
            return Message.objects.select_related("sender").get(id=msg_id)

    # Verify the initial message doesn't have the embedded links rendered
    msg = Message.objects.select_related("sender").get(id=msg_id)
    assert msg.rendered_content is not None
    self.assertNotIn(f'<a href="{url}" title="The Rock">The Rock</a>', msg.rendered_content)

    self.create_mock_response(url, relative_url=relative_url)

    # Run the queue processor to potentially rerender things
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        # assertIn shows both strings on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    return Message.objects.select_related("sender").get(id=msg_id)
|
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_message_update_race_condition(self) -> None:
    """A stale embed event must not render once the message has been edited.

    The event published for the original URL is held back and only consumed
    while the edit's own publish call is in progress; the preview for the
    original URL must not appear, and the preview for the edited URL must.
    """
    user = self.example_user("hamlet")
    self.login_user(user)
    original_url = "http://test.org/"
    edited_url = "http://edited.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(
            user, "Denmark", topic_name="foo", content=original_url
        )
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    def wrapped_queue_json_publish(*args: Any, **kwargs: Any) -> None:
        self.create_mock_response(original_url)
        self.create_mock_response(edited_url)

        with self.settings(TEST_SUITE=False):
            with self.assertLogs(level="INFO") as info_logs:
                # Run the queue processor. This will simulate the event for original_url being
                # processed after the message has been edited.
                FetchLinksEmbedData().consume(event)
            # assertIn shows both strings on failure, unlike assertTrue(a in b).
            self.assertIn(
                "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
                info_logs.output[0],
            )
        msg = Message.objects.select_related("sender").get(id=msg_id)
        assert msg.rendered_content is not None
        # The content of the message has changed since the event for original_url has been created,
        # it should not be rendered. Another, up-to-date event will have been sent (edited_url).
        self.assertNotIn(
            f'<a href="{original_url}" title="The Rock">The Rock</a>', msg.rendered_content
        )

        # Nothing has requested the edited URL yet at this point.
        self.assertTrue(responses.assert_call_count(edited_url, 0))

        with self.settings(TEST_SUITE=False):
            with self.assertLogs(level="INFO") as info_logs:
                # Now proceed with the original queue_json_publish and call the
                # up-to-date event for edited_url.
                queue_json_publish(*args, **kwargs)
                msg = Message.objects.select_related("sender").get(id=msg_id)
                assert msg.rendered_content is not None
                self.assertIn(
                    f'<a href="{edited_url}" title="The Rock">The Rock</a>',
                    msg.rendered_content,
                )
            self.assertIn(
                "INFO:root:Time spent on get_link_embed_data for http://edited.org/: ",
                info_logs.output[0],
            )

    with mock_queue_publish(
        "zerver.actions.message_edit.queue_json_publish", wraps=wrapped_queue_json_publish
    ):
        result = self.client_patch(
            "/json/messages/" + str(msg_id),
            {
                "content": edited_url,
            },
        )
        self.assert_json_success(result)
|
|
|
|
|
2022-04-18 23:30:19 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_message_deleted(self) -> None:
    """The worker still fetches and logs when the message was deleted first."""
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    msg = Message.objects.select_related("sender").get(id=msg_id)
    do_delete_messages(msg.realm, [msg])

    # We do still fetch the URL, as we don't want to incur the
    # cost of locking the row while we do the HTTP fetches.
    self.create_mock_response(url)
    with self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            # Run the queue processor. This will simulate the event for the URL
            # being processed after the message has been deleted.
            FetchLinksEmbedData().consume(event)
        # assertIn shows both strings on failure, unlike assertTrue(a in b).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )
|
|
|
|
|
2017-11-05 10:51:25 +01:00
|
|
|
def test_get_link_embed_data(self) -> None:
    """Previews are rendered for human senders but not for a bot sender."""
    target = "http://test.org/"
    preview_anchor = f'<a href="{target}" title="The Rock">The Rock</a>'

    # When humans send, we should get embedded content.
    from_hamlet = self._send_message_with_test_org_url(sender=self.example_user("hamlet"))
    self.assertIn(preview_anchor, from_hamlet.rendered_content)

    # We don't want embedded content for bots.
    from_bot = self._send_message_with_test_org_url(
        sender=self.example_user("webhook_bot"), queue_should_run=False
    )
    self.assertNotIn(preview_anchor, from_bot.rendered_content)

    # Try another human to make sure bot failure was due to the
    # bot sending the message and not some other reason.
    from_prospero = self._send_message_with_test_org_url(sender=self.example_user("prospero"))
    self.assertIn(preview_anchor, from_prospero.rendered_content)
|
2016-12-13 04:20:33 +01:00
|
|
|
|
2021-10-21 02:48:28 +02:00
|
|
|
@override_settings(CAMO_URI="")
def test_inline_url_embed_preview(self) -> None:
    """The preview block is rendered only while the realm setting is enabled."""
    expected_with_preview = (
        '<p><a href="http://test.org/">http://test.org/</a></p>\n'
        '<div class="message_embed">'
        '<a class="message_embed_image" href="http://test.org/" '
        'style="background-image: url(http\\:\\/\\/ia\\.media-imdb\\.com\\/images\\/rock\\.jpg)"></a>'
        '<div class="data-container">'
        '<div class="message_embed_title"><a href="http://test.org/" title="The Rock">The Rock</a></div>'
        '<div class="message_embed_description">Description text</div>'
        "</div></div>"
    )
    expected_without_preview = '<p><a href="http://test.org/">http://test.org/</a></p>'

    first_message = self._send_message_with_test_org_url(sender=self.example_user("hamlet"))
    self.assertEqual(first_message.rendered_content, expected_with_preview)

    # Switch the feature off for the realm and send again.
    realm = first_message.get_realm()
    realm.inline_url_embed_preview = False
    realm.save()

    second_message = self._send_message_with_test_org_url(
        sender=self.example_user("prospero"), queue_should_run=False
    )
    self.assertEqual(second_message.rendered_content, expected_without_preview)
|
|
|
|
|
2021-10-21 02:48:28 +02:00
|
|
|
def test_inline_url_embed_preview_with_camo(self) -> None:
    # Backslash-escape every character of the camo URL that is neither a
    # word character nor "-", matching the escaped form expected inside the
    # rendered url(...) value.
    escaped_camo_url = re.sub(
        r"([^\w-])", r"\\\1", get_camo_url("http://ia.media-imdb.com/images/rock.jpg")
    )
    markup_before_url = '<p><a href="http://test.org/">http://test.org/</a></p>\n<div class="message_embed"><a class="message_embed_image" href="http://test.org/" style="background-image: url('
    markup_after_url = ')"></a><div class="data-container"><div class="message_embed_title"><a href="http://test.org/" title="The Rock">The Rock</a></div><div class="message_embed_description">Description text</div></div></div>'
    expected_html = f"{markup_before_url}{escaped_camo_url}{markup_after_url}"

    sent_msg = self._send_message_with_test_org_url(sender=self.example_user("hamlet"))
    self.assertEqual(sent_msg.rendered_content, expected_html)
|
|
|
|
|
2021-10-21 23:20:56 +02:00
|
|
|
@responses.activate
@override_settings(CAMO_URI="")
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_css_escaping_image(self) -> None:
    # An og:image URL containing ")" must be backslash-escaped inside the
    # rendered background-image url(...) value.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/"

    # Capture the event that sending the message publishes to the queue,
    # so the worker can be driven by hand below.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # Swap the URL out for one with characters that need CSS escaping
    html = re.sub(r"rock\.jpg", "rock).jpg", self.open_graph_html)
    self.create_mock_response(url, body=html)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
        info_logs.output[0],
    )

    msg = Message.objects.select_related("sender").get(id=msg_id)
    with_preview = (
        '<p><a href="http://test.org/">http://test.org/</a></p>\n'
        '<div class="message_embed"><a class="message_embed_image" href="http://test.org/"'
        ' style="background-image:'
        ' url(http\\:\\/\\/ia\\.media-imdb\\.com\\/images\\/rock\\)\\.jpg)"></a><div'
        ' class="data-container"><div class="message_embed_title"><a href="http://test.org/"'
        ' title="The Rock">The Rock</a></div><div class="message_embed_description">Description'
        " text</div></div></div>"
    )
    self.assertEqual(
        with_preview,
        msg.rendered_content,
    )
|
|
|
|
|
2021-10-21 02:48:28 +02:00
|
|
|
@override_settings(CAMO_URI="")
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_inline_relative_url_embed_preview(self) -> None:
    # Relative URLs should not be sent for URL preview: sending a message
    # whose only link is http://zulip.testserver/api/ must publish nothing.
    sender = self.example_user("prospero")
    recipient = self.example_user("cordelia")
    own_server_link = "http://zulip.testserver/api/"

    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as publish_mock:
        self.send_personal_message(sender, recipient, content=own_server_link)
        publish_mock.assert_not_called()
|
|
|
|
|
2021-10-21 02:48:28 +02:00
|
|
|
@override_settings(CAMO_URI="")
def test_inline_url_embed_preview_with_relative_image_url(self) -> None:
    # Try case where the Open Graph image is a relative URL: the expected
    # markup carries the image as an absolute (CSS-escaped) test.org URL.
    link_html = '<p><a href="http://test.org/">http://test.org/</a></p>'
    embed_html = (
        '<div class="message_embed">'
        '<a class="message_embed_image" href="http://test.org/"'
        ' style="background-image: url(http\\:\\/\\/test\\.org\\/images\\/rock\\.jpg)"></a>'
        '<div class="data-container">'
        '<div class="message_embed_title"><a href="http://test.org/" title="The Rock">The Rock</a></div>'
        '<div class="message_embed_description">Description text</div>'
        "</div></div>"
    )
    expected_html = link_html + "\n" + embed_html

    prospero = self.example_user("prospero")
    sent_msg = self._send_message_with_test_org_url(sender=prospero, relative_url=True)
    self.assertEqual(sent_msg.rendered_content, expected_html)
|
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
def test_http_error_get_data(self) -> None:
    # When fetching the linked page raises ConnectionError, the worker must
    # still log its timing line and leave the message rendered as a bare link.
    url = "http://test.org/"
    msg_id = self.send_personal_message(
        self.example_user("hamlet"),
        self.example_user("cordelia"),
        content=url,
    )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event for the embed_links worker.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    # Make the mocked HTTP layer raise instead of returning a response.
    self.create_mock_response(url, body=ConnectionError())

    with self.settings(
        INLINE_URL_EMBED_PREVIEW=True, TEST_SUITE=False
    ), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
        info_logs.output[0],
    )

    # Re-read the row so we see whatever the worker wrote (or did not write).
    msg = Message.objects.get(id=msg_id)
    self.assertEqual(
        '<p><a href="http://test.org/">http://test.org/</a></p>', msg.rendered_content
    )
|
2016-12-13 04:20:33 +01:00
|
|
|
|
2017-11-05 10:51:25 +01:00
|
|
|
def test_invalid_link(self) -> None:
    # Neither the ASCII nor the non-ASCII bogus link may produce embed data.
    bogus_links = ("com.notvalidlink", "μένει.com.notvalidlink")
    with self.settings(INLINE_URL_EMBED_PREVIEW=True, TEST_SUITE=False):
        for bogus_link in bogus_links:
            self.assertIsNone(get_link_embed_data(bogus_link))
|
2018-03-18 20:58:13 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_non_html_data(self) -> None:
    # A URL served as application/octet-stream must cache None as its
    # preview data and leave the message rendered as a bare link.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/audio.mp3"

    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    content_type = "application/octet-stream"
    self.create_mock_response(url, content_type=content_type)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
        # cache_get returns a wrapper here; [0] is the cached value itself.
        cached_data = cache_get(preview_url_cache_key(url))[0]
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/audio.mp3: ",
        info_logs.output[0],
    )
    self.assertIsNone(cached_data)

    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        '<p><a href="http://test.org/audio.mp3">http://test.org/audio.mp3</a></p>',
        msg.rendered_content,
    )
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_no_open_graph_image(self) -> None:
    # A page with no og:image line still yields cached data with a title,
    # but no image, and the message stays rendered as a bare link.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/foo.html"

    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # HTML without the og:image metadata
    html = "\n".join(
        line for line in self.open_graph_html.splitlines() if "og:image" not in line
    )
    self.create_mock_response(url, body=html)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
        cached_data = cache_get(preview_url_cache_key(url))[0]
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: ",
        info_logs.output[0],
    )

    # Plain assert narrows the Optional for the type checker.
    assert cached_data is not None
    self.assertIsNotNone(cached_data.title)
    self.assertIsNone(cached_data.image)

    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        '<p><a href="http://test.org/foo.html">http://test.org/foo.html</a></p>',
        msg.rendered_content,
    )
|
|
|
|
|
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_open_graph_image_bad_url(self) -> None:
    # An og:image whose content is an unparsable URL must be dropped: the
    # cached data keeps its title but has no image, and no embed is rendered.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/foo.html"

    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # HTML with a bad og:image metadata
    html = "\n".join(
        (
            line
            if "og:image" not in line
            else '<meta property="og:image" content="http://[bad url/" />'
        )
        for line in self.open_graph_html.splitlines()
    )
    self.create_mock_response(url, body=html)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
        cached_data = cache_get(preview_url_cache_key(url))[0]
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: ",
        info_logs.output[0],
    )

    # Plain assert narrows the Optional for the type checker.
    assert cached_data is not None
    self.assertIsNotNone(cached_data.title)
    self.assertIsNone(cached_data.image)

    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        '<p><a href="http://test.org/foo.html">http://test.org/foo.html</a></p>',
        msg.rendered_content,
    )
|
2019-06-23 13:36:58 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_open_graph_image_missing_content(self) -> None:
    # An og:image meta tag that lacks a content attribute must be treated
    # as "no image": the title is still cached, and no embed is rendered.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/foo.html"

    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # HTML whose og:image meta tag is present but has no content attribute
    html = "\n".join(
        line if "og:image" not in line else '<meta property="og:image"/>'
        for line in self.open_graph_html.splitlines()
    )
    self.create_mock_response(url, body=html)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
        cached_data = cache_get(preview_url_cache_key(url))[0]
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: ",
        info_logs.output[0],
    )

    # Plain assert narrows the Optional for the type checker.
    assert cached_data is not None
    self.assertIsNotNone(cached_data.title)
    self.assertIsNone(cached_data.image)

    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        '<p><a href="http://test.org/foo.html">http://test.org/foo.html</a></p>',
        msg.rendered_content,
    )
|
2019-12-12 02:10:50 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(CAMO_URI="")
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_no_content_type_header(self) -> None:
    # The mock response is created without an explicit content_type; the
    # preview must still be fetched, cached and rendered into the message.
    # NOTE(review): presumably create_mock_response then omits the
    # Content-Type header -- confirm against the helper.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/"

    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Denmark", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    self.create_mock_response(url)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
        cached_data = cache_get(preview_url_cache_key(url))[0]
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
        info_logs.output[0],
    )

    # Plain asserts narrow the Optionals for the type checker.
    assert cached_data is not None
    msg = Message.objects.select_related("sender").get(id=msg_id)
    assert msg.rendered_content is not None
    self.assertIn(cached_data.title, msg.rendered_content)
    assert cached_data.image is not None
    # The image URL appears in the rendered HTML with every character other
    # than word characters and "-" backslash-escaped.
    self.assertIn(re.sub(r"([^\w-])", r"\\\1", cached_data.image), msg.rendered_content)
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_valid_content_type_error_get_data(self) -> None:
    # With the content-type check forced to pass and oEmbed forced to return
    # nothing, a ConnectionError while fetching the page must leave nothing
    # in the cache and the message rendered as a bare link.
    url = "http://test.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event for the embed_links worker.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    # Make the mocked HTTP layer raise instead of returning a response.
    self.create_mock_response(url, body=ConnectionError())

    # The patches are only created here; they take effect when entered below.
    no_oembed_data = mock.patch(
        "zerver.lib.url_preview.preview.get_oembed_data",
        side_effect=lambda *args, **kwargs: None,
    )
    any_content_type_valid = mock.patch(
        "zerver.lib.url_preview.preview.valid_content_type", side_effect=lambda k: True
    )
    with no_oembed_data, any_content_type_valid, self.settings(TEST_SUITE=False):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

        # This did not get cached -- hence the lack of [0] on the cache_get
        cached_data = cache_get(preview_url_cache_key(url))
        self.assertIsNone(cached_data)

    msg.refresh_from_db()
    self.assertEqual(
        '<p><a href="http://test.org/">http://test.org/</a></p>', msg.rendered_content
    )
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_invalid_url(self) -> None:
    # A linked URL that answers 404 caches None as its preview data, leaves
    # the message as a bare link, and never causes a request to the site root.
    url = "http://test.org/"
    error_url = "http://test.org/x"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=error_url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event for the embed_links worker.
    event = {
        "message_id": msg_id,
        "urls": [error_url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": error_url,
    }

    self.create_mock_response(error_url, status=404)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        FetchLinksEmbedData().consume(event)
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/x: ",
        info_logs.output[0],
    )

    # FIXME: Should we really cache this, especially without cache invalidation?
    cached_data = cache_get(preview_url_cache_key(error_url))[0]

    self.assertIsNone(cached_data)
    msg.refresh_from_db()
    self.assertEqual(
        '<p><a href="http://test.org/x">http://test.org/x</a></p>', msg.rendered_content
    )
    # Only error_url may have been requested; the bare site root never was.
    self.assertTrue(responses.assert_call_count(url, 0))
|
2019-05-02 18:58:39 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_safe_oembed_html_url(self) -> None:
    # oEmbed "video" data returned by the (patched) oEmbed lookup is cached
    # as-is, and its HTML ends up HTML-escaped inside a data-id attribute of
    # the rendered message.
    url = "http://test.org/"
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event for the embed_links worker.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = UrlOEmbedData(
        html=f'<iframe src="{url}"></iframe>',
        type="video",
        image=f"{url}/image.png",
    )
    self.create_mock_response(url)

    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        # Replace the oEmbed lookup so it always returns mocked_data.
        with mock.patch(
            "zerver.lib.url_preview.preview.get_oembed_data",
            lambda *args, **kwargs: mocked_data,
        ):
            FetchLinksEmbedData().consume(event)
            cached_data = cache_get(preview_url_cache_key(url))[0]
    # assertIn shows the actual log line on failure, unlike assertTrue(x in y).
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
        info_logs.output[0],
    )

    self.assertEqual(cached_data, mocked_data)
    msg.refresh_from_db()
    # Plain assert narrows the Optional for the type checker.
    assert msg.rendered_content is not None
    self.assertIn(f'a data-id="{escape(mocked_data.html)}"', msg.rendered_content)
|
2019-03-21 21:08:26 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_youtube_url_title_replaces_url(self) -> None:
    """Check that a bare YouTube URL is rendered with the fetched title as link text.

    Sends a personal message whose whole content is a YouTube URL, runs the
    ``FetchLinksEmbedData`` worker on a hand-built event with
    ``get_link_embed_data`` patched to return a fixed title, and then
    asserts on both the timing log line and the re-rendered message HTML.
    """
    url = "https://www.youtube.com/watch?v=eSJTXC7Ixgg"

    # Queue publishing is mocked while sending; the worker is driven
    # explicitly below with a hand-built event instead.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = UrlEmbedData(
        title="Clearer Code at Scale - Static Types at Zulip and Dropbox"
    )
    self.create_mock_response(url)

    # NOTE(review): TEST_SUITE=False presumably enables the real preview
    # code path inside the worker -- confirm against the preview module.
    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        with mock.patch(
            "zerver.worker.queue_processors.url_preview.get_link_embed_data",
            lambda *args, **kwargs: mocked_data,
        ):
            FetchLinksEmbedData().consume(event)
        # assertIn (rather than assertTrue(x in y)) prints both operands
        # when the expected log line is missing.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for https://www.youtube.com/watch?v=eSJTXC7Ixgg:",
            info_logs.output[0],
        )

    msg.refresh_from_db()
    expected_content = f"""<p><a href="https://www.youtube.com/watch?v=eSJTXC7Ixgg">YouTube - Clearer Code at Scale - Static Types at Zulip and Dropbox</a></p>\n<div class="youtube-video message_inline_image"><a data-id="eSJTXC7Ixgg" href="https://www.youtube.com/watch?v=eSJTXC7Ixgg"><img src="{get_camo_url("https://i.ytimg.com/vi/eSJTXC7Ixgg/default.jpg")}"></a></div>"""
    self.assertEqual(expected_content, msg.rendered_content)
|
2020-09-09 07:38:17 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_custom_title_replaces_youtube_url_title(self) -> None:
    """Check that an explicit Markdown link title wins over the fetched YouTube title.

    Sends a personal message containing ``[YouTube link](<youtube url>)``,
    runs the ``FetchLinksEmbedData`` worker with ``get_link_embed_data``
    patched to return a fixed title, and asserts that the rendered link
    text is still "YouTube link" while the video thumbnail is present.
    """
    # NOTE(review): despite its name, `url` holds Markdown link syntax, and
    # that same string is passed as the event's "urls" entry below. The
    # logged text asserted later depends on this, so it is left as-is --
    # confirm whether the bare URL was intended.
    url = "[YouTube link](https://www.youtube.com/watch?v=eSJTXC7Ixgg)"

    # Queue publishing is mocked while sending; the worker is driven
    # explicitly below with a hand-built event instead.
    with mock_queue_publish("zerver.actions.message_send.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = UrlEmbedData(
        title="Clearer Code at Scale - Static Types at Zulip and Dropbox"
    )
    self.create_mock_response(url)

    # NOTE(review): TEST_SUITE=False presumably enables the real preview
    # code path inside the worker -- confirm against the preview module.
    with self.settings(TEST_SUITE=False), self.assertLogs(level="INFO") as info_logs:
        with mock.patch(
            "zerver.worker.queue_processors.url_preview.get_link_embed_data",
            lambda *args, **kwargs: mocked_data,
        ):
            FetchLinksEmbedData().consume(event)
        # assertIn (rather than assertTrue(x in y)) prints both operands
        # when the expected log line is missing.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for [YouTube link](https://www.youtube.com/watch?v=eSJTXC7Ixgg):",
            info_logs.output[0],
        )

    msg.refresh_from_db()
    expected_content = f"""<p><a href="https://www.youtube.com/watch?v=eSJTXC7Ixgg">YouTube link</a></p>\n<div class="youtube-video message_inline_image"><a data-id="eSJTXC7Ixgg" href="https://www.youtube.com/watch?v=eSJTXC7Ixgg"><img src="{get_camo_url("https://i.ytimg.com/vi/eSJTXC7Ixgg/default.jpg")}"></a></div>"""
    self.assertEqual(expected_content, msg.rendered_content)
|