2021-06-09 00:18:46 +02:00
|
|
|
from collections import OrderedDict
|
|
|
|
from typing import Any, Optional, Union
|
2020-05-26 07:16:25 +02:00
|
|
|
from unittest import mock
|
2021-06-09 00:18:46 +02:00
|
|
|
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
import responses
|
2016-10-27 12:06:44 +02:00
|
|
|
from django.test import override_settings
|
2019-05-02 18:58:39 +02:00
|
|
|
from django.utils.html import escape
|
2021-06-09 00:18:46 +02:00
|
|
|
from pyoembed.providers import get_provider
|
2020-06-11 00:54:34 +02:00
|
|
|
from requests.exceptions import ConnectionError
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.cache import NotFoundInCache, cache_set, preview_url_cache_key
|
2021-03-23 10:34:55 +01:00
|
|
|
from zerver.lib.camo import get_camo_url
|
2021-07-16 22:11:10 +02:00
|
|
|
from zerver.lib.queue import queue_json_publish
|
2016-10-27 12:06:44 +02:00
|
|
|
from zerver.lib.test_classes import ZulipTestCase
|
2021-06-09 00:18:46 +02:00
|
|
|
from zerver.lib.test_helpers import mock_queue_publish
|
2019-12-12 09:39:41 +01:00
|
|
|
from zerver.lib.url_preview.oembed import get_oembed_data, strip_cdata
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.url_preview.parsers import GenericParser, OpenGraphParser
|
|
|
|
from zerver.lib.url_preview.preview import get_link_embed_data, link_embed_data_from_cache
|
|
|
|
from zerver.models import Message, Realm, UserProfile
|
|
|
|
from zerver.worker.queue_processors import FetchLinksEmbedData
|
2016-10-27 12:06:44 +02:00
|
|
|
|
2016-12-13 04:20:33 +01:00
|
|
|
# Cache configuration used by the tests below: every alias is backed by
# Django's local-memory cache. "database" and "in-memory" share one LOCATION.
TEST_CACHES = {
    alias: {
        "BACKEND": "django.core.cache.backends.locmem.LocMemCache",
        "LOCATION": location,
    }
    for alias, location in (
        ("default", "default"),
        ("database", "url-preview"),
        ("in-memory", "url-preview"),
    )
}
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
def reconstruct_url(url: str, maxwidth: int = 640, maxheight: int = 480) -> str:
    """Build the oEmbed endpoint URL for ``url`` with size limits applied.

    The logic is taken from
    https://github.com/rafaelmartins/pyoembed/blob/master/pyoembed/__init__.py.
    This helper is used indirectly to mock the HTTP responses: tests register
    their fake response at the URL returned here.

    ``maxwidth`` and ``maxheight`` are written into the query string as
    strings, replacing any parameters of the same name already present.
    """
    endpoint = get_provider(str(url)).oembed_url(url)
    parts = urlsplit(endpoint)

    params = OrderedDict(parse_qsl(parts.query))
    params["maxwidth"] = str(maxwidth)
    params["maxheight"] = str(maxheight)

    return urlunsplit(
        (parts.scheme, parts.netloc, parts.path, urlencode(params, True), parts.fragment)
    )
|
|
|
|
|
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
class OembedTestCase(ZulipTestCase):
    """Tests for ``get_oembed_data`` and ``strip_cdata`` using mocked HTTP responses."""

    def _mock_oembed_endpoint(self, url: str, **response_kwargs: Any) -> None:
        """Register a mocked GET response at the oEmbed endpoint derived from ``url``.

        ``response_kwargs`` are forwarded unchanged to ``responses.add``.
        """
        responses.add(responses.GET, reconstruct_url(url), **response_kwargs)

    @responses.activate
    def test_present_provider(self) -> None:
        url = "http://instagram.com/p/BLtI2WdAymy"
        payload = {
            "type": "rich",
            "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "thumbnail_width": 640,
            "thumbnail_height": 426,
            "title": "NASA",
            "html": "<p>test</p>",
            "version": "1.0",
            "width": 658,
            "height": 400,
        }
        self._mock_oembed_endpoint(url, json=payload, status=200)

        data = get_oembed_data(url)
        self.assertIsInstance(data, dict)
        self.assertIn("title", data)
        assert data is not None  # allow mypy to infer data is indexable
        self.assertEqual(data["title"], payload["title"])

    @responses.activate
    def test_photo_provider(self) -> None:
        # pyoembed.providers.imgur only works with http:// URLs, not https:// (!)
        url = "http://imgur.com/photo/158727223"
        payload = {
            "type": "photo",
            "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "thumbnail_width": 640,
            "thumbnail_height": 426,
            "title": "NASA",
            "html": "<p>test</p>",
            "version": "1.0",
            "width": 658,
            "height": 400,
        }
        self._mock_oembed_endpoint(url, json=payload, status=200)

        data = get_oembed_data(url)
        self.assertIsInstance(data, dict)
        self.assertIn("title", data)
        assert data is not None  # allow mypy to infer data is indexable
        self.assertEqual(data["title"], payload["title"])
        self.assertTrue(data["oembed"])

    @responses.activate
    def test_video_provider(self) -> None:
        url = "http://blip.tv/video/158727223"
        payload = {
            "type": "video",
            "thumbnail_url": "https://scontent.cdninstagram.com/t51.2885-15/n.jpg",
            "thumbnail_width": 640,
            "thumbnail_height": 426,
            "title": "NASA",
            "html": "<p>test</p>",
            "version": "1.0",
            "width": 658,
            "height": 400,
        }
        self._mock_oembed_endpoint(url, json=payload, status=200)

        data = get_oembed_data(url)
        self.assertIsInstance(data, dict)
        self.assertIn("title", data)
        assert data is not None  # allow mypy to infer data is indexable
        self.assertEqual(data["title"], payload["title"])

    @responses.activate
    def test_connect_error_request(self) -> None:
        url = "http://instagram.com/p/BLtI2WdAymy"
        self._mock_oembed_endpoint(url, body=ConnectionError())
        self.assertIsNone(get_oembed_data(url))

    @responses.activate
    def test_400_error_request(self) -> None:
        url = "http://instagram.com/p/BLtI2WdAymy"
        self._mock_oembed_endpoint(url, status=400)
        self.assertIsNone(get_oembed_data(url))

    @responses.activate
    def test_500_error_request(self) -> None:
        url = "http://instagram.com/p/BLtI2WdAymy"
        self._mock_oembed_endpoint(url, status=500)
        self.assertIsNone(get_oembed_data(url))

    @responses.activate
    def test_invalid_json_in_response(self) -> None:
        url = "http://instagram.com/p/BLtI2WdAymy"
        self._mock_oembed_endpoint(url, json="{invalid json}", status=200)
        self.assertIsNone(get_oembed_data(url))

    def test_oembed_html(self) -> None:
        # Markup without a CDATA wrapper is expected back unchanged.
        html = '<iframe src="//www.instagram.com/embed.js"></iframe>'
        self.assertEqual(html, strip_cdata(html))

    def test_autodiscovered_oembed_xml_format_html(self) -> None:
        # Only the content inside the CDATA wrapper is expected back.
        iframe_content = '<iframe src="https://w.soundcloud.com/player"></iframe>'
        wrapped = f"<![CDATA[{iframe_content}]]>"
        self.assertEqual(iframe_content, strip_cdata(wrapped))
|
2019-05-02 18:58:39 +02:00
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
|
|
|
|
class OpenGraphParserTestCase(ZulipTestCase):
    """Tests for ``OpenGraphParser.extract_data`` on small in-memory HTML pages."""

    def test_page_with_og(self) -> None:
        page = (
            b"<html>\n"
            b"<head>\n"
            b'<meta property="og:title" content="The Rock" />\n'
            b'<meta property="og:type" content="video.movie" />\n'
            b'<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />\n'
            b'<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />\n'
            b'<meta property="og:description" content="The Rock film" />\n'
            b"</head>\n"
            b"</html>"
        )

        extracted = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertIn("title", extracted)
        self.assertEqual(extracted["title"], "The Rock")
        self.assertEqual(extracted.get("description"), "The Rock film")

    def test_page_with_evil_og_tags(self) -> None:
        page = (
            b"<html>\n"
            b"<head>\n"
            b'<meta property="og:title" content="The Rock" />\n'
            b'<meta property="og:type" content="video.movie" />\n'
            b'<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />\n'
            b'<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />\n'
            b'<meta property="og:description" content="The Rock film" />\n'
            b'<meta property="og:html" content="<script>alert(window.location)</script>" />\n'
            b'<meta property="og:oembed" content="True" />\n'
            b"</head>\n"
            b"</html>"
        )

        extracted = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertIn("title", extracted)
        self.assertEqual(extracted["title"], "The Rock")
        self.assertEqual(extracted.get("description"), "The Rock film")
        # The og:oembed and og:html tags must not surface in the result.
        self.assertIsNone(extracted.get("oembed"))
        self.assertIsNone(extracted.get("html"))

    def test_charset_in_header(self) -> None:
        # The charset is supplied only through the content-type argument.
        page = (
            "<html>\n"
            "<head>\n"
            '<meta property="og:title" content="中文" />\n'
            "</head>\n"
            "</html>"
        ).encode("big5")

        extracted = OpenGraphParser(page, "text/html; charset=Big5").extract_data()
        self.assertEqual(extracted["title"], "中文")

    def test_charset_in_meta(self) -> None:
        # The charset is supplied only through a <meta> tag inside the page.
        page = (
            "<html>\n"
            "<head>\n"
            '<meta content-type="text/html; charset=Big5" />\n'
            '<meta property="og:title" content="中文" />\n'
            "</head>\n"
            "</html>"
        ).encode("big5")

        extracted = OpenGraphParser(page, "text/html").extract_data()
        self.assertEqual(extracted["title"], "中文")
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2016-10-27 12:06:44 +02:00
|
|
|
class GenericParserTestCase(ZulipTestCase):
    """Tests for ``GenericParser.extract_data`` on small in-memory HTML pages."""

    def test_parser(self) -> None:
        page = (
            b"\n"
            b"<html>\n"
            b"<head><title>Test title</title></head>\n"
            b"<body>\n"
            b"<h1>Main header</h1>\n"
            b"<p>Description text</p>\n"
            b"</body>\n"
            b"</html>\n"
        )

        extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(extracted.get("title"), "Test title")
        self.assertEqual(extracted.get("description"), "Description text")

    def test_extract_image(self) -> None:
        page = (
            b"\n"
            b"<html>\n"
            b"<body>\n"
            b"<h1>Main header</h1>\n"
            b'<img data-src="Not an image">\n'
            b'<img src="http://test.com/test.jpg">\n'
            b"<div>\n"
            b"<p>Description text</p>\n"
            b"</div>\n"
            b"</body>\n"
            b"</html>\n"
        )

        extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()
        self.assertEqual(extracted.get("title"), "Main header")
        self.assertEqual(extracted.get("description"), "Description text")
        self.assertEqual(extracted.get("image"), "http://test.com/test.jpg")

    def test_extract_description(self) -> None:
        def description_of(page: bytes) -> Optional[str]:
            """Parse ``page`` and return the extracted description, if any."""
            return GenericParser(page, "text/html; charset=UTF-8").extract_data().get("description")

        # Description taken from a paragraph in the body.
        paragraph_page = (
            b"\n"
            b"<html>\n"
            b"<body>\n"
            b"<div>\n"
            b"<div>\n"
            b"<p>Description text</p>\n"
            b"</div>\n"
            b"</div>\n"
            b"</body>\n"
            b"</html>\n"
        )
        self.assertEqual(description_of(paragraph_page), "Description text")

        # Description taken from the <meta name="description"> tag.
        meta_page = (
            b"\n"
            b"<html>\n"
            b'<head><meta name="description" content="description 123"</head>\n'
            b"<body></body>\n"
            b"</html>\n"
        )
        self.assertEqual(description_of(meta_page), "description 123")

        # No description available anywhere in the page.
        self.assertIsNone(description_of(b"<html><body></body></html>"))
|
2016-12-13 04:20:33 +01:00
|
|
|
|
|
|
|
|
|
|
|
class PreviewTestCase(ZulipTestCase):
|
2017-02-23 06:17:16 +01:00
|
|
|
open_graph_html = """
|
2016-12-13 04:20:33 +01:00
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<title>Test title</title>
|
|
|
|
<meta property="og:title" content="The Rock" />
|
|
|
|
<meta property="og:type" content="video.movie" />
|
|
|
|
<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" />
|
|
|
|
<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" />
|
2019-12-12 02:10:50 +01:00
|
|
|
<meta http-equiv="refresh" content="30" />
|
|
|
|
<meta property="notog:extra-text" content="Extra!" />
|
2016-12-13 04:20:33 +01:00
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<h1>Main header</h1>
|
|
|
|
<p>Description text</p>
|
|
|
|
</body>
|
|
|
|
</html>
|
|
|
|
"""
|
2017-02-23 06:17:16 +01:00
|
|
|
|
2019-05-31 04:45:02 +02:00
|
|
|
def setUp(self) -> None:
    """Run the base-class setup, then turn on inline URL embed previews for all realms."""
    super().setUp()
    every_realm = Realm.objects.all()
    every_realm.update(inline_url_embed_preview=True)
|
|
|
|
|
2019-05-05 06:41:34 +02:00
|
|
|
@classmethod
def create_mock_response(
    cls,
    url: str,
    status: int = 200,
    relative_url: bool = False,
    content_type: str = "text/html",
    body: Optional[Union[str, ConnectionError]] = None,
) -> None:
    """Register a mocked GET response for ``url`` with the ``responses`` library.

    When ``body`` is omitted, ``cls.open_graph_html`` is served. With
    ``relative_url=True`` and a string body, the ``http://ia.media-imdb.com``
    prefix is removed so the image URL in the page becomes relative.
    """
    payload = cls.open_graph_html if body is None else body
    if isinstance(payload, str) and relative_url is True:
        payload = payload.replace("http://ia.media-imdb.com", "")
    responses.add(
        responses.GET,
        url,
        body=payload,
        status=status,
        content_type=content_type,
    )
|
|
|
|
|
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_edit_message_history(self) -> None:
    """Editing a message so that it contains a URL queues an embed event that renders."""
    hamlet = self.example_user("hamlet")
    self.login_user(hamlet)
    message_id = self.send_stream_message(
        hamlet, "Scotland", topic_name="editing", content="original"
    )

    url = "http://test.org/"
    self.create_mock_response(url)

    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        edit_payload = {
            "message_id": message_id,
            "content": url,
        }
        result = self.client_patch(f"/json/messages/{message_id}", edit_payload)
        self.assert_json_success(result)

        # The edit must have published exactly one event to "embed_links".
        patched.assert_called_once()
        positional = patched.call_args[0]
        self.assertEqual(positional[0], "embed_links")
        event = positional[1]

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES), self.assertLogs(
        level="INFO"
    ) as info_logs:
        FetchLinksEmbedData().consume(event)
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
        info_logs.output[0],
    )

    expected_link = f'<a href="{url}" title="The Rock">The Rock</a>'
    edited = Message.objects.select_related("sender").get(id=message_id)
    self.assertIn(expected_link, edited.rendered_content)
|
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def _send_message_with_test_org_url(
    self, sender: UserProfile, queue_should_run: bool = True, relative_url: bool = False
) -> Message:
    """Send ``http://test.org/`` as a personal message to cordelia and return the message.

    With ``queue_should_run=True`` the helper asserts that one event was
    published to "embed_links", serves a mocked page for the URL, runs
    ``FetchLinksEmbedData`` on the event, and returns the message re-read
    from the database. Otherwise it asserts that nothing was published and
    returns the message as sent. ``relative_url`` is forwarded to
    ``create_mock_response``.
    """

    def load(message_id: int) -> Message:
        return Message.objects.select_related("sender").get(id=message_id)

    url = "http://test.org/"
    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        msg_id = self.send_personal_message(
            sender,
            self.example_user("cordelia"),
            content=url,
        )
        if not queue_should_run:
            patched.assert_not_called()
            # Nothing was put in the queue, so we don't need to run the
            # queue processor or any of the following code.
            return load(msg_id)

        patched.assert_called_once()
        positional = patched.call_args[0]
        self.assertEqual(positional[0], "embed_links")
        event = positional[1]

    # Verify the initial message doesn't have the embedded links rendered
    initial = load(msg_id)
    self.assertNotIn(f'<a href="{url}" title="The Rock">The Rock</a>', initial.rendered_content)

    self.create_mock_response(url, relative_url=relative_url)

    # Run the queue processor to potentially rerender things
    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES), self.assertLogs(
        level="INFO"
    ) as info_logs:
        FetchLinksEmbedData().consume(event)
    self.assertIn(
        "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
        info_logs.output[0],
    )

    return load(msg_id)
|
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_message_update_race_condition(self) -> None:
    """An embed event created before an edit must not render once the content changed."""
    hamlet = self.example_user("hamlet")
    self.login_user(hamlet)
    original_url = "http://test.org/"
    edited_url = "http://edited.org/"

    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        msg_id = self.send_stream_message(
            hamlet, "Scotland", topic_name="foo", content=original_url
        )
        patched.assert_called_once()
        positional = patched.call_args[0]
        self.assertEqual(positional[0], "embed_links")
        event = positional[1]

    def load_message() -> Message:
        return Message.objects.select_related("sender").get(id=msg_id)

    def wrapped_queue_json_publish(*args: Any, **kwargs: Any) -> None:
        self.create_mock_response(original_url)
        self.create_mock_response(edited_url)

        with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
            with self.assertLogs(level="INFO") as info_logs:
                # Run the queue processor. This will simulate the event for original_url being
                # processed after the message has been edited.
                FetchLinksEmbedData().consume(event)
            self.assertIn(
                "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
                info_logs.output[0],
            )
            stale = load_message()
            # The content of the message has changed since the event for original_url has been
            # created, it should not be rendered. Another, up-to-date event will have been sent
            # (edited_url).
            self.assertNotIn(
                f'<a href="{original_url}" title="The Rock">The Rock</a>', stale.rendered_content
            )

            self.assertTrue(responses.assert_call_count(edited_url, 0))

        with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES), self.assertLogs(
            level="INFO"
        ) as info_logs:
            # Now proceed with the original queue_json_publish and call the
            # up-to-date event for edited_url.
            queue_json_publish(*args, **kwargs)
            fresh = load_message()
            self.assertIn(
                f'<a href="{edited_url}" title="The Rock">The Rock</a>',
                fresh.rendered_content,
            )
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://edited.org/: ",
            info_logs.output[0],
        )

    with mock_queue_publish(
        "zerver.lib.actions.queue_json_publish", wraps=wrapped_queue_json_publish
    ):
        edit_payload = {
            "message_id": msg_id,
            "content": edited_url,
        }
        result = self.client_patch(f"/json/messages/{msg_id}", edit_payload)
        self.assert_json_success(result)
|
|
|
|
|
2017-11-05 10:51:25 +01:00
|
|
|
def test_get_link_embed_data(self) -> None:
    """Messages from humans get the embedded link; messages from bots do not."""
    url = "http://test.org/"
    expected_link = f'<a href="{url}" title="The Rock">The Rock</a>'

    # When humans send, we should get embedded content.
    from_hamlet = self._send_message_with_test_org_url(sender=self.example_user("hamlet"))
    self.assertIn(expected_link, from_hamlet.rendered_content)

    # We don't want embedded content for bots.
    from_bot = self._send_message_with_test_org_url(
        sender=self.example_user("webhook_bot"), queue_should_run=False
    )
    self.assertNotIn(expected_link, from_bot.rendered_content)

    # Try another human to make sure bot failure was due to the
    # bot sending the message and not some other reason.
    from_prospero = self._send_message_with_test_org_url(sender=self.example_user("prospero"))
    self.assertIn(expected_link, from_prospero.rendered_content)
|
2016-12-13 04:20:33 +01:00
|
|
|
|
2017-11-05 10:51:25 +01:00
|
|
|
def test_inline_url_embed_preview(self) -> None:
    """The preview markup is rendered only while the realm setting is enabled."""
    without_preview = '<p><a href="http://test.org/">http://test.org/</a></p>'
    with_preview = (
        '<p><a href="http://test.org/">http://test.org/</a></p>\n'
        '<div class="message_embed">'
        '<a class="message_embed_image" href="http://test.org/" '
        'style="background-image: url(http://ia.media-imdb.com/images/rock.jpg)"></a>'
        '<div class="data-container">'
        '<div class="message_embed_title">'
        '<a href="http://test.org/" title="The Rock">The Rock</a></div>'
        '<div class="message_embed_description">Description text</div>'
        "</div></div>"
    )

    previewed = self._send_message_with_test_org_url(sender=self.example_user("hamlet"))
    self.assertEqual(previewed.rendered_content, with_preview)

    # Switch the realm-level setting off and send again.
    realm = previewed.get_realm()
    realm.inline_url_embed_preview = False
    realm.save()

    plain = self._send_message_with_test_org_url(
        sender=self.example_user("prospero"), queue_should_run=False
    )
    self.assertEqual(plain.rendered_content, without_preview)
|
|
|
|
|
2018-02-23 21:17:29 +01:00
|
|
|
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_inline_relative_url_embed_preview(self) -> None:
    """Sending a zulip.testserver URL must not publish anything to the queue."""
    # Relative URLs should not be sent for URL preview.
    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        prospero = self.example_user("prospero")
        cordelia = self.example_user("cordelia")
        self.send_personal_message(prospero, cordelia, content="http://zulip.testserver/api/")
        patched.assert_not_called()
|
|
|
|
|
2017-11-05 10:51:25 +01:00
|
|
|
def test_inline_url_embed_preview_with_relative_image_url(self) -> None:
    """A relative Open Graph image URL is rendered against the page's own host."""
    with_preview_relative = (
        '<p><a href="http://test.org/">http://test.org/</a></p>\n'
        '<div class="message_embed">'
        '<a class="message_embed_image" href="http://test.org/" '
        'style="background-image: url(http://test.org/images/rock.jpg)"></a>'
        '<div class="data-container">'
        '<div class="message_embed_title">'
        '<a href="http://test.org/" title="The Rock">The Rock</a></div>'
        '<div class="message_embed_description">Description text</div>'
        "</div></div>"
    )

    # Try case where the Open Graph image is a relative URL.
    rendered = self._send_message_with_test_org_url(
        sender=self.example_user("prospero"), relative_url=True
    )
    self.assertEqual(rendered.rendered_content, with_preview_relative)
|
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
def test_http_error_get_data(self) -> None:
    """A connection error while fetching the URL leaves the plain link rendering in place."""
    url = "http://test.org/"
    msg_id = self.send_personal_message(
        self.example_user("hamlet"),
        self.example_user("cordelia"),
        content=url,
    )
    sent = Message.objects.select_related("sender").get(id=msg_id)
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": sent.sender.realm_id,
        "message_content": url,
    }

    # Make the mocked request for the URL raise instead of returning a page.
    self.create_mock_response(url, body=ConnectionError())

    with self.settings(INLINE_URL_EMBED_PREVIEW=True, TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    reloaded = Message.objects.get(id=msg_id)
    self.assertEqual(
        '<p><a href="http://test.org/">http://test.org/</a></p>', reloaded.rendered_content
    )
|
2016-12-13 04:20:33 +01:00
|
|
|
|
2017-11-05 10:51:25 +01:00
|
|
|
def test_invalid_link(self) -> None:
    """``get_link_embed_data`` returns None for strings that are not valid links."""
    with self.settings(INLINE_URL_EMBED_PREVIEW=True, TEST_SUITE=False, CACHES=TEST_CACHES):
        for bad_link in ("com.notvalidlink", "μένει.com.notvalidlink"):
            self.assertIsNone(get_link_embed_data(bad_link))
|
2018-03-18 20:58:13 +01:00
|
|
|
|
|
|
|
def test_link_embed_data_from_cache(self) -> None:
    # Checks link_embed_data_from_cache in both directions: a miss raises
    # NotFoundInCache, and a value stored under the preview key in the
    # "database" cache is returned unchanged.
    target = "http://test.org/"
    payload = "test data"

    # Nothing has been stored for this URL yet, so the lookup must miss.
    self.assertRaises(NotFoundInCache, link_embed_data_from_cache, target)

    with self.settings(CACHES=TEST_CACHES):
        cache_set(preview_url_cache_key(target), payload, "database")
        fetched = link_embed_data_from_cache(target)
        self.assertEqual(payload, fetched)
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_non_html_data(self) -> None:
    # A URL served as application/octet-stream yields no preview data:
    # the cached value is None and the message stays a plain link.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/audio.mp3"
    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Scotland", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    content_type = "application/octet-stream"
    self.create_mock_response(url, content_type=content_type)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            cached_data = link_embed_data_from_cache(url)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/audio.mp3: ",
            info_logs.output[0],
        )

    self.assertIsNone(cached_data)
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        ('<p><a href="http://test.org/audio.mp3">' "http://test.org/audio.mp3</a></p>"),
        msg.rendered_content,
    )
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_no_open_graph_image(self) -> None:
    # When the page has no og:image line, the cached data has a title but
    # no image, and the message is rendered as a plain link.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/foo.html"
    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Scotland", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # HTML without the og:image metadata
    html = "\n".join(
        line for line in self.open_graph_html.splitlines() if "og:image" not in line
    )
    self.create_mock_response(url, body=html)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            cached_data = link_embed_data_from_cache(url)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: ",
            info_logs.output[0],
        )

    self.assertIn("title", cached_data)
    self.assertNotIn("image", cached_data)
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        ('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
        msg.rendered_content,
    )
|
2019-06-23 13:36:58 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_open_graph_image_missing_content(self) -> None:
    # When the og:image tag is present but has no content attribute, the
    # cached data has a title but no image, and the message is rendered as
    # a plain link.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/foo.html"
    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Scotland", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    # HTML whose og:image tag is kept but stripped of its content attribute
    html = "\n".join(
        line if "og:image" not in line else '<meta property="og:image"/>'
        for line in self.open_graph_html.splitlines()
    )
    self.create_mock_response(url, body=html)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            cached_data = link_embed_data_from_cache(url)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/foo.html: ",
            info_logs.output[0],
        )

    self.assertIn("title", cached_data)
    self.assertNotIn("image", cached_data)
    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertEqual(
        ('<p><a href="http://test.org/foo.html">' "http://test.org/foo.html</a></p>"),
        msg.rendered_content,
    )
|
2019-12-12 02:10:50 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_link_preview_no_content_type_header(self) -> None:
    # With the default mocked response, the cached data contains a title
    # and an image, and both appear in the rendered message.
    # NOTE(review): presumably create_mock_response's defaults omit the
    # Content-Type header, as the test name suggests -- confirm in the helper.
    user = self.example_user("hamlet")
    self.login_user(user)
    url = "http://test.org/"
    # Capture the event that sending the message publishes to the queue.
    with mock_queue_publish("zerver.lib.actions.queue_json_publish") as patched:
        msg_id = self.send_stream_message(user, "Scotland", topic_name="foo", content=url)
        patched.assert_called_once()
        queue = patched.call_args[0][0]
        self.assertEqual(queue, "embed_links")
        event = patched.call_args[0][1]

    self.create_mock_response(url)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
            data = link_embed_data_from_cache(url)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    self.assertIn("title", data)
    self.assertIn("image", data)

    msg = Message.objects.select_related("sender").get(id=msg_id)
    self.assertIn(data["title"], msg.rendered_content)
    self.assertIn(data["image"], msg.rendered_content)
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_valid_content_type_error_get_data(self) -> None:
    # oEmbed lookup is patched to return None and the content-type check is
    # patched to pass, while the mocked response body is a ConnectionError.
    # Nothing must be cached for the URL, and the message must stay a plain
    # link.
    url = "http://test.org/"
    with mock_queue_publish("zerver.lib.actions.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event in the shape the embed-links worker consumes.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    self.create_mock_response(url, body=ConnectionError())

    with mock.patch(
        "zerver.lib.url_preview.preview.get_oembed_data",
        side_effect=lambda *args, **kwargs: None,
    ):
        with mock.patch(
            "zerver.lib.url_preview.preview.valid_content_type", side_effect=lambda k: True
        ):
            with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
                with self.assertLogs(level="INFO") as info_logs:
                    FetchLinksEmbedData().consume(event)
                # assertIn reports the actual log line if the expectation fails.
                self.assertIn(
                    "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
                    info_logs.output[0],
                )

                # No cache entry may exist for the URL after the worker ran.
                with self.assertRaises(NotFoundInCache):
                    link_embed_data_from_cache(url)

    msg.refresh_from_db()
    self.assertEqual(
        '<p><a href="http://test.org/">http://test.org/</a></p>', msg.rendered_content
    )
|
2019-05-04 17:54:18 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_invalid_url(self) -> None:
    # A URL answered with HTTP 404 caches None as its preview data, leaves
    # the message as a plain link, and triggers no request to the base URL.
    url = "http://test.org/"
    error_url = "http://test.org/x"
    with mock_queue_publish("zerver.lib.actions.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=error_url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event in the shape the embed-links worker consumes.
    event = {
        "message_id": msg_id,
        "urls": [error_url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": error_url,
    }

    self.create_mock_response(error_url, status=404)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            FetchLinksEmbedData().consume(event)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/x: ",
            info_logs.output[0],
        )
        cached_data = link_embed_data_from_cache(error_url)

    # FIXME: Should we really cache this, especially without cache invalidation?
    self.assertIsNone(cached_data)
    msg.refresh_from_db()
    self.assertEqual(
        '<p><a href="http://test.org/x">http://test.org/x</a></p>', msg.rendered_content
    )
    # The base URL was never part of the message, so it must not be fetched.
    self.assertTrue(responses.assert_call_count(url, 0))
|
2019-05-02 18:58:39 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_safe_oembed_html_url(self) -> None:
    # With get_oembed_data patched to return video-type data containing an
    # <iframe>, that data is cached as-is and the rendered message carries
    # the HTML-escaped iframe in a data-id attribute.
    url = "http://test.org/"
    with mock_queue_publish("zerver.lib.actions.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event in the shape the embed-links worker consumes.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = {
        "html": f'<iframe src="{url}"></iframe>',
        "oembed": True,
        "type": "video",
        "image": f"{url}/image.png",
    }
    self.create_mock_response(url)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            with mock.patch(
                "zerver.lib.url_preview.preview.get_oembed_data",
                lambda *args, **kwargs: mocked_data,
            ):
                FetchLinksEmbedData().consume(event)
                data = link_embed_data_from_cache(url)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for http://test.org/: ",
            info_logs.output[0],
        )

    self.assertEqual(data, mocked_data)
    msg.refresh_from_db()
    escaped_html = escape(mocked_data["html"])
    self.assertIn(f'a data-id="{escaped_html}"', msg.rendered_content)
|
2019-03-21 21:08:26 +01:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_youtube_url_title_replaces_url(self) -> None:
    # For a bare YouTube URL, the link text becomes "YouTube - <title>"
    # using the title returned by the patched cache lookup, and a thumbnail
    # block follows the paragraph.
    url = "https://www.youtube.com/watch?v=eSJTXC7Ixgg"
    with mock_queue_publish("zerver.lib.actions.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event in the shape the embed-links worker consumes.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = {"title": "Clearer Code at Scale - Static Types at Zulip and Dropbox"}
    self.create_mock_response(url)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            # The markdown renderer's cache lookup is patched to return the title.
            with mock.patch(
                "zerver.lib.markdown.link_preview.link_embed_data_from_cache",
                lambda *args, **kwargs: mocked_data,
            ):
                FetchLinksEmbedData().consume(event)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for https://www.youtube.com/watch?v=eSJTXC7Ixgg:",
            info_logs.output[0],
        )

    msg.refresh_from_db()
    expected_content = f"""<p><a href="https://www.youtube.com/watch?v=eSJTXC7Ixgg">YouTube - Clearer Code at Scale - Static Types at Zulip and Dropbox</a></p>\n<div class="youtube-video message_inline_image"><a data-id="eSJTXC7Ixgg" href="https://www.youtube.com/watch?v=eSJTXC7Ixgg"><img src="{get_camo_url("https://i.ytimg.com/vi/eSJTXC7Ixgg/default.jpg")}"></a></div>"""
    self.assertEqual(expected_content, msg.rendered_content)
|
2020-09-09 07:38:17 +02:00
|
|
|
|
2021-06-09 00:18:46 +02:00
|
|
|
@responses.activate
@override_settings(INLINE_URL_EMBED_PREVIEW=True)
def test_custom_title_replaces_youtube_url_title(self) -> None:
    # When the YouTube URL is written as a markdown link with its own text,
    # that text ("YouTube link") is kept as the link text rather than the
    # title returned by the patched cache lookup.
    # NOTE: `url` holds the whole markdown link, not a bare URL; it is used
    # unchanged as message content, event URL and mocked-response URL.
    url = "[YouTube link](https://www.youtube.com/watch?v=eSJTXC7Ixgg)"
    with mock_queue_publish("zerver.lib.actions.queue_json_publish"):
        msg_id = self.send_personal_message(
            self.example_user("hamlet"),
            self.example_user("cordelia"),
            content=url,
        )
    msg = Message.objects.select_related("sender").get(id=msg_id)
    # Hand-built event in the shape the embed-links worker consumes.
    event = {
        "message_id": msg_id,
        "urls": [url],
        "message_realm_id": msg.sender.realm_id,
        "message_content": url,
    }

    mocked_data = {"title": "Clearer Code at Scale - Static Types at Zulip and Dropbox"}
    self.create_mock_response(url)

    with self.settings(TEST_SUITE=False, CACHES=TEST_CACHES):
        with self.assertLogs(level="INFO") as info_logs:
            # The markdown renderer's cache lookup is patched to return the title.
            with mock.patch(
                "zerver.lib.markdown.link_preview.link_embed_data_from_cache",
                lambda *args, **kwargs: mocked_data,
            ):
                FetchLinksEmbedData().consume(event)
        # assertIn reports the actual log line if the expectation fails.
        self.assertIn(
            "INFO:root:Time spent on get_link_embed_data for [YouTube link](https://www.youtube.com/watch?v=eSJTXC7Ixgg):",
            info_logs.output[0],
        )

    msg.refresh_from_db()
    expected_content = f"""<p><a href="https://www.youtube.com/watch?v=eSJTXC7Ixgg">YouTube link</a></p>\n<div class="youtube-video message_inline_image"><a data-id="eSJTXC7Ixgg" href="https://www.youtube.com/watch?v=eSJTXC7Ixgg"><img src="{get_camo_url("https://i.ytimg.com/vi/eSJTXC7Ixgg/default.jpg")}"></a></div>"""
    self.assertEqual(expected_content, msg.rendered_content)
|