mirror of https://github.com/zulip/zulip.git
markdown: Improve handling of broken img urls.
Some URLs that end with image file extensions (e.g. .jpg) may actually link to HTML pages. This adds handling for linx.li, wikipedia.org, and pasteboard.co: where possible, we redirect to the actual image URL; otherwise, we do not attempt to render the link as an image. Fixes #10438.
This commit is contained in:
parent
4986f11c67
commit
a724a38c03
|
@ -601,12 +601,33 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
||||||
if not self.markdown.image_preview_enabled:
|
if not self.markdown.image_preview_enabled:
|
||||||
return False
|
return False
|
||||||
parsed_url = urllib.parse.urlparse(url)
|
parsed_url = urllib.parse.urlparse(url)
|
||||||
|
# Reject URLs that end with image extensions but serve HTML pages and cannot be rewritten to a direct image URL
|
||||||
|
if parsed_url.netloc == 'pasteboard.co':
|
||||||
|
return False
|
||||||
|
|
||||||
# List from http://support.google.com/chromeos/bin/answer.py?hl=en&answer=183093
|
# List from http://support.google.com/chromeos/bin/answer.py?hl=en&answer=183093
|
||||||
for ext in [".bmp", ".gif", ".jpg", "jpeg", ".png", ".webp"]:
|
for ext in [".bmp", ".gif", ".jpg", "jpeg", ".png", ".webp"]:
|
||||||
if parsed_url.path.lower().endswith(ext):
|
if parsed_url.path.lower().endswith(ext):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def corrected_image_source(self, url: str) -> Optional[str]:
    """Return a direct image URL for hosts that serve HTML at image-like URLs.

    This function adjusts URLs from linx.li and wikipedia.org to point
    to the actual image URL. It's structurally very similar to
    dropbox_image, and possibly should be rewritten to use open graph,
    but has some value.

    Args:
        url: The URL found in the message.

    Returns:
        The rewritten URL as a string, or None when the URL does not
        belong to a recognised host/path and should be used unchanged.
    """
    parsed_url = urllib.parse.urlparse(url)
    wiki_prefix = '/wiki/'
    # Only rewrite "/wiki/..." paths; other wikipedia.org paths would be
    # mangled into an invalid URL by the prefix insertion below.
    if (parsed_url.netloc.lower().endswith('.wikipedia.org')
            and parsed_url.path.startswith(wiki_prefix)):
        # Redirecting from "/wiki/File:" to "/wiki/Special:FilePath/File:"
        # A possible alternative, that avoids the redirect after hitting "Special:"
        # is using the first characters of md5($filename) to generate the url
        domain = parsed_url.scheme + "://" + parsed_url.netloc
        page_name = parsed_url.path[len(wiki_prefix):]
        return domain + wiki_prefix + 'Special:FilePath/' + page_name
    if parsed_url.netloc == 'linx.li':
        # Prefix the path with "/s" to get the direct image URL.
        return 'https://linx.li/s' + parsed_url.path
    return None
|
||||||
|
|
||||||
def dropbox_image(self, url: str) -> Optional[Dict[str, Any]]:
|
def dropbox_image(self, url: str) -> Optional[Dict[str, Any]]:
|
||||||
# TODO: The returned Dict could possibly be a TypedDict in future.
|
# TODO: The returned Dict could possibly be a TypedDict in future.
|
||||||
parsed_url = urllib.parse.urlparse(url)
|
parsed_url = urllib.parse.urlparse(url)
|
||||||
|
@ -978,9 +999,17 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
|
||||||
class_attr=class_attr,
|
class_attr=class_attr,
|
||||||
already_thumbnailed=True)
|
already_thumbnailed=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self.is_image(url):
|
if self.is_image(url):
|
||||||
|
image_source = self.corrected_image_source(url)
|
||||||
|
if image_source is not None:
|
||||||
|
found_url = ResultWithFamily(
|
||||||
|
family=found_url.family,
|
||||||
|
result=(image_source, image_source)
|
||||||
|
)
|
||||||
self.handle_image_inlining(root, found_url)
|
self.handle_image_inlining(root, found_url)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if get_tweet_id(url) is not None:
|
if get_tweet_id(url) is not None:
|
||||||
if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
|
if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
|
||||||
# Only render at most one tweet per message
|
# Only render at most one tweet per message
|
||||||
|
|
|
@ -448,6 +448,17 @@ class BugdownTest(ZulipTestCase):
|
||||||
converted = render_markdown(msg, content)
|
converted = render_markdown(msg, content)
|
||||||
self.assertEqual(converted, expected)
|
self.assertEqual(converted, expected)
|
||||||
|
|
||||||
|
@override_settings(INLINE_IMAGE_PREVIEW=True)
def test_corrected_image_source(self) -> None:
    # Only the wikipedia.org rewrite is exercised here, because linx.li
    # urls can be expected to expire.
    content = 'https://en.wikipedia.org/wiki/File:Wright_of_Derby,_The_Orrery.jpg'
    # The rendered preview should link to, and thumbnail, the
    # "Special:FilePath" form of the url rather than the "File:" page.
    expected = '<div class="message_inline_image"><a href="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg" target="_blank" title="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg"><img data-src-fullsize="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&size=full" src="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&size=thumbnail"></a></div>'

    message = Message(
        sender=self.example_user('othello'),
        sending_client=get_client("test"),
    )
    self.assertEqual(render_markdown(message, content), expected)
|
||||||
|
|
||||||
@override_settings(INLINE_IMAGE_PREVIEW=False)
|
@override_settings(INLINE_IMAGE_PREVIEW=False)
|
||||||
def test_image_preview_enabled(self) -> None:
|
def test_image_preview_enabled(self) -> None:
|
||||||
ret = bugdown.image_preview_enabled()
|
ret = bugdown.image_preview_enabled()
|
||||||
|
|
Loading…
Reference in New Issue