markdown: Improve handling of broken img urls.

Some URLs that end with image file extensions (e.g. .jpg) actually link to
HTML pages. This adds handling for linx.li, wikipedia.org and
pasteboard.co. Where possible, we redirect to the actual image URL;
otherwise, we do not attempt to render the link as an image.

Fixes #10438.
This commit is contained in:
YashRE42 2019-02-14 21:45:30 +05:30 committed by Tim Abbott
parent 4986f11c67
commit a724a38c03
2 changed files with 40 additions and 0 deletions

View File

@ -601,12 +601,33 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
if not self.markdown.image_preview_enabled:
return False
parsed_url = urllib.parse.urlparse(url)
# Reject HTML-page URLs that end with image extensions and cannot be corrected to the actual image URL
if parsed_url.netloc == 'pasteboard.co':
return False
# List from http://support.google.com/chromeos/bin/answer.py?hl=en&answer=183093
for ext in [".bmp", ".gif", ".jpg", "jpeg", ".png", ".webp"]:
if parsed_url.path.lower().endswith(ext):
return True
return False
def corrected_image_source(self, url: str) -> Optional[str]:
    """Return the direct image URL for hosts that serve HTML at image-like URLs.

    Handles linx.li and *.wikipedia.org. It's structurally very similar
    to dropbox_image, and possibly should be rewritten to use open
    graph, but has some value.

    Args:
        url: The URL found in the message.

    Returns:
        The rewritten URL pointing at the actual image, or None when the
        URL is not from a handled host (or cannot be rewritten), in which
        case the caller should keep using the original URL.
    """
    parsed_url = urllib.parse.urlparse(url)
    # Hostnames are case-insensitive, so compare against a lowercased copy.
    netloc = parsed_url.netloc.lower()
    if netloc.endswith('.wikipedia.org'):
        # Only "/wiki/<page>" paths can be rewritten; slicing any other
        # path would produce a nonsensical URL, so leave those untouched.
        if not parsed_url.path.startswith('/wiki/'):
            return None
        # Redirecting from "/wiki/File:" to "/wiki/Special:FilePath/File:"
        # A possible alternative, that avoids the redirect after hitting "Special:"
        # is using the first characters of md5($filename) to generate the url
        domain = parsed_url.scheme + "://" + parsed_url.netloc
        # path[5:] drops "/wiki" but keeps the following "/", e.g. "/File:Foo.jpg".
        return domain + '/wiki/Special:FilePath' + parsed_url.path[5:]
    if netloc == 'linx.li':
        # The "/s" prefix is prepended to the path to get the image itself.
        return 'https://linx.li/s' + parsed_url.path
    return None
def dropbox_image(self, url: str) -> Optional[Dict[str, Any]]:
# TODO: The returned Dict could possibly be a TypedDict in future.
parsed_url = urllib.parse.urlparse(url)
@ -978,9 +999,17 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
class_attr=class_attr,
already_thumbnailed=True)
continue
if self.is_image(url):
image_source = self.corrected_image_source(url)
if image_source is not None:
found_url = ResultWithFamily(
family=found_url.family,
result=(image_source, image_source)
)
self.handle_image_inlining(root, found_url)
continue
if get_tweet_id(url) is not None:
if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
# Only render at most one tweet per message

View File

@ -448,6 +448,17 @@ class BugdownTest(ZulipTestCase):
converted = render_markdown(msg, content)
self.assertEqual(converted, expected)
@override_settings(INLINE_IMAGE_PREVIEW=True)
def test_corrected_image_source(self) -> None:
    """A Wikipedia "File:" page link renders as an inline image whose
    source has been rewritten to the "Special:FilePath" URL."""
    # Only the wikipedia rewrite is exercised here, because linx.li urls
    # can be expected to expire.
    message = Message(
        sender=self.example_user('othello'),
        sending_client=get_client("test"),
    )
    wiki_file_page = 'https://en.wikipedia.org/wiki/File:Wright_of_Derby,_The_Orrery.jpg'
    expected_html = '<div class="message_inline_image"><a href="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg" target="_blank" title="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg"><img data-src-fullsize="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&amp;size=full" src="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&amp;size=thumbnail"></a></div>'
    self.assertEqual(render_markdown(message, wiki_file_page), expected_html)
@override_settings(INLINE_IMAGE_PREVIEW=False)
def test_image_preview_enabled(self) -> None:
ret = bugdown.image_preview_enabled()