markdown: Improve handling of broken img urls.

Some URLs that end with image file extensions (e.g. .jpg) actually link to HTML pages. This adds handling for linx.li, wikipedia.org and pasteboard.co: where possible, we redirect to the actual image URL; otherwise, we do not attempt to render the link as an image. Fixes #10438.
2019-02-14 21:45:30 +05:30 · 2019-02-14 21:45:30 +05:30 · a724a38c03
parent 4986f11c67
commit a724a38c03
2 changed files with 40 additions and 0 deletions
--- a/zerver/lib/bugdown/init.py
+++ b/zerver/lib/bugdown/init.py
@ -601,12 +601,33 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
        if not self.markdown.image_preview_enabled:
            return False
        parsed_url = urllib.parse.urlparse(url)
        # Exclude HTML pages whose URLs end with image extensions and that
        # cannot be rewritten to point at the underlying image.
        if parsed_url.netloc == 'pasteboard.co':
            return False
        # List from http://support.google.com/chromeos/bin/answer.py?hl=en&answer=183093
        for ext in [".bmp", ".gif", ".jpg", "jpeg", ".png", ".webp"]:
            if parsed_url.path.lower().endswith(ext):
                return True
        return False
    def corrected_image_source(self, url: str) -> Optional[str]:
        """Return the direct image URL for known "HTML page" image links.

        Some hosts serve an HTML page at a URL that ends with an image
        extension.  For wikipedia.org and linx.li we know how to derive
        the URL of the actual image from the page URL.

        It's structurally very similar to dropbox_image, and possibly
        should be rewritten to use open graph, but has some value.

        Args:
            url: The URL found in the message.

        Returns:
            The rewritten URL pointing at the image itself, or None when
            no rewrite applies and the original URL should be used as is.
        """
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.netloc.lower().endswith('.wikipedia.org'):
            # Redirecting from "/wiki/File:" to "/wiki/Special:FilePath/File:"
            # A possible alternative, that avoids the redirect after hitting "Special:"
            # is using the first characters of md5($filename) to generate the url
            wiki_prefix = '/wiki/'
            file_path_prefix = wiki_prefix + 'Special:FilePath/'
            path = parsed_url.path
            # Only rewrite "/wiki/<page>" paths; anything else (or a URL
            # that already goes through Special:FilePath) would be turned
            # into a broken link by the prefix insertion below.
            if not path.startswith(wiki_prefix) or path.startswith(file_path_prefix):
                return None
            domain = parsed_url.scheme + "://" + parsed_url.netloc
            return domain + file_path_prefix + path[len(wiki_prefix):]
        if parsed_url.netloc == 'linx.li':
            return 'https://linx.li/s' + parsed_url.path
        return None
    def dropbox_image(self, url: str) -> Optional[Dict[str, Any]]:
        # TODO: The returned Dict could possibly be a TypedDict in future.
        parsed_url = urllib.parse.urlparse(url)
@ -978,9 +999,17 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
                      class_attr=class_attr,
                      already_thumbnailed=True)
                continue
            if self.is_image(url):
                image_source = self.corrected_image_source(url)
                if image_source is not None:
                    found_url = ResultWithFamily(
                        family=found_url.family,
                        result=(image_source, image_source)
                    )
                self.handle_image_inlining(root, found_url)
                continue
            if get_tweet_id(url) is not None:
                if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
                    # Only render at most one tweet per message
--- a/zerver/tests/test_bugdown.py
+++ b/zerver/tests/test_bugdown.py
@ -448,6 +448,17 @@ class BugdownTest(ZulipTestCase):
        converted = render_markdown(msg, content)
        self.assertEqual(converted, expected)
    @override_settings(INLINE_IMAGE_PREVIEW=True)
    def test_corrected_image_source(self) -> None:
        """A Wikipedia "File:" page link renders as an inline image that
        points at the corresponding Special:FilePath URL."""
        # testing only wikipedia because linx.li urls can be expected to expire
        wiki_file_page = 'https://en.wikipedia.org/wiki/File:Wright_of_Derby,_The_Orrery.jpg'
        expected_html = (
            '<div class="message_inline_image">'
            '<a href="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg"'
            ' target="_blank"'
            ' title="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg">'
            '<img data-src-fullsize="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&amp;size=full"'
            ' src="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&amp;size=thumbnail">'
            '</a></div>'
        )
        message = Message(
            sender=self.example_user('othello'),
            sending_client=get_client("test"),
        )
        self.assertEqual(render_markdown(message, wiki_file_page), expected_html)
    @override_settings(INLINE_IMAGE_PREVIEW=False)
    def test_image_preview_enabled(self) -> None:
        ret = bugdown.image_preview_enabled()