markdown: Improve handling of broken img urls.

Some URLs that end with an image file extension (e.g. .jpg) actually link to HTML pages. This adds handling for linx.li, wikipedia.org and pasteboard.co: where possible, we redirect to the actual image URL; otherwise, we do not attempt to render the link as an image. Fixes #10438.
2019-02-14 21:45:30 +05:30 · 2019-02-14 21:45:30 +05:30 · a724a38c03
parent 4986f11c67
commit a724a38c03
2 changed files with 40 additions and 0 deletions
--- a/zerver/lib/bugdown/init.py
+++ b/zerver/lib/bugdown/init.py
@ -601,12 +601,33 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
        if not self.markdown.image_preview_enabled:
            return False
        parsed_url = urllib.parse.urlparse(url)
+        # Reject html pages whose urls end with image extensions and cannot be corrected
+        if parsed_url.netloc == 'pasteboard.co':
+            return False
+
        # List from http://support.google.com/chromeos/bin/answer.py?hl=en&answer=183093
        for ext in [".bmp", ".gif", ".jpg", "jpeg", ".png", ".webp"]:
            if parsed_url.path.lower().endswith(ext):
                return True
        return False

+    def corrected_image_source(self, url: str) -> Optional[str]:
+        # Rewrites linx.li and wikipedia.org page urls that only look like
+        # image urls so they point at the actual image; returns None when
+        # no rewrite applies.  Structurally very similar to dropbox_image,
+        # and possibly should be rewritten to use open graph.
+        parsed_url = urllib.parse.urlparse(url)
+        if parsed_url.netloc.lower().endswith('.wikipedia.org'):
+            # "/wiki/File:X" -> "/wiki/Special:FilePath/File:X"; any path not
+            # under "/wiki/" is left alone rather than being sliced blindly.
+            if not parsed_url.path.startswith('/wiki/'):
+                return None
+            domain = parsed_url.scheme + "://" + parsed_url.netloc
+            return domain + '/wiki/Special:FilePath' + parsed_url.path[5:]
+        if parsed_url.netloc == 'linx.li':
+            return 'https://linx.li/s' + parsed_url.path
+        return None
+
    def dropbox_image(self, url: str) -> Optional[Dict[str, Any]]:
        # TODO: The returned Dict could possibly be a TypedDict in future.
        parsed_url = urllib.parse.urlparse(url)
@ -978,9 +999,17 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
                      class_attr=class_attr,
                      already_thumbnailed=True)
                continue
+
            if self.is_image(url):
+                image_source = self.corrected_image_source(url)
+                if image_source is not None:
+                    found_url = ResultWithFamily(
+                        family=found_url.family,
+                        result=(image_source, image_source)
+                    )
                self.handle_image_inlining(root, found_url)
                continue
+
            if get_tweet_id(url) is not None:
                if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW:
                    # Only render at most one tweet per message
--- a/zerver/tests/test_bugdown.py
+++ b/zerver/tests/test_bugdown.py
@ -448,6 +448,17 @@ class BugdownTest(ZulipTestCase):
        converted = render_markdown(msg, content)
        self.assertEqual(converted, expected)

+    @override_settings(INLINE_IMAGE_PREVIEW=True)
+    def test_corrected_image_source(self) -> None:
+        # Only wikipedia is tested here, since linx.li urls can be expected to expire.
+        content = 'https://en.wikipedia.org/wiki/File:Wright_of_Derby,_The_Orrery.jpg'
+        expected = '<div class="message_inline_image"><a href="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg" target="_blank" title="https://en.wikipedia.org/wiki/Special:FilePath/File:Wright_of_Derby,_The_Orrery.jpg"><img data-src-fullsize="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&amp;size=full" src="/thumbnail?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FSpecial%3AFilePath%2FFile%3AWright_of_Derby%2C_The_Orrery.jpg&amp;size=thumbnail"></a></div>'
+
+        sender_user_profile = self.example_user('othello')
+        msg = Message(sender=sender_user_profile, sending_client=get_client("test"))
+        converted = render_markdown(msg, content)
+        self.assertEqual(converted, expected)
+
    @override_settings(INLINE_IMAGE_PREVIEW=False)
    def test_image_preview_enabled(self) -> None:
        ret = bugdown.image_preview_enabled()