def corrected_image_source(self, url: str) -> Optional[str]:
    """Return the direct image URL for a linx.li or Wikipedia file page.

    Some sites serve an HTML page at a URL that ends in an image
    extension.  For the sites handled here, this rewrites such a URL so
    that it points at the underlying image.  It is structurally very
    similar to dropbox_image, and possibly should be rewritten to use
    open graph, but has some value.

    Args:
        url: The URL found in the message.

    Returns:
        The rewritten URL, or None when the URL does not need (or cannot
        safely receive) a correction, in which case the caller should
        keep using the original URL.
    """
    parsed_url = urllib.parse.urlparse(url)
    # Host names are case-insensitive, so compare on a lowercased copy.
    netloc = parsed_url.netloc.lower()
    path = parsed_url.path

    if netloc.endswith('.wikipedia.org'):
        # Only "/wiki/File:..." pages can be redirected; rewriting any
        # other path (e.g. a static asset) would produce a broken URL.
        if not path.startswith('/wiki/File:'):
            return None
        # Redirecting from "/wiki/File:" to "/wiki/Special:FilePath/File:".
        # A possible alternative, that avoids the redirect after hitting
        # "Special:", is using the first characters of md5($filename) to
        # generate the url.
        domain = parsed_url.scheme + "://" + parsed_url.netloc
        return domain + '/wiki/Special:FilePath/' + path[len('/wiki/'):]

    if netloc == 'linx.li':
        # A path already under "/s/" is treated as the direct form;
        # prefixing it again would yield "/s/s/...".
        if path.startswith('/s/'):
            return None
        return 'https://linx.li/s' + path

    return None
parsed_url = urllib.parse.urlparse(url) @@ -978,9 +999,17 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor): class_attr=class_attr, already_thumbnailed=True) continue + if self.is_image(url): + image_source = self.corrected_image_source(url) + if image_source is not None: + found_url = ResultWithFamily( + family=found_url.family, + result=(image_source, image_source) + ) self.handle_image_inlining(root, found_url) continue + if get_tweet_id(url) is not None: if rendered_tweet_count >= self.TWITTER_MAX_TO_PREVIEW: # Only render at most one tweet per message diff --git a/zerver/tests/test_bugdown.py b/zerver/tests/test_bugdown.py index fb6cb64cdb..fbae52124e 100644 --- a/zerver/tests/test_bugdown.py +++ b/zerver/tests/test_bugdown.py @@ -448,6 +448,17 @@ class BugdownTest(ZulipTestCase): converted = render_markdown(msg, content) self.assertEqual(converted, expected) + @override_settings(INLINE_IMAGE_PREVIEW=True) + def test_corrected_image_source(self) -> None: + # testing only wikipedia because linx.li urls can be expected to expire + content = 'https://en.wikipedia.org/wiki/File:Wright_of_Derby,_The_Orrery.jpg' + expected = '
' + + sender_user_profile = self.example_user('othello') + msg = Message(sender=sender_user_profile, sending_client=get_client("test")) + converted = render_markdown(msg, content) + self.assertEqual(converted, expected) + @override_settings(INLINE_IMAGE_PREVIEW=False) def test_image_preview_enabled(self) -> None: ret = bugdown.image_preview_enabled()