bugdown: Rewrite image urls to avoid mixed-content warnings.

(imported from commit fc0a41befb04f2a8aad2937a856366ac3cadb192)
This commit is contained in:
Tim Abbott 2013-08-28 16:45:26 -04:00
parent a575ea997f
commit b557b94d0b
3 changed files with 26 additions and 3 deletions

View File

@ -11,7 +11,8 @@ import time
import HTMLParser
import httplib2
from hashlib import sha1
import hashlib
import hmac
from django.core import mail
from django.conf import settings
@ -73,7 +74,7 @@ def add_a(root, url, link, height=None):
img.set("src", url)
def hash_embedly_url(link):
return 'embedly:' + sha1(link).hexdigest()
return 'embedly:' + hashlib.sha1(link).hexdigest()
@cache_with_key(lambda tweet_id: tweet_id, cache_name="database", with_statsd_key="tweet_data")
def fetch_tweet_data(tweet_id):
@ -145,6 +146,19 @@ def get_tweet_id(url):
return False
return tweet_id_match.group("tweetid")
class InlineHttpsProcessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor that points plain-HTTP inline images at an HTTPS proxy.

    Every ``<img>`` whose ``src`` starts with ``http://`` is rewritten to
    ``https://external-content.zulipcdn.net/<digest>/<hex-url>``, where
    ``<digest>`` is the hex HMAC-SHA1 of the original URL keyed with
    ``settings.CAMO_KEY`` and ``<hex-url>`` is the hex-encoded original URL.
    This avoids mixed-content warnings when the page is served over HTTPS.
    """

    def run(self, root):
        """Rewrite the ``src`` of each ``http://`` image under *root* in place.

        Images whose ``src`` is missing or does not start with ``http://``
        (already-HTTPS URLs, relative URLs such as our own emoji) are left
        untouched.
        """
        # Collect every <img> element in the rendered tree.
        found_imgs = walk_tree(root, lambda e: e if e.tag == "img" else None)
        for img in found_imgs:
            url = img.get("src")
            # Only plain-HTTP sources need proxying.  A missing src is
            # skipped rather than crashing on None.startswith.
            if url is None or not url.startswith("http://"):
                continue
            # Sign and hex-encode the same UTF-8 byte string.  Handing a
            # non-ASCII unicode URL directly to hmac / the hex codec would
            # raise UnicodeEncodeError via the implicit ASCII encode.
            url_bytes = url if isinstance(url, bytes) else url.encode("utf-8")
            digest = hmac.new(settings.CAMO_KEY, url_bytes, hashlib.sha1).hexdigest()
            # NOTE: the "hex" codec is Python 2 only, like the rest of this file.
            encoded_url = url_bytes.encode("hex")
            img.set("src", "https://external-content.zulipcdn.net/%s/%s" % (digest, encoded_url))
class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
def is_image(self, url):
@ -651,6 +665,7 @@ class Bugdown(markdown.Extension):
"_begin")
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
md.treeprocessors.add("rewrite_to_https", InlineHttpsProcessor(md), "_end")
if self.getConfig("realm") == "mit.edu/zephyr_mirror":
# Disable almost all inline patterns for mit.edu users' traffic that is mirrored

View File

@ -2610,12 +2610,17 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
self.assertEqual(converted, '<p>Google logo today: <a href="https://www.google.com/images/srpr/logo4w.png" target="_blank" title="https://www.google.com/images/srpr/logo4w.png">https://www.google.com/images/srpr/logo4w.png</a><br>\nKinda boring</p>\n<div class="message_inline_image"><a href="https://www.google.com/images/srpr/logo4w.png" target="_blank" title="https://www.google.com/images/srpr/logo4w.png"><img src="https://www.google.com/images/srpr/logo4w.png"></a></div>')
# If thre are two images, both should be previewed.
# If there are two images, both should be previewed.
msg = 'Google logo today: https://www.google.com/images/srpr/logo4w.png\nKinda boringGoogle logo today: https://www.google.com/images/srpr/logo4w.png\nKinda boring'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>Google logo today: <a href="https://www.google.com/images/srpr/logo4w.png" target="_blank" title="https://www.google.com/images/srpr/logo4w.png">https://www.google.com/images/srpr/logo4w.png</a><br>\nKinda boringGoogle logo today: <a href="https://www.google.com/images/srpr/logo4w.png" target="_blank" title="https://www.google.com/images/srpr/logo4w.png">https://www.google.com/images/srpr/logo4w.png</a><br>\nKinda boring</p>\n<div class="message_inline_image"><a href="https://www.google.com/images/srpr/logo4w.png" target="_blank" title="https://www.google.com/images/srpr/logo4w.png"><img src="https://www.google.com/images/srpr/logo4w.png"></a></div><div class="message_inline_image"><a href="https://www.google.com/images/srpr/logo4w.png" target="_blank" title="https://www.google.com/images/srpr/logo4w.png"><img src="https://www.google.com/images/srpr/logo4w.png"></a></div>')
# http images should be converted to https via our Camo integration
msg = 'Google logo today: http://www.google.com/images/srpr/logo4w.png'
converted = bugdown_convert(msg)
self.assertEqual(converted, '<p>Google logo today: <a href="http://www.google.com/images/srpr/logo4w.png" target="_blank" title="http://www.google.com/images/srpr/logo4w.png">http://www.google.com/images/srpr/logo4w.png</a></p>\n<div class="message_inline_image"><a href="http://www.google.com/images/srpr/logo4w.png" target="_blank" title="http://www.google.com/images/srpr/logo4w.png"><img src="https://external-content.zulipcdn.net/4882a845c6edd9a945bfe5f33734ce0aed8170f3/687474703a2f2f7777772e676f6f676c652e636f6d2f696d616765732f737270722f6c6f676f34772e706e67"></a></div>')
def test_inline_youtube(self):
msg = 'Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE'

View File

@ -580,6 +580,9 @@ OPENID_RENDER_FAILURE = openid_failure_handler
MAILCHIMP_API_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-us4'
ZULIP_FRIENDS_LIST_ID = '84b2f3da6b'
# Shared secret used to sign proxied image URLs; it must match the key
# configured on our Camo installation.
CAMO_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Client-side polling timeout for get_events, in milliseconds.
# We configure this here so that the client test suite can override it.
# We already kill the connection server-side with heartbeat events,