Limit the number of URLs we parse at once to 5.

(imported from commit 876c0d0713b4a4d328e4a69e46c984694f5bce1b)
Author: Luke Faraone, 2013-05-07 12:26:46 -07:00 (committed by Tim Abbott)
Parent: 2c44ed1096
Commit: 3c08781764
1 changed file with 22 additions and 28 deletions
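Why mygrouper goes away: the helper existed only to batch Embedly requests into groups of 10 URLs, and with the new hard cap of 5 supported URLs per message a single batch always suffices, so the loop collapses into one direct embedly_client.oembed(to_process, ...) call. For reference, a minimal standalone sketch of the removed helper (Python 2, since the original relies on itertools.izip_longest; the example URLs are made up):

    import itertools

    def mygrouper(n, iterable):
        # Split `iterable` into chunks of at most `n` items. izip_longest pads
        # the last chunk with None, and the comprehension filters the padding
        # back out. Adapted from http://stackoverflow.com/a/1625013/90777
        args = [iter(iterable)] * n
        return ([e for e in t if e is not None]
                for t in itertools.izip_longest(*args))

    # With at most 5 URLs per message, one chunk of 10 always suffices:
    print(list(mygrouper(10, ["http://a.example", "http://b.example"])))
    # -> [['http://a.example', 'http://b.example']]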


@@ -70,12 +70,6 @@ def add_a(root, url, link, height=None):
     img = markdown.util.etree.SubElement(a, "img")
     img.set("src", url)
 
-def mygrouper(n, iterable):
-    # Adapted from http://stackoverflow.com/a/1625013/90777
-    args = [iter(iterable)] * n
-    return ([e for e in t if e != None] for t in itertools.izip_longest(*args))
-
 def hash_embedly_url(link):
     return 'embedly:' + sha1(link).hexdigest()
@@ -92,7 +86,8 @@ class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
                 continue
             supported_urls.append(link)
-        if not supported_urls:
+        if not supported_urls or len(supported_urls) > 5:
+            # Either zero urls or too many urls.
             return root
 
         # We want this to be able to easily reverse the hashing later
@@ -105,27 +100,26 @@ class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
         to_process = [url for url in supported_urls if not url in oembeds]
         to_cache = {}
 
-        for links in mygrouper(10, to_process):
-            try:
-                responses = embedly_client.oembed(links, maxwidth=250)
-            except httplib2.socket.timeout:
-                # We put this in its own try-except because it requires external
-                # connectivity. If embedly flakes out, we don't want to not-render
-                # the entire message; we just want to not show the embedly preview.
-                logging.warning("Embedly Embed timeout for URLs: %s" % (" ".join(links)))
-                logging.warning(traceback.format_exc())
-                return root
-            except Exception:
-                # If things break for any other reason, don't make things sad.
-                logging.warning(traceback.format_exc())
-                return root
-            for oembed_data in responses:
-                # Don't cache permanent errors
-                if oembed_data["type"] == "error" and \
-                        oembed_data["error_code"] in (500, 501, 503):
-                    continue
-                # Convert to dict because otherwise pickling won't work.
-                to_cache[oembed_data["original_url"]] = dict(oembed_data)
+        try:
+            responses = embedly_client.oembed(to_process, maxwidth=250)
+        except httplib2.socket.timeout:
+            # We put this in its own try-except because it requires external
+            # connectivity. If embedly flakes out, we don't want to not-render
+            # the entire message; we just want to not show the embedly preview.
+            logging.warning("Embedly Embed timeout for URLs: %s" % (" ".join(to_process)))
+            logging.warning(traceback.format_exc())
+            return root
+        except Exception:
+            # If things break for any other reason, don't make things sad.
+            logging.warning(traceback.format_exc())
+            return root
+        for oembed_data in responses:
+            # Don't cache permanent errors
+            if oembed_data["type"] == "error" and \
+                    oembed_data["error_code"] in (500, 501, 503):
+                continue
+            # Convert to dict because otherwise pickling won't work.
+            to_cache[oembed_data["original_url"]] = dict(oembed_data)
 
         # Cache the newly collected data to the database
         cache_set_many(dict((hash_embedly_url(link), to_cache[link]) for link in to_cache),