Limit the number of URLs we parse at once to 5.

(imported from commit 876c0d0713b4a4d328e4a69e46c984694f5bce1b)
Author: Luke Faraone, 2013-05-07 12:26:46 -07:00 (committed by Tim Abbott)
Parent: 2c44ed1096
Commit: 3c08781764
1 changed file with 22 additions and 28 deletions
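Why mygrouper goes away: the helper existed only to batch Embedly requests into groups of 10 URLs, and with the new hard cap of 5 supported URLs per message a single batch always suffices, so the loop collapses into one direct embedly_client.oembed(to_process, ...) call. For reference, a minimal standalone sketch of the removed helper (Python 2, since the original relies on itertools.izip_longest; the example URLs are made up):

    import itertools

    def mygrouper(n, iterable):
        # Split `iterable` into chunks of at most `n` items. izip_longest pads
        # the last chunk with None, and the comprehension filters the padding
        # back out. Adapted from http://stackoverflow.com/a/1625013/90777
        args = [iter(iterable)] * n
        return ([e for e in t if e is not None]
                for t in itertools.izip_longest(*args))

    # With at most 5 URLs per message, one chunk of 10 always suffices:
    print(list(mygrouper(10, ["http://a.example", "http://b.example"])))
    # -> [['http://a.example', 'http://b.example']]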


@@ -70,12 +70,6 @@ def add_a(root, url, link, height=None):
     img = markdown.util.etree.SubElement(a, "img")
     img.set("src", url)
 
-def mygrouper(n, iterable):
-    # Adapted from http://stackoverflow.com/a/1625013/90777
-    args = [iter(iterable)] * n
-    return ([e for e in t if e != None] for t in itertools.izip_longest(*args))
-
 def hash_embedly_url(link):
     return 'embedly:' + sha1(link).hexdigest()
@@ -92,7 +86,8 @@ class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
                 continue
             supported_urls.append(link)
-        if not supported_urls:
+        if not supported_urls or len(supported_urls) > 5:
+            # Either zero urls or too many urls.
             return root
 
         # We want this to be able to easily reverse the hashing later
@@ -105,27 +100,26 @@ class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
         to_process = [url for url in supported_urls if not url in oembeds]
         to_cache = {}
 
-        for links in mygrouper(10, to_process):
-            try:
-                responses = embedly_client.oembed(links, maxwidth=250)
-            except httplib2.socket.timeout:
-                # We put this in its own try-except because it requires external
-                # connectivity. If embedly flakes out, we don't want to not-render
-                # the entire message; we just want to not show the embedly preview.
-                logging.warning("Embedly Embed timeout for URLs: %s" % (" ".join(links)))
-                logging.warning(traceback.format_exc())
-                return root
-            except Exception:
-                # If things break for any other reason, don't make things sad.
-                logging.warning(traceback.format_exc())
-                return root
-            for oembed_data in responses:
-                # Don't cache permanent errors
-                if oembed_data["type"] == "error" and \
-                        oembed_data["error_code"] in (500, 501, 503):
-                    continue
-                # Convert to dict because otherwise pickling won't work.
-                to_cache[oembed_data["original_url"]] = dict(oembed_data)
+        try:
+            responses = embedly_client.oembed(to_process, maxwidth=250)
+        except httplib2.socket.timeout:
+            # We put this in its own try-except because it requires external
+            # connectivity. If embedly flakes out, we don't want to not-render
+            # the entire message; we just want to not show the embedly preview.
+            logging.warning("Embedly Embed timeout for URLs: %s" % (" ".join(to_process)))
+            logging.warning(traceback.format_exc())
+            return root
+        except Exception:
+            # If things break for any other reason, don't make things sad.
+            logging.warning(traceback.format_exc())
+            return root
+        for oembed_data in responses:
+            # Don't cache permanent errors
+            if oembed_data["type"] == "error" and \
+                    oembed_data["error_code"] in (500, 501, 503):
+                continue
+            # Convert to dict because otherwise pickling won't work.
+            to_cache[oembed_data["original_url"]] = dict(oembed_data)
 
         # Cache the newly collected data to the database
         cache_set_many(dict((hash_embedly_url(link), to_cache[link]) for link in to_cache),