bugdown: Add custom per-realm filters to linkify certain strings.

I've tried to do this in a way that's scalable and easily configured, so that we can add new such filters for customers on demand without needing to add anything other than a bit of configuration. Once we're confident in the arguments to this system, I think we'll want to move the regular expression lists into the database so that we don't need to do a prod push to modify them. The initial set of regular expressions is: (1) Linkifying e.g. "trac #224" in the Humbug realm, so that we're exercising this code. (2) The various ticket number things CUSTOMER7 uses for the CUSTOMER7 realm. (imported from commit 992b0937b9012c15a7c2f585eb0aacb221c52e01)
2013-06-05 11:45:57 -04:00 · 2013-06-05 11:45:57 -04:00 · 0ad1094e45
parent ceacf6f97e
commit 0ad1094e45
4 changed files with 63 additions and 18 deletions
--- a/zephyr/lib/actions.py
+++ b/zephyr/lib/actions.py
@ -455,7 +455,7 @@ def check_message(sender, client, message_type_name, message_to,
    else:
        return "Invalid message type"
-    rendered_content = bugdown.convert(message_content)
+    rendered_content = bugdown.convert(message_content, sender.realm.domain)
    if rendered_content is None:
        return "We were unable to render your message"
@ -958,7 +958,7 @@ def do_update_message(user_profile, message_id, subject, content):
        first_rendered_content = message.rendered_content
    if content is not None:
-        rendered_content = bugdown.convert(content)
+        rendered_content = bugdown.convert(content, message.sender.realm.domain)
        if rendered_content is None:
            raise JsonableError("We were unable to render your updated message")
--- a/zephyr/lib/bugdown/init.py
+++ b/zephyr/lib/bugdown/init.py
@ -408,16 +408,18 @@ def sanitize_url(url):
    # Url passes all tests. Return url as-is.
    return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
-def url_to_a(url):
+def url_to_a(url, text = None):
    a = markdown.util.etree.Element('a')
    href = sanitize_url(url)
    if href is None:
        # Rejected by sanitize_url; render it as plain text.
        return url
    if text is None:
        text = url
    a.set('href', href)
-    a.text = url
+    a.text = text
    fixup_link(a, not 'mailto:' in href[:7])
    return a
@ -500,6 +502,19 @@ class LinkPattern(markdown.inlinepatterns.Pattern):
        fixup_link(el)
        return el
 # Given a regular expression pattern, linkifies groups that match it
 # using the provided format string to construct the URL.
 class RealmFilterPattern(markdown.inlinepatterns.Pattern):
    """Inline pattern that applies a single realm filter to the input.

    source_pattern is a regular expression fragment; its named groups are
    substituted into format_string (a %-style template using %(group)s
    placeholders) to build the link target.  The full text matched by
    source_pattern becomes the link text.
    """
    def __init__(self, source_pattern, format_string, markdown_instance=None):
        # Anchor the filter at a word boundary and refuse matches that are
        # immediately followed by a word character, so it does not fire in
        # the middle of a longer token.  Both halves are raw strings so the
        # regex escapes are never interpreted as string escapes.
        self.pattern = r'\b(?P<name>' + source_pattern + r')(?!\w)'
        self.format_string = format_string
        markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)

    def handleMatch(self, m):
        """Build the link for match m via url_to_a.

        The URL comes from formatting format_string with the match's named
        groups; the link text is the "name" group, i.e. the whole span
        matched by the source pattern.
        """
        return url_to_a(self.format_string % m.groupdict(),
                        m.group("name"))
 class Bugdown(markdown.Extension):
    def extendMarkdown(self, md, md_globals):
        del md.preprocessors['reference']
@ -535,6 +550,10 @@ class Bugdown(markdown.Extension):
        md.inlinePatterns.add('http_autolink', HttpLink(http_link_regex), '>link')
        for (pattern, format_string) in self.getConfig("realm_filters"):
            md.inlinePatterns.add('realm_filters/%s' % (pattern,),
                                  RealmFilterPattern(pattern, format_string), '_begin')
        # A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
        #
        # We detect a url by checking for the TLD, and building around it.
@ -555,15 +574,31 @@ class Bugdown(markdown.Extension):
        md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
-_md_engine = markdown.Markdown(
+
-    safe_mode     = 'escape',
+md_engines = {}
-    output_format = 'html',
+
-    extensions    = ['nl2br',
+def make_md_engine(key, opts):
-        codehilite.makeExtension(configs=[
+    md_engines[key] = markdown.Markdown(
-            ('force_linenos', False),
+        safe_mode     = 'escape',
-            ('guess_lang',    False)]),
+        output_format = 'html',
-        fenced_code.makeExtension(),
+        extensions    = ['nl2br',
-        Bugdown()])
+                         codehilite.makeExtension(configs=[
                    ('force_linenos', False),
                    ('guess_lang',    False)]),
                         fenced_code.makeExtension(),
                         Bugdown(opts)])
 # Per-realm linkification rules, keyed by realm domain.  Each rule is a
 # (regex pattern, %-style URL format string) pair; the "default" entry
 # carries no rules.
 realm_filters = {
    "default": [],
    "humbughq.com": [
        ("[tT]rac #(?P<id>[0-9]{1,8})", "https://trac.humbughq.com/ticket/%(id)s"),
    ],
 }

 # Build one Markdown engine per configured realm.
 for realm in realm_filters:
    # Because of how the Markdown config API works, this has a confusingly
    # large number of layers of dicts/arrays :(  The option is passed as a
    # two-item list: the filters and what appears to be a description string.
    make_md_engine(
        realm,
        {"realm_filters": [realm_filters[realm],
                           "Realm-specific filters for %s" % (realm,)]})
 # We want to log Markdown parser failures, but shouldn't log the actual input
 # message for privacy reasons.  The compromise is to replace all alphanumeric
@ -575,9 +610,13 @@ _privacy_re = re.compile(r'\w', flags=re.UNICODE)
 def _sanitize_for_log(md):
    """Return the repr of md with every match of _privacy_re replaced by 'x'."""
    masked = _privacy_re.sub('x', md)
    return repr(masked)
-def do_convert(md):
+def do_convert(md, realm):
    """Convert Markdown to HTML, with Humbug-specific settings and hacks."""
    if realm in md_engines:
        _md_engine = md_engines[realm]
    else:
        _md_engine = md_engines["default"]
    # Reset the parser; otherwise it will get slower over time.
    _md_engine.reset()
@ -625,8 +664,8 @@ def bugdown_stats_finish():
    bugdown_total_requests += 1
    bugdown_total_time += (time.time() - bugdown_time_start)
-def convert(md):
+def convert(md, realm):
    bugdown_stats_start()
-    ret = do_convert(md)
+    ret = do_convert(md, realm)
    bugdown_stats_finish()
    return ret
--- a/zephyr/models.py
+++ b/zephyr/models.py
@ -334,7 +334,7 @@ class Message(models.Model):
            obj['content_type'] = 'text/html'
        elif apply_markdown:
            if rendered_content is None:
-                rendered_content = bugdown.convert(self.content)
+                rendered_content = bugdown.convert(self.content, self.sender.realm.domain)
                if rendered_content is None:
                    rendered_content = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'
--- a/zephyr/tests.py
+++ b/zephyr/tests.py
@ -1839,7 +1839,7 @@ class GetSubscribersTest(AuthedTestCase):
                               "Unable to retrieve subscribers for invite-only stream")
 def bugdown_convert(text):
-    return bugdown.convert(text)
+    return bugdown.convert(text, "humbughq.com")
 class BugdownTest(TestCase):
    def common_bugdown_test(self, text, expected):
@ -2310,6 +2310,12 @@ But you can never leave**"""
        converted = bugdown_convert(msg)
        self.assertEqual(converted, "<p>You can check out **any time you'd like<br>\nBut you can never leave**</p>")
    def test_realm_patterns(self):
        """Realm filters linkify only properly delimited ticket references."""
        # "trac #224" and "Trac #115" are expected to become links, while
        # "Ztrac #124" (preceded by a word character) and "trac #1124Z"
        # (followed by a word character) are expected to stay plain text.
        text = "We should fix trac #224 and Trac #115, but not Ztrac #124 or trac #1124Z today."
        expected = '<p>We should fix <a href="https://trac.humbughq.com/ticket/224" target="_blank" title="https://trac.humbughq.com/ticket/224">trac #224</a> and <a href="https://trac.humbughq.com/ticket/115" target="_blank" title="https://trac.humbughq.com/ticket/115">Trac #115</a>, but not Ztrac #124 or trac #1124Z today.</p>'
        self.assertEqual(bugdown_convert(text), expected)
 class UserPresenceTests(AuthedTestCase):
    fixtures = ['messages.json']