bugdown: Add custom per-realm filters to linkify certain strings.

I've tried to do this in a way that's scalable and easily configured,
so that we can add new such filters for customers on-demand without
needing to add anything other than a bit of configuration.

Once we're confident in the arguments to this system, I think we'll
want to move the regular expression lists into the database so that we
don't need to do a prod push to modify the regular expression lists.

The initial set of regular expressions is:
(1) Linkifying e.g. "trac #224" in the Humbug realm, so that we're exercising this code.
(2) The various ticket-number formats CUSTOMER7 uses, for the CUSTOMER7 realm.

(imported from commit 992b0937b9012c15a7c2f585eb0aacb221c52e01)
This commit is contained in:
Tim Abbott 2013-06-05 11:45:57 -04:00
parent ceacf6f97e
commit 0ad1094e45
4 changed files with 63 additions and 18 deletions

View File

@ -455,7 +455,7 @@ def check_message(sender, client, message_type_name, message_to,
else: else:
return "Invalid message type" return "Invalid message type"
rendered_content = bugdown.convert(message_content) rendered_content = bugdown.convert(message_content, sender.realm.domain)
if rendered_content is None: if rendered_content is None:
return "We were unable to render your message" return "We were unable to render your message"
@ -958,7 +958,7 @@ def do_update_message(user_profile, message_id, subject, content):
first_rendered_content = message.rendered_content first_rendered_content = message.rendered_content
if content is not None: if content is not None:
rendered_content = bugdown.convert(content) rendered_content = bugdown.convert(content, message.sender.realm.domain)
if rendered_content is None: if rendered_content is None:
raise JsonableError("We were unable to render your updated message") raise JsonableError("We were unable to render your updated message")

View File

@ -408,16 +408,18 @@ def sanitize_url(url):
# Url passes all tests. Return url as-is. # Url passes all tests. Return url as-is.
return urlparse.urlunparse((scheme, netloc, path, params, query, fragment)) return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
def url_to_a(url): def url_to_a(url, text = None):
a = markdown.util.etree.Element('a') a = markdown.util.etree.Element('a')
href = sanitize_url(url) href = sanitize_url(url)
if href is None: if href is None:
# Rejected by sanitize_url; render it as plain text. # Rejected by sanitize_url; render it as plain text.
return url return url
if text is None:
text = url
a.set('href', href) a.set('href', href)
a.text = url a.text = text
fixup_link(a, not 'mailto:' in href[:7]) fixup_link(a, not 'mailto:' in href[:7])
return a return a
@ -500,6 +502,19 @@ class LinkPattern(markdown.inlinepatterns.Pattern):
fixup_link(el) fixup_link(el)
return el return el
# Given a regular expression pattern, linkifies groups that match it
# using the provided format string to construct the URL.
class RealmFilterPattern(markdown.inlinepatterns.Pattern):
    """Inline pattern that applies a single realm filter to the input.

    ``source_pattern`` is a regular expression fragment; text matching it
    becomes the link text.  ``format_string`` is a %-style template that is
    interpolated with the match's named groups to build the link URL.
    """

    def __init__(self, source_pattern, format_string, markdown_instance=None):
        # Wrap the filter in a group called "name" so handleMatch can recover
        # the matched text.  The match must start at a word boundary and must
        # not be immediately followed by a word character.  Both pieces are
        # raw strings so the regex escapes (\b, \w) never go through Python's
        # own string-escape processing.
        self.pattern = r'\b(?P<name>' + source_pattern + r')(?!\w)'
        self.format_string = format_string
        markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)

    def handleMatch(self, m):
        """Build the result via url_to_a: formatted URL as target, matched text as label."""
        return url_to_a(self.format_string % m.groupdict(),
                        m.group("name"))
class Bugdown(markdown.Extension): class Bugdown(markdown.Extension):
def extendMarkdown(self, md, md_globals): def extendMarkdown(self, md, md_globals):
del md.preprocessors['reference'] del md.preprocessors['reference']
@ -535,6 +550,10 @@ class Bugdown(markdown.Extension):
md.inlinePatterns.add('http_autolink', HttpLink(http_link_regex), '>link') md.inlinePatterns.add('http_autolink', HttpLink(http_link_regex), '>link')
for (pattern, format_string) in self.getConfig("realm_filters"):
md.inlinePatterns.add('realm_filters/%s' % (pattern,),
RealmFilterPattern(pattern, format_string), '_begin')
# A link starts at a word boundary, and ends at space, punctuation, or end-of-input. # A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
# #
# We detect a url by checking for the TLD, and building around it. # We detect a url by checking for the TLD, and building around it.
@ -555,15 +574,31 @@ class Bugdown(markdown.Extension):
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end") md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
_md_engine = markdown.Markdown(
safe_mode = 'escape', md_engines = {}
output_format = 'html',
extensions = ['nl2br', def make_md_engine(key, opts):
codehilite.makeExtension(configs=[ md_engines[key] = markdown.Markdown(
('force_linenos', False), safe_mode = 'escape',
('guess_lang', False)]), output_format = 'html',
fenced_code.makeExtension(), extensions = ['nl2br',
Bugdown()]) codehilite.makeExtension(configs=[
('force_linenos', False),
('guess_lang', False)]),
fenced_code.makeExtension(),
Bugdown(opts)])
# Per-realm linkification filters, keyed by realm domain.  Each filter is a
# (regular expression, URL format string) pair; the regex's named groups are
# substituted into the format string.
realm_filters = {
    "default": [],
    "humbughq.com": [
        ("[tT]rac #(?P<id>[0-9]{1,8})", "https://trac.humbughq.com/ticket/%(id)s"),
    ],
}

# Build one Markdown engine per configured realm (including "default").
for realm in realm_filters:
    # Because of how the Markdown config API works, this has a confusingly
    # large number of layers of dicts/arrays :(
    make_md_engine(realm, {"realm_filters": [realm_filters[realm], "Realm-specific filters for %s" % (realm,)]})
# We want to log Markdown parser failures, but shouldn't log the actual input # We want to log Markdown parser failures, but shouldn't log the actual input
# message for privacy reasons. The compromise is to replace all alphanumeric # message for privacy reasons. The compromise is to replace all alphanumeric
@ -575,9 +610,13 @@ _privacy_re = re.compile(r'\w', flags=re.UNICODE)
def _sanitize_for_log(md): def _sanitize_for_log(md):
return repr(_privacy_re.sub('x', md)) return repr(_privacy_re.sub('x', md))
def do_convert(md): def do_convert(md, realm):
"""Convert Markdown to HTML, with Humbug-specific settings and hacks.""" """Convert Markdown to HTML, with Humbug-specific settings and hacks."""
if realm in md_engines:
_md_engine = md_engines[realm]
else:
_md_engine = md_engines["default"]
# Reset the parser; otherwise it will get slower over time. # Reset the parser; otherwise it will get slower over time.
_md_engine.reset() _md_engine.reset()
@ -625,8 +664,8 @@ def bugdown_stats_finish():
bugdown_total_requests += 1 bugdown_total_requests += 1
bugdown_total_time += (time.time() - bugdown_time_start) bugdown_total_time += (time.time() - bugdown_time_start)
def convert(md): def convert(md, realm):
bugdown_stats_start() bugdown_stats_start()
ret = do_convert(md) ret = do_convert(md, realm)
bugdown_stats_finish() bugdown_stats_finish()
return ret return ret

View File

@ -334,7 +334,7 @@ class Message(models.Model):
obj['content_type'] = 'text/html' obj['content_type'] = 'text/html'
elif apply_markdown: elif apply_markdown:
if rendered_content is None: if rendered_content is None:
rendered_content = bugdown.convert(self.content) rendered_content = bugdown.convert(self.content, self.sender.realm.domain)
if rendered_content is None: if rendered_content is None:
rendered_content = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>' rendered_content = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'

View File

@ -1839,7 +1839,7 @@ class GetSubscribersTest(AuthedTestCase):
"Unable to retrieve subscribers for invite-only stream") "Unable to retrieve subscribers for invite-only stream")
def bugdown_convert(text): def bugdown_convert(text):
return bugdown.convert(text) return bugdown.convert(text, "humbughq.com")
class BugdownTest(TestCase): class BugdownTest(TestCase):
def common_bugdown_test(self, text, expected): def common_bugdown_test(self, text, expected):
@ -2310,6 +2310,12 @@ But you can never leave**"""
converted = bugdown_convert(msg) converted = bugdown_convert(msg)
self.assertEqual(converted, "<p>You can check out **any time you'd like<br>\nBut you can never leave**</p>") self.assertEqual(converted, "<p>You can check out **any time you'd like<br>\nBut you can never leave**</p>")
def test_realm_patterns(self):
    """Check that "trac #N" is linkified only when it stands alone as a word.

    "trac #224" and "Trac #115" must become links; "Ztrac #124" (extra
    leading letter) and "trac #1124Z" (extra trailing letter) must not.
    """
    text = "We should fix trac #224 and Trac #115, but not Ztrac #124 or trac #1124Z today."
    expected = '<p>We should fix <a href="https://trac.humbughq.com/ticket/224" target="_blank" title="https://trac.humbughq.com/ticket/224">trac #224</a> and <a href="https://trac.humbughq.com/ticket/115" target="_blank" title="https://trac.humbughq.com/ticket/115">Trac #115</a>, but not Ztrac #124 or trac #1124Z today.</p>'
    self.assertEqual(bugdown_convert(text), expected)
class UserPresenceTests(AuthedTestCase): class UserPresenceTests(AuthedTestCase):
fixtures = ['messages.json'] fixtures = ['messages.json']