mirror of https://github.com/zulip/zulip.git
bugdown: Add custom per-realm filters to linkify certain strings.
I've tried to do this in a way that's scalable and easily configured, so that we can add new filters for customers on demand with nothing more than a bit of configuration. Once we're confident in the arguments to this system, I think we'll want to move the regular expression lists into the database, so that modifying them doesn't require a prod push. The initial set of regular expressions is: (1) Linkifying e.g. "trac #224" in the Humbug realm, so that we're exercising this code. (2) The various ticket-number formats CUSTOMER7 uses, for the CUSTOMER7 realm. (imported from commit 992b0937b9012c15a7c2f585eb0aacb221c52e01)
This commit is contained in:
parent
ceacf6f97e
commit
0ad1094e45
|
@ -455,7 +455,7 @@ def check_message(sender, client, message_type_name, message_to,
|
|||
else:
|
||||
return "Invalid message type"
|
||||
|
||||
rendered_content = bugdown.convert(message_content)
|
||||
rendered_content = bugdown.convert(message_content, sender.realm.domain)
|
||||
if rendered_content is None:
|
||||
return "We were unable to render your message"
|
||||
|
||||
|
@ -958,7 +958,7 @@ def do_update_message(user_profile, message_id, subject, content):
|
|||
first_rendered_content = message.rendered_content
|
||||
|
||||
if content is not None:
|
||||
rendered_content = bugdown.convert(content)
|
||||
rendered_content = bugdown.convert(content, message.sender.realm.domain)
|
||||
if rendered_content is None:
|
||||
raise JsonableError("We were unable to render your updated message")
|
||||
|
||||
|
|
|
@ -408,16 +408,18 @@ def sanitize_url(url):
|
|||
# Url passes all tests. Return url as-is.
|
||||
return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
|
||||
|
||||
def url_to_a(url):
|
||||
def url_to_a(url, text = None):
|
||||
a = markdown.util.etree.Element('a')
|
||||
|
||||
href = sanitize_url(url)
|
||||
if href is None:
|
||||
# Rejected by sanitize_url; render it as plain text.
|
||||
return url
|
||||
if text is None:
|
||||
text = url
|
||||
|
||||
a.set('href', href)
|
||||
a.text = url
|
||||
a.text = text
|
||||
fixup_link(a, not 'mailto:' in href[:7])
|
||||
return a
|
||||
|
||||
|
@ -500,6 +502,19 @@ class LinkPattern(markdown.inlinepatterns.Pattern):
|
|||
fixup_link(el)
|
||||
return el
|
||||
|
||||
# Given a regular expression pattern, linkifies groups that match it
|
||||
# using the provided format string to construct the URL.
|
||||
class RealmFilterPattern(markdown.inlinepatterns.Pattern):
    """Applies a given realm filter to the input.

    Text matching the filter is handed to url_to_a: the link target is the
    format string %-interpolated with the match's named groups, and the
    link text is the matched text itself.
    """

    def __init__(self, source_pattern, format_string, markdown_instance=None):
        # Anchor the filter so it only fires on whole tokens: the match must
        # start at a word boundary and must not be followed by a word
        # character.  Both fragments are raw strings so that \b and \w reach
        # the regex engine verbatim instead of being read as string escapes.
        # The whole match is captured as "name", so source_pattern must not
        # define a group with that name itself.
        self.pattern = r'\b(?P<name>' + source_pattern + r')(?!\w)'
        self.format_string = format_string
        markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)

    def handleMatch(self, m):
        # groupdict() carries "name" plus every named group from the
        # realm's own pattern, all available to the format string.
        return url_to_a(self.format_string % m.groupdict(),
                        m.group("name"))
|
||||
|
||||
class Bugdown(markdown.Extension):
|
||||
def extendMarkdown(self, md, md_globals):
|
||||
del md.preprocessors['reference']
|
||||
|
@ -535,6 +550,10 @@ class Bugdown(markdown.Extension):
|
|||
|
||||
md.inlinePatterns.add('http_autolink', HttpLink(http_link_regex), '>link')
|
||||
|
||||
for (pattern, format_string) in self.getConfig("realm_filters"):
|
||||
md.inlinePatterns.add('realm_filters/%s' % (pattern,),
|
||||
RealmFilterPattern(pattern, format_string), '_begin')
|
||||
|
||||
# A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
|
||||
#
|
||||
# We detect a url by checking for the TLD, and building around it.
|
||||
|
@ -555,15 +574,31 @@ class Bugdown(markdown.Extension):
|
|||
|
||||
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
|
||||
|
||||
_md_engine = markdown.Markdown(
|
||||
safe_mode = 'escape',
|
||||
output_format = 'html',
|
||||
extensions = ['nl2br',
|
||||
codehilite.makeExtension(configs=[
|
||||
('force_linenos', False),
|
||||
('guess_lang', False)]),
|
||||
fenced_code.makeExtension(),
|
||||
Bugdown()])
|
||||
|
||||
md_engines = {}
|
||||
|
||||
def make_md_engine(key, opts):
    """Create a Markdown engine and register it in md_engines under `key`.

    `opts` is passed straight through to the Bugdown extension; every other
    setting is the same for all engines.
    """
    highlighter = codehilite.makeExtension(configs=[
        ('force_linenos', False),
        ('guess_lang', False)])
    engine_extensions = [
        'nl2br',
        highlighter,
        fenced_code.makeExtension(),
        Bugdown(opts),
    ]
    md_engines[key] = markdown.Markdown(
        safe_mode='escape',
        output_format='html',
        extensions=engine_extensions)
|
||||
|
||||
# Linkification rules keyed by realm domain.  Each rule is a pair of
# (regular expression, URL format string); the named groups matched by the
# expression are %-interpolated into the format string.
realm_filters = {
    "default": [],
    "humbughq.com": [
        ("[tT]rac #(?P<id>[0-9]{1,8})", "https://trac.humbughq.com/ticket/%(id)s"),
    ],
}

# Build one Markdown engine per configured realm.
for realm in realm_filters:
    # Because of how the Markdown config API works, this has a confusingly
    # large number of layers of dicts/arrays :(
    # NOTE(review): presumably each config entry is a [value, description]
    # pair -- confirm against the Markdown extension API.
    make_md_engine(realm, {
        "realm_filters": [realm_filters[realm],
                          "Realm-specific filters for %s" % (realm,)],
    })
|
||||
|
||||
# We want to log Markdown parser failures, but shouldn't log the actual input
|
||||
# message for privacy reasons. The compromise is to replace all alphanumeric
|
||||
|
@ -575,9 +610,13 @@ _privacy_re = re.compile(r'\w', flags=re.UNICODE)
|
|||
def _sanitize_for_log(md):
|
||||
return repr(_privacy_re.sub('x', md))
|
||||
|
||||
def do_convert(md):
|
||||
def do_convert(md, realm):
|
||||
"""Convert Markdown to HTML, with Humbug-specific settings and hacks."""
|
||||
|
||||
if realm in md_engines:
|
||||
_md_engine = md_engines[realm]
|
||||
else:
|
||||
_md_engine = md_engines["default"]
|
||||
# Reset the parser; otherwise it will get slower over time.
|
||||
_md_engine.reset()
|
||||
|
||||
|
@ -625,8 +664,8 @@ def bugdown_stats_finish():
|
|||
bugdown_total_requests += 1
|
||||
bugdown_total_time += (time.time() - bugdown_time_start)
|
||||
|
||||
def convert(md):
|
||||
def convert(md, realm):
|
||||
bugdown_stats_start()
|
||||
ret = do_convert(md)
|
||||
ret = do_convert(md, realm)
|
||||
bugdown_stats_finish()
|
||||
return ret
|
||||
|
|
|
@ -334,7 +334,7 @@ class Message(models.Model):
|
|||
obj['content_type'] = 'text/html'
|
||||
elif apply_markdown:
|
||||
if rendered_content is None:
|
||||
rendered_content = bugdown.convert(self.content)
|
||||
rendered_content = bugdown.convert(self.content, self.sender.realm.domain)
|
||||
if rendered_content is None:
|
||||
rendered_content = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'
|
||||
|
||||
|
|
|
@ -1839,7 +1839,7 @@ class GetSubscribersTest(AuthedTestCase):
|
|||
"Unable to retrieve subscribers for invite-only stream")
|
||||
|
||||
def bugdown_convert(text):
|
||||
return bugdown.convert(text)
|
||||
return bugdown.convert(text, "humbughq.com")
|
||||
|
||||
class BugdownTest(TestCase):
|
||||
def common_bugdown_test(self, text, expected):
|
||||
|
@ -2310,6 +2310,12 @@ But you can never leave**"""
|
|||
converted = bugdown_convert(msg)
|
||||
self.assertEqual(converted, "<p>You can check out **any time you'd like<br>\nBut you can never leave**</p>")
|
||||
|
||||
def test_realm_patterns(self):
    # "trac #224" and "Trac #115" are whole tokens and must be linkified.
    # "Ztrac #124" does not start at a word boundary and "trac #1124Z" is
    # followed by a word character, so both must be left as plain text.
    source = "We should fix trac #224 and Trac #115, but not Ztrac #124 or trac #1124Z today."
    expected = '<p>We should fix <a href="https://trac.humbughq.com/ticket/224" target="_blank" title="https://trac.humbughq.com/ticket/224">trac #224</a> and <a href="https://trac.humbughq.com/ticket/115" target="_blank" title="https://trac.humbughq.com/ticket/115">Trac #115</a>, but not Ztrac #124 or trac #1124Z today.</p>'

    rendered = bugdown_convert(source)

    self.assertEqual(rendered, expected)
|
||||
|
||||
class UserPresenceTests(AuthedTestCase):
|
||||
fixtures = ['messages.json']
|
||||
|
||||
|
|
Loading…
Reference in New Issue