mirror of https://github.com/zulip/zulip.git
bugdown: Add custom per-realm filters to linkify certain strings.
I've tried to do this in a way that's scalable and easily configured, so that we can add new such filters for customers on demand without needing to add anything other than a bit of configuration. Once we're confident in the arguments to this system, I think we'll want to move the regular expression lists into the database so that we don't need to do a prod push to modify them. The initial set of regular expressions is: (1) Linkifying e.g. "trac #224" in the Humbug realm, so that we're exercising this code. (2) The various ticket number formats CUSTOMER7 uses for the CUSTOMER7 realm. (imported from commit 992b0937b9012c15a7c2f585eb0aacb221c52e01)
This commit is contained in:
parent
ceacf6f97e
commit
0ad1094e45
|
@ -455,7 +455,7 @@ def check_message(sender, client, message_type_name, message_to,
|
||||||
else:
|
else:
|
||||||
return "Invalid message type"
|
return "Invalid message type"
|
||||||
|
|
||||||
rendered_content = bugdown.convert(message_content)
|
rendered_content = bugdown.convert(message_content, sender.realm.domain)
|
||||||
if rendered_content is None:
|
if rendered_content is None:
|
||||||
return "We were unable to render your message"
|
return "We were unable to render your message"
|
||||||
|
|
||||||
|
@ -958,7 +958,7 @@ def do_update_message(user_profile, message_id, subject, content):
|
||||||
first_rendered_content = message.rendered_content
|
first_rendered_content = message.rendered_content
|
||||||
|
|
||||||
if content is not None:
|
if content is not None:
|
||||||
rendered_content = bugdown.convert(content)
|
rendered_content = bugdown.convert(content, message.sender.realm.domain)
|
||||||
if rendered_content is None:
|
if rendered_content is None:
|
||||||
raise JsonableError("We were unable to render your updated message")
|
raise JsonableError("We were unable to render your updated message")
|
||||||
|
|
||||||
|
|
|
@ -408,16 +408,18 @@ def sanitize_url(url):
|
||||||
# Url passes all tests. Return url as-is.
|
# Url passes all tests. Return url as-is.
|
||||||
return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
|
return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
|
||||||
|
|
||||||
def url_to_a(url):
|
def url_to_a(url, text = None):
|
||||||
a = markdown.util.etree.Element('a')
|
a = markdown.util.etree.Element('a')
|
||||||
|
|
||||||
href = sanitize_url(url)
|
href = sanitize_url(url)
|
||||||
if href is None:
|
if href is None:
|
||||||
# Rejected by sanitize_url; render it as plain text.
|
# Rejected by sanitize_url; render it as plain text.
|
||||||
return url
|
return url
|
||||||
|
if text is None:
|
||||||
|
text = url
|
||||||
|
|
||||||
a.set('href', href)
|
a.set('href', href)
|
||||||
a.text = url
|
a.text = text
|
||||||
fixup_link(a, not 'mailto:' in href[:7])
|
fixup_link(a, not 'mailto:' in href[:7])
|
||||||
return a
|
return a
|
||||||
|
|
||||||
|
@ -500,6 +502,19 @@ class LinkPattern(markdown.inlinepatterns.Pattern):
|
||||||
fixup_link(el)
|
fixup_link(el)
|
||||||
return el
|
return el
|
||||||
|
|
||||||
|
# Given a regular expression pattern, linkifies groups that match it
|
||||||
|
# using the provided format string to construct the URL.
|
||||||
|
class RealmFilterPattern(markdown.inlinepatterns.Pattern):
|
||||||
|
""" Applied a given realm filter to the input """
|
||||||
|
def __init__(self, source_pattern, format_string, markdown_instance=None):
|
||||||
|
self.pattern = r'\b(?P<name>' + source_pattern + ')(?!\w)'
|
||||||
|
self.format_string = format_string
|
||||||
|
markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)
|
||||||
|
|
||||||
|
def handleMatch(self, m):
|
||||||
|
return url_to_a(self.format_string % m.groupdict(),
|
||||||
|
m.group("name"))
|
||||||
|
|
||||||
class Bugdown(markdown.Extension):
|
class Bugdown(markdown.Extension):
|
||||||
def extendMarkdown(self, md, md_globals):
|
def extendMarkdown(self, md, md_globals):
|
||||||
del md.preprocessors['reference']
|
del md.preprocessors['reference']
|
||||||
|
@ -535,6 +550,10 @@ class Bugdown(markdown.Extension):
|
||||||
|
|
||||||
md.inlinePatterns.add('http_autolink', HttpLink(http_link_regex), '>link')
|
md.inlinePatterns.add('http_autolink', HttpLink(http_link_regex), '>link')
|
||||||
|
|
||||||
|
for (pattern, format_string) in self.getConfig("realm_filters"):
|
||||||
|
md.inlinePatterns.add('realm_filters/%s' % (pattern,),
|
||||||
|
RealmFilterPattern(pattern, format_string), '_begin')
|
||||||
|
|
||||||
# A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
|
# A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
|
||||||
#
|
#
|
||||||
# We detect a url by checking for the TLD, and building around it.
|
# We detect a url by checking for the TLD, and building around it.
|
||||||
|
@ -555,15 +574,31 @@ class Bugdown(markdown.Extension):
|
||||||
|
|
||||||
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
|
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
|
||||||
|
|
||||||
_md_engine = markdown.Markdown(
|
|
||||||
safe_mode = 'escape',
|
md_engines = {}
|
||||||
output_format = 'html',
|
|
||||||
extensions = ['nl2br',
|
def make_md_engine(key, opts):
|
||||||
codehilite.makeExtension(configs=[
|
md_engines[key] = markdown.Markdown(
|
||||||
('force_linenos', False),
|
safe_mode = 'escape',
|
||||||
('guess_lang', False)]),
|
output_format = 'html',
|
||||||
fenced_code.makeExtension(),
|
extensions = ['nl2br',
|
||||||
Bugdown()])
|
codehilite.makeExtension(configs=[
|
||||||
|
('force_linenos', False),
|
||||||
|
('guess_lang', False)]),
|
||||||
|
fenced_code.makeExtension(),
|
||||||
|
Bugdown(opts)])
|
||||||
|
|
||||||
|
realm_filters = {
|
||||||
|
"default": [],
|
||||||
|
"humbughq.com": [
|
||||||
|
("[tT]rac #(?P<id>[0-9]{1,8})", "https://trac.humbughq.com/ticket/%(id)s"),
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
for realm in realm_filters.keys():
|
||||||
|
# Because of how the Markdown config API works, this has a confusingly
|
||||||
|
# large number of layers of dicts/arrays :(
|
||||||
|
make_md_engine(realm, {"realm_filters": [realm_filters[realm], "Realm-specific filters for %s" % (realm,)]})
|
||||||
|
|
||||||
# We want to log Markdown parser failures, but shouldn't log the actual input
|
# We want to log Markdown parser failures, but shouldn't log the actual input
|
||||||
# message for privacy reasons. The compromise is to replace all alphanumeric
|
# message for privacy reasons. The compromise is to replace all alphanumeric
|
||||||
|
@ -575,9 +610,13 @@ _privacy_re = re.compile(r'\w', flags=re.UNICODE)
|
||||||
def _sanitize_for_log(md):
|
def _sanitize_for_log(md):
|
||||||
return repr(_privacy_re.sub('x', md))
|
return repr(_privacy_re.sub('x', md))
|
||||||
|
|
||||||
def do_convert(md):
|
def do_convert(md, realm):
|
||||||
"""Convert Markdown to HTML, with Humbug-specific settings and hacks."""
|
"""Convert Markdown to HTML, with Humbug-specific settings and hacks."""
|
||||||
|
|
||||||
|
if realm in md_engines:
|
||||||
|
_md_engine = md_engines[realm]
|
||||||
|
else:
|
||||||
|
_md_engine = md_engines["default"]
|
||||||
# Reset the parser; otherwise it will get slower over time.
|
# Reset the parser; otherwise it will get slower over time.
|
||||||
_md_engine.reset()
|
_md_engine.reset()
|
||||||
|
|
||||||
|
@ -625,8 +664,8 @@ def bugdown_stats_finish():
|
||||||
bugdown_total_requests += 1
|
bugdown_total_requests += 1
|
||||||
bugdown_total_time += (time.time() - bugdown_time_start)
|
bugdown_total_time += (time.time() - bugdown_time_start)
|
||||||
|
|
||||||
def convert(md):
|
def convert(md, realm):
|
||||||
bugdown_stats_start()
|
bugdown_stats_start()
|
||||||
ret = do_convert(md)
|
ret = do_convert(md, realm)
|
||||||
bugdown_stats_finish()
|
bugdown_stats_finish()
|
||||||
return ret
|
return ret
|
||||||
|
|
|
@ -334,7 +334,7 @@ class Message(models.Model):
|
||||||
obj['content_type'] = 'text/html'
|
obj['content_type'] = 'text/html'
|
||||||
elif apply_markdown:
|
elif apply_markdown:
|
||||||
if rendered_content is None:
|
if rendered_content is None:
|
||||||
rendered_content = bugdown.convert(self.content)
|
rendered_content = bugdown.convert(self.content, self.sender.realm.domain)
|
||||||
if rendered_content is None:
|
if rendered_content is None:
|
||||||
rendered_content = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'
|
rendered_content = '<p>[Humbug note: Sorry, we could not understand the formatting of your message]</p>'
|
||||||
|
|
||||||
|
|
|
@ -1839,7 +1839,7 @@ class GetSubscribersTest(AuthedTestCase):
|
||||||
"Unable to retrieve subscribers for invite-only stream")
|
"Unable to retrieve subscribers for invite-only stream")
|
||||||
|
|
||||||
def bugdown_convert(text):
|
def bugdown_convert(text):
|
||||||
return bugdown.convert(text)
|
return bugdown.convert(text, "humbughq.com")
|
||||||
|
|
||||||
class BugdownTest(TestCase):
|
class BugdownTest(TestCase):
|
||||||
def common_bugdown_test(self, text, expected):
|
def common_bugdown_test(self, text, expected):
|
||||||
|
@ -2310,6 +2310,12 @@ But you can never leave**"""
|
||||||
converted = bugdown_convert(msg)
|
converted = bugdown_convert(msg)
|
||||||
self.assertEqual(converted, "<p>You can check out **any time you'd like<br>\nBut you can never leave**</p>")
|
self.assertEqual(converted, "<p>You can check out **any time you'd like<br>\nBut you can never leave**</p>")
|
||||||
|
|
||||||
|
def test_realm_patterns(self):
|
||||||
|
msg = "We should fix trac #224 and Trac #115, but not Ztrac #124 or trac #1124Z today."
|
||||||
|
converted = bugdown_convert(msg)
|
||||||
|
|
||||||
|
self.assertEqual(converted, '<p>We should fix <a href="https://trac.humbughq.com/ticket/224" target="_blank" title="https://trac.humbughq.com/ticket/224">trac #224</a> and <a href="https://trac.humbughq.com/ticket/115" target="_blank" title="https://trac.humbughq.com/ticket/115">Trac #115</a>, but not Ztrac #124 or trac #1124Z today.</p>')
|
||||||
|
|
||||||
class UserPresenceTests(AuthedTestCase):
|
class UserPresenceTests(AuthedTestCase):
|
||||||
fixtures = ['messages.json']
|
fixtures = ['messages.json']
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue