Match patterns that don't start on a word boundary

This allows us to e.g. match "#1329" in "Bug #1329", even though the
place between a space and a # is not a word boundary.

Also this commit factors out some repeated code used for both in-message
and subject filters.

(imported from commit 5f7d80a58e76e51ea07fed050c88c5251faaaacd)
This commit is contained in:
Scott Feeney 2013-07-15 11:56:45 -04:00
parent 5a22c533c4
commit b76b06591b
1 changed files with 8 additions and 2 deletions

View File

@ -505,12 +505,18 @@ class LinkPattern(markdown.inlinepatterns.Pattern):
fixup_link(el)
return el
def prepare_realm_pattern(source):
    """ Augment a realm filter regex so it only matches as a standalone token.

    The returned pattern:
      - matches only at start-of-string or directly after whitespace or
        one of the opening delimiters: single quote, double quote,
        open paren, comma, colon, less-than.  This is looser than a word
        boundary, so e.g. '#1329' is found in 'Bug #1329';
      - refuses to match if a word character directly follows;
      - captures whatever `source` matched in the named group 'name'.

    `source` is a regular expression string; the result is a regular
    expression string suitable for the re module.
    """
    # Negative lookbehind on a *negated* class: "the previous character is
    # not a non-delimiter".  Unlike a positive lookbehind, this also
    # succeeds at start-of-string, where there is no previous character.
    prefix = r"""(?<![^\s'"\(,:<])(?P<name>"""
    # Raw string, so the backslash-w reaches the regex engine as written
    # instead of relying on Python passing through an invalid escape.
    suffix = r')(?!\w)'
    return prefix + source + suffix
# Given a regular expression pattern, linkifies groups that match it
# using the provided format string to construct the URL.
class RealmFilterPattern(markdown.inlinepatterns.Pattern):
    """ Applies a given realm filter to the input """
    def __init__(self, source_pattern, format_string, markdown_instance=None):
        """ Build the inline pattern for one realm filter.

        source_pattern is the filter's regular expression; it is wrapped by
        prepare_realm_pattern before being handed to the base Pattern.
        format_string is stored on the instance as-is.
        markdown_instance is forwarded unchanged to the base initializer.
        """
        # Single assignment: the pattern comes from the shared helper only
        # (a previous hand-built pattern here was overwritten immediately).
        self.pattern = prepare_realm_pattern(source_pattern)
        self.format_string = format_string
        markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)
@ -631,7 +637,7 @@ realm_filters = {
def subject_links(domain, subject):
    """ Return the links produced by `domain`'s realm filters in `subject`.

    Every (source_pattern, format_string) pair listed for `domain` in
    realm_filters is searched for in `subject`.  Each match contributes
    format_string % <the match's named groups>, in the order the filters
    are listed and then in match order.  A domain with no entry in
    realm_filters yields an empty list.
    """
    matches = []
    for source_pattern, format_string in realm_filters.get(domain, []):
        # Build the pattern once per filter, via the shared helper only
        # (a previous hand-built pattern here was overwritten immediately).
        pattern = prepare_realm_pattern(source_pattern)
        for m in re.finditer(pattern, subject):
            # groupdict() includes the "name" group added by the helper
            # as well as any named groups from the filter itself.
            matches.append(format_string % m.groupdict())
    return matches