markdown: Stop attempting to expand/collapse re2 regex.

549dd8a4c4 changed the regex that we build to contain whitespace for
readability, and to strip that whitespace back out before returning it.
Unfortunately, the stripping also removes any spaces in the linkifier's
own source pattern, causing it to not match the expected strings.

Revert 549dd8a4c4.

Fixes: #27854.
This commit is contained in:
Alex Vandiver 2023-11-28 22:19:19 +00:00 committed by Tim Abbott
parent 319cfc7d7f
commit 8c8dbb3d66
2 changed files with 20 additions and 19 deletions

View File

@ -1821,25 +1821,7 @@ def prepare_linkifier_pattern(source: str) -> str:
# We use an extended definition of 'whitespace' which is
# equivalent to \p{White_Space} -- since \s in re2 only matches
# ASCII spaces, and re2 does not support \p{White_Space}.
regex = rf"""
(?P<{BEFORE_CAPTURE_GROUP}>
^ |
\s | {next_line} | \pZ |
['"\(,:<]
)
(?P<{OUTER_CAPTURE_GROUP}>
{source}
)
(?P<{AFTER_CAPTURE_GROUP}>
$ |
[^\pL\pN]
)
"""
# Strip out the spaces and newlines added to make the above
# legible -- re2 does not have the equivalent of the /x modifier
# that does this automatically. Note that we are careful to not
# strip _whitespace_, which would strip the literal \u0085 out.
return regex.replace(" ", "").replace("\n", "")
return rf"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|{next_line}|\pZ|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])"""
# Given a regular expression pattern, linkifies groups that match it

View File

@ -1366,6 +1366,25 @@ class MarkdownTest(ZulipTestCase):
[{"url": "https://example.com/A%20Test/%25%25%ba/123", "text": "url-123"}],
)
# Test spaces in the linkifier pattern
RealmFilter(
realm=realm,
pattern=r"community guidelines",
url_template="https://zulip.com/development-community/#community-norms",
).save()
converted = markdown_convert("community guidelines", message_realm=realm, message=msg)
self.assertEqual(
converted.rendered_content,
'<p><a href="https://zulip.com/development-community/#community-norms">community guidelines</a></p>',
)
converted = markdown_convert(
"please observe community guidelines here", message_realm=realm, message=msg
)
self.assertEqual(
converted.rendered_content,
'<p>please observe <a href="https://zulip.com/development-community/#community-norms">community guidelines</a> here</p>',
)
def test_multiple_matching_realm_patterns(self) -> None:
realm = get_realm("zulip")
self.check_add_linkifiers(