slack_incoming: Fix formatting fixups with overlaps, or at start/end.

The previous regular expression required a `[^\w]` at the start and
end of the match.  This had two unintended effects -- it meant that it
could never match at the start or end of a string, and it meant
that *adjacent* words required *two* non-word characters between them,
as the pattern matches cannot overlap.

Switch to allowing string start/end to anchor the matches, and make
the trailing `[^\w]` be a zero-width look-ahead, to allow the patterns
to overlap.  Also remove the spurious `^` within the inner character
classes, which prevented `*foo^bar*` from matching.  Finally, add
tests to cover the functionality, which was previously untested.
This commit is contained in:
Alex Vandiver 2022-11-29 16:10:34 -05:00 committed by Tim Abbott
parent 512705f267
commit f1a8209705
2 changed files with 26 additions and 2 deletions

View File

@ -18,6 +18,30 @@ Hello, world.
expected_message,
)
def test_message_formatting(self) -> None:
    """Send Slack-formatted text through the webhook and check the delivered content.

    Each case posts a JSON payload whose ``text`` field holds the Slack-style
    input, then asserts that the resulting stream message carries the
    expected rewritten content.
    """
    # Slack-style input -> content expected in the delivered message.
    # Insertion order is preserved, so cases run in the order listed.
    expected_by_input = {
        "some *foo* word": "some **foo** word",
        "*foo*": "**foo**",
        "*foo* *bar*": "**foo** **bar**",
        "*foo*a*bar*": "*foo*a*bar*",
        "some _foo_ word": "some *foo* word",
    }

    self.subscribe(self.test_user, self.STREAM_NAME)

    for slack_text, expected_content in expected_by_input.items():
        sent_message = self.send_webhook_payload(
            self.test_user,
            self.url,
            {"text": slack_text},
            content_type="application/json",
        )
        self.assert_stream_message(
            message=sent_message,
            stream_name=self.STREAM_NAME,
            topic_name="(no topic)",
            content=expected_content,
        )
def test_null_message(self) -> None:
self.check_webhook(
"null_text",

View File

@ -224,8 +224,8 @@ def replace_links(text: str) -> str:
def replace_formatting(text: str) -> str:
    """Rewrite Slack-style ``*bold*`` and ``_emphasis_`` markers.

    ``*text*`` becomes ``**text**`` and ``_text_`` becomes ``*text*``.

    A marker pair is only rewritten when:

    * the opening marker is at the start of the string or directly follows
      a non-word character,
    * the closing marker is at the end of the string or directly precedes
      a non-word character,
    * the enclosed text does not start or end with whitespace and contains
      neither the marker character nor a newline.

    Args:
        text: The message text to convert.

    Returns:
        The text with the matching marker pairs rewritten.
    """
    # Each substitution is applied exactly once.  Re-running the bold rewrite
    # on its own output would match the inner ``*foo*`` of ``**foo**`` again
    # and produce ``***foo***``.
    #
    # The trailing boundary is a zero-width look-ahead (or ``$``) so that the
    # separator after one match is still available as the leading boundary
    # of the next; this lets ``*foo* *bar*`` convert both words.

    # Slack uses *text* for bold, whereas Zulip interprets that as italics
    text = re.sub(r"([^\w]|^)\*(?!\s+)([^\*\n]+)(?<!\s)\*((?=[^\w])|$)", r"\1**\2**\3", text)

    # Slack uses _text_ for emphasis, whereas Zulip interprets that as nothing
    text = re.sub(r"([^\w]|^)[_](?!\s+)([^\_\n]+)(?<!\s)[_]((?=[^\w])|$)", r"\1*\2*\3", text)
    return text