mirror of https://github.com/zulip/zulip.git
slack_regex: Fix overlapping capture group in Slack regex.
The Slack text regexes match specific characters before and after the formatted string to avoid reformatting already formatted text and to handle cases like the `invlaid_bold_test` test in `slack_message_conversion.json`. However, because the capture groups consume each matching character, two formatted strings separated by a single matching character result in one string not being matched, as the character is used to match the other string. For example, in `*abc*b*def*` → **abc**b*def*, the "b" is consumed to match *abc*. This prep commit modifies the Slack text regex in `slack_message_conversion.py` to use a lookahead assertion (`(?=...)`) in place of the last capture group, so the trailing character is checked without being consumed.
This commit is contained in:
parent
5c8f10179a
commit
8764e15599
|
@ -52,21 +52,21 @@ SLACK_STRIKETHROUGH_REGEX = r"""
|
|||
(\~) # followed by an ~
|
||||
([^~]*) # any character except ~
|
||||
(\~) # followed by an ~
|
||||
(\n|$|[ -']|[+-/]|[:-?]|\*|\_|\}|\)|\]|\||\^) # ends with specified characters
|
||||
(?=\n|$|[ -']|[+-/]|[:-?]|\*|\_|\}|\)|\]|\||\^) # ends with specified characters
|
||||
"""
|
||||
SLACK_ITALIC_REGEX = r"""
|
||||
(\n|^|[ -*]|[+-/]|[:-?]|\{|\[|\||\^|~)
|
||||
(\_)
|
||||
([^_]*) # any character except _
|
||||
(\_)
|
||||
(\n|$|[ -']|[+-/]|[:-?]|\}|\)|\]|\*|\||\^|~)
|
||||
(?=\n|$|[ -']|[+-/]|[:-?]|\}|\)|\]|\*|\||\^|~)
|
||||
"""
|
||||
SLACK_BOLD_REGEX = r"""
|
||||
(\n|^|[ -(]|[+-/]|[:-?]|\{|\[|\_|\||\^|~)
|
||||
(\*)
|
||||
([^*]*) # any character except *
|
||||
(\*)
|
||||
(\n|$|[ -']|[+-/]|[:-?]|\}|\)|\]|\_|\||\^|~)
|
||||
(?=\n|$|[ -']|[+-/]|[:-?]|\}|\)|\]|\_|\||\^|~)
|
||||
"""
|
||||
|
||||
|
||||
|
@ -139,9 +139,7 @@ def convert_markdown_syntax(text: str, regex: str, zulip_keyword: str) -> str:
|
|||
3. For italic formatting: This maps Slack's '_italic_' to Zulip's '*italic*'
|
||||
"""
|
||||
for match in re.finditer(regex, text, re.VERBOSE):
|
||||
converted_token = (
|
||||
match.group(1) + zulip_keyword + match.group(3) + zulip_keyword + match.group(5)
|
||||
)
|
||||
converted_token = match.group(1) + zulip_keyword + match.group(3) + zulip_keyword
|
||||
text = text.replace(match.group(0), converted_token)
|
||||
return text
|
||||
|
||||
|
|
|
@ -94,6 +94,11 @@
|
|||
"name": "format_emoji_test",
|
||||
"input": "*1️⃣ bold* some _1️⃣ italic_ word ~1️⃣ strike~",
|
||||
"conversion_output": "**1️⃣ bold** some *1️⃣ italic* word ~~1️⃣ strike~~"
|
||||
},
|
||||
{
|
||||
"name": "overlapping_capture_group_test",
|
||||
"input": "*abc*\n*def*\n*ghi*\n*jkl*\n\n*ABC ABC*",
|
||||
"conversion_output": "**abc**\n**def**\n**ghi**\n**jkl**\n\n**ABC ABC**"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue