slack_regex: Update Slack text regexes to capture newlines.

This prep commit modifies the Slack text regex in
`slack_message_conversion.py` to include the newline character (`\n`) as
part of the first and last capture groups.

Previously, the regexes in `slack_message_conversion.py` couldn't capture
and reformat strings surrounded by newlines, such as "\n*ABC*\n". Strings
like this are probably fairly common, since they can be generated both by
the Slack exporter itself and by our Slack message block and attachment
formatters (`render_blocks` and `render_attachments`).
This commit is contained in:
PieterCK 2024-11-19 16:05:06 +07:00
parent 49eca01cfc
commit 414c72172a
2 changed files with 11 additions and 6 deletions

View File

@ -48,25 +48,25 @@ SLACK_USERMENTION_REGEX = r"""
# Hence, ~stri~ke doesn't format the word in Slack, but ~~stri~~ke
# formats the word in Zulip
SLACK_STRIKETHROUGH_REGEX = r"""
(^|[ -(]|[+-/]|\*|\_|[:-?]|\{|\[|\||\^) # Start after specified characters
(\n|^|[ -(]|[+-/]|\*|\_|[:-?]|\{|\[|\||\^) # Start after specified characters
(\~) # followed by an asterisk
([ -)+-}]*)([ -}]+) # any character except asterisk
(\~) # followed by an asterisk
($|[ -']|[+-/]|[:-?]|\*|\_|\}|\)|\]|\||\^) # ends with specified characters
(\n|$|[ -']|[+-/]|[:-?]|\*|\_|\}|\)|\]|\||\^) # ends with specified characters
"""
SLACK_ITALIC_REGEX = r"""
(^|[ -*]|[+-/]|[:-?]|\{|\[|\||\^|~)
(\n|^|[ -*]|[+-/]|[:-?]|\{|\[|\||\^|~)
(\_)
([ -^`~]*)([ -^`-~]+) # any character
(\_)
($|[ -']|[+-/]|[:-?]|\}|\)|\]|\*|\||\^|~)
(\n|$|[ -']|[+-/]|[:-?]|\}|\)|\]|\*|\||\^|~)
"""
SLACK_BOLD_REGEX = r"""
(^|[ -(]|[+-/]|[:-?]|\{|\[|\_|\||\^|~)
(\n|^|[ -(]|[+-/]|[:-?]|\{|\[|\_|\||\^|~)
(\*)
([ -)+-~]*)([ -)+-~]+) # any character
(\*)
($|[ -']|[+-/]|[:-?]|\}|\)|\]|\_|\||\^|~)
(\n|$|[ -']|[+-/]|[:-?]|\}|\)|\]|\_|\||\^|~)
"""

View File

@ -84,6 +84,11 @@
"name": "italic_and_strike_conversion",
"input": "_~italic~_ and ~_strike_~",
"conversion_output": "*~~italic~~* and ~~*strike*~~"
},
{
"name": "new_line_test",
"input": "\n*abc*\n",
"conversion_output": "\n**abc**\n"
}
]
}