2018-01-11 15:52:31 +01:00
|
|
|
import re
|
2024-08-08 13:22:36 +02:00
|
|
|
from itertools import zip_longest
|
|
|
|
from typing import Any, Literal, TypeAlias, TypedDict, cast
|
|
|
|
|
|
|
|
from zerver.lib.types import Validator
|
|
|
|
from zerver.lib.validator import (
|
|
|
|
WildValue,
|
|
|
|
check_dict,
|
|
|
|
check_int,
|
|
|
|
check_list,
|
|
|
|
check_string,
|
|
|
|
check_string_in,
|
|
|
|
check_url,
|
|
|
|
)
|
2023-08-02 23:53:10 +02:00
|
|
|
|
2018-01-11 15:52:31 +01:00
|
|
|
# stubs
# Loosely typed aliases used in the signatures below.

# Generic string-keyed record; used below as the type of a Slack user dict.
ZerverFieldsT: TypeAlias = dict[str, Any]
# Slack user id -> Zulip user id (see the lookup in get_user_mentions).
SlackToZulipUserIDT: TypeAlias = dict[str, int]
# Channel name -> tuple whose first element is the Slack channel id (see
# convert_to_zulip_markdown).  The int is presumably a Zulip-side id --
# TODO confirm against the caller.
AddedChannelsT: TypeAlias = dict[str, tuple[str, int]]
# Generic string-keyed record; not referenced by the code visible here.
SlackFieldsT: TypeAlias = dict[str, Any]
|
2018-01-11 15:52:31 +01:00
|
|
|
|
|
|
|
# Slack links come in two forms: <http://www.foo.com|www.foo.com> (URL plus
# display text) and <http://foo.com/> (bare URL).  The pattern is written for
# re.VERBOSE, so the trailing "# ..." notes are regex comments, not pattern.
LINK_REGEX = r"""
    (<)                                                              # opening '<'
    (http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/|ftp:\/\/)?  # protocol and www
    ([a-z0-9]+([\-\.]{1}[a-z0-9]+)*)(\.)                             # domain name
    ([a-z]{2,63}(:[0-9]{1,5})?)                                      # top-level domain and optional port
    (\/[^>]*)?                                                       # path
    (\|)?(?:\|([^>]+))?                                              # optional pipe and display text (for Slack links)
    (>)                                                              # closing '>'
"""
|
2018-01-21 12:30:54 +01:00
|
|
|
|
|
|
|
SLACK_MAILTO_REGEX = r"""
|
|
|
|
<((mailto:)? # match `<mailto:`
|
|
|
|
([\w\.-]+@[\w\.-]+(\.[\w]+)+)) # match email
|
|
|
|
(\|)? # match pipe
|
|
|
|
([\w\.-]+@[\w\.-]+(\.[\w]+)+)?> # match email
|
|
|
|
"""
|
|
|
|
|
2018-01-11 15:52:31 +01:00
|
|
|
SLACK_USERMENTION_REGEX = r"""
|
|
|
|
(<@) # Start with '<@'
|
|
|
|
([a-zA-Z0-9]+) # Here we have the Slack id
|
2021-05-10 07:02:14 +02:00
|
|
|
(\|)? # We not always have a vertical line in mention
|
|
|
|
([a-zA-Z0-9]+)? # If vertical line is present, this is short name
|
2018-01-11 15:52:31 +01:00
|
|
|
(>) # ends with '>'
|
|
|
|
"""
|
|
|
|
# Slack doesn't have mid-word message-formatting like Zulip.
# Hence, ~stri~ke doesn't format the word in Slack, but ~~stri~~ke
# formats the word in Zulip.
# Written for re.VERBOSE; groups 3 and 4 together hold the struck text.
SLACK_STRIKETHROUGH_REGEX = r"""
    (^|[ -(]|[+-/]|\*|\_|[:-?]|\{|\[|\||\^)     # Start after specified characters
    (\~)                                        # followed by a tilde
    ([ -)+-}—]*)([ -}]+)                        # the struck text; it can never contain a tilde
    (\~)                                        # followed by a tilde
    ($|[ -']|[+-/]|[:-?]|\*|\_|\}|\)|\]|\||\^)  # ends with specified characters
"""
|
|
|
|
SLACK_ITALIC_REGEX = r"""
|
2018-06-02 00:27:02 +02:00
|
|
|
(^|[ -*]|[+-/]|[:-?]|\{|\[|\||\^|~)
|
2018-01-11 15:52:31 +01:00
|
|
|
(\_)
|
|
|
|
([ -^`~—]*)([ -^`-~]+) # any character
|
|
|
|
(\_)
|
2018-06-02 00:27:02 +02:00
|
|
|
($|[ -']|[+-/]|[:-?]|\}|\)|\]|\*|\||\^|~)
|
2018-01-11 15:52:31 +01:00
|
|
|
"""
|
|
|
|
SLACK_BOLD_REGEX = r"""
|
2018-06-02 00:27:02 +02:00
|
|
|
(^|[ -(]|[+-/]|[:-?]|\{|\[|\_|\||\^|~)
|
2018-01-11 15:52:31 +01:00
|
|
|
(\*)
|
|
|
|
([ -)+-~—]*)([ -)+-~]+) # any character
|
|
|
|
(\*)
|
2018-06-02 00:27:02 +02:00
|
|
|
($|[ -']|[+-/]|[:-?]|\}|\)|\]|\_|\||\^|~)
|
2018-01-11 15:52:31 +01:00
|
|
|
"""
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-01-11 15:52:31 +01:00
|
|
|
def get_user_full_name(user: ZerverFieldsT) -> str:
    """Pick the display name to use for a Slack user record.

    * A user explicitly marked ``"deleted": False`` uses ``real_name``.
    * Otherwise a mirror-dummy user uses ``profile["real_name"]``.
    * In every other case -- or whenever the preferred value is missing or
      empty -- the Slack ``name`` is returned.

    Raises:
        KeyError: if the record has no ``name`` key.
    """
    if user.get("deleted") is False:
        return user.get("real_name") or user["name"]

    # Records lacking "is_mirror_dummy" are treated as ordinary users rather
    # than raising KeyError.
    if user.get("is_mirror_dummy"):
        profile = user.get("profile") or {}
        # `or` (rather than a .get default) also covers an empty real_name,
        # matching the fallback used in the branch above.
        return profile.get("real_name") or user["name"]

    return user["name"]
|
2018-01-11 15:52:31 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-01-11 15:52:31 +01:00
|
|
|
# Markdown mapping
|
2021-02-12 08:19:30 +01:00
|
|
|
def convert_to_zulip_markdown(
    text: str,
    users: list[ZerverFieldsT],
    added_channels: AddedChannelsT,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
) -> tuple[str, list[int], bool]:
    """Translate the text of a Slack message into Zulip Markdown.

    Returns a tuple of the converted text, the Zulip ids of the users whose
    mentions were converted, and whether the text contained a link (either
    a URL link or a mailto link).
    """
    # Inline formatting first: bold, then strikethrough, then italic.
    for slack_regex, zulip_keyword in (
        (SLACK_BOLD_REGEX, "**"),
        (SLACK_STRIKETHROUGH_REGEX, "~~"),
        (SLACK_ITALIC_REGEX, "*"),
    ):
        text = convert_markdown_syntax(text, slack_regex, zulip_keyword)

    # Slack's three broadcast mentions all become '@**all**'.  Plain string
    # replacement is used because they can appear anywhere in the sentence.
    for broadcast in ("<!everyone>", "<!channel>", "<!here>"):
        text = text.replace(broadcast, "@**all**")

    # Channel mentions: '<#C5Z73A7RA|general>' becomes '#**general**'.
    for channel_name, channel_ids in added_channels.items():
        slack_channel_id = channel_ids[0]
        text = text.replace(f"<#{slack_channel_id}|{channel_name}>", f"#**{channel_name}**")

    # User mentions are handled one space-separated token at a time:
    # '<@slack_id|short_name>' becomes '@**full_name**'.
    mentioned_users_id = []
    tokens = text.split(" ")
    for position, token in enumerate(tokens):
        if re.search(SLACK_USERMENTION_REGEX, token, re.VERBOSE) is None:
            continue
        tokens[position], user_id = get_user_mentions(
            token, users, slack_user_id_to_zulip_user_id
        )
        if user_id is not None:
            mentioned_users_id.append(user_id)
    text = " ".join(tokens)

    # Finally unwrap '<url>' / '<url|text>' links and '<mailto:...>' links.
    text, has_link = convert_link_format(text)
    text, has_mailto_link = convert_mailto_format(text)

    return text, mentioned_users_id, has_link or has_mailto_link
|
2018-01-11 15:52:31 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_user_mentions(
    token: str, users: list[ZerverFieldsT], slack_user_id_to_zulip_user_id: SlackToZulipUserIDT
) -> tuple[str, int | None]:
    """Convert the first Slack user mention found in ``token``.

    ``token`` must contain a match for SLACK_USERMENTION_REGEX (the caller
    checks this).  The mention is resolved against ``users`` by Slack id and,
    when the mention carries a short name, by ``name`` as well.

    Returns the token with that mention rewritten as '@**full_name**' plus
    the user's Zulip id, or the unchanged token and ``None`` when no user
    matches.

    Raises:
        KeyError: if the matched Slack id is missing from
            ``slack_user_id_to_zulip_user_id``.
    """
    slack_usermention_match = re.search(SLACK_USERMENTION_REGEX, token, re.VERBOSE)
    assert slack_usermention_match is not None
    slack_id = slack_usermention_match.group(2)
    # None when the mention has no '|short_name' part; never an empty string.
    short_name = slack_usermention_match.group(4)

    for user in users:
        if user["id"] != slack_id:
            continue
        if short_name is not None and user["name"] != short_name:
            continue

        mention = "@**" + get_user_full_name(user) + "**"
        user_id = slack_user_id_to_zulip_user_id[slack_id]
        # Plain string replacement of exactly the matched mention.  Using
        # re.sub with `mention` as the template would interpret backslashes
        # in the full name and would also overwrite mentions of *other*
        # users in the same token with this user's name.
        token = token.replace(slack_usermention_match.group(0), mention)
        return token, user_id

    return token, None
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-08-11 01:47:49 +02:00
|
|
|
# Map italic, bold and strikethrough Markdown
|
2018-01-11 15:52:31 +01:00
|
|
|
def convert_markdown_syntax(text: str, regex: str, zulip_keyword: str) -> str:
    """Swap Slack's formatting markers for ``zulip_keyword``.

    ``regex`` is compiled with re.VERBOSE and must expose the layout shared
    by the Slack formatting patterns: group 1 is the leading delimiter,
    groups 3 and 4 the formatted text, group 6 the trailing delimiter.

    Used for:
    1. strikethrough: Slack's '~strike~' to Zulip's '~~strike~~'
    2. bold: Slack's '*bold*' to Zulip's '**bold**'
    3. italic: Slack's '_italic_' to Zulip's '*italic*'
    """
    pattern = re.compile(regex, re.VERBOSE)
    # The iterator scans the string passed in; rebinding `text` inside the
    # loop does not affect which matches are visited.
    for found in pattern.finditer(text):
        before, head, tail, after = found.group(1, 3, 4, 6)
        replacement = "".join((before, zulip_keyword, head, tail, zulip_keyword, after))
        text = text.replace(found.group(0), replacement)
    return text
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def convert_link_format(text: str) -> tuple[str, bool]:
    """Unwrap Slack's angle-bracket links.

    1. Converts '<https://foo.com>' to 'https://foo.com'
    2. Converts '<https://foo.com|foo>' to '[foo](https://foo.com)'

    A link with a pipe but no display text ('<https://foo.com|>') is handled
    like case 1 instead of producing a Markdown link with an empty label.

    Returns the converted text and whether any link was found.
    """
    has_link = False
    for match in re.finditer(LINK_REGEX, text, re.VERBOSE):
        slack_url = match.group(0)
        # Strip the surrounding '<' and '>' and split on the first pipe only,
        # so later pipes stay part of the display text.
        url, _, display_text = slack_url[1:-1].partition("|")
        converted_url = f"[{display_text}]({url})" if display_text else url

        has_link = True
        text = text.replace(slack_url, converted_url)
    return text, has_link
|
2018-01-21 12:30:54 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def convert_mailto_format(text: str) -> tuple[str, bool]:
    """Unwrap Slack's angle-bracket email links.

    1. Converts '<mailto:foo@foo.com>' to 'mailto:foo@foo.com'
    2. Converts '<mailto:foo@foo.com|foo@foo.com>' to 'mailto:foo@foo.com'

    Returns the converted text and whether any such link was found.
    """
    found_links = list(re.finditer(SLACK_MAILTO_REGEX, text, re.VERBOSE))
    for found in found_links:
        # Group 1 is the part between '<' and the optional pipe.
        text = text.replace(found.group(0), found.group(1))
    return text, bool(found_links)
|
2024-08-08 13:22:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
def render_block(block: WildValue) -> str:
    """Render one Slack block as Markdown text.

    See https://api.slack.com/reference/block-kit/blocks

    The block's "type" must be one of the seven known block types, otherwise
    the validator rejects it.  "actions" and "input" blocks are not handled,
    and neither is a "context" block without elements; all of them render
    as an empty string.
    """
    kind = block["type"].tame(
        check_string_in(["actions", "context", "divider", "header", "image", "input", "section"])
    )

    if kind == "divider":
        return "----"

    if kind == "header":
        return "## " + block["text"].tame(check_text_block(plain_text_only=True))["text"]

    if kind == "image":
        image_url = block["image_url"].tame(check_url)
        alt_text = block["alt_text"].tame(check_string)
        # A title, when present, takes the place of the alt text.
        if "title" in block:
            alt_text = block["title"].tame(check_text_block(plain_text_only=True))["text"]
        return f"[{alt_text}]({image_url})"

    if kind == "context" and block.get("elements"):
        # Slack renders these pieces left-to-right, packed in as
        # closely as possible.  We just render them above each other,
        # for simplicity.
        rendered = []
        for element in block["elements"]:
            element_kind = element["type"].tame(check_string_in(["image", "plain_text", "mrkdwn"]))
            rendered.append(
                render_block_element(element)
                if element_kind == "image"
                else element.tame(check_text_block())["text"]
            )
        stripped = [part.strip() for part in rendered]
        return "\n\n".join(part for part in stripped if part != "")

    if kind == "section":
        rendered = []
        if "text" in block:
            rendered.append(block["text"].tame(check_text_block())["text"])

        if "accessory" in block:
            rendered.append(render_block_element(block["accessory"]))

        if "fields" in block:
            fields = block["fields"].tame(check_list(check_text_block()))
            if len(fields) == 1:
                # Special-case a single field to display a bit more
                # nicely, without extraneous borders and limitations
                # on its contents.
                rendered.append(fields[0]["text"])
            else:
                # It is not possible to have newlines in a table, nor
                # escape the pipes that make it up; replace them with
                # whitespace.
                cells = [field["text"].replace("\n", " ").replace("|", " ") for field in fields]
                # Because Slack formats this as two columns, but not
                # necessarily a table with a bold header, we emit a
                # blank header row first.
                rows = ["| | |", "|-|-|"]
                # Then pair the cells up two-at-a-time; an odd trailing
                # cell gets an empty right-hand neighbour.
                rows.extend(
                    f"| {left} | {right} |"
                    for left, right in zip_longest(cells[0::2], cells[1::2], fillvalue="")
                )
                rendered.append("\n".join(rows) + "\n")

        stripped = [part.strip() for part in rendered]
        return "\n\n".join(part for part in stripped if part != "")

    # "actions", "input", and "context" blocks without elements.
    return ""
|
|
|
|
|
|
|
|
|
|
|
|
class TextField(TypedDict):
    """Shape of the dict returned by the validator that check_text_block() builds."""

    # The text content.
    text: str
    # One of the two type tags accepted by check_text_block().
    type: Literal["plain_text", "mrkdwn"]
|
|
|
|
|
|
|
|
|
|
|
|
def check_text_block(plain_text_only: bool = False) -> Validator[TextField]:
    """Build a validator for dicts holding a "type" and a "text" entry.

    "text" must be a string.  "type" must be "plain_text" or "mrkdwn"; with
    ``plain_text_only=True`` only "plain_text" is accepted.  The returned
    validator yields the checked dict typed as a TextField.
    """
    accepted_types = ["plain_text"] if plain_text_only else ["plain_text", "mrkdwn"]
    validate_dict = check_dict(
        [
            ("type", check_string_in(accepted_types)),
            ("text", check_string),
        ],
    )

    def validate(var_name: str, val: object) -> TextField:
        return cast(TextField, validate_dict(var_name, val))

    return validate
|
|
|
|
|
|
|
|
|
|
|
|
def render_block_element(element: WildValue) -> str:
    """Render a single block element as Markdown.

    See https://api.slack.com/reference/block-kit/block-elements

    Zulip doesn't support interactive elements, so only images are rendered
    (as '[alt_text](image_url)'); every other element type yields "".
    """
    if element["type"].tame(check_string) != "image":
        # Unsupported
        return ""

    image_url = element["image_url"].tame(check_url)
    alt_text = element["alt_text"].tame(check_string)
    return f"[{alt_text}]({image_url})"
|
|
|
|
|
|
|
|
|
|
|
|
def render_attachment(attachment: WildValue) -> str:
    """Render a Slack message attachment as Markdown text.

    See https://api.slack.com/reference/messaging/attachments

    Slack recommends the usage of "blocks" even within attachments; the
    rest of the fields we handle here are legacy fields.  These fields are
    optional and may contain null values, so every one of them is guarded
    with a truthiness check before being validated.

    The rendered pieces (title, pretext, text, fields, blocks, image,
    footer, timestamp -- in that order) are stripped, empty ones are
    dropped, and the remainder is joined with blank lines.
    """
    pieces = []

    if attachment.get("title"):
        title = attachment["title"].tame(check_string)
        if attachment.get("title_link"):
            title_link = attachment["title_link"].tame(check_url)
            pieces.append(f"## [{title}]({title_link})")
        else:
            pieces.append(f"## {title}")

    if attachment.get("pretext"):
        pieces.append(attachment["pretext"].tame(check_string))

    if attachment.get("text"):
        pieces.append(attachment["text"].tame(check_string))

    # Guarded like the other legacy fields: a key that is present but null
    # (or empty) is skipped instead of being iterated.
    if attachment.get("fields"):
        fields = []
        for field in attachment["fields"]:
            if "title" in field and "value" in field and field["title"] and field["value"]:
                title = field["title"].tame(check_string)
                value = field["value"].tame(check_string)
                fields.append(f"*{title}*: {value}")
            elif field.get("title"):
                title = field["title"].tame(check_string)
                fields.append(f"*{title}*")
            elif field.get("value"):
                value = field["value"].tame(check_string)
                fields.append(f"{value}")
        pieces.append("\n".join(fields))

    if attachment.get("blocks"):
        pieces.extend(render_block(block) for block in attachment["blocks"])

    if attachment.get("image_url"):
        pieces.append("[]({})".format(attachment["image_url"].tame(check_url)))

    if attachment.get("footer"):
        pieces.append(attachment["footer"].tame(check_string))

    if attachment.get("ts"):
        # Named `timestamp` so the local does not shadow the stdlib module name.
        timestamp = attachment["ts"].tame(check_int)
        pieces.append(f"<time:{timestamp}>")

    return "\n\n".join(piece.strip() for piece in pieces if piece.strip() != "")
|