slack import: Convert threads to nicely named Zulip topics.

Fixes #9006.
This commit is contained in:
rht 2021-08-12 23:48:16 -04:00 committed by Tim Abbott
parent ddd174af36
commit 1c84f02f57
4 changed files with 163 additions and 4 deletions

View File

@ -154,8 +154,8 @@ in mind about the import process:
| Multi Channel Guest | Guest |
| Channel creator | none |
- Messages in threads are imported, but they are not explicitly marked as
being in a thread.
- Slack threads are imported as topics with names like "2023-05-30
Slack thread 1".
- Message edit history and `@user joined #channel_name` messages are not imported.

View File

@ -1,3 +1,4 @@
import datetime
import logging
import os
import posixpath
@ -703,6 +704,7 @@ def convert_slack_workspace_messages(
zerver_realmemoji: List[ZerverFieldsT],
domain_name: str,
output_dir: str,
convert_slack_threads: bool,
chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE,
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
"""
@ -764,6 +766,7 @@ def convert_slack_workspace_messages(
dm_members,
domain_name,
long_term_idle,
convert_slack_threads,
)
message_json = dict(zerver_message=zerver_message, zerver_usermessage=zerver_usermessage)
@ -844,6 +847,7 @@ def channel_message_to_zerver_message(
dm_members: DMMembersT,
domain_name: str,
long_term_idle: Set[int],
convert_slack_threads: bool,
) -> Tuple[
List[ZerverFieldsT],
List[ZerverFieldsT],
@ -867,6 +871,8 @@ def channel_message_to_zerver_message(
total_user_messages = 0
total_skipped_user_messages = 0
thread_counter: Dict[str, int] = defaultdict(int)
thread_map: Dict[str, str] = {}
for message in all_messages:
slack_user_id = get_message_sending_user(message)
if not slack_user_id:
@ -955,7 +961,25 @@ def channel_message_to_zerver_message(
has_attachment = file_info["has_attachment"]
has_image = file_info["has_image"]
# Slack's unthreaded messages go into a single topic, while
# threads each generate a unique topic labeled by the date and
# a counter among topics on that day.
topic_name = "imported from Slack"
if convert_slack_threads and "thread_ts" in message:
thread_ts = datetime.datetime.fromtimestamp(
float(message["thread_ts"]), tz=datetime.timezone.utc
)
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
# The topic name looks like "2015-08-18 Slack thread 2"; the trailing counter disambiguates
# threads that started on the same date.
if thread_ts_str in thread_map:
topic_name = thread_map[thread_ts_str]
else:
thread_date = thread_ts.strftime(r"%Y-%m-%d")
thread_counter[thread_date] += 1
count = thread_counter[thread_date]
topic_name = f"{thread_date} Slack thread {count}"
thread_map[thread_ts_str] = topic_name
zulip_message = build_message(
topic_name=topic_name,
@ -1311,7 +1335,13 @@ def fetch_team_icons(
return records
def do_convert_data(original_path: str, output_dir: str, token: str, threads: int = 6) -> None:
def do_convert_data(
original_path: str,
output_dir: str,
token: str,
threads: int = 6,
convert_slack_threads: bool = False,
) -> None:
# Subdomain is set by the user while running the import command
realm_subdomain = ""
realm_id = 0
@ -1380,6 +1410,7 @@ def do_convert_data(original_path: str, output_dir: str, token: str, threads: in
realm["zerver_realmemoji"],
domain_name,
output_dir,
convert_slack_threads,
)
# Move zerver_reactions to realm.json file

View File

@ -34,6 +34,12 @@ class Command(BaseCommand):
help="Threads to use in exporting UserMessage objects in parallel",
)
parser.add_argument(
"--no-convert-slack-threads",
action="store_true",
help="If specified, do not convert Slack threads to separate Zulip topics",
)
parser.formatter_class = argparse.RawTextHelpFormatter
def handle(self, *args: Any, **options: Any) -> None:
@ -56,4 +62,11 @@ class Command(BaseCommand):
raise CommandError(f"Slack data directory not found: '{path}'")
print("Converting data ...")
do_convert_data(path, output_dir, token, threads=num_threads)
convert_slack_threads = not options["no_convert_slack_threads"]
do_convert_data(
path,
output_dir,
token,
threads=num_threads,
convert_slack_threads=convert_slack_threads,
)

View File

@ -930,6 +930,7 @@ class SlackImporter(ZulipTestCase):
dm_members,
"domain",
set(),
convert_slack_threads=False,
)
# functioning already tested in helper function
self.assertEqual(zerver_usermessage, [])
@ -992,6 +993,119 @@ class SlackImporter(ZulipTestCase):
self.assertEqual(zerver_message[7]["sender"], 43)
self.assertEqual(zerver_message[8]["sender"], 5)
@mock.patch("zerver.data_import.slack.build_usermessages", return_value=(2, 4))
def test_channel_message_to_zerver_message_with_threads(
    self, mock_build_usermessage: mock.Mock
) -> None:
    """Check the topic names given to messages that carry ``thread_ts``.

    Feeds channel_message_to_zerver_message (with convert_slack_threads=True)
    one channel_join message, one unthreaded message and four threaded
    messages spread over three distinct ``thread_ts`` values, then verifies
    the topic recorded on each threaded message.
    """

    def threaded_message(ts: str, thread_ts: str) -> Dict[str, Any]:
        # All threaded fixture messages share text, sender and channel;
        # only the two timestamps vary.
        return {
            "text": "random",
            "user": "U061A5N1G",
            "ts": ts,
            "thread_ts": thread_ts,
            "channel_name": "random",
        }

    users = [
        {"id": "U066MTL5U", "name": "john doe", "deleted": False, "real_name": "John"},
        {"id": "U061A5N1G", "name": "jane doe", "deleted": False, "real_name": "Jane"},
        {"id": "U061A1R2R", "name": "jon", "deleted": False, "real_name": "Jon"},
    ]
    zulip_id_by_slack_id = {"U066MTL5U": 5, "U061A5N1G": 24, "U061A1R2R": 43}

    slack_messages: List[Dict[str, Any]] = [
        {
            "text": "<@U066MTL5U> has joined the channel",
            "subtype": "channel_join",
            "user": "U066MTL5U",
            "ts": "1434139102.000002",
            "channel_name": "random",
        },
        {
            "text": "<@U061A5N1G>: hey!",
            "user": "U061A1R2R",
            "ts": "1437868294.000006",
            "has_image": True,
            "channel_name": "random",
        },
        # Two replies belonging to the same thread.
        threaded_message("1439868294.000006", "1434139102.000002"),
        threaded_message("1439868294.000007", "1434139102.000002"),
        # A different thread.
        threaded_message("1439868294.000008", "1439868294.000008"),
        # Yet another thread, one second later.
        threaded_message("1439868295.000008", "1439868295.000008"),
    ]

    recipient_id_by_name = {
        "random": 2,
        "general": 1,
    }
    no_dm_members: DMMembersT = {}
    subscribers: Dict[int, Set[int]] = {}
    channels: Dict[str, Tuple[str, int]] = {"random": ("c5", 1), "general": ("c6", 2)}

    converted = channel_message_to_zerver_message(
        1,
        users,
        zulip_id_by_slack_id,
        recipient_id_by_name,
        slack_messages,
        [],
        subscribers,
        channels,
        no_dm_members,
        "domain",
        set(),
        convert_slack_threads=True,
    )
    messages, usermessages, attachments, uploads, _reactions = converted

    # functioning already tested in helper function
    self.assertEqual(usermessages, [])
    # subtype: channel_join is filtered
    self.assert_length(messages, 5)

    self.assertEqual(uploads, [])
    self.assertEqual(attachments, [])

    # Message conversion already tested in tests.test_slack_message_conversion
    first, second = messages[0], messages[1]
    self.assertEqual(first["content"], "@**Jane**: hey!")
    self.assertEqual(first["has_link"], False)
    self.assertEqual(second["content"], "random")

    expected_topics = [
        # Both replies to the first thread land in the same topic.
        (1, "2015-06-12 Slack thread 1"),
        (2, "2015-06-12 Slack thread 1"),
        # A thread on a new date restarts the counter at 1.
        (3, "2015-08-18 Slack thread 1"),
        # A second thread on that same date gets counter 2.
        (4, "2015-08-18 Slack thread 2"),
    ]
    for position, topic in expected_topics:
        self.assertEqual(messages[position][EXPORT_TOPIC_NAME], topic)

    self.assertEqual(second["recipient"], recipient_id_by_name["random"])
@mock.patch("zerver.data_import.slack.channel_message_to_zerver_message")
@mock.patch("zerver.data_import.slack.get_messages_iterator")
def test_convert_slack_workspace_messages(
@ -1045,6 +1159,7 @@ class SlackImporter(ZulipTestCase):
[],
"domain",
output_dir=output_dir,
convert_slack_threads=False,
chunk_size=1,
)