From 1c84f02f5739c2f3e07896ccea9497195e0a1068 Mon Sep 17 00:00:00 2001 From: rht Date: Thu, 12 Aug 2021 23:48:16 -0400 Subject: [PATCH] slack import: Convert threads to nicely named Zulip topics. Fixes #9006. --- help/import-from-slack.md | 4 +- zerver/data_import/slack.py | 33 ++++- .../management/commands/convert_slack_data.py | 15 ++- zerver/tests/test_slack_importer.py | 115 ++++++++++++++++++ 4 files changed, 163 insertions(+), 4 deletions(-) diff --git a/help/import-from-slack.md b/help/import-from-slack.md index 5937da0661..1b617bdede 100644 --- a/help/import-from-slack.md +++ b/help/import-from-slack.md @@ -154,8 +154,8 @@ in mind about the import process: | Multi Channel Guest | Guest | | Channel creator | none | -- Messages in threads are imported, but they are not explicitly marked as - being in a thread. +- Slack threads are imported as topics with names like "2023-05-30 + Slack thread 1". - Message edit history and `@user joined #channel_name` messages are not imported. diff --git a/zerver/data_import/slack.py b/zerver/data_import/slack.py index 6995581eb5..9ded60a89c 100644 --- a/zerver/data_import/slack.py +++ b/zerver/data_import/slack.py @@ -1,3 +1,4 @@ +import datetime import logging import os import posixpath @@ -703,6 +704,7 @@ def convert_slack_workspace_messages( zerver_realmemoji: List[ZerverFieldsT], domain_name: str, output_dir: str, + convert_slack_threads: bool, chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE, ) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]: """ @@ -764,6 +766,7 @@ def convert_slack_workspace_messages( dm_members, domain_name, long_term_idle, + convert_slack_threads, ) message_json = dict(zerver_message=zerver_message, zerver_usermessage=zerver_usermessage) @@ -844,6 +847,7 @@ def channel_message_to_zerver_message( dm_members: DMMembersT, domain_name: str, long_term_idle: Set[int], + convert_slack_threads: bool, ) -> Tuple[ List[ZerverFieldsT], List[ZerverFieldsT], @@ -867,6 +871,8 @@ def channel_message_to_zerver_message( total_user_messages = 0 total_skipped_user_messages = 0 + thread_counter: Dict[str, int] = defaultdict(int) + thread_map: Dict[str, str] = {} for message in all_messages: slack_user_id = get_message_sending_user(message) if not slack_user_id: @@ -955,7 +961,25 @@ def channel_message_to_zerver_message( has_attachment = file_info["has_attachment"] has_image = file_info["has_image"] + # Slack's unthreaded messages go into a single topic, while + # threads each generate a unique topic labeled by the date and + # a counter among topics on that day. topic_name = "imported from Slack" + if convert_slack_threads and "thread_ts" in message: + thread_ts = datetime.datetime.fromtimestamp( + float(message["thread_ts"]), tz=datetime.timezone.utc + ) + thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S") + # The topic name is "2015-08-18 Slack thread 2", where the counter at the end is to disambiguate + # threads with the same date. + if thread_ts_str in thread_map: + topic_name = thread_map[thread_ts_str] + else: + thread_date = thread_ts.strftime(r"%Y-%m-%d") + thread_counter[thread_date] += 1 + count = thread_counter[thread_date] + topic_name = f"{thread_date} Slack thread {count}" + thread_map[thread_ts_str] = topic_name zulip_message = build_message( topic_name=topic_name, @@ -1311,7 +1335,13 @@ def fetch_team_icons( return records -def do_convert_data(original_path: str, output_dir: str, token: str, threads: int = 6) -> None: +def do_convert_data( + original_path: str, + output_dir: str, + token: str, + threads: int = 6, + convert_slack_threads: bool = False, +) -> None: # Subdomain is set by the user while running the import command realm_subdomain = "" realm_id = 0 @@ -1380,6 +1410,7 @@ def do_convert_data(original_path: str, output_dir: str, token: str, threads: in realm["zerver_realmemoji"], domain_name, output_dir, + convert_slack_threads, ) # Move zerver_reactions to realm.json file diff --git a/zerver/management/commands/convert_slack_data.py b/zerver/management/commands/convert_slack_data.py index 21e504c11d..f3eb913dcf 100644 --- a/zerver/management/commands/convert_slack_data.py +++ b/zerver/management/commands/convert_slack_data.py @@ -34,6 +34,12 @@ class Command(BaseCommand): help="Threads to use in exporting UserMessage objects in parallel", ) + parser.add_argument( + "--no-convert-slack-threads", + action="store_true", + help="If specified, do not convert Slack threads to separate Zulip topics", + ) + parser.formatter_class = argparse.RawTextHelpFormatter def handle(self, *args: Any, **options: Any) -> None: @@ -56,4 +62,11 @@ class Command(BaseCommand): raise CommandError(f"Slack data directory not found: '{path}'") print("Converting data ...") - do_convert_data(path, output_dir, token, threads=num_threads) + convert_slack_threads = not options["no_convert_slack_threads"] + do_convert_data( + path, + output_dir, + token, + threads=num_threads, + convert_slack_threads=convert_slack_threads, + ) diff --git a/zerver/tests/test_slack_importer.py b/zerver/tests/test_slack_importer.py index 26c05b5997..039a0e8dab 100644 --- a/zerver/tests/test_slack_importer.py +++ b/zerver/tests/test_slack_importer.py @@ -930,6 +930,7 @@ class SlackImporter(ZulipTestCase): dm_members, "domain", set(), + convert_slack_threads=False, ) # functioning already tested in helper function self.assertEqual(zerver_usermessage, []) @@ -992,6 +993,119 @@ class SlackImporter(ZulipTestCase): self.assertEqual(zerver_message[7]["sender"], 43) self.assertEqual(zerver_message[8]["sender"], 5) + @mock.patch("zerver.data_import.slack.build_usermessages", return_value=(2, 4)) + def test_channel_message_to_zerver_message_with_threads( + self, mock_build_usermessage: mock.Mock + ) -> None: + user_data = [ + {"id": "U066MTL5U", "name": "john doe", "deleted": False, "real_name": "John"}, + {"id": "U061A5N1G", "name": "jane doe", "deleted": False, "real_name": "Jane"}, + {"id": "U061A1R2R", "name": "jon", "deleted": False, "real_name": "Jon"}, + ] + + slack_user_id_to_zulip_user_id = {"U066MTL5U": 5, "U061A5N1G": 24, "U061A1R2R": 43} + + all_messages: List[Dict[str, Any]] = [ + { + "text": "<@U066MTL5U> has joined the channel", + "subtype": "channel_join", + "user": "U066MTL5U", + "ts": "1434139102.000002", + "channel_name": "random", + }, + { + "text": "<@U061A5N1G>: hey!", + "user": "U061A1R2R", + "ts": "1437868294.000006", + "has_image": True, + "channel_name": "random", + }, + { + "text": "random", + "user": "U061A5N1G", + "ts": "1439868294.000006", + # Thread! + "thread_ts": "1434139102.000002", + "channel_name": "random", + }, + { + "text": "random", + "user": "U061A5N1G", + "ts": "1439868294.000007", + "thread_ts": "1434139102.000002", + "channel_name": "random", + }, + { + "text": "random", + "user": "U061A5N1G", + "ts": "1439868294.000008", + # A different Thread! + "thread_ts": "1439868294.000008", + "channel_name": "random", + }, + { + "text": "random", + "user": "U061A5N1G", + "ts": "1439868295.000008", + # Another different Thread! + "thread_ts": "1439868295.000008", + "channel_name": "random", + }, + ] + + slack_recipient_name_to_zulip_recipient_id = { + "random": 2, + "general": 1, + } + dm_members: DMMembersT = {} + + zerver_usermessage: List[Dict[str, Any]] = [] + subscriber_map: Dict[int, Set[int]] = {} + added_channels: Dict[str, Tuple[str, int]] = {"random": ("c5", 1), "general": ("c6", 2)} + + ( + zerver_message, + zerver_usermessage, + attachment, + uploads, + reaction, + ) = channel_message_to_zerver_message( + 1, + user_data, + slack_user_id_to_zulip_user_id, + slack_recipient_name_to_zulip_recipient_id, + all_messages, + [], + subscriber_map, + added_channels, + dm_members, + "domain", + set(), + convert_slack_threads=True, + ) + # functioning already tested in helper function + self.assertEqual(zerver_usermessage, []) + # subtype: channel_join is filtered + self.assert_length(zerver_message, 5) + + self.assertEqual(uploads, []) + self.assertEqual(attachment, []) + + # Message conversion already tested in tests.test_slack_message_conversion + self.assertEqual(zerver_message[0]["content"], "@**Jane**: hey!") + self.assertEqual(zerver_message[0]["has_link"], False) + self.assertEqual(zerver_message[1]["content"], "random") + self.assertEqual(zerver_message[1][EXPORT_TOPIC_NAME], "2015-06-12 Slack thread 1") + self.assertEqual(zerver_message[2][EXPORT_TOPIC_NAME], "2015-06-12 Slack thread 1") + # A new thread with a different date from 2015-06-12, starts the counter from 1. + self.assertEqual(zerver_message[3][EXPORT_TOPIC_NAME], "2015-08-18 Slack thread 1") + # A new thread with a different timestamp, but the same date as 2015-08-18, starts the + # counter from 2. + self.assertEqual(zerver_message[4][EXPORT_TOPIC_NAME], "2015-08-18 Slack thread 2") + self.assertEqual( + zerver_message[1]["recipient"], slack_recipient_name_to_zulip_recipient_id["random"] + ) + @mock.patch("zerver.data_import.slack.channel_message_to_zerver_message") @mock.patch("zerver.data_import.slack.get_messages_iterator") def test_convert_slack_workspace_messages( @@ -1045,6 +1159,7 @@ class SlackImporter(ZulipTestCase): [], "domain", output_dir=output_dir, + convert_slack_threads=False, chunk_size=1, )