mirror of https://github.com/zulip/zulip.git
slack import: Convert threads to nicely named Zulip topics.
Fixes #9006.
This commit is contained in:
parent
ddd174af36
commit
1c84f02f57
|
@ -154,8 +154,8 @@ in mind about the import process:
|
|||
| Multi Channel Guest | Guest |
|
||||
| Channel creator | none |
|
||||
|
||||
- Messages in threads are imported, but they are not explicitly marked as
|
||||
being in a thread.
|
||||
- Slack threads are imported as topics with names like "2023-05-30
|
||||
Slack thread 1".
|
||||
|
||||
- Message edit history and `@user joined #channel_name` messages are not imported.
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import posixpath
|
||||
|
@ -703,6 +704,7 @@ def convert_slack_workspace_messages(
|
|||
zerver_realmemoji: List[ZerverFieldsT],
|
||||
domain_name: str,
|
||||
output_dir: str,
|
||||
convert_slack_threads: bool,
|
||||
chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE,
|
||||
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
|
||||
"""
|
||||
|
@ -764,6 +766,7 @@ def convert_slack_workspace_messages(
|
|||
dm_members,
|
||||
domain_name,
|
||||
long_term_idle,
|
||||
convert_slack_threads,
|
||||
)
|
||||
|
||||
message_json = dict(zerver_message=zerver_message, zerver_usermessage=zerver_usermessage)
|
||||
|
@ -844,6 +847,7 @@ def channel_message_to_zerver_message(
|
|||
dm_members: DMMembersT,
|
||||
domain_name: str,
|
||||
long_term_idle: Set[int],
|
||||
convert_slack_threads: bool,
|
||||
) -> Tuple[
|
||||
List[ZerverFieldsT],
|
||||
List[ZerverFieldsT],
|
||||
|
@ -867,6 +871,8 @@ def channel_message_to_zerver_message(
|
|||
|
||||
total_user_messages = 0
|
||||
total_skipped_user_messages = 0
|
||||
thread_counter: Dict[str, int] = defaultdict(int)
|
||||
thread_map: Dict[str, str] = {}
|
||||
for message in all_messages:
|
||||
slack_user_id = get_message_sending_user(message)
|
||||
if not slack_user_id:
|
||||
|
@ -955,7 +961,25 @@ def channel_message_to_zerver_message(
|
|||
has_attachment = file_info["has_attachment"]
|
||||
has_image = file_info["has_image"]
|
||||
|
||||
# Slack's unthreaded messages go into a single topic, while
|
||||
# threads each generate a unique topic labeled by the date and
|
||||
# a counter among the threads started on that day.
|
||||
topic_name = "imported from Slack"
|
||||
if convert_slack_threads and "thread_ts" in message:
|
||||
thread_ts = datetime.datetime.fromtimestamp(
|
||||
float(message["thread_ts"]), tz=datetime.timezone.utc
|
||||
)
|
||||
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
|
||||
# The topic name is "2015-08-18 Slack thread 2", where the counter at the end is to disambiguate
|
||||
# threads with the same date.
|
||||
if thread_ts_str in thread_map:
|
||||
topic_name = thread_map[thread_ts_str]
|
||||
else:
|
||||
thread_date = thread_ts.strftime(r"%Y-%m-%d")
|
||||
thread_counter[thread_date] += 1
|
||||
count = thread_counter[thread_date]
|
||||
topic_name = f"{thread_date} Slack thread {count}"
|
||||
thread_map[thread_ts_str] = topic_name
|
||||
|
||||
zulip_message = build_message(
|
||||
topic_name=topic_name,
|
||||
|
@ -1311,7 +1335,13 @@ def fetch_team_icons(
|
|||
return records
|
||||
|
||||
|
||||
def do_convert_data(original_path: str, output_dir: str, token: str, threads: int = 6) -> None:
|
||||
def do_convert_data(
|
||||
original_path: str,
|
||||
output_dir: str,
|
||||
token: str,
|
||||
threads: int = 6,
|
||||
convert_slack_threads: bool = False,
|
||||
) -> None:
|
||||
# Subdomain is set by the user while running the import command
|
||||
realm_subdomain = ""
|
||||
realm_id = 0
|
||||
|
@ -1380,6 +1410,7 @@ def do_convert_data(original_path: str, output_dir: str, token: str, threads: in
|
|||
realm["zerver_realmemoji"],
|
||||
domain_name,
|
||||
output_dir,
|
||||
convert_slack_threads,
|
||||
)
|
||||
|
||||
# Move zerver_reactions to realm.json file
|
||||
|
|
|
@ -34,6 +34,12 @@ class Command(BaseCommand):
|
|||
help="Threads to use in exporting UserMessage objects in parallel",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--no-convert-slack-threads",
|
||||
action="store_true",
|
||||
help="If specified, do not convert Slack threads to separate Zulip topics",
|
||||
)
|
||||
|
||||
parser.formatter_class = argparse.RawTextHelpFormatter
|
||||
|
||||
def handle(self, *args: Any, **options: Any) -> None:
|
||||
|
@ -56,4 +62,11 @@ class Command(BaseCommand):
|
|||
raise CommandError(f"Slack data directory not found: '{path}'")
|
||||
|
||||
print("Converting data ...")
|
||||
do_convert_data(path, output_dir, token, threads=num_threads)
|
||||
convert_slack_threads = not options["no_convert_slack_threads"]
|
||||
do_convert_data(
|
||||
path,
|
||||
output_dir,
|
||||
token,
|
||||
threads=num_threads,
|
||||
convert_slack_threads=convert_slack_threads,
|
||||
)
|
||||
|
|
|
@ -930,6 +930,7 @@ class SlackImporter(ZulipTestCase):
|
|||
dm_members,
|
||||
"domain",
|
||||
set(),
|
||||
convert_slack_threads=False,
|
||||
)
|
||||
# functioning already tested in helper function
|
||||
self.assertEqual(zerver_usermessage, [])
|
||||
|
@ -992,6 +993,119 @@ class SlackImporter(ZulipTestCase):
|
|||
self.assertEqual(zerver_message[7]["sender"], 43)
|
||||
self.assertEqual(zerver_message[8]["sender"], 5)
|
||||
|
||||
@mock.patch("zerver.data_import.slack.build_usermessages", return_value=(2, 4))
def test_channel_message_to_zerver_message_with_threads(
    self, mock_build_usermessage: mock.Mock
) -> None:
    """Verify thread-to-topic naming when convert_slack_threads=True.

    Feeds six Slack messages (one channel_join, one unthreaded message and
    four threaded messages spread over three distinct thread_ts values) to
    channel_message_to_zerver_message and checks that each thread gets a
    topic of the form "<YYYY-MM-DD> Slack thread <n>", where <n> counts the
    threads seen for that date.
    """

    def thread_message(ts: str, thread_ts: str) -> Dict[str, Any]:
        # All threaded fixtures share the same author, text and channel;
        # only the message timestamp and the thread it belongs to vary.
        return {
            "text": "random",
            "user": "U061A5N1G",
            "ts": ts,
            "thread_ts": thread_ts,
            "channel_name": "random",
        }

    slack_users = [
        {"id": "U066MTL5U", "name": "john doe", "deleted": False, "real_name": "John"},
        {"id": "U061A5N1G", "name": "jane doe", "deleted": False, "real_name": "Jane"},
        {"id": "U061A1R2R", "name": "jon", "deleted": False, "real_name": "Jon"},
    ]
    zulip_ids_by_slack_id = {"U066MTL5U": 5, "U061A5N1G": 24, "U061A1R2R": 43}
    recipient_ids_by_name = {
        "random": 2,
        "general": 1,
    }

    slack_messages: List[Dict[str, Any]] = [
        {
            "text": "<@U066MTL5U> has joined the channel",
            "subtype": "channel_join",
            "user": "U066MTL5U",
            "ts": "1434139102.000002",
            "channel_name": "random",
        },
        {
            "text": "<@U061A5N1G>: hey!",
            "user": "U061A1R2R",
            "ts": "1437868294.000006",
            "has_image": True,
            "channel_name": "random",
        },
        # Two replies in the same thread.
        thread_message("1439868294.000006", "1434139102.000002"),
        thread_message("1439868294.000007", "1434139102.000002"),
        # A different thread.
        thread_message("1439868294.000008", "1439868294.000008"),
        # Yet another thread, one second later on the same date.
        thread_message("1439868295.000008", "1439868295.000008"),
    ]

    empty_dm_members: DMMembersT = {}
    empty_subscriber_map: Dict[int, Set[int]] = {}
    channels: Dict[str, Tuple[str, int]] = {"random": ("c5", 1), "general": ("c6", 2)}

    conversion_result = channel_message_to_zerver_message(
        1,
        slack_users,
        zulip_ids_by_slack_id,
        recipient_ids_by_name,
        slack_messages,
        [],
        empty_subscriber_map,
        channels,
        empty_dm_members,
        "domain",
        set(),
        convert_slack_threads=True,
    )
    zerver_message, zerver_usermessage, attachment, uploads, _reaction = conversion_result

    # build_usermessages is mocked out here; it is exercised by its own test.
    self.assertEqual(zerver_usermessage, [])
    # The channel_join message is filtered, leaving five of the six inputs.
    self.assert_length(zerver_message, 5)

    self.assertEqual(uploads, [])
    self.assertEqual(attachment, [])

    # Message conversion already tested in tests.test_slack_message_conversion
    unthreaded = zerver_message[0]
    self.assertEqual(unthreaded["content"], "@**Jane**: hey!")
    self.assertEqual(unthreaded["has_link"], False)
    self.assertEqual(zerver_message[1]["content"], "random")

    # Expected topics for zerver_message[1:], in order:
    #   - both replies of the first thread share one topic;
    #   - a thread on a new date gets that date's counter value 1;
    #   - a second thread on that same date (different timestamp)
    #     increments the date's counter to 2.
    expected_topics = [
        "2015-06-12 Slack thread 1",
        "2015-06-12 Slack thread 1",
        "2015-08-18 Slack thread 1",
        "2015-08-18 Slack thread 2",
    ]
    for position, expected_topic in enumerate(expected_topics, start=1):
        self.assertEqual(zerver_message[position][EXPORT_TOPIC_NAME], expected_topic)

    self.assertEqual(zerver_message[1]["recipient"], recipient_ids_by_name["random"])
|
||||
|
||||
@mock.patch("zerver.data_import.slack.channel_message_to_zerver_message")
|
||||
@mock.patch("zerver.data_import.slack.get_messages_iterator")
|
||||
def test_convert_slack_workspace_messages(
|
||||
|
@ -1045,6 +1159,7 @@ class SlackImporter(ZulipTestCase):
|
|||
[],
|
||||
"domain",
|
||||
output_dir=output_dir,
|
||||
convert_slack_threads=False,
|
||||
chunk_size=1,
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue