mirror of https://github.com/zulip/zulip.git
slack_data_import: Improve topic names for imported Slack threads.
In this commit, the topic names for imported Slack threads now include extra details like a snippet of the original message and the thread's date. `thread_key` is used to map the converted thread conversations because mapping with `thread_ts_str` had a small chance of combining threads with the same timestamp under one topic. Fixes #27661.
This commit is contained in:
parent
fd647ae16f
commit
435ad8a111
|
@ -154,8 +154,8 @@ in mind about the import process:
|
|||
| Multi Channel Guest | Guest |
|
||||
| Channel creator | none |
|
||||
|
||||
- Slack threads are imported as topics with names like "2023-05-30
|
||||
Slack thread 1".
|
||||
- Slack threads are imported as topics with names that include snippets of the
|
||||
original message, such as "2023-05-30 Hi, can anyone reply if you're o…".
|
||||
|
||||
- Message edit history and `@user joined #channel_name` messages are not imported.
|
||||
|
||||
|
|
|
@ -874,6 +874,37 @@ def get_messages_iterator(
|
|||
yield from sorted(messages_for_one_day, key=get_timestamp_from_message)
|
||||
|
||||
|
||||
def get_parent_user_id(message: ZerverFieldsT, subtype: str) -> str | None:
|
||||
"""Retrieves the parent user ID based on message subtype."""
|
||||
if subtype == "thread_broadcast":
|
||||
return message.get("root", {}).get("user")
|
||||
return message.get("parent_user_id")
|
||||
|
||||
|
||||
def get_zulip_thread_topic_name(
|
||||
message: ZerverFieldsT, thread_ts: datetime, thread_counter: dict[str, int]
|
||||
) -> str:
|
||||
"""
|
||||
The topic name format is date + message snippet + counter.
|
||||
|
||||
e.g "2024-05-22 Hello this is a long message that will be c... (1)"
|
||||
"""
|
||||
|
||||
THREAD_TOPIC_SNIPPET_LENGTH = 45
|
||||
thread_snippet = (
|
||||
message["text"]
|
||||
if len(message["text"]) <= THREAD_TOPIC_SNIPPET_LENGTH
|
||||
else message["text"][: THREAD_TOPIC_SNIPPET_LENGTH - 3] + "..."
|
||||
)
|
||||
thread_date = thread_ts.strftime(r"%Y-%m-%d")
|
||||
base_zulip_topic_name = thread_date + thread_snippet
|
||||
collision = thread_counter[base_zulip_topic_name]
|
||||
thread_counter[base_zulip_topic_name] += 1
|
||||
count = (f" ({collision+1})") if collision > 0 else ""
|
||||
|
||||
return f"{thread_date} {thread_snippet}{count}"
|
||||
|
||||
|
||||
def channel_message_to_zerver_message(
|
||||
realm_id: int,
|
||||
realm: ZerverFieldsT,
|
||||
|
@ -1012,25 +1043,23 @@ def channel_message_to_zerver_message(
|
|||
has_image = file_info["has_image"]
|
||||
|
||||
# Slack's unthreaded messages go into a single topic, while
|
||||
# threads each generate a unique topic labeled by the date and
|
||||
# a counter among topics on that day.
|
||||
# threads each generate a unique topic labeled by the date,
|
||||
# a snippet of the original message and a counter if there
|
||||
# are any thread with the same topic name
|
||||
topic_name = MAIN_IMPORT_TOPIC
|
||||
if convert_slack_threads and "thread_ts" in message:
|
||||
thread_ts = datetime.fromtimestamp(float(message["thread_ts"]), tz=timezone.utc)
|
||||
message_ts = datetime.fromtimestamp(float(message["ts"]), tz=timezone.utc)
|
||||
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
|
||||
parent_user_id = get_parent_user_id(message, subtype)
|
||||
|
||||
thread_key = f"{thread_ts_str}-{parent_user_id}"
|
||||
|
||||
if thread_ts == message_ts:
|
||||
# If the message is at the start of a thread, send it to the
|
||||
# main import channel and append a cross-linking notification
|
||||
# message to it.
|
||||
|
||||
# The topic name is "2015-08-18 Slack thread 2", where the counter at the end is to disambiguate
|
||||
# threads with the same date.
|
||||
thread_date = thread_ts.strftime(r"%Y-%m-%d")
|
||||
thread_counter[thread_date] += 1
|
||||
count = thread_counter[thread_date]
|
||||
thread_topic_name = f"{thread_date} Slack thread {count}"
|
||||
thread_topic_name = get_zulip_thread_topic_name(message, thread_ts, thread_counter)
|
||||
thread_topic_link_str = f"#**{import_channel_name}>{thread_topic_name}**"
|
||||
|
||||
message_dict = {
|
||||
|
@ -1040,8 +1069,9 @@ def channel_message_to_zerver_message(
|
|||
"topic": topic_name,
|
||||
"display_recipient": import_channel_name,
|
||||
}
|
||||
thread_key = f"{thread_ts_str}-{slack_user_id}"
|
||||
|
||||
thread_map[thread_ts_str] = {
|
||||
thread_map[thread_key] = {
|
||||
"thread_topic_name": thread_topic_name,
|
||||
"thread_head_message_index": len(zerver_message),
|
||||
"thread_topic_link_str": thread_topic_link_str,
|
||||
|
@ -1052,11 +1082,10 @@ def channel_message_to_zerver_message(
|
|||
),
|
||||
"thread_head_message": content,
|
||||
}
|
||||
|
||||
elif thread_ts_str in thread_map:
|
||||
elif thread_key in thread_map:
|
||||
# The first thread reply will quote and reply to the original
|
||||
# thread message/thread head in the main import topic.
|
||||
thread_metadata = thread_map[thread_ts_str]
|
||||
thread_metadata = thread_map[thread_key]
|
||||
topic_name = thread_metadata.get("thread_topic_name", topic_name)
|
||||
if thread_metadata["thread_length"] == 0:
|
||||
thread_metadata.update({"first_thread_reply": content})
|
||||
|
|
|
@ -1140,6 +1140,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"user": "U061A5N1G",
|
||||
"ts": "1434139102.000002",
|
||||
# Start of thread 1!
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1434139102.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1148,6 +1149,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"user": "U061A5N1G",
|
||||
"ts": "1439868294.000007",
|
||||
# A reply to thread 1
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1434139102.000002",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1156,6 +1158,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"user": "U061A5N1G",
|
||||
"ts": "1439868294.000008",
|
||||
# Start of thread 2!
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1164,6 +1167,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"user": "U061A1R2R",
|
||||
"ts": "1439869294.000008",
|
||||
# A reply to thread 2
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1172,6 +1176,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"user": "U061A1R2R",
|
||||
"ts": "1439869494.000008",
|
||||
# A reply to thread 2
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1180,6 +1185,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"user": "U061A5N1G",
|
||||
"ts": "1439868295.000008",
|
||||
# Start of thread 3!
|
||||
"parent_user_id": "U061A5N1G",
|
||||
"thread_ts": "1439868295.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1189,6 +1195,7 @@ class SlackImporter(ZulipTestCase):
|
|||
"ts": "1439869295.000008",
|
||||
"subtype": "thread_broadcast",
|
||||
# A broadcasted thread reply in thread 3!
|
||||
"root": {"user": "U061A5N1G"},
|
||||
"thread_ts": "1439868295.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
|
@ -1198,9 +1205,28 @@ class SlackImporter(ZulipTestCase):
|
|||
"ts": "1439869395.000008",
|
||||
"subtype": "thread_broadcast",
|
||||
# Another broadcasted thread reply in thread 3!
|
||||
"root": {"user": "U061A5N1G"},
|
||||
"thread_ts": "1439868295.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "random",
|
||||
"user": "U061A1R2R",
|
||||
"ts": "1439868294.000008",
|
||||
# Start of thread 4!
|
||||
"parent_user_id": "U061A1R2R",
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
{
|
||||
"text": "replying to the fourth thread :)",
|
||||
"user": "U061A5N1G",
|
||||
"ts": "1439869294.000008",
|
||||
# A reply to thread 4
|
||||
"parent_user_id": "U061A1R2R",
|
||||
"thread_ts": "1439868294.000008",
|
||||
"channel_name": "random",
|
||||
},
|
||||
]
|
||||
|
||||
slack_recipient_name_to_zulip_recipient_id = {
|
||||
|
@ -1246,7 +1272,7 @@ class SlackImporter(ZulipTestCase):
|
|||
# functioning already tested in helper function
|
||||
self.assertEqual(zerver_usermessage, [])
|
||||
# subtype: channel_join is filtered
|
||||
self.assert_length(zerver_message, 11)
|
||||
self.assert_length(zerver_message, 13)
|
||||
|
||||
self.assertEqual(uploads, [])
|
||||
self.assertEqual(attachment, [])
|
||||
|
@ -1257,7 +1283,7 @@ class SlackImporter(ZulipTestCase):
|
|||
|
||||
# Original thread message in the main topic will have additional cross-linking
|
||||
# message appended to it
|
||||
thread1_topic_name = "2015-06-12 Slack thread 1"
|
||||
thread1_topic_name = "2015-06-12 message body text"
|
||||
original_thread1_message = (
|
||||
f"message body text\n\n*1 reply in #**random>{thread1_topic_name}***"
|
||||
)
|
||||
|
@ -1279,7 +1305,7 @@ random
|
|||
self.assertEqual(zerver_message[2][EXPORT_TOPIC_NAME], thread1_topic_name)
|
||||
|
||||
# A new thread with a different date from 2015-06-12, starts the counter from 1.
|
||||
thread2_topic_name = "2015-08-18 Slack thread 1"
|
||||
thread2_topic_name = "2015-08-18 random"
|
||||
original_thread2_message = f"random\n\n*2 replies in #**random>{thread2_topic_name}***"
|
||||
original_thread2_message_id = zerver_message[3]["id"]
|
||||
self.assertEqual(zerver_message[3]["content"], original_thread2_message)
|
||||
|
@ -1300,7 +1326,7 @@ replying to the second thread :)
|
|||
self.assertEqual(zerver_message[5][EXPORT_TOPIC_NAME], thread2_topic_name)
|
||||
|
||||
# The third thread is to test how broadcasted thread replies are converted.
|
||||
thread3_topic_name = "2015-08-18 Slack thread 2"
|
||||
thread3_topic_name = "2015-08-18 original message for the third thread"
|
||||
original_thread3_message = f"original message for the third thread\n\n*2 replies in #**random>{thread3_topic_name}***"
|
||||
original_thread3_message_id = zerver_message[6]["id"]
|
||||
self.assertEqual(zerver_message[6]["content"], original_thread3_message)
|
||||
|
@ -1318,7 +1344,7 @@ original message for the third thread
|
|||
self.assertEqual(zerver_message[7]["content"], thread3_reply_1)
|
||||
self.assertEqual(zerver_message[7][EXPORT_TOPIC_NAME], thread3_topic_name)
|
||||
thread3_broadcasted_reply_1 = f"""
|
||||
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20Slack.20thread.202/near/{thread3_reply_1_id})*
|
||||
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20original.20message.20for.20the.20third.20thread/near/{thread3_reply_1_id})*
|
||||
|
||||
{thread3_reply_1_content}
|
||||
"""
|
||||
|
@ -1331,7 +1357,7 @@ original message for the third thread
|
|||
self.assertEqual(zerver_message[9]["content"], thread3_reply_2)
|
||||
self.assertEqual(zerver_message[9][EXPORT_TOPIC_NAME], thread3_topic_name)
|
||||
thread3_broadcasted_reply_2 = f"""
|
||||
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20Slack.20thread.202/near/{thread3_reply_2_id})*
|
||||
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20original.20message.20for.20the.20third.20thread/near/{thread3_reply_2_id})*
|
||||
|
||||
{thread3_reply_2}
|
||||
"""
|
||||
|
@ -1339,6 +1365,14 @@ original message for the third thread
|
|||
self.assertEqual(zerver_message[10][EXPORT_TOPIC_NAME], main_import_topic)
|
||||
self.assertIn(thread3_reply_2, zerver_message[10]["content"])
|
||||
|
||||
# The fourth thread is to test how colliding thread topics are handled.
|
||||
# Its topic will collide with thread two topic.
|
||||
thread4_topic_name = "2015-08-18 random (2)"
|
||||
original_thread4_message = f"random\n\n*1 reply in #**random>{thread4_topic_name}***"
|
||||
self.assertEqual(zerver_message[11]["content"], original_thread4_message)
|
||||
self.assertEqual(zerver_message[11][EXPORT_TOPIC_NAME], main_import_topic)
|
||||
self.assertEqual(zerver_message[12][EXPORT_TOPIC_NAME], thread4_topic_name)
|
||||
|
||||
self.assertEqual(
|
||||
zerver_message[1]["recipient"], slack_recipient_name_to_zulip_recipient_id["random"]
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue