mirror of https://github.com/zulip/zulip.git
slack_data_import: Improve topic names for imported Slack threads.
In this commit, the topic names for imported Slack threads now include extra details like a snippet of the original message and the thread's date. `thread_key` is used to map the converted thread conversations because mapping with `thread_ts_str` had a small chance of combining threads with the same timestamp under one topic. Fixes #27661.
This commit is contained in:
parent
fd647ae16f
commit
435ad8a111
|
@ -154,8 +154,8 @@ in mind about the import process:
|
||||||
| Multi Channel Guest | Guest |
|
| Multi Channel Guest | Guest |
|
||||||
| Channel creator | none |
|
| Channel creator | none |
|
||||||
|
|
||||||
- Slack threads are imported as topics with names like "2023-05-30
|
- Slack threads are imported as topics with names that include snippets of the
|
||||||
Slack thread 1".
|
original message, such as "2023-05-30 Hi, can anyone reply if you're o…".
|
||||||
|
|
||||||
- Message edit history and `@user joined #channel_name` messages are not imported.
|
- Message edit history and `@user joined #channel_name` messages are not imported.
|
||||||
|
|
||||||
|
|
|
@ -874,6 +874,37 @@ def get_messages_iterator(
|
||||||
yield from sorted(messages_for_one_day, key=get_timestamp_from_message)
|
yield from sorted(messages_for_one_day, key=get_timestamp_from_message)
|
||||||
|
|
||||||
|
|
||||||
|
def get_parent_user_id(message: ZerverFieldsT, subtype: str) -> str | None:
    """Return the parent user ID recorded on a Slack message, if any.

    For a ``thread_broadcast`` message the ID is read from the ``user``
    field of the nested ``root`` object; for every other subtype it is
    read from the message's own ``parent_user_id`` field.

    Returns ``None`` when the relevant field is absent.
    """
    if subtype == "thread_broadcast":
        # `root` may be missing or explicitly null; treat both as "no
        # parent" rather than raising AttributeError on None.
        root = message.get("root") or {}
        return root.get("user")
    return message.get("parent_user_id")
|
||||||
|
|
||||||
|
|
||||||
|
def get_zulip_thread_topic_name(
    message: ZerverFieldsT, thread_ts: datetime, thread_counter: dict[str, int]
) -> str:
    """
    Build the topic name for a converted Slack thread.

    The topic name format is date + message snippet + optional counter,
    e.g. "2024-05-22 Hello this is a long message that will be c... (2)"

    The snippet is the message's ``text`` with all whitespace runs
    (including newlines) collapsed to single spaces, truncated to
    THREAD_TOPIC_SNIPPET_LENGTH characters with a trailing "..." when it
    is longer than that.

    The first thread that produces a given date + snippet gets no counter
    suffix; each later thread producing the same date + snippet gets
    " (2)", " (3)", and so on.

    `thread_counter` maps date + snippet to the number of threads seen so
    far with that name and is updated in place. Raises KeyError if
    `message` has no "text" field.
    """
    THREAD_TOPIC_SNIPPET_LENGTH = 45

    # A topic name is a single line; Slack message text frequently is not.
    # Collapsing whitespace also keeps the "#**channel>topic**" link that
    # callers build from this name on one line.
    text = " ".join(message["text"].split())
    if len(text) > THREAD_TOPIC_SNIPPET_LENGTH:
        # Reserve three characters for the "..." so the snippet never
        # exceeds THREAD_TOPIC_SNIPPET_LENGTH.
        thread_snippet = text[: THREAD_TOPIC_SNIPPET_LENGTH - 3] + "..."
    else:
        thread_snippet = text

    thread_date = thread_ts.strftime(r"%Y-%m-%d")

    # Number of earlier threads that already claimed this date + snippet.
    # `.get` keeps this working for a plain dict as well as a defaultdict.
    base_zulip_topic_name = thread_date + thread_snippet
    collision = thread_counter.get(base_zulip_topic_name, 0)
    thread_counter[base_zulip_topic_name] = collision + 1

    # NOTE(review): date (10) + space + 45-character snippet + " (N)" grows
    # by one character once N reaches two digits -- confirm the result still
    # fits the server's topic-length limit in that case.
    count = f" ({collision + 1})" if collision > 0 else ""

    return f"{thread_date} {thread_snippet}{count}"
|
||||||
|
|
||||||
|
|
||||||
def channel_message_to_zerver_message(
|
def channel_message_to_zerver_message(
|
||||||
realm_id: int,
|
realm_id: int,
|
||||||
realm: ZerverFieldsT,
|
realm: ZerverFieldsT,
|
||||||
|
@ -1012,25 +1043,23 @@ def channel_message_to_zerver_message(
|
||||||
has_image = file_info["has_image"]
|
has_image = file_info["has_image"]
|
||||||
|
|
||||||
# Slack's unthreaded messages go into a single topic, while
|
# Slack's unthreaded messages go into a single topic, while
|
||||||
# threads each generate a unique topic labeled by the date and
|
# threads each generate a unique topic labeled by the date,
|
||||||
# a counter among topics on that day.
|
# a snippet of the original message and a counter if there
|
||||||
|
# are any threads with the same topic name.
|
||||||
topic_name = MAIN_IMPORT_TOPIC
|
topic_name = MAIN_IMPORT_TOPIC
|
||||||
if convert_slack_threads and "thread_ts" in message:
|
if convert_slack_threads and "thread_ts" in message:
|
||||||
thread_ts = datetime.fromtimestamp(float(message["thread_ts"]), tz=timezone.utc)
|
thread_ts = datetime.fromtimestamp(float(message["thread_ts"]), tz=timezone.utc)
|
||||||
message_ts = datetime.fromtimestamp(float(message["ts"]), tz=timezone.utc)
|
message_ts = datetime.fromtimestamp(float(message["ts"]), tz=timezone.utc)
|
||||||
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
|
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
|
||||||
|
parent_user_id = get_parent_user_id(message, subtype)
|
||||||
|
|
||||||
|
thread_key = f"{thread_ts_str}-{parent_user_id}"
|
||||||
|
|
||||||
if thread_ts == message_ts:
|
if thread_ts == message_ts:
|
||||||
# If the message is at the start of a thread, send it to the
|
# If the message is at the start of a thread, send it to the
|
||||||
# main import channel and append a cross-linking notification
|
# main import channel and append a cross-linking notification
|
||||||
# message to it.
|
# message to it.
|
||||||
|
thread_topic_name = get_zulip_thread_topic_name(message, thread_ts, thread_counter)
|
||||||
# The topic name is "2015-08-18 Slack thread 2", where the counter at the end is to disambiguate
|
|
||||||
# threads with the same date.
|
|
||||||
thread_date = thread_ts.strftime(r"%Y-%m-%d")
|
|
||||||
thread_counter[thread_date] += 1
|
|
||||||
count = thread_counter[thread_date]
|
|
||||||
thread_topic_name = f"{thread_date} Slack thread {count}"
|
|
||||||
thread_topic_link_str = f"#**{import_channel_name}>{thread_topic_name}**"
|
thread_topic_link_str = f"#**{import_channel_name}>{thread_topic_name}**"
|
||||||
|
|
||||||
message_dict = {
|
message_dict = {
|
||||||
|
@ -1040,8 +1069,9 @@ def channel_message_to_zerver_message(
|
||||||
"topic": topic_name,
|
"topic": topic_name,
|
||||||
"display_recipient": import_channel_name,
|
"display_recipient": import_channel_name,
|
||||||
}
|
}
|
||||||
|
thread_key = f"{thread_ts_str}-{slack_user_id}"
|
||||||
|
|
||||||
thread_map[thread_ts_str] = {
|
thread_map[thread_key] = {
|
||||||
"thread_topic_name": thread_topic_name,
|
"thread_topic_name": thread_topic_name,
|
||||||
"thread_head_message_index": len(zerver_message),
|
"thread_head_message_index": len(zerver_message),
|
||||||
"thread_topic_link_str": thread_topic_link_str,
|
"thread_topic_link_str": thread_topic_link_str,
|
||||||
|
@ -1052,11 +1082,10 @@ def channel_message_to_zerver_message(
|
||||||
),
|
),
|
||||||
"thread_head_message": content,
|
"thread_head_message": content,
|
||||||
}
|
}
|
||||||
|
elif thread_key in thread_map:
|
||||||
elif thread_ts_str in thread_map:
|
|
||||||
# The first thread reply will quote and reply to the original
|
# The first thread reply will quote and reply to the original
|
||||||
# thread message/thread head in the main import topic.
|
# thread message/thread head in the main import topic.
|
||||||
thread_metadata = thread_map[thread_ts_str]
|
thread_metadata = thread_map[thread_key]
|
||||||
topic_name = thread_metadata.get("thread_topic_name", topic_name)
|
topic_name = thread_metadata.get("thread_topic_name", topic_name)
|
||||||
if thread_metadata["thread_length"] == 0:
|
if thread_metadata["thread_length"] == 0:
|
||||||
thread_metadata.update({"first_thread_reply": content})
|
thread_metadata.update({"first_thread_reply": content})
|
||||||
|
|
|
@ -1140,6 +1140,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"user": "U061A5N1G",
|
"user": "U061A5N1G",
|
||||||
"ts": "1434139102.000002",
|
"ts": "1434139102.000002",
|
||||||
# Start of thread 1!
|
# Start of thread 1!
|
||||||
|
"parent_user_id": "U061A5N1G",
|
||||||
"thread_ts": "1434139102.000002",
|
"thread_ts": "1434139102.000002",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1148,6 +1149,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"user": "U061A5N1G",
|
"user": "U061A5N1G",
|
||||||
"ts": "1439868294.000007",
|
"ts": "1439868294.000007",
|
||||||
# A reply to thread 1
|
# A reply to thread 1
|
||||||
|
"parent_user_id": "U061A5N1G",
|
||||||
"thread_ts": "1434139102.000002",
|
"thread_ts": "1434139102.000002",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1156,6 +1158,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"user": "U061A5N1G",
|
"user": "U061A5N1G",
|
||||||
"ts": "1439868294.000008",
|
"ts": "1439868294.000008",
|
||||||
# Start of thread 2!
|
# Start of thread 2!
|
||||||
|
"parent_user_id": "U061A5N1G",
|
||||||
"thread_ts": "1439868294.000008",
|
"thread_ts": "1439868294.000008",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1164,6 +1167,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"user": "U061A1R2R",
|
"user": "U061A1R2R",
|
||||||
"ts": "1439869294.000008",
|
"ts": "1439869294.000008",
|
||||||
# A reply to thread 2
|
# A reply to thread 2
|
||||||
|
"parent_user_id": "U061A5N1G",
|
||||||
"thread_ts": "1439868294.000008",
|
"thread_ts": "1439868294.000008",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1172,6 +1176,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"user": "U061A1R2R",
|
"user": "U061A1R2R",
|
||||||
"ts": "1439869494.000008",
|
"ts": "1439869494.000008",
|
||||||
# A reply to thread 2
|
# A reply to thread 2
|
||||||
|
"parent_user_id": "U061A5N1G",
|
||||||
"thread_ts": "1439868294.000008",
|
"thread_ts": "1439868294.000008",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1180,6 +1185,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"user": "U061A5N1G",
|
"user": "U061A5N1G",
|
||||||
"ts": "1439868295.000008",
|
"ts": "1439868295.000008",
|
||||||
# Start of thread 3!
|
# Start of thread 3!
|
||||||
|
"parent_user_id": "U061A5N1G",
|
||||||
"thread_ts": "1439868295.000008",
|
"thread_ts": "1439868295.000008",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1189,6 +1195,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
"ts": "1439869295.000008",
|
"ts": "1439869295.000008",
|
||||||
"subtype": "thread_broadcast",
|
"subtype": "thread_broadcast",
|
||||||
# A broadcasted thread reply in thread 3!
|
# A broadcasted thread reply in thread 3!
|
||||||
|
"root": {"user": "U061A5N1G"},
|
||||||
"thread_ts": "1439868295.000008",
|
"thread_ts": "1439868295.000008",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
@ -1198,9 +1205,28 @@ class SlackImporter(ZulipTestCase):
|
||||||
"ts": "1439869395.000008",
|
"ts": "1439869395.000008",
|
||||||
"subtype": "thread_broadcast",
|
"subtype": "thread_broadcast",
|
||||||
# Another broadcasted thread reply in thread 3!
|
# Another broadcasted thread reply in thread 3!
|
||||||
|
"root": {"user": "U061A5N1G"},
|
||||||
"thread_ts": "1439868295.000008",
|
"thread_ts": "1439868295.000008",
|
||||||
"channel_name": "random",
|
"channel_name": "random",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"text": "random",
|
||||||
|
"user": "U061A1R2R",
|
||||||
|
"ts": "1439868294.000008",
|
||||||
|
# Start of thread 4!
|
||||||
|
"parent_user_id": "U061A1R2R",
|
||||||
|
"thread_ts": "1439868294.000008",
|
||||||
|
"channel_name": "random",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "replying to the fourth thread :)",
|
||||||
|
"user": "U061A5N1G",
|
||||||
|
"ts": "1439869294.000008",
|
||||||
|
# A reply to thread 4
|
||||||
|
"parent_user_id": "U061A1R2R",
|
||||||
|
"thread_ts": "1439868294.000008",
|
||||||
|
"channel_name": "random",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
slack_recipient_name_to_zulip_recipient_id = {
|
slack_recipient_name_to_zulip_recipient_id = {
|
||||||
|
@ -1246,7 +1272,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
# functioning already tested in helper function
|
# functioning already tested in helper function
|
||||||
self.assertEqual(zerver_usermessage, [])
|
self.assertEqual(zerver_usermessage, [])
|
||||||
# subtype: channel_join is filtered
|
# subtype: channel_join is filtered
|
||||||
self.assert_length(zerver_message, 11)
|
self.assert_length(zerver_message, 13)
|
||||||
|
|
||||||
self.assertEqual(uploads, [])
|
self.assertEqual(uploads, [])
|
||||||
self.assertEqual(attachment, [])
|
self.assertEqual(attachment, [])
|
||||||
|
@ -1257,7 +1283,7 @@ class SlackImporter(ZulipTestCase):
|
||||||
|
|
||||||
# Original thread message in the main topic will have additional cross-linking
|
# Original thread message in the main topic will have additional cross-linking
|
||||||
# message appended to it
|
# message appended to it
|
||||||
thread1_topic_name = "2015-06-12 Slack thread 1"
|
thread1_topic_name = "2015-06-12 message body text"
|
||||||
original_thread1_message = (
|
original_thread1_message = (
|
||||||
f"message body text\n\n*1 reply in #**random>{thread1_topic_name}***"
|
f"message body text\n\n*1 reply in #**random>{thread1_topic_name}***"
|
||||||
)
|
)
|
||||||
|
@ -1279,7 +1305,7 @@ random
|
||||||
self.assertEqual(zerver_message[2][EXPORT_TOPIC_NAME], thread1_topic_name)
|
self.assertEqual(zerver_message[2][EXPORT_TOPIC_NAME], thread1_topic_name)
|
||||||
|
|
||||||
# A new thread with a different date from 2015-06-12, starts the counter from 1.
|
# A new thread with a different date from 2015-06-12, starts the counter from 1.
|
||||||
thread2_topic_name = "2015-08-18 Slack thread 1"
|
thread2_topic_name = "2015-08-18 random"
|
||||||
original_thread2_message = f"random\n\n*2 replies in #**random>{thread2_topic_name}***"
|
original_thread2_message = f"random\n\n*2 replies in #**random>{thread2_topic_name}***"
|
||||||
original_thread2_message_id = zerver_message[3]["id"]
|
original_thread2_message_id = zerver_message[3]["id"]
|
||||||
self.assertEqual(zerver_message[3]["content"], original_thread2_message)
|
self.assertEqual(zerver_message[3]["content"], original_thread2_message)
|
||||||
|
@ -1300,7 +1326,7 @@ replying to the second thread :)
|
||||||
self.assertEqual(zerver_message[5][EXPORT_TOPIC_NAME], thread2_topic_name)
|
self.assertEqual(zerver_message[5][EXPORT_TOPIC_NAME], thread2_topic_name)
|
||||||
|
|
||||||
# The third thread is to test how broadcasted thread replies are converted.
|
# The third thread is to test how broadcasted thread replies are converted.
|
||||||
thread3_topic_name = "2015-08-18 Slack thread 2"
|
thread3_topic_name = "2015-08-18 original message for the third thread"
|
||||||
original_thread3_message = f"original message for the third thread\n\n*2 replies in #**random>{thread3_topic_name}***"
|
original_thread3_message = f"original message for the third thread\n\n*2 replies in #**random>{thread3_topic_name}***"
|
||||||
original_thread3_message_id = zerver_message[6]["id"]
|
original_thread3_message_id = zerver_message[6]["id"]
|
||||||
self.assertEqual(zerver_message[6]["content"], original_thread3_message)
|
self.assertEqual(zerver_message[6]["content"], original_thread3_message)
|
||||||
|
@ -1318,7 +1344,7 @@ original message for the third thread
|
||||||
self.assertEqual(zerver_message[7]["content"], thread3_reply_1)
|
self.assertEqual(zerver_message[7]["content"], thread3_reply_1)
|
||||||
self.assertEqual(zerver_message[7][EXPORT_TOPIC_NAME], thread3_topic_name)
|
self.assertEqual(zerver_message[7][EXPORT_TOPIC_NAME], thread3_topic_name)
|
||||||
thread3_broadcasted_reply_1 = f"""
|
thread3_broadcasted_reply_1 = f"""
|
||||||
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20Slack.20thread.202/near/{thread3_reply_1_id})*
|
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20original.20message.20for.20the.20third.20thread/near/{thread3_reply_1_id})*
|
||||||
|
|
||||||
{thread3_reply_1_content}
|
{thread3_reply_1_content}
|
||||||
"""
|
"""
|
||||||
|
@ -1331,7 +1357,7 @@ original message for the third thread
|
||||||
self.assertEqual(zerver_message[9]["content"], thread3_reply_2)
|
self.assertEqual(zerver_message[9]["content"], thread3_reply_2)
|
||||||
self.assertEqual(zerver_message[9][EXPORT_TOPIC_NAME], thread3_topic_name)
|
self.assertEqual(zerver_message[9][EXPORT_TOPIC_NAME], thread3_topic_name)
|
||||||
thread3_broadcasted_reply_2 = f"""
|
thread3_broadcasted_reply_2 = f"""
|
||||||
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20Slack.20thread.202/near/{thread3_reply_2_id})*
|
*replied to a Slack thread: [{thread3_topic_name}](http://test-realm.testserver/#narrow/channel/2-random/topic/2015-08-18.20original.20message.20for.20the.20third.20thread/near/{thread3_reply_2_id})*
|
||||||
|
|
||||||
{thread3_reply_2}
|
{thread3_reply_2}
|
||||||
"""
|
"""
|
||||||
|
@ -1339,6 +1365,14 @@ original message for the third thread
|
||||||
self.assertEqual(zerver_message[10][EXPORT_TOPIC_NAME], main_import_topic)
|
self.assertEqual(zerver_message[10][EXPORT_TOPIC_NAME], main_import_topic)
|
||||||
self.assertIn(thread3_reply_2, zerver_message[10]["content"])
|
self.assertIn(thread3_reply_2, zerver_message[10]["content"])
|
||||||
|
|
||||||
|
# The fourth thread is to test how colliding thread topics are handled.
|
||||||
|
# Its topic will collide with thread two's topic.
|
||||||
|
thread4_topic_name = "2015-08-18 random (2)"
|
||||||
|
original_thread4_message = f"random\n\n*1 reply in #**random>{thread4_topic_name}***"
|
||||||
|
self.assertEqual(zerver_message[11]["content"], original_thread4_message)
|
||||||
|
self.assertEqual(zerver_message[11][EXPORT_TOPIC_NAME], main_import_topic)
|
||||||
|
self.assertEqual(zerver_message[12][EXPORT_TOPIC_NAME], thread4_topic_name)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
zerver_message[1]["recipient"], slack_recipient_name_to_zulip_recipient_id["random"]
|
zerver_message[1]["recipient"], slack_recipient_name_to_zulip_recipient_id["random"]
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue