mirror of https://github.com/zulip/zulip.git
rocketchat: Remove unnecessary SHA-1 hashing of direct message groups.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
541699a1c6
commit
722842a0aa
|
@ -1,4 +1,3 @@
|
||||||
import hashlib
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
@ -886,23 +885,6 @@ def map_receiver_id_to_recipient_id(
|
||||||
user_id_to_recipient_id[recipient["type_id"]] = recipient["id"]
|
user_id_to_recipient_id[recipient["type_id"]] = recipient["id"]
|
||||||
|
|
||||||
|
|
||||||
# This is inspired by get_direct_message_group_hash from
# zerver/models/recipients.py. It expects strings identifying
# Rocket.Chat users, like `LdBZ7kPxtKESyHPEe`, not integer IDs.
#
# Its purpose is to be a stable map usable for deduplication/merging
# of Rocket.Chat threads involving the same set of people. Thus, its
# only important property is that if two sets of users S and T are
# equal, and thus will have the same actual direct message group hash
# once imported, then get_string_direct_message_group_hash(S) ==
# get_string_direct_message_group_hash(T).
def get_string_direct_message_group_hash(id_list: list[str]) -> str:
    """Return a SHA-1 hex digest that identifies the set of users in id_list.

    The IDs are de-duplicated and sorted before hashing, so the result
    depends only on which users are present, not on their order or on
    how many times each one is listed.

    Args:
        id_list: Rocket.Chat user ID strings for the members of a
            direct message group.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the
        comma-joined, sorted, unique IDs.
    """
    unique_sorted_ids = sorted(set(id_list))
    hash_key = ",".join(str(user_id) for user_id in unique_sorted_ids)
    # The digest is only a deduplication key, not a security primitive;
    # saying so keeps this call usable on builds that restrict SHA-1.
    return hashlib.sha1(hash_key.encode(), usedforsecurity=False).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def categorize_channels_and_map_with_id(
|
def categorize_channels_and_map_with_id(
|
||||||
channel_data: list[dict[str, Any]],
|
channel_data: list[dict[str, Any]],
|
||||||
room_id_to_room_map: dict[str, dict[str, Any]],
|
room_id_to_room_map: dict[str, dict[str, Any]],
|
||||||
|
@ -912,18 +894,14 @@ def categorize_channels_and_map_with_id(
|
||||||
huddle_id_to_huddle_map: dict[str, dict[str, Any]],
|
huddle_id_to_huddle_map: dict[str, dict[str, Any]],
|
||||||
livechat_id_to_livechat_map: dict[str, dict[str, Any]],
|
livechat_id_to_livechat_map: dict[str, dict[str, Any]],
|
||||||
) -> None:
|
) -> None:
|
||||||
direct_message_group_hashed_channels: dict[str, Any] = {}
|
direct_message_group_hashed_channels: dict[frozenset[str], Any] = {}
|
||||||
for channel in channel_data:
|
for channel in channel_data:
|
||||||
if channel.get("prid"):
|
if channel.get("prid"):
|
||||||
dsc_id_to_dsc_map[channel["_id"]] = channel
|
dsc_id_to_dsc_map[channel["_id"]] = channel
|
||||||
elif channel["t"] == "d":
|
elif channel["t"] == "d":
|
||||||
if len(channel["uids"]) > 2:
|
if len(channel["uids"]) > 2:
|
||||||
direct_message_group_hash = get_string_direct_message_group_hash(channel["uids"])
|
direct_message_group_members = frozenset(channel["uids"])
|
||||||
logging.info(
|
logging.info("Huddle channel found. UIDs: %r", channel["uids"])
|
||||||
"Huddle channel found. UIDs: %s -> hash %s",
|
|
||||||
channel["uids"],
|
|
||||||
direct_message_group_hash,
|
|
||||||
)
|
|
||||||
|
|
||||||
if channel["msgs"] == 0: # nocoverage
|
if channel["msgs"] == 0: # nocoverage
|
||||||
# Rocket.Chat exports in the wild sometimes
|
# Rocket.Chat exports in the wild sometimes
|
||||||
|
@ -935,15 +913,15 @@ def categorize_channels_and_map_with_id(
|
||||||
# value in Zulip's data model.
|
# value in Zulip's data model.
|
||||||
logging.debug("Skipping direct message group with 0 messages: %s", channel)
|
logging.debug("Skipping direct message group with 0 messages: %s", channel)
|
||||||
elif (
|
elif (
|
||||||
direct_message_group_hash in direct_message_group_hashed_channels
|
direct_message_group_members in direct_message_group_hashed_channels
|
||||||
): # nocoverage
|
): # nocoverage
|
||||||
logging.info(
|
logging.info(
|
||||||
"Mapping direct message group hash %s to existing channel: %s",
|
"Mapping direct message group %r to existing channel: %s",
|
||||||
direct_message_group_hash,
|
direct_message_group_members,
|
||||||
direct_message_group_hashed_channels[direct_message_group_hash],
|
direct_message_group_hashed_channels[direct_message_group_members],
|
||||||
)
|
)
|
||||||
huddle_id_to_huddle_map[channel["_id"]] = direct_message_group_hashed_channels[
|
huddle_id_to_huddle_map[channel["_id"]] = direct_message_group_hashed_channels[
|
||||||
direct_message_group_hash
|
direct_message_group_members
|
||||||
]
|
]
|
||||||
|
|
||||||
# Ideally, we'd merge the duplicate direct message
|
# Ideally, we'd merge the duplicate direct message
|
||||||
|
@ -961,7 +939,7 @@ def categorize_channels_and_map_with_id(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
huddle_id_to_huddle_map[channel["_id"]] = channel
|
huddle_id_to_huddle_map[channel["_id"]] = channel
|
||||||
direct_message_group_hashed_channels[direct_message_group_hash] = channel
|
direct_message_group_hashed_channels[direct_message_group_members] = channel
|
||||||
else:
|
else:
|
||||||
direct_id_to_direct_map[channel["_id"]] = channel
|
direct_id_to_direct_map[channel["_id"]] = channel
|
||||||
elif channel["t"] == "l":
|
elif channel["t"] == "l":
|
||||||
|
|
|
@ -890,7 +890,7 @@ class RocketChatImporter(ZulipTestCase):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
info_log.output,
|
info_log.output,
|
||||||
[
|
[
|
||||||
"INFO:root:Huddle channel found. UIDs: ['LdBZ7kPxtKESyHPEe', 'M2sXGqoQRJQwQoXY2', 'os6N2Xg2JkNMCSW9Z'] -> hash 752a5854d2b6eec337fe81f0066a5dd72c3f0639",
|
"INFO:root:Huddle channel found. UIDs: ['LdBZ7kPxtKESyHPEe', 'M2sXGqoQRJQwQoXY2', 'os6N2Xg2JkNMCSW9Z']",
|
||||||
"INFO:root:Starting to process custom emoji",
|
"INFO:root:Starting to process custom emoji",
|
||||||
"INFO:root:Done processing emoji",
|
"INFO:root:Done processing emoji",
|
||||||
"INFO:root:skipping direct messages discussion mention: Discussion with Hermione",
|
"INFO:root:skipping direct messages discussion mention: Discussion with Hermione",
|
||||||
|
|
Loading…
Reference in New Issue