2021-06-23 14:01:26 +02:00
|
|
|
import logging
|
|
|
|
import os
|
2021-08-04 17:44:30 +02:00
|
|
|
import random
|
|
|
|
import secrets
|
2022-06-29 15:33:42 +02:00
|
|
|
import uuid
|
2024-07-12 02:30:17 +02:00
|
|
|
from typing import Any
|
2021-06-23 14:01:26 +02:00
|
|
|
|
|
|
|
import bson
|
|
|
|
from django.conf import settings
|
|
|
|
from django.forms.models import model_to_dict
|
|
|
|
|
|
|
|
from zerver.data_import.import_util import (
|
|
|
|
SubscriberHandler,
|
|
|
|
ZerverFieldsT,
|
2021-08-04 17:44:30 +02:00
|
|
|
build_attachment,
|
2024-07-04 14:05:48 +02:00
|
|
|
build_direct_message_group,
|
|
|
|
build_direct_message_group_subscriptions,
|
2021-06-23 14:01:26 +02:00
|
|
|
build_message,
|
|
|
|
build_personal_subscriptions,
|
|
|
|
build_realm,
|
2021-08-02 19:11:32 +02:00
|
|
|
build_realm_emoji,
|
2021-06-23 14:01:26 +02:00
|
|
|
build_recipients,
|
|
|
|
build_stream,
|
|
|
|
build_stream_subscriptions,
|
|
|
|
build_user_profile,
|
|
|
|
build_zerver_realm,
|
|
|
|
create_converted_data_files,
|
|
|
|
make_subscriber_map,
|
|
|
|
make_user_messages,
|
|
|
|
)
|
|
|
|
from zerver.data_import.sequencer import NEXT_ID, IdMapper
|
|
|
|
from zerver.data_import.user_handler import UserHandler
|
|
|
|
from zerver.lib.emoji import name_to_codepoint
|
2021-08-04 17:44:30 +02:00
|
|
|
from zerver.lib.markdown import IMAGE_EXTENSIONS
|
2024-06-20 18:12:58 +02:00
|
|
|
from zerver.lib.upload import sanitize_name
|
2023-07-19 00:44:51 +02:00
|
|
|
from zerver.lib.utils import process_list_in_batches
|
2021-08-02 19:11:32 +02:00
|
|
|
from zerver.models import Reaction, RealmEmoji, Recipient, UserProfile
|
2021-06-23 14:01:26 +02:00
|
|
|
|
2024-10-17 20:09:44 +02:00
|
|
|
# BSON codec options with tz_aware=True, so that datetimes decoded with these
# options come back as timezone-aware values rather than naive ones.
bson_codec_options = bson.DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True)
|
|
|
|
|
2021-06-23 14:01:26 +02:00
|
|
|
|
|
|
|
def make_realm(
    realm_id: int, realm_subdomain: str, domain_name: str, rc_instance: dict[str, Any]
) -> ZerverFieldsT:
    """Build the realm export data for a Rocket.Chat instance.

    The realm's creation time is read from ``rc_instance["_createdAt"]`` and
    passed on as a float POSIX timestamp.  The returned dict always carries
    an empty ``zerver_defaultstream`` list.
    """
    instance_created_at = rc_instance["_createdAt"]
    realm_rows = build_zerver_realm(
        realm_id, realm_subdomain, float(instance_created_at.timestamp()), "Rocket.Chat"
    )
    realm_data = build_realm(realm_rows, realm_id, domain_name)

    # No default streams for now; this entry may be overridden later.
    realm_data["zerver_defaultstream"] = []
    return realm_data
|
|
|
|
|
|
|
|
|
|
|
|
def process_users(
    user_id_to_user_map: dict[str, dict[str, Any]],
    realm_id: int,
    domain_name: str,
    user_handler: UserHandler,
    user_id_mapper: IdMapper[str],
) -> None:
    """Convert every Rocket.Chat user into a Zulip user profile.

    Each converted profile is registered with ``user_handler``.  A user whose
    ``emails`` value is missing or ``None`` gets a synthetic address, which is
    also written back into that user's entry of ``user_id_to_user_map``.
    After all users are added, their emails are validated and every bot is
    assigned the first realm owner (if there is one) as its owner.
    """
    owner_ids: list[int] = []
    bot_ids: list[int] = []

    for rc_user_id, rc_user in user_id_to_user_map.items():
        # Rocket.Chat has three user types:
        # "user": This is a regular user of the system.
        # "bot": A special user type for bots.
        # "unknown": This usually represents a livechat guest.
        # Only regular users stay active; anything that is neither a regular
        # user nor a bot becomes a mirror dummy.
        rc_user_type = rc_user["type"]
        is_active = rc_user_type == "user"
        is_bot = rc_user_type == "bot"
        is_mirror_dummy = not is_active and not is_bot

        if rc_user.get("emails") is None:
            fallback_address = "{}-{}@{}".format(
                rc_user["username"], rc_user["type"], domain_name
            )
            rc_user["emails"] = [{"address": fallback_address}]

        full_name = rc_user["name"]
        zulip_user_id = user_id_mapper.get(rc_user_id)
        # The first listed address serves as both the delivery email and the email.
        primary_address = rc_user["emails"][0]["address"]
        short_name = rc_user["username"]
        date_joined = float(rc_user["createdAt"].timestamp())

        rc_roles = rc_user["roles"]
        if "admin" in rc_roles:
            role = UserProfile.ROLE_REALM_OWNER
            owner_ids.append(zulip_user_id)
        elif "guest" in rc_roles:
            role = UserProfile.ROLE_GUEST
        else:
            role = UserProfile.ROLE_MEMBER
            if "bot" in rc_roles:
                is_bot = True

        if is_bot:
            bot_ids.append(zulip_user_id)

        user_handler.add_user(
            build_user_profile(
                # TODO: Change this to use actual exported avatar
                avatar_source="G",
                date_joined=date_joined,
                delivery_email=primary_address,
                email=primary_address,
                full_name=full_name,
                id=zulip_user_id,
                is_active=is_active,
                role=role,
                is_mirror_dummy=is_mirror_dummy,
                realm_id=realm_id,
                short_name=short_name,
                timezone="UTC",
                is_bot=is_bot,
                bot_type=1 if is_bot else None,
            )
        )

    user_handler.validate_user_emails()

    # Set the first realm_owner as the owner of all the bots.
    if owner_ids:
        first_owner_id = owner_ids[0]
        for bot_id in bot_ids:
            user_handler.get_user(user_id=bot_id)["bot_owner"] = first_owner_id
|
|
|
|
|
|
|
|
|
2022-06-29 15:40:43 +02:00
|
|
|
def truncate_name(name: str, name_id: int, max_length: int = 60) -> str:
    """Shorten ``name`` so that it never exceeds ``max_length`` characters.

    Names that already fit are returned unchanged.  Longer names are cut and
    given a `` [<name_id>]`` suffix so that the total length is exactly
    ``max_length``.

    ``name_id`` is only ever formatted into the suffix, so any value with a
    sensible ``str()`` form works.  (Callers in this file pass Rocket.Chat
    ids, which appear to be strings despite the ``int`` annotation.)

    If the suffix alone is longer than ``max_length``, it cannot be included
    without breaking the limit; in that case the name is simply cut to
    ``max_length`` characters and the suffix is dropped.
    """
    if len(name) <= max_length:
        return name

    name_id_suffix = f" [{name_id}]"
    keep = max_length - len(name_id_suffix)
    if keep < 0:
        # A negative slice bound would count from the end of the name and
        # produce a result longer than max_length, so fall back to a plain cut.
        return name[: max(max_length, 0)]
    return name[:keep] + name_id_suffix
|
|
|
|
|
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def get_stream_name(rc_channel: dict[str, Any]) -> str:
    """Return the Zulip stream name for a Rocket.Chat channel.

    Channels flagged ``teamMain`` get a ``[TEAM] `` prefix.  The result is
    passed through ``truncate_name`` together with the channel's ``_id``.
    """
    is_team_main = rc_channel.get("teamMain")
    channel_name = rc_channel["name"]
    if is_team_main:
        channel_name = f"[TEAM] {channel_name}"
    return truncate_name(channel_name, rc_channel["_id"])
|
2021-08-12 14:20:27 +02:00
|
|
|
|
|
|
|
|
2021-06-23 14:01:26 +02:00
|
|
|
def convert_channel_data(
    room_id_to_room_map: dict[str, dict[str, Any]],
    team_id_to_team_map: dict[str, dict[str, Any]],
    stream_id_mapper: IdMapper[str],
    realm_id: int,
) -> list[ZerverFieldsT]:
    """Build one Zulip stream for each room in ``room_id_to_room_map``.

    Returns the list of built streams, in the iteration order of the map.
    """
    converted_streams = []

    for rc_room_id, rc_room in room_id_to_room_map.items():
        created_timestamp = float(rc_room["ts"].timestamp())
        zulip_stream_id = stream_id_mapper.get(rc_room_id)
        # Room type "p" is converted to an invite-only stream.
        is_invite_only = rc_room["t"] == "p"
        name = get_stream_name(rc_room)

        description = rc_room.get("description", "")
        # A room that belongs to a team without being the team's main room
        # gets the team name prepended to its description.
        if rc_room.get("teamId") and not rc_room.get("teamMain"):
            team_name = team_id_to_team_map[rc_room["teamId"]]["name"]
            description = "[Team {} channel]. {}".format(team_name, description)

        # If the channel is read-only, then only admins and moderators
        # should be allowed to post in the converted Zulip stream.
        # For more details: https://zulip.com/help/channel-posting-policy
        #
        # See the `Stream` model in `zerver/models/streams.py` to learn what
        # each number represents.
        if rc_room.get("ro", False):
            post_policy = 4
        else:
            post_policy = 1

        converted_streams.append(
            build_stream(
                date_created=created_timestamp,
                realm_id=realm_id,
                name=name,
                description=description,
                stream_id=zulip_stream_id,
                deactivated=False,
                invite_only=is_invite_only,
                stream_post_policy=post_policy,
            )
        )

    return converted_streams
|
|
|
|
|
|
|
|
|
2021-07-26 23:04:11 +02:00
|
|
|
def convert_stream_subscription_data(
    user_id_to_user_map: dict[str, dict[str, Any]],
    dsc_id_to_dsc_map: dict[str, dict[str, Any]],
    zerver_stream: list[ZerverFieldsT],
    stream_id_mapper: IdMapper[str],
    user_id_mapper: IdMapper[str],
    subscriber_handler: SubscriberHandler,
) -> None:
    """Record the subscribers of every converted stream.

    Membership is collected from each user's ``__rooms`` list.  Every stream
    in ``zerver_stream`` is then registered with ``subscriber_handler``;
    streams that end up with no subscribers are marked as deactivated.
    """
    members_by_stream: dict[int, set[int]] = {}

    for rc_user_id, rc_user in user_id_to_user_map.items():
        joined_rooms = rc_user.get("__rooms")
        if not joined_rooms:
            continue

        for rc_room_id in joined_rooms:
            # Ignore discussion rooms as these are not
            # imported as streams, but topics.
            if rc_room_id in dsc_id_to_dsc_map:
                continue
            zulip_stream_id = stream_id_mapper.get(rc_room_id)
            members = members_by_stream.setdefault(zulip_stream_id, set())
            members.add(user_id_mapper.get(rc_user_id))

    for stream in zerver_stream:
        subscribers = members_by_stream.get(stream["id"])
        if subscribers is None:
            # Set the stream without any subscribers
            # as deactivated.
            subscribers = set()
            stream["deactivated"] = True
        subscriber_handler.set_info(users=subscribers, stream_id=stream["id"])
|
|
|
|
|
|
|
|
|
2024-07-04 14:05:48 +02:00
|
|
|
def convert_direct_message_group_data(
    direct_message_group_id_to_direct_message_group_map: dict[str, dict[str, Any]],
    direct_message_group_id_mapper: IdMapper[str],
    user_id_mapper: IdMapper[str],
    subscriber_handler: SubscriberHandler,
) -> list[ZerverFieldsT]:
    """Build the Zulip direct message groups and record their members.

    For each entry of the input map, a direct message group is built from its
    mapped id and the number of entries in its ``uids`` list, and the mapped
    member ids are registered with ``subscriber_handler``.  Returns the list
    of built groups.
    """
    converted_groups: list[ZerverFieldsT] = []

    for rc_group_id, rc_group in direct_message_group_id_to_direct_message_group_map.items():
        zulip_group_id = direct_message_group_id_mapper.get(rc_group_id)
        rc_member_ids = rc_group["uids"]
        converted_groups.append(build_direct_message_group(zulip_group_id, len(rc_member_ids)))

        subscriber_handler.set_info(
            users={user_id_mapper.get(rc_member_id) for rc_member_id in rc_member_ids},
            direct_message_group_id=zulip_group_id,
        )

    return converted_groups
|
2021-07-26 23:04:11 +02:00
|
|
|
|
|
|
|
|
2021-08-02 19:11:32 +02:00
|
|
|
def build_custom_emoji(
    realm_id: int, custom_emoji_data: dict[str, list[dict[str, Any]]], output_dir: str
) -> list[ZerverFieldsT]:
    """Write the custom emoji files to disk and build their realm emoji rows.

    ``custom_emoji_data`` is read through its ``"file"``, ``"chunk"`` and
    ``"emoji"`` lists.  Each emoji image is reassembled from its chunks and
    written below ``<output_dir>/emoji``.  One record and one realm emoji row
    are produced for the emoji's name and for each of its aliases; the
    records are written to ``/emoji/records.json``.  Returns the realm emoji
    rows.
    """
    logging.info("Starting to process custom emoji")

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)

    # Map emoji file_id to emoji file data, then attach each chunk's data
    # to the file it belongs to.
    files_by_id = {
        file_doc["_id"]: {"filename": file_doc["filename"], "chunks": []}
        for file_doc in custom_emoji_data["file"]
    }
    for chunk_doc in custom_emoji_data["chunk"]:
        files_by_id[chunk_doc["files_id"]]["chunks"].append(chunk_doc["data"])

    zerver_realmemoji: list[ZerverFieldsT] = []
    emoji_records: list[ZerverFieldsT] = []

    # Build custom emoji
    for rc_emoji in custom_emoji_data["emoji"]:
        # The file id is "<name>.<extension>".
        # Subject to change with changes in database
        file_info = files_by_id[f'{rc_emoji["name"]}.{rc_emoji["extension"]}']
        emoji_filename = file_info["filename"]
        emoji_bytes = b"".join(file_info["chunks"])

        target_path = os.path.join(
            emoji_folder,
            RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=realm_id,
                emoji_file_name=emoji_filename,
            ),
        )
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, "wb") as emoji_out:
            emoji_out.write(emoji_bytes)

        # The primary name and every alias each become their own emoji,
        # all pointing at the same file.
        for alias in [rc_emoji["name"], *rc_emoji["aliases"]]:
            emoji_records.append(
                {
                    "path": target_path,
                    "s3_path": target_path,
                    "file_name": emoji_filename,
                    "realm_id": realm_id,
                    "name": alias,
                }
            )
            zerver_realmemoji.append(
                build_realm_emoji(
                    realm_id=realm_id,
                    name=alias,
                    id=NEXT_ID("realmemoji"),
                    file_name=emoji_filename,
                )
            )

    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")
    logging.info("Done processing emoji")

    return zerver_realmemoji
|
|
|
|
|
|
|
|
|
2021-06-23 14:01:26 +02:00
|
|
|
def build_reactions(
    total_reactions: list[ZerverFieldsT],
    reactions: list[dict[str, Any]],
    message_id: int,
    zerver_realmemoji: list[ZerverFieldsT],
) -> None:
    """Append the converted reactions of one message to ``total_reactions``.

    Each entry of ``reactions`` provides a ``name`` and a ``user_id``.  The
    emoji name is looked up among the realm emoji first and the Unicode emoji
    second; reactions whose name matches neither are skipped.
    """
    realm_emoji_id_by_name = {emoji["name"]: emoji["id"] for emoji in zerver_realmemoji}

    # For the Unicode emoji codes, we use equivalent of
    # function 'emoji_name_to_emoji_code' in 'zerver/lib/emoji' here
    for rc_reaction in reactions:
        emoji_name = rc_reaction["name"]
        reacting_user_id = rc_reaction["user_id"]

        if emoji_name in realm_emoji_id_by_name:
            # Check in realm emoji
            emoji_code = realm_emoji_id_by_name[emoji_name]
            reaction_type = Reaction.REALM_EMOJI
        elif emoji_name in name_to_codepoint:
            # Check in Unicode emoji
            emoji_code = name_to_codepoint[emoji_name]
            reaction_type = Reaction.UNICODE_EMOJI
        else:  # nocoverage
            continue

        reaction = Reaction(
            id=NEXT_ID("reaction"),
            emoji_code=emoji_code,
            emoji_name=emoji_name,
            reaction_type=reaction_type,
        )

        # The message and user are filled in as plain ids after serializing.
        reaction_row = model_to_dict(reaction, exclude=["message", "user_profile"])
        reaction_row["message"] = message_id
        reaction_row["user_profile"] = reacting_user_id
        total_reactions.append(reaction_row)
|
|
|
|
|
|
|
|
|
2021-08-04 17:44:30 +02:00
|
|
|
def process_message_attachment(
    upload: dict[str, Any],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: list[ZerverFieldsT],
    uploads_list: list[ZerverFieldsT],
    upload_id_to_upload_data_map: dict[str, dict[str, Any]],
    output_dir: str,
) -> tuple[str, bool]:
    """Write a message's uploaded file to disk and register it as an attachment.

    Returns ``(attachment_content, has_image)``, where ``attachment_content``
    is the Markdown text linking to the upload.  Returns ``("", False)``
    without writing anything when the upload is not in
    ``upload_id_to_upload_data_map`` or has no ``type``.

    On success, an upload record is appended to ``uploads_list`` and the
    attachment is added through ``build_attachment``.
    """
    upload_id = upload["_id"]
    if upload_id not in upload_id_to_upload_data_map:  # nocoverage
        logging.info("Skipping unknown attachment of message_id: %s", message_id)
        return "", False

    if "type" not in upload:  # nocoverage
        logging.info("Skipping attachment without type of message_id: %s", message_id)
        return "", False

    stored_upload = upload_id_to_upload_data_map[upload_id]
    file_name = upload["name"]
    content_type = upload["type"]

    # The pseudo-extension is the last "/"-separated part of the type.
    file_ext = f'.{content_type.split("/")[-1]}'
    has_image = file_ext.lower() in IMAGE_EXTENSIONS

    try:
        sanitized_name = sanitize_name(file_name)
    except AssertionError:  # nocoverage
        logging.info("Replacing invalid attachment name with random uuid: %s", file_name)
        sanitized_name = uuid.uuid4().hex

    if len(sanitized_name.encode("utf-8")) >= 255:  # nocoverage
        logging.info("Replacing too long attachment name with random uuid: %s", file_name)
        sanitized_name = uuid.uuid4().hex

    random_bucket = format(random.randint(0, 255), "x")
    random_token = secrets.token_urlsafe(18)
    s3_path = "/".join([str(realm_id), random_bucket, random_token, sanitized_name])

    # Build the attachment from chunks and save it to s3_path.
    file_out_path = os.path.join(output_dir, "uploads", s3_path)
    os.makedirs(os.path.dirname(file_out_path), exist_ok=True)
    with open(file_out_path, "wb") as out_file:
        out_file.write(b"".join(stored_upload["chunk"]))

    attachment_content = (
        f'{stored_upload.get("description", "")}\n\n[{file_name}](/user_uploads/{s3_path})'
    )

    fileinfo = {
        "name": file_name,
        "size": stored_upload["size"],
        "created": float(stored_upload["_updatedAt"].timestamp()),
    }

    upload_record = {
        "path": s3_path,
        "realm_id": realm_id,
        "content_type": content_type,
        "user_profile_id": user_id,
        "last_modified": fileinfo["created"],
        "user_profile_email": user_handler.get_user(user_id=user_id)["email"],
        "s3_path": s3_path,
        "size": fileinfo["size"],
    }
    uploads_list.append(upload_record)

    build_attachment(
        realm_id=realm_id,
        message_ids={message_id},
        user_id=user_id,
        fileinfo=fileinfo,
        s3_path=s3_path,
        zerver_attachment=zerver_attachment,
    )

    return attachment_content, has_image
|
|
|
|
|
|
|
|
|
2021-06-23 14:01:26 +02:00
|
|
|
def process_raw_message_batch(
    realm_id: int,
    raw_messages: list[dict[str, Any]],
    subscriber_map: dict[int, set[int]],
    user_handler: UserHandler,
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: list[ZerverFieldsT],
    total_reactions: list[ZerverFieldsT],
    uploads_list: list[ZerverFieldsT],
    zerver_attachment: list[ZerverFieldsT],
    upload_id_to_upload_data_map: dict[str, dict[str, Any]],
) -> None:
    """Convert one batch of raw messages and write it to a messages JSON file.

    For each raw message this rewrites mentions into Zulip syntax, processes
    an attached file if there is one, builds the message row and collects its
    reactions into ``total_reactions``.  The message rows and their user
    message rows are then written to ``/messages-NNNNNN.json`` through
    ``create_converted_data_files``.
    """

    def fix_mentions(
        content: str, mention_user_ids: set[int], rc_channel_mention_data: list[dict[str, str]]
    ) -> str:
        # Fix user mentions
        for mentioned_id in mention_user_ids:
            mentioned_user = user_handler.get_user(user_id=mentioned_id)
            content = content.replace(
                "@{short_name}".format(**mentioned_user),
                "@**{full_name}**".format(**mentioned_user),
            )

        # We don't have an equivalent for Rocket.Chat's @here mention
        # which mentions all users active in the channel, so it is
        # converted the same way as @all.
        for rc_wildcard in ("@all", "@here"):
            content = content.replace(rc_wildcard, "@**all**")

        # Fix channel mentions
        for channel_mention in rc_channel_mention_data:
            content = content.replace(
                channel_mention["rc_mention"], channel_mention["zulip_mention"]
            )

        return content

    user_mention_map: dict[int, set[int]] = {}
    wildcard_mention_map: dict[int, bool] = {}
    zerver_message: list[ZerverFieldsT] = []

    for raw_message in raw_messages:
        message_id = NEXT_ID("message")
        mentioned_ids = raw_message["mention_user_ids"]
        user_mention_map[message_id] = mentioned_ids
        wildcard_mention_map[message_id] = raw_message["wildcard_mention"]

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mentioned_ids,
            rc_channel_mention_data=raw_message["rc_channel_mention_data"],
        )

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        recipient_id = raw_message["recipient_id"]

        has_link = raw_message["has_link"]
        has_attachment = "file" in raw_message
        has_image = False

        if has_attachment:
            # A message with a file always counts as having a link.
            has_link = True
            attachment_content, has_image = process_message_attachment(
                upload=raw_message["file"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                uploads_list=uploads_list,
                zerver_attachment=zerver_attachment,
                upload_id_to_upload_data_map=upload_id_to_upload_data_map,
                output_dir=output_dir,
            )
            content += attachment_content

        topic_name = raw_message["topic_name"]

        zerver_message.append(
            build_message(
                content=content,
                message_id=message_id,
                date_sent=date_sent,
                recipient_id=recipient_id,
                realm_id=realm_id,
                rendered_content=None,
                topic_name=topic_name,
                user_id=sender_user_id,
                has_image=has_image,
                has_link=has_link,
                has_attachment=has_attachment,
            )
        )
        build_reactions(
            total_reactions=total_reactions,
            reactions=raw_message["reactions"],
            message_id=message_id,
            zerver_realmemoji=zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=user_mention_map,
        wildcard_mention_map=wildcard_mention_map,
    )

    message_json = {
        "zerver_message": zerver_message,
        "zerver_usermessage": zerver_usermessage,
    }

    # The dump file counter is kept separately for each realm id.
    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    create_converted_data_files(message_json, output_dir, f"/messages-{dump_file_id:06}.json")
|
|
|
|
|
|
|
|
|
2021-08-12 14:20:27 +02:00
|
|
|
def get_topic_name(
    message: dict[str, Any],
    dsc_id_to_dsc_map: dict[str, dict[str, Any]],
    thread_id_mapper: IdMapper[str],
    is_pm_data: bool = False,
) -> str:
    """Return the Zulip topic name for a Rocket.Chat message.

    * Direct messages get an empty topic.
    * Messages whose room is a discussion use the discussion's ``fname``.
    * Messages that start a thread (``replies``) or belong to one (``tmid``)
      use a topic named after the mapped thread id.
    * Everything else goes to the generic "Imported from Rocket.Chat" topic.

    Discussion and thread topic names are passed through ``truncate_name``.
    """
    if is_pm_data:
        return ""

    rc_room_id = message["rid"]
    if rc_room_id in dsc_id_to_dsc_map:
        discussion_name = dsc_id_to_dsc_map[rc_room_id]["fname"]
        return truncate_name(f"{discussion_name} (Imported from Rocket.Chat)", rc_room_id)

    if message.get("replies"):
        # Message is the start of a thread
        thread_root_id = message["_id"]
    elif message.get("tmid"):
        # Message is a part of a thread
        thread_root_id = message["tmid"]
    else:
        # Normal channel message
        return "Imported from Rocket.Chat"

    thread_id = thread_id_mapper.get(thread_root_id)
    return truncate_name(f"Thread {thread_id} (Imported from Rocket.Chat)", thread_root_id)
|
|
|
|
|
|
|
|
|
2021-06-23 14:01:26 +02:00
|
|
|
def process_messages(
    realm_id: int,
    messages: list[dict[str, Any]],
    subscriber_map: dict[int, set[int]],
    is_pm_data: bool,
    username_to_user_id_map: dict[str, str],
    user_id_mapper: IdMapper[str],
    user_handler: UserHandler,
    user_id_to_recipient_id: dict[int, int],
    stream_id_mapper: IdMapper[str],
    stream_id_to_recipient_id: dict[int, int],
    direct_message_group_id_mapper: IdMapper[str],
    direct_message_group_id_to_recipient_id: dict[int, int],
    thread_id_mapper: IdMapper[str],
    room_id_to_room_map: dict[str, dict[str, Any]],
    dsc_id_to_dsc_map: dict[str, dict[str, Any]],
    direct_id_to_direct_map: dict[str, dict[str, Any]],
    direct_message_group_id_to_direct_message_group_map: dict[str, dict[str, Any]],
    zerver_realmemoji: list[ZerverFieldsT],
    total_reactions: list[ZerverFieldsT],
    uploads_list: list[ZerverFieldsT],
    zerver_attachment: list[ZerverFieldsT],
    upload_id_to_upload_data_map: dict[str, dict[str, Any]],
    output_dir: str,
) -> None:
    """Convert Rocket.Chat messages and write them out in batches.

    Messages that carry a ``t`` value are skipped.  Every other message is
    turned into an intermediate dict (sender, content, recipient, topic,
    mentions, reactions and optional file), and the resulting list is handed
    to ``process_raw_message_batch`` in chunks of 1000.
    """

    def list_reactions(reactions: dict[str, dict[str, Any]]) -> list[dict[str, Any]]:
        # Returns a list of dictionaries of form:
        # {"name": "smile", "user_id": 2}
        converted: list[dict[str, Any]] = []
        for react_code, reaction_info in reactions.items():
            # The emoji name is the part after the first ":" of the code.
            emoji_name = react_code.split(":")[1]

            for username in reaction_info["usernames"]:
                if username not in username_to_user_id_map:  # nocoverage
                    # This can happen with production data when old user names no longer exist. We cannot do
                    # much about it here so we just ignore the unknown user name.
                    continue

                zulip_user_id = user_id_mapper.get(username_to_user_id_map[username])
                converted.append({"name": emoji_name, "user_id": zulip_user_id})

        return converted

    def message_to_dict(message: dict[str, Any]) -> dict[str, Any]:
        rc_sender_id = message["u"]["_id"]
        sender_id = user_id_mapper.get(rc_sender_id)

        if "msg" in message:
            content = message["msg"]
        else:  # nocoverage
            content = "This message imported from Rocket.Chat had no body in the data export."
            logging.info(
                "Message %s contains no message content: %s",
                message["_id"],
                message,
            )

        reactions = list_reactions(message["reactions"]) if message.get("reactions") else []

        message_dict = {
            "sender_id": sender_id,
            "content": content,
            "date_sent": int(message["ts"].timestamp()),
            "reactions": reactions,
            "has_link": bool(message.get("urls")),
        }

        # Add recipient_id to message_dict
        if is_pm_data:
            # Message is in a 1:1 or group direct message.
            rc_channel_id = message["rid"]
            if rc_channel_id in direct_message_group_id_to_direct_message_group_map:
                zulip_group_id = direct_message_group_id_mapper.get(rc_channel_id)
                message_dict["recipient_id"] = direct_message_group_id_to_recipient_id[
                    zulip_group_id
                ]
            else:
                rc_member_ids = direct_id_to_direct_map[rc_channel_id]["uids"]

                if len(rc_member_ids) == 1:  # nocoverage
                    # direct messages to yourself only have one user.
                    # The list stored in the map is extended in place.
                    rc_member_ids.append(rc_member_ids[0])

                # The recipient is whichever of the two members is not the
                # sender (checked against the first member only).
                recipient_index = 1 if rc_sender_id == rc_member_ids[0] else 0
                zulip_member_id = user_id_mapper.get(rc_member_ids[recipient_index])
                message_dict["recipient_id"] = user_id_to_recipient_id[zulip_member_id]
        elif message["rid"] in dsc_id_to_dsc_map:
            # Message is in a discussion; it is delivered to the stream of
            # the discussion's parent channel.
            parent_channel_id = dsc_id_to_dsc_map[message["rid"]]["prid"]
            stream_id = stream_id_mapper.get(parent_channel_id)
            message_dict["recipient_id"] = stream_id_to_recipient_id[stream_id]
        else:
            stream_id = stream_id_mapper.get(message["rid"])
            message_dict["recipient_id"] = stream_id_to_recipient_id[stream_id]

        # Add topic name to message_dict
        message_dict["topic_name"] = get_topic_name(
            message, dsc_id_to_dsc_map, thread_id_mapper, is_pm_data
        )

        # Add user mentions to message_dict
        mention_user_ids = set()
        wildcard_mention = False
        for user_mention in message.get("mentions", []):
            mention_id = user_mention["_id"]
            if mention_id in ("all", "here"):
                wildcard_mention = True
            elif user_id_mapper.has(mention_id):
                mention_user_ids.add(user_id_mapper.get(mention_id))
            else:  # nocoverage
                logging.info(
                    "Message %s contains mention of unknown user %s: %s",
                    message["_id"],
                    mention_id,
                    user_mention,
                )

        message_dict["mention_user_ids"] = mention_user_ids
        message_dict["wildcard_mention"] = wildcard_mention

        # Add channel mentions to message_dict
        rc_channel_mention_data: list[dict[str, str]] = []
        for channel_mention in message.get("channels", []):
            mentioned_channel_id = channel_mention["_id"]
            mentioned_channel_name = channel_mention["name"]
            rc_mention = f"#{mentioned_channel_name}"

            if mentioned_channel_id in room_id_to_room_map:
                # Channel is converted to a stream.
                converted_stream_name = get_stream_name(room_id_to_room_map[mentioned_channel_id])
                zulip_mention = f"#**{converted_stream_name}**"
            elif mentioned_channel_id in dsc_id_to_dsc_map:
                # Channel is a discussion and is converted to a topic.
                dsc_channel = dsc_id_to_dsc_map[mentioned_channel_id]
                parent_channel_id = dsc_channel["prid"]
                parent_is_direct = (
                    parent_channel_id in direct_id_to_direct_map
                    or parent_channel_id in direct_message_group_id_to_direct_message_group_map
                )
                if parent_is_direct:
                    # Discussion belongs to a direct channel and thus, should not be
                    # linked.

                    # This logging statement serves the side benefit of avoiding the
                    # CPython optimization for `continue` so that the coverage reports
                    # aren't misleading.
                    logging.info(
                        "skipping direct messages discussion mention: %s", dsc_channel["fname"]
                    )
                    continue

                converted_topic_name = get_topic_name(
                    message={"rid": mentioned_channel_id},
                    dsc_id_to_dsc_map=dsc_id_to_dsc_map,
                    thread_id_mapper=thread_id_mapper,
                )
                parent_stream_name = get_stream_name(room_id_to_room_map[parent_channel_id])
                zulip_mention = f"#**{parent_stream_name}>{converted_topic_name}**"
            else:  # nocoverage
                logging.info("Failed to map mention '%s' to zulip syntax.", channel_mention)
                continue

            rc_channel_mention_data.append(
                {"rc_mention": rc_mention, "zulip_mention": zulip_mention}
            )
        message_dict["rc_channel_mention_data"] = rc_channel_mention_data

        # Add uploaded file (attachment) to message_dict
        if message.get("file"):
            message_dict["file"] = message["file"]

        return message_dict

    # Messages with a type are system notifications like user_joined
    # that we don't include.
    raw_messages: list[dict[str, Any]] = [
        message_to_dict(rc_message) for rc_message in messages if rc_message.get("t") is None
    ]

    def process_batch(lst: list[dict[str, Any]]) -> None:
        process_raw_message_batch(
            realm_id=realm_id,
            raw_messages=lst,
            subscriber_map=subscriber_map,
            user_handler=user_handler,
            is_pm_data=is_pm_data,
            output_dir=output_dir,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        )

    process_list_in_batches(
        lst=raw_messages,
        chunk_size=1000,
        process_batch=process_batch,
    )
|
|
|
|
|
|
|
|
|
2021-08-04 17:44:30 +02:00
|
|
|
def map_upload_id_to_upload_data(
    upload_data: dict[str, list[dict[str, Any]]],
) -> dict[str, dict[str, Any]]:
    """Index upload records by ID and attach each upload's chunk payloads.

    Every record in ``upload_data["upload"]`` is shallow-copied into the
    result under its ``_id``, with a fresh empty ``"chunk"`` list.  The
    ``data`` of each record in ``upload_data["chunk"]`` is then appended,
    in input order, to the list of the upload named by its ``files_id``.
    """
    uploads_by_id: dict[str, dict[str, Any]] = {
        record["_id"]: {**record, "chunk": []} for record in upload_data["upload"]
    }

    for piece in upload_data["chunk"]:
        owner_id = piece["files_id"]
        owner = uploads_by_id.get(owner_id)
        if owner is None:  # nocoverage
            logging.info("Skipping chunk %s without metadata", owner_id)
            # It's unclear why this apparent data corruption in the
            # Rocket.Chat database is possible, but empirically, some
            # chunks don't have any associated metadata.
            continue

        owner["chunk"].append(piece["data"])

    return uploads_by_id
|
|
|
|
|
|
|
|
|
2021-11-07 17:01:59 +01:00
|
|
|
def separate_channel_private_and_livechat_messages(
    messages: list[dict[str, Any]],
    dsc_id_to_dsc_map: dict[str, dict[str, Any]],
    direct_id_to_direct_map: dict[str, dict[str, Any]],
    direct_message_group_id_to_direct_message_group_map: dict[str, dict[str, Any]],
    livechat_id_to_livechat_map: dict[str, dict[str, Any]],
    channel_messages: list[dict[str, Any]],
    private_messages: list[dict[str, Any]],
    livechat_messages: list[dict[str, Any]],
) -> None:
    """Sort ``messages`` into the channel, private and livechat output lists.

    Args:
        messages: Raw message dicts; the room a message belongs to is read
            from its ``"rid"`` key.  Messages without a truthy ``"rid"``
            are dropped.
        dsc_id_to_dsc_map: Discussion rooms keyed by room ID; each value's
            ``"prid"`` is the ID of the parent room.
        direct_id_to_direct_map: Direct rooms keyed by room ID.
        direct_message_group_id_to_direct_message_group_map: Direct message
            group rooms keyed by room ID.
        livechat_id_to_livechat_map: Livechat rooms keyed by room ID.
        channel_messages: Output list, appended to in place.
        private_messages: Output list, appended to in place.
        livechat_messages: Output list, appended to in place.

    Side effects:
        A message posted in a discussion whose parent is a direct room or a
        direct message group has its ``"rid"`` rewritten to the parent ID.
    """
    # Room IDs are dictionary keys (hence hashable), so a set gives O(1)
    # membership tests instead of scanning a list once or twice per message.
    private_channel_ids = {
        *direct_id_to_direct_map,
        *direct_message_group_id_to_direct_message_group_map,
    }

    for message in messages:
        if not message.get("rid"):
            # Message does not belong to any channel (might be
            # related to livechat), so ignore all such messages.
            continue

        if message["rid"] in dsc_id_to_dsc_map:
            parent_channel_id = dsc_id_to_dsc_map[message["rid"]]["prid"]
            if parent_channel_id in private_channel_ids:
                # Messages in discussions originating from direct channels
                # are treated as if they were posted in the parent direct
                # channel only.
                message["rid"] = parent_channel_id

        if message["rid"] in private_channel_ids:
            private_messages.append(message)
        elif message["rid"] in livechat_id_to_livechat_map:
            livechat_messages.append(message)
        else:
            channel_messages.append(message)
|
|
|
|
|
|
|
|
|
|
|
|
def map_receiver_id_to_recipient_id(
    zerver_recipient: list[ZerverFieldsT],
    stream_id_to_recipient_id: dict[int, int],
    direct_message_group_id_to_recipient_id: dict[int, int],
    user_id_to_recipient_id: dict[int, int],
) -> None:
    """Fill the three receiver-ID -> recipient-ID maps from ``zerver_recipient``.

    Each recipient row is routed by its ``"type"`` to the matching output
    dictionary, where ``row["type_id"]`` is mapped to ``row["id"]``.  Rows
    of any other type are ignored.  The dictionaries are updated in place.
    """
    # receiver_id represents stream_id/direct_message_group_id/user_id
    target_map_by_type = {
        Recipient.STREAM: stream_id_to_recipient_id,
        Recipient.DIRECT_MESSAGE_GROUP: direct_message_group_id_to_recipient_id,
        Recipient.PERSONAL: user_id_to_recipient_id,
    }

    for row in zerver_recipient:
        target_map = target_map_by_type.get(row["type"])
        # Compare against None explicitly: an empty target dict is falsy.
        if target_map is not None:
            target_map[row["type_id"]] = row["id"]
|
|
|
|
|
|
|
|
|
|
|
|
def categorize_channels_and_map_with_id(
    channel_data: list[dict[str, Any]],
    room_id_to_room_map: dict[str, dict[str, Any]],
    team_id_to_team_map: dict[str, dict[str, Any]],
    dsc_id_to_dsc_map: dict[str, dict[str, Any]],
    direct_id_to_direct_map: dict[str, dict[str, Any]],
    direct_message_group_id_to_direct_message_group_map: dict[str, dict[str, Any]],
    livechat_id_to_livechat_map: dict[str, dict[str, Any]],
) -> None:
    """Sort every room in ``channel_data`` into the matching output map.

    All output dictionaries are keyed by the room's ``"_id"`` (teams by
    ``"teamId"``) and are filled in place:

    * a truthy ``"prid"`` -> ``dsc_id_to_dsc_map``
    * ``t == "d"`` with more than two ``"uids"`` ->
      ``direct_message_group_id_to_direct_message_group_map``
    * ``t == "d"`` otherwise -> ``direct_id_to_direct_map``
    * ``t == "l"`` -> ``livechat_id_to_livechat_map``
    * anything else -> ``room_id_to_room_map``, and additionally
      ``team_id_to_team_map`` when ``"teamMain"`` is truthy

    Raises:
        NotImplementedError: if two direct message groups that both have
            messages share the same set of member IDs.
    """
    # First direct message group seen for each distinct set of members.
    group_channel_by_members: dict[frozenset[str], Any] = {}

    for channel in channel_data:
        if channel.get("prid"):
            dsc_id_to_dsc_map[channel["_id"]] = channel
            continue

        channel_type = channel["t"]

        if channel_type == "l":
            livechat_id_to_livechat_map[channel["_id"]] = channel
            continue

        if channel_type != "d":
            room_id_to_room_map[channel["_id"]] = channel
            if channel.get("teamMain"):
                team_id_to_team_map[channel["teamId"]] = channel
            continue

        member_ids = channel["uids"]
        if len(member_ids) <= 2:
            direct_id_to_direct_map[channel["_id"]] = channel
            continue

        members = frozenset(member_ids)
        logging.info("Direct message group channel found. UIDs: %r", member_ids)

        if channel["msgs"] == 0:  # nocoverage
            # Rocket.Chat exports in the wild sometimes
            # contain duplicates of real direct message
            # groups, with no messages in the duplicate.
            # We ignore these minor database corruptions
            # in the Rocket.Chat export. Doing so is safe,
            # because a direct message group with no message
            # history has no value in Zulip's data model.
            logging.debug("Skipping direct message group with 0 messages: %s", channel)
            continue

        if members in group_channel_by_members:  # nocoverage
            existing_channel = group_channel_by_members[members]
            logging.info(
                "Mapping direct message group %r to existing channel: %s",
                members,
                existing_channel,
            )
            direct_message_group_id_to_direct_message_group_map[channel["_id"]] = (
                existing_channel
            )

            # Ideally, we'd merge the duplicate direct message
            # groups. Doing so correctly requires special
            # handling in convert_direct_message_group_data()
            # and on the message import side as well, since
            # those appear to be mapped via rocketchat channel
            # IDs and not all of that information is resolved
            # via the direct_message_group_id_to_direct_message_group_map.
            #
            # For now, just throw an exception here rather
            # than during the import process.
            raise NotImplementedError(
                "Mapping multiple direct message groups with messages to one is not fully implemented yet"
            )

        direct_message_group_id_to_direct_message_group_map[channel["_id"]] = channel
        group_channel_by_members[members] = channel
|
|
|
|
|
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def map_username_to_user_id(user_id_to_user_map: dict[str, dict[str, Any]]) -> dict[str, str]:
    """Return a lookup from each user's ``"username"`` to that user's ID.

    If several users share a username, the one that appears last in
    ``user_id_to_user_map`` wins.
    """
    return {
        user_record["username"]: rc_user_id
        for rc_user_id, user_record in user_id_to_user_map.items()
    }
|
|
|
|
|
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def map_user_id_to_user(user_data_list: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Index the user records by their ``"_id"``.

    The values are the original record objects, not copies.  If an ID
    occurs more than once, the last record with that ID is kept.
    """
    return {user_record["_id"]: user_record for user_record in user_data_list}
|
|
|
|
|
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
def rocketchat_data_to_dict(
|
|
|
|
rocketchat_data_dir: str, sections: list[str] | None = None
|
|
|
|
) -> dict[str, Any]:
|
|
|
|
"""Reads Rocket.Chat data from its BSON files for the requested sections of the
|
|
|
|
export. Defaults to fetching everything, which is convenient for tests, but
|
|
|
|
we prefer to fetch only those sections that are needed for a given stage to
|
|
|
|
provide a faster debug cycle for metadata data corruption issues.
|
|
|
|
|
|
|
|
TODO: Ideally, we'd read the big data sets, like messages and
|
|
|
|
uploads, with a streaming BSON parser, or pre-paginate the data.
|
|
|
|
"""
|
2024-07-12 02:30:17 +02:00
|
|
|
rocketchat_data: dict[str, Any] = {}
|
2021-06-23 14:01:26 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
if sections is None or "instance" in sections:
|
|
|
|
rocketchat_data["instance"] = []
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "instances.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["instance"] = bson.decode_all(fcache.read(), bson_codec_options)
|
|
|
|
|
|
|
|
if sections is None or "user" in sections:
|
|
|
|
rocketchat_data["user"] = []
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "users.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["user"] = bson.decode_all(fcache.read(), bson_codec_options)
|
|
|
|
|
|
|
|
if sections is None or "avatar" in sections:
|
|
|
|
rocketchat_data["avatar"] = {"avatar": [], "file": [], "chunk": []}
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "rocketchat_avatars.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["avatar"]["avatar"] = bson.decode_all(fcache.read(), bson_codec_options)
|
|
|
|
|
|
|
|
if rocketchat_data["avatar"]["avatar"]:
|
|
|
|
with open(
|
|
|
|
os.path.join(rocketchat_data_dir, "rocketchat_avatars.files.bson"), "rb"
|
|
|
|
) as fcache:
|
|
|
|
rocketchat_data["avatar"]["file"] = bson.decode_all(
|
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-06-23 14:01:26 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
with open(
|
|
|
|
os.path.join(rocketchat_data_dir, "rocketchat_avatars.chunks.bson"), "rb"
|
|
|
|
) as fcache:
|
|
|
|
rocketchat_data["avatar"]["chunk"] = bson.decode_all(
|
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-06-23 14:01:26 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
if sections is None or "room" in sections:
|
|
|
|
rocketchat_data["room"] = []
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "rocketchat_room.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["room"] = bson.decode_all(fcache.read(), bson_codec_options)
|
2021-06-23 14:01:26 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
if sections is None or "message" in sections:
|
|
|
|
rocketchat_data["message"] = []
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "rocketchat_message.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["message"] = bson.decode_all(fcache.read(), bson_codec_options)
|
2021-08-02 19:11:32 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
if sections is None or "custom_emoji" in sections:
|
|
|
|
rocketchat_data["custom_emoji"] = {"emoji": [], "file": [], "chunk": []}
|
|
|
|
with open(
|
|
|
|
os.path.join(rocketchat_data_dir, "rocketchat_custom_emoji.bson"), "rb"
|
|
|
|
) as fcache:
|
|
|
|
rocketchat_data["custom_emoji"]["emoji"] = bson.decode_all(
|
2024-10-17 20:09:44 +02:00
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-08-02 19:11:32 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
if rocketchat_data["custom_emoji"]["emoji"]:
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "custom_emoji.files.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["custom_emoji"]["file"] = bson.decode_all(
|
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-08-02 19:11:32 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
with open(
|
|
|
|
os.path.join(rocketchat_data_dir, "custom_emoji.chunks.bson"), "rb"
|
|
|
|
) as fcache:
|
|
|
|
rocketchat_data["custom_emoji"]["chunk"] = bson.decode_all(
|
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-08-04 17:44:30 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
if sections is None or "upload" in sections:
|
|
|
|
rocketchat_data["upload"] = {"upload": [], "file": [], "chunk": []}
|
|
|
|
with open(os.path.join(rocketchat_data_dir, "rocketchat_uploads.bson"), "rb") as fcache:
|
|
|
|
rocketchat_data["upload"]["upload"] = bson.decode_all(fcache.read(), bson_codec_options)
|
|
|
|
|
|
|
|
if rocketchat_data["upload"]["upload"]:
|
|
|
|
with open(
|
|
|
|
os.path.join(rocketchat_data_dir, "rocketchat_uploads.files.bson"), "rb"
|
|
|
|
) as fcache:
|
|
|
|
rocketchat_data["upload"]["file"] = bson.decode_all(
|
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-08-04 17:44:30 +02:00
|
|
|
|
2024-10-17 20:28:35 +02:00
|
|
|
with open(
|
|
|
|
os.path.join(rocketchat_data_dir, "rocketchat_uploads.chunks.bson"), "rb"
|
|
|
|
) as fcache:
|
|
|
|
rocketchat_data["upload"]["chunk"] = bson.decode_all(
|
|
|
|
fcache.read(), bson_codec_options
|
|
|
|
)
|
2021-08-04 17:44:30 +02:00
|
|
|
|
2021-06-23 14:01:26 +02:00
|
|
|
return rocketchat_data
|
|
|
|
|
|
|
|
|
|
|
|
def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
    """Convert the Rocket.Chat export in ``rocketchat_data_dir`` for import.

    Each section of the export (instance, user, custom_emoji, room,
    message, upload) is loaded separately through
    ``rocketchat_data_to_dict``.  The stages below build up the ``realm``
    dictionary step by step and finally hand it, together with the
    attachment and upload records, to ``create_converted_data_files`` for
    ``output_dir``.
    """
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    # Only the first record of the "instance" section is used.
    rocketchat_instance_data = rocketchat_data_to_dict(rocketchat_data_dir, ["instance"])[
        "instance"
    ][0]
    realm = make_realm(realm_id, realm_subdomain, domain_name, rocketchat_instance_data)

    rocketchat_user_data = rocketchat_data_to_dict(rocketchat_data_dir, ["user"])["user"]
    user_id_to_user_map: dict[str, dict[str, Any]] = map_user_id_to_user(rocketchat_user_data)
    username_to_user_id_map: dict[str, str] = map_username_to_user_id(user_id_to_user_map)

    # Handlers and ID mappers shared by all of the conversion stages below.
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper[str]()
    stream_id_mapper = IdMapper[str]()
    direct_message_group_id_mapper = IdMapper[str]()
    thread_id_mapper = IdMapper[str]()

    process_users(
        user_id_to_user_map=user_id_to_user_map,
        realm_id=realm_id,
        domain_name=domain_name,
        user_handler=user_handler,
        user_id_mapper=user_id_mapper,
    )

    rocketchat_emoji_data = rocketchat_data_to_dict(rocketchat_data_dir, ["custom_emoji"])[
        "custom_emoji"
    ]
    zerver_realmemoji = build_custom_emoji(
        realm_id=realm_id,
        custom_emoji_data=rocketchat_emoji_data,
        output_dir=output_dir,
    )
    realm["zerver_realmemoji"] = zerver_realmemoji

    # Output maps filled in place by categorize_channels_and_map_with_id().
    room_id_to_room_map: dict[str, dict[str, Any]] = {}
    team_id_to_team_map: dict[str, dict[str, Any]] = {}
    dsc_id_to_dsc_map: dict[str, dict[str, Any]] = {}
    direct_id_to_direct_map: dict[str, dict[str, Any]] = {}
    direct_message_group_id_to_direct_message_group_map: dict[str, dict[str, Any]] = {}
    livechat_id_to_livechat_map: dict[str, dict[str, Any]] = {}

    rocketchat_room_data = rocketchat_data_to_dict(rocketchat_data_dir, ["room"])["room"]
    categorize_channels_and_map_with_id(
        channel_data=rocketchat_room_data,
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        direct_message_group_id_to_direct_message_group_map=direct_message_group_id_to_direct_message_group_map,
        livechat_id_to_livechat_map=livechat_id_to_livechat_map,
    )

    zerver_stream = convert_channel_data(
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        stream_id_mapper=stream_id_mapper,
        realm_id=realm_id,
    )
    realm["zerver_stream"] = zerver_stream

    # Add stream subscription data to `subscriber_handler`
    convert_stream_subscription_data(
        user_id_to_user_map=user_id_to_user_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        zerver_stream=zerver_stream,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )

    zerver_direct_message_group = convert_direct_message_group_data(
        direct_message_group_id_to_direct_message_group_map=direct_message_group_id_to_direct_message_group_map,
        direct_message_group_id_mapper=direct_message_group_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )
    # The direct message groups are stored under the "zerver_huddle" key.
    realm["zerver_huddle"] = zerver_direct_message_group

    all_users = user_handler.get_all_users()

    zerver_recipient = build_recipients(
        zerver_userprofile=all_users,
        zerver_stream=zerver_stream,
        zerver_direct_message_group=zerver_direct_message_group,
    )
    realm["zerver_recipient"] = zerver_recipient

    stream_subscriptions = build_stream_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )

    direct_message_group_subscriptions = build_direct_message_group_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_direct_message_group=zerver_direct_message_group,
    )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient,
    )

    zerver_subscription = (
        personal_subscriptions + stream_subscriptions + direct_message_group_subscriptions
    )
    realm["zerver_subscription"] = zerver_subscription

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription,
    )

    # Output maps filled in place by map_receiver_id_to_recipient_id().
    stream_id_to_recipient_id: dict[int, int] = {}
    direct_message_group_id_to_recipient_id: dict[int, int] = {}
    user_id_to_recipient_id: dict[int, int] = {}

    map_receiver_id_to_recipient_id(
        zerver_recipient=zerver_recipient,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        direct_message_group_id_to_recipient_id=direct_message_group_id_to_recipient_id,
        user_id_to_recipient_id=user_id_to_recipient_id,
    )

    # Output lists filled in place by
    # separate_channel_private_and_livechat_messages().  Only
    # channel_messages and private_messages are passed on to
    # process_messages() below.
    channel_messages: list[dict[str, Any]] = []
    private_messages: list[dict[str, Any]] = []
    livechat_messages: list[dict[str, Any]] = []

    rocketchat_message_data = rocketchat_data_to_dict(rocketchat_data_dir, ["message"])["message"]
    separate_channel_private_and_livechat_messages(
        messages=rocketchat_message_data,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        direct_message_group_id_to_direct_message_group_map=direct_message_group_id_to_direct_message_group_map,
        livechat_id_to_livechat_map=livechat_id_to_livechat_map,
        channel_messages=channel_messages,
        private_messages=private_messages,
        livechat_messages=livechat_messages,
    )
    # Hint we can free the memory, now that we're done processing this.
    rocketchat_message_data = []

    # Accumulators shared by both process_messages() calls below.
    total_reactions: list[ZerverFieldsT] = []
    uploads_list: list[ZerverFieldsT] = []
    zerver_attachment: list[ZerverFieldsT] = []

    rocketchat_upload_data = rocketchat_data_to_dict(rocketchat_data_dir, ["upload"])["upload"]
    upload_id_to_upload_data_map = map_upload_id_to_upload_data(rocketchat_upload_data)

    # Process channel messages
    process_messages(
        realm_id=realm_id,
        messages=channel_messages,
        subscriber_map=subscriber_map,
        is_pm_data=False,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        direct_message_group_id_mapper=direct_message_group_id_mapper,
        direct_message_group_id_to_recipient_id=direct_message_group_id_to_recipient_id,
        thread_id_mapper=thread_id_mapper,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        direct_message_group_id_to_direct_message_group_map=direct_message_group_id_to_direct_message_group_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    # Process direct messages
    process_messages(
        realm_id=realm_id,
        messages=private_messages,
        subscriber_map=subscriber_map,
        is_pm_data=True,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        direct_message_group_id_mapper=direct_message_group_id_mapper,
        direct_message_group_id_to_recipient_id=direct_message_group_id_to_recipient_id,
        thread_id_mapper=thread_id_mapper,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        direct_message_group_id_to_direct_message_group_map=direct_message_group_id_to_direct_message_group_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    realm["zerver_reaction"] = total_reactions
    # The user list is fetched again here rather than reusing `all_users`
    # from before message processing.
    realm["zerver_userprofile"] = user_handler.get_all_users()
    realm["sort_by_date"] = True

    create_converted_data_files(realm, output_dir, "/realm.json")
    # TODO: Add support for importing avatars
    create_converted_data_files([], output_dir, "/avatars/records.json")

    # Import attachments
    attachment: dict[str, list[Any]] = {"zerver_attachment": zerver_attachment}
    create_converted_data_files(attachment, output_dir, "/attachment.json")
    create_converted_data_files(uploads_list, output_dir, "/uploads/records.json")
|