"""
spec:
https://docs.mattermost.com/administration/bulk-export.html
"""
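# The bulk export referenced above is a JSON Lines file: one JSON object per
# line, each tagged with a "type" field (e.g. "team", "channel", "user",
# "post", "direct_post", "emoji").  mattermost_data_file_to_dict() below reads
# it line by line, and the rest of this module converts the collected rows
# into Zulip's data import format.
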
import logging
import os
import random
import re
import secrets
import shutil
import subprocess
from collections.abc import Callable
from typing import Any

import orjson
from django.conf import settings
from django.forms.models import model_to_dict
from django.utils.timezone import now as timezone_now

from zerver.data_import.import_util import (
    SubscriberHandler,
    ZerverFieldsT,
    build_attachment,
    build_direct_message_group,
    build_direct_message_group_subscriptions,
    build_message,
    build_personal_subscriptions,
    build_realm,
    build_realm_emoji,
    build_recipients,
    build_stream,
    build_stream_subscriptions,
    build_user_profile,
    build_zerver_realm,
    create_converted_data_files,
    make_subscriber_map,
    make_user_messages,
)
from zerver.data_import.sequencer import NEXT_ID, IdMapper
from zerver.data_import.user_handler import UserHandler
from zerver.lib.emoji import name_to_codepoint
from zerver.lib.export import do_common_export_processes
from zerver.lib.markdown import IMAGE_EXTENSIONS
from zerver.lib.upload import sanitize_name
from zerver.lib.utils import process_list_in_batches
from zerver.models import Reaction, RealmEmoji, Recipient, UserProfile


def make_realm(realm_id: int, team: dict[str, Any]) -> ZerverFieldsT:
    # set correct realm details
    NOW = float(timezone_now().timestamp())
    domain_name = settings.EXTERNAL_HOST
    realm_subdomain = team["name"]

    zerver_realm = build_zerver_realm(realm_id, realm_subdomain, NOW, "Mattermost")
    realm = build_realm(zerver_realm, realm_id, domain_name)

    # We may override these later.
    realm["zerver_defaultstream"] = []

    return realm


def process_user(
    user_dict: dict[str, Any], realm_id: int, team_name: str, user_id_mapper: IdMapper[str]
) -> ZerverFieldsT:
    def is_team_admin(user_dict: dict[str, Any]) -> bool:
        if user_dict["teams"] is None:
            return False
        return any(
            team["name"] == team_name and "team_admin" in team["roles"]
            for team in user_dict["teams"]
        )

    def is_team_guest(user_dict: dict[str, Any]) -> bool:
        if user_dict["teams"] is None:
            return False
        for team in user_dict["teams"]:
            if team["name"] == team_name and "team_guest" in team["roles"]:
                return True
        return False

    def get_full_name(user_dict: dict[str, Any]) -> str:
        full_name = "{} {}".format(user_dict["first_name"], user_dict["last_name"])
        if full_name.strip():
            return full_name
        return user_dict["username"]

    avatar_source = "G"
    full_name = get_full_name(user_dict)
    id = user_id_mapper.get(user_dict["username"])
    delivery_email = user_dict["email"]
    email = user_dict["email"]
    short_name = user_dict["username"]
    date_joined = int(timezone_now().timestamp())
    timezone = "UTC"

    if is_team_admin(user_dict):
        role = UserProfile.ROLE_REALM_OWNER
    elif is_team_guest(user_dict):
        role = UserProfile.ROLE_GUEST
    else:
        role = UserProfile.ROLE_MEMBER

    if user_dict["is_mirror_dummy"]:
        is_active = False
        is_mirror_dummy = True
    else:
        is_active = True
        is_mirror_dummy = False

    return build_user_profile(
        avatar_source=avatar_source,
        date_joined=date_joined,
        delivery_email=delivery_email,
        email=email,
        full_name=full_name,
        id=id,
        is_active=is_active,
        role=role,
        is_mirror_dummy=is_mirror_dummy,
        realm_id=realm_id,
        short_name=short_name,
        timezone=timezone,
    )


def convert_user_data(
    user_handler: UserHandler,
    user_id_mapper: IdMapper[str],
    user_data_map: dict[str, dict[str, Any]],
    realm_id: int,
    team_name: str,
) -> None:
    for user_data in user_data_map.values():
        if check_user_in_team(user_data, team_name) or user_data["is_mirror_dummy"]:
            user = process_user(user_data, realm_id, team_name, user_id_mapper)
            user_handler.add_user(user)
    user_handler.validate_user_emails()


def convert_channel_data(
    channel_data: list[ZerverFieldsT],
    user_data_map: dict[str, dict[str, Any]],
    subscriber_handler: SubscriberHandler,
    stream_id_mapper: IdMapper[str],
    user_id_mapper: IdMapper[str],
    realm_id: int,
    team_name: str,
) -> list[ZerverFieldsT]:
    channel_data_list = [d for d in channel_data if d["team"] == team_name]

    channel_members_map: dict[str, list[str]] = {}
    channel_admins_map: dict[str, list[str]] = {}

    def initialize_stream_membership_dicts() -> None:
        for channel in channel_data:
            channel_name = channel["name"]
            channel_members_map[channel_name] = []
            channel_admins_map[channel_name] = []

        for username in user_data_map:
            user_dict = user_data_map[username]
            teams = user_dict["teams"]
            if user_dict["teams"] is None:
                continue

            for team in teams:
                if team["name"] != team_name:
                    continue
                for channel in team["channels"]:
                    channel_roles = channel["roles"]
                    channel_name = channel["name"]
                    if "channel_admin" in channel_roles:
                        channel_admins_map[channel_name].append(username)
                    elif "channel_user" in channel_roles:
                        channel_members_map[channel_name].append(username)

    def get_invite_only_value_from_channel_type(channel_type: str) -> bool:
        # A channel can have one of two types in Mattermost:
        # "O" for a public channel.
        # "P" for a private channel.
        if channel_type == "O":
            return False
        elif channel_type == "P":
            return True
        else:  # nocoverage
            raise Exception("unexpected value")

    streams = []
    initialize_stream_membership_dicts()

    for channel_dict in channel_data_list:
        now = int(timezone_now().timestamp())
        stream_id = stream_id_mapper.get(channel_dict["name"])
        stream_name = channel_dict["name"]
        invite_only = get_invite_only_value_from_channel_type(channel_dict["type"])

        stream = build_stream(
            date_created=now,
            realm_id=realm_id,
            name=channel_dict["display_name"],
            # Purpose describes how the channel should be used. It is similar to
            # a stream description and is shown in the channel list to help others
            # decide whether to join.
            # Header text always appears right next to the channel name in the
            # channel header. It can be used for advertising the purpose of the
            # stream, making announcements, and sharing frequently used links, so
            # it is a reasonable fallback for the description when the channel
            # purpose is empty.
            description=channel_dict["purpose"] or channel_dict["header"],
            stream_id=stream_id,
            # Mattermost exports don't include data for archived (~ deactivated) channels.
            deactivated=False,
            invite_only=invite_only,
        )

        channel_users = {
            *(user_id_mapper.get(username) for username in channel_admins_map[stream_name]),
            *(user_id_mapper.get(username) for username in channel_members_map[stream_name]),
        }

        subscriber_handler.set_info(
            users=channel_users,
            stream_id=stream_id,
        )
        streams.append(stream)
    return streams


def convert_direct_message_group_data(
    direct_message_group_data: list[ZerverFieldsT],
    user_data_map: dict[str, dict[str, Any]],
    subscriber_handler: SubscriberHandler,
    direct_message_group_id_mapper: IdMapper[frozenset[str]],
    user_id_mapper: IdMapper[str],
    realm_id: int,
    team_name: str,
) -> list[ZerverFieldsT]:
    zerver_direct_message_group = []
    for direct_message_group in direct_message_group_data:
        if len(direct_message_group["members"]) > 2:
            direct_message_group_members = frozenset(direct_message_group["members"])
            if direct_message_group_id_mapper.has(direct_message_group_members):
                logging.info("Duplicate direct message group found in the export data. Skipping.")
                continue
            direct_message_group_id = direct_message_group_id_mapper.get(
                direct_message_group_members
            )
            direct_message_group_dict = build_direct_message_group(
                direct_message_group_id, len(direct_message_group_members)
            )
            direct_message_group_user_ids = {
                user_id_mapper.get(username) for username in direct_message_group["members"]
            }
            subscriber_handler.set_info(
                users=direct_message_group_user_ids,
                direct_message_group_id=direct_message_group_id,
            )
            zerver_direct_message_group.append(direct_message_group_dict)
    return zerver_direct_message_group


def build_reactions(
    realm_id: int,
    total_reactions: list[ZerverFieldsT],
    reactions: list[ZerverFieldsT],
    message_id: int,
    user_id_mapper: IdMapper[str],
    zerver_realmemoji: list[ZerverFieldsT],
) -> None:
    realmemoji = {}
    for realm_emoji in zerver_realmemoji:
        realmemoji[realm_emoji["name"]] = realm_emoji["id"]

    # For the Unicode emoji codes, we use the equivalent of the
    # 'get_emoji_data' function in 'zerver/lib/emoji' here.
    for mattermost_reaction in reactions:
        emoji_name = mattermost_reaction["emoji_name"]
        username = mattermost_reaction["user"]
        # Check in Unicode emoji
        if emoji_name in name_to_codepoint:
            emoji_code = name_to_codepoint[emoji_name]
            reaction_type = Reaction.UNICODE_EMOJI
        # Check in realm emoji
        elif emoji_name in realmemoji:
            emoji_code = realmemoji[emoji_name]
            reaction_type = Reaction.REALM_EMOJI
        else:  # nocoverage
            continue

        if not user_id_mapper.has(username):
            continue

        reaction_id = NEXT_ID("reaction")
        reaction = Reaction(
            id=reaction_id,
            emoji_code=emoji_code,
            emoji_name=emoji_name,
            reaction_type=reaction_type,
        )

        reaction_dict = model_to_dict(reaction, exclude=["message", "user_profile"])
        reaction_dict["message"] = message_id
        reaction_dict["user_profile"] = user_id_mapper.get(username)
        total_reactions.append(reaction_dict)


def get_mentioned_user_ids(raw_message: dict[str, Any], user_id_mapper: IdMapper[str]) -> set[int]:
    user_ids = set()
    content = raw_message["content"]

    # usernames can be of the form user.name, user_name, username., username_, user.name_ etc
    matches = re.findall(r"(?<=^|(?<=[^a-zA-Z0-9-_.]))@(([A-Za-z0-9]+[_.]?)+)", content)
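    # For example, in "thanks @john.doe and @jane_doe!" (illustrative
    # usernames), the first capture group of each match is "john.doe" and
    # "jane_doe" respectively; those candidates are looked up in
    # user_id_mapper below.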

    for match in matches:
        possible_username = match[0]
        if user_id_mapper.has(possible_username):
            user_ids.add(user_id_mapper.get(possible_username))
    return user_ids


def process_message_attachments(
    attachments: list[dict[str, Any]],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: list[ZerverFieldsT],
    uploads_list: list[ZerverFieldsT],
    mattermost_data_dir: str,
    output_dir: str,
) -> tuple[str, bool]:
    has_image = False

    markdown_links = []

    for attachment in attachments:
        attachment_path = attachment["path"]
        attachment_full_path = os.path.join(mattermost_data_dir, "data", attachment_path)

        file_name = attachment_path.split("/")[-1]
        file_ext = f'.{file_name.split(".")[-1]}'

        if file_ext.lower() in IMAGE_EXTENSIONS:
            has_image = True

        s3_path = "/".join(
            [
                str(realm_id),
                format(random.randint(0, 255), "x"),
                secrets.token_urlsafe(18),
                sanitize_name(file_name),
            ]
        )
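        # The generated path has the shape
        # "<realm_id>/<random hex byte>/<random URL-safe token>/<sanitized file name>"
        # and is what the Markdown link below refers to as /user_uploads/<s3_path>.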
        content_for_link = f"[{file_name}](/user_uploads/{s3_path})"

        markdown_links.append(content_for_link)

        fileinfo = {
            "name": file_name,
            "size": os.path.getsize(attachment_full_path),
            "created": os.path.getmtime(attachment_full_path),
        }

        upload = dict(
            path=s3_path,
            realm_id=realm_id,
            content_type=None,
            user_profile_id=user_id,
            last_modified=fileinfo["created"],
            user_profile_email=user_handler.get_user(user_id=user_id)["email"],
            s3_path=s3_path,
            size=fileinfo["size"],
        )
        uploads_list.append(upload)

        build_attachment(
            realm_id=realm_id,
            message_ids={message_id},
            user_id=user_id,
            fileinfo=fileinfo,
            s3_path=s3_path,
            zerver_attachment=zerver_attachment,
        )

        # Copy the attachment file to output_dir
        attachment_out_path = os.path.join(output_dir, "uploads", s3_path)
        os.makedirs(os.path.dirname(attachment_out_path), exist_ok=True)
        shutil.copyfile(attachment_full_path, attachment_out_path)

    content = "\n".join(markdown_links)

    return content, has_image


def process_raw_message_batch(
    realm_id: int,
    raw_messages: list[dict[str, Any]],
    subscriber_map: dict[int, set[int]],
    user_id_mapper: IdMapper[str],
    user_handler: UserHandler,
    get_recipient_id_from_channel_name: Callable[[str], int],
    get_recipient_id_from_direct_message_group_members: Callable[[frozenset[str]], int],
    get_recipient_id_from_username: Callable[[str], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: list[dict[str, Any]],
    total_reactions: list[dict[str, Any]],
    uploads_list: list[ZerverFieldsT],
    zerver_attachment: list[ZerverFieldsT],
    mattermost_data_dir: str,
) -> None:
    def fix_mentions(content: str, mention_user_ids: set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace("@channel", "@**all**")
        content = content.replace("@all", "@**all**")
        # We don't have an equivalent for Mattermost's @here mention, which
        # mentions all users online in the channel.
        content = content.replace("@here", "@**all**")
        return content

    mention_map: dict[int, set[int]] = {}
    zerver_message = []

    pm_members = {}

    for raw_message in raw_messages:
        message_id = NEXT_ID("message")
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
        )

        # html2text is GPL licensed, so run it as a subprocess.
        content = subprocess.check_output(["html2text", "--unicode-snob"], input=content, text=True)

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_channel_name(raw_message["channel_name"])
        elif "direct_message_group_members" in raw_message:
            recipient_id = get_recipient_id_from_direct_message_group_members(
                raw_message["direct_message_group_members"]
            )
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_username(members[1])
            else:
                recipient_id = get_recipient_id_from_username(members[0])
        else:
            raise AssertionError(
                "raw_message without channel_name, direct_message_group_members or pm_members key"
            )

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = False
        if "attachments" in raw_message:
            has_attachment = True
            has_link = True

            attachment_markdown, has_image = process_message_attachments(
                attachments=raw_message["attachments"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                zerver_attachment=zerver_attachment,
                uploads_list=uploads_list,
                mattermost_data_dir=mattermost_data_dir,
                output_dir=output_dir,
            )

            content += attachment_markdown

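        # Mattermost has no equivalent of Zulip topics, so every imported
        # message is filed under a single fixed topic in its channel.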
        topic_name = "imported from mattermost"

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            realm_id=realm_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)
        build_reactions(
            realm_id,
            total_reactions,
            raw_message["reactions"],
            message_id,
            user_id_mapper,
            zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)


def process_posts(
    num_teams: int,
    team_name: str,
    realm_id: int,
    post_data: list[dict[str, Any]],
    get_recipient_id_from_channel_name: Callable[[str], int],
    get_recipient_id_from_direct_message_group_members: Callable[[frozenset[str]], int],
    get_recipient_id_from_username: Callable[[str], int],
    subscriber_map: dict[int, set[int]],
    output_dir: str,
    is_pm_data: bool,
    masking_content: bool,
    user_id_mapper: IdMapper[str],
    user_handler: UserHandler,
    zerver_realmemoji: list[dict[str, Any]],
    total_reactions: list[dict[str, Any]],
    uploads_list: list[ZerverFieldsT],
    zerver_attachment: list[ZerverFieldsT],
    mattermost_data_dir: str,
) -> None:
    post_data_list = []
    for post in post_data:
        if "team" not in post:
            # Mattermost doesn't specify a team for direct messages
            # in its export format. This line of code requires that
            # we only be importing data from a single team (checked
            # elsewhere) -- we just assume it's the target team.
            post_team = team_name
        else:
            post_team = post["team"]
        if post_team == team_name:
            post_data_list.append(post)

    def message_to_dict(post_dict: dict[str, Any]) -> dict[str, Any]:
        sender_username = post_dict["user"]
        sender_id = user_id_mapper.get(sender_username)
        content = post_dict["message"]

        if masking_content:
            content = re.sub(r"[a-z]", "x", content)
            content = re.sub(r"[A-Z]", "X", content)

        if "reactions" in post_dict:
            reactions = post_dict["reactions"] or []
        else:
            reactions = []

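        # Mattermost's create_at timestamps are in milliseconds since the
        # epoch; Zulip's date_sent is in seconds, hence the division by 1000.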
        message_dict = dict(
            sender_id=sender_id,
            content=content,
            date_sent=int(post_dict["create_at"] / 1000),
            reactions=reactions,
        )
        if "channel" in post_dict:
            message_dict["channel_name"] = post_dict["channel"]
        elif "channel_members" in post_dict:
            # This case is for handling posts from direct messages and direct message
            # groups, not channels. Direct messages and direct message groups are known
            # as direct_channels in Slack and hence the name channel_members.
            channel_members = post_dict["channel_members"]
            if len(channel_members) > 2:
                message_dict["direct_message_group_members"] = frozenset(channel_members)
            elif len(channel_members) == 2:
                message_dict["pm_members"] = channel_members
        else:
            raise AssertionError("Post without channel or channel_members key.")

        if post_dict.get("attachments"):
            message_dict["attachments"] = post_dict["attachments"]

        return message_dict

    raw_messages = []
    for post_dict in post_data_list:
        raw_messages.append(message_to_dict(post_dict))
        message_replies = post_dict["replies"]
        # Replies to a message in Mattermost are stored in the main message object.
        # For now, we just append the replies immediately after the original message.
        if message_replies is not None:
            for reply in message_replies:
                if "channel" in post_dict:
                    reply["channel"] = post_dict["channel"]
                else:  # nocoverage
                    reply["channel_members"] = post_dict["channel_members"]
                raw_messages.append(message_to_dict(reply))

    def process_batch(lst: list[dict[str, Any]]) -> None:
        process_raw_message_batch(
            realm_id=realm_id,
            raw_messages=lst,
            subscriber_map=subscriber_map,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            get_recipient_id_from_channel_name=get_recipient_id_from_channel_name,
            get_recipient_id_from_direct_message_group_members=get_recipient_id_from_direct_message_group_members,
            get_recipient_id_from_username=get_recipient_id_from_username,
            is_pm_data=is_pm_data,
            output_dir=output_dir,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            mattermost_data_dir=mattermost_data_dir,
        )

    chunk_size = 1000

    process_list_in_batches(
        lst=raw_messages,
        chunk_size=chunk_size,
        process_batch=process_batch,
    )


def write_message_data(
    num_teams: int,
    team_name: str,
    realm_id: int,
    post_data: dict[str, list[dict[str, Any]]],
    zerver_recipient: list[ZerverFieldsT],
    subscriber_map: dict[int, set[int]],
    output_dir: str,
    masking_content: bool,
    stream_id_mapper: IdMapper[str],
    direct_message_group_id_mapper: IdMapper[frozenset[str]],
    user_id_mapper: IdMapper[str],
    user_handler: UserHandler,
    zerver_realmemoji: list[dict[str, Any]],
    total_reactions: list[dict[str, Any]],
    uploads_list: list[ZerverFieldsT],
    zerver_attachment: list[ZerverFieldsT],
    mattermost_data_dir: str,
) -> None:
    stream_id_to_recipient_id = {}
    direct_message_group_id_to_recipient_id = {}
    user_id_to_recipient_id = {}

    for d in zerver_recipient:
        if d["type"] == Recipient.STREAM:
            stream_id_to_recipient_id[d["type_id"]] = d["id"]
        elif d["type"] == Recipient.DIRECT_MESSAGE_GROUP:
            direct_message_group_id_to_recipient_id[d["type_id"]] = d["id"]
        if d["type"] == Recipient.PERSONAL:
            user_id_to_recipient_id[d["type_id"]] = d["id"]

    def get_recipient_id_from_channel_name(channel_name: str) -> int:
        receiver_id = stream_id_mapper.get(channel_name)
        return stream_id_to_recipient_id[receiver_id]

    def get_recipient_id_from_direct_message_group_members(
        direct_message_group_members: frozenset[str],
    ) -> int:
        receiver_id = direct_message_group_id_mapper.get(direct_message_group_members)
        return direct_message_group_id_to_recipient_id[receiver_id]

    def get_recipient_id_from_username(username: str) -> int:
        receiver_id = user_id_mapper.get(username)
        return user_id_to_recipient_id[receiver_id]

    if num_teams == 1:
        post_types = ["channel_post", "direct_post"]
    else:
        post_types = ["channel_post"]
        logging.warning(
            "Skipping importing direct message groups and DMs since there are multiple teams in the export"
        )

    for post_type in post_types:
        process_posts(
            num_teams=num_teams,
            team_name=team_name,
            realm_id=realm_id,
            post_data=post_data[post_type],
            get_recipient_id_from_channel_name=get_recipient_id_from_channel_name,
            get_recipient_id_from_direct_message_group_members=get_recipient_id_from_direct_message_group_members,
            get_recipient_id_from_username=get_recipient_id_from_username,
            subscriber_map=subscriber_map,
            output_dir=output_dir,
            is_pm_data=post_type == "direct_post",
            masking_content=masking_content,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            mattermost_data_dir=mattermost_data_dir,
        )


def write_emoticon_data(
    realm_id: int, custom_emoji_data: list[dict[str, Any]], data_dir: str, output_dir: str
) -> list[ZerverFieldsT]:
    """
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        The exported JSON file will have emoji rows if it contains any custom emoji:

            {
                "type": "emoji",
                "emoji": {"name": "peerdium", "image": "exported_emoji/h15ni7kf1bnj7jeua4qhmctsdo/image"}
            }
            {
                "type": "emoji",
                "emoji": {"name": "tick", "image": "exported_emoji/7u7x8ytgp78q8jir81o9ejwwnr/image"}
            }

        exported_emoji/ - contains a bunch of image files:

            exported_emoji/7u7x8ytgp78q8jir81o9ejwwnr/image
            exported_emoji/h15ni7kf1bnj7jeua4qhmctsdo/image

    We move all the relevant files to Zulip's more nested
    directory structure.
    """
    logging.info("Starting to process emoticons")

    flat_data = [
        dict(
            path=d["image"],
            name=d["name"],
        )
        for d in custom_emoji_data
    ]

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        source_sub_path = data["path"]
        source_path = os.path.join(data_dir, source_sub_path)

        target_fn = data["name"]
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data["name"],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec["name"],
            id=NEXT_ID("realmemoji"),
            file_name=rec["file_name"],
        )
        for rec in emoji_records
    ]
    logging.info("Done processing emoticons")

    return realmemoji


def create_username_to_user_mapping(
    user_data_list: list[dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    username_to_user = {}
    for user in user_data_list:
        username_to_user[user["username"]] = user
    return username_to_user


def check_user_in_team(user: dict[str, Any], team_name: str) -> bool:
    if user["teams"] is None:
        # This is null for users not on any team
        return False
    return any(team["name"] == team_name for team in user["teams"])


def label_mirror_dummy_users(
    num_teams: int,
    team_name: str,
    mattermost_data: dict[str, Any],
    username_to_user: dict[str, dict[str, Any]],
) -> None:
    # This function might look like a great place to label admin users. But
    # that won't be fully correct, since we are iterating only through posts,
    # and so it covers only users that have sent at least one message.
    for post in mattermost_data["post"]["channel_post"]:
        post_team = post["team"]
        if post_team == team_name:
            user = username_to_user[post["user"]]
            if not check_user_in_team(user, team_name):
                user["is_mirror_dummy"] = True

    if num_teams == 1:
        for post in mattermost_data["post"]["direct_post"]:
            assert "team" not in post
            user = username_to_user[post["user"]]
            if not check_user_in_team(user, team_name):
                user["is_mirror_dummy"] = True


def reset_mirror_dummy_users(username_to_user: dict[str, dict[str, Any]]) -> None:
    for username in username_to_user:
        user = username_to_user[username]
        user["is_mirror_dummy"] = False


def mattermost_data_file_to_dict(mattermost_data_file: str) -> dict[str, Any]:
    mattermost_data: dict[str, Any] = {}
    mattermost_data["version"] = []
    mattermost_data["team"] = []
    mattermost_data["channel"] = []
    mattermost_data["user"] = []
    mattermost_data["post"] = {"channel_post": [], "direct_post": []}
    mattermost_data["emoji"] = []
    mattermost_data["direct_channel"] = []

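    # Each line of the export file is a self-contained JSON object, e.g.
    #   {"type": "user", "user": {"username": "ron", ...}}
    # (illustrative values), so the file can be streamed line by line rather
    # than parsed as one large document.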
    with open(mattermost_data_file, "rb") as fp:
        for line in fp:
            row = orjson.loads(line)
            data_type = row["type"]
            if data_type == "post":
                mattermost_data["post"]["channel_post"].append(row["post"])
            elif data_type == "direct_post":
                mattermost_data["post"]["direct_post"].append(row["direct_post"])
            else:
                mattermost_data[data_type].append(row[data_type])
    return mattermost_data


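# Entry point for the conversion.  In a Zulip deployment this is typically
# driven by the `convert_mattermost_data` management command, which passes
# the unpacked Mattermost export directory and an empty output directory.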
def do_convert_data(mattermost_data_dir: str, output_dir: str, masking_content: bool) -> None:
    username_to_user: dict[str, dict[str, Any]] = {}

    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):  # nocoverage
        raise Exception("Output directory should be empty!")

    mattermost_data_file = os.path.join(mattermost_data_dir, "export.json")
    mattermost_data = mattermost_data_file_to_dict(mattermost_data_file)

    username_to_user = create_username_to_user_mapping(mattermost_data["user"])

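    # Each Mattermost team in the export becomes its own Zulip realm; the
    # converted data for a team is written to <output_dir>/<team name>/.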
    for team in mattermost_data["team"]:
        realm_id = NEXT_ID("realm_id")
        team_name = team["name"]

        user_handler = UserHandler()
        subscriber_handler = SubscriberHandler()
        user_id_mapper = IdMapper[str]()
        stream_id_mapper = IdMapper[str]()
        direct_message_group_id_mapper = IdMapper[frozenset[str]]()

        print("Generating data for", team_name)
        realm = make_realm(realm_id, team)
        realm_output_dir = os.path.join(output_dir, team_name)

        reset_mirror_dummy_users(username_to_user)
        label_mirror_dummy_users(
            len(mattermost_data["team"]), team_name, mattermost_data, username_to_user
        )

        convert_user_data(
            user_handler=user_handler,
            user_id_mapper=user_id_mapper,
            user_data_map=username_to_user,
            realm_id=realm_id,
            team_name=team_name,
        )

        zerver_stream = convert_channel_data(
            channel_data=mattermost_data["channel"],
            user_data_map=username_to_user,
            subscriber_handler=subscriber_handler,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            realm_id=realm_id,
            team_name=team_name,
        )
        realm["zerver_stream"] = zerver_stream

        zerver_direct_message_group: list[ZerverFieldsT] = []
        if len(mattermost_data["team"]) == 1:
            zerver_direct_message_group = convert_direct_message_group_data(
                direct_message_group_data=mattermost_data["direct_channel"],
                user_data_map=username_to_user,
                subscriber_handler=subscriber_handler,
                direct_message_group_id_mapper=direct_message_group_id_mapper,
                user_id_mapper=user_id_mapper,
                realm_id=realm_id,
                team_name=team_name,
            )
            realm["zerver_huddle"] = zerver_direct_message_group

        all_users = user_handler.get_all_users()

        zerver_recipient = build_recipients(
            zerver_userprofile=all_users,
            zerver_stream=zerver_stream,
            zerver_direct_message_group=zerver_direct_message_group,
        )
        realm["zerver_recipient"] = zerver_recipient

        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

        direct_message_group_subscriptions = build_direct_message_group_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_direct_message_group=zerver_direct_message_group,
        )

        personal_subscriptions = build_personal_subscriptions(
            zerver_recipient=zerver_recipient,
        )

        # Mattermost currently supports only exporting messages from channels.
        # Personal and Group Direct messages are not exported.
        zerver_subscription = (
            personal_subscriptions + stream_subscriptions + direct_message_group_subscriptions
        )
        realm["zerver_subscription"] = zerver_subscription

        zerver_realmemoji = write_emoticon_data(
            realm_id=realm_id,
            custom_emoji_data=mattermost_data["emoji"],
            data_dir=mattermost_data_dir,
            output_dir=realm_output_dir,
        )
        realm["zerver_realmemoji"] = zerver_realmemoji

        subscriber_map = make_subscriber_map(
            zerver_subscription=zerver_subscription,
        )

        total_reactions: list[dict[str, Any]] = []
        uploads_list: list[ZerverFieldsT] = []
        zerver_attachment: list[ZerverFieldsT] = []

        write_message_data(
            num_teams=len(mattermost_data["team"]),
            team_name=team_name,
            realm_id=realm_id,
            post_data=mattermost_data["post"],
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            output_dir=realm_output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            direct_message_group_id_mapper=direct_message_group_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            mattermost_data_dir=mattermost_data_dir,
        )
        realm["zerver_reaction"] = total_reactions
        realm["zerver_userprofile"] = user_handler.get_all_users()
        realm["sort_by_date"] = True

        create_converted_data_files(realm, realm_output_dir, "/realm.json")
        # Mattermost currently doesn't support exporting avatars
        create_converted_data_files([], realm_output_dir, "/avatars/records.json")

        # Export message attachments
        attachment: dict[str, list[Any]] = {"zerver_attachment": zerver_attachment}
        create_converted_data_files(uploads_list, realm_output_dir, "/uploads/records.json")
        create_converted_data_files(attachment, realm_output_dir, "/attachment.json")

        do_common_export_processes(realm_output_dir)