2020-06-11 00:54:34 +02:00
|
|
|
import logging
|
2017-12-08 11:27:42 +01:00
|
|
|
import os
|
2020-06-11 00:54:34 +02:00
|
|
|
import random
|
2020-09-05 04:02:13 +02:00
|
|
|
import secrets
|
2017-12-08 11:27:42 +01:00
|
|
|
import shutil
|
|
|
|
import subprocess
|
2018-12-05 04:13:47 +01:00
|
|
|
from collections import defaultdict
|
2020-06-11 00:54:34 +02:00
|
|
|
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
|
2018-12-05 04:13:47 +01:00
|
|
|
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2020-06-11 00:54:34 +02:00
|
|
|
import requests
|
2018-02-21 10:02:44 +01:00
|
|
|
from django.conf import settings
|
2018-04-23 23:36:40 +02:00
|
|
|
from django.forms.models import model_to_dict
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
|
|
|
|
from zerver.data_import.import_util import (
|
|
|
|
ZerverFieldsT,
|
|
|
|
build_attachment,
|
|
|
|
build_avatar,
|
|
|
|
build_defaultstream,
|
|
|
|
build_huddle,
|
|
|
|
build_message,
|
|
|
|
build_realm,
|
|
|
|
build_recipient,
|
|
|
|
build_stream,
|
|
|
|
build_subscription,
|
|
|
|
build_usermessages,
|
|
|
|
build_zerver_realm,
|
|
|
|
create_converted_data_files,
|
|
|
|
make_subscriber_map,
|
|
|
|
process_avatars,
|
|
|
|
process_emojis,
|
|
|
|
process_uploads,
|
|
|
|
)
|
2018-10-28 18:55:32 +01:00
|
|
|
from zerver.data_import.sequencer import NEXT_ID
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.data_import.slack_message_conversion import (
|
|
|
|
convert_to_zulip_markdown,
|
|
|
|
get_user_full_name,
|
|
|
|
)
|
2020-02-06 07:07:10 +01:00
|
|
|
from zerver.lib.emoji import name_to_codepoint
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE
|
2020-09-05 04:02:13 +02:00
|
|
|
from zerver.lib.upload import resize_logo, sanitize_name
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.models import (
|
|
|
|
CustomProfileField,
|
|
|
|
CustomProfileFieldValue,
|
|
|
|
Reaction,
|
|
|
|
Realm,
|
|
|
|
RealmEmoji,
|
|
|
|
Recipient,
|
|
|
|
UserProfile,
|
|
|
|
)
|
2017-12-29 10:57:48 +01:00
|
|
|
|
2019-08-12 13:44:07 +02:00
|
|
|
# Type aliases for the lookup tables built while converting a Slack export.

# Slack user id -> Zulip user id.
SlackToZulipUserIDT = Dict[str, int]
# Slack channel name -> (Slack channel id, Zulip stream id).
AddedChannelsT = Dict[str, Tuple[str, int]]
# Slack MPIM (multiparty IM) name -> (Slack MPIM id, Zulip huddle id).
AddedMPIMsT = Dict[str, Tuple[str, int]]
# Slack DM id -> the pair of DM participants.
DMMembersT = Dict[str, Tuple[str, str]]
# Slack recipient name (channel, MPIM, user, ...) -> Zulip recipient id.
SlackToZulipRecipientT = Dict[str, int]
|
2017-12-08 11:27:42 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-12-08 11:27:42 +01:00
|
|
|
def rm_tree(path: str) -> None:
    """Recursively delete ``path`` if it exists; do nothing when it is absent."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def slack_workspace_to_realm(
    domain_name: str,
    realm_id: int,
    user_list: List[ZerverFieldsT],
    realm_subdomain: str,
    slack_data_dir: str,
    custom_emoji_list: ZerverFieldsT,
) -> Tuple[
    ZerverFieldsT,
    SlackToZulipUserIDT,
    SlackToZulipRecipientT,
    AddedChannelsT,
    AddedMPIMsT,
    DMMembersT,
    List[ZerverFieldsT],
    ZerverFieldsT,
]:
    """
    Convert the users, channels and custom emoji of a Slack workspace into
    Zulip realm data.

    Returns:
    1. realm, the converted realm data
    2. slack_user_id_to_zulip_user_id, a dictionary mapping Slack user id to Zulip user id
    3. slack_recipient_name_to_zulip_recipient_id, a dictionary mapping Slack recipient
       name (channel names, mpim names, usernames, etc) to Zulip recipient id
    4. added_channels, a dictionary mapping channel name to (channel id, Zulip stream_id)
    5. added_mpims, a dictionary mapping MPIM name to (MPIM id, Zulip huddle_id)
    6. dm_members, a dictionary mapping DM id to the tuple of DM participants
    7. avatars, the list of avatar records destined for Zulip's avatar records.json
    8. emoji_url_map, a dictionary mapping emoji name to its Slack URL
    """
    import_time = float(timezone_now().timestamp())

    realm = build_realm(
        build_zerver_realm(realm_id, realm_subdomain, import_time, "Slack"),
        realm_id,
        domain_name,
    )

    # Users are converted first: the channel conversion below needs the
    # Slack -> Zulip user id mapping and the converted user profiles.
    user_conversion = users_to_zerver_userprofile(
        slack_data_dir, user_list, realm_id, int(import_time), domain_name
    )
    zerver_userprofile, avatars, slack_user_id_to_zulip_user_id = user_conversion[:3]
    zerver_customprofilefield, zerver_customprofilefield_value = user_conversion[3:]

    # The channel conversion also fills in realm["zerver_defaultstream"]; see
    # https://zulip.com/help/set-default-streams-for-new-users for
    # documentation on default streams.
    channel_conversion = channels_to_zerver_stream(
        slack_data_dir, realm_id, realm, slack_user_id_to_zulip_user_id, zerver_userprofile
    )
    realm, added_channels, added_mpims = channel_conversion[:3]
    dm_members, slack_recipient_name_to_zulip_recipient_id = channel_conversion[3:]

    zerver_realmemoji, emoji_url_map = build_realmemoji(custom_emoji_list, realm_id)

    realm.update(
        {
            "zerver_realmemoji": zerver_realmemoji,
            "zerver_userprofile": zerver_userprofile,
            "zerver_customprofilefield": zerver_customprofilefield,
            "zerver_customprofilefieldvalue": zerver_customprofilefield_value,
        }
    )

    return (
        realm,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        avatars,
        emoji_url_map,
    )
|
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def build_realmemoji(
    custom_emoji_list: ZerverFieldsT, realm_id: int
) -> Tuple[List[ZerverFieldsT], ZerverFieldsT]:
    """
    Build the realm emoji rows for the custom emoji of a Slack workspace.

    Returns:
    1. zerver_realmemoji, the list of RealmEmoji rows (as dictionaries),
       with ids assigned sequentially from 0
    2. emoji_url_map, a dictionary mapping emoji name to its Slack URL
    """
    zerver_realmemoji: List[ZerverFieldsT] = []
    emoji_url_map: ZerverFieldsT = {}

    for emoji_name, url in custom_emoji_list.items():
        # Some of the emojis we get from the API have invalid links;
        # skipping them here prevents errors related to them.
        if "emoji.slack-edge.com" not in url:
            continue

        # Ids are handed out in order, so the next id always equals the
        # number of rows produced so far.
        emoji_row = model_to_dict(
            RealmEmoji(
                name=emoji_name,
                id=len(zerver_realmemoji),
                file_name=os.path.basename(url),
                deactivated=False,
            ),
            exclude=["realm", "author"],
        )
        emoji_row["author"] = None
        emoji_row["realm"] = realm_id

        emoji_url_map[emoji_name] = url
        zerver_realmemoji.append(emoji_row)

    return zerver_realmemoji, emoji_url_map
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def users_to_zerver_userprofile(
    slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int, timestamp: Any, domain_name: str
) -> Tuple[
    List[ZerverFieldsT],
    List[ZerverFieldsT],
    SlackToZulipUserIDT,
    List[ZerverFieldsT],
    List[ZerverFieldsT],
]:
    """
    Convert the Slack user list into Zulip user profile rows.

    Returns:
    1. zerver_userprofile, the list of user profile rows
    2. avatar_list, the list of avatar records destined for Zulip's avatar records.json
    3. slack_user_id_to_zulip_user_id, a dictionary mapping Slack user ID to Zulip
       user id
    4. zerver_customprofilefield, the list of all custom profile fields
    5. zerver_customprofilefield_values, the list of per-user custom profile field values
    """
    logging.info("######### IMPORTING USERS STARTED #########\n")

    zerver_userprofile: List[ZerverFieldsT] = []
    zerver_customprofilefield: List[ZerverFieldsT] = []
    zerver_customprofilefield_values: List[ZerverFieldsT] = []
    avatar_list: List[ZerverFieldsT] = []
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT = {}

    # The user data we get from the Slack API does not contain custom profile
    # data, hence we read it from the users.json of the Slack export instead.
    slack_user_id_to_custom_profile_fields: ZerverFieldsT = {}
    slack_custom_field_name_to_zulip_custom_field_id: ZerverFieldsT = {}
    for exported_user in get_data_file(slack_data_dir + "/users.json"):
        process_slack_custom_fields(exported_user, slack_user_id_to_custom_profile_fields)

    # We have only one primary owner in Slack, see
    # https://get.slack.help/hc/en-us/articles/201912948-Owners-and-Administrators
    # Id 0 is reserved for the primary owner so that they are imported first;
    # every other user is numbered from 1 in list order.
    primary_owner_id = 0
    next_user_id = 1
    custom_profile_field_id_count = 0
    custom_profile_field_value_id_count = 0

    for user in users:
        slack_user_id = user["id"]
        is_primary_owner = user.get("is_primary_owner", False)
        user_id = primary_owner_id if is_primary_owner else next_user_id

        email = get_user_email(user, domain_name)
        # ref: https://zulip.com/help/change-your-profile-picture
        avatar_url = build_avatar_url(
            slack_user_id, user["team_id"], user["profile"]["avatar_hash"]
        )
        # Presumably records the avatar in avatar_list; build_avatar is
        # defined outside this file -- verify there.
        build_avatar(user_id, realm_id, email, avatar_url, timestamp, avatar_list)

        # Guest status takes precedence over owner/administrator status.
        if get_guest(user):
            role = UserProfile.ROLE_GUEST
        elif get_owner(user):
            role = UserProfile.ROLE_REALM_OWNER
        elif get_admin(user):
            role = UserProfile.ROLE_REALM_ADMINISTRATOR
        else:
            role = UserProfile.ROLE_MEMBER

        timezone = get_user_timezone(user)

        if slack_user_id in slack_user_id_to_custom_profile_fields:
            user_custom_fields = slack_user_id_to_custom_profile_fields[slack_user_id]
            (
                slack_custom_field_name_to_zulip_custom_field_id,
                custom_profile_field_id_count,
            ) = build_customprofile_field(
                zerver_customprofilefield,
                user_custom_fields,
                custom_profile_field_id_count,
                realm_id,
                slack_custom_field_name_to_zulip_custom_field_id,
            )
            custom_profile_field_value_id_count = build_customprofilefields_values(
                slack_custom_field_name_to_zulip_custom_field_id,
                user_custom_fields,
                user_id,
                custom_profile_field_value_id_count,
                zerver_customprofilefield_values,
            )

        is_bot = user.get("is_bot", False)
        is_mirror_dummy = user["is_mirror_dummy"]
        userprofile = UserProfile(
            full_name=get_user_full_name(user),
            is_active=not user.get("deleted", False) and not is_mirror_dummy,
            is_mirror_dummy=is_mirror_dummy,
            id=user_id,
            email=email,
            delivery_email=email,
            avatar_source="U",
            is_bot=is_bot,
            role=role,
            bot_type=1 if is_bot else None,
            date_joined=timestamp,
            timezone=timezone,
            last_login=timestamp,
        )
        userprofile_dict = model_to_dict(userprofile)
        # Set realm id separately as the corresponding realm is not yet a Realm model instance
        userprofile_dict["realm"] = realm_id

        zerver_userprofile.append(userprofile_dict)
        slack_user_id_to_zulip_user_id[slack_user_id] = user_id
        if not is_primary_owner:
            next_user_id += 1

        logging.info("%s -> %s", user["name"], userprofile_dict["email"])

    process_customprofilefields(zerver_customprofilefield, zerver_customprofilefield_values)
    logging.info("######### IMPORTING USERS FINISHED #########\n")
    return (
        zerver_userprofile,
        avatar_list,
        slack_user_id_to_zulip_user_id,
        zerver_customprofilefield,
        zerver_customprofilefield_values,
    )
|
|
|
|
|
2017-12-08 11:27:42 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def build_customprofile_field(
    customprofile_field: List[ZerverFieldsT],
    fields: ZerverFieldsT,
    custom_profile_field_id: int,
    realm_id: int,
    slack_custom_field_name_to_zulip_custom_field_id: ZerverFieldsT,
) -> Tuple[ZerverFieldsT, int]:
    """
    Create a Zulip custom profile field for every Slack field in ``fields``
    that has not been seen before.

    New field rows are appended to ``customprofile_field`` and registered in
    ``slack_custom_field_name_to_zulip_custom_field_id`` (both are modified
    in place).

    Returns:
    1. slack_custom_field_name_to_zulip_custom_field_id, the updated mapping
    2. custom_profile_field_id, the next unused custom profile field id
    """
    # The name of the custom profile field is not provided in the Slack data;
    # only hash keys of the fields are provided.
    # Reference: https://api.slack.com/methods/users.profile.set
    # "phone" and "skype" are the exceptions: they keep their own name.
    named_slack_fields = ("phone", "skype")

    for field in fields:
        if field in slack_custom_field_name_to_zulip_custom_field_id:
            continue

        if field in named_slack_fields:
            field_name = field
        else:
            field_name = f"Slack custom field {str(custom_profile_field_id + 1)}"

        # field_type=1 ('SHORT_TEXT') is only the initial default; the final
        # type is settled later in 'process_customprofilefields'.
        field_row = model_to_dict(
            CustomProfileField(
                id=custom_profile_field_id,
                name=field_name,
                field_type=1,
            ),
            exclude=["realm"],
        )
        field_row["realm"] = realm_id

        slack_custom_field_name_to_zulip_custom_field_id[field] = custom_profile_field_id
        custom_profile_field_id += 1
        customprofile_field.append(field_row)

    return slack_custom_field_name_to_zulip_custom_field_id, custom_profile_field_id
|
2018-04-09 13:53:32 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_slack_custom_fields(
    user: ZerverFieldsT, slack_user_id_to_custom_profile_fields: ZerverFieldsT
) -> None:
    """
    Record the custom profile fields of one Slack user.

    An entry keyed by ``user["id"]`` is stored in
    ``slack_user_id_to_custom_profile_fields`` (modified in place). It holds
    the user's ``profile["fields"]`` entries, if any, plus a
    ``{"value": ...}`` entry for each of "phone" and "skype" that is present
    in the profile.
    """
    profile = user["profile"]

    # Work on a copy: the "phone"/"skype" entries added below must not leak
    # back into the caller's user["profile"]["fields"] dictionary.
    custom_fields = dict(profile["fields"]) if profile.get("fields") else {}

    for field in ("phone", "skype"):
        if field in profile:
            custom_fields[field] = {"value": profile[field]}

    slack_user_id_to_custom_profile_fields[user["id"]] = custom_fields
|
|
|
|
|
|
|
|
|
|
|
|
def build_customprofilefields_values(
    slack_custom_field_name_to_zulip_custom_field_id: ZerverFieldsT,
    fields: ZerverFieldsT,
    user_id: int,
    custom_field_id: int,
    custom_field_values: List[ZerverFieldsT],
) -> int:
    """
    Append one custom profile field value row to ``custom_field_values`` for
    each non-empty entry of ``fields``, all belonging to ``user_id``.

    ``custom_field_id`` is the id given to the first row created; the return
    value is the next unused id.
    """
    for field, value in fields.items():
        field_text = value["value"]
        if field_text != "":
            value_row = model_to_dict(
                CustomProfileFieldValue(id=custom_field_id, value=field_text),
                exclude=["user_profile", "field"],
            )
            value_row["user_profile"] = user_id
            value_row["field"] = slack_custom_field_name_to_zulip_custom_field_id[field]

            custom_field_values.append(value_row)
            custom_field_id += 1

    return custom_field_id
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_customprofilefields(
    customprofilefield: List[ZerverFieldsT], customprofilefield_value: List[ZerverFieldsT]
) -> None:
    """
    Switch a custom profile field to the long text type (``field_type`` 2)
    when at least one of its values is longer than 50 characters.

    The rows of ``customprofilefield`` are updated in place.
    """
    for field in customprofilefield:
        has_long_value = any(
            field_value["field"] == field["id"] and len(field_value["value"]) > 50
            for field_value in customprofilefield_value
        )
        if has_long_value:
            field["field_type"] = 2  # corresponding to Long text
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_user_email(user: ZerverFieldsT, domain_name: str) -> str:
    """
    Return the email address to use for a Slack user.

    The profile's own email wins. Otherwise an address is synthesized for
    mirror dummy users, for bots and for Slackbot, in that order.

    Raises AssertionError when no address can be determined.
    """
    profile = user["profile"]

    if "email" in profile:
        return profile["email"]

    if user["is_mirror_dummy"]:
        return "{}@{}.slack.com".format(user["name"], user["team_domain"])

    if "bot_id" in profile:
        # Prefer the normalized real name, falling back to the first name.
        for name_key in ("real_name_normalized", "first_name"):
            if name_key in profile:
                slack_bot_name = profile[name_key]
                break
        else:
            raise AssertionError("Could not identify bot type")
        return slack_bot_name.replace("Bot", "").replace(" ", "") + f"-bot@{domain_name}"

    if get_user_full_name(user).lower() == "slackbot":
        return f"imported-slackbot-bot@{domain_name}"

    raise AssertionError(f"Could not find email address for Slack user {user}")
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-02-16 23:54:38 +01:00
|
|
|
def build_avatar_url(slack_user_id: str, team_id: str, avatar_hash: str) -> str:
    """Return the ca.slack-edge.com avatar URL for the given team, user and avatar hash."""
    return f"https://ca.slack-edge.com/{team_id}-{slack_user_id}-{avatar_hash}"
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-08 21:01:42 +02:00
|
|
|
def get_owner(user: ZerverFieldsT) -> bool:
    """Return whether the Slack user is flagged as the primary owner or as an owner."""
    return user.get("is_primary_owner", False) or user.get("is_owner", False)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-08 21:01:42 +02:00
|
|
|
def get_admin(user: ZerverFieldsT) -> bool:
    """Return the Slack user's ``is_admin`` flag, or False when it is missing."""
    return user.get("is_admin", False)
|
2018-02-06 21:02:23 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-10-31 13:18:49 +01:00
|
|
|
def get_guest(user: ZerverFieldsT) -> bool:
    """Return whether the Slack user is flagged as restricted or ultra restricted."""
    # Slack's Single channel and multi channel guests both have
    # is_restricted set to True. So assuming Slack doesn't change their
    # data model, it would also be correct to just check whether
    # is_restricted is set to True.
    return user.get("is_restricted", False) or user.get("is_ultra_restricted", False)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_user_timezone(user: ZerverFieldsT) -> str:
    """
    Return the user's ``tz`` value, or "America/New_York" when it is
    missing, None, or does not contain a "/".
    """
    fallback_timezone = "America/New_York"
    slack_timezone = user.get("tz", fallback_timezone)
    if slack_timezone is not None and "/" in slack_timezone:
        return slack_timezone
    return fallback_timezone
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def channels_to_zerver_stream(
    slack_data_dir: str,
    realm_id: int,
    realm: Dict[str, Any],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    zerver_userprofile: List[ZerverFieldsT],
) -> Tuple[
    Dict[str, List[ZerverFieldsT]], AddedChannelsT, AddedMPIMsT, DMMembersT, SlackToZulipRecipientT
]:
    """
    Build the stream, huddle, recipient, subscription and default-stream
    rows of `realm` from the channels.json, groups.json, mpims.json and
    dms.json listings found in slack_data_dir.  Only channels.json is
    required; each of the other three files is treated as empty when it
    does not exist.  (zerver_userprofile is accepted but not used here.)

    Returns:
    1. realm, the dict that was passed in, with its zerver_stream, zerver_huddle,
       zerver_subscription, zerver_recipient and zerver_defaultstream lists rebuilt
    2. added_channels, a dictionary mapping stream name to (Slack channel id, Zulip stream_id)
    3. added_mpims, a dictionary mapping MPIM (multiparty IM) name to
       (Slack MPIM id, Zulip huddle_id)
    4. dm_members, a dictionary mapping DM id to a tuple of the DM's first two members
    5. slack_recipient_name_to_zulip_recipient_id, a dictionary mapping Slack recipient
       names (stream names, MPIM names, Slack user ids) to Zulip recipient_id
    """
    logging.info("######### IMPORTING CHANNELS STARTED #########\n")

    added_channels: AddedChannelsT = {}
    added_mpims: AddedMPIMsT = {}
    dm_members: DMMembersT = {}
    slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT = {}

    # Start every table this function owns from a fresh, empty list.
    for table_name in (
        "zerver_stream",
        "zerver_huddle",
        "zerver_subscription",
        "zerver_recipient",
        "zerver_defaultstream",
    ):
        realm[table_name] = []

    # Id allocators shared by the nested helpers below.
    subscription_id_count = 0
    recipient_id_count = 0
    stream_id_count = 0
    defaultstream_id = 0
    huddle_id_count = 0

    # Channels with these Slack names become Zulip default streams,
    # unless the resulting stream is deactivated.
    slack_default_channels = ["general", "random"]

    def read_optional_listing(file_name: str) -> List[Dict[str, Any]]:
        # Listings other than channels.json may be missing from the
        # data directory; a missing file is treated as an empty listing.
        try:
            return get_data_file(slack_data_dir + file_name)
        except FileNotFoundError:
            return []

    def process_channels(channels: List[Dict[str, Any]], invite_only: bool = False) -> None:
        nonlocal stream_id_count, recipient_id_count, defaultstream_id, subscription_id_count

        for channel in channels:
            # Only Slack's "purpose" text is carried over, as the stream
            # description.
            # WARN This mapping is lossy: the channel topic and the
            # topic.creator, topic.last_set, purpose.creator and
            # purpose.last_set fields are not preserved.
            description = channel["purpose"]["value"]
            stream_id = stream_id_count
            recipient_id = recipient_id_count

            stream = build_stream(
                float(channel["created"]),
                realm_id,
                channel["name"],
                description,
                stream_id,
                channel["is_archived"],
                invite_only,
            )
            realm["zerver_stream"].append(stream)

            is_default = channel["name"] in slack_default_channels and not stream["deactivated"]
            if is_default:
                realm["zerver_defaultstream"].append(
                    build_defaultstream(realm_id, stream_id, defaultstream_id)
                )
                defaultstream_id += 1

            added_channels[stream["name"]] = (channel["id"], stream_id)

            realm["zerver_recipient"].append(
                build_recipient(stream_id, recipient_id, Recipient.STREAM)
            )
            slack_recipient_name_to_zulip_recipient_id[stream["name"]] = recipient_id

            subscription_id_count = get_subscription(
                channel["members"],
                realm["zerver_subscription"],
                recipient_id,
                slack_user_id_to_zulip_user_id,
                subscription_id_count,
            )

            stream_id_count += 1
            recipient_id_count += 1
            logging.info("%s -> created", channel["name"])

            # TODO map Slack's pins to Zulip's stars
            # There is the security model that Slack's pins are known to the team owner
            # as evident from where it is stored at (channels)
            # "pins": [
            #         {
            #             "id": "1444755381.000003",
            #             "type": "C",
            #             "user": "U061A5N1G",
            #             "owner": "U061A5N1G",
            #             "created": "1444755463"
            #         }
            #         ],

    # mpim is the Slack equivalent of huddle.
    def process_mpims(mpims: List[Dict[str, Any]]) -> None:
        nonlocal huddle_id_count, recipient_id_count, subscription_id_count

        for mpim in mpims:
            huddle_id = huddle_id_count
            recipient_id = recipient_id_count

            realm["zerver_huddle"].append(build_huddle(huddle_id))
            added_mpims[mpim["name"]] = (mpim["id"], huddle_id)

            realm["zerver_recipient"].append(
                build_recipient(huddle_id, recipient_id, Recipient.HUDDLE)
            )
            slack_recipient_name_to_zulip_recipient_id[mpim["name"]] = recipient_id

            subscription_id_count = get_subscription(
                mpim["members"],
                realm["zerver_subscription"],
                recipient_id,
                slack_user_id_to_zulip_user_id,
                subscription_id_count,
            )

            huddle_id_count += 1
            recipient_id_count += 1
            logging.info("%s -> created", mpim["name"])

    # Public channels first (this listing must exist), then private ones.
    process_channels(get_data_file(slack_data_dir + "/channels.json"))
    process_channels(read_optional_listing("/groups.json"), True)

    process_mpims(read_optional_listing("/mpims.json"))

    # Every imported user gets a personal recipient plus a subscription
    # to that recipient.
    for slack_user_id, zulip_user_id in slack_user_id_to_zulip_user_id.items():
        personal_recipient = build_recipient(
            zulip_user_id, recipient_id_count, Recipient.PERSONAL
        )
        slack_recipient_name_to_zulip_recipient_id[slack_user_id] = recipient_id_count
        personal_sub = build_subscription(
            recipient_id_count, zulip_user_id, subscription_id_count
        )
        realm["zerver_recipient"].append(personal_recipient)
        realm["zerver_subscription"].append(personal_sub)
        recipient_id_count += 1
        subscription_id_count += 1

    # DMs create no rows here; we only remember who the two participants are.
    for dm in read_optional_listing("/dms.json"):
        participants = dm["members"]
        dm_members[dm["id"]] = (participants[0], participants[1])

    logging.info("######### IMPORTING STREAMS FINISHED #########\n")
    return (
        realm,
        added_channels,
        added_mpims,
        dm_members,
        slack_recipient_name_to_zulip_recipient_id,
    )
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_subscription(
    channel_members: List[str],
    zerver_subscription: List[ZerverFieldsT],
    recipient_id: int,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    subscription_id: int,
) -> int:
    """Append one subscription row to zerver_subscription for each member
    of a channel, all pointing at recipient_id.

    Subscription ids are handed out consecutively starting at
    subscription_id; the first id that was not used is returned so the
    caller can continue numbering from it.  Every entry of
    channel_members must be a key of slack_user_id_to_zulip_user_id.
    """
    next_subscription_id = subscription_id
    for member in channel_members:
        zulip_user_id = slack_user_id_to_zulip_user_id[member]
        zerver_subscription.append(
            build_subscription(recipient_id, zulip_user_id, next_subscription_id)
        )
        next_subscription_id += 1
    return next_subscription_id
|
2017-12-08 11:27:42 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_long_term_idle_users(
    slack_data_dir: str,
    users: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    added_channels: AddedChannelsT,
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
    zerver_userprofile: List[ZerverFieldsT],
) -> Set[int]:
    """Algorithmically, we treat users who have sent more than 10 messages
    or have sent a message within the last 60 days as active.
    Everyone else is treated as long-term idle, which means they will
    have a slightly slower first page load when coming back to
    Zulip.

    Walks every message yielded by get_messages_iterator once.  Rows of
    zerver_userprofile belonging to long-term idle users are updated in
    place (long_term_idle and last_active_message_id are set).

    Returns the set of Zulip user ids that are long-term idle.
    """
    all_messages = get_messages_iterator(slack_data_dir, added_channels, added_mpims, dm_members)

    # Message "ts" values and NOW are both in seconds since the epoch, so
    # the 60-day window has to be expressed in seconds as well.
    RECENT_ACTIVITY_WINDOW_SECONDS = 60 * 24 * 60 * 60

    sender_counts: Dict[str, int] = defaultdict(int)
    recent_senders: Set[str] = set()
    NOW = float(timezone_now().timestamp())
    for message in all_messages:
        timestamp = float(message["ts"])
        slack_user_id = get_message_sending_user(message)
        if not slack_user_id:
            continue

        # Already known to be active; no need to keep counting.
        if slack_user_id in recent_senders:
            continue

        if NOW - timestamp < RECENT_ACTIVITY_WINDOW_SECONDS:
            recent_senders.add(slack_user_id)

        sender_counts[slack_user_id] += 1

    # Prolific senders count as active even if none of their messages is recent.
    for slack_sender_id, count in sender_counts.items():
        if count > 10:
            recent_senders.add(slack_sender_id)

    long_term_idle: Set[int] = {
        slack_user_id_to_zulip_user_id[slack_user["id"]]
        for slack_user in users
        if slack_user["id"] not in recent_senders
    }

    for user_profile_row in zerver_userprofile:
        if user_profile_row["id"] in long_term_idle:
            user_profile_row["long_term_idle"] = True
            # Setting last_active_message_id to 1 means the user, if
            # imported, will get the full message history for the
            # streams they were on.
            user_profile_row["last_active_message_id"] = 1

    return long_term_idle
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def convert_slack_workspace_messages(
    slack_data_dir: str,
    users: List[ZerverFieldsT],
    realm_id: int,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
    added_channels: AddedChannelsT,
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
    realm: ZerverFieldsT,
    zerver_userprofile: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    domain_name: str,
    output_dir: str,
    chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE,
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
    """
    Convert all Slack messages in batches of chunk_size, writing each
    converted batch to its own numbered messages-NNNNNN.json file in
    output_dir.

    Returns:
    1. reactions, which is a list of the reactions
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """

    long_term_idle = process_long_term_idle_users(
        slack_data_dir,
        users,
        slack_user_id_to_zulip_user_id,
        added_channels,
        added_mpims,
        dm_members,
        zerver_userprofile,
    )

    all_messages = get_messages_iterator(slack_data_dir, added_channels, added_mpims, dm_members)
    logging.info("######### IMPORTING MESSAGES STARTED #########\n")

    total_reactions: List[ZerverFieldsT] = []
    total_attachments: List[ZerverFieldsT] = []
    total_uploads: List[ZerverFieldsT] = []

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm["zerver_subscription"],
    )

    dump_file_id = 1
    while True:
        # Pull the next batch off the shared iterator; it resumes where
        # the previous batch stopped, so messages are never all held in
        # memory at once.
        message_data = []
        for msg in all_messages:
            message_data.append(msg)
            if len(message_data) == chunk_size:
                break

        if not message_data:
            # The iterator is exhausted.
            break

        converted = channel_message_to_zerver_message(
            realm_id,
            users,
            slack_user_id_to_zulip_user_id,
            slack_recipient_name_to_zulip_recipient_id,
            message_data,
            zerver_realmemoji,
            subscriber_map,
            added_channels,
            dm_members,
            domain_name,
            long_term_idle,
        )
        zerver_message, zerver_usermessage, attachment, uploads, reactions = converted

        message_json = {
            "zerver_message": zerver_message,
            "zerver_usermessage": zerver_usermessage,
        }

        message_file = f"/messages-{dump_file_id:06}.json"
        logging.info("Writing Messages to %s\n", output_dir + message_file)
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions.extend(reactions)
        total_attachments.extend(attachment)
        total_uploads.extend(uploads)

        dump_file_id += 1

    logging.info("######### IMPORTING MESSAGES FINISHED #########\n")
    return total_reactions, total_uploads, total_attachments
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_messages_iterator(
    slack_data_dir: str,
    added_channels: Dict[str, Any],
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
) -> Iterator[ZerverFieldsT]:
    """Lazily yield the messages of every channel, MPIM and DM directory
    under slack_data_dir.

    Files are grouped by file name across all directories and visited in
    sorted file-name order; the messages sharing one file name are
    yielded sorted by their "ts" value.  Each message is tagged with
    "channel_name", "mpim_name" or "pm_name" (set to the name of the
    directory it came from) before being yielded.

    Only one file name's worth of messages is held in memory at a time,
    which matters because loading everything at once can get large
    imports OOM killed.
    """

    dir_names = [*added_channels.keys(), *added_mpims.keys(), *dm_members.keys()]

    # file name -> every conversation directory containing a file of that name
    all_json_names: Dict[str, List[str]] = defaultdict(list)
    for dir_name in dir_names:
        dir_path = os.path.join(slack_data_dir, dir_name)
        for json_name in os.listdir(dir_path):
            all_json_names[json_name].append(dir_path)

    # Visit the file names in sorted order.
    for json_name in sorted(all_json_names):
        messages_for_one_day: List[ZerverFieldsT] = []
        for dir_path in all_json_names[json_name]:
            message_file_path = os.path.join(dir_path, json_name)
            dir_name = os.path.basename(dir_path)

            # The tag depends only on the directory, so pick it once
            # rather than once per message.
            if dir_name in added_channels:
                tag_key: Optional[str] = "channel_name"
            elif dir_name in added_mpims:
                tag_key = "mpim_name"
            elif dir_name in dm_members:
                tag_key = "pm_name"
            else:
                tag_key = None

            for message in get_data_file(message_file_path):
                if message.get("user") == "U00":
                    # Skip messages involving the "U00" user, which is
                    # apparently used in some channel rename messages.
                    # It's likely just the result of some bug in Slack's
                    # export system.  Arguably we could change this to
                    # point to slackbot instead, but skipping those
                    # messages is simpler.
                    continue
                if tag_key is not None:
                    message[tag_key] = dir_name
                messages_for_one_day.append(message)

        # Sort by timestamp so that messages from different
        # conversations come out in the proper date order.
        yield from sorted(messages_for_one_day, key=lambda m: m["ts"])
|
2018-02-25 07:08:28 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def channel_message_to_zerver_message(
    realm_id: int,
    users: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
    all_messages: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    subscriber_map: Dict[int, Set[int]],
    added_channels: AddedChannelsT,
    dm_members: DMMembersT,
    domain_name: str,
    long_term_idle: Set[int],
) -> Tuple[
    List[ZerverFieldsT],
    List[ZerverFieldsT],
    List[ZerverFieldsT],
    List[ZerverFieldsT],
    List[ZerverFieldsT],
]:
    """
    Convert a batch of Slack messages into Zulip import records.

    Returns a 5-tuple of lists:
    1. zerver_message, the converted messages
    2. zerver_usermessage, the usermessage rows built for those messages
    3. zerver_attachment, the attachment records
    4. uploads_list, the uploads to be mapped in uploads records.json
    5. reaction_list, all user reactions found on the messages
    """
    # Subtypes that are dropped entirely: Zulip doesn't have a pinned_item
    # concept, and Slack's channel join/leave/rename notices are spammy.
    ignored_subtypes = (
        "pinned_item",
        "unpinned_item",
        "channel_join",
        "channel_leave",
        "channel_name",
    )
    # Subtypes which have only the action in the message are rendered
    # with a leading '/me'.  For example "sh_room_created" has the
    # message 'started a call', which becomes '/me started a call'.
    me_subtypes = ("bot_add", "sh_room_created", "me_message")

    zerver_message: List[ZerverFieldsT] = []
    zerver_usermessage: List[ZerverFieldsT] = []
    uploads_list: List[ZerverFieldsT] = []
    zerver_attachment: List[ZerverFieldsT] = []
    reaction_list: List[ZerverFieldsT] = []

    total_user_messages = 0
    total_skipped_user_messages = 0

    def add_usermessages(
        target_recipient_id: int, mentioned: Any, target_message_id: int, private: bool
    ) -> Tuple[int, int]:
        # Thin wrapper so both call sites below pass identical arguments.
        return build_usermessages(
            zerver_usermessage=zerver_usermessage,
            subscriber_map=subscriber_map,
            recipient_id=target_recipient_id,
            mentioned_user_ids=mentioned,
            message_id=target_message_id,
            is_private=private,
            long_term_idle=long_term_idle,
        )

    for message in all_messages:
        slack_user_id = get_message_sending_user(message)
        if not slack_user_id:
            # Ignore messages without slack_user_id.
            # These are sometimes produced by Slack.
            continue

        subtype = message.get("subtype", False)
        if subtype in ignored_subtypes:
            continue

        try:
            content, mentioned_user_ids, has_link = convert_to_zulip_markdown(
                message["text"], users, added_channels, slack_user_id_to_zulip_user_id
            )
        except Exception:
            print("Slack message unexpectedly missing text representation:")
            print(orjson.dumps(message, option=orjson.OPT_INDENT_2).decode())
            continue
        rendered_content = None

        # Work out where the message goes: a channel, a group DM (mpim),
        # or a 1:1 DM.
        if "channel_name" in message:
            is_private = False
            recipient_id = slack_recipient_name_to_zulip_recipient_id[message["channel_name"]]
        elif "mpim_name" in message:
            is_private = True
            recipient_id = slack_recipient_name_to_zulip_recipient_id[message["mpim_name"]]
        elif "pm_name" in message:
            is_private = True
            members = dm_members[message["pm_name"]]
            # The message has not been modified since slack_user_id was
            # computed, so it is still the sending user here.
            sender_is_first = slack_user_id == members[0]
            recipient_id = slack_recipient_name_to_zulip_recipient_id[
                members[1] if sender_is_first else members[0]
            ]
            sender_recipient_id = slack_recipient_name_to_zulip_recipient_id[
                members[0] if sender_is_first else members[1]
            ]

        message_id = NEXT_ID("message")

        if "reactions" in message:
            build_reactions(
                reaction_list,
                message["reactions"],
                slack_user_id_to_zulip_user_id,
                message_id,
                zerver_realmemoji,
            )

        # Process different subtypes of slack messages
        if subtype in me_subtypes:
            content = f"/me {content}"
        if subtype == "file_comment":
            # The file_comment message type only indicates the
            # responsible user in a subfield.
            message["user"] = message["comment"]["user"]

        file_info = process_message_files(
            message=message,
            domain_name=domain_name,
            realm_id=realm_id,
            message_id=message_id,
            slack_user_id=slack_user_id,
            users=users,
            slack_user_id_to_zulip_user_id=slack_user_id_to_zulip_user_id,
            zerver_attachment=zerver_attachment,
            uploads_list=uploads_list,
        )

        content += file_info["content"]
        has_link = has_link or file_info["has_link"]

        zerver_message.append(
            build_message(
                "imported from Slack",
                float(message["ts"]),
                message_id,
                content,
                rendered_content,
                slack_user_id_to_zulip_user_id[slack_user_id],
                recipient_id,
                file_info["has_image"],
                has_link,
                file_info["has_attachment"],
            )
        )

        num_created, num_skipped = add_usermessages(
            recipient_id, mentioned_user_ids, message_id, is_private
        )
        total_user_messages += num_created
        total_skipped_user_messages += num_skipped

        # For a 1:1 DM, also build usermessage rows against the sender's
        # own recipient, unless it is the same recipient as above.
        if "pm_name" in message and recipient_id != sender_recipient_id:
            num_created, num_skipped = add_usermessages(
                sender_recipient_id, mentioned_user_ids, message_id, is_private
            )
            total_user_messages += num_created
            total_skipped_user_messages += num_skipped

    logging.debug(
        "Created %s UserMessages; deferred %s due to long-term idle",
        total_user_messages,
        total_skipped_user_messages,
    )
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, reaction_list
|
|
|
|
|
|
|
|
|
|
|
|
def process_message_files(
    message: ZerverFieldsT,
    domain_name: str,
    realm_id: int,
    message_id: int,
    slack_user_id: str,
    users: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
) -> Dict[str, Any]:
    """
    Convert the files attached to one Slack message.

    Files hosted on files.slack.com get an upload record (appended to
    uploads_list) and an attachment record (appended to
    zerver_attachment); any other file is only linked to.

    Returns a dict with:
    * content: Markdown links for the files, one per line
    * has_attachment: whether any file was hosted on Slack
    * has_image: whether any Slack-hosted file has an image mimetype
    * has_link: whether any link was generated
    """
    has_attachment = False
    has_image = False
    has_link = False

    files = message.get("files", [])

    if message.get("subtype") == "file_share":
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message["file"]]

    markdown_links = []

    for fileinfo in files:
        if fileinfo.get("mode", "") in ["tombstone", "hidden_by_limit"]:
            # Slack sometimes includes tombstone mode files with no
            # real data on the actual file (presumably in cases where
            # the file was deleted). hidden_by_limit mode is for files
            # that are hidden because of 10k cap in free plan.
            continue

        url = fileinfo["url_private"]

        if "files.slack.com" in url:
            # For attachments with Slack download link
            has_attachment = True
            has_link = True
            # Accumulate rather than overwrite, so that a later non-image
            # file does not clear the flag set by an earlier image.
            has_image = has_image or "image" in fileinfo["mimetype"]

            file_user = [
                iterate_user for iterate_user in users if message["user"] == iterate_user["id"]
            ]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            build_uploads(
                slack_user_id_to_zulip_user_id[slack_user_id],
                realm_id,
                file_user_email,
                fileinfo,
                s3_path,
                uploads_list,
            )

            build_attachment(
                realm_id,
                {message_id},
                slack_user_id_to_zulip_user_id[slack_user_id],
                fileinfo,
                s3_path,
                zerver_attachment,
            )
        else:
            # For attachments with link not from Slack
            # Example: Google drive integration
            has_link = True
            if "title" in fileinfo:
                file_name = fileinfo["title"]
            else:
                file_name = fileinfo["name"]
            markdown_links.append("[{}]({})".format(file_name, url))

    content = "\n".join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str, str]:
    """
    Build the storage path and the Markdown link for one Slack file.

    Returns (s3_path, content), where content is a Markdown link to
    /user_uploads/<s3_path>.  The link text is the file's title, falling
    back to its name when the record has no title.
    """
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_file'
    s3_path = "/".join(
        [
            str(realm_id),
            "SlackImportAttachment",  # This is a special placeholder which should be kept
            # in sync with 'exports.py' function 'import_message_data'
            format(random.randint(0, 255), "x"),
            secrets.token_urlsafe(18),
            sanitize_name(fileinfo["name"]),
        ]
    )
    attachment_path = f"/user_uploads/{s3_path}"
    # Same fallback as process_message_files uses for files not hosted
    # on Slack, so a missing title does not raise KeyError here.
    link_text = fileinfo["title"] if "title" in fileinfo else fileinfo["name"]
    content = "[{}]({})".format(link_text, attachment_path)

    return s3_path, content
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_reactions(
    reaction_list: List[ZerverFieldsT],
    reactions: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    message_id: int,
    zerver_realmemoji: List[ZerverFieldsT],
) -> None:
    """
    Append one reaction record per (emoji, user) pair to reaction_list.

    reactions is the Slack "reactions" list of one message; each entry
    carries the emoji "name" and the "users" who reacted.  Emoji that are
    neither Unicode emoji nor realm emoji are skipped, as are reacting
    users with no entry in slack_user_id_to_zulip_user_id.
    """
    realmemoji = {realm_emoji["name"]: realm_emoji["id"] for realm_emoji in zerver_realmemoji}

    # For the Unicode emoji codes, we use equivalent of
    # function 'emoji_name_to_emoji_code' in 'zerver/lib/emoji' here
    for slack_reaction in reactions:
        emoji_name = slack_reaction["name"]
        if emoji_name in name_to_codepoint:
            emoji_code = name_to_codepoint[emoji_name]
            reaction_type = Reaction.UNICODE_EMOJI
        elif emoji_name in realmemoji:
            emoji_code = realmemoji[emoji_name]
            reaction_type = Reaction.REALM_EMOJI
        else:
            continue

        for slack_user_id in slack_reaction["users"]:
            if slack_user_id not in slack_user_id_to_zulip_user_id:
                # The reacting user has no converted Zulip user, so there
                # is nobody to attribute the reaction to.  Skip it before
                # allocating an id rather than aborting with a KeyError.
                continue

            reaction_id = NEXT_ID("reaction")
            reaction = Reaction(
                id=reaction_id,
                emoji_code=emoji_code,
                emoji_name=emoji_name,
                reaction_type=reaction_type,
            )

            reaction_dict = model_to_dict(reaction, exclude=["message", "user_profile"])
            reaction_dict["message"] = message_id
            reaction_dict["user_profile"] = slack_user_id_to_zulip_user_id[slack_user_id]

            reaction_list.append(reaction_dict)
|
2018-03-25 13:42:04 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_uploads(
    user_id: int,
    realm_id: int,
    email: str,
    fileinfo: ZerverFieldsT,
    s3_path: str,
    uploads_list: List[ZerverFieldsT],
) -> None:
    """
    Append the upload record for one Slack file to uploads_list.

    The record's "path" holds Slack's private URL for the file, which is
    used later while processing; "s3_path" is where the file is stored.
    """
    uploads_list.append(
        {
            # Save Slack's URL here, which is used later while processing
            "path": fileinfo["url_private"],
            "realm_id": realm_id,
            "content_type": None,
            "user_profile_id": user_id,
            "last_modified": fileinfo["timestamp"],
            "user_profile_email": email,
            "s3_path": s3_path,
            "size": fileinfo["size"],
        }
    )
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-25 19:18:32 +02:00
|
|
|
def get_message_sending_user(message: ZerverFieldsT) -> Optional[str]:
    """
    Return the Slack user id that sent the message, or None.

    The message's own "user" field wins whenever the key is present;
    otherwise the "user" of its "file" entry (if any) is used.
    """
    if "user" in message:
        return message["user"]

    file_info = message.get("file")
    if not file_info:
        return None
    return file_info.get("user")
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def fetch_shared_channel_users(
    user_list: List[ZerverFieldsT], slack_data_dir: str, token: str
) -> None:
    """
    Extend user_list in place with users that are referenced by the
    export but missing from user_list.

    Every user already in user_list is marked is_mirror_dummy=False.
    Any other user id found in a channel's member list or as a message
    sender is fetched from the Slack API, tagged with its team's domain
    and is_mirror_dummy=True, and appended to user_list.
    """
    known_user_ids = set()
    mirror_dummy_user_ids = set()
    team_id_to_domain: Dict[str, str] = {}

    for existing_user in user_list:
        existing_user["is_mirror_dummy"] = False
        known_user_ids.add(existing_user["id"])

    channels = get_data_file(f"{slack_data_dir}/channels.json")
    try:
        # groups.json holds the private channels; it may not exist.
        channels = channels + get_data_file(f"{slack_data_dir}/groups.json")
    except FileNotFoundError:
        channels = channels + []

    added_channels = {}
    for channel in channels:
        added_channels[channel["name"]] = True
        mirror_dummy_user_ids.update(
            member_id for member_id in channel["members"] if member_id not in known_user_ids
        )

    for message in get_messages_iterator(slack_data_dir, added_channels, {}, {}):
        sender_id = get_message_sending_user(message)
        if sender_id is not None and sender_id not in known_user_ids:
            mirror_dummy_user_ids.add(sender_id)

    # Fetch data on the mirror_dummy_user_ids from the Slack API (it's
    # not included in the data export file).
    for dummy_user_id in mirror_dummy_user_ids:
        dummy_user = get_slack_api_data(
            "https://slack.com/api/users.info", "user", token=token, user=dummy_user_id
        )
        team_id = dummy_user["team_id"]
        if team_id not in team_id_to_domain:
            # Look each team up only once.
            team_info = get_slack_api_data(
                "https://slack.com/api/team.info", "team", token=token, team=team_id
            )
            team_id_to_domain[team_id] = team_info["domain"]
        dummy_user["team_domain"] = team_id_to_domain[team_id]
        dummy_user["is_mirror_dummy"] = True
        user_list.append(dummy_user)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def fetch_team_icons(
    zerver_realm: Dict[str, Any], team_info_dict: Dict[str, Any], output_dir: str
) -> List[Dict[str, Any]]:
    """
    Download the Slack team icon and store it as the realm icon.

    Writes <output_dir>/<realm_id>/icon.original (the downloaded bytes)
    and <output_dir>/<realm_id>/icon.png (the output of resize_logo), and
    sets zerver_realm["icon_source"] to Realm.ICON_UPLOADED.

    Returns one record per written file, or an empty list when the team
    uses the default icon or no icon URL is available.
    """
    records = []

    team_icons_dict = team_info_dict["icon"]
    if team_icons_dict.get("image_default"):
        return []

    # Use the original image when present, else the largest listed size.
    icon_url = (
        team_icons_dict.get("image_original", None)
        or team_icons_dict.get("image_230", None)
        or team_icons_dict.get("image_132", None)
        or team_icons_dict.get("image_102", None)
    )
    if not icon_url:
        # Covers both a missing URL and an empty-string URL.
        return []

    realm_id = zerver_realm["id"]
    realm_icon_dir = os.path.join(output_dir, str(realm_id))
    original_icon_output_path = os.path.join(realm_icon_dir, "icon.original")

    # The context manager closes the streamed response, so the
    # connection is released even if writing the file fails.
    with requests.get(icon_url, stream=True) as response:
        os.makedirs(realm_icon_dir, exist_ok=True)
        with open(original_icon_output_path, "wb") as output_file:
            shutil.copyfileobj(response.raw, output_file)

    original_record_path = os.path.join(str(realm_id), "icon.original")
    records.append(
        {
            "realm_id": realm_id,
            "path": original_record_path,
            "s3_path": original_record_path,
        }
    )

    resized_icon_output_path = os.path.join(realm_icon_dir, "icon.png")
    with open(resized_icon_output_path, "wb") as output_file, open(
        original_icon_output_path, "rb"
    ) as original_file:
        resized_data = resize_logo(original_file.read())
        output_file.write(resized_data)

    resized_record_path = os.path.join(str(realm_id), "icon.png")
    records.append(
        {
            "realm_id": realm_id,
            "path": resized_record_path,
            "s3_path": resized_record_path,
        }
    )

    zerver_realm["icon_source"] = Realm.ICON_UPLOADED

    return records
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int = 6) -> None:
    """
    Convert a Slack export zip file into a Zulip data dump.

    The archive is unzipped next to the zip file, users/emoji/team data
    are fetched from the Slack API using token, the converted records are
    written under output_dir (which must be empty), the extracted Slack
    data is removed, and output_dir is packed into <output_dir>.tar.gz.

    threads is passed through to the emoji, avatar and upload processing.

    Raises Exception if output_dir is not empty.
    """
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    check_token_access(token)

    # Strip only the trailing extension; str.replace would also remove
    # any ".zip" occurring elsewhere in the path.
    slack_data_dir = os.path.splitext(slack_zip_file)[0]
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    subprocess.check_call(["unzip", "-q", slack_zip_file, "-d", slack_data_dir])

    # We get the user data from the legacy token method of Slack API, which is deprecated
    # but we use it as the user email data is provided only in this method
    user_list = get_slack_api_data("https://slack.com/api/users.list", "members", token=token)
    fetch_shared_channel_users(user_list, slack_data_dir, token)

    custom_emoji_list = get_slack_api_data("https://slack.com/api/emoji.list", "emoji", token=token)

    (
        realm,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        avatar_list,
        emoji_url_map,
    ) = slack_workspace_to_realm(
        domain_name, realm_id, user_list, realm_subdomain, slack_data_dir, custom_emoji_list
    )

    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir,
        user_list,
        realm_id,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        realm,
        realm["zerver_userprofile"],
        realm["zerver_realmemoji"],
        domain_name,
        output_dir,
    )

    # Move zerver_reactions to realm.json file
    realm["zerver_reaction"] = reactions

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm["zerver_realmemoji"], emoji_folder, emoji_url_map, threads)

    avatar_folder = os.path.join(output_dir, "avatars")
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(
        avatar_list, avatar_folder, realm_id, threads, size_url_suffix="-512"
    )

    uploads_folder = os.path.join(output_dir, "uploads")
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    team_info_dict = get_slack_api_data("https://slack.com/api/team.info", "team", token=token)
    realm_icons_folder = os.path.join(output_dir, "realm_icons")
    realm_icon_records = fetch_team_icons(
        realm["zerver_realm"][0], team_info_dict, realm_icons_folder
    )

    create_converted_data_files(realm, output_dir, "/realm.json")
    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")
    create_converted_data_files(avatar_records, output_dir, "/avatars/records.json")
    create_converted_data_files(uploads_records, output_dir, "/uploads/records.json")
    create_converted_data_files(attachment, output_dir, "/attachment.json")
    create_converted_data_files(realm_icon_records, output_dir, "/realm_icons/records.json")

    # The extracted Slack data is no longer needed once everything has
    # been written to output_dir.
    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + ".tar.gz", output_dir, "-P"])

    logging.info("######### DATA CONVERSION FINISHED #########\n")
    logging.info("Zulip data dump created at %s", output_dir)
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_data_file(path: str) -> Any:
    """Read the JSON file at ``path`` and return the decoded value.

    The file is opened in binary mode and its entire contents are passed to
    ``orjson.loads``; whatever that call produces is returned unchanged.
    """
    with open(path, "rb") as json_file:
        raw_bytes = json_file.read()
    return orjson.loads(raw_bytes)
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-11-25 09:28:25 +01:00
|
|
|
def check_token_access(token: str) -> None:
    """Check that ``token`` is a Slack token this importer can work with.

    Tokens starting with ``xoxp-`` are accepted without any network request;
    only an informational message is logged.  Tokens starting with ``xoxb-``
    are validated with a ``team.info`` request: the response must succeed and
    the scopes listed in its ``x-oauth-scopes`` header must include every
    scope in ``required_scopes`` below.

    Raises:
        ValueError: for an ``xoxb-`` token when the HTTP request fails, Slack
            rejects the token, or one or more required scopes are missing.
        Exception: when the token starts with neither ``xoxp-`` nor ``xoxb-``.
    """
    if token.startswith("xoxp-"):
        logging.info("This is a Slack user token, which grants all rights the user has!")
        return
    if not token.startswith("xoxb-"):
        raise Exception("Unknown token type -- must start with xoxb- or xoxp-")

    response = requests.get(
        "https://slack.com/api/team.info", headers={"Authorization": "Bearer {}".format(token)}
    )
    if response.status_code != requests.codes.ok:
        raise ValueError(
            "HTTP error {} while checking the Slack token".format(response.status_code)
        )
    result = response.json()
    slack_error = None if result.get("ok") else result.get("error", "unknown error")

    # Strip whitespace and drop empty entries so that a header such as
    # "a, b" or a missing header compares cleanly against the required names.
    scope_header = response.headers.get("x-oauth-scopes", "")
    has_scopes = {scope.strip() for scope in scope_header.split(",")} - {""}
    required_scopes = {"emoji:read", "users:read", "users:read.email", "team:read"}
    missing_scopes = required_scopes - has_scopes

    # With no scopes reported at all, the token itself was refused; report
    # Slack's error rather than claiming that every single scope is missing.
    if slack_error is not None and not has_scopes:
        raise ValueError("Slack rejected the token: {}".format(slack_error))
    # When scopes were reported, the explicit list of missing ones is the most
    # useful message, even if the request itself failed for lack of a scope.
    if missing_scopes:
        raise ValueError(
            "Slack token is missing the following required scopes: {}".format(
                sorted(missing_scopes)
            )
        )
    if slack_error is not None:
        raise ValueError("Slack rejected the token: {}".format(slack_error))
|
2020-05-11 15:15:20 +02:00
|
|
|
|
|
|
|
|
2019-08-07 13:58:16 +02:00
|
|
|
def get_slack_api_data(slack_api_url: str, get_param: str, **kwargs: Any) -> Any:
    """Request ``slack_api_url`` and return the ``get_param`` entry of its JSON reply.

    A truthy ``token`` keyword argument is mandatory.  It is removed from
    ``kwargs`` and sent as a bearer ``Authorization`` header; every remaining
    keyword argument is forwarded unchanged to ``requests.get``.

    Raises:
        AssertionError: if no truthy ``token`` keyword argument is supplied.
        Exception: if the HTTP status is not OK, or if the ``ok`` field of the
            JSON reply is falsy (the reply's ``error`` field is included in
            the message).
    """
    if not kwargs.get("token"):
        raise AssertionError("Slack token missing in kwargs")

    auth_header = {"Authorization": "Bearer {}".format(kwargs.pop("token"))}
    response = requests.get(slack_api_url, headers=auth_header, **kwargs)

    if response.status_code != requests.codes.ok:
        raise Exception("HTTP error accessing the Slack API.")

    payload = response.json()
    if not payload["ok"]:
        raise Exception("Error accessing Slack API: {}".format(payload["error"]))
    return payload[get_param]
|