2017-12-08 11:27:42 +01:00
|
|
|
import os
|
|
|
|
import json
|
2018-03-25 13:42:04 +02:00
|
|
|
import ujson
|
2017-12-08 11:27:42 +01:00
|
|
|
import hashlib
|
|
|
|
import sys
|
|
|
|
import argparse
|
|
|
|
import shutil
|
|
|
|
import subprocess
|
|
|
|
import re
|
2018-02-08 00:06:02 +01:00
|
|
|
import logging
|
2018-03-13 20:43:39 +01:00
|
|
|
import random
|
2018-02-01 00:56:57 +01:00
|
|
|
import requests
|
2018-02-26 06:57:00 +01:00
|
|
|
import random
|
2017-12-08 11:27:42 +01:00
|
|
|
|
2018-02-21 10:02:44 +01:00
|
|
|
from django.conf import settings
|
2018-02-09 16:03:18 +01:00
|
|
|
from django.db import connection
|
2017-12-08 11:27:42 +01:00
|
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
from typing import Any, Dict, List, Tuple
|
2018-01-09 11:46:56 +01:00
|
|
|
from zerver.forms import check_subdomain_available
|
2018-03-25 13:42:04 +02:00
|
|
|
from zerver.models import Reaction
|
2018-01-11 15:52:31 +01:00
|
|
|
from zerver.lib.slack_message_conversion import convert_to_zulip_markdown, \
|
|
|
|
get_user_full_name
|
2018-02-17 01:46:50 +01:00
|
|
|
from zerver.lib.avatar_hash import user_avatar_path_from_ids
|
2018-03-13 20:43:39 +01:00
|
|
|
from zerver.lib.actions import STREAM_ASSIGNMENT_COLORS as stream_colors
|
2018-02-26 06:57:00 +01:00
|
|
|
from zerver.lib.upload import random_name, sanitize_name
|
2018-03-25 13:42:04 +02:00
|
|
|
from zerver.lib.emoji import NAME_TO_CODEPOINT_PATH
|
2017-12-29 10:57:48 +01:00
|
|
|
|
2017-12-08 11:27:42 +01:00
|
|
|
# stubs
# Type aliases shared by the conversion helpers in this module.

# A single exported row / JSON object with arbitrary fields.
ZerverFieldsT = Dict[str, Any]
# Maps a Slack user id to the Zulip user id assigned during conversion.
AddedUsersT = Dict[str, int]
# Maps a Slack channel name to the Zulip stream id assigned during conversion.
AddedChannelsT = Dict[str, int]
# Maps a Slack channel name to the Zulip recipient id assigned during conversion.
AddedRecipientsT = Dict[str, int]
|
2017-12-08 11:27:42 +01:00
|
|
|
|
|
|
|
def rm_tree(path: str) -> None:
    """Recursively delete the directory at `path`; do nothing if it does not exist."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)
|
|
|
|
|
2018-02-25 14:46:34 +01:00
|
|
|
def slack_workspace_to_realm(domain_name: str, realm_id: int, user_list: List[ZerverFieldsT],
                             realm_subdomain: str, fixtures_path: str,
                             slack_data_dir: str) -> Tuple[ZerverFieldsT, AddedUsersT,
                                                           AddedRecipientsT, AddedChannelsT,
                                                           List[ZerverFieldsT]]:
    """
    Assemble the realm-level tables for a Slack workspace: the realm row,
    the users, and the streams/recipients/subscriptions built from the channels.

    Returns:
    1. realm, Converted Realm data
    2. added_users, which is a dictionary to map from slack user id to zulip user id
    3. added_recipient, which is a dictionary to map from channel name to zulip recipient_id
    4. added_channels, which is a dictionary to map from channel name to zulip stream_id
    5. avatars, which is list to map avatars to zulip avatar records.json
    """
    # One timestamp is shared by the realm row (as a float) and the user rows (as an int).
    created_at = float(timezone_now().timestamp())

    zerver_realm = build_zerver_realm(fixtures_path, realm_id, realm_subdomain, created_at)

    realm = {
        'zerver_client': [{"name": "populate_db", "id": 1},
                          {"name": "website", "id": 2},
                          {"name": "API", "id": 3}],
        # shows last logged in data, which is not available in slack
        'zerver_userpresence': [],
        'zerver_userprofile_mirrordummy': [],
        'zerver_realmdomain': [{"realm": realm_id,
                                "allow_subdomains": False,
                                "domain": domain_name,
                                "id": realm_id}],
        'zerver_useractivity': [],
        'zerver_realm': zerver_realm,
        'zerver_huddle': [],
        'zerver_userprofile_crossrealm': [],
        'zerver_useractivityinterval': [],
        'zerver_realmfilter': [],
        'zerver_realmemoji': [],
    }

    zerver_userprofile, avatars, added_users = users_to_zerver_userprofile(
        slack_data_dir, user_list, realm_id, int(created_at), domain_name)

    (zerver_defaultstream, zerver_stream, added_channels,
     zerver_subscription, zerver_recipient, added_recipient) = channels_to_zerver_stream(
        slack_data_dir, realm_id, added_users, zerver_userprofile)

    # See https://zulipchat.com/help/set-default-streams-for-new-users
    # for documentation on zerver_defaultstream
    realm['zerver_userprofile'] = zerver_userprofile
    realm['zerver_defaultstream'] = zerver_defaultstream
    realm['zerver_stream'] = zerver_stream
    realm['zerver_subscription'] = zerver_subscription
    realm['zerver_recipient'] = zerver_recipient

    return realm, added_users, added_recipient, added_channels, avatars
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2018-03-07 14:14:08 +01:00
|
|
|
def build_zerver_realm(fixtures_path: str, realm_id: int, realm_subdomain: str,
                       time: float) -> List[ZerverFieldsT]:
    """
    Load the realm skeleton fixture and fill in the identity fields of its
    first entry.

    `fixtures_path` is concatenated directly with the fixture file name, so it
    must already end with a path separator.  The subdomain is used both as the
    realm's string_id and as its display name.

    Returns the skeleton list with its first entry updated.
    """
    skeleton = get_data_file(fixtures_path + 'zerver_realm_skeleton.json')

    realm_row = skeleton[0]
    realm_row.update({
        'id': realm_id,
        'string_id': realm_subdomain,  # subdomain / short_name of realm
        'name': realm_subdomain,
        'date_created': time,
    })

    return skeleton
|
|
|
|
|
2018-02-01 00:56:57 +01:00
|
|
|
def users_to_zerver_userprofile(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
                                timestamp: Any, domain_name: str) -> Tuple[List[ZerverFieldsT],
                                                                           List[ZerverFieldsT],
                                                                           AddedUsersT]:
    """
    Convert the Slack user list into Zulip user profile rows and avatar records.

    `slack_data_dir` is accepted but not read by this function.

    Returns:
    1. zerver_userprofile, which is a list of user profile
    2. avatar_list, which is list to map avatars to zulip avatard records.json
    3. added_users, which is a dictionary to map from slack user id to zulip
       user id
    """
    logging.info('######### IMPORTING USERS STARTED #########\n')
    zerver_userprofile = []
    avatar_list = []  # type: List[ZerverFieldsT]
    added_users = {}

    # We have only one primary owner in slack, see link
    # https://get.slack.help/hc/en-us/articles/201912948-Owners-and-Administrators
    # Id 0 is reserved for the primary owner; every other user is numbered
    # from 1 upwards in the order they appear in `users`.
    primary_owner_id = 0
    next_regular_id = 1

    desktop_notification = True

    for user in users:
        slack_user_id = user['id']
        is_primary_owner = user.get('is_primary_owner', False)
        is_bot = user.get('is_bot', False)

        user_id = primary_owner_id if is_primary_owner else next_regular_id

        # email
        email = get_user_email(user, domain_name)

        # avatar
        # ref: https://chat.zulip.org/help/change-your-avatar
        avatar_url = build_avatar_url(slack_user_id, user['team_id'],
                                      user['profile']['avatar_hash'])
        build_avatar(user_id, realm_id, email, avatar_url, timestamp, avatar_list)

        # check if user is the admin
        realm_admin = get_admin(user)

        # timezone
        timezone = get_user_timezone(user)

        userprofile = dict(
            enable_desktop_notifications=desktop_notification,
            is_staff=False,  # 'staff' is for server administrators, which don't exist in Slack.
            avatar_source='U',
            is_bot=is_bot,
            avatar_version=1,
            default_desktop_notifications=True,
            timezone=timezone,
            default_sending_stream=None,
            enable_offline_email_notifications=True,
            user_permissions=[],  # This is Zulip-specific
            is_mirror_dummy=False,
            pointer=-1,
            default_events_register_stream=None,
            is_realm_admin=realm_admin,
            # invites_granted=0,  # TODO
            enter_sends=True,
            bot_type=1 if is_bot else None,
            enable_stream_sounds=False,
            is_api_super_user=False,
            rate_limits="",
            last_login=timestamp,
            tos_version=None,
            default_all_public_streams=False,
            full_name=get_user_full_name(user),
            twenty_four_hour_time=False,
            groups=[],  # This is Zulip-specific
            enable_online_push_notifications=False,
            alert_words="[]",
            bot_owner=None,  # This is Zulip-specific
            short_name=user['name'],
            enable_offline_push_notifications=True,
            left_side_userlist=False,
            enable_stream_desktop_notifications=False,
            enable_digest_emails=True,
            last_pointer_updater="",
            email=email,
            realm_name_in_notifications=False,
            date_joined=timestamp,
            last_reminder=timestamp,
            is_superuser=False,
            tutorial_status="T",
            default_language="en",
            enable_sounds=True,
            pm_content_in_desktop_notifications=True,
            is_active=not user['deleted'],
            onboarding_steps="[]",
            emojiset="google",
            realm=realm_id,
            # invites_used=0,  # TODO
            id=user_id)

        # TODO map the avatar
        # zerver auto-infer the url from Gravatar instead of from a specified
        # url; zerver.lib.avatar needs to be patched
        # profile['image_32'], Slack has 24, 32, 48, 72, 192, 512 size range

        zerver_userprofile.append(userprofile)
        added_users[slack_user_id] = user_id

        # Only regular users consume an id from the running counter.
        if not is_primary_owner:
            next_regular_id += 1

        logging.info(u"{} -> {}".format(user['name'], userprofile['email']))

    logging.info('######### IMPORTING USERS FINISHED #########\n')
    return zerver_userprofile, avatar_list, added_users
|
2017-12-08 11:27:42 +01:00
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_user_email(user: ZerverFieldsT, domain_name: str) -> str:
    """
    Return the email address to use for a Slack user.

    The profile's own email is used when present.  For a bot (profile has a
    'bot_id'), an address of the form "<name>-bot@<domain_name>" is built from
    'real_name_normalized' or, failing that, 'first_name', with "Bot" and
    spaces removed from the name.  Any other user gets the SHA-256 hex digest
    of their 'real_name' at `domain_name`.

    Raises AssertionError for a bot profile carrying neither name field.
    """
    profile = user['profile']

    if 'email' in profile:
        return profile['email']

    if 'bot_id' in profile:
        # Candidate name fields, in order of preference.
        for name_field in ('real_name_normalized', 'first_name'):
            if name_field in profile:
                slack_bot_name = profile[name_field]
                break
        else:
            raise AssertionError("Could not identify bot type")
        compact_name = slack_bot_name.replace("Bot", "").replace(" ", "")
        return compact_name + "-bot@%s" % (domain_name,)

    # TODO: Do we need this fallback case at all?
    name_digest = hashlib.sha256(user['real_name'].encode()).hexdigest()
    return name_digest + "@%s" % (domain_name,)
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2018-02-16 23:54:38 +01:00
|
|
|
def build_avatar_url(slack_user_id: str, team_id: str, avatar_hash: str) -> str:
    """Return the slack-edge avatar URL built from the team id, user id and avatar hash."""
    url_parts = (team_id, slack_user_id, avatar_hash)
    return "https://ca.slack-edge.com/{}-{}-{}".format(*url_parts)
|
|
|
|
|
|
|
|
def build_avatar(zulip_user_id: int, realm_id: int, email: str, avatar_url: str,
                 timestamp: Any, avatar_list: List[ZerverFieldsT]) -> None:
    """
    Append one avatar record for the given user to `avatar_list` (mutated in place).

    The record's 'path' holds the Slack avatar URL, and 's3_path' and 'size'
    are left as empty strings.
    """
    avatar_list.append({
        'path': avatar_url,  # Save slack's url here, which is used later while processing
        'realm_id': realm_id,
        'content_type': None,
        'user_profile_id': zulip_user_id,
        'last_modified': timestamp,
        'user_profile_email': email,
        's3_path': "",
        'size': "",
    })
|
|
|
|
|
2018-02-06 21:02:23 +01:00
|
|
|
def get_admin(user: ZerverFieldsT) -> bool:
    """Return True if the Slack user is flagged as an admin, an owner or the primary owner."""
    privileged_flags = ('is_admin', 'is_owner', 'is_primary_owner')
    return any(user.get(flag, False) for flag in privileged_flags)
|
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_user_timezone(user: ZerverFieldsT) -> str:
    """
    Return the user's 'tz' value when it contains a '/', and
    "America/New_York" when it is missing, None, or has no '/'.
    """
    fallback_timezone = "America/New_York"
    tz_name = user.get("tz", fallback_timezone)
    if tz_name is not None and '/' in tz_name:
        return tz_name
    return fallback_timezone
|
|
|
|
|
2018-01-06 19:42:18 +01:00
|
|
|
def channels_to_zerver_stream(slack_data_dir: str, realm_id: int, added_users: AddedUsersT,
                              zerver_userprofile: List[ZerverFieldsT]) -> Tuple[List[ZerverFieldsT],
                                                                                List[ZerverFieldsT],
                                                                                AddedChannelsT,
                                                                                List[ZerverFieldsT],
                                                                                List[ZerverFieldsT],
                                                                                AddedRecipientsT]:
    """
    Convert the channels listed in <slack_data_dir>/channels.json into streams,
    then add one personal recipient and subscription per user profile.

    Returns:
    1. zerver_defaultstream, which is a list of the default streams
    2. zerver_stream, while is a list of all streams
    3. added_channels, which is a dictionary to map from channel name to zulip stream_id
    4. zerver_subscription, which is a list of the subscriptions
    5. zerver_recipient, which is a list of the recipients
    6. added_recipient, which is a dictionary to map from channel name to zulip recipient_id
    """
    logging.info('######### IMPORTING CHANNELS STARTED #########\n')
    channels = get_data_file(slack_data_dir + '/channels.json')

    added_channels = {}
    added_recipient = {}

    zerver_stream = []
    zerver_subscription = []  # type: List[ZerverFieldsT]
    zerver_recipient = []
    zerver_defaultstream = []

    # slack has the default channel 'general' and 'random'
    # where every user is subscribed
    default_channels = ['general', 'random']  # Slack specific

    next_subscription_id = 0

    # Each channel yields exactly one stream and one recipient, so both ids
    # are simply the channel's position in the list.
    for stream_id, channel in enumerate(channels):
        # slack_channel_id = channel['id']

        # map Slack's topic and purpose content into Zulip's stream description.
        # WARN This mapping is lossy since the topic.creator, topic.last_set,
        # purpose.creator, purpose.last_set fields are not preserved.
        description = channel["purpose"]["value"]
        recipient_id = stream_id

        # construct the stream object and append it to zerver_stream
        stream = {
            'realm': realm_id,
            'name': channel["name"],
            'deactivated': channel["is_archived"],
            'description': description,
            # TODO: private channels are not exported with Slack's standard
            # plan; so this field is always false
            'invite_only': False,
            'date_created': float(channel["created"]),
            'id': stream_id,
        }

        # construct defaultstream object
        if channel['name'] in default_channels:
            zerver_defaultstream.append(
                build_defaultstream(channel['name'], realm_id, stream_id,
                                    len(zerver_defaultstream)))

        zerver_stream.append(stream)
        added_channels[stream['name']] = stream_id

        # construct the recipient object and append it to zerver_recipient
        # type 1: private
        # type 2: stream
        # type 3: huddle
        zerver_recipient.append({'type_id': stream_id, 'id': recipient_id, 'type': 2})
        added_recipient[stream['name']] = recipient_id
        # TODO add recipients for private message and huddles

        # construct the subscription object and append it to zerver_subscription
        next_subscription_id = build_subscription(channel['members'], zerver_subscription,
                                                  recipient_id, added_users,
                                                  next_subscription_id)
        # TODO add zerver_subscription which correspond to
        # huddles type recipient
        # For huddles:
        # sub['recipient']=recipient['id'] where recipient['type_id']=added_users[member]

        # TODO do private message subscriptions between each users have to
        # be generated from scratch?

        logging.info(u"{} -> created".format(channel['name']))

        # TODO map Slack's pins to Zulip's stars
        # There is the security model that Slack's pins are known to the team owner
        # as evident from where it is stored at (channels)
        # "pins": [
        #         {
        #             "id": "1444755381.000003",
        #             "type": "C",
        #             "user": "U061A5N1G",
        #             "owner": "U061A5N1G",
        #             "created": "1444755463"
        #         }
        #         ],

    # Personal recipients continue numbering after the stream recipients.
    next_recipient_id = len(zerver_recipient)

    for profile in zerver_userprofile:
        # this maps the recipients and subscriptions
        # related to private messages
        recipient, sub = build_pm_recipient_sub_from_user(profile['id'], next_recipient_id,
                                                          next_subscription_id)
        zerver_recipient.append(recipient)
        zerver_subscription.append(sub)
        next_subscription_id += 1
        next_recipient_id += 1

    logging.info('######### IMPORTING STREAMS FINISHED #########\n')
    return (zerver_defaultstream, zerver_stream, added_channels, zerver_subscription,
            zerver_recipient, added_recipient)
|
|
|
|
|
|
|
|
def build_defaultstream(channel_name: str, realm_id: int, stream_id: int,
                        defaultstream_id: int) -> ZerverFieldsT:
    """
    Return a default-stream row linking `stream_id` to `realm_id`.

    `channel_name` is accepted but does not appear in the returned row.
    """
    return {
        'stream': stream_id,
        'realm': realm_id,
        'id': defaultstream_id,
    }
|
2018-01-23 19:04:59 +01:00
|
|
|
|
|
|
|
def build_pm_recipient_sub_from_user(zulip_user_id: int, recipient_id: int,
                                     subscription_id: int) -> Tuple[ZerverFieldsT,
                                                                    ZerverFieldsT]:
    """
    Build the personal (type 1) recipient for a user together with that
    user's subscription to it.

    Returns:
    1. recipient, the recipient row whose type_id is the user's id
    2. sub, the subscription row linking the user to that recipient
    """
    recipient = {
        'type_id': zulip_user_id,
        'id': recipient_id,
        'type': 1,
    }

    sub = {
        'recipient': recipient_id,
        'notifications': False,
        'color': random.choice(stream_colors),
        'desktop_notifications': True,
        'pin_to_top': False,
        'in_home_view': True,
        'active': True,
        'user_profile': zulip_user_id,
        'id': subscription_id,
    }

    return recipient, sub
|
|
|
|
|
|
|
|
def build_subscription(channel_members: List[str], zerver_subscription: List[ZerverFieldsT],
                       recipient_id: int, added_users: AddedUsersT,
                       subscription_id: int) -> int:
    """
    Append one subscription row per channel member to `zerver_subscription`
    (mutated in place), numbering the rows from `subscription_id` upwards.

    Each member is looked up in `added_users` to get the subscribing user's
    Zulip id.

    Returns the next unused subscription id.
    """
    for slack_member_id in channel_members:
        # The recipient is a stream for stream-readable message.
        # proof :  https://github.com/zulip/zulip/blob/master/zerver/views/messages.py#L240 &
        # https://github.com/zulip/zulip/blob/master/zerver/views/messages.py#L324
        zerver_subscription.append({
            'recipient': recipient_id,
            'notifications': False,
            'color': random.choice(stream_colors),
            'desktop_notifications': True,
            'pin_to_top': False,
            'in_home_view': True,
            'active': True,
            'user_profile': added_users[slack_member_id],
            'id': subscription_id,
        })
        subscription_id += 1
    return subscription_id
|
2017-12-08 11:27:42 +01:00
|
|
|
|
2018-03-07 14:14:08 +01:00
|
|
|
def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
                                     added_users: AddedUsersT, added_recipient: AddedRecipientsT,
                                     added_channels: AddedChannelsT, realm: ZerverFieldsT,
                                     domain_name: str) -> Tuple[ZerverFieldsT,
                                                                List[ZerverFieldsT],
                                                                List[ZerverFieldsT]]:
    """
    Read every channel's messages, order them by timestamp and convert them.

    Returns:
    1. message.json, Converted messages
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """
    all_messages = get_all_messages(slack_data_dir, added_channels)

    # we sort the messages according to the timestamp to show messages with
    # the proper date order
    all_messages.sort(key=lambda message: message['ts'])

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    (zerver_message, zerver_usermessage, attachment,
     uploads, reactions) = channel_message_to_zerver_message(
        realm_id, users, added_users, added_recipient, all_messages,
        realm['zerver_subscription'], domain_name)

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')

    # now for message.json
    message_json = {
        'zerver_message': zerver_message,
        'zerver_usermessage': zerver_usermessage,
        'zerver_reaction': reactions,
    }

    return message_json, uploads, attachment
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2018-02-25 07:08:28 +01:00
|
|
|
def get_all_messages(slack_data_dir: str, added_channels: AddedChannelsT) -> List[ZerverFieldsT]:
    """
    Collect the messages of every channel in `added_channels` into one list.

    Each channel is expected to have a directory named after it under
    `slack_data_dir`; every file in that directory is loaded with
    get_data_file.  Each message is tagged with a 'channel_name' key.
    """
    collected = []  # type: List[ZerverFieldsT]
    for channel_name in added_channels:
        channel_dir = os.path.join(slack_data_dir, channel_name)
        for file_name in os.listdir(channel_dir):
            channel_messages = get_data_file(os.path.join(channel_dir, file_name))
            for channel_message in channel_messages:
                # To give every message the channel information
                channel_message['channel_name'] = channel_name
            collected.extend(channel_messages)
    return collected
|
|
|
|
|
2018-03-07 14:14:08 +01:00
|
|
|
def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
                                      added_users: AddedUsersT,
                                      added_recipient: AddedRecipientsT,
                                      all_messages: List[ZerverFieldsT],
                                      zerver_subscription: List[ZerverFieldsT],
                                      domain_name: str) -> Tuple[List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT]]:
    """
    Convert Slack messages (already tagged with 'channel_name') into Zulip
    message rows plus their usermessages, attachments, uploads and reactions.

    Messages without a sending user, and messages whose subtype is
    channel_join, channel_leave or channel_name, are skipped entirely: they
    produce no message row, consume no message id and contribute no reactions.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    """
    message_id_count = usermessage_id_count = attachment_id_count = reaction_id_count = 0
    zerver_message = []
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    # Subtypes that are not imported at all.
    skipped_subtypes = ["channel_join", "channel_leave", "channel_name"]
    # Subtypes which have only the action in the message should
    # be rendered with '/me' in the content initially
    # For example "sh_room_created" has the message 'started a call'
    # which should be displayed as '/me started a call'
    me_subtypes = ["bot_add", "sh_room_created", "me_message"]

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are Sometimes produced by slack
            continue

        # Decide whether to skip before anything is recorded for this message;
        # otherwise its reactions would be stored under an id that the next
        # imported message then reuses.
        subtype = message.get('subtype')
        if subtype in skipped_subtypes:
            continue

        has_attachment = has_image = False
        content, mentioned_users_id, has_link = convert_to_zulip_markdown(message['text'],
                                                                          users,
                                                                          added_users)
        rendered_content = None

        recipient_id = added_recipient[message['channel_name']]
        message_id = message_id_count

        # Process message reactions
        if 'reactions' in message:
            reaction_id_count = build_reactions(reaction_list, message['reactions'], added_users,
                                                message_id, reaction_id_count, name_to_codepoint)

        # Process different subtypes of slack messages
        if subtype in me_subtypes:
            content = '/me %s' % (content,)

        # For attachments with slack download link
        elif subtype == "file_share" and 'files.slack.com' in message['file']['url_private']:
            fileinfo = message['file']

            has_attachment = has_link = True
            has_image = 'image' in fileinfo['mimetype']

            # Look up the exported user record of the sender by Slack id so
            # the upload is attributed to the right email address.
            file_user = [iterate_user for iterate_user in users
                         if iterate_user['id'] == user]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content = get_attachment_path_and_content(fileinfo, realm_id)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
                          uploads_list)

            attachment_id = attachment_id_count
            build_zerver_attachment(realm_id, message_id, attachment_id, added_users[user],
                                    fileinfo, s3_path, zerver_attachment)
            attachment_id_count += 1

        # For attachments with link not from slack
        # Example: Google drive integration
        elif subtype == "file_share":
            fileinfo = message['file']
            has_link = True
            if 'title' in fileinfo:
                file_name = fileinfo['title']
            else:
                file_name = fileinfo['name']
            content = '[%s](%s)' % (file_name, fileinfo['url_private'])

        # construct message
        zulip_message = dict(
            sending_client=1,
            rendered_content_version=1,  # This is Zulip-specific
            has_image=has_image,
            subject='imported from slack',  # This is Zulip-specific
            pub_date=float(message['ts']),
            id=message_id,
            has_attachment=has_attachment,  # attachment will be posted in the subsequent message;
                                            # this is how Slack does it, i.e. less like email
            edit_history=None,
            sender=added_users[user],  # map slack id to zulip id
            content=content,
            rendered_content=rendered_content,  # slack doesn't cache this
            recipient=recipient_id,
            last_edit_time=None,
            has_link=has_link)
        zerver_message.append(zulip_message)

        # construct usermessages
        usermessage_id_count = build_zerver_usermessage(
            zerver_usermessage, usermessage_id_count, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        message_id_count += 1
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, reaction_list
|
2017-12-29 10:57:48 +01:00
|
|
|
|
2018-02-26 06:57:00 +01:00
|
|
|
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str,
                                                                                     str]:
    """
    Generate a storage path for an uploaded file and the markdown link that
    points at it.

    Returns:
    1. s3_path, of the form "<realm_id>/<hex 0-ff>/<random name>/<sanitized file name>"
    2. content, a markdown link "[title](/user_uploads/<s3_path>)"
    """
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_file'
    path_parts = [
        str(realm_id),
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name'])
    ]
    s3_path = "/".join(path_parts)

    content = '[%s](%s)' % (fileinfo['title'], '/user_uploads/%s' % (s3_path,))
    return s3_path, content
|
|
|
|
|
2018-03-25 13:42:04 +02:00
|
|
|
def build_reactions(reaction_list: List[ZerverFieldsT], reactions: List[ZerverFieldsT],
                    added_users: AddedUsersT, message_id: int, reaction_id: int,
                    name_to_codepoint: ZerverFieldsT) -> int:
    """
    Append one reaction record per (reaction, user) pair to `reaction_list`.

    Only reactions whose name is a key of `name_to_codepoint` are
    converted; any other reaction is skipped.  `reaction_id` is the id
    given to the first record created, and the next unused id is returned.
    """
    # For the unicode emoji codes, we use equivalent of
    # function 'emoji_name_to_emoji_code' in 'zerver/lib/emoji' here
    for slack_reaction in reactions:
        emoji_name = slack_reaction['name']
        if emoji_name not in name_to_codepoint:
            continue

        emoji_code = name_to_codepoint[emoji_name]
        for slack_user_id in slack_reaction['users']:
            reaction_list.append({
                'id': reaction_id,
                'emoji_code': emoji_code,
                'emoji_name': emoji_name,
                'message': message_id,
                'reaction_type': Reaction.UNICODE_EMOJI,
                'user_profile': added_users[slack_user_id],
            })
            reaction_id += 1
    return reaction_id
|
|
|
|
|
2018-02-26 06:57:00 +01:00
|
|
|
def build_uploads(user_id: int, realm_id: int, email: str, fileinfo: ZerverFieldsT, s3_path: str,
                  uploads_list: List[ZerverFieldsT]) -> None:
    """
    Append an upload record for the Slack file `fileinfo` to `uploads_list`.

    The record's 'path' temporarily holds Slack's private url of the
    file, which is used later while processing the uploads.
    """
    uploads_list.append({
        'path': fileinfo['url_private'],  # Slack's url; used later while processing
        'realm_id': realm_id,
        'content_type': None,
        'user_profile_id': user_id,
        'last_modified': fileinfo['timestamp'],
        'user_profile_email': email,
        's3_path': s3_path,
        'size': fileinfo['size'],
    })
|
|
|
|
|
2018-02-26 08:48:14 +01:00
|
|
|
def build_zerver_attachment(realm_id: int, message_id: int, attachment_id: int,
                            user_id: int, fileinfo: ZerverFieldsT, s3_path: str,
                            zerver_attachment: List[ZerverFieldsT]) -> None:
    """
    Append an attachment record, linked to the single message
    `message_id`, to `zerver_attachment`.
    """
    zerver_attachment.append({
        'owner': user_id,
        'messages': [message_id],
        'id': attachment_id,
        'size': fileinfo['size'],
        'create_time': fileinfo['created'],
        'is_realm_public': True,  # is always true for stream message
        'path_id': s3_path,
        'realm': realm_id,
        'file_name': fileinfo['name'],
    })
|
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_message_sending_user(message: ZerverFieldsT) -> str:
    """
    Return the id of the user who sent `message`.

    The message's own 'user' entry is preferred; the 'user' entry of
    its 'file' is used when the message has none.  When neither is
    available the result of `message.get('user')` (i.e. None) is returned.
    """
    try:
        file_user = message['file']['user']
    except KeyError:
        # No usable file information; rely on the message itself
        return message.get('user')
    return message.get('user', file_user)
|
|
|
|
|
2018-03-17 12:15:57 +01:00
|
|
|
def build_zerver_usermessage(zerver_usermessage: List[ZerverFieldsT], usermessage_id: int,
                             zerver_subscription: List[ZerverFieldsT], recipient_id: int,
                             mentioned_users_id: List[int], message_id: int) -> int:
    """
    Append a usermessage record for every subscription whose recipient
    is `recipient_id`.

    `usermessage_id` is the id given to the first record created; the
    next unused id is returned.
    """
    for subscription in zerver_subscription:
        if subscription['recipient'] != recipient_id:
            continue

        subscriber = subscription['user_profile']
        if subscriber in mentioned_users_id:
            flags_mask = 9  # For read and mentioned
        else:
            flags_mask = 1  # For read

        zerver_usermessage.append({
            'user_profile': subscriber,
            'id': usermessage_id,
            'flags_mask': flags_mask,
            'message': message_id,
        })
        usermessage_id += 1
    return usermessage_id
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2018-02-01 00:56:57 +01:00
|
|
|
def do_convert_data(slack_zip_file: str, realm_subdomain: str, output_dir: str, token: str) -> None:
    """
    Convert a Slack export archive into a Zulip data dump.

    Steps, in order:
    1. Check that `realm_subdomain` is available.
    2. Unzip `slack_zip_file` into a directory next to it, named after
       the archive without its extension.
    3. Fetch the user list from the Slack API using `token`.
    4. Build the realm and message data, then download the avatars
       and uploads into `output_dir`.
    5. Write the converted JSON files into `output_dir`, delete the
       unzipped Slack data and create '<output_dir>.tar.gz'.

    Raises an Exception if `output_dir` is not empty.
    """
    check_subdomain_available(realm_subdomain)

    domain_name = settings.EXTERNAL_HOST

    # Strip only the trailing extension.  str.replace('.zip', '') would also
    # remove '.zip' from anywhere else in the path (e.g. a parent directory).
    slack_data_dir = os.path.splitext(slack_zip_file)[0]
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception('Output directory should be empty!')

    subprocess.check_call(['unzip', '-q', slack_zip_file, '-d', slack_data_dir])

    script_path = os.path.dirname(os.path.abspath(__file__)) + '/'
    fixtures_path = script_path + '../fixtures/'

    realm_id = 0

    # We get the user data from the legacy token method of slack api, which is deprecated
    # but we use it as the user email data is provided only in this method
    user_list = get_slack_api_data(token, "https://slack.com/api/users.list", "members")

    realm, added_users, added_recipient, added_channels, avatar_list = slack_workspace_to_realm(
        domain_name, realm_id, user_list, realm_subdomain, fixtures_path, slack_data_dir)

    message_json, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir, user_list, realm_id, added_users, added_recipient, added_channels,
        realm, domain_name)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))

    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id)

    uploads_folder = os.path.join(output_dir, 'uploads')
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder)
    attachment = {"zerver_attachment": zerver_attachment}

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO message.json
    create_converted_data_files(message_json, output_dir, '/messages-000001.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads TODO
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
    # IO attachments
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    # remove slack dir
    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s", output_dir)
|
2018-01-23 19:04:59 +01:00
|
|
|
|
2018-02-17 01:46:50 +01:00
|
|
|
def process_avatars(avatar_list: List[ZerverFieldsT], avatar_dir: str,
                    realm_id: int) -> List[ZerverFieldsT]:
    """
    Download every avatar in `avatar_list` and store it under `avatar_dir`
    twice: once with the '.png' extension and once with '.original'.

    Each entry of `avatar_list` is updated in place with the local '.png'
    path and the file size.  The returned list holds those entries
    followed by a copy of each one describing the '.original' file.
    """
    logging.info('######### GETTING AVATARS #########\n')
    logging.info('DOWNLOADING AVATARS .......\n')
    original_records = []
    for avatar in avatar_list:
        # Copy the record before it is modified below
        original_record = dict(avatar)
        slack_avatar_url = avatar['path']
        avatar_hash = user_avatar_path_from_ids(avatar['user_profile_id'], realm_id)

        png_path = '%s/%s.png' % (avatar_dir, avatar_hash)
        original_path = '%s/%s.original' % (avatar_dir, avatar_hash)

        # Fetch the avatars from the url
        get_avatar(slack_avatar_url, png_path, original_path)
        size = os.stat(png_path).st_size

        for record, local_path in ((avatar, png_path), (original_record, original_path)):
            record['path'] = local_path
            record['s3_path'] = local_path
            record['size'] = size
        original_records.append(original_record)
    logging.info('######### GETTING AVATARS FINISHED #########\n')
    return avatar_list + original_records
|
|
|
|
|
|
|
|
def get_avatar(slack_avatar_url: str, image_path: str, original_image_path: str) -> None:
    """
    Download the avatar at `slack_avatar_url` (with '-512' appended to the
    url) to `image_path`, then copy that file to `original_image_path`.
    """
    # get avatar of size 512
    response = requests.get(slack_avatar_url + '-512', stream=True)
    try:
        with open(image_path, 'wb') as image_file:
            shutil.copyfileobj(response.raw, image_file)
    finally:
        # A streamed response keeps its connection until it is closed
        response.close()
    # Copy only after the image file has been closed (and so fully written)
    shutil.copy(image_path, original_image_path)
|
|
|
|
|
2018-02-26 11:04:13 +01:00
|
|
|
def process_uploads(upload_list: List[ZerverFieldsT], upload_dir: str) -> List[ZerverFieldsT]:
    """
    Download every upload in `upload_list` into `upload_dir`.

    Each entry's 'path' is expected to hold the url to download from and
    its 's3_path' the location, relative to `upload_dir`, to save it at.
    After the download the entry's 'path' is replaced by its 's3_path'.
    Returns the same (updated) `upload_list`.
    """
    logging.info('######### GETTING ATTACHMENTS #########\n')
    logging.info('DOWNLOADING ATTACHMENTS .......\n')
    for upload in upload_list:
        upload_url = upload['path']
        upload_s3_path = upload['s3_path']

        upload_path = os.path.join(upload_dir, upload_s3_path)
        os.makedirs(os.path.dirname(upload_path), exist_ok=True)

        response = requests.get(upload_url, stream=True)
        try:
            with open(upload_path, 'wb') as upload_file:
                shutil.copyfileobj(response.raw, upload_file)
        finally:
            # A streamed response keeps its connection until it is closed
            response.close()

        upload['path'] = upload_s3_path
    logging.info('######### GETTING ATTACHMENTS FINISHED #########\n')
    return upload_list
|
|
|
|
|
2018-01-23 19:04:59 +01:00
|
|
|
def get_data_file(path: str) -> Any:
    """Return the parsed content of the JSON file at `path`."""
    # The context manager closes the file even if parsing fails
    with open(path) as data_file:
        return json.load(data_file)
|
|
|
|
|
2018-03-29 14:38:11 +02:00
|
|
|
def get_slack_api_data(token: str, slack_api_url: str, get_param: str) -> List[ZerverFieldsT]:
    """
    Query `slack_api_url` with `token` and return the `get_param` entry
    of the JSON response.

    Raises an Exception if the HTTP status is not OK, or if the response
    contains an 'error' entry.
    """
    response = requests.get('%s?token=%s' % (slack_api_url, token))
    if response.status_code != requests.codes.ok:
        raise Exception('Something went wrong. Please try again!')

    # Parse the body once and reuse it for both the check and the result
    payload = response.json()
    if 'error' in payload:
        raise Exception('Enter a valid token!')
    return payload[get_param]
|
2018-02-01 00:56:57 +01:00
|
|
|
|
2018-03-09 17:50:48 +01:00
|
|
|
def create_converted_data_files(data: Any, output_dir: str, file_path: str) -> None:
    """
    Write `data` as JSON to the file `output_dir + file_path`.

    `file_path` is appended to `output_dir` as is, so it should start
    with a '/'.
    """
    output_file = output_dir + file_path
    # The context manager makes sure the data is flushed and the file closed
    with open(output_file, 'w') as json_file:
        json.dump(data, json_file)
|