mirror of https://github.com/zulip/zulip.git
hipchat_import: Remove tool from codebase.
Remove the functions and scripts used by the HipChat import tool, along with those that will no longer be required in the future.
This commit is contained in:
parent
62d721e859
commit
c685d36821
|
@ -27,6 +27,7 @@ in bursts.
|
|||
|
||||
#### Full feature changelog
|
||||
|
||||
- Removed HipChat import tool.
|
||||
- Added support for moving topics to private streams.
|
||||
- Added support for subscribing another stream's membership to a stream.
|
||||
- Added RealmAuditLog for most settings state changes in Zulip; this
|
||||
|
|
|
@ -78,7 +78,6 @@ backup][zulip-backups] or importing your data from [Slack][slack-import],
|
|||
or another Zulip server, you should stop here
|
||||
and return to the import instructions.
|
||||
|
||||
[hipchat-import]: https://zulip.com/help/import-from-hipchat
|
||||
[slack-import]: https://zulip.com/help/import-from-slack
|
||||
[zulip-backups]: ../production/export-and-import.html#backups
|
||||
|
||||
|
|
|
@ -59,9 +59,6 @@ httplib2
|
|||
# Forked to avoid pulling in scipy: https://github.com/mailgun/talon/issues/130
|
||||
https://github.com/zulip/talon/archive/7d8bdc4dbcfcc5a73298747293b99fe53da55315.zip#egg=talon==1.2.10.zulip1
|
||||
|
||||
# Needed for HipChat import
|
||||
hypchat
|
||||
|
||||
# Needed for inlining the CSS in emails
|
||||
premailer
|
||||
|
||||
|
|
|
@ -355,9 +355,6 @@ httplib2==0.18.1 \
|
|||
--hash=sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3 \
|
||||
--hash=sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782 \
|
||||
# via -r requirements/common.in
|
||||
hypchat==0.21 \
|
||||
--hash=sha256:ef37a9cd8103bb13ad772b28ba9223ca9d4278371e374450c3ea2918df70a8e9 \
|
||||
# via -r requirements/common.in
|
||||
hyper==0.7.0 \
|
||||
--hash=sha256:069514f54231fb7b5df2fb910a114663a83306d5296f588fffcb0a9be19407fc \
|
||||
--hash=sha256:12c82eacd122a659673484c1ea0d34576430afbe5aa6b8f63fe37fcb06a2458c \
|
||||
|
@ -915,7 +912,7 @@ python-binary-memcached==0.30.1 \
|
|||
python-dateutil==2.8.1 \
|
||||
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
|
||||
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
|
||||
# via -r requirements/common.in, arrow, botocore, hypchat, moto
|
||||
# via -r requirements/common.in, arrow, botocore, moto
|
||||
python-debian==0.1.38 \
|
||||
--hash=sha256:a1f89336d7675a56cdd92fa90cd8c00b9178dabcc6d3e08a397e80eca2b855f3 \
|
||||
--hash=sha256:a352bb5f9ef19b0272078f516ee0ec42b05e90ac85651d87c10e7041550dcc1d \
|
||||
|
@ -1043,7 +1040,7 @@ requests-oauthlib==1.3.0 \
|
|||
requests[security]==2.25.0 \
|
||||
--hash=sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8 \
|
||||
--hash=sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998 \
|
||||
# via -r requirements/common.in, docker, hypchat, matrix-client, moto, premailer, pyoembed, python-digitalocean, python-gcm, python-twitter, requests-oauthlib, responses, semgrep, social-auth-core, sphinx, stripe, twilio, zulip
|
||||
# via -r requirements/common.in, docker, matrix-client, moto, premailer, pyoembed, python-digitalocean, python-gcm, python-twitter, requests-oauthlib, responses, semgrep, social-auth-core, sphinx, stripe, twilio, zulip
|
||||
responses==0.12.0 \
|
||||
--hash=sha256:0de50fbf600adf5ef9f0821b85cc537acca98d66bc7776755924476775c1989c \
|
||||
--hash=sha256:e80d5276011a4b79ecb62c5f82ba07aa23fb31ecbc95ee7cad6de250a3c97444 \
|
||||
|
@ -1117,7 +1114,7 @@ sh==1.14.1 \
|
|||
six==1.15.0 \
|
||||
--hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \
|
||||
--hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
|
||||
# via argon2-cffi, automat, aws-sam-translator, cfn-lint, cryptography, django-bitfield, docker, ecdsa, hypchat, isodate, jsonschema, junit-xml, libthumbor, moto, openapi-core, openapi-schema-validator, openapi-spec-validator, parsel, pip-tools, protego, pyopenssl, python-binary-memcached, python-dateutil, python-debian, python-jose, qrcode, responses, social-auth-app-django, social-auth-core, talon, traitlets, twilio, w3lib, websocket-client, zulip
|
||||
# via argon2-cffi, automat, aws-sam-translator, cfn-lint, cryptography, django-bitfield, docker, ecdsa, isodate, jsonschema, junit-xml, libthumbor, moto, openapi-core, openapi-schema-validator, openapi-spec-validator, parsel, pip-tools, protego, pyopenssl, python-binary-memcached, python-dateutil, python-debian, python-jose, qrcode, responses, social-auth-app-django, social-auth-core, talon, traitlets, twilio, w3lib, websocket-client, zulip
|
||||
snakeviz==2.1.0 \
|
||||
--hash=sha256:8ce375b18ae4a749516d7e6c6fbbf8be6177c53974f53534d8eadb646cd279b1 \
|
||||
--hash=sha256:92ad876fb6a201a7e23a6b85ea96d9643a51e285667c253a8653643804f7cb68 \
|
||||
|
|
|
@ -243,9 +243,6 @@ httplib2==0.18.1 \
|
|||
--hash=sha256:8af66c1c52c7ffe1aa5dc4bcd7c769885254b0756e6e69f953c7f0ab49a70ba3 \
|
||||
--hash=sha256:ca2914b015b6247791c4866782fa6042f495b94401a0f0bd3e1d6e0ba2236782 \
|
||||
# via -r requirements/common.in
|
||||
hypchat==0.21 \
|
||||
--hash=sha256:ef37a9cd8103bb13ad772b28ba9223ca9d4278371e374450c3ea2918df70a8e9 \
|
||||
# via -r requirements/common.in
|
||||
hyper==0.7.0 \
|
||||
--hash=sha256:069514f54231fb7b5df2fb910a114663a83306d5296f588fffcb0a9be19407fc \
|
||||
--hash=sha256:12c82eacd122a659673484c1ea0d34576430afbe5aa6b8f63fe37fcb06a2458c \
|
||||
|
@ -646,7 +643,7 @@ python-binary-memcached==0.30.1 \
|
|||
python-dateutil==2.8.1 \
|
||||
--hash=sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c \
|
||||
--hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a \
|
||||
# via -r requirements/common.in, botocore, hypchat
|
||||
# via -r requirements/common.in, botocore
|
||||
python-gcm==0.4 \
|
||||
--hash=sha256:511c35fc5ae829f7fc3cbdb45c4ec3fda02f85e4fae039864efe82682ccb9c18 \
|
||||
# via -r requirements/common.in
|
||||
|
@ -751,7 +748,7 @@ requests-oauthlib==1.3.0 \
|
|||
requests[security]==2.25.0 \
|
||||
--hash=sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8 \
|
||||
--hash=sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998 \
|
||||
# via -r requirements/common.in, hypchat, matrix-client, premailer, pyoembed, python-gcm, python-twitter, requests-oauthlib, social-auth-core, stripe, twilio, zulip
|
||||
# via -r requirements/common.in, matrix-client, premailer, pyoembed, python-gcm, python-twitter, requests-oauthlib, social-auth-core, stripe, twilio, zulip
|
||||
s3transfer==0.3.3 \
|
||||
--hash=sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13 \
|
||||
--hash=sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db \
|
||||
|
@ -763,7 +760,7 @@ sentry-sdk==0.19.4 \
|
|||
six==1.15.0 \
|
||||
--hash=sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259 \
|
||||
--hash=sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced \
|
||||
# via argon2-cffi, cryptography, django-bitfield, hypchat, isodate, jsonschema, libthumbor, openapi-core, openapi-schema-validator, openapi-spec-validator, pyopenssl, python-binary-memcached, python-dateutil, qrcode, social-auth-app-django, social-auth-core, talon, traitlets, twilio, zulip
|
||||
# via argon2-cffi, cryptography, django-bitfield, isodate, jsonschema, libthumbor, openapi-core, openapi-schema-validator, openapi-spec-validator, pyopenssl, python-binary-memcached, python-dateutil, qrcode, social-auth-app-django, social-auth-core, talon, traitlets, twilio, zulip
|
||||
social-auth-app-django==4.0.0 \
|
||||
--hash=sha256:2c69e57df0b30c9c1823519c5f1992cbe4f3f98fdc7d95c840e091a752708840 \
|
||||
--hash=sha256:567ad0e028311541d7dfed51d3bf2c60440a6fd236d5d4d06c5a618b3d6c57c5 \
|
||||
|
|
|
@ -243,8 +243,7 @@
|
|||
<a class="feature-block" href="/help/import-from-slack" target="_blank" rel="noopener noreferrer">
|
||||
<h3>DATA IMPORT</h3>
|
||||
<p>
|
||||
Import an existing Slack, Mattermost, HipChat, Stride,
|
||||
or Gitter workspace into Zulip.
|
||||
Import an existing Slack, Mattermost or Gitter workspace into Zulip.
|
||||
</p>
|
||||
</a>
|
||||
<a class="feature-block" href="/help/add-custom-profile-fields" target="_blank" rel="noopener noreferrer">
|
||||
|
|
|
@ -7,7 +7,7 @@ message is about.
|
|||
|---|---|---
|
||||
| Zulip | Stream | Topic
|
||||
| Email | Mailing list | Subject line
|
||||
| Slack/IRC/HipChat | Channel/Room | -
|
||||
| Slack/IRC | Channel/Room | -
|
||||
|
||||
Messages with the same stream and topic are shown together as a
|
||||
conversational thread. Here is what it looks like in Zulip.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
There are a lot of team chat apps. So why did we build Zulip?
|
||||
|
||||
We talk about Slack in the discussion below, but the problems apply equally
|
||||
to other apps with Slack’s conversation model, including HipChat, IRC,
|
||||
to other apps with Slack’s conversation model, including IRC,
|
||||
Mattermost, Discord, Spark, and others.
|
||||
|
||||
## Reading busy Slack channels is extremely inefficient.
|
||||
|
|
|
@ -120,7 +120,6 @@ not_yet_fully_covered = [
|
|||
'zerver/tornado/sharding.py',
|
||||
'zerver/tornado/views.py',
|
||||
# Data import files; relatively low priority
|
||||
'zerver/data_import/hipchat*.py',
|
||||
'zerver/data_import/sequencer.py',
|
||||
'zerver/data_import/slack.py',
|
||||
'zerver/data_import/gitter.py',
|
||||
|
|
|
@ -43,4 +43,4 @@ API_FEATURE_LEVEL = 36
|
|||
# historical commits sharing the same major version, in which case a
|
||||
# minor version bump suffices.
|
||||
|
||||
PROVISION_VERSION = '119.0'
|
||||
PROVISION_VERSION = '120.0'
|
||||
|
|
|
@ -1,882 +0,0 @@
|
|||
import base64
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from typing import Any, Callable, Dict, List, Optional, Set
|
||||
|
||||
import dateutil
|
||||
import hypchat
|
||||
import orjson
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import now as timezone_now
|
||||
|
||||
from zerver.data_import.hipchat_attachment import AttachmentHandler
|
||||
from zerver.data_import.hipchat_user import UserHandler
|
||||
from zerver.data_import.import_util import (
|
||||
SubscriberHandler,
|
||||
build_message,
|
||||
build_personal_subscriptions,
|
||||
build_public_stream_subscriptions,
|
||||
build_realm,
|
||||
build_realm_emoji,
|
||||
build_recipients,
|
||||
build_stream,
|
||||
build_stream_subscriptions,
|
||||
build_user_profile,
|
||||
build_zerver_realm,
|
||||
create_converted_data_files,
|
||||
make_subscriber_map,
|
||||
make_user_messages,
|
||||
write_avatar_png,
|
||||
)
|
||||
from zerver.data_import.sequencer import NEXT_ID, IdMapper
|
||||
from zerver.lib.utils import process_list_in_batches
|
||||
from zerver.models import RealmEmoji, Recipient, UserProfile
|
||||
|
||||
# stubs
|
||||
ZerverFieldsT = Dict[str, Any]
|
||||
|
||||
def str_date_to_float(date_str: str) -> float:
    '''
    Convert a HipChat export timestamp string into a Unix timestamp.

    Dates look like this:

        "2018-08-08T14:23:54Z 626267"

    The optional second token is a microsecond count, which we fold
    into the fractional part of the result.
    '''
    pieces = date_str.split(' ')
    parsed = dateutil.parser.parse(pieces[0].replace('T', ' '))
    result = parsed.timestamp()
    if len(pieces) == 2:
        result += int(pieces[1]) / 1000000.0
    return result
|
||||
|
||||
def untar_input_file(tar_file: str) -> str:
    '''
    Extract the export tarball next to itself and return the absolute
    path of the extraction directory.  If that directory already
    exists, reuse it instead of extracting again.
    '''
    target_dir = os.path.abspath(tar_file.replace('.tar', ''))

    if os.path.exists(target_dir):
        logging.info('input data was already untarred to %s, we will use it', target_dir)
    else:
        os.makedirs(target_dir)
        subprocess.check_call(['tar', '-xf', tar_file, '-C', target_dir])
        logging.info('input data was untarred to %s', target_dir)

    return target_dir
|
||||
|
||||
def read_user_data(data_dir: str) -> List[ZerverFieldsT]:
    '''Load the raw user records from users.json in the export directory.'''
    user_file = os.path.join(data_dir, 'users.json')
    with open(user_file, "rb") as handle:
        raw_bytes = handle.read()
    return orjson.loads(raw_bytes)
|
||||
|
||||
def convert_user_data(user_handler: UserHandler,
                      slim_mode: bool,
                      user_id_mapper: IdMapper,
                      raw_data: List[ZerverFieldsT],
                      realm_id: int) -> None:
    '''
    Translate raw HipChat user records into Zulip UserProfile rows and
    register each one with user_handler.

    Each element of raw_data is a wrapper dict whose 'User' key holds
    the actual HipChat user fields.

    NOTE(review): slim_mode is accepted but never read in this
    function — confirm whether callers rely on it here.
    '''
    flat_data = [
        d['User']
        for d in raw_data
    ]

    def process(in_dict: ZerverFieldsT) -> ZerverFieldsT:
        # Map one HipChat user dict to a Zulip UserProfile dict.
        delivery_email = in_dict['email']
        email = in_dict['email']
        full_name = in_dict['name']
        id = user_id_mapper.get(in_dict['id'])
        is_mirror_dummy = False
        short_name = in_dict['mention_name']
        timezone = in_dict['timezone']

        # HipChat's account_type string maps onto Zulip's role enum;
        # anything other than admin/guest becomes a plain member.
        role = UserProfile.ROLE_MEMBER
        if in_dict['account_type'] == 'admin':
            role = UserProfile.ROLE_REALM_ADMINISTRATOR
        if in_dict['account_type'] == 'guest':
            role = UserProfile.ROLE_GUEST

        date_joined = int(timezone_now().timestamp())
        is_active = not in_dict['is_deleted']

        if not email:
            if role == UserProfile.ROLE_GUEST:
                # HipChat guest users don't have emails, so
                # we just fake them.
                email = f'guest-{id}@example.com'
                delivery_email = email
            else:
                # HipChat sometimes doesn't export an email for deactivated users.
                assert not is_active
                email = delivery_email = f"deactivated-{id}@example.com"

        # unmapped fields:
        #    title - Developer, Project Manager, etc.
        #    rooms - no good sample data
        #    created - we just use "now"
        #    roles - we just use account_type

        # 'U' = user-uploaded avatar (payload present), 'G' = gravatar fallback.
        if in_dict.get('avatar'):
            avatar_source = 'U'
        else:
            avatar_source = 'G'

        return build_user_profile(
            avatar_source=avatar_source,
            date_joined=date_joined,
            delivery_email=delivery_email,
            email=email,
            full_name=full_name,
            id=id,
            is_active=is_active,
            role=role,
            is_mirror_dummy=is_mirror_dummy,
            realm_id=realm_id,
            short_name=short_name,
            timezone=timezone,
        )

    for raw_item in flat_data:
        user = process(raw_item)
        user_handler.add_user(user)
|
||||
|
||||
def convert_avatar_data(avatar_folder: str,
                        raw_data: List[ZerverFieldsT],
                        user_id_mapper: IdMapper,
                        realm_id: int) -> List[ZerverFieldsT]:
    '''
    This code is pretty specific to how HipChat sends us data.
    They give us the avatar payloads in base64 in users.json.

    We process avatars in our own pass of that data, rather
    than doing it while we're getting other user data.  I
    chose to keep this separate, as otherwise you have a lot
    of extraneous data getting passed around.

    This code has MAJOR SIDE EFFECTS--namely writing a bunch
    of files to the avatars directory.

    Returns the list of avatar metadata records produced by
    write_avatar_png, one per user that had an avatar payload.
    '''

    avatar_records = []

    for d in raw_data:
        raw_user = d['User']
        avatar_payload = raw_user.get('avatar')
        if not avatar_payload:
            # Users with no uploaded avatar are simply skipped.
            continue

        bits = base64.b64decode(avatar_payload)

        raw_user_id = raw_user['id']
        if not user_id_mapper.has(raw_user_id):
            # Skip avatars for users we didn't import.
            continue

        user_id = user_id_mapper.get(raw_user_id)

        # Side effect: writes the decoded image under avatar_folder.
        metadata = write_avatar_png(
            avatar_folder=avatar_folder,
            realm_id=realm_id,
            user_id=user_id,
            bits=bits,
        )
        avatar_records.append(metadata)

    return avatar_records
|
||||
|
||||
def read_room_data(data_dir: str) -> List[ZerverFieldsT]:
    '''Load the raw room records from rooms.json in the export directory.'''
    room_file = os.path.join(data_dir, 'rooms.json')
    with open(room_file, "rb") as handle:
        raw_bytes = handle.read()
    return orjson.loads(raw_bytes)
|
||||
|
||||
def convert_room_data(raw_data: List[ZerverFieldsT],
                      subscriber_handler: SubscriberHandler,
                      stream_id_mapper: IdMapper,
                      user_id_mapper: IdMapper,
                      realm_id: int,
                      api_token: Optional[str]=None) -> List[ZerverFieldsT]:
    '''
    Translate raw HipChat room records into Zulip Stream rows.

    Subscriber info for each stream is registered with
    subscriber_handler as a side effect.  For public rooms the export
    itself has no membership data, so when api_token is provided we
    fetch members from the live HipChat API via hypchat.
    '''
    flat_data = [
        d['Room']
        for d in raw_data
    ]

    def get_invite_only(v: str) -> bool:
        # HipChat's 'privacy' field maps onto Zulip's invite_only flag.
        if v == 'public':
            return False
        elif v == 'private':
            return True
        else:
            raise Exception('unexpected value')

    streams = []

    for in_dict in flat_data:
        now = int(timezone_now().timestamp())
        stream_id = stream_id_mapper.get(in_dict['id'])

        invite_only = get_invite_only(in_dict['privacy'])

        stream = build_stream(
            date_created=now,
            realm_id=realm_id,
            name=in_dict['name'],
            description=in_dict['topic'],
            stream_id=stream_id,
            deactivated=in_dict['is_archived'],
            invite_only=invite_only,
        )

        if invite_only:
            # Private rooms list their members in the export itself;
            # only include members we actually imported.
            users: Set[int] = {
                user_id_mapper.get(key)
                for key in in_dict['members']
                if user_id_mapper.has(key)
            }

            if user_id_mapper.has(in_dict['owner']):
                owner = user_id_mapper.get(in_dict['owner'])
                users.add(owner)
        else:
            users = set()
            if api_token is not None:
                # Public-room membership isn't in the export; query
                # the live HipChat API for it.
                hc = hypchat.HypChat(api_token)
                room_data = hc.fromurl('{}/v2/room/{}/member'.format(hc.endpoint, in_dict['id']))

                for item in room_data['items']:
                    hipchat_user_id = item['id']
                    zulip_user_id = user_id_mapper.get(hipchat_user_id)
                    users.add(zulip_user_id)

        if users:
            subscriber_handler.set_info(
                stream_id=stream_id,
                users=users,
            )

        # unmapped fields:
        #    guest_access_url: no Zulip equivalent
        #    created: we just use "now"
        #    participants: no good sample data

        streams.append(stream)

    return streams
|
||||
|
||||
def make_realm(realm_id: int) -> ZerverFieldsT:
    '''Build the skeleton realm dict for the imported HipChat instance.'''
    now = float(timezone_now().timestamp())
    zerver_realm = build_zerver_realm(realm_id, "", now, 'HipChat')
    realm = build_realm(zerver_realm, realm_id, settings.EXTERNAL_HOST)

    # We may override these later.
    realm['zerver_defaultstream'] = []

    return realm
|
||||
|
||||
def write_avatar_data(raw_user_data: List[ZerverFieldsT],
                      output_dir: str,
                      user_id_mapper: IdMapper,
                      realm_id: int) -> None:
    '''
    Decode and write avatar images for all imported users, then emit
    the avatars/records.json metadata file under output_dir.
    '''
    avatar_dir = os.path.join(output_dir, 'avatars')
    os.makedirs(os.path.join(avatar_dir, str(realm_id)), exist_ok=True)

    records = convert_avatar_data(
        avatar_folder=avatar_dir,
        raw_data=raw_user_data,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    create_converted_data_files(records, output_dir, '/avatars/records.json')
|
||||
|
||||
def write_emoticon_data(realm_id: int,
                        data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        emoticons.json - has very simple metadata on emojis:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - contains a bunch of image files:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            spiderman.gif
            stableparrot.gif
            stalkerparrot.gif
            supergirl.png
            superman.png

    We move all the relevant files to Zulip's more nested
    directory structure.
    '''

    logging.info('Starting to process emoticons')

    fn = 'emoticons.json'
    data_file = os.path.join(data_dir, fn)
    if not os.path.exists(data_file):
        # Custom emoji are optional in a HipChat export; degrade gracefully.
        logging.warning("HipChat export does not contain emoticons.json.")
        logging.warning("As a result, custom emoji cannot be imported.")
        return []

    with open(data_file, "rb") as f:
        data = orjson.loads(f.read())

    if isinstance(data, dict) and 'Emoticons' in data:
        # Handle the hc-migrate export format for emoticons.json.
        flat_data = [
            dict(
                path=d['path'],
                name=d['shortcut'],
            )
            for d in data['Emoticons']
        ]
    else:
        # Standard export format: a list of {"Emoticon": {...}} wrappers.
        flat_data = [
            dict(
                path=d['Emoticon']['path'],
                name=d['Emoticon']['shortcut'],
            )
            for d in data
        ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        # Copy one emoticon image into Zulip's nested emoji directory
        # and return the metadata record for it.
        source_sub_path = data['path']
        source_fn = os.path.basename(source_sub_path)
        source_path = os.path.join(data_dir, source_sub_path)

        # Use our template from RealmEmoji
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        target_fn = source_fn
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data['name'],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec['name'],
            id=NEXT_ID('realmemoji'),
            file_name=rec['file_name'],
        )
        for rec in emoji_records
    ]
    logging.info('Done processing emoticons')

    return realmemoji
|
||||
|
||||
def write_message_data(realm_id: int,
                       slim_mode: bool,
                       message_key: str,
                       zerver_recipient: List[ZerverFieldsT],
                       subscriber_map: Dict[int, Set[int]],
                       data_dir: str,
                       output_dir: str,
                       masking_content: bool,
                       stream_id_mapper: IdMapper,
                       user_id_mapper: IdMapper,
                       stream_id_mapper_unused_placeholder: None = None,
                       user_handler: UserHandler = None,
                       attachment_handler: AttachmentHandler = None) -> None:
    '''
    Walk all history.json files for one message type and convert them.

    message_key selects which kind of history we are processing:
    'UserMessage'/'NotificationMessage' come from rooms (streams),
    'PrivateUserMessage' comes from per-user directories (PMs).
    '''

    # Recipient lookup tables built from the already-converted
    # Recipient rows: stream_id -> recipient_id and user_id -> recipient_id.
    stream_id_to_recipient_id = {
        d['type_id']: d['id']
        for d in zerver_recipient
        if d['type'] == Recipient.STREAM
    }

    user_id_to_recipient_id = {
        d['type_id']: d['id']
        for d in zerver_recipient
        if d['type'] == Recipient.PERSONAL
    }

    def get_stream_recipient_id(raw_message: ZerverFieldsT) -> int:
        fn_id = raw_message['fn_id']
        stream_id = stream_id_mapper.get(fn_id)
        recipient_id = stream_id_to_recipient_id[stream_id]
        return recipient_id

    def get_pm_recipient_id(raw_message: ZerverFieldsT) -> int:
        raw_user_id = raw_message['receiver_id']
        assert(raw_user_id)
        user_id = user_id_mapper.get(raw_user_id)
        recipient_id = user_id_to_recipient_id[user_id]
        return recipient_id

    if message_key in ['UserMessage', 'NotificationMessage']:
        is_pm_data = False
        dir_glob = os.path.join(data_dir, 'rooms', '*', 'history.json')
        get_recipient_id = get_stream_recipient_id
        get_files_dir = lambda fn_id: os.path.join(data_dir, 'rooms', str(fn_id), 'files')

    elif message_key == 'PrivateUserMessage':
        is_pm_data = True
        dir_glob = os.path.join(data_dir, 'users', '*', 'history.json')
        get_recipient_id = get_pm_recipient_id
        # PM attachments all live in one shared files directory.
        get_files_dir = lambda fn_id: os.path.join(data_dir, 'users', 'files')

    else:
        raise Exception('programming error: invalid message_key: ' + message_key)

    history_files = glob.glob(dir_glob)
    for fn in history_files:
        # The directory name is the room id (or user id for PMs).
        dir = os.path.dirname(fn)
        fn_id = os.path.basename(dir)
        files_dir = get_files_dir(fn_id)

        process_message_file(
            realm_id=realm_id,
            slim_mode=slim_mode,
            fn=fn,
            fn_id=fn_id,
            files_dir=files_dir,
            get_recipient_id=get_recipient_id,
            message_key=message_key,
            subscriber_map=subscriber_map,
            data_dir=data_dir,
            output_dir=output_dir,
            is_pm_data=is_pm_data,
            masking_content=masking_content,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )
|
||||
|
||||
def get_hipchat_sender_id(realm_id: int,
                          slim_mode: bool,
                          message_dict: Dict[str, Any],
                          user_id_mapper: IdMapper,
                          user_handler: UserHandler) -> Optional[int]:
    '''
    The HipChat export is inconsistent in how it renders
    senders, and sometimes we don't even get an id.
    '''
    def mirror_id(name: Any) -> int:
        # Fall back to a mirror user for senders we cannot map directly.
        return user_handler.get_mirror_user(realm_id=realm_id, name=name)['id']

    sender = message_dict['sender']

    if isinstance(sender, str):
        # Some HipChat instances just give us a person's
        # name in the sender field for NotificationMessage.
        # We turn them into a mirror user.
        return None if slim_mode else mirror_id(sender)

    raw_sender_id = sender['id']

    if raw_sender_id == 0:
        return None if slim_mode else mirror_id(sender['name'])

    if not user_id_mapper.has(raw_sender_id):
        return None if slim_mode else mirror_id(sender['id'])

    # HAPPY PATH: HipChat just gave us an ordinary
    # sender_id.
    return user_id_mapper.get(raw_sender_id)
|
||||
|
||||
def process_message_file(realm_id: int,
                         slim_mode: bool,
                         fn: str,
                         fn_id: str,
                         files_dir: str,
                         get_recipient_id: Callable[[ZerverFieldsT], int],
                         message_key: str,
                         subscriber_map: Dict[int, Set[int]],
                         data_dir: str,
                         output_dir: str,
                         is_pm_data: bool,
                         masking_content: bool,
                         user_id_mapper: IdMapper,
                         user_handler: UserHandler,
                         attachment_handler: AttachmentHandler) -> None:
    '''
    Convert one history.json file into Zulip message dump files,
    processing the messages in batches of 1000.
    '''

    def get_raw_messages(fn: str) -> List[ZerverFieldsT]:
        # Parse the history file and normalize each entry into the
        # intermediate raw-message dict shape the batch processor expects.
        with open(fn, "rb") as f:
            data = orjson.loads(f.read())

        # Only keep entries of the message type we're currently processing.
        flat_data = [
            d[message_key]
            for d in data
            if message_key in d
        ]

        def get_raw_message(d: Dict[str, Any]) -> Optional[ZerverFieldsT]:
            sender_id = get_hipchat_sender_id(
                realm_id=realm_id,
                slim_mode=slim_mode,
                message_dict=d,
                user_id_mapper=user_id_mapper,
                user_handler=user_handler,
            )

            if sender_id is None:
                return None

            if is_pm_data:
                # We need to compare with str() on both sides here.
                # In Stride, user IDs are strings, but in HipChat,
                # they are integers, and fn_id is always a string.
                if str(sender_id) != str(fn_id):
                    # PMs are in multiple places in the HipChat export,
                    # and we only use the copy from the sender
                    return None

            content = d['message']

            if masking_content:
                # Scrub message text while preserving its shape,
                # for privacy-preserving test exports.
                content = re.sub('[a-z]', 'x', content)
                content = re.sub('[A-Z]', 'X', content)

            return dict(
                fn_id=fn_id,
                sender_id=sender_id,
                receiver_id=d.get('receiver', {}).get('id'),
                content=content,
                mention_user_ids=d.get('mentions', []),
                date_sent=str_date_to_float(d['timestamp']),
                attachment=d.get('attachment'),
                files_dir=files_dir,
            )

        raw_messages = []

        for d in flat_data:
            raw_message = get_raw_message(d)
            if raw_message is not None:
                raw_messages.append(raw_message)

        return raw_messages

    raw_messages = get_raw_messages(fn)

    def process_batch(lst: List[Any]) -> None:
        process_raw_message_batch(
            realm_id=realm_id,
            raw_messages=lst,
            subscriber_map=subscriber_map,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
            get_recipient_id=get_recipient_id,
            is_pm_data=is_pm_data,
            output_dir=output_dir,
        )

    chunk_size = 1000

    process_list_in_batches(
        lst=raw_messages,
        chunk_size=chunk_size,
        process_batch=process_batch,
    )
|
||||
|
||||
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool,
                              output_dir: str) -> None:
    '''
    Convert one batch of raw HipChat messages into Zulip Message and
    UserMessage rows, then write them out as a messages-NNNNNN.json
    dump file in output_dir.
    '''

    def fix_mentions(content: str,
                     mention_user_ids: Set[int]) -> str:
        # Rewrite HipChat @short_name mentions into Zulip's
        # @**full name** syntax, and @here into @**all**.
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    # message_id -> set of mentioned Zulip user ids; consumed by
    # make_user_messages below.
    mention_map: Dict[int, Set[int]] = {}

    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    for raw_message in raw_messages:
        # One side effect here:

        message_id = NEXT_ID('message')
        mention_user_ids = {
            user_id_mapper.get(id)
            for id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        # HipChat message bodies are HTML; convert to markdown-ish text.
        content = h.handle(content)

        if len(content) > 10000:
            logging.info('skipping too-long message of length %s', len(content))
            continue

        date_sent = raw_message['date_sent']

        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug("Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from HipChat'
        user_id = raw_message['sender_id']

        # Another side effect:
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)
|
||||
|
||||
def do_convert_data(input_tar_file: str,
                    output_dir: str,
                    masking_content: bool,
                    api_token: Optional[str]=None,
                    slim_mode: bool=False) -> None:
    """Convert a HipChat export tarball into the Zulip import format.

    Untars the export, converts users, streams, subscriptions, emoji,
    messages, avatars, and attachments into Zulip's data-import JSON
    layout under ``output_dir``, and finally packs the result into
    ``output_dir + '.tar.gz'``.

    Args:
        input_tar_file: Path to the HipChat export tarball.
        output_dir: Directory to write the converted JSON files into.
        masking_content: If True, message content is masked for privacy
            (useful during QA).
        api_token: Optional HipChat API token used to fetch real
            subscriber data; when None we fall back to auto-subscribing
            users to public streams (unless slim_mode).
        slim_mode: When no token is available, skip the auto-subscribe
            of every user to every public stream.

    NOTE(review): the ordering of the steps below matters — see the
    comment before writing zerver_userprofile.
    """
    input_data_dir = untar_input_file(input_tar_file)

    # Stateful helpers shared across the conversion passes.
    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        slim_mode=slim_mode,
        user_id_mapper=user_id_mapper,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        subscriber_handler=subscriber_handler,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
        api_token=api_token,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    if api_token is None:
        # Without an API token we cannot fetch real public-stream
        # subscriber lists, so (unless slim_mode) we subscribe every
        # user to every public stream.
        if slim_mode:
            public_stream_subscriptions: List[ZerverFieldsT] = []
        else:
            public_stream_subscriptions = build_public_stream_subscriptions(
                zerver_userprofile=normal_users,
                zerver_recipient=zerver_recipient,
                zerver_stream=zerver_stream,
            )

        # Private streams do come with subscriber data in the export.
        private_stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=[stream_dict for stream_dict in zerver_stream
                           if stream_dict['invite_only']],
        )
        stream_subscriptions = public_stream_subscriptions + private_stream_subscriptions
    else:
        # With a token, subscriber_handler has real data for all streams.
        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient,
    )
    zerver_subscription = personal_subscriptions + stream_subscriptions

    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription,
    )

    logging.info('Start importing message data')
    # The HipChat export splits messages across these three history types.
    for message_key in ['UserMessage',
                        'NotificationMessage',
                        'PrivateUserMessage']:
        write_message_data(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            data_dir=input_data_dir,
            output_dir=output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    realm['sort_by_date'] = True

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    # Flush the attachment/upload records accumulated while converting
    # messages above.
    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
|
|
@ -1,136 +0,0 @@
|
|||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from zerver.data_import.import_util import build_attachment, create_converted_data_files
|
||||
|
||||
|
||||
class AttachmentHandler:
    """Tracks file attachments encountered while converting messages.

    ``handle_message_data`` is called once per message during conversion;
    it records metadata for each attachment and returns the markdown link
    to append to the message content.  After all messages are processed,
    ``write_info`` emits the Attachment rows / upload records and copies
    the actual files into the output directory.
    """

    def __init__(self) -> None:
        # Maps the Zulip-side target path of each upload to a dict of
        # metadata (message_ids, sender_id, local_fn, size, mtime, ...).
        self.info_dict: Dict[str, Dict[str, Any]] = {}

    def handle_message_data(self,
                            realm_id: int,
                            message_id: int,
                            sender_id: int,
                            attachment: Dict[str, Any],
                            files_dir: str) -> Optional[str]:
        """Record one message's attachment; return its markdown link.

        Returns None (and logs) when there is no attachment, when the
        export lacks path data for it, or when the referenced file is
        missing from ``files_dir``.
        """
        if not attachment:
            return None

        name = attachment['name']

        if 'path' not in attachment:
            logging.info('Skipping HipChat attachment with missing path data: ' + name)
            return None

        path = attachment['path']

        local_fn = os.path.join(files_dir, path)

        if not os.path.exists(local_fn):
            # HipChat has an option to not include these in its
            # exports, since file uploads can be very large.
            logging.info('Skipping attachment with no file data: ' + local_fn)
            return None

        target_path = os.path.join(
            str(realm_id),
            'HipChatImportAttachment',
            path,
        )

        if target_path in self.info_dict:
            # Same file referenced by multiple messages: just attach the
            # new message id to the existing record.
            logging.info("file used multiple times: " + path)
            info = self.info_dict[target_path]
            info['message_ids'].add(message_id)
            return info['content']

        # HipChat provides size info in the export, but it's not
        # completely trustworthy, so we just ask the OS for file details.
        # (The export's 'size' field is deliberately ignored.)
        size = os.path.getsize(local_fn)
        mtime = os.path.getmtime(local_fn)

        content = f'[{name}](/user_uploads/{target_path})'

        info = dict(
            message_ids={message_id},
            sender_id=sender_id,
            local_fn=local_fn,
            target_path=target_path,
            name=name,
            size=size,
            mtime=mtime,
            content=content,
        )
        self.info_dict[target_path] = info

        return content

    def write_info(self, output_dir: str, realm_id: int) -> None:
        """Emit attachment.json / uploads records and copy the files.

        Must be called after all messages have been processed, since it
        consumes the records accumulated by ``handle_message_data``.
        """
        attachments: List[Dict[str, Any]] = []
        uploads_records: List[Dict[str, Any]] = []

        def add_attachment(info: Dict[str, Any]) -> None:
            # Build the zerver_attachment row for this upload.
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            # Record for uploads/records.json, consumed by the importer.
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            # Absolute destination under <output_dir>/uploads/, creating
            # intermediate directories as needed.
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments,
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')
|
|
@ -1,84 +0,0 @@
|
|||
from typing import Any, Dict, List
|
||||
|
||||
from django.utils.timezone import now as timezone_now
|
||||
|
||||
from zerver.data_import.import_util import build_user_profile
|
||||
from zerver.models import UserProfile
|
||||
|
||||
|
||||
class UserHandler:
    '''
    A thin wrapper around the data that eventually becomes
    zerver_userprofile rows.

    It lets the conversion code look users up by id (e.g. for
    mentions) and lazily builds "mirror" dummy users for senders
    that don't exist in the export's user data.
    '''

    def __init__(self) -> None:
        # Normal (exported) users, keyed by their HipChat user id.
        self.id_to_user_map: Dict[int, Dict[str, Any]] = {}
        # Lazily-created mirror dummy users, keyed by sender name.
        self.name_to_mirror_user_map: Dict[str, Dict[str, Any]] = {}
        # Next candidate id to try for a new mirror user.
        self.mirror_user_id = 1

    def add_user(self, user: Dict[str, Any]) -> None:
        """Register a normal user row under its 'id' field."""
        self.id_to_user_map[user['id']] = user

    def get_user(self, user_id: int) -> Dict[str, Any]:
        """Look up a previously added normal user by id."""
        return self.id_to_user_map[user_id]

    def get_mirror_user(self,
                        realm_id: int,
                        name: str) -> Dict[str, Any]:
        """Return (creating on first use) the mirror dummy user for *name*."""
        existing = self.name_to_mirror_user_map.get(name)
        if existing is not None:
            return existing

        mirror_id = self._new_mirror_user_id()
        mirror_email = f'mirror-{mirror_id}@example.com'

        user = build_user_profile(
            avatar_source='G',
            date_joined=int(timezone_now().timestamp()),
            delivery_email=mirror_email,
            email=mirror_email,
            full_name=name,
            id=mirror_id,
            is_active=False,
            role=UserProfile.ROLE_MEMBER,
            is_mirror_dummy=True,
            realm_id=realm_id,
            short_name=name,
            timezone='UTC',
        )

        self.name_to_mirror_user_map[name] = user
        return user

    def _new_mirror_user_id(self) -> int:
        """Return the next user id not already taken by a normal user."""
        candidate = self.mirror_user_id
        while candidate in self.id_to_user_map:
            candidate += 1
        self.mirror_user_id = candidate + 1
        return candidate

    def get_normal_users(self) -> List[Dict[str, Any]]:
        """All exported (non-mirror) users."""
        return list(self.id_to_user_map.values())

    def get_all_users(self) -> List[Dict[str, Any]]:
        """All users: normal users first, then mirror dummies."""
        return self.get_normal_users() + list(self.name_to_mirror_user_map.values())
|
|
@ -170,8 +170,8 @@ def build_public_stream_subscriptions(
|
|||
zerver_recipient: List[ZerverFieldsT],
|
||||
zerver_stream: List[ZerverFieldsT]) -> List[ZerverFieldsT]:
|
||||
'''
|
||||
This function is only used for HipChat now, but it may apply to
|
||||
future conversions. We often don't get full subscriber data in
|
||||
This function was only used for HipChat, but it may apply to
|
||||
future conversions. We often didn't get full subscriber data in
|
||||
the HipChat export, so this function just autosubscribes all
|
||||
users to every public stream. This returns a list of Subscription
|
||||
dicts.
|
||||
|
@ -298,8 +298,8 @@ def build_recipients(zerver_userprofile: Iterable[ZerverFieldsT],
|
|||
zerver_stream: Iterable[ZerverFieldsT],
|
||||
zerver_huddle: Iterable[ZerverFieldsT] = []) -> List[ZerverFieldsT]:
|
||||
'''
|
||||
As of this writing, we only use this in the HipChat
|
||||
conversion. The Slack and Gitter conversions do it more
|
||||
This function was only used by the HipChat import, but it may be
|
||||
required for future conversions. The Slack and Gitter conversions do it more
|
||||
tightly integrated with creating other objects.
|
||||
'''
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ sequences work.
|
|||
You need to be a bit careful here, since
|
||||
you're dealing with a big singleton, but
|
||||
for data imports that's usually easy to
|
||||
manage. See hipchat.py for example usage.
|
||||
manage.
|
||||
'''
|
||||
|
||||
def _seq() -> Callable[[], int]:
|
||||
|
|
|
@ -52,10 +52,6 @@ TAB_DISPLAY_NAMES = {
|
|||
'desktop': 'Desktop',
|
||||
'mobile': 'Mobile',
|
||||
|
||||
'cloud': 'HipChat Cloud',
|
||||
'server': 'HipChat Server or Data Center',
|
||||
'stride': 'Stride',
|
||||
|
||||
'mm-default': 'Default installation',
|
||||
'mm-docker': 'Docker',
|
||||
'mm-gitlab-omnibus': 'GitLab Omnibus',
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
import argparse
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
'''
|
||||
Example usage for testing purposes:
|
||||
|
||||
Move the data:
|
||||
rm -Rf ~/hipchat-data
|
||||
mkdir ~/hipchat-data
|
||||
./manage.py convert_hipchat_data ~/hipchat-31028-2018-08-08_23-23-22.tar --output ~/hipchat-data
|
||||
./manage.py import --destroy-rebuild-database hipchat ~/hipchat-data
|
||||
|
||||
|
||||
Test out the realm:
|
||||
./tools/run-dev.py
|
||||
go to browser and use your dev url
|
||||
|
||||
spec:
|
||||
https://confluence.atlassian.com/hipchatkb/
|
||||
exporting-from-hipchat-server-or-data-center-for-data-portability-950821555.html
|
||||
'''
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError, CommandParser
|
||||
|
||||
from zerver.data_import.hipchat import do_convert_data
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command wrapping the HipChat -> Zulip data converter."""

    help = """Convert the HipChat data into Zulip data format."""

    def add_arguments(self, parser: CommandParser) -> None:
        # One or more export tarballs, converted in sequence.
        parser.add_argument('hipchat_tar', nargs='+',
                            metavar='<hipchat data tarfile>',
                            help="tar of HipChat data")

        parser.add_argument('--output', dest='output_dir',
                            help='Directory to write exported data to.')

        parser.add_argument('--mask', dest='masking_content',
                            action="store_true",
                            help='Mask the content for privacy during QA.')

        parser.add_argument('--slim-mode',
                            action="store_true",
                            help=("Default to no public stream subscriptions if no token is available."
                                  " See import docs for details."))

        parser.add_argument('--token', dest='api_token',
                            help='API token for the HipChat API for fetching subscribers.')

        # Keep our multi-line help text formatted as written.
        parser.formatter_class = argparse.RawTextHelpFormatter

    def handle(self, *args: Any, **options: Any) -> None:
        output_dir = options["output_dir"]

        # Validate the output location before doing any work.
        if output_dir is None:
            raise CommandError("You need to specify --output <output directory>")

        if os.path.exists(output_dir) and not os.path.isdir(output_dir):
            raise CommandError(output_dir + " is not a directory")

        os.makedirs(output_dir, exist_ok=True)

        if os.listdir(output_dir):
            raise CommandError('Output directory should be empty!')

        output_dir = os.path.realpath(output_dir)

        for tar_path in options['hipchat_tar']:
            if not os.path.exists(tar_path):
                raise CommandError(f"Tar file not found: '{tar_path}'")

            print("Converting data ...")
            do_convert_data(
                input_tar_file=tar_path,
                output_dir=output_dir,
                masking_content=options.get('masking_content', False),
                slim_mode=options['slim_mode'],
                api_token=options.get("api_token"),
            )
|
|
@ -1,76 +0,0 @@
|
|||
from typing import Any, Dict
|
||||
|
||||
from zerver.data_import.hipchat import get_hipchat_sender_id
|
||||
from zerver.data_import.hipchat_user import UserHandler
|
||||
from zerver.data_import.sequencer import IdMapper
|
||||
from zerver.lib.test_classes import ZulipTestCase
|
||||
|
||||
|
||||
class HipChatImporter(ZulipTestCase):
    def test_sender_ids(self) -> None:
        """get_hipchat_sender_id handles normal, bot, and id-0 senders."""
        realm_id = 5
        user_handler = UserHandler()

        user_id_mapper = IdMapper()
        self.assertEqual(user_id_mapper.get(1), 1)

        # A "normal" sender: the user exists in the export data.
        normal_user = dict(
            id=1,
            # other fields don't matter here
        )
        user_handler.add_user(user=normal_user)

        normal_message: Dict[str, Any] = dict(
            sender=dict(
                id=1,
            ),
        )

        self.assertEqual(
            get_hipchat_sender_id(
                realm_id=realm_id,
                slim_mode=False,
                message_dict=normal_message,
                user_id_mapper=user_id_mapper,
                user_handler=user_handler,
            ),
            1,
        )

        # A bot sender is identified only by a name string; every
        # message from fred_bot should map to the same sender_id.
        bot_message = dict(
            sender='fred_bot',
        )
        fred_bot_sender_id = 2

        for _ in range(3):
            self.assertEqual(
                get_hipchat_sender_id(
                    realm_id=realm_id,
                    slim_mode=False,
                    message_dict=bot_message,
                    user_id_mapper=user_id_mapper,
                    user_handler=user_handler,
                ),
                fred_bot_sender_id,
            )

        # Senders with id 0 fall back to their name, like bots.
        id_zero_message = dict(
            sender=dict(
                id=0,
                name='hal_bot',
            ),
        )
        hal_bot_sender_id = 3

        for _ in range(3):
            self.assertEqual(
                get_hipchat_sender_id(
                    realm_id=realm_id,
                    slim_mode=False,
                    message_dict=id_zero_message,
                    user_id_mapper=user_id_mapper,
                    user_handler=user_handler,
                ),
                hal_bot_sender_id,
            )
|
|
@ -51,7 +51,7 @@ def api_teamcity_webhook(request: HttpRequest, user_profile: UserProfile,
|
|||
payload: Dict[str, Any]=REQ(argument_type='body')) -> HttpResponse:
|
||||
message = payload.get('build')
|
||||
if message is None:
|
||||
# Ignore third-party specific (e.g. Slack/HipChat) payload formats
|
||||
# Ignore third-party specific (e.g. Slack) payload formats
|
||||
# and notify the bot owner
|
||||
message = MISCONFIGURED_PAYLOAD_TYPE_ERROR_MESSAGE.format(
|
||||
bot_name=user_profile.full_name,
|
||||
|
|
Loading…
Reference in New Issue