2018-05-29 13:55:52 +02:00
|
|
|
import logging
|
2020-06-11 00:54:34 +02:00
|
|
|
import os
|
2018-05-29 13:55:52 +02:00
|
|
|
import subprocess
|
2020-06-11 00:54:34 +02:00
|
|
|
from typing import Any, Dict, List, Set, Tuple
|
2018-05-29 13:55:52 +02:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
import dateutil.parser
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2018-05-29 13:55:52 +02:00
|
|
|
from django.conf import settings
|
|
|
|
from django.forms.models import model_to_dict
|
|
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.data_import.import_util import (
|
|
|
|
ZerverFieldsT,
|
|
|
|
build_avatar,
|
|
|
|
build_defaultstream,
|
|
|
|
build_message,
|
|
|
|
build_realm,
|
|
|
|
build_recipient,
|
|
|
|
build_stream,
|
|
|
|
build_subscription,
|
|
|
|
build_usermessages,
|
|
|
|
build_zerver_realm,
|
|
|
|
create_converted_data_files,
|
2022-06-01 15:11:02 +02:00
|
|
|
long_term_idle_helper,
|
2020-06-11 00:54:34 +02:00
|
|
|
make_subscriber_map,
|
|
|
|
process_avatars,
|
|
|
|
)
|
2018-06-07 12:25:59 +02:00
|
|
|
from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.models import Recipient, UserProfile
|
2022-08-26 14:43:46 +02:00
|
|
|
from zproject.backends import GitHubAuthBackend
|
2018-05-29 13:55:52 +02:00
|
|
|
|
|
|
|
# stubs
# Shape of the parsed Gitter export: a list of message dictionaries.
GitterDataT = List[Dict[str, Any]]

# Id given to the single realm this converter produces.
realm_id = 0
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def gitter_workspace_to_realm(
    domain_name: str, gitter_data: GitterDataT, realm_subdomain: str
) -> Tuple[ZerverFieldsT, List[ZerverFieldsT], Dict[str, int], Dict[str, int]]:
    """
    Build the realm, users, streams, recipients and subscriptions for a Gitter export.

    Returns:
    1. realm, converted realm data
    2. avatars, which is list to map avatars to Zulip avatar records.json
    3. user_map, which is a dictionary to map from Gitter user id to Zulip user id
    4. stream_map, which is a dictionary to map from Gitter rooms to Zulip stream id
    """
    now = timezone_now().timestamp()
    zerver_realm: List[ZerverFieldsT] = build_zerver_realm(realm_id, realm_subdomain, now, "Gitter")

    # Users will have GitHub's generated noreply email addresses so their only way to log in
    # at first is via GitHub. So we set GitHub to be the only authentication method enabled
    # by default to avoid user confusion.
    assert len(zerver_realm) == 1
    github_backend_name = GitHubAuthBackend.auth_backend_name
    zerver_realm[0]["authentication_methods"] = [
        # Keep every method name; only the GitHub entry ends up enabled.
        (auth_method[0], auth_method[0] == github_backend_name)
        for auth_method in zerver_realm[0]["authentication_methods"]
    ]

    realm = build_realm(zerver_realm, realm_id, domain_name)

    # The builders below take an integer timestamp.
    zerver_userprofile, avatars, user_map = build_userprofile(int(now), domain_name, gitter_data)
    zerver_stream, zerver_defaultstream, stream_map = build_stream_map(int(now), gitter_data)
    zerver_recipient, zerver_subscription = build_recipient_and_subscription(
        zerver_userprofile, zerver_stream
    )

    realm["zerver_userprofile"] = zerver_userprofile
    realm["zerver_stream"] = zerver_stream
    realm["zerver_defaultstream"] = zerver_defaultstream
    realm["zerver_recipient"] = zerver_recipient
    realm["zerver_subscription"] = zerver_subscription

    return realm, avatars, user_map, stream_map
|
2018-05-29 13:55:52 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_userprofile(
    timestamp: Any, domain_name: str, gitter_data: GitterDataT
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], Dict[str, int]]:
    """
    Create one user profile per distinct message sender found in gitter_data.

    Returns:
    1. zerver_userprofile, which is a list of user profile
    2. avatar_list, which is list to map avatars to Zulip avatars records.json
    3. added_users, which is a dictionary to map from Gitter user id to Zulip id
    """
    logging.info("######### IMPORTING USERS STARTED #########\n")
    zerver_userprofile: List[ZerverFieldsT] = []
    avatar_list: List[ZerverFieldsT] = []
    user_map: Dict[str, int] = {}
    user_id = 0

    for data in gitter_data:
        # Only the first message from each sender creates a profile.
        if get_user_from_message(data) in user_map:
            continue

        user_data = data["fromUser"]
        user_map[user_data["id"]] = user_id

        email = get_user_email(user_data, domain_name)
        if user_data.get("avatarUrl"):
            build_avatar(user_id, realm_id, email, user_data["avatarUrl"], timestamp, avatar_list)

        # Build userprofile object
        userprofile = UserProfile(
            full_name=user_data["displayName"],
            id=user_id,
            email=email,
            delivery_email=email,
            avatar_source="U",
            date_joined=timestamp,
            last_login=timestamp,
        )
        userprofile_dict = model_to_dict(userprofile)
        # Set realm id separately as the corresponding realm is not yet a Realm model
        # instance
        userprofile_dict["realm"] = realm_id

        # We use this later, even though Zulip doesn't
        # support short_name
        userprofile_dict["short_name"] = user_data["username"]

        zerver_userprofile.append(userprofile_dict)
        user_id += 1

    logging.info("######### IMPORTING USERS FINISHED #########\n")
    return zerver_userprofile, avatar_list, user_map
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-29 13:55:52 +02:00
|
|
|
def get_user_email(user_data: ZerverFieldsT, domain_name: str) -> str:
    """
    Return the GitHub noreply address derived from the user's "username".

    domain_name is accepted for interface compatibility but is currently unused.
    """
    # TODO Get user email from github
    username = user_data["username"]
    return f"{username}@users.noreply.github.com"
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_stream_map(
    timestamp: Any, gitter_data: GitterDataT
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], Dict[str, int]]:
    """
    Create the default stream plus one stream per distinct "room" in gitter_data.

    Returns:
    1. stream, which is the list of streams
    2. defaultstreams, which is the list of default streams
    3. stream_map, which is a dictionary to map from Gitter rooms to Zulip stream id
    """
    logging.info("######### IMPORTING STREAM STARTED #########\n")
    stream_id = 0
    stream: List[ZerverFieldsT] = []

    # Default stream when no "room" field is present
    stream.append(
        build_stream(timestamp, realm_id, "from gitter", "Imported from Gitter", stream_id)
    )
    defaultstream = build_defaultstream(realm_id=realm_id, stream_id=stream_id, defaultstream_id=0)
    stream_id += 1

    # Gathering streams from gitter_data
    stream_map: Dict[str, int] = {}
    for data in gitter_data:
        if "room" not in data:
            continue
        room = data["room"]
        if room in stream_map:
            continue

        stream.append(build_stream(timestamp, realm_id, room, f"Gitter room {room}", stream_id))
        stream_map[room] = stream_id
        stream_id += 1

    logging.info("######### IMPORTING STREAMS FINISHED #########\n")

    return stream, [defaultstream], stream_map
|
2018-05-29 13:55:52 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-29 13:55:52 +02:00
|
|
|
def build_recipient_and_subscription(
    zerver_userprofile: List[ZerverFieldsT], zerver_stream: List[ZerverFieldsT]
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT]]:
    """
    Assumes that there is at least one stream with 'stream_id' = 0,
    and that this stream is the only defaultstream, with 'defaultstream_id' = 0

    Returns:
    1. zerver_recipient, which is a list of mapped recipient
    2. zerver_subscription, which is a list of mapped subscription
    """
    # For streams

    # Initial recipients correspond to initial streams
    # We enumerate all streams, and build a recipient for each
    # Hence 'recipient_id'=n corresponds to 'stream_id'=n
    stream_count = len(zerver_stream)
    zerver_recipient: List[ZerverFieldsT] = [
        build_recipient(n, n, Recipient.STREAM) for n in range(stream_count)
    ]
    zerver_subscription: List[ZerverFieldsT] = []

    # For users
    # Personal recipient ids continue right after the stream recipients;
    # each user is subscribed to their own personal recipient.
    for subscription_id, user in enumerate(zerver_userprofile):
        recipient_id = stream_count + subscription_id
        zerver_recipient.append(build_recipient(user["id"], recipient_id, Recipient.PERSONAL))
        zerver_subscription.append(build_subscription(recipient_id, user["id"], subscription_id))

    # As suggested in #14830, we subscribe every user to every stream.
    # We rely on the above invariant: 'recipient_id'=n corresponds to 'stream_id'=n
    #
    # TODO: For multi-stream imports, subscribe users to streams
    # based either on Gitter API data or who sent messages where.
    subscription_id = len(zerver_subscription)
    for user in zerver_userprofile:
        for stream in zerver_stream:
            zerver_subscription.append(
                build_subscription(stream["id"], user["id"], subscription_id)
            )
            subscription_id += 1

    return zerver_recipient, zerver_subscription
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2022-05-31 22:11:55 +02:00
|
|
|
def get_timestamp_from_message(message: ZerverFieldsT) -> float:
    """Return the message's "sent" time as a POSIX timestamp."""
    # Gitter's timestamps are in UTC
    # NOTE(review): a "sent" string without a UTC offset would parse to a naive
    # datetime, which .timestamp() interprets as local time - confirm that the
    # export always carries an offset.
    return dateutil.parser.parse(message["sent"]).timestamp()
|
|
|
|
|
|
|
|
|
2022-05-31 23:31:30 +02:00
|
|
|
def get_user_from_message(message: ZerverFieldsT) -> str:
    """Return the Gitter id of the message's sender (message["fromUser"]["id"])."""
    return message["fromUser"]["id"]
|
|
|
|
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
def convert_gitter_workspace_messages(
    gitter_data: GitterDataT,
    output_dir: str,
    subscriber_map: Dict[int, Set[int]],
    user_map: Dict[str, int],
    stream_map: Dict[str, int],
    user_short_name_to_full_name: Dict[str, str],
    zerver_userprofile: List[ZerverFieldsT],
    realm_id: int,
    chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE,
) -> None:
    """
    Messages are stored in batches

    Each batch of at most chunk_size messages is written to
    output_dir/messages-NNNNNN.json, numbered from 1. Message ids are
    assigned sequentially from 0 across all batches.
    """
    logging.info("######### IMPORTING MESSAGES STARTED #########\n")

    # The result is handed unchanged to build_usermessages below.
    long_term_idle = long_term_idle_helper(
        iter(gitter_data),
        get_user_from_message,
        get_timestamp_from_message,
        lambda gitter_user_id: user_map[gitter_user_id],
        iter(user_map.keys()),
        zerver_userprofile,
    )

    message_id = 0

    low_index = 0
    upper_index = low_index + chunk_size
    dump_file_id = 1

    while True:
        message_data = gitter_data[low_index:upper_index]
        if not message_data:
            break

        zerver_message = []
        zerver_usermessage: List[ZerverFieldsT] = []
        for message in message_data:
            message_time = get_timestamp_from_message(message)
            # get_usermentions rewrites message["text"] in place, so it must run
            # before the text is read for the message content below.
            mentioned_user_ids = get_usermentions(message, user_map, user_short_name_to_full_name)
            rendered_content = None
            user_id = user_map[get_user_from_message(message)]
            if "room" in message:
                room = message["room"]
                topic_name = f"imported from Gitter room {room}"
                # Stream id n and recipient id n refer to the same stream.
                recipient_id = stream_map[room]
            else:
                topic_name = "imported from Gitter"
                # Messages without a room go to the default stream (id 0).
                recipient_id = 0

            zulip_message = build_message(
                topic_name=topic_name,
                date_sent=message_time,
                message_id=message_id,
                content=message["text"],
                rendered_content=rendered_content,
                user_id=user_id,
                recipient_id=recipient_id,
                realm_id=realm_id,
            )
            zerver_message.append(zulip_message)

            build_usermessages(
                zerver_usermessage=zerver_usermessage,
                subscriber_map=subscriber_map,
                recipient_id=recipient_id,
                mentioned_user_ids=mentioned_user_ids,
                message_id=message_id,
                is_private=False,
                long_term_idle=long_term_idle,
            )

            message_id += 1

        message_json = {
            "zerver_message": zerver_message,
            "zerver_usermessage": zerver_usermessage,
        }
        message_filename = os.path.join(output_dir, f"messages-{dump_file_id:06}.json")
        logging.info("Writing messages to %s\n", message_filename)
        write_data_to_file(message_filename, message_json)

        low_index = upper_index
        upper_index = chunk_size + low_index
        dump_file_id += 1

    logging.info("######### IMPORTING MESSAGES FINISHED #########\n")
|
2018-05-29 13:55:52 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_usermentions(
    message: Dict[str, Any], user_map: Dict[str, int], user_short_name_to_full_name: Dict[str, str]
) -> List[int]:
    """
    Rewrite Gitter mentions in message["text"] to Zulip syntax, in place.

    For every entry of message["mentions"] whose "userId" is in user_map,
    "@screenName" is replaced by "@**full name**"; the screen name itself is
    used when no full name is known for it. Mentions of users not in
    user_map are left untouched.

    Returns the Zulip user ids of the mentioned users, in mention order.
    """
    import re

    mentioned_user_ids: List[int] = []
    # A missing or null "mentions" field means there is nothing to rewrite.
    for mention in message.get("mentions") or []:
        if mention.get("userId") not in user_map:
            continue

        screen_name = mention["screenName"]
        if screen_name not in user_short_name_to_full_name:
            logging.info(
                "Mentioned user %s never sent any messages, so has no full name data",
                screen_name,
            )
            full_name = screen_name
        else:
            full_name = user_short_name_to_full_name[screen_name]
        zulip_mention = f"@**{full_name}**"

        # Refuse a match that is followed by a word character or hyphen, so
        # that "@foo" does not rewrite the front of a longer handle "@foobar".
        gitter_mention = re.compile(re.escape(f"@{screen_name}") + r"(?![\w-])")
        # A callable replacement keeps backslashes in the full name literal.
        message["text"] = gitter_mention.sub(
            lambda _match, replacement=zulip_mention: replacement, message["text"]
        )

        mentioned_user_ids.append(user_map[mention["userId"]])
    return mentioned_user_ids
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int = 6) -> None:
    """
    Convert the Gitter export in gitter_data_file into converted data files in output_dir.

    output_dir is created if needed and must be empty. After the data files
    are written, the directory is also archived to output_dir + ".tar.gz".
    threads is passed to process_avatars.

    Raises Exception if output_dir is not empty.
    """
    # Subdomain is set by the user while running the import commands
    realm_subdomain = ""
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the Gitter file
    with open(gitter_data_file, "rb") as fp:
        gitter_data = orjson.loads(fp.read())

    realm, avatar_list, user_map, stream_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain
    )

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm["zerver_subscription"],
    )

    # For user mentions
    user_short_name_to_full_name = {
        userprofile["short_name"]: userprofile["full_name"]
        for userprofile in realm["zerver_userprofile"]
    }

    convert_gitter_workspace_messages(
        gitter_data,
        output_dir,
        subscriber_map,
        user_map,
        stream_map,
        user_short_name_to_full_name,
        realm["zerver_userprofile"],
        realm_id=realm_id,
    )

    avatar_folder = os.path.join(output_dir, "avatars")
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    # No attachments are converted, so the attachment list is written empty.
    attachment: Dict[str, List[Any]] = {"zerver_attachment": []}

    # IO realm.json
    create_converted_data_files(realm, output_dir, "/realm.json")
    # IO emoji records
    create_converted_data_files([], output_dir, "/emoji/records.json")
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, "/avatars/records.json")
    # IO uploads records
    create_converted_data_files([], output_dir, "/uploads/records.json")
    # IO attachments records
    create_converted_data_files(attachment, output_dir, "/attachment.json")

    subprocess.check_call(["tar", "-czf", output_dir + ".tar.gz", output_dir, "-P"])

    logging.info("######### DATA CONVERSION FINISHED #########\n")
    logging.info("Zulip data dump created at %s", output_dir)
|
2018-05-29 13:55:52 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-06-07 12:25:59 +02:00
|
|
|
def write_data_to_file(output_file: str, data: Any) -> None:
    """Serialize data as 2-space-indented JSON and write it to output_file."""
    # orjson.dumps returns bytes, hence the binary mode.
    with open(output_file, "wb") as f:
        f.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))
|