2018-08-02 15:24:34 +02:00
|
|
|
import logging
|
2018-08-03 19:15:01 +02:00
|
|
|
import os
|
2020-06-11 00:54:34 +02:00
|
|
|
import random
|
|
|
|
import shutil
|
2022-05-24 01:43:00 +02:00
|
|
|
from collections import defaultdict
|
2022-07-29 08:54:11 +02:00
|
|
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
2022-04-27 02:23:56 +02:00
|
|
|
from typing import (
|
|
|
|
AbstractSet,
|
|
|
|
Any,
|
|
|
|
Callable,
|
|
|
|
Dict,
|
|
|
|
Iterable,
|
2022-05-24 01:43:00 +02:00
|
|
|
Iterator,
|
2022-04-27 02:23:56 +02:00
|
|
|
List,
|
2022-10-06 11:56:48 +02:00
|
|
|
Mapping,
|
2022-04-27 02:23:56 +02:00
|
|
|
Optional,
|
|
|
|
Protocol,
|
|
|
|
Set,
|
|
|
|
Tuple,
|
|
|
|
TypeVar,
|
|
|
|
)
|
2018-08-02 15:24:34 +02:00
|
|
|
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2020-06-11 00:54:34 +02:00
|
|
|
import requests
|
2018-08-01 01:01:55 +02:00
|
|
|
from django.forms.models import model_to_dict
|
2022-05-24 01:43:00 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2023-09-11 20:22:32 +02:00
|
|
|
from returns.curry import partial
|
2023-08-02 23:53:10 +02:00
|
|
|
from typing_extensions import TypeAlias
|
2018-08-01 01:01:55 +02:00
|
|
|
|
2018-10-23 21:58:51 +02:00
|
|
|
from zerver.data_import.sequencer import NEXT_ID
|
2018-08-02 15:24:34 +02:00
|
|
|
from zerver.lib.avatar_hash import user_avatar_path_from_ids
|
2022-11-16 06:20:53 +01:00
|
|
|
from zerver.lib.stream_color import STREAM_ASSIGNMENT_COLORS as STREAM_COLORS
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.models import (
|
|
|
|
Attachment,
|
|
|
|
Huddle,
|
|
|
|
Message,
|
|
|
|
Realm,
|
|
|
|
RealmEmoji,
|
|
|
|
Recipient,
|
|
|
|
Stream,
|
|
|
|
Subscription,
|
|
|
|
UserProfile,
|
|
|
|
)
|
2023-04-16 21:53:22 +02:00
|
|
|
from zproject.backends import all_implemented_backend_names
|
2018-08-01 01:01:55 +02:00
|
|
|
|
|
|
|
# stubs
|
2023-08-02 23:53:10 +02:00
|
|
|
# Alias for the plain dictionaries (string key -> arbitrary value) that the
# builder functions in this file accept and return.
ZerverFieldsT: TypeAlias = Dict[str, Any]
|
2018-08-01 01:01:55 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-03-20 13:36:09 +01:00
|
|
|
class SubscriberHandler:
    """Remember which user ids belong to each stream and to each huddle.

    Two dictionaries are kept, one keyed by stream id and one keyed by
    huddle id; every value is the set of user ids handed to set_info.
    """

    def __init__(self) -> None:
        self.stream_info: Dict[int, Set[int]] = {}
        self.huddle_info: Dict[int, Set[int]] = {}

    def set_info(
        self,
        users: Set[int],
        stream_id: Optional[int] = None,
        huddle_id: Optional[int] = None,
    ) -> None:
        """Store `users` under the given stream id or huddle id.

        When both ids are supplied the stream id wins.  Raises
        AssertionError when neither id is supplied.
        """
        if stream_id is not None:
            self.stream_info[stream_id] = users
            return
        if huddle_id is not None:
            self.huddle_info[huddle_id] = users
            return
        raise AssertionError("stream_id or huddle_id is required")

    def get_users(
        self, stream_id: Optional[int] = None, huddle_id: Optional[int] = None
    ) -> Set[int]:
        """Return the user ids stored for the given stream id or huddle id.

        When both ids are supplied the stream id wins.  Raises KeyError for
        an id that was never stored and AssertionError when neither id is
        supplied.
        """
        if stream_id is not None:
            return self.stream_info[stream_id]
        if huddle_id is not None:
            return self.huddle_info[huddle_id]
        raise AssertionError("stream_id or huddle_id is required")
|
2019-03-20 13:36:09 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_zerver_realm(
    realm_id: int, realm_subdomain: str, time: float, other_product: str
) -> List[ZerverFieldsT]:
    """Return a one-element list holding the dictionary form of a new Realm.

    The subdomain is used both as the realm name and as its string_id, and
    `time` is stored as the realm's date_created.
    """
    imported_realm = Realm(
        id=realm_id,
        name=realm_subdomain,
        string_id=realm_subdomain,
        description=f"Organization imported from {other_product}!",
    )
    realm_row = model_to_dict(imported_realm)
    realm_row["date_created"] = time

    # These fields are supposed to be generated upon import.
    for generated_field in ("uuid", "uuid_owner_secret"):
        del realm_row[generated_field]

    return [realm_row]
|
|
|
|
|
|
|
|
|
|
|
|
def build_user_profile(
    avatar_source: str,
    date_joined: Any,
    delivery_email: str,
    email: str,
    full_name: str,
    id: int,
    is_active: bool,
    role: int,
    is_mirror_dummy: bool,
    realm_id: int,
    short_name: str,
    timezone: str,
    is_bot: bool = False,
    bot_type: Optional[int] = None,
) -> ZerverFieldsT:
    """Return the dictionary form of a UserProfile built from the arguments.

    All arguments except `short_name` are passed to the UserProfile
    constructor; `short_name` is added to the resulting dictionary afterwards.
    """
    profile_row = model_to_dict(
        UserProfile(
            avatar_source=avatar_source,
            date_joined=date_joined,
            delivery_email=delivery_email,
            email=email,
            full_name=full_name,
            id=id,
            is_mirror_dummy=is_mirror_dummy,
            is_active=is_active,
            role=role,
            realm_id=realm_id,
            timezone=timezone,
            is_bot=is_bot,
            bot_type=bot_type,
        )
    )

    # Even though short_name is no longer in the Zulip UserProfile, it's
    # helpful to have it in our import dictionaries for legacy reasons.
    profile_row["short_name"] = short_name
    return profile_row
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_avatar(
    zulip_user_id: int,
    realm_id: int,
    email: str,
    avatar_url: str,
    timestamp: Any,
    avatar_list: List[ZerverFieldsT],
) -> None:
    """Append an avatar record for one user to `avatar_list`.

    The record's "path" holds the original avatar URL; "content_type" is
    None and "s3_path" / "size" are empty strings.
    """
    avatar_list.append(
        {
            # Save original avatar URL here, which is downloaded later
            "path": avatar_url,
            "realm_id": realm_id,
            "content_type": None,
            "user_profile_id": zulip_user_id,
            "last_modified": timestamp,
            "user_profile_email": email,
            "s3_path": "",
            "size": "",
        }
    )
|
2018-08-02 00:35:02 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-10-23 16:53:09 +02:00
|
|
|
def make_subscriber_map(zerver_subscription: List[ZerverFieldsT]) -> Dict[int, Set[int]]:
    """Group subscription rows into a recipient id -> set of user ids map.

    This can be convenient for building up UserMessage rows.
    """
    users_by_recipient: Dict[int, Set[int]] = {}
    for row in zerver_subscription:
        subscriber_id = row["user_profile"]
        target_recipient = row["recipient"]
        users_by_recipient.setdefault(target_recipient, set()).add(subscriber_id)

    return users_by_recipient
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def make_user_messages(
    zerver_message: List[ZerverFieldsT],
    subscriber_map: Dict[int, Set[int]],
    is_pm_data: bool,
    mention_map: Dict[int, Set[int]],
    wildcard_mention_map: Mapping[int, bool] = {},
) -> List[ZerverFieldsT]:
    """Build one user-message row per (message, receiving user) pair.

    For each message the receiving users are the subscribers of the message's
    recipient (none if the recipient is absent from `subscriber_map`) plus
    the sender.  `mention_map` must contain an entry for every message id;
    `wildcard_mention_map` entries default to False.
    """
    rows = []

    for msg in zerver_message:
        message_id = msg["id"]
        recipient_id = msg["recipient"]
        sender_id = msg["sender"]
        mentioned = mention_map[message_id]
        has_wildcard_mention = wildcard_mention_map.get(message_id, False)
        # The sender always gets a row, subscribed or not.
        receivers = subscriber_map.get(recipient_id, set()) | {sender_id}

        rows.extend(
            build_user_message(
                user_id=receiver_id,
                message_id=message_id,
                is_private=is_pm_data,
                is_mentioned=receiver_id in mentioned,
                wildcard_mention=has_wildcard_mention,
            )
            for receiver_id in receivers
        )

    return rows
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_subscription(recipient_id: int, user_id: int, subscription_id: int) -> ZerverFieldsT:
    """Return a Subscription row as a dictionary, with a randomly chosen color.

    The "user_profile" and "recipient" entries are filled in directly with
    the ids given by the caller.
    """
    chosen_color = random.choice(STREAM_COLORS)
    row = model_to_dict(
        Subscription(color=chosen_color, id=subscription_id),
        exclude=["user_profile", "recipient_id"],
    )
    row["user_profile"] = user_id
    row["recipient"] = recipient_id
    return row
|
2018-08-02 00:35:02 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-16 00:44:06 +01:00
|
|
|
class GetUsers(Protocol):
    """Structural type for a callable that looks up users by stream or huddle id."""

    def __call__(self, stream_id: int = ..., huddle_id: int = ...) -> Set[int]:
        """Return the set of user ids for the given stream id or huddle id."""
        ...
|
|
|
|
|
|
|
|
|
2019-01-10 01:17:54 +01:00
|
|
|
def build_stream_subscriptions(
    get_users: GetUsers,
    zerver_recipient: List[ZerverFieldsT],
    zerver_stream: List[ZerverFieldsT],
) -> List[ZerverFieldsT]:
    """Build a subscription row for every user of every known stream.

    Only recipients of type Recipient.STREAM whose type_id matches a stream
    in `zerver_stream` are considered; the users of each such stream are
    obtained from `get_users(stream_id=...)`.
    """
    known_stream_ids = {stream["id"] for stream in zerver_stream}

    # recipient_id -> stream_id
    stream_id_by_recipient = {
        recipient["id"]: recipient["type_id"]
        for recipient in zerver_recipient
        if recipient["type"] == Recipient.STREAM and recipient["type_id"] in known_stream_ids
    }

    return [
        build_subscription(
            recipient_id=recipient_id,
            user_id=user_id,
            subscription_id=NEXT_ID("subscription"),
        )
        for recipient_id, stream_id in stream_id_by_recipient.items()
        for user_id in get_users(stream_id=stream_id)
    ]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-09-25 15:46:05 +02:00
|
|
|
def build_huddle_subscriptions(
    get_users: GetUsers,
    zerver_recipient: List[ZerverFieldsT],
    zerver_huddle: List[ZerverFieldsT],
) -> List[ZerverFieldsT]:
    """Build a subscription row for every user of every known huddle.

    Only recipients of type Recipient.HUDDLE whose type_id matches a huddle
    in `zerver_huddle` are considered; the users of each such huddle are
    obtained from `get_users(huddle_id=...)`.
    """
    known_huddle_ids = {huddle["id"] for huddle in zerver_huddle}

    # recipient_id -> huddle_id
    huddle_id_by_recipient = {
        recipient["id"]: recipient["type_id"]
        for recipient in zerver_recipient
        if recipient["type"] == Recipient.HUDDLE and recipient["type_id"] in known_huddle_ids
    }

    return [
        build_subscription(
            recipient_id=recipient_id,
            user_id=user_id,
            subscription_id=NEXT_ID("subscription"),
        )
        for recipient_id, huddle_id in huddle_id_by_recipient.items()
        for user_id in get_users(huddle_id=huddle_id)
    ]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-10-23 23:56:14 +02:00
|
|
|
def build_personal_subscriptions(zerver_recipient: List[ZerverFieldsT]) -> List[ZerverFieldsT]:
    """Build one subscription row for each recipient of type Recipient.PERSONAL.

    The recipient's type_id is used as the subscribing user's id.
    """
    # Filter first, so every row is inspected before any id is allocated.
    personal_rows = [row for row in zerver_recipient if row["type"] == Recipient.PERSONAL]

    return [
        build_subscription(
            recipient_id=row["id"],
            user_id=row["type_id"],
            subscription_id=NEXT_ID("subscription"),
        )
        for row in personal_rows
    ]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-08-02 00:35:02 +02:00
|
|
|
def build_recipient(type_id: int, recipient_id: int, type: int) -> ZerverFieldsT:
    """Return the dictionary form of a Recipient with the given id, type and type_id."""
    return model_to_dict(
        Recipient(
            type_id=type_id,  # stream id
            id=recipient_id,
            type=type,
        )
    )
|
2018-08-02 15:24:34 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_recipients(
    zerver_userprofile: Iterable[ZerverFieldsT],
    zerver_stream: Iterable[ZerverFieldsT],
    zerver_huddle: Iterable[ZerverFieldsT] = [],
) -> List[ZerverFieldsT]:
    """Build recipient rows for all users, then all streams, then all huddles.

    This function was only used by the HipChat import, but it may be
    required for future conversions.  The Slack and Gitter conversions do
    it more tightly integrated with creating other objects.
    """
    # Processed strictly in this order so ids are allocated users-first.
    sources = (
        (zerver_userprofile, Recipient.PERSONAL),
        (zerver_stream, Recipient.STREAM),
        (zerver_huddle, Recipient.HUDDLE),
    )

    recipient_rows = []
    for rows, recipient_type in sources:
        for row in rows:
            new_recipient = Recipient(
                type_id=row["id"],
                id=NEXT_ID("recipient"),
                type=recipient_type,
            )
            recipient_rows.append(model_to_dict(new_recipient))

    return recipient_rows
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_realm(
    zerver_realm: List[ZerverFieldsT], realm_id: int, domain_name: str
) -> ZerverFieldsT:
    """Return the skeleton realm dictionary.

    Most tables start out empty.  The dictionary is pre-populated with three
    client rows, one realm-domain row for `domain_name`, the given
    `zerver_realm` rows, and one authentication-method row per name returned
    by all_implemented_backend_names() (ids start at 1).
    """
    clients = [
        {"name": "populate_db", "id": 1},
        {"name": "website", "id": 2},
        {"name": "API", "id": 3},
    ]
    realm_domains = [
        {"realm": realm_id, "allow_subdomains": False, "domain": domain_name, "id": realm_id}
    ]
    authentication_methods = [
        {"realm": realm_id, "name": backend_name, "id": method_id}
        for method_id, backend_name in enumerate(all_implemented_backend_names(), start=1)
    ]

    return {
        "zerver_client": clients,
        "zerver_customprofilefield": [],
        "zerver_customprofilefieldvalue": [],
        # shows last logged in data, which is not available
        "zerver_userpresence": [],
        "zerver_userprofile_mirrordummy": [],
        "zerver_realmdomain": realm_domains,
        "zerver_useractivity": [],
        "zerver_realm": zerver_realm,
        "zerver_huddle": [],
        "zerver_userprofile_crossrealm": [],
        "zerver_useractivityinterval": [],
        "zerver_reaction": [],
        "zerver_realmemoji": [],
        "zerver_realmfilter": [],
        "zerver_realmplayground": [],
        "zerver_realmauthenticationmethod": authentication_methods,
    }
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_usermessages(
    zerver_usermessage: List[ZerverFieldsT],
    subscriber_map: Dict[int, Set[int]],
    recipient_id: int,
    mentioned_user_ids: List[int],
    message_id: int,
    is_private: bool,
    long_term_idle: AbstractSet[int] = set(),
) -> Tuple[int, int]:
    """Append user-message rows for one message to `zerver_usermessage`.

    Subscribers of `recipient_id` are visited in sorted order.  A subscriber
    is skipped when the message is not private, does not mention them, and
    they are in `long_term_idle`.

    Returns a (rows created, rows skipped) tuple.
    """
    created_count = 0
    skipped_count = 0

    # A missing or empty subscriber set produces no iterations at all.
    for user_id in sorted(subscriber_map.get(recipient_id) or ()):
        is_mentioned = user_id in mentioned_user_ids

        may_skip = not (is_mentioned or is_private)
        if may_skip and user_id in long_term_idle:
            # these users are long-term idle
            skipped_count += 1
            continue
        created_count += 1

        zerver_usermessage.append(
            build_user_message(
                user_id=user_id,
                message_id=message_id,
                is_private=is_private,
                is_mentioned=is_mentioned,
            )
        )

    return (created_count, skipped_count)
|
2018-08-03 21:11:47 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_user_message(
    user_id: int,
    message_id: int,
    is_private: bool,
    is_mentioned: bool,
    wildcard_mention: bool = False,
) -> ZerverFieldsT:
    """Return a user-message row linking `user_id` to `message_id`.

    The row's flags_mask always includes 1 (read); 8, 16 and 2048 are added
    for is_mentioned, wildcard_mention and is_private respectively.  The row
    id comes from NEXT_ID("user_message").
    """
    flags_mask = (
        1  # For read
        + (8 if is_mentioned else 0)  # For mentioned
        + (16 if wildcard_mention else 0)  # added when wildcard_mention is set
        + (2048 if is_private else 0)  # For is_private
    )

    return {
        "id": NEXT_ID("user_message"),
        "user_profile": user_id,
        "message": message_id,
        "flags_mask": flags_mask,
    }
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_defaultstream(realm_id: int, stream_id: int, defaultstream_id: int) -> ZerverFieldsT:
    """Return a default-stream row with "stream", "realm" and "id" entries."""
    return {
        "stream": stream_id,
        "realm": realm_id,
        "id": defaultstream_id,
    }
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_stream(
    date_created: Any,
    realm_id: int,
    name: str,
    description: str,
    stream_id: int,
    deactivated: bool = False,
    invite_only: bool = False,
    stream_post_policy: int = 1,
) -> ZerverFieldsT:
    """Return the dictionary form of a Stream belonging to `realm_id`.

    Newlines in `description` are replaced with spaces, and
    history_public_to_subscribers is set to the opposite of `invite_only`.
    """
    # Other applications don't have the distinction of "private stream with public history"
    # vs "private stream with hidden history" - and we've traditionally imported private "streams"
    # of other products as private streams with hidden history.
    # So we can set the history_public_to_subscribers value based on the invite_only flag.
    single_line_description = description.replace("\n", " ")

    new_stream = Stream(
        name=name,
        deactivated=deactivated,
        description=single_line_description,
        # We don't set rendered_description here; it'll be added on import
        date_created=date_created,
        invite_only=invite_only,
        id=stream_id,
        stream_post_policy=stream_post_policy,
        history_public_to_subscribers=not invite_only,
    )

    stream_row = model_to_dict(new_stream, exclude=["realm"])
    stream_row["realm"] = realm_id
    return stream_row
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-07-02 07:41:51 +02:00
|
|
|
def build_huddle(huddle_id: int) -> ZerverFieldsT:
    """Return the dictionary form of a Huddle that has only its id set."""
    new_huddle = Huddle(id=huddle_id)
    return model_to_dict(new_huddle)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_message(
    *,
    topic_name: str,
    date_sent: float,
    message_id: int,
    content: str,
    rendered_content: Optional[str],
    user_id: int,
    recipient_id: int,
    realm_id: int,
    has_image: bool = False,
    has_link: bool = False,
    has_attachment: bool = True,
) -> ZerverFieldsT:
    """Return the dictionary form of a Message built from the arguments.

    All parameters are keyword-only.

    topic_name: applied through Message.set_topic_name.
    date_sent: stored as-is under "date_sent".
    user_id: stored under "sender".
    recipient_id: stored under "recipient".
    realm_id: id of the realm that owns the message.

    "sending_client" is always set to 1.
    """
    zulip_message = Message(
        rendered_content_version=1,  # this is Zulip specific
        id=message_id,
        content=content,
        rendered_content=rendered_content,
        has_image=has_image,
        has_attachment=has_attachment,
        has_link=has_link,
        # Previously realm_id was accepted but silently dropped, leaving the
        # message row without its realm.
        realm_id=realm_id,
    )
    zulip_message.set_topic_name(topic_name)

    # The excluded fields are filled in below with plain ids.
    zulip_message_dict = model_to_dict(
        zulip_message, exclude=["recipient", "sender", "sending_client"]
    )
    zulip_message_dict["sender"] = user_id
    zulip_message_dict["sending_client"] = 1
    zulip_message_dict["recipient"] = recipient_id
    zulip_message_dict["date_sent"] = date_sent

    return zulip_message_dict
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_attachment(
    realm_id: int,
    message_ids: Set[int],
    user_id: int,
    fileinfo: ZerverFieldsT,
    s3_path: str,
    zerver_attachment: List[ZerverFieldsT],
) -> None:
    """
    Append one attachment row to `zerver_attachment` (modified in place).

    `fileinfo` must be a dictionary providing the keys 'size', 'created'
    (creation time) and 'name' (file name).  The row id is drawn from the
    NEXT_ID("attachment") sequence, `s3_path` becomes the row's path_id,
    and the row is always marked is_realm_public=True.
    """
    attachment_row = model_to_dict(
        Attachment(
            id=NEXT_ID("attachment"),
            size=fileinfo["size"],
            create_time=fileinfo["created"],
            is_realm_public=True,
            path_id=s3_path,
            file_name=fileinfo["name"],
        ),
        # Relations are written below as plain ids.
        exclude=["owner", "messages", "realm"],
    )
    attachment_row.update(
        owner=user_id,
        messages=list(message_ids),
        realm=realm_id,
    )

    zerver_attachment.append(attachment_row)
|
2018-08-03 22:33:41 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-07-28 01:04:47 +02:00
|
|
|
def get_avatar(avatar_dir: str, size_url_suffix: str, avatar_upload_item: List[str]) -> None:
    """
    Download a single avatar and store it twice under `avatar_dir`.

    `avatar_upload_item` is a three-element list:
    [source URL, relative path of the image copy, relative path of the
    '.original' copy].  `size_url_suffix` is appended to the URL before
    the request is made.  The target directory is not created here.
    """
    source_url = avatar_upload_item[0]

    png_target = os.path.join(avatar_dir, avatar_upload_item[1])
    original_target = os.path.join(avatar_dir, avatar_upload_item[2])

    avatar_response = requests.get(source_url + size_url_suffix, stream=True)
    with open(png_target, "wb") as png_file:
        shutil.copyfileobj(avatar_response.raw, png_file)
    # The second copy is a byte-for-byte duplicate of the file just written.
    shutil.copy(png_target, original_target)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_avatars(
    avatar_list: List[ZerverFieldsT],
    avatar_dir: str,
    realm_id: int,
    threads: int,
    size_url_suffix: str = "",
) -> List[ZerverFieldsT]:
    """
    Download every user's avatar and save it in the user's avatar
    directory under both the '.png' and the '.original' extension.

    Parameters:
    1. avatar_list: List of avatars to be mapped in avatars records.json file.
       Each entry's "path" is read as the download URL and is then
       overwritten (together with "s3_path") with the local '.png' path.
    2. avatar_dir: Folder where the downloaded avatars are saved
    3. realm_id: Realm ID.
    4. threads: Number of parallel workers used for the downloads.
    5. size_url_suffix: Text appended to every avatar URL before download.

    Returns the (mutated) input records followed by one extra record per
    avatar that points at the '.original' file.

    We use this for Slack and Gitter conversions, where avatars need to be
    downloaded.  For simpler conversions see write_avatar_png.
    """
    logging.info("######### GETTING AVATARS #########\n")
    logging.info("DOWNLOADING AVATARS .......\n")

    original_records: List[ZerverFieldsT] = []
    download_jobs: List[List[str]] = []
    for record in avatar_list:
        path_prefix = user_avatar_path_from_ids(record["user_profile_id"], realm_id)
        png_path = f"{path_prefix}.png"
        original_path = f"{path_prefix}.original"

        download_jobs.append([record["path"], png_path, original_path])

        # Snapshot the record for the '.original' file before the '.png'
        # paths are written into it.
        original_records.append({**record, "path": original_path, "s3_path": original_path})

        # We don't add the size field here in avatar's records.json,
        # since the metadata is not needed on the import end, and we
        # don't have it until we've downloaded the files anyway.
        record.update(path=png_path, s3_path=png_path)

    # Run downloads in parallel
    downloader = partial(get_avatar, avatar_dir, size_url_suffix)
    run_parallel_wrapper(downloader, download_jobs, threads=threads)

    logging.info("######### GETTING AVATARS FINISHED #########\n")
    return avatar_list + original_records
|
2018-08-03 19:15:01 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
# Type of a single work item passed to the callable that
# wrapping_function / run_parallel_wrapper invoke.
ListJobData = TypeVar("ListJobData")
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
|
2019-07-28 01:04:47 +02:00
|
|
|
def wrapping_function(f: Callable[[ListJobData], None], item: ListJobData) -> None:
    """
    Call `f(item)`, logging instead of propagating any Exception.

    A failure is reported through logging.exception (traceback plus
    stack info) together with the offending item, and the function then
    returns normally, so one bad item does not abort the caller.
    """
    try:
        f(item)
    except Exception:
        # Deliberately best-effort: record the failure and carry on.
        logging.exception("Error processing item: %s", item, stack_info=True)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def run_parallel_wrapper(
    f: Callable[[ListJobData], None], full_items: List[ListJobData], threads: int = 6
) -> None:
    """
    Apply `f` to every element of `full_items` using a pool of workers.

    Despite the parameter name, `threads` is the max_workers of a
    ProcessPoolExecutor, i.e. the work runs in separate processes.  Each
    call goes through wrapping_function, so an exception raised by `f`
    for one item is logged there rather than raised here.  Progress is
    logged after every 1000 completed items.
    """
    logging.info("Distributing %s items across %s threads", len(full_items), threads)

    with ProcessPoolExecutor(max_workers=threads) as pool:
        pending = [pool.submit(wrapping_function, f, job) for job in full_items]
        for finished, future in enumerate(as_completed(pending), start=1):
            # Surfaces anything that escaped wrapping_function.
            future.result()
            if finished % 1000 == 0:
                logging.info("Finished %s items", finished)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-07-28 01:04:47 +02:00
|
|
|
def get_uploads(upload_dir: str, upload: List[str]) -> None:
    """
    Download one uploaded file into `upload_dir`.

    `upload` is a two-element list: [source URL, path relative to
    `upload_dir`].  Missing parent directories of the destination are
    created before the file is written.
    """
    source_url, relative_path = upload[0], upload[1]
    destination = os.path.join(upload_dir, relative_path)

    upload_response = requests.get(source_url, stream=True)
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    with open(destination, "wb") as destination_file:
        shutil.copyfileobj(upload_response.raw, destination_file)
|
2018-12-01 01:15:55 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_uploads(
    upload_list: List[ZerverFieldsT], upload_dir: str, threads: int
) -> List[ZerverFieldsT]:
    """
    Download all uploads and save them in the realm's upload directory.

    Parameters:
    1. upload_list: List of uploads to be mapped in uploads records.json file.
       Each entry's "path" is read as the download URL and is then
       replaced by the entry's "s3_path".
    2. upload_dir: Folder where the downloaded uploads are saved
    3. threads: Number of parallel workers used for the downloads.

    Returns `upload_list` itself, after the in-place path rewrite.
    """
    logging.info("######### GETTING ATTACHMENTS #########\n")
    logging.info("DOWNLOADING ATTACHMENTS .......\n")

    download_jobs = []
    for record in upload_list:
        source_url, s3_path = record["path"], record["s3_path"]
        download_jobs.append([source_url, s3_path])
        record["path"] = s3_path

    # Run downloads in parallel
    downloader = partial(get_uploads, upload_dir)
    run_parallel_wrapper(downloader, download_jobs, threads=threads)

    logging.info("######### GETTING ATTACHMENTS FINISHED #########\n")
    return upload_list
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def build_realm_emoji(realm_id: int, name: str, id: int, file_name: str) -> ZerverFieldsT:
    """
    Return the dictionary row for a custom emoji.

    A RealmEmoji object is built from the four arguments and dumped with
    model_to_dict.  The parameter name `id` shadows the builtin but is
    part of the signature callers may pass by keyword, so it is kept.
    """
    realm_emoji = RealmEmoji(
        realm_id=realm_id,
        name=name,
        id=id,
        file_name=file_name,
    )
    return model_to_dict(realm_emoji)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-07-28 01:04:47 +02:00
|
|
|
def get_emojis(emoji_dir: str, upload: List[str]) -> None:
    """
    Download one custom emoji image into `emoji_dir`.

    `upload` is a two-element list: [source URL, path relative to
    `emoji_dir`].  Missing parent directories of the destination are
    created before the file is written.
    """
    source_url, relative_path = upload[0], upload[1]
    destination = os.path.join(emoji_dir, relative_path)

    emoji_response = requests.get(source_url, stream=True)
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    with open(destination, "wb") as destination_file:
        shutil.copyfileobj(emoji_response.raw, destination_file)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def process_emojis(
    zerver_realmemoji: List[ZerverFieldsT],
    emoji_dir: str,
    emoji_url_map: ZerverFieldsT,
    threads: int,
) -> List[ZerverFieldsT]:
    """
    Download the custom emojis and save them in the output emoji folder.

    Parameters:
    1. zerver_realmemoji: List of all RealmEmoji objects to be imported
    2. emoji_dir: Folder where the downloaded emojis are saved
    3. emoji_url_map: Maps emoji name to its url
    4. threads: Number of parallel workers used for the downloads.

    Returns one new record per emoji: a copy of the input row with
    "path" and "s3_path" added and the "realm" key renamed to
    "realm_id".  The input rows themselves are not modified.
    """
    logging.info("######### GETTING EMOJIS #########\n")
    logging.info("DOWNLOADING EMOJIS .......\n")

    emoji_records = []
    download_jobs = []
    for emoji in zerver_realmemoji:
        source_url = emoji_url_map[emoji["name"]]
        relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=emoji["realm"], emoji_file_name=emoji["name"]
        )
        download_jobs.append([source_url, relative_path])

        record = dict(emoji)
        record["path"] = relative_path
        record["s3_path"] = relative_path
        # Rename "realm" -> "realm_id" in the copy.
        record["realm_id"] = record.pop("realm")
        emoji_records.append(record)

    # Run downloads in parallel
    downloader = partial(get_emojis, emoji_dir)
    run_parallel_wrapper(downloader, download_jobs, threads=threads)

    logging.info("######### GETTING EMOJIS FINISHED #########\n")
    return emoji_records
|
2018-10-13 16:41:18 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-10-13 16:41:18 +02:00
|
|
|
def create_converted_data_files(data: Any, output_dir: str, file_path: str) -> None:
    """
    Write `data` as 2-space-indented JSON to `output_dir + file_path`.

    The two path pieces are concatenated as plain strings (no separator
    is inserted), so `file_path` presumably carries its own leading
    separator -- verify against callers.  Missing parent directories
    are created.
    """
    destination = output_dir + file_path
    parent_dir = os.path.dirname(destination)
    os.makedirs(parent_dir, exist_ok=True)
    with open(destination, "wb") as json_file:
        json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))
|
2022-05-24 01:43:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
# External user-id
|
|
|
|
# Kept generic: long_term_idle_helper only uses values of this type as
# dict keys / set members and hands them to caller-supplied callbacks.
ExternalId = TypeVar("ExternalId")
|
|
|
|
|
|
|
|
|
|
|
|
def long_term_idle_helper(
    message_iterator: Iterator[ZerverFieldsT],
    user_from_message: Callable[[ZerverFieldsT], Optional[ExternalId]],
    timestamp_from_message: Callable[[ZerverFieldsT], float],
    zulip_user_id_from_user: Callable[[ExternalId], int],
    all_user_ids_iterator: Iterator[ExternalId],
    zerver_userprofile: List[ZerverFieldsT],
) -> Set[int]:
    """Flag long-term idle users and return their Zulip user ids.

    Algorithmically, we treat users who have sent more than 10 messages
    or have sent a message within the last 60 days as active.
    Everyone else is treated as long-term idle, which means they will
    have a slightly slower first page load when coming back to
    Zulip.

    Parameters:
    message_iterator: every message to consider (consumed once).
    user_from_message: returns the external sender id of a message, or
        None for messages that should be ignored.
    timestamp_from_message: returns the send time of a message as
        seconds since the epoch (it is compared against the current
        POSIX timestamp).
    zulip_user_id_from_user: maps an external user id to a Zulip user id.
    all_user_ids_iterator: every external user id (consumed once).
    zerver_userprofile: user rows; rows of idle users are modified in
        place ("long_term_idle" and "last_active_message_id" are set).
    """
    recent_window_seconds = 60 * 24 * 60 * 60  # 60 days
    active_message_threshold = 10  # strictly more than this many => active

    sender_counts: Dict[ExternalId, int] = defaultdict(int)
    recent_senders: Set[ExternalId] = set()
    now = timezone_now().timestamp()

    for message in message_iterator:
        timestamp = timestamp_from_message(message)
        user = user_from_message(message)
        # Skip unattributed messages and senders already known to be active.
        if user is None or user in recent_senders:
            continue

        if now - timestamp < recent_window_seconds:
            recent_senders.add(user)

        sender_counts[user] += 1

    # Prolific senders count as active even without a recent message.
    for user, count in sender_counts.items():
        if count > active_message_threshold:
            recent_senders.add(user)

    long_term_idle: Set[int] = {
        zulip_user_id_from_user(user_id)
        for user_id in all_user_ids_iterator
        if user_id not in recent_senders
    }

    for user_profile_row in zerver_userprofile:
        if user_profile_row["id"] in long_term_idle:
            user_profile_row["long_term_idle"] = True
            # Setting last_active_message_id to 1 means the user, if
            # imported, will get the full message history for the
            # streams they were on.
            user_profile_row["last_active_message_id"] = 1

    return long_term_idle
|