2018-04-23 23:28:27 +02:00
|
|
|
import datetime
|
|
|
|
import logging
|
2019-07-28 01:08:18 +02:00
|
|
|
import multiprocessing
|
2018-04-23 23:28:27 +02:00
|
|
|
import os
|
2020-09-05 04:02:13 +02:00
|
|
|
import secrets
|
2018-04-23 23:28:27 +02:00
|
|
|
import shutil
|
2021-07-16 22:11:10 +02:00
|
|
|
from mimetypes import guess_type
|
2020-06-23 00:37:25 +02:00
|
|
|
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2019-05-23 13:58:10 +02:00
|
|
|
from bs4 import BeautifulSoup
|
2018-04-23 23:28:27 +02:00
|
|
|
from django.conf import settings
|
2020-10-01 00:20:02 +02:00
|
|
|
from django.core.cache import cache
|
2018-04-23 23:28:27 +02:00
|
|
|
from django.db import connection
|
2020-06-05 06:55:20 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2020-06-09 11:01:54 +02:00
|
|
|
from psycopg2.extras import execute_values
|
2020-06-11 00:54:34 +02:00
|
|
|
from psycopg2.sql import SQL, Identifier
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2019-01-05 01:15:49 +01:00
|
|
|
from analytics.models import RealmCount, StreamCount, UserCount
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.actions import (
|
|
|
|
UserMessageLite,
|
|
|
|
bulk_insert_ums,
|
|
|
|
do_change_avatar_fields,
|
|
|
|
do_change_plan_type,
|
|
|
|
)
|
2018-04-23 23:28:27 +02:00
|
|
|
from zerver.lib.avatar_hash import user_avatar_path_from_ids
|
2019-11-28 16:56:04 +01:00
|
|
|
from zerver.lib.bulk_create import bulk_create_users, bulk_set_users_or_streams_recipient_fields
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.export import DATE_FIELDS, Field, Path, Record, TableData, TableName
|
2020-07-04 14:34:46 +02:00
|
|
|
from zerver.lib.markdown import markdown_convert
|
2020-06-27 22:47:06 +02:00
|
|
|
from zerver.lib.markdown import version as markdown_version
|
2020-10-13 15:49:40 +02:00
|
|
|
from zerver.lib.message import get_last_message_id
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.server_initialization import create_internal_realm, server_initialized
|
2020-03-24 14:47:41 +01:00
|
|
|
from zerver.lib.streams import render_stream_description
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.timestamp import datetime_to_timestamp
|
2021-07-16 22:11:10 +02:00
|
|
|
from zerver.lib.upload import BadImageError, get_bucket, sanitize_name
|
2018-10-15 14:24:13 +02:00
|
|
|
from zerver.lib.utils import generate_api_key, process_list_in_batches
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.models import (
|
2020-07-16 16:11:34 +02:00
|
|
|
AlertWord,
|
2020-06-11 00:54:34 +02:00
|
|
|
Attachment,
|
|
|
|
BotConfigData,
|
|
|
|
BotStorageData,
|
|
|
|
Client,
|
|
|
|
CustomProfileField,
|
|
|
|
CustomProfileFieldValue,
|
|
|
|
DefaultStream,
|
|
|
|
Huddle,
|
|
|
|
Message,
|
2021-03-27 12:23:32 +01:00
|
|
|
MutedUser,
|
2020-06-11 00:54:34 +02:00
|
|
|
Reaction,
|
|
|
|
Realm,
|
|
|
|
RealmAuditLog,
|
|
|
|
RealmDomain,
|
|
|
|
RealmEmoji,
|
|
|
|
RealmFilter,
|
2021-02-14 12:07:09 +01:00
|
|
|
RealmPlayground,
|
2020-06-11 00:54:34 +02:00
|
|
|
Recipient,
|
|
|
|
Service,
|
|
|
|
Stream,
|
|
|
|
Subscription,
|
|
|
|
UserActivity,
|
|
|
|
UserActivityInterval,
|
|
|
|
UserGroup,
|
|
|
|
UserGroupMembership,
|
|
|
|
UserHotspot,
|
|
|
|
UserMessage,
|
|
|
|
UserPresence,
|
|
|
|
UserProfile,
|
2021-07-23 15:26:02 +02:00
|
|
|
UserTopic,
|
2020-06-11 00:54:34 +02:00
|
|
|
get_huddle_hash,
|
2021-07-26 17:17:10 +02:00
|
|
|
get_realm,
|
2020-06-11 00:54:34 +02:00
|
|
|
get_system_bot,
|
|
|
|
get_user_profile_by_id,
|
|
|
|
)
|
2019-01-05 00:45:27 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
# Each entry is (exported table name, Django model class, short table
# name).  Every short name listed here is also a key of ID_MAP.
realm_tables = [
    ("zerver_defaultstream", DefaultStream, "defaultstream"),
    ("zerver_realmemoji", RealmEmoji, "realmemoji"),
    ("zerver_realmdomain", RealmDomain, "realmdomain"),
    ("zerver_realmfilter", RealmFilter, "realmfilter"),
    ("zerver_realmplayground", RealmPlayground, "realmplayground"),
]  # List[Tuple[TableName, Any, str]]
|
2018-04-23 23:28:27 +02:00
|
|
|
|
|
|
|
|
2018-10-17 20:15:52 +02:00
|
|
|
# ID_MAP is a dictionary that maps table names to dictionaries
|
2018-04-23 23:28:27 +02:00
|
|
|
# that map old ids to new ids. We use this in
|
|
|
|
# re_map_foreign_keys and other places.
|
|
|
|
#
|
2020-03-28 01:25:56 +01:00
|
|
|
# We explicitly initialize ID_MAP with the tables that support
|
2018-04-23 23:28:27 +02:00
|
|
|
# id re-mapping.
|
|
|
|
#
|
|
|
|
# Code reviewers: give these tables extra scrutiny, as we need to
|
|
|
|
# make sure to reload related tables AFTER we re-map the ids.
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
# Every table that supports id re-mapping starts out with its own,
# initially empty, old-id -> new-id dictionary.
ID_MAP: Dict[str, Dict[int, int]] = {
    table_name: {}
    for table_name in (
        "alertword",
        "client",
        "user_profile",
        "huddle",
        "realm",
        "stream",
        "recipient",
        "subscription",
        "defaultstream",
        "reaction",
        "realmemoji",
        "realmdomain",
        "realmfilter",
        "realmplayground",
        "message",
        "user_presence",
        "useractivity",
        "useractivityinterval",
        "usermessage",
        "customprofilefield",
        "customprofilefieldvalue",
        "attachment",
        "realmauditlog",
        "recipient_to_huddle_map",
        "userhotspot",
        "mutedtopic",
        "muteduser",
        "service",
        "usergroup",
        "usergroupmembership",
        "botstoragedata",
        "botconfigdata",
        "analytics_realmcount",
        "analytics_streamcount",
        "analytics_usercount",
    )
}
|
2018-04-23 23:28:27 +02:00
|
|
|
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
# Maps a name to a dictionary from an id to a list of ids.  The only
# entry, "huddle_to_user_list", is rebuilt by
# get_huddles_from_subscription and read by process_huddle_hash.
id_map_to_list: Dict[str, Dict[int, List[int]]] = {
    "huddle_to_user_list": {},
}
|
2018-05-25 18:54:22 +02:00
|
|
|
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
# String-to-string path mappings.  fix_upload_links replaces every key
# of "attachment_path" found in a message with the corresponding value.
path_maps: Dict[str, Dict[str, str]] = {
    "attachment_path": {},
}
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def update_id_map(table: TableName, old_id: int, new_id: int) -> None:
    """
    Record in ID_MAP that, for `table`, the id `old_id` maps to `new_id`.

    Raises an Exception when `table` has no entry in ID_MAP; tables
    are never added to ID_MAP implicitly.
    """
    if table not in ID_MAP:
        raise Exception(
            f"""
            Table {table} is not initialized in ID_MAP, which could
            mean that we have not thought through circular
            dependencies.
            """
        )

    table_id_map = ID_MAP[table]
    table_id_map[old_id] = new_id
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def fix_datetime_fields(data: TableData, table: TableName) -> None:
    """
    Convert, in place, the date fields of every row of `table` from a
    timestamp to a timezone-aware UTC datetime.

    The fields converted are those listed in DATE_FIELDS[table]; a
    field whose value is None is left as None.
    """
    utc = datetime.timezone.utc
    for row in data[table]:
        for date_field in DATE_FIELDS[table]:
            timestamp = row[date_field]
            if timestamp is None:
                continue
            row[date_field] = datetime.datetime.fromtimestamp(timestamp, tz=utc)
|
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
|
|
|
|
def fix_upload_links(data: TableData, message_table: TableName) -> None:
    """
    The URLs of uploaded files encode the realm ID of the organization
    being imported, which is only determined at import time, so links
    to uploaded files have to be rewritten during the import.

    For every message flagged with has_attachment, each key of
    path_maps["attachment_path"] that occurs in the message content is
    replaced by its value, both in the content and (when present) in
    the rendered content.
    """
    for message in data[message_table]:
        if message["has_attachment"] is not True:
            continue

        for old_path, new_path in path_maps["attachment_path"].items():
            if old_path not in message["content"]:
                continue

            message["content"] = message["content"].replace(old_path, new_path)
            # rendered_content may be empty or None; only rewrite it
            # when there is something to rewrite.
            if message["rendered_content"]:
                message["rendered_content"] = message["rendered_content"].replace(
                    old_path, new_path
                )
|
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2018-09-30 12:54:16 +02:00
|
|
|
def create_subscription_events(data: TableData, realm_id: int) -> None:
    """
    Create a RealmAuditLog row of type SUBSCRIPTION_CREATED for every
    existing stream subscription in the import data.

    This is meant for export data that lacks the `zerver_realmauditlog`
    table (Slack, Gitter, etc.), where the third-party service does not
    export when a user was subscribed.  All created rows share the same
    event time (now) and the same last message id.
    """
    event_last_message_id = get_last_message_id()
    event_time = timezone_now()

    # Only recipients of type STREAM are relevant here.
    stream_id_by_recipient_id = {
        recipient["id"]: recipient["type_id"]
        for recipient in data["zerver_recipient"]
        if recipient["type"] == Recipient.STREAM
    }

    audit_log_rows = []
    for subscription in data["zerver_subscription"]:
        stream_id = stream_id_by_recipient_id.get(subscription["recipient_id"])
        if stream_id is not None:
            subscriber_id = subscription["user_profile_id"]
            audit_log_rows.append(
                RealmAuditLog(
                    realm_id=realm_id,
                    acting_user_id=subscriber_id,
                    modified_user_id=subscriber_id,
                    modified_stream_id=stream_id,
                    event_last_message_id=event_last_message_id,
                    event_time=event_time,
                    event_type=RealmAuditLog.SUBSCRIPTION_CREATED,
                )
            )

    RealmAuditLog.objects.bulk_create(audit_log_rows)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-07-14 17:18:24 +02:00
|
|
|
def fix_service_tokens(data: TableData, table: TableName) -> None:
    """
    Service tokens are created by 'generate_api_key' and are meant to
    be unique, so every row of `table` gets a freshly generated token
    on import instead of keeping the exported one.
    """
    service_rows = data[table]
    for service_row in service_rows:
        service_row["token"] = generate_api_key()
|
2018-07-14 17:18:24 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-25 18:54:22 +02:00
|
|
|
def process_huddle_hash(data: TableData, table: TableName) -> None:
    """
    Recompute the huddle_hash of every huddle row in `table` from the
    user ids stored for that huddle in
    id_map_to_list["huddle_to_user_list"].
    """
    for huddle_row in data[table]:
        member_ids = id_map_to_list["huddle_to_user_list"][huddle_row["id"]]
        huddle_row["huddle_hash"] = get_huddle_hash(member_ids)
|
2018-05-25 18:54:22 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-25 18:54:22 +02:00
|
|
|
def get_huddles_from_subscription(data: TableData, table: TableName) -> None:
    """
    Collect, for each huddle, the user_profile ids of its subscribers.

    The result replaces id_map_to_list["huddle_to_user_list"]; it is
    what lets a huddle hash be generated from the updated
    user_profile ids.
    """
    recipient_to_huddle = ID_MAP["recipient_to_huddle_map"]

    # Start every known huddle with an empty member list.
    huddle_to_users = {huddle_id: [] for huddle_id in recipient_to_huddle.values()}
    id_map_to_list["huddle_to_user_list"] = huddle_to_users

    for subscription in data[table]:
        recipient = subscription["recipient"]
        if recipient not in recipient_to_huddle:
            # Not a subscription to a huddle recipient.
            continue
        huddle_to_users[recipient_to_huddle[recipient]].append(subscription["user_profile_id"])
|
2018-05-25 18:54:22 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-07-16 17:15:42 +02:00
|
|
|
def fix_customprofilefield(data: TableData) -> None:
    """
    Re-map the user ids stored in custom profile field values whose
    field has 'field_type' USER.

    The value of such a field is a JSON-encoded list of ids; it is
    decoded, re-mapped against the "user_profile" table through
    re_map_foreign_keys_many_to_many_internal, and written back to the
    row as a JSON string.  Values of all other field types are left
    untouched.
    """
    # Collected as a set so the membership test below is a hash lookup
    # rather than a scan of a list for every field value.
    user_field_ids = {
        field["id"]
        for field in data["zerver_customprofilefield"]
        if field["field_type"] == CustomProfileField.USER
    }

    for field_value in data["zerver_customprofilefieldvalue"]:
        if field_value["field_id"] not in user_field_ids:
            continue

        old_user_id_list = orjson.loads(field_value["value"])
        new_id_list = re_map_foreign_keys_many_to_many_internal(
            table="zerver_customprofilefieldvalue",
            field_name="value",
            related_table="user_profile",
            old_id_list=old_user_id_list,
        )
        # orjson.dumps returns bytes; the row stores the value as str.
        field_value["value"] = orjson.dumps(new_id_list).decode()
|
2018-07-16 17:15:42 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def fix_message_rendered_content(
    realm: Realm, sender_map: Dict[int, Record], messages: List[Record]
) -> None:
    """
    Make sure every message in `messages` carries usable rendered_content.

    * A message that already has rendered_content keeps it, except that the
      user, stream and user group IDs embedded in mention markup are
      rewritten through ID_MAP.
    * A message without rendered_content (imported from a non-Zulip
      platform) is rendered from its Markdown `content`; rendering failures
      are logged and the message is left as it was.

    The messages are modified in place.
    """
    for message in messages:
        if message["rendered_content"] is not None:
            # For Zulip->Zulip imports, we use the original rendered
            # Markdown; this avoids issues where e.g. a mention can no
            # longer render properly because a user has changed their
            # name.
            #
            # However, we still need to update the data-user-id and
            # similar values stored on mentions, stream mentions, and
            # similar syntax in the rendered HTML.
            soup = BeautifulSoup(message["rendered_content"], "html.parser")

            user_mentions = soup.findAll("span", {"class": "user-mention"})
            if user_mentions:
                user_id_map = ID_MAP["user_profile"]
                for mention in user_mentions:
                    # Legacy mentions don't have a data-user-id field and
                    # wildcard mentions ("*") need no rewriting; both are
                    # imported unmodified.
                    if not mention.has_attr("data-user-id") or mention["data-user-id"] == "*":
                        continue
                    old_user_id = int(mention["data-user-id"])
                    if old_user_id in user_id_map:
                        mention["data-user-id"] = str(user_id_map[old_user_id])
                message["rendered_content"] = str(soup)

            stream_mentions = soup.findAll("a", {"class": "stream"})
            if stream_mentions:
                stream_id_map = ID_MAP["stream"]
                for mention in stream_mentions:
                    old_stream_id = int(mention["data-stream-id"])
                    if old_stream_id in stream_id_map:
                        mention["data-stream-id"] = str(stream_id_map[old_stream_id])
                message["rendered_content"] = str(soup)

            user_group_mentions = soup.findAll("span", {"class": "user-group-mention"})
            if user_group_mentions:
                user_group_id_map = ID_MAP["usergroup"]
                for mention in user_group_mentions:
                    old_user_group_id = int(mention["data-user-group-id"])
                    if old_user_group_id in user_group_id_map:
                        mention["data-user-group-id"] = str(user_group_id_map[old_user_group_id])
                message["rendered_content"] = str(soup)
            continue

        try:
            sender = sender_map[message["sender_id"]]

            # We don't handle alert words on import from third-party
            # platforms, since they generally don't have an "alert
            # words" type feature, and notifications aren't important anyway.
            rendering_result = markdown_convert(
                content=message["content"],
                realm_alert_words_automaton=None,
                message_realm=realm,
                sent_by_bot=sender["is_bot"],
                translate_emoticons=sender["translate_emoticons"],
            )

            message["rendered_content"] = rendering_result.rendered_content
            message["rendered_content_version"] = markdown_version
        except Exception:
            # Anything raised while looking up the sender or rendering the
            # Markdown ends up here; the message keeps its previous
            # rendered_content and the import carries on.
            logging.warning(
                "Error in Markdown rendering for message ID %s; continuing", message["id"]
            )
|
|
|
|
|
2018-08-09 17:50:43 +02:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def current_table_ids(data: TableData, table: TableName) -> List[int]:
    """
    Return the "id" of every row in data[table], in row order.
    """
    return [row["id"] for row in data[table]]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def idseq(model_class: Any) -> str:
    """
    Return the name of the database ID sequence for model_class.

    A few models use a sequence whose name does not follow the
    "<db_table>_id_seq" pattern; those are special-cased first.
    """
    special_sequences = (
        (RealmDomain, "zerver_realmalias_id_seq"),
        (BotStorageData, "zerver_botuserstatedata_id_seq"),
        (BotConfigData, "zerver_botuserconfigdata_id_seq"),
    )
    for special_model, sequence_name in special_sequences:
        if model_class == special_model:
            return sequence_name
    return f"{model_class._meta.db_table}_id_seq"
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def allocate_ids(model_class: Any, count: int) -> List[int]:
    """
    Increases the sequence number for a given table by the amount of objects being
    imported into that table. Hence, this gives a reserved range of IDs to import the
    converted objects into the tables.

    Returns the list of `count` IDs drawn from the table's ID sequence
    (see idseq), in the order the database produced them.
    """
    sequence = idseq(model_class)
    # Using the cursor as a context manager guarantees that it is closed
    # even if the query raises.
    with connection.cursor() as cursor:
        cursor.execute("select nextval(%s) from generate_series(1, %s)", [sequence, count])
        rows = cursor.fetchall()  # Each element in the result is a tuple like (5,)
    # convert List[Tuple[int]] to List[int]
    return [row[0] for row in rows]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def convert_to_id_fields(data: TableData, table: TableName, field_name: Field) -> None:
    """
    Rename the key `field_name` to `field_name + "_id"` in every row of
    data[table], keeping the value unchanged.

    When Django gives us dict objects via model_to_dict, the foreign
    key fields are `foo`, but we want `foo_id` for the bulk insert.
    For cases where we need to munge ids in the database, see
    re_map_foreign_keys.
    """
    id_field_name = field_name + "_id"
    for row in data[table]:
        row[id_field_name] = row.pop(field_name)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def re_map_foreign_keys(
    data: TableData,
    table: TableName,
    field_name: Field,
    related_table: TableName,
    verbose: bool = False,
    id_field: bool = False,
    recipient_field: bool = False,
    reaction_field: bool = False,
) -> None:
    """
    Remap `field_name` in every row of data[table] using the ID map of
    `related_table`.

    This is the wrapper used for the realm data tables, which arrive as
    TableData.  Avatar and attachment records arrive as a plain
    List[Record] instead and are passed straight to
    re_map_foreign_keys_internal, which does the actual work.
    """
    # See comments in bulk_import_user_message_data.
    assert "usermessage" not in related_table

    re_map_foreign_keys_internal(
        data_table=data[table],
        table=table,
        field_name=field_name,
        related_table=related_table,
        verbose=verbose,
        id_field=id_field,
        recipient_field=recipient_field,
        reaction_field=reaction_field,
    )
|
|
|
|
|
|
|
|
|
|
|
|
def re_map_foreign_keys_internal(
    data_table: List[Record],
    table: TableName,
    field_name: Field,
    related_table: TableName,
    verbose: bool = False,
    id_field: bool = False,
    recipient_field: bool = False,
    reaction_field: bool = False,
) -> None:
    """
    We occasionally need to assign new ids to rows during the
    import/export process, to accommodate things like existing rows
    already being in tables.  See bulk_import_client for more context.

    The tricky part is making sure that foreign key references
    are in sync with the new ids, and this fixer function does
    the re-mapping.  Rows are modified in place:

    * by default the remapped value is stored under `field_name + "_id"`
      and `field_name` is removed;
    * with id_field=True the value is written back under `field_name`
      (as a string when reaction_field is set);
    * with recipient_field=True only rows whose "type" matches
      `related_table` are touched, and huddle rows are additionally
      recorded in ID_MAP["recipient_to_huddle_map"];
    * with reaction_field=True only rows whose reaction_type is
      Reaction.REALM_EMOJI are touched.

    IDs missing from the lookup table are kept unchanged.
    """
    lookup_table = ID_MAP[related_table]
    for item in data_table:
        old_id = item[field_name]

        if recipient_field:
            if related_table == "huddle" and item["type"] == 3:
                # save the recipient id with the huddle id, so that we can extract
                # the user_profile ids involved in a huddle with the help of the
                # subscription object
                # check function 'get_huddles_from_subscription'
                ID_MAP["recipient_to_huddle_map"][item["id"]] = lookup_table[old_id]
            elif not (
                (related_table == "stream" and item["type"] == 2)
                or (related_table == "user_profile" and item["type"] == 1)
            ):
                # This row's type does not match related_table.
                continue

        if reaction_field:
            if item["reaction_type"] != Reaction.REALM_EMOJI:
                continue
            old_id = int(old_id)

        new_id = old_id
        if old_id in lookup_table:
            new_id = lookup_table[old_id]
            if verbose:
                logging.info(
                    "Remapping %s %s from %s to %s", table, field_name + "_id", old_id, new_id
                )

        if id_field:
            item[field_name] = str(new_id) if reaction_field else new_id
        else:
            item[field_name + "_id"] = new_id
            del item[field_name]
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def re_map_foreign_keys_many_to_many(
    data: TableData,
    table: TableName,
    field_name: Field,
    related_table: TableName,
    verbose: bool = False,
) -> None:
    """
    We need to assign new ids to rows during the import/export
    process.

    The tricky part is making sure that foreign key references
    are in sync with the new ids, and this wrapper function does
    the re-mapping only for ManyToMany fields.

    After this function runs, `field_name` is no longer present in any
    row of data[table].
    """
    for row in data[table]:
        row[field_name] = re_map_foreign_keys_many_to_many_internal(
            table, field_name, related_table, row[field_name], verbose
        )
        # NOTE(review): the remapped list is dropped again right away, so
        # the net effect is that the field is removed from the row.
        # Presumably this is deliberate so the row can be passed to the
        # model constructor without a many-to-many argument; confirm
        # before changing.
        del row[field_name]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def re_map_foreign_keys_many_to_many_internal(
    table: TableName,
    field_name: Field,
    related_table: TableName,
    old_id_list: List[int],
    verbose: bool = False,
) -> List[int]:
    """
    This is an internal function for tables with ManyToMany fields,
    which takes the old ID list of the ManyToMany relation and returns the
    new updated ID list.

    IDs without an entry in ID_MAP[related_table] are passed through
    unchanged; `table` and `field_name` are only used for verbose logging.
    """
    lookup_table = ID_MAP[related_table]
    remapped_ids = []
    for old_id in old_id_list:
        if old_id not in lookup_table:
            remapped_ids.append(old_id)
            continue
        new_id = lookup_table[old_id]
        if verbose:
            logging.info(
                "Remapping %s %s from %s to %s", table, field_name + "_id", old_id, new_id
            )
        remapped_ids.append(new_id)
    return remapped_ids
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def fix_bitfield_keys(data: TableData, table: TableName, field_name: Field) -> None:
    """
    Move each row's `field_name + "_mask"` value to the key `field_name`,
    removing the "_mask" key, for every row of data[table].
    """
    mask_field_name = field_name + "_mask"
    for row in data[table]:
        row[field_name] = row.pop(mask_field_name)
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def fix_realm_authentication_bitfield(data: TableData, table: TableName, field_name: Field) -> None:
    """
    Used to fixup the authentication_methods bitfield to be an integer.

    In every row of data[table], `field_name` holds a sequence of entries
    whose second element is truthy when the corresponding flag is set
    (presumably (name, enabled) pairs; confirm against the exporter).
    The sequence is replaced by the integer whose binary digits are those
    flags, the first entry being the most significant bit.  An empty
    sequence becomes 0.
    """
    for item in data[table]:
        bitmask = 0
        for field in item[field_name]:
            bitmask = (bitmask << 1) | (1 if field[1] else 0)
        item[field_name] = bitmask
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-11-28 16:56:04 +01:00
|
|
|
def remove_denormalized_recipient_column_from_data(data: TableData) -> None:
    """
    Drop the "recipient" key from every stream, user profile and huddle row.

    The recipient column shouldn't be imported, we'll set the correct values
    when Recipient table gets imported.
    """
    for table in ("zerver_stream", "zerver_userprofile", "zerver_huddle"):
        for row in data[table]:
            row.pop("recipient", None)
|
2020-03-15 19:05:27 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-07-26 22:45:12 +02:00
|
|
|
def get_db_table(model_class: Any) -> str:
    """
    Return the database table name of model_class, as given by its
    `_meta.db_table` (e.g. RealmDomain -> 'zerver_realmdomain').
    """
    model_meta = model_class._meta
    return model_meta.db_table
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-07-26 22:45:12 +02:00
|
|
|
def update_model_ids(model: Any, data: TableData, related_table: TableName) -> None:
    """
    Give every row of `model`'s table a freshly allocated ID.

    The old -> new pairs are registered under `related_table` with
    update_id_map, and the rows' "id" fields are then rewritten in place.
    """
    table = get_db_table(model)

    # Important: remapping usermessage rows is
    # not only unnecessary, it's expensive and can cause
    # memory errors. We don't even use ids from ID_MAP.
    assert "usermessage" not in table

    old_id_list = current_table_ids(data, table)
    allocated_id_list = allocate_ids(model, len(data[table]))
    for index, old_id in enumerate(old_id_list):
        update_id_map(related_table, old_id, allocated_id_list[index])
    re_map_foreign_keys(data, table, "id", related_table=related_table, id_field=True)
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-10-12 23:42:17 +02:00
|
|
|
def bulk_import_user_message_data(data: TableData, dump_file_id: int) -> None:
    """
    Insert the rows of data["zerver_usermessage"] in batches of 10000.

    `dump_file_id` is only used in the final log message.
    """
    model = UserMessage
    table = "zerver_usermessage"

    # IMPORTANT NOTE: We do not use any primary id
    # data from either the import itself or ID_MAP.
    # We let the DB itself generate ids.  Note that
    # no tables use user_message.id as a foreign key,
    # so we can safely avoid all re-mapping complexity.

    def process_batch(items: List[Dict[str, Any]]) -> None:
        bulk_insert_ums(
            [
                UserMessageLite(
                    user_profile_id=item["user_profile_id"],
                    message_id=item["message_id"],
                    flags=item["flags"],
                )
                for item in items
            ]
        )

    process_list_in_batches(
        lst=data[table],
        chunk_size=10000,
        process_batch=process_batch,
    )

    logging.info("Successfully imported %s from %s[%s].", model, table, dump_file_id)
|
2018-10-12 23:42:17 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def bulk_import_model(data: TableData, model: Any, dump_file_id: Optional[str] = None) -> None:
    """
    Bulk-create one `model` instance per row of the model's table in `data`.

    `dump_file_id`, when given, is only included in the log message.
    """
    table = get_db_table(model)
    # TODO, deprecate dump_file_id
    model.objects.bulk_create(model(**item) for item in data[table])
    if dump_file_id is not None:
        logging.info("Successfully imported %s from %s[%s].", model, table, dump_file_id)
        return
    logging.info("Successfully imported %s from %s.", model, table)
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
# Client is a table shared by multiple realms, so in order to
# correctly import multiple realms into the same server, we need to
# check whether a Client object already exists, and so we need to
# remap all Client IDs to the values in the new DB.
|
|
|
|
def bulk_import_client(data: TableData, model: Any, table: TableName) -> None:
    """
    Map every Client row of data[table] onto a Client in this database.

    Clients are looked up by name and created when missing; each old ID ->
    new ID pair is recorded with update_id_map under "client".  The `model`
    argument is not used here; Client is referenced directly.
    """
    for row in data[table]:
        client_name = row["name"]
        try:
            client = Client.objects.get(name=client_name)
        except Client.DoesNotExist:
            client = Client.objects.create(name=client_name)
        update_id_map(table="client", old_id=row["id"], new_id=client.id)
|
2018-04-23 23:28:27 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-14 00:03:40 +01:00
|
|
|
def fix_subscriptions_is_user_active_column(
    data: TableData, user_profiles: List[UserProfile]
) -> None:
    """
    Set "is_user_active" on every Subscription row from the is_active flag
    of the user in `user_profiles` whose id equals the row's user_profile_id.

    Raises KeyError if a row refers to a user that is not in `user_profiles`.
    """
    table = get_db_table(Subscription)
    is_active_by_user_id = {}
    for user in user_profiles:
        is_active_by_user_id[user.id] = user.is_active
    for subscription in data[table]:
        subscription["is_user_active"] = is_active_by_user_id[subscription["user_profile_id"]]
|
|
|
|
|
|
|
|
|
2019-07-28 01:08:18 +02:00
|
|
|
def process_avatars(record: Dict[str, Any]) -> None:
    """
    Make sure the avatar images exist for one avatar record.

    Only records whose "s3_path" ends in ".original" are processed.  With
    local uploads, an existing "-medium.png" file is removed first.  If
    the image cannot be thumbnailed, a warning is logged and the user's
    avatar source is changed to Gravatar.
    """
    from zerver.lib.upload import upload_backend

    if not record["s3_path"].endswith(".original"):
        return

    user_profile = get_user_profile_by_id(record["user_profile_id"])
    if settings.LOCAL_UPLOADS_DIR is not None:
        avatar_path = user_avatar_path_from_ids(user_profile.id, record["realm_id"])
        full_avatar_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", avatar_path)
        medium_file_path = full_avatar_path + "-medium.png"
        if os.path.exists(medium_file_path):
            # We remove the image here primarily to deal with
            # issues when running the import script multiple
            # times in development (where one might reuse the
            # same realm ID from a previous iteration).
            os.remove(medium_file_path)

    try:
        upload_backend.ensure_avatar_image(user_profile=user_profile, is_medium=True)
        if record.get("importer_should_thumbnail"):
            upload_backend.ensure_avatar_image(user_profile=user_profile)
    except BadImageError:
        logging.warning(
            "Could not thumbnail avatar image for user %s; ignoring",
            user_profile.id,
        )
        # Delete the record of the avatar to avoid 404s.
        do_change_avatar_fields(user_profile, UserProfile.AVATAR_FROM_GRAVATAR, acting_user=None)
|
2019-07-28 01:08:18 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def import_uploads(
    realm: Realm,
    import_dir: Path,
    processes: int,
    processing_avatars: bool = False,
    processing_emojis: bool = False,
    processing_realm_icons: bool = False,
) -> None:
    """
    Import the files described by `records.json` in import_dir.

    Depending on the processing_* flags the records are treated as
    avatars, custom emoji, realm icons/logos or, with no flag set,
    ordinary uploaded files.  Each record's realm_id (and, except for
    emoji and realm icons, its user_profile_id) is first remapped through
    ID_MAP.  Every file is then stored under a newly computed relative
    path: uploaded to an S3 bucket when settings.LOCAL_UPLOADS_DIR is
    None, and copied below settings.LOCAL_UPLOADS_DIR otherwise.

    For avatars, process_avatars is finally run on every record, either
    serially (processes == 1) or in a multiprocessing pool of `processes`
    workers.

    The `realm` argument is not referenced in this function body.

    Raises AssertionError if both processing_avatars and
    processing_emojis are set.
    """
    if processing_avatars and processing_emojis:
        raise AssertionError("Cannot import avatars and emojis at the same time!")
    if processing_avatars:
        logging.info("Importing avatars")
    elif processing_emojis:
        logging.info("Importing emojis")
    elif processing_realm_icons:
        logging.info("Importing realm icons and logos")
    else:
        logging.info("Importing uploaded files")

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename, "rb") as records_file:
        records: List[Dict[str, Any]] = orjson.loads(records_file.read())
    # A single timestamp is shared by all records that get a
    # "last_modified" value assigned below.
    timestamp = datetime_to_timestamp(timezone_now())

    re_map_foreign_keys_internal(
        records, "records", "realm_id", related_table="realm", id_field=True
    )
    if not processing_emojis and not processing_realm_icons:
        re_map_foreign_keys_internal(
            records, "records", "user_profile_id", related_table="user_profile", id_field=True
        )

    s3_uploads = settings.LOCAL_UPLOADS_DIR is None

    if s3_uploads:
        # Avatars, emoji and realm icons share the avatar bucket; ordinary
        # uploads go to the authenticated uploads bucket.
        if processing_avatars or processing_emojis or processing_realm_icons:
            bucket_name = settings.S3_AVATAR_BUCKET
        else:
            bucket_name = settings.S3_AUTH_UPLOADS_BUCKET
        bucket = get_bucket(bucket_name)

    count = 0
    for record in records:
        count += 1
        if count % 1000 == 0:
            logging.info("Processed %s/%s uploads", count, len(records))

        # Step 1: compute relative_path, the file's location on this server.
        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            relative_path = user_avatar_path_from_ids(record["user_profile_id"], record["realm_id"])
            if record["s3_path"].endswith(".original"):
                relative_path += ".original"
            else:
                # TODO: This really should be unconditional.  However,
                # until we fix the S3 upload backend to use the .png
                # path suffix for its normal avatar URLs, we need to
                # only do this for the LOCAL_UPLOADS_DIR backend.
                if not s3_uploads:
                    relative_path += ".png"
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record["realm_id"], emoji_file_name=record["file_name"]
            )
            record["last_modified"] = timestamp
        elif processing_realm_icons:
            icon_name = os.path.basename(record["path"])
            relative_path = os.path.join(str(record["realm_id"]), "realm", icon_name)
            record["last_modified"] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_file'.
            # This relative_path is basically the new location of the file,
            # which will later be copied from its original location as
            # specified in record["s3_path"].
            relative_path = "/".join(
                [
                    str(record["realm_id"]),
                    secrets.token_urlsafe(18),
                    sanitize_name(os.path.basename(record["path"])),
                ]
            )
            # Remember old path -> new path for this attachment.
            path_maps["attachment_path"][record["s3_path"]] = relative_path

        # Step 2: store the file, either in S3 or on the local disk.
        if s3_uploads:
            key = bucket.Object(relative_path)
            metadata = {}
            if processing_emojis and "user_profile_id" not in record:
                # Exported custom emoji from tools like Slack don't have
                # the data for what user uploaded them in `user_profile_id`.
                pass
            elif processing_realm_icons and "user_profile_id" not in record:
                # Exported realm icons and logos from local export don't have
                # the value of user_profile_id in the associated record.
                pass
            else:
                user_profile_id = int(record["user_profile_id"])
                # Support email gateway bot and other cross-realm messages
                if user_profile_id in ID_MAP["user_profile"]:
                    logging.info("Uploaded by ID mapped user: %s!", user_profile_id)
                    user_profile_id = ID_MAP["user_profile"][user_profile_id]
                user_profile = get_user_profile_by_id(user_profile_id)
                metadata["user_profile_id"] = str(user_profile.id)

            if "last_modified" in record:
                metadata["orig_last_modified"] = str(record["last_modified"])
            metadata["realm_id"] = str(record["realm_id"])

            # Zulip exports will always have a content-type, but third-party exports might not.
            content_type = record.get("content_type")
            if content_type is None:
                content_type = guess_type(record["s3_path"])[0]
                if content_type is None:
                    # This is the default for unknown data.  Note that
                    # for `.original` files, this is the value we'll
                    # set; that is OK, because those are never served
                    # directly anyway.
                    content_type = "application/octet-stream"

            key.upload_file(
                Filename=os.path.join(import_dir, record["path"]),
                ExtraArgs={"ContentType": content_type, "Metadata": metadata},
            )
        else:
            # Local storage: avatars, emoji and realm icons live under
            # "avatars", everything else under "files".
            if processing_avatars or processing_emojis or processing_realm_icons:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", relative_path)
            else:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", relative_path)
            orig_file_path = os.path.join(import_dir, record["path"])
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.

        if processes == 1:
            for record in records:
                process_avatars(record)
        else:
            # NOTE(review): presumably the database and cache connections
            # are closed here so that the worker processes do not share
            # the parent's connections; confirm.
            connection.close()
            cache._cache.disconnect_all()
            with multiprocessing.Pool(processes) as p:
                # Drain the iterator; process_avatars returns nothing useful.
                for out in p.imap_unordered(process_avatars, records):
                    pass
|
2018-12-06 00:35:16 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
# Importing data suffers from a difficult ordering problem because of
|
|
|
|
# models that reference each other circularly. Here is a correct order.
|
|
|
|
#
|
|
|
|
# * Client [no deps]
|
|
|
|
# * Realm [-notifications_stream]
|
|
|
|
# * Stream [only depends on realm]
|
|
|
|
# * Realm's notifications_stream
|
|
|
|
# * Now can do all realm_tables
|
|
|
|
# * UserProfile, in order by ID to avoid bot loop issues
|
|
|
|
# * Huddle
|
|
|
|
# * Recipient
|
|
|
|
# * Subscription
|
|
|
|
# * Message
|
|
|
|
# * UserMessage
|
|
|
|
#
|
|
|
|
# Because the Python object => JSON conversion process is not fully
|
|
|
|
# faithful, we have to use a set of fixers (e.g. on DateTime objects
|
2021-05-10 07:02:14 +02:00
|
|
|
# and foreign keys) to do the import correctly.
|
2021-02-12 08:19:30 +01:00
|
|
|
def do_import_realm(import_dir: Path, subdomain: str, processes: int = 1) -> Realm:
    """Import the realm export stored in `import_dir` and return the new Realm.

    The imported realm's string_id and name are both overwritten with
    `subdomain`.  `processes` is forwarded unchanged to import_uploads for
    the avatar, upload, emoji and realm icon imports.

    Raises a plain Exception if `import_dir`, its realm.json, or its
    attachment.json does not exist.  Note that the attachment.json check
    only runs near the end, after most tables have already been imported.
    """
    logging.info("Importing realm dump %s", import_dir)
    if not os.path.exists(import_dir):
        raise Exception("Missing import directory!")

    realm_data_filename = os.path.join(import_dir, "realm.json")
    if not os.path.exists(realm_data_filename):
        raise Exception("Missing realm.json file!")

    # NOTE(review): presumably needed so that the internal realm and its
    # system bots exist for the get_realm/get_system_bot lookups below;
    # confirm against server_initialized/create_internal_realm.
    if not server_initialized():
        create_internal_realm()

    logging.info("Importing realm data from %s", realm_data_filename)
    with open(realm_data_filename, "rb") as f:
        data = orjson.loads(f.read())
    remove_denormalized_recipient_column_from_data(data)

    # Optional flag in realm.json; only passed on to
    # update_message_foreign_keys below.
    sort_by_date = data.get("sort_by_date", False)

    bulk_import_client(data, Client, "zerver_client")

    # We don't import the Stream model yet, since it depends on Realm,
    # which isn't imported yet.  But we need the Stream model IDs for
    # notifications_stream.
    update_model_ids(Stream, data, "stream")
    re_map_foreign_keys(data, "zerver_realm", "notifications_stream", related_table="stream")
    re_map_foreign_keys(data, "zerver_realm", "signup_notifications_stream", related_table="stream")

    fix_datetime_fields(data, "zerver_realm")
    # Fix realm subdomain information
    data["zerver_realm"][0]["string_id"] = subdomain
    data["zerver_realm"][0]["name"] = subdomain
    fix_realm_authentication_bitfield(data, "zerver_realm", "authentication_methods")
    update_model_ids(Realm, data, "realm")

    realm = Realm(**data["zerver_realm"][0])

    # Remember the (already remapped) notification stream IDs, but save the
    # realm with both fields cleared; they are restored once the Stream
    # rows have been imported below.
    if realm.notifications_stream_id is not None:
        notifications_stream_id: Optional[int] = int(realm.notifications_stream_id)
    else:
        notifications_stream_id = None
    realm.notifications_stream_id = None
    if realm.signup_notifications_stream_id is not None:
        signup_notifications_stream_id: Optional[int] = int(realm.signup_notifications_stream_id)
    else:
        signup_notifications_stream_id = None
    realm.signup_notifications_stream_id = None
    realm.save()

    # Email tokens will automatically be randomly generated when the
    # Stream objects are created by Django.
    fix_datetime_fields(data, "zerver_stream")
    re_map_foreign_keys(data, "zerver_stream", "realm", related_table="realm")
    # Handle rendering of stream descriptions for import from non-Zulip
    for stream in data["zerver_stream"]:
        stream["rendered_description"] = render_stream_description(stream["description"])
    bulk_import_model(data, Stream)

    # The streams now exist, so the notification stream references saved
    # above can be restored.
    realm.notifications_stream_id = notifications_stream_id
    realm.signup_notifications_stream_id = signup_notifications_stream_id
    realm.save()

    # Remap the user IDs for notification_bot and friends to their
    # appropriate IDs on this server
    internal_realm = get_realm(settings.SYSTEM_BOT_REALM)
    for item in data["zerver_userprofile_crossrealm"]:
        # get_system_bot is called twice per item: once for this log line
        # and once for the actual mapping below.
        logging.info(
            "Adding to ID map: %s %s",
            item["id"],
            get_system_bot(item["email"], internal_realm.id).id,
        )
        new_user_id = get_system_bot(item["email"], internal_realm.id).id
        update_id_map(table="user_profile", old_id=item["id"], new_id=new_user_id)
        new_recipient_id = Recipient.objects.get(type=Recipient.PERSONAL, type_id=new_user_id).id
        update_id_map(table="recipient", old_id=item["recipient_id"], new_id=new_recipient_id)

    # Merge in zerver_userprofile_mirrordummy
    data["zerver_userprofile"] = data["zerver_userprofile"] + data["zerver_userprofile_mirrordummy"]
    del data["zerver_userprofile_mirrordummy"]
    data["zerver_userprofile"].sort(key=lambda r: r["id"])

    # To remap foreign key for UserProfile.last_active_message_id
    update_message_foreign_keys(import_dir=import_dir, sort_by_date=sort_by_date)

    fix_datetime_fields(data, "zerver_userprofile")
    update_model_ids(UserProfile, data, "user_profile")
    re_map_foreign_keys(data, "zerver_userprofile", "realm", related_table="realm")
    re_map_foreign_keys(data, "zerver_userprofile", "bot_owner", related_table="user_profile")
    re_map_foreign_keys(
        data, "zerver_userprofile", "default_sending_stream", related_table="stream"
    )
    re_map_foreign_keys(
        data, "zerver_userprofile", "default_events_register_stream", related_table="stream"
    )
    re_map_foreign_keys(
        data, "zerver_userprofile", "last_active_message_id", related_table="message", id_field=True
    )
    for user_profile_dict in data["zerver_userprofile"]:
        # Imported users get no password and a freshly generated API key.
        user_profile_dict["password"] = None
        user_profile_dict["api_key"] = generate_api_key()
        # Since Zulip doesn't use these permissions, drop them
        del user_profile_dict["user_permissions"]
        del user_profile_dict["groups"]
        # The short_name field is obsolete in Zulip, but it's
        # convenient for third party exports to populate it.
        if "short_name" in user_profile_dict:
            del user_profile_dict["short_name"]

    user_profiles = [UserProfile(**item) for item in data["zerver_userprofile"]]
    for user_profile in user_profiles:
        user_profile.set_unusable_password()
    UserProfile.objects.bulk_create(user_profiles)

    re_map_foreign_keys(data, "zerver_defaultstream", "stream", related_table="stream")
    re_map_foreign_keys(data, "zerver_realmemoji", "author", related_table="user_profile")
    # Every table listed in realm_tables has its "realm" foreign key
    # remapped and is then imported in the same way.
    for (table, model, related_table) in realm_tables:
        re_map_foreign_keys(data, table, "realm", related_table="realm")
        update_model_ids(model, data, related_table)
        bulk_import_model(data, model)

    if "zerver_huddle" in data:
        update_model_ids(Huddle, data, "huddle")
        # We don't import Huddle yet, since we don't have the data to
        # compute huddle hashes until we've imported some of the
        # tables below.
        # TODO: double-check this.

    # Recipient.type_id is remapped once per kind of object it can refer
    # to (stream, user_profile, huddle).
    re_map_foreign_keys(
        data,
        "zerver_recipient",
        "type_id",
        related_table="stream",
        recipient_field=True,
        id_field=True,
    )
    re_map_foreign_keys(
        data,
        "zerver_recipient",
        "type_id",
        related_table="user_profile",
        recipient_field=True,
        id_field=True,
    )
    re_map_foreign_keys(
        data,
        "zerver_recipient",
        "type_id",
        related_table="huddle",
        recipient_field=True,
        id_field=True,
    )
    update_model_ids(Recipient, data, "recipient")
    bulk_import_model(data, Recipient)
    bulk_set_users_or_streams_recipient_fields(Stream, Stream.objects.filter(realm=realm))
    bulk_set_users_or_streams_recipient_fields(UserProfile, UserProfile.objects.filter(realm=realm))

    re_map_foreign_keys(data, "zerver_subscription", "user_profile", related_table="user_profile")
    get_huddles_from_subscription(data, "zerver_subscription")
    re_map_foreign_keys(data, "zerver_subscription", "recipient", related_table="recipient")
    update_model_ids(Subscription, data, "subscription")
    fix_subscriptions_is_user_active_column(data, user_profiles)
    bulk_import_model(data, Subscription)

    if "zerver_realmauditlog" in data:
        fix_datetime_fields(data, "zerver_realmauditlog")
        re_map_foreign_keys(data, "zerver_realmauditlog", "realm", related_table="realm")
        re_map_foreign_keys(
            data, "zerver_realmauditlog", "modified_user", related_table="user_profile"
        )
        re_map_foreign_keys(
            data, "zerver_realmauditlog", "acting_user", related_table="user_profile"
        )
        re_map_foreign_keys(data, "zerver_realmauditlog", "modified_stream", related_table="stream")
        update_model_ids(RealmAuditLog, data, related_table="realmauditlog")
        bulk_import_model(data, RealmAuditLog)
    else:
        # The export carries no audit log table, so fall back to
        # create_subscription_events.
        logging.info("about to call create_subscription_events")
        create_subscription_events(
            data=data,
            realm_id=realm.id,
        )
        logging.info("done with create_subscription_events")

    # Ensure the invariant that there's always a realm-creation audit
    # log event, even if the export was generated by an export tool
    # that does not create RealmAuditLog events.
    if not RealmAuditLog.objects.filter(
        realm=realm, event_type=RealmAuditLog.REALM_CREATED
    ).exists():
        RealmAuditLog.objects.create(
            realm=realm,
            event_type=RealmAuditLog.REALM_CREATED,
            event_time=realm.date_created,
            # Mark these as backfilled, since they weren't created
            # when the realm was actually created, and thus do not
            # have the creating user associated with them.
            backfilled=True,
        )

    if "zerver_huddle" in data:
        process_huddle_hash(data, "zerver_huddle")
        bulk_import_model(data, Huddle)
        # Attach each huddle that still lacks a recipient to its
        # matching HUDDLE-type Recipient row.
        for huddle in Huddle.objects.filter(recipient_id=None):
            recipient = Recipient.objects.get(type=Recipient.HUDDLE, type_id=huddle.id)
            huddle.recipient = recipient
            huddle.save(update_fields=["recipient"])

    if "zerver_alertword" in data:
        re_map_foreign_keys(data, "zerver_alertword", "user_profile", related_table="user_profile")
        re_map_foreign_keys(data, "zerver_alertword", "realm", related_table="realm")
        update_model_ids(AlertWord, data, "alertword")
        bulk_import_model(data, AlertWord)

    if "zerver_userhotspot" in data:
        fix_datetime_fields(data, "zerver_userhotspot")
        re_map_foreign_keys(data, "zerver_userhotspot", "user", related_table="user_profile")
        update_model_ids(UserHotspot, data, "userhotspot")
        bulk_import_model(data, UserHotspot)

    # Rows of the zerver_mutedtopic table are imported through the
    # UserTopic model.
    if "zerver_mutedtopic" in data:
        fix_datetime_fields(data, "zerver_mutedtopic")
        re_map_foreign_keys(data, "zerver_mutedtopic", "user_profile", related_table="user_profile")
        re_map_foreign_keys(data, "zerver_mutedtopic", "stream", related_table="stream")
        re_map_foreign_keys(data, "zerver_mutedtopic", "recipient", related_table="recipient")
        update_model_ids(UserTopic, data, "mutedtopic")
        bulk_import_model(data, UserTopic)

    if "zerver_muteduser" in data:
        fix_datetime_fields(data, "zerver_muteduser")
        re_map_foreign_keys(data, "zerver_muteduser", "user_profile", related_table="user_profile")
        re_map_foreign_keys(data, "zerver_muteduser", "muted_user", related_table="user_profile")
        update_model_ids(MutedUser, data, "muteduser")
        bulk_import_model(data, MutedUser)

    if "zerver_service" in data:
        re_map_foreign_keys(data, "zerver_service", "user_profile", related_table="user_profile")
        fix_service_tokens(data, "zerver_service")
        update_model_ids(Service, data, "service")
        bulk_import_model(data, Service)

    if "zerver_usergroup" in data:
        re_map_foreign_keys(data, "zerver_usergroup", "realm", related_table="realm")
        re_map_foreign_keys_many_to_many(
            data, "zerver_usergroup", "members", related_table="user_profile"
        )
        update_model_ids(UserGroup, data, "usergroup")
        bulk_import_model(data, UserGroup)

        re_map_foreign_keys(
            data, "zerver_usergroupmembership", "user_group", related_table="usergroup"
        )
        re_map_foreign_keys(
            data, "zerver_usergroupmembership", "user_profile", related_table="user_profile"
        )
        update_model_ids(UserGroupMembership, data, "usergroupmembership")
        bulk_import_model(data, UserGroupMembership)

    if "zerver_botstoragedata" in data:
        re_map_foreign_keys(
            data, "zerver_botstoragedata", "bot_profile", related_table="user_profile"
        )
        update_model_ids(BotStorageData, data, "botstoragedata")
        bulk_import_model(data, BotStorageData)

    if "zerver_botconfigdata" in data:
        re_map_foreign_keys(
            data, "zerver_botconfigdata", "bot_profile", related_table="user_profile"
        )
        update_model_ids(BotConfigData, data, "botconfigdata")
        bulk_import_model(data, BotConfigData)

    fix_datetime_fields(data, "zerver_userpresence")
    re_map_foreign_keys(data, "zerver_userpresence", "user_profile", related_table="user_profile")
    re_map_foreign_keys(data, "zerver_userpresence", "client", related_table="client")
    re_map_foreign_keys(data, "zerver_userpresence", "realm", related_table="realm")
    update_model_ids(UserPresence, data, "user_presence")
    bulk_import_model(data, UserPresence)

    fix_datetime_fields(data, "zerver_useractivity")
    re_map_foreign_keys(data, "zerver_useractivity", "user_profile", related_table="user_profile")
    re_map_foreign_keys(data, "zerver_useractivity", "client", related_table="client")
    update_model_ids(UserActivity, data, "useractivity")
    bulk_import_model(data, UserActivity)

    fix_datetime_fields(data, "zerver_useractivityinterval")
    re_map_foreign_keys(
        data, "zerver_useractivityinterval", "user_profile", related_table="user_profile"
    )
    update_model_ids(UserActivityInterval, data, "useractivityinterval")
    bulk_import_model(data, UserActivityInterval)

    re_map_foreign_keys(data, "zerver_customprofilefield", "realm", related_table="realm")
    update_model_ids(CustomProfileField, data, related_table="customprofilefield")
    bulk_import_model(data, CustomProfileField)

    re_map_foreign_keys(
        data, "zerver_customprofilefieldvalue", "user_profile", related_table="user_profile"
    )
    re_map_foreign_keys(
        data, "zerver_customprofilefieldvalue", "field", related_table="customprofilefield"
    )
    fix_customprofilefield(data)
    update_model_ids(CustomProfileFieldValue, data, related_table="customprofilefieldvalue")
    bulk_import_model(data, CustomProfileFieldValue)

    # Import uploaded files and avatars
    import_uploads(realm, os.path.join(import_dir, "avatars"), processes, processing_avatars=True)
    import_uploads(realm, os.path.join(import_dir, "uploads"), processes)

    # We need to have this check as the emoji files are only present in the data
    # importer from Slack
    # For Zulip export, this doesn't exist
    if os.path.exists(os.path.join(import_dir, "emoji")):
        import_uploads(realm, os.path.join(import_dir, "emoji"), processes, processing_emojis=True)

    if os.path.exists(os.path.join(import_dir, "realm_icons")):
        import_uploads(
            realm, os.path.join(import_dir, "realm_icons"), processes, processing_realm_icons=True
        )

    # Map each user dict's (already remapped) ID to the dict itself, for
    # import_message_data.
    sender_map = {user["id"]: user for user in data["zerver_userprofile"]}

    # Import zerver_message and zerver_usermessage
    import_message_data(realm=realm, sender_map=sender_map, import_dir=import_dir)

    re_map_foreign_keys(data, "zerver_reaction", "message", related_table="message")
    re_map_foreign_keys(data, "zerver_reaction", "user_profile", related_table="user_profile")
    re_map_foreign_keys(
        data,
        "zerver_reaction",
        "emoji_code",
        related_table="realmemoji",
        id_field=True,
        reaction_field=True,
    )
    update_model_ids(Reaction, data, "reaction")
    bulk_import_model(data, Reaction)

    # Recalculate first_message_id for every stream in the realm now that
    # the message data has been imported.
    for stream in Stream.objects.filter(realm=realm):
        recipient = Recipient.objects.get(type=Recipient.STREAM, type_id=stream.id)
        first_message = Message.objects.filter(recipient=recipient).first()
        if first_message is None:
            stream.first_message_id = None
        else:
            stream.first_message_id = first_message.id
        stream.save(update_fields=["first_message_id"])

    # Do attachments AFTER message data is loaded.
    # TODO: de-dup how we read these json files.
    fn = os.path.join(import_dir, "attachment.json")
    if not os.path.exists(fn):
        raise Exception("Missing attachment.json file!")

    logging.info("Importing attachment data from %s", fn)
    # NOTE: this rebinds `data`; the realm.json contents loaded at the top
    # of the function are no longer referenced after this point.
    with open(fn, "rb") as f:
        data = orjson.loads(f.read())

    import_attachments(data)

    # Import the analytics file.
    import_analytics_data(realm=realm, import_dir=import_dir)

    # Imported realms are put on the LIMITED plan when billing is enabled
    # and on SELF_HOSTED otherwise.
    if settings.BILLING_ENABLED:
        do_change_plan_type(realm, Realm.LIMITED, acting_user=None)
    else:
        do_change_plan_type(realm, Realm.SELF_HOSTED, acting_user=None)
    return realm
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-01-30 21:23:36 +01:00
|
|
|
# create_users and do_import_system_bots differ from their equivalent
|
|
|
|
# in zerver/lib/server_initialization.py because here we check if the
|
|
|
|
# bots don't already exist and only then create a user for these bots.
|
2018-04-23 23:28:27 +02:00
|
|
|
def do_import_system_bots(realm: Any) -> None:
    """Create the bots listed in settings.INTERNAL_BOTS in `realm`.

    Each bot's email address is built by substituting
    settings.INTERNAL_BOT_DOMAIN into its "email_template".  The actual
    creation is delegated to create_users with bot_type
    UserProfile.DEFAULT_BOT.
    """
    name_email_pairs: List[Tuple[str, str]] = []
    for bot_config in settings.INTERNAL_BOTS:
        bot_name = bot_config["name"]
        bot_email = bot_config["email_template"] % (settings.INTERNAL_BOT_DOMAIN,)
        name_email_pairs.append((bot_name, bot_email))

    create_users(realm, name_email_pairs, bot_type=UserProfile.DEFAULT_BOT)
    print("Finished importing system bots.")
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def create_users(
    realm: Realm, name_list: Iterable[Tuple[str, str]], bot_type: Optional[int] = None
) -> None:
    """Bulk-create users in `realm` for the entries of `name_list` that are new.

    `name_list` yields (full_name, email) pairs.  A pair is skipped when a
    UserProfile with that email already exists; the remaining ones are
    handed to bulk_create_users as (email, full_name, True) tuples together
    with `bot_type`.
    """
    user_set = set()
    for full_name, email in name_list:
        # .exists() asks the database whether any row matches instead of
        # fetching the matching rows just to test the queryset's truthiness.
        if not UserProfile.objects.filter(email=email).exists():
            user_set.add((email, full_name, True))
    bulk_create_users(realm, user_set, bot_type)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def update_message_foreign_keys(import_dir: Path, sort_by_date: bool) -> None:
    """Register an old-ID -> new-ID mapping for every incoming message.

    The incoming message IDs come from get_incoming_message_ids, and the
    same number of fresh IDs is requested from allocate_ids for the Message
    model.  The two lists are paired in order and each pair is recorded in
    the "message" ID map.

    user_message keys are not touched here; that happens later, when the
    files are read a second time to get the actual data.
    """
    incoming_ids = get_incoming_message_ids(
        import_dir=import_dir,
        sort_by_date=sort_by_date,
    )
    allocated_ids = allocate_ids(model_class=Message, count=len(incoming_ids))

    id_pairs = zip(incoming_ids, allocated_ids)
    for old_message_id, new_message_id in id_pairs:
        update_id_map(table="message", old_id=old_message_id, new_id=new_message_id)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_incoming_message_ids(import_dir: Path, sort_by_date: bool) -> List[int]:
    """
    Collect the ids of all messages found in the ``messages-NNNNNN.json``
    dump files of ``import_dir``.

    This reads in our entire collection of message ids, which can be
    millions of integers for some installations.  When ``sort_by_date``
    is set, the ids are returned ordered by ``date_sent`` (with the id
    itself as tiebreaker); this is necessary because our utilities that
    convert third party chat data don't always guarantee that the id
    order matches the ``date_sent`` order.  Otherwise the ids are
    returned in the order they appear in the files.

    The caller also needs the full list in order to move the ids to a
    new range when the server has data for other realms.
    """
    dated_ids: List[Tuple[int, int]] = []
    unsorted_ids: List[int] = []

    file_number = 1
    while True:
        dump_path = os.path.join(import_dir, f"messages-{file_number:06}.json")
        if not os.path.exists(dump_path):
            break

        with open(dump_path, "rb") as f:
            data = orjson.loads(f.read())

        # Aggressively free up memory.
        del data["zerver_usermessage"]

        for row in data["zerver_message"]:
            message_id = row["id"]

            if sort_by_date:
                # We truncate date_sent to int to theoretically
                # save memory and speed up the sort.  For
                # Zulip-to-Zulip imports, the
                # message_id will generally be a good tiebreaker.
                # If we occasionally mis-order the ids for two
                # messages from the same second, it's not the
                # end of the world, as it's likely those messages
                # arrived to the original server in somewhat
                # arbitrary order.
                dated_ids.append((int(row["date_sent"]), message_id))
            else:
                unsorted_ids.append(message_id)

        file_number += 1

    if not sort_by_date:
        return unsorted_ids

    dated_ids.sort()
    return [message_id for _, message_id in dated_ids]
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def import_message_data(realm: Realm, sender_map: Dict[int, Record], import_dir: Path) -> None:
    """Import every ``messages-NNNNNN.json`` dump found in ``import_dir``.

    Dump files are processed in numeric order, starting at 1 and
    stopping at the first missing file.  For each file the message rows
    are fixed up (foreign keys, datetimes, upload links, ids, rendered
    content) and bulk-imported, and then the UserMessage rows from the
    same file are fixed up and bulk-imported.

    Args:
        realm: Passed to ``fix_message_rendered_content``.
        sender_map: Passed to ``fix_message_rendered_content``.
        import_dir: Directory containing the message dump files.
    """
    # (field, related_table) pairs, remapped in this order.
    message_foreign_keys = (
        ("sender", "user_profile"),
        ("recipient", "recipient"),
        ("sending_client", "client"),
    )

    dump_file_id = 1
    while True:
        message_filename = os.path.join(import_dir, f"messages-{dump_file_id:06}.json")
        if not os.path.exists(message_filename):
            break

        with open(message_filename, "rb") as f:
            data = orjson.loads(f.read())

        logging.info("Importing message dump %s", message_filename)
        for field, related_table in message_foreign_keys:
            re_map_foreign_keys(data, "zerver_message", field, related_table=related_table)
        fix_datetime_fields(data, "zerver_message")
        # Parser to update message content with the updated attachment URLs
        fix_upload_links(data, "zerver_message")

        # We already create mappings for zerver_message ids
        # in update_message_foreign_keys(), so here we simply
        # apply them.
        message_id_map = ID_MAP["message"]
        for message_row in data["zerver_message"]:
            message_row["id"] = message_id_map[message_row["id"]]

        # Sanity check: every UserMessage row must point at a message
        # that has an entry in the id map.
        for usermessage_row in data["zerver_usermessage"]:
            assert usermessage_row["message"] in message_id_map

        fix_message_rendered_content(
            realm=realm,
            sender_map=sender_map,
            messages=data["zerver_message"],
        )
        logging.info("Successfully rendered Markdown for message batch")

        # A LOT HAPPENS HERE.
        # This is where we actually import the message data.
        bulk_import_model(data, Message)

        # Due to the structure of these message chunks, we're
        # guaranteed to have already imported all the Message objects
        # for this batch of UserMessage objects.
        re_map_foreign_keys(data, "zerver_usermessage", "message", related_table="message")
        re_map_foreign_keys(
            data, "zerver_usermessage", "user_profile", related_table="user_profile"
        )
        fix_bitfield_keys(data, "zerver_usermessage", "flags")

        bulk_import_user_message_data(data, dump_file_id)
        dump_file_id += 1
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-23 23:28:27 +02:00
|
|
|
def import_attachments(data: TableData) -> None:
    """Import the attachment rows and their attachment<->message links.

    The ``zerver_attachment`` rows in ``data`` are fixed up and
    bulk-imported as ``Attachment`` objects; the per-row list of message
    ids is stripped from each row and inserted separately into the
    ``zerver_attachment_messages`` many-to-many table with raw SQL.
    """
    # Clean up the data in zerver_attachment that is not
    # relevant to our many-to-many import.
    fix_datetime_fields(data, "zerver_attachment")
    re_map_foreign_keys(data, "zerver_attachment", "owner", related_table="user_profile")
    re_map_foreign_keys(data, "zerver_attachment", "realm", related_table="realm")

    # Configure ourselves.  Django models many-to-many (m2m)
    # relations asymmetrically.  The parent here refers to the
    # Model that has the ManyToManyField.  It is assumed here
    # the child models have been loaded, but we are in turn
    # responsible for loading the parents and the m2m rows.
    parent_model = Attachment
    parent_db_table_name = "zerver_attachment"
    parent_singular = "attachment"
    child_singular = "message"
    child_plural = "messages"
    m2m_table_name = "zerver_attachment_messages"
    parent_id = "attachment_id"
    child_id = "message_id"

    update_model_ids(parent_model, data, "attachment")
    # We don't bulk_import_model yet, because we need to first compute
    # the many-to-many for this table.

    # First, build our list of many-to-many (m2m) rows.
    # We do this in a slightly convoluted way to anticipate
    # a future where we may need to call re_map_foreign_keys.
    m2m_rows: List[Record] = [
        {
            parent_singular: attachment_row["id"],
            child_singular: ID_MAP["message"][old_message_id],
        }
        for attachment_row in data[parent_db_table_name]
        for old_message_id in attachment_row[child_plural]
    ]

    # Create our table data for insert.
    m2m_data: TableData = {m2m_table_name: m2m_rows}
    convert_to_id_fields(m2m_data, m2m_table_name, parent_singular)
    convert_to_id_fields(m2m_data, m2m_table_name, child_singular)
    m2m_rows = m2m_data[m2m_table_name]

    # Next, delete out our child data from the parent rows.
    for attachment_row in data[parent_db_table_name]:
        del attachment_row[child_plural]

    # Update 'path_id' for the attachments
    for attachment_row in data[parent_db_table_name]:
        attachment_row["path_id"] = path_maps["attachment_path"][attachment_row["path_id"]]

    # Next, load the parent rows.
    bulk_import_model(data, parent_model)

    # Now, go back to our m2m rows.
    # TODO: Do this the kosher Django way.  We may find a
    # better way to do this in Django 1.9 particularly.
    with connection.cursor() as cursor:
        insert_statement = SQL(
            """
            INSERT INTO {m2m_table_name} ({parent_id}, {child_id}) VALUES %s
            """
        ).format(
            m2m_table_name=Identifier(m2m_table_name),
            parent_id=Identifier(parent_id),
            child_id=Identifier(child_id),
        )
        value_tuples = [(m2m_row[parent_id], m2m_row[child_id]) for m2m_row in m2m_rows]
        execute_values(cursor.cursor, insert_statement, value_tuples)

    logging.info("Successfully imported M2M table %s", m2m_table_name)
|
2019-01-30 08:54:29 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-01-30 08:54:29 +01:00
|
|
|
def import_analytics_data(realm: Realm, import_dir: Path) -> None:
    """Import the analytics count tables from ``analytics.json``, if present.

    Does nothing when ``import_dir`` has no ``analytics.json``.  Otherwise
    the RealmCount, UserCount and StreamCount tables are processed in
    that order: datetimes fixed, foreign keys remapped, ids updated, and
    the rows bulk-imported.

    Note that ``realm`` is not used by this function's body.
    """
    analytics_filename = os.path.join(import_dir, "analytics.json")
    if not os.path.exists(analytics_filename):
        return

    logging.info("Importing analytics data from %s", analytics_filename)
    with open(analytics_filename, "rb") as f:
        data = orjson.loads(f.read())

    # (model, table name, extra (field, related_table) foreign keys).
    # Every table additionally has its "realm" foreign key remapped first.
    count_tables = (
        (RealmCount, "analytics_realmcount", ()),
        (UserCount, "analytics_usercount", (("user", "user_profile"),)),
        (StreamCount, "analytics_streamcount", (("stream", "stream"),)),
    )

    # Process the data through the fixer functions.
    for count_model, table_name, extra_foreign_keys in count_tables:
        fix_datetime_fields(data, table_name)
        re_map_foreign_keys(data, table_name, "realm", related_table="realm")
        for field, related_table in extra_foreign_keys:
            re_map_foreign_keys(data, table_name, field, related_table=related_table)
        update_model_ids(count_model, data, table_name)
        bulk_import_model(data, count_model)
|