2016-04-05 00:27:37 +02:00
|
|
|
import os
|
|
|
|
import tempfile
|
2019-01-09 19:39:29 +01:00
|
|
|
from argparse import ArgumentParser
|
2017-11-16 00:43:27 +01:00
|
|
|
from typing import Any
|
|
|
|
|
2020-06-21 13:18:08 +02:00
|
|
|
from django.conf import settings
|
2017-11-16 00:43:27 +01:00
|
|
|
from django.core.management.base import CommandError
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2020-08-29 00:17:21 +02:00
|
|
|
from zerver.lib.actions import do_deactivate_realm
|
2019-03-25 22:18:28 +01:00
|
|
|
from zerver.lib.export import export_realm_wrapper
|
2020-01-14 21:59:46 +01:00
|
|
|
from zerver.lib.management import ZulipBaseCommand
|
2020-08-29 00:10:32 +02:00
|
|
|
from zerver.models import Message, Reaction, UserProfile
|
2016-04-05 00:27:37 +02:00
|
|
|
|
2020-01-14 21:59:46 +01:00
|
|
|
|
2017-08-07 17:46:32 +02:00
|
|
|
class Command(ZulipBaseCommand):
    """Management command that exports a single realm via export_realm_wrapper."""

    help = """Exports all data from a Zulip realm

    This command exports all significant data from a Zulip realm. The
    result can be imported using the `./manage.py import` command.

    Things that are exported:
    * All user-accessible data in the Zulip database (Messages,
      Streams, UserMessages, RealmEmoji, etc.)
    * Copies of all uploaded files and avatar images along with
      metadata needed to restore them even in the absence of the
      original server.

    Things that are not exported:
    * Confirmation and PreregistrationUser (transient tables)
    * Sessions (everyone will need to log in again post-export)
    * Users' passwords and API keys (users will need to use SSO or reset password)
    * Mobile tokens for APNS/GCM (users will need to reconnect their mobile devices)
    * ScheduledEmail (not relevant on a new server)
    * RemoteZulipServer (unlikely to be migrated)
    * third_party_api_results cache (this means rerendering all old
      messages could be expensive)

    Things that will break as a result of the export:
    * Passwords will not be transferred. They will all need to go
      through the password reset flow to obtain a new password (unless
      they intend to only use e.g. Google auth).
    * Users will need to log out and re-log in to the Zulip desktop and
      mobile apps. The apps now all have an option on the login page
      where you can specify which Zulip server to use; your users
      should enter <domain name>.
    * All bots will stop working since they will be pointing to the
      wrong server URL, and all users' API keys have been rotated as
      part of the migration. So to re-enable your integrations, you
      will need to direct your integrations at the new server.
      Usually this means updating the URL and the bots' API keys. You
      can see a list of all the bots that have been configured for
      your realm on the `/#organization` page, and use that list to
      make sure you migrate them all.

    The proper procedure for using this to export a realm is as follows:

    * Use `./manage.py export --deactivate-realm` to deactivate and export
      the realm, producing a data tarball.

    * Transfer the tarball to the new server and unpack it.

    * Use `./manage.py import` to import the realm

    * Use `./manage.py reactivate_realm` to reactivate the realm, so
      users can log in again.

    * Inform the users about the things broken above.

    We recommend testing by exporting without `--deactivate-realm` first, to
    make sure you have the procedure right and minimize downtime.

    Performance: In one test, the tool exported a realm with hundreds
    of users and ~1M messages of history with --threads=1 in about 3
    hours of serial runtime (goes down to ~50m with --threads=6 on a
    machine with 8 CPUs). Importing that same data set took about 30
    minutes. But this will vary a lot depending on the average number
    of recipients of messages in the realm, hardware, etc."""

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the export options plus the required realm selector."""
        parser.add_argument(
            "--output", dest="output_dir", help="Directory to write exported data to."
        )
        parser.add_argument(
            "--threads",
            # Parse as an integer up front so a bad value produces an
            # argparse usage error instead of a ValueError traceback.
            type=int,
            default=settings.DEFAULT_DATA_EXPORT_IMPORT_PARALLELISM,
            help="Threads to use in exporting UserMessage objects in parallel",
        )
        parser.add_argument(
            "--public-only",
            action="store_true",
            help="Export only public stream messages and associated attachments",
        )
        parser.add_argument(
            "--deactivate-realm",
            action="store_true",
            help="Deactivate the realm immediately before exporting",
        )
        parser.add_argument(
            "--consent-message-id",
            type=int,
            # handle() counts reactions with emoji code 1f4e4 (outbox).
            help="ID of the message asking users to react with the outbox emoji",
        )
        parser.add_argument(
            "--upload",
            action="store_true",
            help="Whether to upload resulting tarball to s3 or LOCAL_UPLOADS_DIR",
        )
        self.add_realm_args(parser, required=True)

    def handle(self, *args: Any, **options: Any) -> None:
        """Validate the options, prepare the output location and run the export.

        Raises CommandError when:
        * fewer than one thread is requested;
        * both --public-only and --consent-message-id are given;
        * --deactivate-realm is given for an already deactivated realm;
        * the consent message is missing, was edited, or has an outbox
          reaction from a user of another realm;
        * the operator declines the confirmation prompt;
        * the output directory is nonempty or the tarball already exists.
        """
        realm = self.get_realm(options)
        assert realm is not None  # Should be ensured by parser

        output_dir = options["output_dir"]
        public_only = options["public_only"]
        consent_message_id = options["consent_message_id"]

        print(f"\033[94mExporting realm\033[0m: {realm.string_id}")

        # int() is kept so a string value in options is still accepted.
        num_threads = int(options["threads"])
        if num_threads < 1:
            raise CommandError("You must have at least one thread.")

        if public_only and consent_message_id is not None:
            raise CommandError("Please pass either --public-only or --consent-message-id")

        if options["deactivate_realm"] and realm.deactivated:
            raise CommandError(f"The realm {realm.string_id} is already deactivated. Aborting...")

        if consent_message_id is not None:
            try:
                message = Message.objects.get(id=consent_message_id)
            except Message.DoesNotExist:
                # The lookup traceback adds nothing to the user-facing error.
                raise CommandError("Message with given ID does not exist. Aborting...") from None

            if message.last_edit_time is not None:
                raise CommandError("Message was edited. Aborting...")

            # Since the message might have been sent by
            # Notification Bot, we can't trivially check the realm of
            # the message through message.sender.realm. So instead we
            # check the realm of the people who reacted to the message
            # (who must all be in the message's realm).
            reactions = Reaction.objects.filter(
                message=message,
                # outbox = 1f4e4
                emoji_code="1f4e4",
                reaction_type="unicode_emoji",
            )
            for reaction in reactions:
                if reaction.user_profile.realm != realm:
                    raise CommandError(
                        "Users from a different realm reacted to message. Aborting..."
                    )

            print(f"\n\033[94mMessage content:\033[0m\n{message.content}\n")

            user_count = (
                UserProfile.objects.filter(
                    realm_id=realm.id,
                    is_active=True,
                    is_bot=False,
                )
                .exclude(
                    # We exclude guests, because they're not a priority for
                    # looking at whether most users are being exported.
                    role=UserProfile.ROLE_GUEST,
                )
                .count()
            )
            print(
                f"\033[94mNumber of users that reacted outbox:\033[0m {len(reactions)} / {user_count} total non-guest users\n"
            )

            # Tolerate stray whitespace around the operator's answer.
            proceed = input("Continue? [y/N] ")
            if proceed.strip().lower() not in ("y", "yes"):
                raise CommandError("Aborting!")

        if output_dir is None:
            output_dir = tempfile.mkdtemp(prefix="zulip-export-")
        else:
            output_dir = os.path.realpath(os.path.expanduser(output_dir))
            if os.path.exists(output_dir):
                if os.listdir(output_dir):
                    raise CommandError(
                        f"Refusing to overwrite nonempty directory: {output_dir}. Aborting...",
                    )
            else:
                os.makedirs(output_dir)

        tarball_path = output_dir.rstrip("/") + ".tar.gz"
        try:
            # Mode "x" fails if the file exists, so an existing tarball is
            # never overwritten; this leaves an empty placeholder file.
            with open(tarball_path, "x"):
                pass
        except FileExistsError:
            raise CommandError(
                f"Refusing to overwrite existing tarball: {tarball_path}. Aborting..."
            ) from None

        if options["deactivate_realm"]:
            print(f"\033[94mDeactivating realm\033[0m: {realm.string_id}")
            do_deactivate_realm(realm, acting_user=None)

        def percent_callback(bytes_transferred: Any) -> None:
            # Print one progress dot per callback; the argument is unused.
            print(end=".", flush=True)

        # Allows us to trigger exports separately from command line argument parsing
        export_realm_wrapper(
            realm=realm,
            output_dir=output_dir,
            threads=num_threads,
            upload=options["upload"],
            public_only=public_only,
            percent_callback=percent_callback,
            consent_message_id=consent_message_id,
        )
|