export.py: Refactor './manage.py export' core logic.

This commit serves as the first step in supporting "public export" as a
webapp feature.  The refactoring allows the export logic to be called
from elsewhere in the codebase.
Author: Wyatt Hoodes, 2019-03-25 11:18:28 -10:00 (committed by Tim Abbott)
Parent: 701cc524b0
Commit: 0db7d6c31b
2 changed files with 47 additions and 42 deletions
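
As a rough illustration of the stated motivation (not part of this commit), a later
"public export" webapp code path could call the new entry point directly instead of
shelling out to the management command. The helper name trigger_public_export below
is hypothetical; only export_realm_wrapper and its parameters come from this commit:

    # Hypothetical caller sketch: trigger_public_export is not a real function in
    # this commit; it only shows how export_realm_wrapper could be reused elsewhere.
    from zerver.lib.export import export_realm_wrapper
    from zerver.models import Realm

    def trigger_public_export(realm: Realm, output_dir: str) -> None:
        # public_only=True restricts the export to publicly visible data;
        # upload_to_s3=True assumes the S3 backend is configured.
        export_realm_wrapper(realm=realm, output_dir=output_dir, threads=6,
                             upload_to_s3=True, public_only=True)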

zerver/lib/export.py

@@ -14,6 +14,7 @@ from django.conf import settings
 from django.forms.models import model_to_dict
 from django.utils.timezone import make_aware as timezone_make_aware
 from django.utils.timezone import is_naive as timezone_is_naive
+from django.core.management.base import CommandError
 import glob
 import logging
 import os
@@ -21,6 +22,7 @@ import ujson
 import subprocess
 import tempfile
 import shutil
+import sys
 from scripts.lib.zulip_tools import overwrite_symlink
 from zerver.lib.avatar_hash import user_avatar_path_from_ids
 from analytics.models import RealmCount, UserCount, StreamCount
@@ -32,6 +34,7 @@ from zerver.models import UserProfile, Realm, Client, Huddle, Stream, \
     RealmAuditLog, UserHotspot, MutedTopic, Service, UserGroup, \
     UserGroupMembership, BotStorageData, BotConfigData
 from zerver.lib.parallel import run_parallel
+from zerver.lib.utils import generate_random_token
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, \
     Union
@@ -1593,3 +1596,41 @@ def get_analytics_config() -> Config:
     )
     return analytics_config
+
+def export_realm_wrapper(realm: Realm, output_dir: str, threads: int,
+                         upload_to_s3: bool, public_only: bool) -> None:
+    do_export_realm(realm=realm, output_dir=output_dir, threads=threads, public_only=public_only)
+    print("Finished exporting to %s; tarring" % (output_dir,))
+
+    do_write_stats_file_for_realm_export(output_dir)
+
+    tarball_path = output_dir.rstrip('/') + '.tar.gz'
+    os.chdir(os.path.dirname(output_dir))
+    subprocess.check_call(["tar", "-czf", tarball_path, os.path.basename(output_dir)])
+
+    print("Tarball written to %s" % (tarball_path,))
+
+    if not upload_to_s3:
+        return
+
+    def percent_callback(complete: Any, total: Any) -> None:
+        sys.stdout.write('.')
+        sys.stdout.flush()
+
+    if settings.LOCAL_UPLOADS_DIR is not None:
+        raise CommandError("S3 backend must be configured to upload to S3")
+
+    print("Uploading export tarball to S3")
+    from zerver.lib.upload import S3Connection, get_bucket, Key
+    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
+    # We use the avatar bucket, because it's world-readable.
+    bucket = get_bucket(conn, settings.S3_AVATAR_BUCKET)
+    key = Key(bucket)
+    key.key = os.path.join("exports", generate_random_token(32), os.path.basename(tarball_path))
+    key.set_contents_from_filename(tarball_path, cb=percent_callback, num_cb=40)
+
+    public_url = 'https://{bucket}.{host}/{key}'.format(
+        host=conn.server_name(),
+        bucket=bucket.name,
+        key=key.key)
+    print("Uploaded to %s" % (public_url,))

zerver/management/commands/export.py

@@ -1,19 +1,14 @@
 import os
+import shutil
-import subprocess
-import sys
 import tempfile
-import shutil
 from argparse import ArgumentParser
 from typing import Any

-from django.conf import settings
 from django.core.management.base import CommandError

-from zerver.lib.export import do_export_realm, \
-    do_write_stats_file_for_realm_export
 from zerver.lib.management import ZulipBaseCommand
-from zerver.lib.utils import generate_random_token
+from zerver.lib.export import export_realm_wrapper

 class Command(ZulipBaseCommand):
     help = """Exports all data from a Zulip realm
@@ -119,38 +114,7 @@ class Command(ZulipBaseCommand):
         if num_threads < 1:
             raise CommandError('You must have at least one thread.')

-        do_export_realm(realm, output_dir, threads=num_threads, public_only=options["public_only"])
-        print("Finished exporting to %s; tarring" % (output_dir,))
-
-        do_write_stats_file_for_realm_export(output_dir)
-
-        tarball_path = output_dir.rstrip('/') + '.tar.gz'
-        os.chdir(os.path.dirname(output_dir))
-        subprocess.check_call(["tar", "-czf", tarball_path, os.path.basename(output_dir)])
-
-        print("Tarball written to %s" % (tarball_path,))
-
-        if not options["upload_to_s3"]:
-            return
-
-        def percent_callback(complete: Any, total: Any) -> None:
-            sys.stdout.write('.')
-            sys.stdout.flush()
-
-        if settings.LOCAL_UPLOADS_DIR is not None:
-            raise CommandError("S3 backend must be configured to upload to S3")
-
-        print("Uploading export tarball to S3")
-        from zerver.lib.upload import S3Connection, get_bucket, Key
-        conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
-        # We use the avatar bucket, because it's world-readable.
-        bucket = get_bucket(conn, settings.S3_AVATAR_BUCKET)
-        key = Key(bucket)
-        key.key = os.path.join("exports", generate_random_token(32), os.path.basename(tarball_path))
-        key.set_contents_from_filename(tarball_path, cb=percent_callback, num_cb=40)
-
-        public_url = 'https://{bucket}.{host}/{key}'.format(
-            host=conn.server_name(),
-            bucket=bucket.name,
-            key=key.key)
-        print("Uploaded to %s" % (public_url,))
+        # Allows us to trigger exports separately from command line argument parsing
+        export_realm_wrapper(realm=realm, output_dir=output_dir,
+                             threads=num_threads, upload_to_s3=options['upload_to_s3'],
+                             public_only=options["public_only"])
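
For manual verification, a minimal sketch of exercising the refactored entry point
from a `./manage.py shell` session follows. The realm string_id "zulip" and the /tmp
output path are assumptions for illustration, not part of the commit; behaviour should
match `./manage.py export` minus argument parsing and output-directory handling:

    # Assumed interactive usage; creates the output directory that the
    # management command would otherwise set up before exporting.
    import os
    from zerver.lib.export import export_realm_wrapper
    from zerver.models import get_realm

    realm = get_realm("zulip")
    output_dir = "/tmp/zulip-export-test"
    os.makedirs(output_dir, exist_ok=True)
    export_realm_wrapper(realm=realm, output_dir=output_dir,
                         threads=1, upload_to_s3=False, public_only=False)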