zulip/zerver/management/commands/import_dump.py

167 lines
6.6 KiB
Python
Raw Normal View History

from __future__ import absolute_import
from __future__ import print_function
from optparse import make_option
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.db import connection
from django.conf import settings
from zerver.lib.actions import do_create_stream
from zerver.models import Realm, Stream, UserProfile, Recipient, Subscription, \
Message, UserMessage, Huddle, DefaultStream, RealmAlias, RealmFilter, Client
import sys
import json
class Command(BaseCommand):
DEFAULT_CHUNK_SIZE = 5000
help = """Import Zulip database dump files into a fresh Zulip instance.
This command should be used only on a newly created, empty Zulip instance to
import a database dump from one or more JSON files.
Usage: python2.7 manage.py import_dump [--destroy-rebuild-database] [--chunk-size=%s] <json file name> [<json file name>...]""" % DEFAULT_CHUNK_SIZE
option_list = BaseCommand.option_list + (
make_option('--destroy-rebuild-database',
dest='destroy_rebuild_database',
default=False,
action="store_true",
help='Destroys and rebuilds the databases prior to import.'),
make_option('--chunk-size',
dest='chunk_size',
type='int',
default=DEFAULT_CHUNK_SIZE,
help='Number of objects that are added to the table in one roundtrip to the database.')
)
def new_instance_check(self, model):
count = model.objects.count()
if count:
print("Zulip instance is not empty, found %d rows in %s table. " \
% (count, model._meta.db_table))
print("You may use --destroy-rebuild-database to destroy and rebuild the database prior to import.")
exit(1)
def do_destroy_and_rebuild_database(self, db_name):
call_command('flush', verbosity=0, interactive=False)
def increment_row_counter(self, row_counter, database_dump, model):
table_name = model._meta.db_table
row_counter[table_name] = (row_counter.get(table_name) or 0) + \
len(database_dump.get(table_name) or [ ])
def test_table_row_count(self, row_counter, model):
table_name = model._meta.db_table
sys.stdout.write("%s: " % table_name)
expected_count = row_counter.get(table_name) or 0
actual_count = model.objects.count()
status = "PASSED" if expected_count == actual_count else "FAILED"
params = (expected_count, actual_count, status)
sys.stdout.write("expected %d rows, got %d. %s\n" % params)
def import_table(self, database_dump, realm_notification_map, model):
table_name = model._meta.db_table
if table_name in database_dump:
cursor = connection.cursor()
sys.stdout.write("Importing %s: " % table_name)
accumulator = [ ]
for row in database_dump[table_name]:
# hack to filter out notifications_stream_id circular reference
# out of zerver_realm table prior to insert of corresponding
# streams.
# removes notifications_stream_id from row dict
if table_name == "zerver_realm":
realm_notification_map[row["id"]] = row.get("notifications_stream_id")
row = { field_name: value \
for field_name, value in row.items() \
if field_name != "notifications_stream_id" }
accumulator.append(model(**row))
if len(accumulator) % self.chunk_size == 0:
model.objects.bulk_create(accumulator)
sys.stdout.write(".")
accumulator = [ ]
# create any remaining objects that haven't been flushed yet
if len(accumulator):
model.objects.bulk_create(accumulator)
# set the next id sequence value to avoid a collision with the
# imported ids
cursor.execute("SELECT setval(%s, MAX(id)+1) FROM " + table_name,
[table_name + "_id_seq"])
sys.stdout.write(" [Done]\n")
def handle(self, *args, **options):
models_to_import = [Realm, Stream, UserProfile, Recipient, Subscription,
Client, Message, UserMessage, Huddle, DefaultStream, RealmAlias,
RealmFilter]
self.chunk_size = options["chunk_size"]
encoding = sys.getfilesystemencoding()
if len(args) == 0:
print("Please provide at least one database dump file name.")
exit(1)
if not options["destroy_rebuild_database"]:
for model in models_to_import:
self.new_instance_check(model)
else:
db_name = settings.DATABASES['default']['NAME']
self.do_destroy_and_rebuild_database(db_name)
# maps relationship between realm id and notifications_stream_id
# generally, there should be only one realm per dump, but the code
# doesn't make that assumption
realm_notification_map = dict()
# maping between table name and a total expected number of rows across
# all input json files
row_counter = dict()
for file_name in args:
try:
fp = open(file_name, 'r')
except IOError:
print("File not found: '%s'" % (file_name,))
exit(1)
print("Processing file: %s ..." % (file_name,))
# parse the database dump and load in memory
# TODO: change this to a streaming parser to support loads > RAM size
database_dump = json.load(fp, encoding)
for model in models_to_import:
self.increment_row_counter(row_counter, database_dump, model)
self.import_table(database_dump, realm_notification_map, model)
print("")
# set notifications_stream_id on realm objects to correct value now
# that foreign keys are in streams table
if len(realm_notification_map):
print("Setting realm notification stream...")
for id, notifications_stream_id in realm_notification_map.items():
Realm.objects \
.filter(id=id) \
.update(notifications_stream = notifications_stream_id)
print("")
print("Testing data import: ")
# test that everything from all json dumps made it into the database
for model in models_to_import:
self.test_table_row_count(row_counter, model)