Remove old prototype data import/export tool.

This prototype from Dropbox Hack Week turned out to be too inefficient
to be used for realms with any significant amount of history, so we're
removing it.

It will be replaced by https://github.com/zulip/zulip/pull/673.
This commit is contained in:
Tim Abbott 2016-06-27 13:45:21 -07:00
parent 101820bc29
commit e72f41cdec
4 changed files with 0 additions and 267 deletions

View File

@ -145,34 +145,5 @@
"python": "<div class=\"codehilite\"><pre><span class=\"n\">client</span><span class=\"o\">.</span><span class=\"n\">get_events<span class=\"p\">()</span>\n</span></pre></div> or if you want the event queues managed for you, <div class=\"codehilite\"><pre><span class=\"n\">client</span><span class=\"o\">.</span><span class=\"n\">call_on_each_event<span class=\"p\">()</span>\n</span></pre></div>",
"curl": "<div class=\"codehilite\"><pre>curl -G https://api.zulip.com/v1/events <span class=\"se\">\\</span>\n -u othello-bot@example.com:a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 <span class=\"se\">\\</span>\n -d <span class=\"s2\">\"queue_id=1375801870:2942\"</span> <span class=\"se\">\\</span>\n -d <span class=\"s2\">\"last_event_id=-1\"</span>\n</pre></div>"
}
},
{
"method": "GET",
"endpoint": "export",
"example_response":
"
{
'msg': '',
'result': 'success',
'zerver_message': [
{
'id': 'test'
},
{
'id': 'hello'
}
]
}
",
"returns": [
[
"export",
"a dict database tables"
]
],
"call": "Get an export of your database",
"example_request": {
"curl": "<div class=\"codehilite\"><pre>curl -G https://api.zulip.com/v1/export <span class=\"se\">\\</span>\n -u othello-bot@example.com:a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 </pre></div>"
}
}
]

View File

@ -1,166 +0,0 @@
from __future__ import absolute_import
from __future__ import print_function
from optparse import make_option
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.db import connection
from django.conf import settings
from zerver.lib.actions import do_create_stream
from zerver.models import Realm, Stream, UserProfile, Recipient, Subscription, \
Message, UserMessage, Huddle, DefaultStream, RealmAlias, RealmFilter, Client
import sys
import json
class Command(BaseCommand):
    # Default number of objects inserted per bulk_create() round-trip.
    DEFAULT_CHUNK_SIZE = 5000

    help = """Import Zulip database dump files into a fresh Zulip instance.

This command should be used only on a newly created, empty Zulip instance to
import a database dump from one or more JSON files.

Usage: python manage.py import_dump [--destroy-rebuild-database] [--chunk-size=%s] <json file name> [<json file name>...]""" % (DEFAULT_CHUNK_SIZE,)

    option_list = BaseCommand.option_list + (
        make_option('--destroy-rebuild-database',
                    dest='destroy_rebuild_database',
                    default=False,
                    action="store_true",
                    help='Destroys and rebuilds the databases prior to import.'),
        make_option('--chunk-size',
                    dest='chunk_size',
                    type='int',
                    default=DEFAULT_CHUNK_SIZE,
                    help='Number of objects that are added to the table in one roundtrip to the database.'),
        )

    def new_instance_check(self, model):
        """Exit with an error unless the model's table is empty.

        Importing on top of existing rows would collide with the ids in
        the dump, so we refuse to proceed.
        """
        count = model.objects.count()
        if count:
            print("Zulip instance is not empty, found %d rows in %s table. " \
                % (count, model._meta.db_table))
            print("You may use --destroy-rebuild-database to destroy and rebuild the database prior to import.")
            sys.exit(1)

    def do_destroy_and_rebuild_database(self, db_name):
        """Remove all data from the default database via Django's `flush`."""
        call_command('flush', verbosity=0, interactive=False)

    def increment_row_counter(self, row_counter, database_dump, model):
        """Add the number of rows this dump holds for the model's table to
        row_counter, so totals can be verified after the import finishes."""
        table_name = model._meta.db_table
        row_counter[table_name] = (row_counter.get(table_name) or 0) + \
            len(database_dump.get(table_name) or [])

    def test_table_row_count(self, row_counter, model):
        """Print PASSED/FAILED depending on whether the table now contains
        exactly the number of rows counted across all input dump files."""
        table_name = model._meta.db_table
        sys.stdout.write("%s: " % (table_name,))
        expected_count = row_counter.get(table_name) or 0
        actual_count = model.objects.count()
        status = "PASSED" if expected_count == actual_count else "FAILED"
        sys.stdout.write("expected %d rows, got %d. %s\n" %
                         (expected_count, actual_count, status))

    def import_table(self, database_dump, realm_notification_map, model):
        """Bulk-insert all rows for model found in database_dump.

        Rows are inserted in chunks of self.chunk_size.  For the
        zerver_realm table, the circular notifications_stream_id foreign
        key is stripped from each row and remembered in
        realm_notification_map so it can be restored once the streams
        table has been populated (see handle()).
        """
        table_name = model._meta.db_table
        if table_name not in database_dump:
            return
        sys.stdout.write("Importing %s: " % (table_name,))
        accumulator = []
        for row in database_dump[table_name]:
            if table_name == "zerver_realm":
                # Hack to filter the notifications_stream_id circular
                # reference out of zerver_realm rows prior to inserting
                # the corresponding streams.
                realm_notification_map[row["id"]] = row.get("notifications_stream_id")
                row = {field_name: value
                       for field_name, value in row.items()
                       if field_name != "notifications_stream_id"}
            accumulator.append(model(**row))
            if len(accumulator) % self.chunk_size == 0:
                model.objects.bulk_create(accumulator)
                sys.stdout.write(".")
                accumulator = []
        # Create any remaining objects that haven't been flushed yet.
        if accumulator:
            model.objects.bulk_create(accumulator)
        # Set the next id sequence value to avoid a collision with the
        # imported ids.  SQL identifiers cannot be parameterized, but
        # table_name comes from model._meta.db_table, not user input.
        cursor = connection.cursor()
        try:
            cursor.execute("SELECT setval(%s, MAX(id)+1) FROM " + table_name,
                           [table_name + "_id_seq"])
        finally:
            cursor.close()
        sys.stdout.write(" [Done]\n")

    def handle(self, *args, **options):
        """Import each JSON dump named in args, then verify row counts."""
        # Order matters here: tables with foreign keys must be imported
        # after the tables they reference (e.g. Stream after Realm).
        models_to_import = [Realm, Stream, UserProfile, Recipient, Subscription,
            Client, Message, UserMessage, Huddle, DefaultStream, RealmAlias,
            RealmFilter]

        self.chunk_size = options["chunk_size"] # type: int # ignore mypy options bug
        encoding = sys.getfilesystemencoding()

        if len(args) == 0:
            print("Please provide at least one database dump file name.")
            sys.exit(1)

        if not options["destroy_rebuild_database"]:
            for model in models_to_import:
                self.new_instance_check(model)
        else:
            db_name = settings.DATABASES['default']['NAME']
            self.do_destroy_and_rebuild_database(db_name)

        # maps relationship between realm id and notifications_stream_id
        # generally, there should be only one realm per dump, but the code
        # doesn't make that assumption
        realm_notification_map = dict() # type: Dict[int, int]

        # mapping between table name and a total expected number of rows across
        # all input json files
        row_counter = dict() # type: Dict[str, int]

        for file_name in args:
            try:
                fp = open(file_name, 'r')
            except IOError:
                print("File not found: '%s'" % (file_name,))
                sys.exit(1)

            print("Processing file: %s ..." % (file_name,))

            # Parse the database dump and load it in memory; the `with`
            # ensures the file handle is closed once parsed.
            # TODO: change this to a streaming parser to support loads > RAM size
            with fp:
                database_dump = json.load(fp, encoding)

            for model in models_to_import:
                self.increment_row_counter(row_counter, database_dump, model)
                self.import_table(database_dump, realm_notification_map, model)

            print("")

        # Set notifications_stream_id on realm objects to the correct value
        # now that the foreign keys exist in the streams table.
        if realm_notification_map:
            print("Setting realm notification stream...")
            for id, notifications_stream_id in realm_notification_map.items():
                Realm.objects \
                    .filter(id=id) \
                    .update(notifications_stream = notifications_stream_id)

        print("")
        print("Testing data import: ")

        # Test that everything from all json dumps made it into the database.
        for model in models_to_import:
            self.test_table_row_count(row_counter, model)

View File

@ -1044,76 +1044,6 @@ def generate_client_id():
# type: () -> text_type
return generate_random_token(32)
# The order of creation of the various dictionaries are important.
# We filter on {userprofile,stream,subscription_recipient}_ids.
@require_realm_admin
def export(request, user_profile):
    # type: (HttpRequest, UserProfile) -> HttpResponse
    """Export this realm's public data as a single JSON response.

    Refuses to run for realms over a hard-coded size limit, since this
    builds the whole export in memory.  All cross-table filtering is
    pushed into the database with `__in` lookups rather than loading
    entire tables into Python and filtering there.
    """
    if (Message.objects.filter(sender__realm=user_profile.realm).count() > 1000000 or
        UserMessage.objects.filter(user_profile__realm=user_profile.realm).count() > 3000000):
        return json_error(_("Realm has too much data for non-batched export."))

    response = {}

    # Realm and its users (credentials excluded from the user rows).
    response['zerver_realm'] = [model_to_dict(x)
                                for x in Realm.objects.select_related().filter(id=user_profile.realm.id)]
    response['zerver_userprofile'] = [model_to_dict(x, exclude=["password", "api_key"])
                                      for x in UserProfile.objects.select_related().filter(realm=user_profile.realm)]
    userprofile_ids = set(userprofile["id"] for userprofile in response['zerver_userprofile'])

    # Only public streams are exported (invite_only=False).
    response['zerver_stream'] = [model_to_dict(x, exclude=["email_token"])
                                 for x in Stream.objects.select_related().filter(realm=user_profile.realm, invite_only=False)]
    stream_ids = set(x["id"] for x in response['zerver_stream'])

    response['zerver_usermessage'] = [model_to_dict(x) for x in
                                      UserMessage.objects.select_related()
                                      .filter(user_profile_id__in=userprofile_ids)]

    user_recipients = [model_to_dict(x)
                       for x in Recipient.objects.select_related()
                       .filter(type=1, type_id__in=userprofile_ids)]
    stream_recipients = [model_to_dict(x)
                         for x in Recipient.objects.select_related()
                         .filter(type=2, type_id__in=stream_ids)]
    stream_recipient_ids = set(x["id"] for x in stream_recipients)

    # only check for subscriptions to streams
    response['zerver_subscription'] = [model_to_dict(x) for x in
                                       Subscription.objects.select_related()
                                       .filter(user_profile_id__in=userprofile_ids,
                                               recipient_id__in=stream_recipient_ids)]
    subscription_recipient_ids = set(x["recipient"] for x in response['zerver_subscription'])

    huddle_recipients = [model_to_dict(r)
                         for r in Recipient.objects.select_related()
                         .filter(type=3, type_id__in=subscription_recipient_ids)]
    huddle_ids = set(x["type_id"] for x in huddle_recipients)

    response["zerver_recipient"] = user_recipients + stream_recipients + huddle_recipients
    response['zerver_huddle'] = [model_to_dict(h)
                                 for h in Huddle.objects.select_related()
                                 .filter(id__in=huddle_ids)]

    recipient_ids = set(x["id"] for x in response['zerver_recipient'])
    response["zerver_message"] = [model_to_dict(m) for m in
                                  Message.objects.select_related()
                                  .filter(recipient_id__in=recipient_ids,
                                          sender_id__in=userprofile_ids)]

    # Small per-realm tables: export wholesale, keyed on the realm id.
    for (table, model) in [("defaultstream", DefaultStream),
                           ("realmemoji", RealmEmoji),
                           ("realmalias", RealmAlias),
                           ("realmfilter", RealmFilter)]:
        response["zerver_"+table] = [model_to_dict(x) for x in
                                     model.objects.select_related().filter(realm_id=user_profile.realm.id)] # type: ignore
    return json_success(response)
def get_profile_backend(request, user_profile):
# type: (HttpRequest, UserProfile) -> HttpResponse
result = dict(pointer = user_profile.pointer,

View File

@ -119,8 +119,6 @@ urls = list(i18n_urls)
# All of these paths are accessed by either a /json or /api prefix
v1_api_and_json_patterns = [
# realm-level calls
url(r'^export$', 'zerver.lib.rest.rest_dispatch',
{'GET': 'zerver.views.export'}),
url(r'^realm$', 'zerver.lib.rest.rest_dispatch',
{'PATCH': 'zerver.views.update_realm'}),