From e72f41cdec40053c9671d631e5c1bd2ce8337560 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 27 Jun 2016 13:45:21 -0700 Subject: [PATCH] Remove old prototype data import/export tool. This prototype from Dropbox Hack Week turned out to be too inefficient to be used for realms with any significant amount of history, so we're removing it. It will be replaced by https://github.com/zulip/zulip/pull/673. --- templates/zerver/api_content.json | 29 ---- zerver/management/commands/import_dump.py | 166 ---------------------- zerver/views/__init__.py | 70 --------- zproject/urls.py | 2 - 4 files changed, 267 deletions(-) delete mode 100644 zerver/management/commands/import_dump.py diff --git a/templates/zerver/api_content.json b/templates/zerver/api_content.json index 1841e287b9..6003409ade 100644 --- a/templates/zerver/api_content.json +++ b/templates/zerver/api_content.json @@ -145,34 +145,5 @@ "python": "
client.get_events()\n
or if you want the event queues managed for you,
client.call_on_each_event()\n
", "curl": "
curl -G https://api.zulip.com/v1/events \\\n -u othello-bot@example.com:a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 \\\n -d \"queue_id=1375801870:2942\" \\\n -d \"last_event_id=-1\"\n
" } - }, - { - "method": "GET", - "endpoint": "export", - "example_response": -" -{ - 'msg': '', - 'result': 'success', - 'zerver_message': [ - { - 'id': 'test' - }, - { - 'id': 'hello' - } - ] -} -", - "returns": [ - [ - "export", - "a dict database tables" - ] - ], - "call": "Get an export of your database", - "example_request": { - "curl": "
curl -G https://api.zulip.com/v1/export \\\n -u othello-bot@example.com:a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 
" - } } ] diff --git a/zerver/management/commands/import_dump.py b/zerver/management/commands/import_dump.py deleted file mode 100644 index 46e5b9611f..0000000000 --- a/zerver/management/commands/import_dump.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -from optparse import make_option - -from django.core.management import call_command -from django.core.management.base import BaseCommand -from django.db import connection -from django.conf import settings - -from zerver.lib.actions import do_create_stream -from zerver.models import Realm, Stream, UserProfile, Recipient, Subscription, \ - Message, UserMessage, Huddle, DefaultStream, RealmAlias, RealmFilter, Client - -import sys -import json - -class Command(BaseCommand): - DEFAULT_CHUNK_SIZE = 5000 - - help = """Import Zulip database dump files into a fresh Zulip instance. - -This command should be used only on a newly created, empty Zulip instance to -import a database dump from one or more JSON files. - -Usage: python manage.py import_dump [--destroy-rebuild-database] [--chunk-size=%s] [...]""" % (DEFAULT_CHUNK_SIZE,) - - option_list = BaseCommand.option_list + ( - make_option('--destroy-rebuild-database', - dest='destroy_rebuild_database', - default=False, - action="store_true", - help='Destroys and rebuilds the databases prior to import.'), - make_option('--chunk-size', - dest='chunk_size', - type='int', - default=DEFAULT_CHUNK_SIZE, - help='Number of objects that are added to the table in one roundtrip to the database.') - ) - - - def new_instance_check(self, model): - count = model.objects.count() - if count: - print("Zulip instance is not empty, found %d rows in %s table. " \ - % (count, model._meta.db_table)) - print("You may use --destroy-rebuild-database to destroy and rebuild the database prior to import.") - exit(1) - - - def do_destroy_and_rebuild_database(self, db_name): - call_command('flush', verbosity=0, interactive=False) - - def increment_row_counter(self, row_counter, database_dump, model): - table_name = model._meta.db_table - row_counter[table_name] = (row_counter.get(table_name) or 0) + \ - len(database_dump.get(table_name) or [ ]) - - - def test_table_row_count(self, row_counter, model): - table_name = model._meta.db_table - sys.stdout.write("%s: " % (table_name,)) - expected_count = row_counter.get(table_name) or 0 - actual_count = model.objects.count() - status = "PASSED" if expected_count == actual_count else "FAILED" - sys.stdout.write("expected %d rows, got %d. %s\n" % - (expected_count, actual_count, status)) - - - def import_table(self, database_dump, realm_notification_map, model): - table_name = model._meta.db_table - if table_name in database_dump: - cursor = connection.cursor() - sys.stdout.write("Importing %s: " % (table_name,)) - accumulator = [ ] - for row in database_dump[table_name]: - # hack to filter out notifications_stream_id circular reference - # out of zerver_realm table prior to insert of corresponding - # streams. - # removes notifications_stream_id from row dict - if table_name == "zerver_realm": - realm_notification_map[row["id"]] = row.get("notifications_stream_id") - row = { field_name: value \ - for field_name, value in row.items() \ - if field_name != "notifications_stream_id" } - - accumulator.append(model(**row)) - if len(accumulator) % self.chunk_size == 0: - model.objects.bulk_create(accumulator) - sys.stdout.write(".") - accumulator = [ ] - - # create any remaining objects that haven't been flushed yet - if len(accumulator): - model.objects.bulk_create(accumulator) - - # set the next id sequence value to avoid a collision with the - # imported ids - cursor.execute("SELECT setval(%s, MAX(id)+1) FROM " + table_name, - [table_name + "_id_seq"]) - - sys.stdout.write(" [Done]\n") - - - def handle(self, *args, **options): - models_to_import = [Realm, Stream, UserProfile, Recipient, Subscription, - Client, Message, UserMessage, Huddle, DefaultStream, RealmAlias, - RealmFilter] - - self.chunk_size = options["chunk_size"] # type: int # ignore mypy options bug - encoding = sys.getfilesystemencoding() - - if len(args) == 0: - print("Please provide at least one database dump file name.") - exit(1) - - if not options["destroy_rebuild_database"]: - for model in models_to_import: - self.new_instance_check(model) - else: - db_name = settings.DATABASES['default']['NAME'] - self.do_destroy_and_rebuild_database(db_name) - - # maps relationship between realm id and notifications_stream_id - # generally, there should be only one realm per dump, but the code - # doesn't make that assumption - realm_notification_map = dict() # type: Dict[int, int] - - # maping between table name and a total expected number of rows across - # all input json files - row_counter = dict() # type: Dict[str, int] - - for file_name in args: - try: - fp = open(file_name, 'r') - except IOError: - print("File not found: '%s'" % (file_name,)) - exit(1) - - print("Processing file: %s ..." % (file_name,)) - - # parse the database dump and load in memory - # TODO: change this to a streaming parser to support loads > RAM size - database_dump = json.load(fp, encoding) - - for model in models_to_import: - self.increment_row_counter(row_counter, database_dump, model) - self.import_table(database_dump, realm_notification_map, model) - - print("") - - # set notifications_stream_id on realm objects to correct value now - # that foreign keys are in streams table - if len(realm_notification_map): - print("Setting realm notification stream...") - for id, notifications_stream_id in realm_notification_map.items(): - Realm.objects \ - .filter(id=id) \ - .update(notifications_stream = notifications_stream_id) - - print("") - print("Testing data import: ") - - # test that everything from all json dumps made it into the database - for model in models_to_import: - self.test_table_row_count(row_counter, model) diff --git a/zerver/views/__init__.py b/zerver/views/__init__.py index 3e5e6723a7..0248fd03e4 100644 --- a/zerver/views/__init__.py +++ b/zerver/views/__init__.py @@ -1044,76 +1044,6 @@ def generate_client_id(): # type: () -> text_type return generate_random_token(32) -# The order of creation of the various dictionaries are important. -# We filter on {userprofile,stream,subscription_recipient}_ids. -@require_realm_admin -def export(request, user_profile): - # type: (HttpRequest, UserProfile) -> HttpResponse - if (Message.objects.filter(sender__realm=user_profile.realm).count() > 1000000 or - UserMessage.objects.filter(user_profile__realm=user_profile.realm).count() > 3000000): - return json_error(_("Realm has too much data for non-batched export.")) - - response = {} - - response['zerver_realm'] = [model_to_dict(x) - for x in Realm.objects.select_related().filter(id=user_profile.realm.id)] - - response['zerver_userprofile'] = [model_to_dict(x, exclude=["password", "api_key"]) - for x in UserProfile.objects.select_related().filter(realm=user_profile.realm)] - - userprofile_ids = set(userprofile["id"] for userprofile in response['zerver_userprofile']) - - response['zerver_stream'] = [model_to_dict(x, exclude=["email_token"]) - for x in Stream.objects.select_related().filter(realm=user_profile.realm, invite_only=False)] - - stream_ids = set(x["id"] for x in response['zerver_stream']) - - response['zerver_usermessage'] = [model_to_dict(x) for x in UserMessage.objects.select_related() - if x.user_profile_id in userprofile_ids] - - user_recipients = [model_to_dict(x) - for x in Recipient.objects.select_related().filter(type=1) - if x.type_id in userprofile_ids] - - stream_recipients = [model_to_dict(x) - for x in Recipient.objects.select_related().filter(type=2) - if x.type_id in stream_ids] - - stream_recipient_ids = set(x["id"] for x in stream_recipients) - - # only check for subscriptions to streams - response['zerver_subscription'] = [model_to_dict(x) for x in Subscription.objects.select_related() - if x.user_profile_id in userprofile_ids - and x.recipient_id in stream_recipient_ids] - - subscription_recipient_ids = set(x["recipient"] for x in response['zerver_subscription']) - - huddle_recipients = [model_to_dict(r) - for r in Recipient.objects.select_related().filter(type=3) - if r.type_id in subscription_recipient_ids] - - huddle_ids = set(x["type_id"] for x in huddle_recipients) - - response["zerver_recipient"] = user_recipients + stream_recipients + huddle_recipients - - response['zerver_huddle'] = [model_to_dict(h) - for h in Huddle.objects.select_related() - if h.id in huddle_ids] - - recipient_ids = set(x["id"] for x in response['zerver_recipient']) - response["zerver_message"] = [model_to_dict(m) for m in Message.objects.select_related() - if m.recipient_id in recipient_ids - and m.sender_id in userprofile_ids] - - for (table, model) in [("defaultstream", DefaultStream), - ("realmemoji", RealmEmoji), - ("realmalias", RealmAlias), - ("realmfilter", RealmFilter)]: - response["zerver_"+table] = [model_to_dict(x) for x in - model.objects.select_related().filter(realm_id=user_profile.realm.id)] # type: ignore - - return json_success(response) - def get_profile_backend(request, user_profile): # type: (HttpRequest, UserProfile) -> HttpResponse result = dict(pointer = user_profile.pointer, diff --git a/zproject/urls.py b/zproject/urls.py index ccd1704d3a..d6e57236ec 100644 --- a/zproject/urls.py +++ b/zproject/urls.py @@ -119,8 +119,6 @@ urls = list(i18n_urls) # All of these paths are accessed by either a /json or /api prefix v1_api_and_json_patterns = [ # realm-level calls - url(r'^export$', 'zerver.lib.rest.rest_dispatch', - {'GET': 'zerver.views.export'}), url(r'^realm$', 'zerver.lib.rest.rest_dispatch', {'PATCH': 'zerver.views.update_realm'}),