mirror of https://github.com/zulip/zulip.git
Remove old prototype data import/export tool.
This prototype from Dropbox Hack Week turned out to be too inefficient to be used for realms with any significant amount of history, so we're removing it. It will be replaced by https://github.com/zulip/zulip/pull/673.
This commit is contained in:
parent
101820bc29
commit
e72f41cdec
|
@ -145,34 +145,5 @@
|
|||
"python": "<div class=\"codehilite\"><pre><span class=\"n\">client</span><span class=\"o\">.</span><span class=\"n\">get_events<span class=\"p\">()</span>\n</span></pre></div> or if you want the event queues managed for you, <div class=\"codehilite\"><pre><span class=\"n\">client</span><span class=\"o\">.</span><span class=\"n\">call_on_each_event<span class=\"p\">()</span>\n</span></pre></div>",
|
||||
"curl": "<div class=\"codehilite\"><pre>curl -G https://api.zulip.com/v1/events <span class=\"se\">\\</span>\n -u othello-bot@example.com:a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 <span class=\"se\">\\</span>\n -d <span class=\"s2\">\"queue_id=1375801870:2942\"</span> <span class=\"se\">\\</span>\n -d <span class=\"s2\">\"last_event_id=-1\"</span>\n</pre></div>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"method": "GET",
|
||||
"endpoint": "export",
|
||||
"example_response":
|
||||
"
|
||||
{
|
||||
'msg': '',
|
||||
'result': 'success',
|
||||
'zerver_message': [
|
||||
{
|
||||
'id': 'test'
|
||||
},
|
||||
{
|
||||
'id': 'hello'
|
||||
}
|
||||
]
|
||||
}
|
||||
",
|
||||
"returns": [
|
||||
[
|
||||
"export",
|
||||
"a dict database tables"
|
||||
]
|
||||
],
|
||||
"call": "Get an export of your database",
|
||||
"example_request": {
|
||||
"curl": "<div class=\"codehilite\"><pre>curl -G https://api.zulip.com/v1/export <span class=\"se\">\\</span>\n -u othello-bot@example.com:a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5 </pre></div>"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1,166 +0,0 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
|
||||
from optparse import make_option
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db import connection
|
||||
from django.conf import settings
|
||||
|
||||
from zerver.lib.actions import do_create_stream
|
||||
from zerver.models import Realm, Stream, UserProfile, Recipient, Subscription, \
|
||||
Message, UserMessage, Huddle, DefaultStream, RealmAlias, RealmFilter, Client
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
class Command(BaseCommand):
    # Default number of rows inserted per bulk_create() round trip.
    DEFAULT_CHUNK_SIZE = 5000

    help = """Import Zulip database dump files into a fresh Zulip instance.

This command should be used only on a newly created, empty Zulip instance to
import a database dump from one or more JSON files.

Usage: python manage.py import_dump [--destroy-rebuild-database] [--chunk-size=%s] <json file name> [<json file name>...]""" % (DEFAULT_CHUNK_SIZE,)

    option_list = BaseCommand.option_list + (
        make_option('--destroy-rebuild-database',
                    dest='destroy_rebuild_database',
                    default=False,
                    action="store_true",
                    help='Destroys and rebuilds the databases prior to import.'),
        make_option('--chunk-size',
                    dest='chunk_size',
                    type='int',
                    default=DEFAULT_CHUNK_SIZE,
                    help='Number of objects that are added to the table in one roundtrip to the database.')
    )


    def new_instance_check(self, model):
        """Exit with status 1 unless the given model's table is empty.

        Used to refuse to import into an instance that already has data.
        """
        count = model.objects.count()
        if count:
            print("Zulip instance is not empty, found %d rows in %s table. "
                  % (count, model._meta.db_table))
            print("You may use --destroy-rebuild-database to destroy and rebuild the database prior to import.")
            sys.exit(1)


    def do_destroy_and_rebuild_database(self, db_name):
        """Remove all data from the database via Django's flush command.

        NOTE(review): db_name is accepted but unused; kept for interface
        compatibility with existing callers.
        """
        call_command('flush', verbosity=0, interactive=False)

    def increment_row_counter(self, row_counter, database_dump, model):
        """Add the number of rows present for model in database_dump to
        the running per-table total in row_counter."""
        table_name = model._meta.db_table
        row_counter[table_name] = (row_counter.get(table_name, 0) +
                                   len(database_dump.get(table_name, [])))


    def test_table_row_count(self, row_counter, model):
        """Print whether the imported row count for model matches the total
        counted from the input dump files."""
        table_name = model._meta.db_table
        expected_count = row_counter.get(table_name, 0)
        actual_count = model.objects.count()
        status = "PASSED" if expected_count == actual_count else "FAILED"
        sys.stdout.write("%s: " % (table_name,))
        sys.stdout.write("expected %d rows, got %d. %s\n" %
                         (expected_count, actual_count, status))


    def import_table(self, database_dump, realm_notification_map, model):
        """Bulk-insert all rows for model found in database_dump.

        zerver_realm rows have notifications_stream_id stripped (and recorded
        in realm_notification_map) because it is a circular foreign key into
        the streams table, which is imported afterwards; the caller restores
        it once streams exist.
        """
        table_name = model._meta.db_table
        if table_name in database_dump:
            cursor = connection.cursor()
            sys.stdout.write("Importing %s: " % (table_name,))
            accumulator = []
            for row in database_dump[table_name]:
                # Hack to filter the notifications_stream_id circular
                # reference out of zerver_realm rows prior to insertion of
                # the corresponding streams.
                if table_name == "zerver_realm":
                    realm_notification_map[row["id"]] = row.get("notifications_stream_id")
                    row = {field_name: value
                           for field_name, value in row.items()
                           if field_name != "notifications_stream_id"}

                accumulator.append(model(**row))
                if len(accumulator) % self.chunk_size == 0:
                    model.objects.bulk_create(accumulator)
                    sys.stdout.write(".")
                    accumulator = []

            # Create any remaining objects that haven't been flushed yet.
            if accumulator:
                model.objects.bulk_create(accumulator)

            # Set the next id sequence value to avoid a collision with the
            # imported ids.  table_name comes from model._meta, not from user
            # input, so interpolating it into the SQL is safe here.
            cursor.execute("SELECT setval(%s, MAX(id)+1) FROM " + table_name,
                           [table_name + "_id_seq"])

            sys.stdout.write(" [Done]\n")


    def handle(self, *args, **options):
        """Entry point: validate the target instance, then import each dump
        file in order and verify the resulting row counts."""
        models_to_import = [Realm, Stream, UserProfile, Recipient, Subscription,
                            Client, Message, UserMessage, Huddle, DefaultStream,
                            RealmAlias, RealmFilter]

        self.chunk_size = options["chunk_size"] # type: int # ignore mypy options bug

        if not args:
            print("Please provide at least one database dump file name.")
            sys.exit(1)

        if not options["destroy_rebuild_database"]:
            for model in models_to_import:
                self.new_instance_check(model)
        else:
            db_name = settings.DATABASES['default']['NAME']
            self.do_destroy_and_rebuild_database(db_name)

        # Maps realm id -> notifications_stream_id.  Generally there should
        # be only one realm per dump, but the code doesn't assume that.
        realm_notification_map = dict() # type: Dict[int, int]

        # Mapping between table name and the total expected number of rows
        # across all input json files.
        row_counter = dict() # type: Dict[str, int]

        for file_name in args:
            try:
                fp = open(file_name, 'r')
            except IOError:
                print("File not found: '%s'" % (file_name,))
                sys.exit(1)

            print("Processing file: %s ..." % (file_name,))

            # Parse the database dump and load it in memory.
            # TODO: change this to a streaming parser to support loads > RAM size.
            # BUG FIX: the previous json.load(fp, encoding) passed the
            # encoding positionally, which on Python 3 binds to the `cls`
            # parameter and raises; JSON dumps are UTF-8, the default.
            # The `with` block also closes fp, which previously leaked.
            with fp:
                database_dump = json.load(fp)

            for model in models_to_import:
                self.increment_row_counter(row_counter, database_dump, model)
                self.import_table(database_dump, realm_notification_map, model)

            print("")

        # Set notifications_stream_id on realm objects to the correct value
        # now that the foreign keys exist in the streams table.
        if realm_notification_map:
            print("Setting realm notification stream...")
            for realm_id, notifications_stream_id in realm_notification_map.items():
                Realm.objects \
                    .filter(id=realm_id) \
                    .update(notifications_stream=notifications_stream_id)

        print("")
        print("Testing data import: ")

        # Test that everything from all json dumps made it into the database.
        for model in models_to_import:
            self.test_table_row_count(row_counter, model)
|
|
@ -1044,76 +1044,6 @@ def generate_client_id():
|
|||
# type: () -> text_type
|
||||
return generate_random_token(32)
|
||||
|
||||
# The order of creation of the various dictionaries are important.
# We filter on {userprofile,stream,subscription_recipient}_ids.
@require_realm_admin
def export(request, user_profile):
    # type: (HttpRequest, UserProfile) -> HttpResponse
    """Export the realm's data (realm, users, public streams, subscriptions,
    recipients, messages, and related tables) as one JSON response.

    Refuses realms over a size threshold, since the whole export is built in
    memory within a single request.
    """
    if (Message.objects.filter(sender__realm=user_profile.realm).count() > 1000000 or
        UserMessage.objects.filter(user_profile__realm=user_profile.realm).count() > 3000000):
        return json_error(_("Realm has too much data for non-batched export."))

    response = {}

    response['zerver_realm'] = [model_to_dict(x)
                                for x in Realm.objects.select_related().filter(id=user_profile.realm.id)]

    # Passwords and API keys are deliberately excluded from the export.
    response['zerver_userprofile'] = [model_to_dict(x, exclude=["password", "api_key"])
                                      for x in UserProfile.objects.select_related().filter(realm=user_profile.realm)]

    userprofile_ids = set(userprofile["id"] for userprofile in response['zerver_userprofile'])

    # Only public streams are exported (invite_only=False).
    response['zerver_stream'] = [model_to_dict(x, exclude=["email_token"])
                                 for x in Stream.objects.select_related().filter(realm=user_profile.realm, invite_only=False)]

    stream_ids = set(x["id"] for x in response['zerver_stream'])

    # PERF FIX: the filtering below used to fetch every row of each table and
    # discard non-matching ones in Python; pushing the __in conditions into
    # the queryset lets the database do the filtering.
    response['zerver_usermessage'] = [model_to_dict(x)
                                      for x in UserMessage.objects.select_related().filter(
                                          user_profile_id__in=userprofile_ids)]

    # Recipient.type: 1 = personal, 2 = stream, 3 = huddle.
    user_recipients = [model_to_dict(x)
                       for x in Recipient.objects.select_related().filter(
                           type=1, type_id__in=userprofile_ids)]

    stream_recipients = [model_to_dict(x)
                         for x in Recipient.objects.select_related().filter(
                             type=2, type_id__in=stream_ids)]

    stream_recipient_ids = set(x["id"] for x in stream_recipients)

    # Only check for subscriptions to streams.
    response['zerver_subscription'] = [model_to_dict(x)
                                       for x in Subscription.objects.select_related().filter(
                                           user_profile_id__in=userprofile_ids,
                                           recipient_id__in=stream_recipient_ids)]

    subscription_recipient_ids = set(x["recipient"] for x in response['zerver_subscription'])

    huddle_recipients = [model_to_dict(r)
                         for r in Recipient.objects.select_related().filter(
                             type=3, type_id__in=subscription_recipient_ids)]

    huddle_ids = set(x["type_id"] for x in huddle_recipients)

    response["zerver_recipient"] = user_recipients + stream_recipients + huddle_recipients

    response['zerver_huddle'] = [model_to_dict(h)
                                 for h in Huddle.objects.select_related().filter(id__in=huddle_ids)]

    recipient_ids = set(x["id"] for x in response['zerver_recipient'])
    response["zerver_message"] = [model_to_dict(m)
                                  for m in Message.objects.select_related().filter(
                                      recipient_id__in=recipient_ids,
                                      sender_id__in=userprofile_ids)]

    for (table, model) in [("defaultstream", DefaultStream),
                           ("realmemoji", RealmEmoji),
                           ("realmalias", RealmAlias),
                           ("realmfilter", RealmFilter)]:
        response["zerver_"+table] = [model_to_dict(x) for x in
                                     model.objects.select_related().filter(realm_id=user_profile.realm.id)] # type: ignore

    return json_success(response)
|
||||
|
||||
def get_profile_backend(request, user_profile):
|
||||
# type: (HttpRequest, UserProfile) -> HttpResponse
|
||||
result = dict(pointer = user_profile.pointer,
|
||||
|
|
|
@ -119,8 +119,6 @@ urls = list(i18n_urls)
|
|||
# All of these paths are accessed by either a /json or /api prefix
|
||||
v1_api_and_json_patterns = [
|
||||
# realm-level calls
|
||||
url(r'^export$', 'zerver.lib.rest.rest_dispatch',
|
||||
{'GET': 'zerver.views.export'}),
|
||||
url(r'^realm$', 'zerver.lib.rest.rest_dispatch',
|
||||
{'PATCH': 'zerver.views.update_realm'}),
|
||||
|
||||
|
|
Loading…
Reference in New Issue