From 48dc1d112810a8f76e1c0d961d2d3ab04d5ec763 Mon Sep 17 00:00:00 2001 From: Rishi Gupta Date: Wed, 2 Oct 2019 16:54:36 -0700 Subject: [PATCH] remote data: Refactor remote_server_post_analytics to be more generic. One small change in behavior is that this creates an array with all the row_objects at once, rather than creating them 1000 at a time. That should be fine, given that the client batches these in units of 10000 anyway, and so we're just creating 10K rows of a relatively small data structure in Python code here. --- zerver/tests/test_push_notifications.py | 2 +- zilencer/views.py | 87 +++++++++++-------------- 2 files changed, 40 insertions(+), 49 deletions(-) diff --git a/zerver/tests/test_push_notifications.py b/zerver/tests/test_push_notifications.py index f58e575d63..24ea783984 100644 --- a/zerver/tests/test_push_notifications.py +++ b/zerver/tests/test_push_notifications.py @@ -372,7 +372,7 @@ class AnalyticsBouncerTest(BouncerTestCase): subdomain="") self.assert_json_error(result, "Data is out of order.") - with mock.patch("zilencer.views.validate_count_stats"): + with mock.patch("zilencer.views.validate_incoming_table_data"): # We need to wrap a transaction here to avoid the # IntegrityError that will be thrown in here from breaking # the unittest transaction. diff --git a/zilencer/views.py b/zilencer/views.py index ed2c608eda..3728b20d07 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -150,15 +150,28 @@ def remote_server_notify_push(request: HttpRequest, entity: Union[UserProfile, R return json_success() -def validate_count_stats(server: RemoteZulipServer, model: Any, - counts: List[Dict[str, Any]]) -> None: +def validate_incoming_table_data(server: RemoteZulipServer, model: Any, + rows: List[Dict[str, Any]], is_count_stat: bool=False) -> None: last_id = get_last_id_from_server(server, model) - for item in counts: - if item['property'] not in COUNT_STATS: - raise JsonableError(_("Invalid property %s") % (item['property'],)) - if item['id'] <= last_id: + for row in rows: + if is_count_stat and row['property'] not in COUNT_STATS: + raise JsonableError(_("Invalid property %s") % (row['property'],)) + if row['id'] <= last_id: raise JsonableError(_("Data is out of order.")) - last_id = item['id'] + last_id = row['id'] + +def batch_create_table_data(server: RemoteZulipServer, model: Any, + row_objects: Union[List[RemoteRealmCount], + List[RemoteInstallationCount]]) -> None: + BATCH_SIZE = 1000 + while len(row_objects) > 0: + try: + model.objects.bulk_create(row_objects[:BATCH_SIZE]) + except IntegrityError: + logging.warning("Invalid data saving %s for server %s/%s" % ( + model._meta.db_table, server.hostname, server.uuid)) + raise JsonableError(_("Invalid data.")) + row_objects = row_objects[BATCH_SIZE:] @has_request_variables def remote_server_post_analytics(request: HttpRequest, @@ -182,50 +195,28 @@ def remote_server_post_analytics(request: HttpRequest, ])))) -> HttpResponse: server = validate_entity(entity) - validate_count_stats(server, RemoteRealmCount, realm_counts) - validate_count_stats(server, RemoteInstallationCount, installation_counts) + validate_incoming_table_data(server, RemoteRealmCount, realm_counts, True) + validate_incoming_table_data(server, RemoteInstallationCount, installation_counts, True) - BATCH_SIZE = 1000 - while len(realm_counts) > 0: - batch = realm_counts[0:BATCH_SIZE] - realm_counts = realm_counts[BATCH_SIZE:] + row_objects = [RemoteRealmCount( + property=row['property'], + realm_id=row['realm'], + remote_id=row['id'], + server=server, + end_time=datetime.datetime.fromtimestamp(row['end_time'], tz=timezone_utc), + subgroup=row['subgroup'], + value=row['value']) for row in realm_counts] + batch_create_table_data(server, RemoteRealmCount, row_objects) - objects_to_create = [] - for item in batch: - objects_to_create.append(RemoteRealmCount( - property=item['property'], - realm_id=item['realm'], - remote_id=item['id'], - server=server, - end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc), - subgroup=item['subgroup'], - value=item['value'])) - try: - RemoteRealmCount.objects.bulk_create(objects_to_create) - except IntegrityError: - logging.warning("Invalid data saving RemoteRealmCount for server %s/%s" % ( - server.hostname, server.uuid)) - return json_error(_("Invalid data.")) + row_objects = [RemoteInstallationCount( + property=row['property'], + remote_id=row['id'], + server=server, + end_time=datetime.datetime.fromtimestamp(row['end_time'], tz=timezone_utc), + subgroup=row['subgroup'], + value=row['value']) for row in installation_counts] + batch_create_table_data(server, RemoteInstallationCount, row_objects) - while len(installation_counts) > 0: - batch = installation_counts[0:BATCH_SIZE] - installation_counts = installation_counts[BATCH_SIZE:] - - objects_to_create = [] - for item in batch: - objects_to_create.append(RemoteInstallationCount( - property=item['property'], - remote_id=item['id'], - server=server, - end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc), - subgroup=item['subgroup'], - value=item['value'])) - try: - RemoteInstallationCount.objects.bulk_create(objects_to_create) - except IntegrityError: - logging.warning("Invalid data saving RemoteInstallationCount for server %s/%s" % ( - server.hostname, server.uuid)) - return json_error(_("Invalid data.")) return json_success() def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int: