remote data: Refactor remote_server_post_analytics to be more generic.

One small change in behavior is that this creates an array with all the
row_objects at once, rather than creating them 1000 at a time.

That should be fine, given that the client batches these in units of
10000 anyway, and so we're just creating 10K rows of a relatively
small data structure in Python code here.
This commit is contained in:
Rishi Gupta 2019-10-02 16:54:36 -07:00 committed by Tim Abbott
parent e10361a832
commit 48dc1d1128
2 changed files with 40 additions and 49 deletions

View File

@ -372,7 +372,7 @@ class AnalyticsBouncerTest(BouncerTestCase):
subdomain="") subdomain="")
self.assert_json_error(result, "Data is out of order.") self.assert_json_error(result, "Data is out of order.")
with mock.patch("zilencer.views.validate_count_stats"): with mock.patch("zilencer.views.validate_incoming_table_data"):
# We need to wrap a transaction here to avoid the # We need to wrap a transaction here to avoid the
# IntegrityError that will be thrown in here from breaking # IntegrityError that will be thrown in here from breaking
# the unittest transaction. # the unittest transaction.

View File

@ -150,15 +150,28 @@ def remote_server_notify_push(request: HttpRequest, entity: Union[UserProfile, R
return json_success() return json_success()
def validate_count_stats(server: RemoteZulipServer, model: Any, def validate_incoming_table_data(server: RemoteZulipServer, model: Any,
counts: List[Dict[str, Any]]) -> None: rows: List[Dict[str, Any]], is_count_stat: bool=False) -> None:
last_id = get_last_id_from_server(server, model) last_id = get_last_id_from_server(server, model)
for item in counts: for row in rows:
if item['property'] not in COUNT_STATS: if is_count_stat and row['property'] not in COUNT_STATS:
raise JsonableError(_("Invalid property %s") % (item['property'],)) raise JsonableError(_("Invalid property %s") % (row['property'],))
if item['id'] <= last_id: if row['id'] <= last_id:
raise JsonableError(_("Data is out of order.")) raise JsonableError(_("Data is out of order."))
last_id = item['id'] last_id = row['id']
def batch_create_table_data(server: RemoteZulipServer, model: Any,
row_objects: Union[List[RemoteRealmCount],
List[RemoteInstallationCount]]) -> None:
BATCH_SIZE = 1000
while len(row_objects) > 0:
try:
model.objects.bulk_create(row_objects[:BATCH_SIZE])
except IntegrityError:
logging.warning("Invalid data saving %s for server %s/%s" % (
model._meta.db_table, server.hostname, server.uuid))
raise JsonableError(_("Invalid data."))
row_objects = row_objects[BATCH_SIZE:]
@has_request_variables @has_request_variables
def remote_server_post_analytics(request: HttpRequest, def remote_server_post_analytics(request: HttpRequest,
@ -182,50 +195,28 @@ def remote_server_post_analytics(request: HttpRequest,
])))) -> HttpResponse: ])))) -> HttpResponse:
server = validate_entity(entity) server = validate_entity(entity)
validate_count_stats(server, RemoteRealmCount, realm_counts) validate_incoming_table_data(server, RemoteRealmCount, realm_counts, True)
validate_count_stats(server, RemoteInstallationCount, installation_counts) validate_incoming_table_data(server, RemoteInstallationCount, installation_counts, True)
BATCH_SIZE = 1000 row_objects = [RemoteRealmCount(
while len(realm_counts) > 0: property=row['property'],
batch = realm_counts[0:BATCH_SIZE] realm_id=row['realm'],
realm_counts = realm_counts[BATCH_SIZE:] remote_id=row['id'],
objects_to_create = []
for item in batch:
objects_to_create.append(RemoteRealmCount(
property=item['property'],
realm_id=item['realm'],
remote_id=item['id'],
server=server, server=server,
end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc), end_time=datetime.datetime.fromtimestamp(row['end_time'], tz=timezone_utc),
subgroup=item['subgroup'], subgroup=row['subgroup'],
value=item['value'])) value=row['value']) for row in realm_counts]
try: batch_create_table_data(server, RemoteRealmCount, row_objects)
RemoteRealmCount.objects.bulk_create(objects_to_create)
except IntegrityError:
logging.warning("Invalid data saving RemoteRealmCount for server %s/%s" % (
server.hostname, server.uuid))
return json_error(_("Invalid data."))
while len(installation_counts) > 0: row_objects = [RemoteInstallationCount(
batch = installation_counts[0:BATCH_SIZE] property=row['property'],
installation_counts = installation_counts[BATCH_SIZE:] remote_id=row['id'],
objects_to_create = []
for item in batch:
objects_to_create.append(RemoteInstallationCount(
property=item['property'],
remote_id=item['id'],
server=server, server=server,
end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc), end_time=datetime.datetime.fromtimestamp(row['end_time'], tz=timezone_utc),
subgroup=item['subgroup'], subgroup=row['subgroup'],
value=item['value'])) value=row['value']) for row in installation_counts]
try: batch_create_table_data(server, RemoteInstallationCount, row_objects)
RemoteInstallationCount.objects.bulk_create(objects_to_create)
except IntegrityError:
logging.warning("Invalid data saving RemoteInstallationCount for server %s/%s" % (
server.hostname, server.uuid))
return json_error(_("Invalid data."))
return json_success() return json_success()
def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int: def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int: