analytics: Add APIs for submitting analytics to another server.

This adds a new API for sending basic analytics data (number of users,
number of messages sent) from a Zulip server to the Zulip Cloud
central analytics database, which will make it possible for servers to
elect to have their usage numbers counted in published stats on the
size of the Zulip ecosystem.
This commit is contained in:
Tim Abbott 2019-01-30 15:39:02 -08:00
parent 8df0d95559
commit 022c8beaf5
7 changed files with 323 additions and 5 deletions

View File

@ -214,6 +214,7 @@ DATE_FIELDS = {
'zerver_userprofile': ['date_joined', 'last_login', 'last_reminder'],
'zerver_realmauditlog': ['event_time'],
'zerver_userhotspot': ['timestamp'],
'analytics_installationcount': ['end_time'],
'analytics_realmcount': ['end_time'],
'analytics_usercount': ['end_time'],
'analytics_streamcount': ['end_time'],

View File

@ -1,13 +1,16 @@
import requests
import ujson
import urllib
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Tuple, Union
from django.conf import settings
from django.forms.models import model_to_dict
from django.utils.translation import ugettext as _
from analytics.models import InstallationCount, RealmCount
from version import ZULIP_VERSION
from zerver.lib.exceptions import JsonableError
from zerver.lib.export import floatify_datetime_fields
class PushNotificationBouncerException(Exception):
    """Dedicated exception type for errors involving the push notification bouncer."""
@ -85,3 +88,41 @@ def send_json_to_push_bouncer(method: str, endpoint: str, post_data: Dict[str, A
ujson.dumps(post_data),
extra_headers={"Content-type": "application/json"},
)
def build_analytics_data(realm_count_query: Any,
                         installation_count_query: Any) -> Tuple[List[Dict[str, Any]],
                                                                 List[Dict[str, Any]]]:
    """Serialize analytics count rows into lists of plain dictionaries.

    Both querysets are read in ascending ``id`` order and each row is
    converted with ``model_to_dict``.  The resulting tables are then passed
    through ``floatify_datetime_fields`` under their table names.

    Returns a ``(realm_count_rows, installation_count_rows)`` tuple.
    """
    realm_table = 'analytics_realmcount'
    installation_table = 'analytics_installationcount'

    data = {
        realm_table: [
            model_to_dict(row) for row in realm_count_query.order_by("id")
        ],
        installation_table: [
            model_to_dict(row) for row in installation_count_query.order_by("id")
        ],
    }

    # floatify_datetime_fields works on a {table name: rows} mapping, which
    # is why the rows are collected in a dict keyed by table name.
    for table in (realm_table, installation_table):
        floatify_datetime_fields(data, table)

    return (data[realm_table], data[installation_table])
def send_analytics_to_remote_server() -> None:
    """Upload not-yet-acknowledged analytics rows to the remote server.

    Asks the remote server for the highest RealmCount / InstallationCount
    IDs it already holds, then posts every local row with a larger ID.
    Nothing is posted when there are no new rows of either kind.
    """
    # First, check which rows the remote server has already acknowledged.
    status = send_to_push_bouncer("GET", "server/analytics/status", {})
    acked_realm_count_id = status['last_realm_count_id']
    acked_installation_count_id = status['last_installation_count_id']

    # Gather only entries with an ID greater than the acknowledged ones.
    unsent_realm_counts = RealmCount.objects.filter(id__gt=acked_realm_count_id)
    unsent_installation_counts = InstallationCount.objects.filter(
        id__gt=acked_installation_count_id)
    realm_count_data, installation_count_data = build_analytics_data(
        realm_count_query=unsent_realm_counts,
        installation_count_query=unsent_installation_counts)

    if not realm_count_data and not installation_count_data:
        return

    # Each list is sent as its own JSON-encoded request parameter.
    payload = {
        'realm_counts': ujson.dumps(realm_count_data),
        'installation_counts': ujson.dumps(installation_count_data),
    }
    send_to_push_bouncer("POST", "server/analytics", payload)

View File

@ -1,5 +1,6 @@
from contextlib import contextmanager
import datetime
import itertools
import requests
import mock
@ -18,7 +19,11 @@ from django.test import TestCase, override_settings
from django.conf import settings
from django.http import HttpResponse
from django.utils.crypto import get_random_string
from django.utils.timezone import now as timezone_now
from django.utils.timezone import utc as timezone_utc
from analytics.lib.counts import CountStat, LoggingCountStat
from analytics.models import InstallationCount, RealmCount
from zerver.models import (
PushDeviceToken,
UserProfile,
@ -39,12 +44,16 @@ from zerver.lib.soft_deactivation import do_soft_deactivate_users
from zerver.lib import push_notifications as apn
from zerver.lib.push_notifications import get_mobile_push_content, \
DeviceToken, PushNotificationBouncerException, get_apns_client
from zerver.lib.remote_server import send_analytics_to_remote_server, \
build_analytics_data
from zerver.lib.request import JsonableError
from zerver.lib.response import json_success
from zerver.lib.test_classes import (
ZulipTestCase,
)
from zilencer.models import RemoteZulipServer, RemotePushDeviceToken
from zilencer.models import RemoteZulipServer, RemotePushDeviceToken, \
RemoteRealmCount, RemoteInstallationCount
from django.utils.timezone import now
ZERVER_DIR = os.path.dirname(os.path.dirname(__file__))
@ -74,6 +83,11 @@ class BouncerTestCase(ZulipTestCase):
local_url,
kwargs['data'],
subdomain="")
elif args[0] == "GET":
result = self.api_get(self.server_uuid,
local_url,
kwargs['data'],
subdomain="")
else:
raise AssertionError("Unsupported method for bounce_request")
return result
@ -278,6 +292,90 @@ class PushBouncerNotificationTest(BouncerTestCase):
server=server))
self.assertEqual(len(tokens), 0)
class AnalyticsBouncerTest(BouncerTestCase):
    """Tests for uploading analytics counts to the remote analytics endpoints."""
    TIME_ZERO = datetime.datetime(1988, 3, 14).replace(tzinfo=timezone_utc)

    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
    @mock.patch('zerver.lib.push_notifications.requests.request')
    def test_analytics_api(self, mock: Any) -> None:
        """Exercise send_analytics_to_remote_server() end to end.

        Outgoing HTTP requests are redirected to self.bounce_request.  Each
        send always performs the status GET, plus one POST when there are
        new rows, which is what the call_count assertions track.
        """
        mock.side_effect = self.bounce_request
        hamlet = self.example_user('hamlet')
        stat = LoggingCountStat('invites_sent::day', RealmCount, CountStat.DAY)

        def assert_remote_rows(realm_rows: int, installation_rows: int) -> None:
            self.assertEqual(RemoteRealmCount.objects.count(), realm_rows)
            self.assertEqual(RemoteInstallationCount.objects.count(), installation_rows)

        # Start with one local row of each kind and nothing on the remote side.
        RealmCount.objects.create(
            realm=hamlet.realm, property=stat.property, end_time=self.TIME_ZERO, value=5)
        InstallationCount.objects.create(
            property=stat.property, end_time=self.TIME_ZERO, value=5)

        self.assertEqual(RealmCount.objects.count(), 1)
        self.assertEqual(InstallationCount.objects.count(), 1)
        assert_remote_rows(0, 0)

        # First upload: status check plus one POST.
        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 2)
        assert_remote_rows(1, 1)

        # Nothing new: only the status check happens.
        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 3)
        assert_remote_rows(1, 1)

        # Two more realm-level rows.
        for days, value in ((1, 6), (2, 9)):
            RealmCount.objects.create(
                realm=hamlet.realm, property=stat.property,
                end_time=self.TIME_ZERO + datetime.timedelta(days=days), value=value)
        self.assertEqual(RemoteRealmCount.objects.count(), 1)
        self.assertEqual(mock.call_count, 3)

        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 5)
        assert_remote_rows(3, 1)

        # Two more installation-level rows.
        for days, value in ((1, 6), (2, 9)):
            InstallationCount.objects.create(
                property=stat.property,
                end_time=self.TIME_ZERO + datetime.timedelta(days=days), value=value)

        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 7)
        assert_remote_rows(3, 3)

        # Re-posting rows the server already holds must be rejected.
        realm_count_data, installation_count_data = build_analytics_data(
            RealmCount.objects.all(), InstallationCount.objects.all())
        payload = {
            'realm_counts': ujson.dumps(realm_count_data),
            'installation_counts': ujson.dumps(installation_count_data),
        }
        result = self.api_post(self.server_uuid,
                               '/api/v1/remotes/server/analytics',
                               payload,
                               subdomain="")
        self.assert_json_error(result, "Data is out of order.")

    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
    @mock.patch('zerver.lib.push_notifications.requests.request')
    def test_analytics_api_invalid(self, mock: Any) -> None:
        """Uploading a row with an unrecognized property must fail.

        The upload is expected to raise JsonableError and leave no
        RemoteRealmCount rows behind.
        """
        mock.side_effect = self.bounce_request
        hamlet = self.example_user('hamlet')
        bogus_stat = LoggingCountStat('invalid count stat', RealmCount, CountStat.DAY)

        RealmCount.objects.create(
            realm=hamlet.realm, property=bogus_stat.property,
            end_time=self.TIME_ZERO, value=5)

        self.assertEqual(RealmCount.objects.count(), 1)
        self.assertEqual(RemoteRealmCount.objects.count(), 0)

        with self.assertRaises(JsonableError):
            send_analytics_to_remote_server()
        self.assertEqual(RemoteRealmCount.objects.count(), 0)
class PushNotificationTest(BouncerTestCase):
def setUp(self) -> None:
super().setUp()

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.18 on 2019-02-02 06:02
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Adds the RemoteInstallationCount and RemoteRealmCount tables, together
    # with their uniqueness constraints and the (property, end_time) index.

    dependencies = [
        ('zilencer', '0015_delete_billing'),
    ]

    operations = [
        migrations.CreateModel(
            name='RemoteInstallationCount',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True,
                                        serialize=False, verbose_name='ID')),
                ('property', models.CharField(max_length=32)),
                ('subgroup', models.CharField(max_length=16, null=True)),
                ('end_time', models.DateTimeField()),
                ('value', models.BigIntegerField()),
                ('remote_id', models.IntegerField(db_index=True)),
                ('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE,
                                             to='zilencer.RemoteZulipServer')),
            ],
        ),
        migrations.CreateModel(
            name='RemoteRealmCount',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True,
                                        serialize=False, verbose_name='ID')),
                ('property', models.CharField(max_length=32)),
                ('subgroup', models.CharField(max_length=16, null=True)),
                ('end_time', models.DateTimeField()),
                ('value', models.BigIntegerField()),
                ('realm_id', models.IntegerField(db_index=True)),
                ('remote_id', models.IntegerField(db_index=True)),
                ('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE,
                                             to='zilencer.RemoteZulipServer')),
            ],
        ),
        # Constraints and indexes for RemoteRealmCount.
        migrations.AlterUniqueTogether(
            name='remoterealmcount',
            unique_together={('server', 'realm_id', 'property', 'subgroup', 'end_time')},
        ),
        migrations.AlterIndexTogether(
            name='remoterealmcount',
            index_together={('property', 'end_time')},
        ),
        # Constraint for RemoteInstallationCount.
        migrations.AlterUniqueTogether(
            name='remoteinstallationcount',
            unique_together={('server', 'property', 'subgroup', 'end_time')},
        ),
    ]

View File

@ -3,6 +3,7 @@ import datetime
from django.db import models
from zerver.models import AbstractPushDeviceToken
from analytics.models import BaseCount
def get_remote_server_by_uuid(uuid: str) -> 'RemoteZulipServer':
    """Fetch the RemoteZulipServer row whose ``uuid`` field equals ``uuid``."""
    remote_server = RemoteZulipServer.objects.get(uuid=uuid)
    return remote_server
@ -33,3 +34,28 @@ class RemotePushDeviceToken(AbstractPushDeviceToken):
def __str__(self) -> str:
    """Render the token as ``<RemotePushDeviceToken SERVER USER_ID>``."""
    details = (self.server, self.user_id)
    return "<RemotePushDeviceToken %s %s>" % details
class RemoteInstallationCount(BaseCount):
    """Installation-wide count row stored on behalf of a remote Zulip server."""
    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
    # The remote_id field lets us deduplicate data from the remote server
    remote_id = models.IntegerField(db_index=True)  # type: int

    class Meta:
        unique_together = ("server", "property", "subgroup", "end_time")

    def __str__(self) -> str:
        # NOTE(review): the label reads "InstallationCount", not the class
        # name -- confirm whether that is intentional.
        summary = (self.property, self.subgroup, self.value)
        return "<InstallationCount: %s %s %s>" % summary
# We can't subclass RealmCount because we only have a realm_id here, not a foreign key.
class RemoteRealmCount(BaseCount):
    """Per-realm count row stored on behalf of a remote Zulip server."""
    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
    # Plain integer rather than a foreign key (see the comment above the class).
    realm_id = models.IntegerField(db_index=True)  # type: int
    # The remote_id field lets us deduplicate data from the remote server
    remote_id = models.IntegerField(db_index=True)  # type: int

    class Meta:
        unique_together = ("server", "realm_id", "property", "subgroup", "end_time")
        index_together = ["property", "end_time"]

    def __str__(self) -> str:
        summary = (self.server, self.realm_id, self.property, self.subgroup, self.value)
        return "%s %s %s %s %s" % summary

View File

@ -18,6 +18,12 @@ v1_api_and_json_patterns = [
# Push signup doesn't use the REST API, since there's no auth.
url('^remotes/server/register$', zilencer.views.register_remote_server),
# For receiving InstallationCount data and similar analytics.
url('^remotes/server/analytics$', rest_dispatch,
{'POST': 'zilencer.views.remote_server_post_analytics'}),
url('^remotes/server/analytics/status$', rest_dispatch,
{'GET': 'zilencer.views.remote_server_check_analytics'}),
]
urlpatterns = [

View File

@ -1,4 +1,5 @@
from typing import Any, Dict, Optional, Union, cast
from typing import Any, Dict, List, Optional, Union, cast
import datetime
import logging
from django.core.exceptions import ValidationError
@ -6,9 +7,11 @@ from django.core.validators import validate_email, URLValidator
from django.db import IntegrityError, transaction
from django.http import HttpRequest, HttpResponse
from django.utils import timezone
from django.utils.timezone import utc as timezone_utc, now as timezone_now
from django.utils.translation import ugettext as _, ugettext as err_
from django.views.decorators.csrf import csrf_exempt
from analytics.lib.counts import COUNT_STATS
from zerver.decorator import require_post, InvalidZulipServerKeyError
from zerver.lib.exceptions import JsonableError
from zerver.lib.push_notifications import send_android_push_notification, \
@ -16,10 +19,12 @@ from zerver.lib.push_notifications import send_android_push_notification, \
from zerver.lib.request import REQ, has_request_variables
from zerver.lib.response import json_error, json_success
from zerver.lib.validator import check_int, check_string, \
check_capped_string, check_string_fixed_length
check_capped_string, check_string_fixed_length, check_float, check_none_or, \
check_dict, check_dict_only, check_list
from zerver.models import UserProfile
from zerver.views.push_notifications import validate_token
from zilencer.models import RemotePushDeviceToken, RemoteZulipServer
from zilencer.models import RemotePushDeviceToken, RemoteZulipServer, \
RemoteRealmCount, RemoteInstallationCount
def validate_entity(entity: Union[UserProfile, RemoteZulipServer]) -> None:
if not isinstance(entity, RemoteZulipServer):
@ -144,3 +149,91 @@ def remote_server_notify_push(request: HttpRequest, entity: Union[UserProfile, R
send_apple_push_notification(user_id, apple_devices, apns_payload, remote=True)
return json_success()
def validate_count_stats(server: RemoteZulipServer, model: Any,
                         counts: List[Dict[str, Any]]) -> None:
    """Check a batch of uploaded count rows before they are stored.

    Every row must name a property present in COUNT_STATS, and row IDs must
    be strictly increasing, starting above the highest remote_id already
    stored for ``server`` in ``model``.

    Raises:
        JsonableError: on an unknown property or an out-of-order ID.
    """
    last_id = get_last_id_from_server(server, model)
    for item in counts:
        if item['property'] not in COUNT_STATS:
            # Interpolate *after* translating: the translation lookup must
            # see the constant template, not the already-formatted string.
            raise JsonableError(_("Invalid property %s") % (item['property'],))
        if item['id'] <= last_id:
            raise JsonableError(_("Data is out of order."))
        last_id = item['id']
@has_request_variables
def remote_server_post_analytics(request: HttpRequest,
                                 entity: Union[UserProfile, RemoteZulipServer],
                                 realm_counts: List[Dict[str, Any]]=REQ(
                                     validator=check_list(check_dict_only([
                                         ('property', check_string),
                                         ('realm', check_int),
                                         ('id', check_int),
                                         ('end_time', check_float),
                                         ('subgroup', check_none_or(check_string)),
                                         ('value', check_int),
                                     ]))),
                                 installation_counts: List[Dict[str, Any]]=REQ(
                                     validator=check_list(check_dict_only([
                                         ('property', check_string),
                                         ('id', check_int),
                                         ('end_time', check_float),
                                         ('subgroup', check_none_or(check_string)),
                                         ('value', check_int),
                                     ])))) -> HttpResponse:
    """Store analytics count rows uploaded by a remote server.

    ``realm_counts`` and ``installation_counts`` are lists of row
    dictionaries whose shape is enforced by the REQ validators above.  Both
    lists are validated with validate_count_stats() before anything is
    written, then saved as RemoteRealmCount / RemoteInstallationCount rows.
    Each row's ``id`` is stored as ``remote_id`` and ``end_time`` (a float
    timestamp) is converted to a UTC datetime.

    Raises JsonableError (from validate_count_stats) when either list
    contains an unknown property or out-of-order IDs.
    """
    validate_entity(entity)
    server = cast(RemoteZulipServer, entity)

    validate_count_stats(server, RemoteRealmCount, realm_counts)
    # Validate the installation-level rows themselves; previously the
    # realm-level list was checked a second time here, so installation
    # counts were never validated at all.
    validate_count_stats(server, RemoteInstallationCount, installation_counts)

    # Insert in bounded batches to limit the size of each bulk_create call.
    BATCH_SIZE = 1000

    for start in range(0, len(realm_counts), BATCH_SIZE):
        batch = realm_counts[start:start + BATCH_SIZE]
        RemoteRealmCount.objects.bulk_create([
            RemoteRealmCount(
                property=item['property'],
                realm_id=item['realm'],
                remote_id=item['id'],
                server=server,
                end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
                subgroup=item['subgroup'],
                value=item['value'])
            for item in batch
        ])

    for start in range(0, len(installation_counts), BATCH_SIZE):
        batch = installation_counts[start:start + BATCH_SIZE]
        RemoteInstallationCount.objects.bulk_create([
            RemoteInstallationCount(
                property=item['property'],
                remote_id=item['id'],
                server=server,
                end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
                subgroup=item['subgroup'],
                value=item['value'])
            for item in batch
        ])

    return json_success()
def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int:
    """Return the largest ``remote_id`` stored in ``model`` for ``server``.

    Returns 0 when ``model`` holds no rows for that server.
    """
    newest = model.objects.filter(server=server).order_by("remote_id").last()
    if newest is None:
        return 0
    return newest.remote_id
@has_request_variables
def remote_server_check_analytics(request: HttpRequest,
                                  entity: Union[UserProfile, RemoteZulipServer]) -> HttpResponse:
    """Report the highest analytics row IDs already stored for the caller.

    The JSON response carries ``last_realm_count_id`` and
    ``last_installation_count_id``; each is 0 when no rows of that kind
    have been stored for this server.
    """
    validate_entity(entity)
    server = cast(RemoteZulipServer, entity)

    last_realm_count_id = get_last_id_from_server(server, RemoteRealmCount)
    last_installation_count_id = get_last_id_from_server(server, RemoteInstallationCount)

    return json_success({
        'last_realm_count_id': last_realm_count_id,
        'last_installation_count_id': last_installation_count_id,
    })