mirror of https://github.com/zulip/zulip.git
analytics: Add APIs for submitting analytics to another server.
This adds a new API for sending basic analytics data (number of users, number of messages sent) from a Zulip server to the Zulip Cloud central analytics database, which will make it possible for servers to elect to have their usage numbers counted in published stats on the size of the Zulip ecosystem.
This commit is contained in:
parent
8df0d95559
commit
022c8beaf5
|
@ -214,6 +214,7 @@ DATE_FIELDS = {
|
|||
'zerver_userprofile': ['date_joined', 'last_login', 'last_reminder'],
|
||||
'zerver_realmauditlog': ['event_time'],
|
||||
'zerver_userhotspot': ['timestamp'],
|
||||
'analytics_installationcount': ['end_time'],
|
||||
'analytics_realmcount': ['end_time'],
|
||||
'analytics_usercount': ['end_time'],
|
||||
'analytics_streamcount': ['end_time'],
|
||||
|
|
|
@ -1,13 +1,16 @@
|
|||
import requests
|
||||
import ujson
|
||||
import urllib
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from django.conf import settings
|
||||
from django.forms.models import model_to_dict
|
||||
from django.utils.translation import ugettext as _
|
||||
|
||||
from analytics.models import InstallationCount, RealmCount
|
||||
from version import ZULIP_VERSION
|
||||
from zerver.lib.exceptions import JsonableError
|
||||
from zerver.lib.export import floatify_datetime_fields
|
||||
|
||||
class PushNotificationBouncerException(Exception):
    """Error type for problems involving the push notification bouncer.

    NOTE(review): the raise/catch sites are outside this view; the purpose
    is inferred from the class name only.
    """
|
||||
|
@ -85,3 +88,41 @@ def send_json_to_push_bouncer(method: str, endpoint: str, post_data: Dict[str, A
|
|||
ujson.dumps(post_data),
|
||||
extra_headers={"Content-type": "application/json"},
|
||||
)
|
||||
|
||||
def build_analytics_data(realm_count_query: Any,
                         installation_count_query: Any) -> Tuple[List[Dict[str, Any]],
                                                                 List[Dict[str, Any]]]:
    """Serialize analytics rows into plain dictionaries for upload.

    Each queryset is read in ascending ``id`` order and every row is
    converted with ``model_to_dict``.  The rows are then passed through
    ``floatify_datetime_fields`` under the table names
    ``analytics_realmcount`` / ``analytics_installationcount``
    (presumably turning ``end_time`` into a float timestamp -- confirm
    against zerver.lib.export).

    Returns a ``(realm_count_rows, installation_count_rows)`` tuple.
    """
    realm_table = 'analytics_realmcount'
    installation_table = 'analytics_installationcount'

    # floatify_datetime_fields() addresses rows by table name, so the rows
    # are kept in a table-name -> rows mapping while they are processed.
    rows_by_table = {
        realm_table: [
            model_to_dict(row) for row in realm_count_query.order_by("id")
        ],
        installation_table: [
            model_to_dict(row) for row in installation_count_query.order_by("id")
        ],
    }
    for table in (realm_table, installation_table):
        floatify_datetime_fields(rows_by_table, table)

    return (rows_by_table[realm_table], rows_by_table[installation_table])
|
||||
|
||||
def send_analytics_to_remote_server() -> None:
    """Upload analytics rows the remote server has not acknowledged yet.

    Performs a GET to ``server/analytics/status`` to learn the last
    RealmCount / InstallationCount ids the remote side has stored, then
    POSTs every newer row to ``server/analytics``.  No POST is made when
    there is nothing new to send.
    """
    # First, ask the remote server what the latest rows it holds are.
    status = send_to_push_bouncer("GET", "server/analytics/status", {})
    realm_cursor = status['last_realm_count_id']
    installation_cursor = status['last_installation_count_id']

    # Gather only entries with an ID greater than the acknowledged ones.
    pending_realm_counts = RealmCount.objects.filter(id__gt=realm_cursor)
    pending_installation_counts = InstallationCount.objects.filter(
        id__gt=installation_cursor)
    (realm_count_data, installation_count_data) = build_analytics_data(
        realm_count_query=pending_realm_counts,
        installation_count_query=pending_installation_counts)

    if not realm_count_data and not installation_count_data:
        # Nothing new since the last upload.
        return

    # Each list is JSON-encoded into its own form field.
    payload = {
        'realm_counts': ujson.dumps(realm_count_data),
        'installation_counts': ujson.dumps(installation_count_data),
    }
    send_to_push_bouncer("POST", "server/analytics", payload)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
|
||||
from contextlib import contextmanager
|
||||
import datetime
|
||||
import itertools
|
||||
import requests
|
||||
import mock
|
||||
|
@ -18,7 +19,11 @@ from django.test import TestCase, override_settings
|
|||
from django.conf import settings
|
||||
from django.http import HttpResponse
|
||||
from django.utils.crypto import get_random_string
|
||||
from django.utils.timezone import now as timezone_now
|
||||
from django.utils.timezone import utc as timezone_utc
|
||||
|
||||
from analytics.lib.counts import CountStat, LoggingCountStat
|
||||
from analytics.models import InstallationCount, RealmCount
|
||||
from zerver.models import (
|
||||
PushDeviceToken,
|
||||
UserProfile,
|
||||
|
@ -39,12 +44,16 @@ from zerver.lib.soft_deactivation import do_soft_deactivate_users
|
|||
from zerver.lib import push_notifications as apn
|
||||
from zerver.lib.push_notifications import get_mobile_push_content, \
|
||||
DeviceToken, PushNotificationBouncerException, get_apns_client
|
||||
from zerver.lib.remote_server import send_analytics_to_remote_server, \
|
||||
build_analytics_data
|
||||
from zerver.lib.request import JsonableError
|
||||
from zerver.lib.response import json_success
|
||||
from zerver.lib.test_classes import (
|
||||
ZulipTestCase,
|
||||
)
|
||||
|
||||
from zilencer.models import RemoteZulipServer, RemotePushDeviceToken
|
||||
from zilencer.models import RemoteZulipServer, RemotePushDeviceToken, \
|
||||
RemoteRealmCount, RemoteInstallationCount
|
||||
from django.utils.timezone import now
|
||||
|
||||
ZERVER_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||
|
@ -74,6 +83,11 @@ class BouncerTestCase(ZulipTestCase):
|
|||
local_url,
|
||||
kwargs['data'],
|
||||
subdomain="")
|
||||
elif args[0] == "GET":
|
||||
result = self.api_get(self.server_uuid,
|
||||
local_url,
|
||||
kwargs['data'],
|
||||
subdomain="")
|
||||
else:
|
||||
raise AssertionError("Unsupported method for bounce_request")
|
||||
return result
|
||||
|
@ -278,6 +292,90 @@ class PushBouncerNotificationTest(BouncerTestCase):
|
|||
server=server))
|
||||
self.assertEqual(len(tokens), 0)
|
||||
|
||||
class AnalyticsBouncerTest(BouncerTestCase):
    """Tests for uploading analytics counts through the bouncer endpoints."""

    TIME_ZERO = datetime.datetime(1988, 3, 14, tzinfo=timezone_utc)

    def _assert_remote_counts(self, realm_counts: int, installation_counts: int) -> None:
        """Check how many rows the receiving side has stored so far."""
        self.assertEqual(RemoteRealmCount.objects.count(), realm_counts)
        self.assertEqual(RemoteInstallationCount.objects.count(), installation_counts)

    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
    @mock.patch('zerver.lib.push_notifications.requests.request')
    def test_analytics_api(self, mock_request: Any) -> None:
        """Exercise send_analytics_to_remote_server() end to end.

        Outgoing HTTP requests are routed back into this server through
        bounce_request, so both the sending and receiving code run.
        """
        mock_request.side_effect = self.bounce_request
        realm = self.example_user('hamlet').realm
        day = datetime.timedelta(days=1)
        stat_property = LoggingCountStat('invites_sent::day', RealmCount, CountStat.DAY).property

        RealmCount.objects.create(
            realm=realm, property=stat_property, end_time=self.TIME_ZERO, value=5)
        InstallationCount.objects.create(
            property=stat_property, end_time=self.TIME_ZERO, value=5)

        self.assertEqual(RealmCount.objects.count(), 1)
        self.assertEqual(InstallationCount.objects.count(), 1)

        # First upload: one status GET plus one POST.
        self._assert_remote_counts(0, 0)
        send_analytics_to_remote_server()
        self.assertEqual(mock_request.call_count, 2)
        self._assert_remote_counts(1, 1)

        # Nothing new: only the status GET is issued.
        send_analytics_to_remote_server()
        self.assertEqual(mock_request.call_count, 3)
        self._assert_remote_counts(1, 1)

        # Only new RealmCount rows.
        RealmCount.objects.create(
            realm=realm, property=stat_property, end_time=self.TIME_ZERO + day, value=6)
        RealmCount.objects.create(
            realm=realm, property=stat_property, end_time=self.TIME_ZERO + 2 * day, value=9)
        self.assertEqual(RemoteRealmCount.objects.count(), 1)
        self.assertEqual(mock_request.call_count, 3)
        send_analytics_to_remote_server()
        self.assertEqual(mock_request.call_count, 5)
        self._assert_remote_counts(3, 1)

        # Only new InstallationCount rows.
        InstallationCount.objects.create(
            property=stat_property, end_time=self.TIME_ZERO + day, value=6)
        InstallationCount.objects.create(
            property=stat_property, end_time=self.TIME_ZERO + 2 * day, value=9)
        send_analytics_to_remote_server()
        self.assertEqual(mock_request.call_count, 7)
        self._assert_remote_counts(3, 3)

        # Re-posting rows the server already holds must be rejected.
        (realm_count_data,
         installation_count_data) = build_analytics_data(RealmCount.objects.all(),
                                                         InstallationCount.objects.all())
        payload = {'realm_counts': ujson.dumps(realm_count_data),
                   'installation_counts': ujson.dumps(installation_count_data)}
        result = self.api_post(self.server_uuid,
                               '/api/v1/remotes/server/analytics',
                               payload,
                               subdomain="")
        self.assert_json_error(result, "Data is out of order.")

    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
    @mock.patch('zerver.lib.push_notifications.requests.request')
    def test_analytics_api_invalid(self, mock_request: Any) -> None:
        """Uploading a row with an unknown stat property fails and stores nothing."""
        mock_request.side_effect = self.bounce_request
        realm = self.example_user('hamlet').realm

        bogus_stat = LoggingCountStat('invalid count stat', RealmCount, CountStat.DAY)
        RealmCount.objects.create(
            realm=realm, property=bogus_stat.property, end_time=self.TIME_ZERO, value=5)

        self.assertEqual(RealmCount.objects.count(), 1)

        self.assertEqual(RemoteRealmCount.objects.count(), 0)
        with self.assertRaises(JsonableError):
            send_analytics_to_remote_server()
        self.assertEqual(RemoteRealmCount.objects.count(), 0)
|
||||
|
||||
class PushNotificationTest(BouncerTestCase):
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.11.18 on 2019-02-02 06:02
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the RemoteInstallationCount and RemoteRealmCount tables."""

    dependencies = [
        ('zilencer', '0015_delete_billing'),
    ]

    operations = [
        # Installation-wide counts, stored per remote server.
        migrations.CreateModel(
            name='RemoteInstallationCount',
            fields=[
                ('id', models.AutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                ('property', models.CharField(max_length=32)),
                ('subgroup', models.CharField(max_length=16, null=True)),
                ('end_time', models.DateTimeField()),
                ('value', models.BigIntegerField()),
                ('remote_id', models.IntegerField(db_index=True)),
                ('server', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    to='zilencer.RemoteZulipServer',
                )),
            ],
        ),
        # Per-realm counts, stored per remote server.
        migrations.CreateModel(
            name='RemoteRealmCount',
            fields=[
                ('id', models.AutoField(
                    auto_created=True,
                    primary_key=True,
                    serialize=False,
                    verbose_name='ID',
                )),
                ('property', models.CharField(max_length=32)),
                ('subgroup', models.CharField(max_length=16, null=True)),
                ('end_time', models.DateTimeField()),
                ('value', models.BigIntegerField()),
                ('realm_id', models.IntegerField(db_index=True)),
                ('remote_id', models.IntegerField(db_index=True)),
                ('server', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    to='zilencer.RemoteZulipServer',
                )),
            ],
        ),
        migrations.AlterUniqueTogether(
            name='remoterealmcount',
            unique_together={('server', 'realm_id', 'property', 'subgroup', 'end_time')},
        ),
        migrations.AlterIndexTogether(
            name='remoterealmcount',
            index_together={('property', 'end_time')},
        ),
        migrations.AlterUniqueTogether(
            name='remoteinstallationcount',
            unique_together={('server', 'property', 'subgroup', 'end_time')},
        ),
    ]
|
|
@ -3,6 +3,7 @@ import datetime
|
|||
from django.db import models
|
||||
|
||||
from zerver.models import AbstractPushDeviceToken
|
||||
from analytics.models import BaseCount
|
||||
|
||||
def get_remote_server_by_uuid(uuid: str) -> 'RemoteZulipServer':
    """Fetch the RemoteZulipServer row whose ``uuid`` matches.

    The lookup uses ``objects.get``, so Django's usual DoesNotExist /
    MultipleObjectsReturned exceptions propagate to the caller.
    """
    remote_server = RemoteZulipServer.objects.get(uuid=uuid)
    return remote_server
|
||||
|
@ -33,3 +34,28 @@ class RemotePushDeviceToken(AbstractPushDeviceToken):
|
|||
|
||||
def __str__(self) -> str:
    """Render the token as ``<RemotePushDeviceToken SERVER USER_ID>``."""
    parts = (self.server, self.user_id)
    return "<RemotePushDeviceToken %s %s>" % parts
|
||||
|
||||
class RemoteInstallationCount(BaseCount):
    """Installation-wide analytics count received from a remote Zulip server."""

    # The remote server that submitted this row.
    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
    # The row's id on the remote server; lets us deduplicate data from
    # the remote server.
    remote_id = models.IntegerField(db_index=True)  # type: int

    class Meta:
        unique_together = ("server", "property", "subgroup", "end_time")

    def __str__(self) -> str:
        """Short debugging representation: property, subgroup and value."""
        parts = (self.property, self.subgroup, self.value)
        return "<InstallationCount: %s %s %s>" % parts
|
||||
|
||||
# We can't subclass RealmCount because we only have a realm_id here, not a foreign key.
class RemoteRealmCount(BaseCount):
    """Per-realm analytics count received from a remote Zulip server."""

    # The remote server that submitted this row.
    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
    # The realm's id on the remote server, stored as a plain integer.
    realm_id = models.IntegerField(db_index=True)  # type: int
    # The row's id on the remote server; lets us deduplicate data from
    # the remote server.
    remote_id = models.IntegerField(db_index=True)  # type: int

    class Meta:
        unique_together = ("server", "realm_id", "property", "subgroup", "end_time")
        index_together = ["property", "end_time"]

    def __str__(self) -> str:
        """Short debugging representation: server, realm, property, subgroup, value."""
        parts = (self.server, self.realm_id, self.property, self.subgroup, self.value)
        return "%s %s %s %s %s" % parts
|
||||
|
|
|
@ -18,6 +18,12 @@ v1_api_and_json_patterns = [
|
|||
|
||||
# Push signup doesn't use the REST API, since there's no auth.
|
||||
url('^remotes/server/register$', zilencer.views.register_remote_server),
|
||||
|
||||
# For receiving InstallationCount data and similar analytics.
|
||||
url('^remotes/server/analytics$', rest_dispatch,
|
||||
{'POST': 'zilencer.views.remote_server_post_analytics'}),
|
||||
url('^remotes/server/analytics/status$', rest_dispatch,
|
||||
{'GET': 'zilencer.views.remote_server_check_analytics'}),
|
||||
]
|
||||
|
||||
urlpatterns = [
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from typing import Any, Dict, Optional, Union, cast
|
||||
from typing import Any, Dict, List, Optional, Union, cast
|
||||
import datetime
|
||||
import logging
|
||||
|
||||
from django.core.exceptions import ValidationError
|
||||
|
@ -6,9 +7,11 @@ from django.core.validators import validate_email, URLValidator
|
|||
from django.db import IntegrityError, transaction
|
||||
from django.http import HttpRequest, HttpResponse
|
||||
from django.utils import timezone
|
||||
from django.utils.timezone import utc as timezone_utc, now as timezone_now
|
||||
from django.utils.translation import ugettext as _, ugettext as err_
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
|
||||
from analytics.lib.counts import COUNT_STATS
|
||||
from zerver.decorator import require_post, InvalidZulipServerKeyError
|
||||
from zerver.lib.exceptions import JsonableError
|
||||
from zerver.lib.push_notifications import send_android_push_notification, \
|
||||
|
@ -16,10 +19,12 @@ from zerver.lib.push_notifications import send_android_push_notification, \
|
|||
from zerver.lib.request import REQ, has_request_variables
|
||||
from zerver.lib.response import json_error, json_success
|
||||
from zerver.lib.validator import check_int, check_string, \
|
||||
check_capped_string, check_string_fixed_length
|
||||
check_capped_string, check_string_fixed_length, check_float, check_none_or, \
|
||||
check_dict, check_dict_only, check_list
|
||||
from zerver.models import UserProfile
|
||||
from zerver.views.push_notifications import validate_token
|
||||
from zilencer.models import RemotePushDeviceToken, RemoteZulipServer
|
||||
from zilencer.models import RemotePushDeviceToken, RemoteZulipServer, \
|
||||
RemoteRealmCount, RemoteInstallationCount
|
||||
|
||||
def validate_entity(entity: Union[UserProfile, RemoteZulipServer]) -> None:
|
||||
if not isinstance(entity, RemoteZulipServer):
|
||||
|
@ -144,3 +149,91 @@ def remote_server_notify_push(request: HttpRequest, entity: Union[UserProfile, R
|
|||
send_apple_push_notification(user_id, apple_devices, apns_payload, remote=True)
|
||||
|
||||
return json_success()
|
||||
|
||||
def validate_count_stats(server: RemoteZulipServer, model: Any,
                         counts: List[Dict[str, Any]]) -> None:
    """Check a batch of submitted count rows before they are stored.

    Every row must name a property present in COUNT_STATS, and row ids
    must be strictly increasing, starting above the highest ``remote_id``
    already stored for ``server`` in ``model``.

    Raises JsonableError on the first row that violates either rule.
    """
    last_id = get_last_id_from_server(server, model)
    for item in counts:
        if item['property'] not in COUNT_STATS:
            # Translate the template first and interpolate afterwards;
            # interpolating inside _() would look up the already-formatted
            # string, which can never match a translation catalog entry.
            raise JsonableError(_("Invalid property %s") % (item['property'],))
        if item['id'] <= last_id:
            raise JsonableError(_("Data is out of order."))
        last_id = item['id']
|
||||
|
||||
@has_request_variables
def remote_server_post_analytics(request: HttpRequest,
                                 entity: Union[UserProfile, RemoteZulipServer],
                                 realm_counts: List[Dict[str, Any]]=REQ(
                                     validator=check_list(check_dict_only([
                                         ('property', check_string),
                                         ('realm', check_int),
                                         ('id', check_int),
                                         ('end_time', check_float),
                                         ('subgroup', check_none_or(check_string)),
                                         ('value', check_int),
                                     ]))),
                                 installation_counts: List[Dict[str, Any]]=REQ(
                                     validator=check_list(check_dict_only([
                                         ('property', check_string),
                                         ('id', check_int),
                                         ('end_time', check_float),
                                         ('subgroup', check_none_or(check_string)),
                                         ('value', check_int),
                                     ])))) -> HttpResponse:
    """Store analytics counts submitted by a remote Zulip server.

    ``realm_counts`` and ``installation_counts`` are lists of row
    dictionaries whose shape is enforced by the REQ validators above;
    ``end_time`` is a float Unix timestamp and is stored as a UTC
    datetime.  Both lists are validated in full before anything is
    written, and rows are then inserted in batches of BATCH_SIZE.

    Raises JsonableError (via validate_count_stats) for an unknown
    property or for row ids that are not strictly increasing.
    """
    validate_entity(entity)
    server = cast(RemoteZulipServer, entity)

    validate_count_stats(server, RemoteRealmCount, realm_counts)
    # Each list is checked against its own table; the installation rows
    # were previously never validated because realm_counts was passed here.
    validate_count_stats(server, RemoteInstallationCount, installation_counts)

    BATCH_SIZE = 1000

    # Step through each list in fixed-size slices instead of repeatedly
    # re-slicing (and copying) the remaining tail.
    for start in range(0, len(realm_counts), BATCH_SIZE):
        RemoteRealmCount.objects.bulk_create([
            RemoteRealmCount(
                property=item['property'],
                realm_id=item['realm'],
                remote_id=item['id'],
                server=server,
                end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
                subgroup=item['subgroup'],
                value=item['value'])
            for item in realm_counts[start:start + BATCH_SIZE]
        ])

    for start in range(0, len(installation_counts), BATCH_SIZE):
        RemoteInstallationCount.objects.bulk_create([
            RemoteInstallationCount(
                property=item['property'],
                remote_id=item['id'],
                server=server,
                end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
                subgroup=item['subgroup'],
                value=item['value'])
            for item in installation_counts[start:start + BATCH_SIZE]
        ])

    return json_success()
|
||||
|
||||
def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int:
    """Return the highest ``remote_id`` stored for ``server`` in ``model``.

    Returns 0 when ``model`` has no rows for this server.
    """
    newest = model.objects.filter(server=server).order_by("remote_id").last()
    return 0 if newest is None else newest.remote_id
|
||||
|
||||
@has_request_variables
def remote_server_check_analytics(request: HttpRequest,
                                  entity: Union[UserProfile, RemoteZulipServer]) -> HttpResponse:
    """Report the last count ids already stored for the calling server.

    The JSON response carries ``last_realm_count_id`` and
    ``last_installation_count_id``; each is 0 when nothing has been
    stored for this server yet.
    """
    # NOTE(review): validate_entity presumably rejects callers that are
    # not a RemoteZulipServer -- its body is outside this view.
    validate_entity(entity)
    server = cast(RemoteZulipServer, entity)

    last_realm_count_id = get_last_id_from_server(server, RemoteRealmCount)
    last_installation_count_id = get_last_id_from_server(
        server, RemoteInstallationCount)
    return json_success({
        'last_realm_count_id': last_realm_count_id,
        'last_installation_count_id': last_installation_count_id,
    })
|
||||
|
|
Loading…
Reference in New Issue