From 022c8beaf581046f503b93ab73f0db4c8f1d12b4 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Wed, 30 Jan 2019 15:39:02 -0800 Subject: [PATCH] analytics: Add APIs for submitting analytics to another server. This adds a new API for sending basic analytics data (number of users, number of messages sent) from a Zulip server to the Zulip Cloud central analytics database, which will make it possible for servers to elect to have their usage numbers counted in published stats on the size of the Zulip ecosystem. --- zerver/lib/export.py | 1 + zerver/lib/remote_server.py | 43 +++++++++- zerver/tests/test_push_notifications.py | 100 +++++++++++++++++++++- zilencer/migrations/0016_remote_counts.py | 53 ++++++++++++ zilencer/models.py | 26 ++++++ zilencer/urls.py | 6 ++ zilencer/views.py | 99 ++++++++++++++++++++- 7 files changed, 323 insertions(+), 5 deletions(-) create mode 100644 zilencer/migrations/0016_remote_counts.py diff --git a/zerver/lib/export.py b/zerver/lib/export.py index 3acb3c1795..c4c8e2780c 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -214,6 +214,7 @@ DATE_FIELDS = { 'zerver_userprofile': ['date_joined', 'last_login', 'last_reminder'], 'zerver_realmauditlog': ['event_time'], 'zerver_userhotspot': ['timestamp'], + 'analytics_installationcount': ['end_time'], 'analytics_realmcount': ['end_time'], 'analytics_usercount': ['end_time'], 'analytics_streamcount': ['end_time'], diff --git a/zerver/lib/remote_server.py b/zerver/lib/remote_server.py index fd6129a8d2..53e8b24777 100644 --- a/zerver/lib/remote_server.py +++ b/zerver/lib/remote_server.py @@ -1,13 +1,16 @@ import requests import ujson import urllib -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union from django.conf import settings +from django.forms.models import model_to_dict from django.utils.translation import ugettext as _ +from analytics.models import InstallationCount, RealmCount from version import ZULIP_VERSION from 
zerver.lib.exceptions import JsonableError
+from zerver.lib.export import floatify_datetime_fields
 
 class PushNotificationBouncerException(Exception):
     pass
@@ -85,3 +88,57 @@ def send_json_to_push_bouncer(method: str, endpoint: str, post_data: Dict[str, A
         ujson.dumps(post_data),
         extra_headers={"Content-type": "application/json"},
     )
+
+def build_analytics_data(realm_count_query: Any,
+                         installation_count_query: Any) -> Tuple[List[Dict[str, Any]],
+                                                                 List[Dict[str, Any]]]:
+    """Serialize analytics count rows into JSON-friendly dicts.
+
+    Both arguments are querysets; the caller below passes RealmCount and
+    InstallationCount querysets.  Rows are emitted ordered by id, which
+    the receiving endpoint requires (it rejects ids that do not increase).
+
+    Returns a (realm_count_rows, installation_count_rows) tuple.
+    """
+    data = {}  # type: Dict[str, List[Dict[str, Any]]]
+    data['analytics_realmcount'] = [
+        model_to_dict(realm_count) for realm_count in realm_count_query.order_by("id")
+    ]
+    data['analytics_installationcount'] = [
+        model_to_dict(count) for count in installation_count_query.order_by("id")
+    ]
+
+    # The keys above are the export-style table names that
+    # floatify_datetime_fields takes as its second argument.
+    floatify_datetime_fields(data, 'analytics_realmcount')
+    floatify_datetime_fields(data, 'analytics_installationcount')
+    return (data['analytics_realmcount'], data['analytics_installationcount'])
+
+def send_analytics_to_remote_server() -> None:
+    """Send analytics rows the remote server has not yet acknowledged.
+
+    Queries the bouncer for the last RealmCount and InstallationCount ids
+    it has stored, then POSTs every local row with a larger id.  No POST
+    is made when there is nothing new to send.
+    """
+    # First, check which ids the remote server already has.
+    result = send_to_push_bouncer("GET", "server/analytics/status", {})
+    last_acked_realm_count_id = result['last_realm_count_id']
+    last_acked_installation_count_id = result['last_installation_count_id']
+
+    # Gather only entries with an ID greater than the last acknowledged one.
+    (realm_count_data, installation_count_data) = build_analytics_data(
+        realm_count_query=RealmCount.objects.filter(
+            id__gt=last_acked_realm_count_id),
+        installation_count_query=InstallationCount.objects.filter(
+            id__gt=last_acked_installation_count_id))
+
+    if not realm_count_data and not installation_count_data:
+        return
+
+    request = {
+        'realm_counts': ujson.dumps(realm_count_data),
+        'installation_counts': ujson.dumps(installation_count_data),
+    }
+
+    send_to_push_bouncer("POST", "server/analytics", request)
diff --git a/zerver/tests/test_push_notifications.py b/zerver/tests/test_push_notifications.py
index 2aefa4fde0..01dc463ec9 100644
--- a/zerver/tests/test_push_notifications.py
+++
b/zerver/tests/test_push_notifications.py
@@ -1,5 +1,6 @@
 
 from contextlib import contextmanager
+import datetime
 import itertools
 import requests
 import mock
@@ -18,7 +19,11 @@ from django.test import TestCase, override_settings
 from django.conf import settings
 from django.http import HttpResponse
 from django.utils.crypto import get_random_string
+from django.utils.timezone import now as timezone_now
+from django.utils.timezone import utc as timezone_utc
 
+from analytics.lib.counts import CountStat, LoggingCountStat
+from analytics.models import InstallationCount, RealmCount
 from zerver.models import (
     PushDeviceToken,
     UserProfile,
@@ -39,12 +44,16 @@ from zerver.lib.soft_deactivation import do_soft_deactivate_users
 from zerver.lib import push_notifications as apn
 from zerver.lib.push_notifications import get_mobile_push_content, \
     DeviceToken, PushNotificationBouncerException, get_apns_client
+from zerver.lib.remote_server import send_analytics_to_remote_server, \
+    build_analytics_data
+from zerver.lib.request import JsonableError
 from zerver.lib.response import json_success
 from zerver.lib.test_classes import (
     ZulipTestCase,
 )
 
-from zilencer.models import RemoteZulipServer, RemotePushDeviceToken
+from zilencer.models import RemoteZulipServer, RemotePushDeviceToken, \
+    RemoteRealmCount, RemoteInstallationCount
 from django.utils.timezone import now
 
 ZERVER_DIR = os.path.dirname(os.path.dirname(__file__))
@@ -74,6 +83,11 @@ class BouncerTestCase(ZulipTestCase):
                                    local_url,
                                    kwargs['data'],
                                    subdomain="")
+        elif args[0] == "GET":
+            result = self.api_get(self.server_uuid,
+                                  local_url,
+                                  kwargs['data'],
+                                  subdomain="")
         else:
             raise AssertionError("Unsupported method for bounce_request")
         return result
@@ -278,6 +292,90 @@ class PushBouncerNotificationTest(BouncerTestCase):
                                                                server=server))
             self.assertEqual(len(tokens), 0)
 
+class AnalyticsBouncerTest(BouncerTestCase):
+    TIME_ZERO = datetime.datetime(1988, 3, 14).replace(tzinfo=timezone_utc)
+
+    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
+    @mock.patch('zerver.lib.push_notifications.requests.request')
+    def test_analytics_api(self, mock: Any) -> None:
+        """Exercise send_analytics_to_remote_server end to end, with the
+        mocked HTTP request routed through bounce_request.
+        """
+        mock.side_effect = self.bounce_request
+        user = self.example_user('hamlet')
+        end_time = self.TIME_ZERO
+
+        realm_stat = LoggingCountStat('invites_sent::day', RealmCount, CountStat.DAY)
+        RealmCount.objects.create(
+            realm=user.realm, property=realm_stat.property, end_time=end_time, value=5)
+        InstallationCount.objects.create(
+            property=realm_stat.property, end_time=end_time, value=5)
+
+        self.assertEqual(RealmCount.objects.count(), 1)
+        self.assertEqual(InstallationCount.objects.count(), 1)
+
+        self.assertEqual(RemoteRealmCount.objects.count(), 0)
+        self.assertEqual(RemoteInstallationCount.objects.count(), 0)
+        send_analytics_to_remote_server()
+        self.assertEqual(mock.call_count, 2)  # one status GET plus one POST
+        self.assertEqual(RemoteRealmCount.objects.count(), 1)
+        self.assertEqual(RemoteInstallationCount.objects.count(), 1)
+        send_analytics_to_remote_server()
+        self.assertEqual(mock.call_count, 3)  # nothing new to send: status GET only
+        self.assertEqual(RemoteRealmCount.objects.count(), 1)
+        self.assertEqual(RemoteInstallationCount.objects.count(), 1)
+
+        RealmCount.objects.create(
+            realm=user.realm, property=realm_stat.property, end_time=end_time + datetime.timedelta(days=1), value=6)
+        RealmCount.objects.create(
+            realm=user.realm, property=realm_stat.property, end_time=end_time + datetime.timedelta(days=2), value=9)
+        self.assertEqual(RemoteRealmCount.objects.count(), 1)
+        self.assertEqual(mock.call_count, 3)
+        send_analytics_to_remote_server()
+        self.assertEqual(mock.call_count, 5)  # status GET plus POST of the two new realm rows
+        self.assertEqual(RemoteRealmCount.objects.count(), 3)
+        self.assertEqual(RemoteInstallationCount.objects.count(), 1)
+
+        InstallationCount.objects.create(
+            property=realm_stat.property, end_time=end_time + datetime.timedelta(days=1), value=6)
+        InstallationCount.objects.create(
+            property=realm_stat.property, end_time=end_time + datetime.timedelta(days=2), value=9)
+        send_analytics_to_remote_server()
+        self.assertEqual(mock.call_count, 7)  # status GET plus POST of the two new installation rows
+        self.assertEqual(RemoteRealmCount.objects.count(), 3)
+        self.assertEqual(RemoteInstallationCount.objects.count(), 3)
+
+        (realm_count_data,
+         installation_count_data) = build_analytics_data(RealmCount.objects.all(),
+                                                         InstallationCount.objects.all())
+        result = self.api_post(self.server_uuid,
+                               '/api/v1/remotes/server/analytics',
+                               {'realm_counts': ujson.dumps(realm_count_data),
+                                'installation_counts': ujson.dumps(installation_count_data)},
+                               subdomain="")
+        self.assert_json_error(result, "Data is out of order.")  # resubmitting already-stored ids is rejected
+
+    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
+    @mock.patch('zerver.lib.push_notifications.requests.request')
+    def test_analytics_api_invalid(self, mock: Any) -> None:
+        """A RealmCount whose property is 'invalid count stat' makes the
+        upload raise JsonableError and leaves no remote rows behind.
+        """
+        mock.side_effect = self.bounce_request
+        user = self.example_user('hamlet')
+        end_time = self.TIME_ZERO
+
+        realm_stat = LoggingCountStat('invalid count stat', RealmCount, CountStat.DAY)
+        RealmCount.objects.create(
+            realm=user.realm, property=realm_stat.property, end_time=end_time, value=5)
+
+        self.assertEqual(RealmCount.objects.count(), 1)
+
+        self.assertEqual(RemoteRealmCount.objects.count(), 0)
+        with self.assertRaises(JsonableError):
+            send_analytics_to_remote_server()
+        self.assertEqual(RemoteRealmCount.objects.count(), 0)
+
 class PushNotificationTest(BouncerTestCase):
     def setUp(self) -> None:
         super().setUp()
diff --git a/zilencer/migrations/0016_remote_counts.py b/zilencer/migrations/0016_remote_counts.py
new file mode 100644
index 0000000000..24eb04daa6
--- /dev/null
+++ b/zilencer/migrations/0016_remote_counts.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.18 on
2019-02-02 06:02
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('zilencer', '0015_delete_billing'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RemoteInstallationCount',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('property', models.CharField(max_length=32)),
+                ('subgroup', models.CharField(max_length=16, null=True)),
+                ('end_time', models.DateTimeField()),
+                ('value', models.BigIntegerField()),
+                ('remote_id', models.IntegerField(db_index=True)),  # the row's id as submitted by the remote server
+                ('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zilencer.RemoteZulipServer')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='RemoteRealmCount',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('property', models.CharField(max_length=32)),
+                ('subgroup', models.CharField(max_length=16, null=True)),
+                ('end_time', models.DateTimeField()),
+                ('value', models.BigIntegerField()),
+                ('realm_id', models.IntegerField(db_index=True)),  # plain integer, not a foreign key
+                ('remote_id', models.IntegerField(db_index=True)),  # the row's id as submitted by the remote server
+                ('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zilencer.RemoteZulipServer')),
+            ],
+        ),
+        migrations.AlterUniqueTogether(
+            name='remoterealmcount',
+            unique_together=set([('server', 'realm_id', 'property', 'subgroup', 'end_time')]),  # same fields as RemoteRealmCount.Meta.unique_together
+        ),
+        migrations.AlterIndexTogether(
+            name='remoterealmcount',
+            index_together=set([('property', 'end_time')]),  # same fields as RemoteRealmCount.Meta.index_together
+        ),
+        migrations.AlterUniqueTogether(
+            name='remoteinstallationcount',
+            unique_together=set([('server', 'property', 'subgroup', 'end_time')]),  # same fields as RemoteInstallationCount.Meta.unique_together
+        ),
+    ]
diff --git a/zilencer/models.py b/zilencer/models.py
index 8bee60ce16..9943d6255a 100644
--- a/zilencer/models.py
+++ b/zilencer/models.py
@@ -3,6 +3,7 @@ import datetime
 from django.db import models
 
 from
zerver.models import AbstractPushDeviceToken
+from analytics.models import BaseCount
 
 def get_remote_server_by_uuid(uuid: str) -> 'RemoteZulipServer':
     return RemoteZulipServer.objects.get(uuid=uuid)
@@ -33,3 +34,30 @@ class RemotePushDeviceToken(AbstractPushDeviceToken):
 
     def __str__(self) -> str:
         return "<RemotePushDeviceToken %s %s>" % (self.server, self.user_id)
+
+class RemoteInstallationCount(BaseCount):
+    """An installation-wide count row submitted by a remote Zulip server."""
+    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
+    # The remote_id field lets us deduplicate data from the remote server
+    remote_id = models.IntegerField(db_index=True)  # type: int
+
+    class Meta:
+        unique_together = ("server", "property", "subgroup", "end_time")
+
+    def __str__(self) -> str:
+        return "<RemoteInstallationCount: %s %s %s>" % (self.property, self.subgroup, self.value)
+
+# We can't subclass RealmCount because we only have a realm_id here, not a foreign key.
+class RemoteRealmCount(BaseCount):
+    """A per-realm count row submitted by a remote Zulip server."""
+    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
+    realm_id = models.IntegerField(db_index=True)  # type: int
+    # The remote_id field lets us deduplicate data from the remote server
+    remote_id = models.IntegerField(db_index=True)  # type: int
+
+    class Meta:
+        unique_together = ("server", "realm_id", "property", "subgroup", "end_time")
+        index_together = ["property", "end_time"]
+
+    def __str__(self) -> str:
+        return "%s %s %s %s %s" % (self.server, self.realm_id, self.property, self.subgroup, self.value)
diff --git a/zilencer/urls.py b/zilencer/urls.py
index b4086b1773..5bb6376203 100644
--- a/zilencer/urls.py
+++ b/zilencer/urls.py
@@ -18,6 +18,12 @@ v1_api_and_json_patterns = [
 
     # Push signup doesn't use the REST API, since there's no auth.
     url('^remotes/server/register$', zilencer.views.register_remote_server),
+
+    # For receiving InstallationCount data and similar analytics.
+ url('^remotes/server/analytics$', rest_dispatch, + {'POST': 'zilencer.views.remote_server_post_analytics'}), + url('^remotes/server/analytics/status$', rest_dispatch, + {'GET': 'zilencer.views.remote_server_check_analytics'}), ] urlpatterns = [ diff --git a/zilencer/views.py b/zilencer/views.py index 611c4eb407..21acaec364 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -1,4 +1,5 @@ -from typing import Any, Dict, Optional, Union, cast +from typing import Any, Dict, List, Optional, Union, cast +import datetime import logging from django.core.exceptions import ValidationError @@ -6,9 +7,11 @@ from django.core.validators import validate_email, URLValidator from django.db import IntegrityError, transaction from django.http import HttpRequest, HttpResponse from django.utils import timezone +from django.utils.timezone import utc as timezone_utc, now as timezone_now from django.utils.translation import ugettext as _, ugettext as err_ from django.views.decorators.csrf import csrf_exempt +from analytics.lib.counts import COUNT_STATS from zerver.decorator import require_post, InvalidZulipServerKeyError from zerver.lib.exceptions import JsonableError from zerver.lib.push_notifications import send_android_push_notification, \ @@ -16,10 +19,12 @@ from zerver.lib.push_notifications import send_android_push_notification, \ from zerver.lib.request import REQ, has_request_variables from zerver.lib.response import json_error, json_success from zerver.lib.validator import check_int, check_string, \ - check_capped_string, check_string_fixed_length + check_capped_string, check_string_fixed_length, check_float, check_none_or, \ + check_dict, check_dict_only, check_list from zerver.models import UserProfile from zerver.views.push_notifications import validate_token -from zilencer.models import RemotePushDeviceToken, RemoteZulipServer +from zilencer.models import RemotePushDeviceToken, RemoteZulipServer, \ + RemoteRealmCount, RemoteInstallationCount def validate_entity(entity: 
Union[UserProfile, RemoteZulipServer]) -> None:
     if not isinstance(entity, RemoteZulipServer):
@@ -144,3 +149,108 @@ def remote_server_notify_push(request: HttpRequest, entity: Union[UserProfile, R
         send_apple_push_notification(user_id, apple_devices, apns_payload, remote=True)
 
     return json_success()
+
+def validate_count_stats(server: RemoteZulipServer, model: Any,
+                         counts: List[Dict[str, Any]]) -> None:
+    """Reject rows with an unknown property or an id that does not increase.
+
+    Ids must be strictly greater than the largest remote_id already stored
+    for `server` in `model`, and strictly increasing within `counts`.
+
+    Raises JsonableError at the first offending item.
+    """
+    last_id = get_last_id_from_server(server, model)
+    for item in counts:
+        if item['property'] not in COUNT_STATS:
+            # Translate the constant format string, then interpolate.
+            raise JsonableError(_("Invalid property %s") % (item['property'],))
+        if item['id'] <= last_id:
+            raise JsonableError(_("Data is out of order."))
+        last_id = item['id']
+
+@has_request_variables
+def remote_server_post_analytics(request: HttpRequest,
+                                 entity: Union[UserProfile, RemoteZulipServer],
+                                 realm_counts: List[Dict[str, Any]]=REQ(
+                                     validator=check_list(check_dict_only([
+                                         ('property', check_string),
+                                         ('realm', check_int),
+                                         ('id', check_int),
+                                         ('end_time', check_float),
+                                         ('subgroup', check_none_or(check_string)),
+                                         ('value', check_int),
+                                     ]))),
+                                 installation_counts: List[Dict[str, Any]]=REQ(
+                                     validator=check_list(check_dict_only([
+                                         ('property', check_string),
+                                         ('id', check_int),
+                                         ('end_time', check_float),
+                                         ('subgroup', check_none_or(check_string)),
+                                         ('value', check_int),
+                                     ])))) -> HttpResponse:
+    """Store analytics count rows uploaded by a remote Zulip server.
+
+    Every row of both lists is validated before any row is written; a
+    JsonableError from validation therefore stores nothing.
+    """
+    validate_entity(entity)
+    server = cast(RemoteZulipServer, entity)
+
+    validate_count_stats(server, RemoteRealmCount, realm_counts)
+    # Installation rows are checked against RemoteInstallationCount's own
+    # last stored id, not the realm table's.
+    validate_count_stats(server, RemoteInstallationCount, installation_counts)
+
+    BATCH_SIZE = 1000
+    while realm_counts:
+        batch = realm_counts[0:BATCH_SIZE]
+        realm_counts = realm_counts[BATCH_SIZE:]
+
+        objects_to_create = []
+        for item in batch:
+            objects_to_create.append(RemoteRealmCount(
+                property=item['property'],
+                realm_id=item['realm'],
+                remote_id=item['id'],
+                server=server,
+                end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
+                subgroup=item['subgroup'],
+                value=item['value']))
+        RemoteRealmCount.objects.bulk_create(objects_to_create)
+
+    while installation_counts:
+        batch = installation_counts[0:BATCH_SIZE]
+        installation_counts = installation_counts[BATCH_SIZE:]
+
+        objects_to_create = []
+        for item in batch:
+            objects_to_create.append(RemoteInstallationCount(
+                property=item['property'],
+                remote_id=item['id'],
+                server=server,
+                end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
+                subgroup=item['subgroup'],
+                value=item['value']))
+        RemoteInstallationCount.objects.bulk_create(objects_to_create)
+    return json_success()
+
+def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int:
+    """Return the largest remote_id stored for `server` in `model`, or 0 if none."""
+    last_count = model.objects.filter(server=server).order_by("remote_id").last()
+    if last_count is not None:
+        return last_count.remote_id
+    return 0
+
+@has_request_variables
+def remote_server_check_analytics(request: HttpRequest,
+                                  entity: Union[UserProfile, RemoteZulipServer]) -> HttpResponse:
+    """Report the last realm-count and installation-count ids stored for this server."""
+    validate_entity(entity)
+    server = cast(RemoteZulipServer, entity)
+
+    result = {
+        'last_realm_count_id': get_last_id_from_server(server, RemoteRealmCount),
+        'last_installation_count_id': get_last_id_from_server(
+            server, RemoteInstallationCount),
+    }
+    return json_success(result)