def build_analytics_data(realm_count_query: Any,
                         installation_count_query: Any) -> Tuple[List[Dict[str, Any]],
                                                                 List[Dict[str, Any]]]:
    """Serialize RealmCount and InstallationCount rows for upload.

    Each query argument is ordered by "id" and every row is converted to a
    plain dict with model_to_dict().  The rows are then passed through
    floatify_datetime_fields() under the table names 'analytics_realmcount'
    and 'analytics_installationcount'.

    Returns a (realm_count_rows, installation_count_rows) tuple of lists
    of dicts.
    """
    realm_rows = [model_to_dict(row) for row in realm_count_query.order_by("id")]
    installation_rows = [
        model_to_dict(row) for row in installation_count_query.order_by("id")
    ]

    # floatify_datetime_fields() looks rows up by table name, so group the
    # two lists in a dict keyed by those names.
    # NOTE(review): its return value is unused, so it presumably converts
    # the datetime fields in place -- confirm in zerver/lib/export.py.
    tables = {
        'analytics_realmcount': realm_rows,
        'analytics_installationcount': installation_rows,
    }
    for table_name in ('analytics_realmcount', 'analytics_installationcount'):
        floatify_datetime_fields(tables, table_name)

    return (tables['analytics_realmcount'], tables['analytics_installationcount'])
def send_analytics_to_remote_server() -> None:
    """Upload not-yet-acknowledged analytics rows to the push bouncer.

    Asks the bouncer (GET server/analytics/status) for the last RealmCount
    and InstallationCount IDs it has recorded, serializes every local row
    with a larger ID, and POSTs them to server/analytics.  When there is
    nothing new in either table, no POST is made.
    """
    # First, find out what the bouncer already has.
    status = send_to_push_bouncer("GET", "server/analytics/status", {})
    realm_watermark = status['last_realm_count_id']
    installation_watermark = status['last_installation_count_id']

    # Gather only entries with an ID greater than the acknowledged ones.
    unsent_realm_counts = RealmCount.objects.filter(id__gt=realm_watermark)
    unsent_installation_counts = InstallationCount.objects.filter(
        id__gt=installation_watermark)
    realm_count_data, installation_count_data = build_analytics_data(
        realm_count_query=unsent_realm_counts,
        installation_count_query=unsent_installation_counts)

    if not realm_count_data and not installation_count_data:
        return

    payload = {
        'realm_counts': ujson.dumps(realm_count_data),
        'installation_counts': ujson.dumps(installation_count_data),
    }
    send_to_push_bouncer("POST", "server/analytics", payload)
# ---- zerver/tests/test_push_notifications.py ----

class AnalyticsBouncerTest(BouncerTestCase):
    """Tests for uploading analytics counts to the push bouncer."""
    # Fixed, timezone-aware end_time used as the base for all fixture rows.
    TIME_ZERO = datetime.datetime(1988, 3, 14).replace(tzinfo=timezone_utc)

    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
    @mock.patch('zerver.lib.push_notifications.requests.request')
    def test_analytics_api(self, mock: Any) -> None:
        """Exercise send_analytics_to_remote_server() end to end.

        Outgoing HTTP requests are routed to the local bouncer endpoints
        through self.bounce_request, so mock.call_count tracks how many
        requests each send makes: a GET for the status check, plus a
        POST only when there are new rows to upload.
        """
        mock.side_effect = self.bounce_request
        user = self.example_user('hamlet')
        end_time = self.TIME_ZERO

        realm_stat = LoggingCountStat('invites_sent::day', RealmCount, CountStat.DAY)
        RealmCount.objects.create(
            realm=user.realm, property=realm_stat.property, end_time=end_time, value=5)
        InstallationCount.objects.create(
            property=realm_stat.property, end_time=end_time, value=5)

        self.assertEqual(RealmCount.objects.count(), 1)
        self.assertEqual(InstallationCount.objects.count(), 1)

        # First send: status GET + data POST uploads one row of each kind.
        self.assertEqual(RemoteRealmCount.objects.count(), 0)
        self.assertEqual(RemoteInstallationCount.objects.count(), 0)
        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 2)
        self.assertEqual(RemoteRealmCount.objects.count(), 1)
        self.assertEqual(RemoteInstallationCount.objects.count(), 1)

        # Nothing new: only the status GET is made and nothing is duplicated.
        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 3)
        self.assertEqual(RemoteRealmCount.objects.count(), 1)
        self.assertEqual(RemoteInstallationCount.objects.count(), 1)

        # New RealmCount rows only.
        RealmCount.objects.create(
            realm=user.realm, property=realm_stat.property, end_time=end_time + datetime.timedelta(days=1), value=6)
        RealmCount.objects.create(
            realm=user.realm, property=realm_stat.property, end_time=end_time + datetime.timedelta(days=2), value=9)
        self.assertEqual(RemoteRealmCount.objects.count(), 1)
        self.assertEqual(mock.call_count, 3)
        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 5)
        self.assertEqual(RemoteRealmCount.objects.count(), 3)
        self.assertEqual(RemoteInstallationCount.objects.count(), 1)

        # New InstallationCount rows only.
        InstallationCount.objects.create(
            property=realm_stat.property, end_time=end_time + datetime.timedelta(days=1), value=6)
        InstallationCount.objects.create(
            property=realm_stat.property, end_time=end_time + datetime.timedelta(days=2), value=9)
        send_analytics_to_remote_server()
        self.assertEqual(mock.call_count, 7)
        self.assertEqual(RemoteRealmCount.objects.count(), 3)
        self.assertEqual(RemoteInstallationCount.objects.count(), 3)

        # Re-posting rows the bouncer already has must be rejected.
        (realm_count_data,
         installation_count_data) = build_analytics_data(RealmCount.objects.all(),
                                                         InstallationCount.objects.all())
        result = self.api_post(self.server_uuid,
                               '/api/v1/remotes/server/analytics',
                               {'realm_counts': ujson.dumps(realm_count_data),
                                'installation_counts': ujson.dumps(installation_count_data)},
                               subdomain="")
        self.assert_json_error(result, "Data is out of order.")

    @override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
    @mock.patch('zerver.lib.push_notifications.requests.request')
    def test_analytics_api_invalid(self, mock: Any) -> None:
        """A row with an unrecognized property makes the send raise
        JsonableError and stores nothing on the bouncer side.
        """
        mock.side_effect = self.bounce_request
        user = self.example_user('hamlet')
        end_time = self.TIME_ZERO

        realm_stat = LoggingCountStat('invalid count stat', RealmCount, CountStat.DAY)
        RealmCount.objects.create(
            realm=user.realm, property=realm_stat.property, end_time=end_time, value=5)

        self.assertEqual(RealmCount.objects.count(), 1)

        self.assertEqual(RemoteRealmCount.objects.count(), 0)
        with self.assertRaises(JsonableError):
            send_analytics_to_remote_server()
        self.assertEqual(RemoteRealmCount.objects.count(), 0)

# ---- zilencer/migrations/0016_remote_counts.py ----

# Creates the RemoteInstallationCount and RemoteRealmCount tables together
# with their uniqueness constraints and the (property, end_time) index.
class Migration(migrations.Migration):

    dependencies = [
        ('zilencer', '0015_delete_billing'),
    ]

    operations = [
        migrations.CreateModel(
            name='RemoteInstallationCount',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('property', models.CharField(max_length=32)),
                ('subgroup', models.CharField(max_length=16, null=True)),
                ('end_time', models.DateTimeField()),
                ('value', models.BigIntegerField()),
                ('remote_id', models.IntegerField(db_index=True)),
                ('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zilencer.RemoteZulipServer')),
            ],
        ),
        migrations.CreateModel(
            name='RemoteRealmCount',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('property', models.CharField(max_length=32)),
                ('subgroup', models.CharField(max_length=16, null=True)),
                ('end_time', models.DateTimeField()),
                ('value', models.BigIntegerField()),
                ('realm_id', models.IntegerField(db_index=True)),
                ('remote_id', models.IntegerField(db_index=True)),
                ('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zilencer.RemoteZulipServer')),
            ],
        ),
        migrations.AlterUniqueTogether(
            name='remoterealmcount',
            unique_together=set([('server', 'realm_id', 'property', 'subgroup', 'end_time')]),
        ),
        migrations.AlterIndexTogether(
            name='remoterealmcount',
            index_together=set([('property', 'end_time')]),
        ),
        migrations.AlterUniqueTogether(
            name='remoteinstallationcount',
            unique_together=set([('server', 'property', 'subgroup', 'end_time')]),
        ),
    ]

# ---- zilencer/models.py ----

# Installation-wide count row received from a remote Zulip server.
class RemoteInstallationCount(BaseCount):
    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
    # The remote_id field lets us deduplicate data from the remote server
    remote_id = models.IntegerField(db_index=True)  # type: int

    class Meta:
        unique_together = ("server", "property", "subgroup", "end_time")

    def __str__(self) -> str:
        # The format string must contain one placeholder per value: an
        # empty string formatted with a 3-tuple raises TypeError ("not all
        # arguments converted during string formatting").
        return "<RemoteInstallationCount %s %s %s>" % (self.property, self.subgroup, self.value)

# We can't subclass RealmCount because we only have a realm_id here, not a foreign key.
# Per-realm count row received from a remote Zulip server.  The realm is
# stored as a plain integer ID (realm_id) rather than as a foreign key.
class RemoteRealmCount(BaseCount):
    server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE)  # type: RemoteZulipServer
    realm_id = models.IntegerField(db_index=True)  # type: int
    # The remote_id field lets us deduplicate data from the remote server
    remote_id = models.IntegerField(db_index=True)  # type: int

    class Meta:
        unique_together = ("server", "realm_id", "property", "subgroup", "end_time")
        index_together = ["property", "end_time"]

    def __str__(self) -> str:
        # Space-separated: server, realm_id, property, subgroup, value.
        parts = (self.server, self.realm_id, self.property, self.subgroup, self.value)
        return " ".join(str(part) for part in parts)
def validate_count_stats(server: RemoteZulipServer, model: Any,
                         counts: List[Dict[str, Any]]) -> None:
    """Check submitted count rows before they are stored.

    Every row's 'property' must be a key of COUNT_STATS, and the 'id'
    values must be strictly increasing, starting above the largest
    remote_id already stored for this server in `model`.

    Raises JsonableError on the first row that violates either rule.
    """
    last_id = get_last_id_from_server(server, model)
    for item in counts:
        if item['property'] not in COUNT_STATS:
            # Interpolate after the translation lookup so the message ID
            # is the constant template, not a per-request string.
            raise JsonableError(_("Invalid property %s") % (item['property'],))
        if item['id'] <= last_id:
            raise JsonableError(_("Data is out of order."))
        last_id = item['id']

def _bulk_create_in_batches(model: Any, rows: List[Any], batch_size: int=1000) -> None:
    """Insert rows via model.objects.bulk_create(), batch_size rows per call."""
    for start in range(0, len(rows), batch_size):
        model.objects.bulk_create(rows[start:start + batch_size])

@has_request_variables
def remote_server_post_analytics(request: HttpRequest,
                                 entity: Union[UserProfile, RemoteZulipServer],
                                 realm_counts: List[Dict[str, Any]]=REQ(
                                     validator=check_list(check_dict_only([
                                         ('property', check_string),
                                         ('realm', check_int),
                                         ('id', check_int),
                                         ('end_time', check_float),
                                         ('subgroup', check_none_or(check_string)),
                                         ('value', check_int),
                                     ]))),
                                 installation_counts: List[Dict[str, Any]]=REQ(
                                     validator=check_list(check_dict_only([
                                         ('property', check_string),
                                         ('id', check_int),
                                         ('end_time', check_float),
                                         ('subgroup', check_none_or(check_string)),
                                         ('value', check_int),
                                     ])))) -> HttpResponse:
    """Store RealmCount / InstallationCount rows sent by a remote server.

    Both lists are validated with validate_count_stats() before anything
    is written, then saved as RemoteRealmCount / RemoteInstallationCount
    rows (the sender's row 'id' is kept as remote_id) in batches of 1000.

    Raises JsonableError if either list fails validation.
    """
    validate_entity(entity)
    server = cast(RemoteZulipServer, entity)

    # Each list is checked against the watermark of its own table.
    validate_count_stats(server, RemoteRealmCount, realm_counts)
    validate_count_stats(server, RemoteInstallationCount, installation_counts)

    # 'end_time' arrives as a float timestamp; convert it back to an
    # aware UTC datetime.
    realm_rows = [
        RemoteRealmCount(
            property=item['property'],
            realm_id=item['realm'],
            remote_id=item['id'],
            server=server,
            end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
            subgroup=item['subgroup'],
            value=item['value'])
        for item in realm_counts
    ]
    _bulk_create_in_batches(RemoteRealmCount, realm_rows)

    installation_rows = [
        RemoteInstallationCount(
            property=item['property'],
            remote_id=item['id'],
            server=server,
            end_time=datetime.datetime.fromtimestamp(item['end_time'], tz=timezone_utc),
            subgroup=item['subgroup'],
            value=item['value'])
        for item in installation_counts
    ]
    _bulk_create_in_batches(RemoteInstallationCount, installation_rows)
    return json_success()

def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int:
    """Return the largest remote_id stored in `model` for `server`, or 0
    when no rows exist for that server.
    """
    last_count = model.objects.filter(server=server).order_by("remote_id").last()
    if last_count is not None:
        return last_count.remote_id
    return 0

@has_request_variables
def remote_server_check_analytics(request: HttpRequest,
                                  entity: Union[UserProfile, RemoteZulipServer]) -> HttpResponse:
    """Report the last RealmCount / InstallationCount IDs stored for the
    calling server, as 'last_realm_count_id' and
    'last_installation_count_id' (0 when nothing is stored).
    """
    validate_entity(entity)
    server = cast(RemoteZulipServer, entity)

    result = {
        'last_realm_count_id': get_last_id_from_server(server, RemoteRealmCount),
        'last_installation_count_id': get_last_id_from_server(
            server, RemoteInstallationCount),
    }
    return json_success(result)