remote data: Send RealmAuditLog data.

This commit is contained in:
Rishi Gupta 2019-10-02 17:01:36 -07:00 committed by Tim Abbott
parent b86142089b
commit 360cd7f147
8 changed files with 253 additions and 64 deletions

View File

@ -264,6 +264,7 @@ python_rules = RuleList(
'bad_lines': ['a =b', 'asdf =42']},
{'pattern': r'":\w[^"]*$',
'description': 'Missing whitespace after ":"',
'exclude': set(['zerver/tests/test_push_notifications.py']),
'good_lines': ['"foo": bar', '"some:string:with:colons"'],
'bad_lines': ['"foo":bar', '"foo":1']},
{'pattern': r"':\w[^']*$",

View File

@ -12,6 +12,7 @@ from analytics.models import InstallationCount, RealmCount
from version import ZULIP_VERSION
from zerver.lib.exceptions import JsonableError
from zerver.lib.export import floatify_datetime_fields
from zerver.models import RealmAuditLog
class PushNotificationBouncerException(Exception):
pass
@ -91,24 +92,34 @@ def send_json_to_push_bouncer(method: str, endpoint: str, post_data: Dict[str, A
extra_headers={"Content-type": "application/json"},
)
REALMAUDITLOG_PUSHED_FIELDS = ['id', 'realm', 'event_time', 'backfilled', 'extra_data', 'event_type']
def build_analytics_data(realm_count_query: Any,
installation_count_query: Any) -> Tuple[List[Dict[str, Any]],
List[Dict[str, Any]]]:
installation_count_query: Any,
realmauditlog_query: Any) -> Tuple[List[Dict[str, Any]],
List[Dict[str, Any]],
List[Dict[str, Any]]]:
# We limit the batch size on the client side to avoid OOM kills timeouts, etc.
MAX_CLIENT_BATCH_SIZE = 10000
data = {}
data['analytics_realmcount'] = [
model_to_dict(realm_count) for realm_count in
model_to_dict(row) for row in
realm_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
]
data['analytics_installationcount'] = [
model_to_dict(count) for count in
model_to_dict(row) for row in
installation_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
]
data['zerver_realmauditlog'] = [
model_to_dict(row, fields=REALMAUDITLOG_PUSHED_FIELDS) for row in
realmauditlog_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
]
floatify_datetime_fields(data, 'analytics_realmcount')
floatify_datetime_fields(data, 'analytics_installationcount')
return (data['analytics_realmcount'], data['analytics_installationcount'])
floatify_datetime_fields(data, 'zerver_realmauditlog')
return (data['analytics_realmcount'], data['analytics_installationcount'],
data['zerver_realmauditlog'])
def send_analytics_to_remote_server() -> None:
# first, check what's latest
@ -118,19 +129,24 @@ def send_analytics_to_remote_server() -> None:
last_acked_realm_count_id = result['last_realm_count_id']
last_acked_installation_count_id = result['last_installation_count_id']
last_acked_realmauditlog_id = result['last_realmauditlog_id']
(realm_count_data, installation_count_data) = build_analytics_data(
(realm_count_data, installation_count_data, realmauditlog_data) = build_analytics_data(
realm_count_query=RealmCount.objects.filter(
id__gt=last_acked_realm_count_id),
installation_count_query=InstallationCount.objects.filter(
id__gt=last_acked_installation_count_id))
id__gt=last_acked_installation_count_id),
realmauditlog_query=RealmAuditLog.objects.filter(
event_type__in=RealmAuditLog.SYNCED_BILLING_EVENTS,
id__gt=last_acked_realmauditlog_id))
if len(realm_count_data) == 0 and len(installation_count_data) == 0:
if len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data) == 0:
return
request = {
'realm_counts': ujson.dumps(realm_count_data),
'installation_counts': ujson.dumps(installation_count_data),
'realmauditlog_rows': ujson.dumps(realmauditlog_data),
'version': ujson.dumps(ZULIP_VERSION),
}

View File

@ -2541,32 +2541,8 @@ EMAIL_TYPES = {
'invitation_reminder': ScheduledEmail.INVITATION_REMINDER,
}
class RealmAuditLog(models.Model):
"""
RealmAuditLog tracks important changes to users, streams, and
realms in Zulip. It is intended to support both
debugging/introspection (e.g. determining when a user's left a
given stream?) as well as help with some database migrations where
we might be able to do a better data backfill with it. Here are a
few key details about how this works:
* acting_user is the user who initiated the state change
* modified_user (if present) is the user being modified
* modified_stream (if present) is the stream being modified
For example:
* When a user subscribes another user to a stream, modified_user,
acting_user, and modified_stream will all be present and different.
* When an administrator changes an organization's realm icon,
acting_user is that administrator and both modified_user and
modified_stream will be None.
"""
realm = models.ForeignKey(Realm, on_delete=CASCADE) # type: Realm
acting_user = models.ForeignKey(UserProfile, null=True, related_name='+', on_delete=CASCADE) # type: Optional[UserProfile]
modified_user = models.ForeignKey(UserProfile, null=True, related_name='+', on_delete=CASCADE) # type: Optional[UserProfile]
modified_stream = models.ForeignKey(Stream, null=True, on_delete=CASCADE) # type: Optional[Stream]
event_last_message_id = models.IntegerField(null=True) # type: Optional[int]
class AbstractRealmAuditLog(models.Model):
"""Defines fields common to RealmAuditLog and RemoteRealmAuditLog."""
event_time = models.DateTimeField(db_index=True) # type: datetime.datetime
# If True, event_time is an overestimate of the true time. Can be used
# by migrations when introducing a new event_type.
@ -2627,6 +2603,35 @@ class RealmAuditLog(models.Model):
USER_CREATED, USER_ACTIVATED, USER_DEACTIVATED, USER_REACTIVATED, USER_ROLE_CHANGED,
REALM_DEACTIVATED, REALM_REACTIVATED]
class Meta:
abstract = True
class RealmAuditLog(AbstractRealmAuditLog):
"""
RealmAuditLog tracks important changes to users, streams, and
realms in Zulip. It is intended to support both
debugging/introspection (e.g. determining when a user's left a
given stream?) as well as help with some database migrations where
we might be able to do a better data backfill with it. Here are a
few key details about how this works:
* acting_user is the user who initiated the state change
* modified_user (if present) is the user being modified
* modified_stream (if present) is the stream being modified
For example:
* When a user subscribes another user to a stream, modified_user,
acting_user, and modified_stream will all be present and different.
* When an administrator changes an organization's realm icon,
acting_user is that administrator and both modified_user and
modified_stream will be None.
"""
realm = models.ForeignKey(Realm, on_delete=CASCADE) # type: Realm
acting_user = models.ForeignKey(UserProfile, null=True, related_name='+', on_delete=CASCADE) # type: Optional[UserProfile]
modified_user = models.ForeignKey(UserProfile, null=True, related_name='+', on_delete=CASCADE) # type: Optional[UserProfile]
modified_stream = models.ForeignKey(Stream, null=True, on_delete=CASCADE) # type: Optional[Stream]
event_last_message_id = models.IntegerField(null=True) # type: Optional[int]
def __str__(self) -> str:
if self.modified_user is not None:
return "<RealmAuditLog: %s %s %s %s>" % (

View File

@ -33,6 +33,7 @@ from zerver.models import (
get_realm,
get_stream,
Recipient,
RealmAuditLog,
Stream,
Subscription,
)
@ -67,7 +68,7 @@ from zerver.lib.test_classes import (
)
from zilencer.models import RemoteZulipServer, RemotePushDeviceToken, \
RemoteRealmCount, RemoteInstallationCount
RemoteRealmCount, RemoteInstallationCount, RemoteRealmAuditLog
from django.utils.timezone import now
ZERVER_DIR = os.path.dirname(os.path.dirname(__file__))
@ -319,6 +320,23 @@ class AnalyticsBouncerTest(BouncerTestCase):
user = self.example_user('hamlet')
end_time = self.TIME_ZERO
# Send any existing data over, so that we can start the test with a "clean" slate
audit_log_max_id = RealmAuditLog.objects.all().order_by('id').last().id
send_analytics_to_remote_server()
self.assertEqual(mock_request.call_count, 2)
remote_audit_log_count = RemoteRealmAuditLog.objects.count()
self.assertEqual(RemoteRealmCount.objects.count(), 0)
self.assertEqual(RemoteInstallationCount.objects.count(), 0)
def check_counts(mock_request_call_count: int, remote_realm_count: int,
remote_installation_count: int, remote_realm_audit_log: int) -> None:
self.assertEqual(mock_request.call_count, mock_request_call_count)
self.assertEqual(RemoteRealmCount.objects.count(), remote_realm_count)
self.assertEqual(RemoteInstallationCount.objects.count(), remote_installation_count)
self.assertEqual(RemoteRealmAuditLog.objects.count(),
remote_audit_log_count + remote_realm_audit_log)
# Create some rows we'll send to remote server
realm_stat = LoggingCountStat('invites_sent::day', RealmCount, CountStat.DAY)
RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time, value=5)
@ -327,48 +345,63 @@ class AnalyticsBouncerTest(BouncerTestCase):
# We set a subgroup here to work around:
# https://github.com/zulip/zulip/issues/12362
subgroup="test_subgroup")
# Event type in SYNCED_BILLING_EVENTS -- should be included
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=RealmAuditLog.USER_CREATED,
event_time=end_time, extra_data='data')
# Event type not in SYNCED_BILLING_EVENTS -- should not be included
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=RealmAuditLog.REALM_LOGO_CHANGED,
event_time=end_time, extra_data='data')
self.assertEqual(RealmCount.objects.count(), 1)
self.assertEqual(InstallationCount.objects.count(), 1)
self.assertEqual(RealmAuditLog.objects.filter(id__gt=audit_log_max_id).count(), 2)
self.assertEqual(RemoteRealmCount.objects.count(), 0)
self.assertEqual(RemoteInstallationCount.objects.count(), 0)
send_analytics_to_remote_server()
self.assertEqual(mock_request.call_count, 2)
self.assertEqual(RemoteRealmCount.objects.count(), 1)
self.assertEqual(RemoteInstallationCount.objects.count(), 1)
send_analytics_to_remote_server()
self.assertEqual(mock_request.call_count, 3)
self.assertEqual(RemoteRealmCount.objects.count(), 1)
self.assertEqual(RemoteInstallationCount.objects.count(), 1)
check_counts(4, 1, 1, 1)
# Test having no new rows
send_analytics_to_remote_server()
check_counts(5, 1, 1, 1)
# Test only having new RealmCount rows
RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time + datetime.timedelta(days=1), value=6)
RealmCount.objects.create(
realm=user.realm, property=realm_stat.property, end_time=end_time + datetime.timedelta(days=2), value=9)
self.assertEqual(RemoteRealmCount.objects.count(), 1)
self.assertEqual(mock_request.call_count, 3)
send_analytics_to_remote_server()
self.assertEqual(mock_request.call_count, 5)
self.assertEqual(RemoteRealmCount.objects.count(), 3)
self.assertEqual(RemoteInstallationCount.objects.count(), 1)
check_counts(7, 3, 1, 1)
# Test only having new InstallationCount rows
InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + datetime.timedelta(days=1), value=6)
InstallationCount.objects.create(
property=realm_stat.property, end_time=end_time + datetime.timedelta(days=2), value=9)
send_analytics_to_remote_server()
self.assertEqual(mock_request.call_count, 7)
self.assertEqual(RemoteRealmCount.objects.count(), 3)
self.assertEqual(RemoteInstallationCount.objects.count(), 3)
check_counts(9, 3, 2, 1)
# Test only having new RealmAuditLog rows
# Non-synced event
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=RealmAuditLog.REALM_LOGO_CHANGED,
event_time=end_time, extra_data='data')
send_analytics_to_remote_server()
check_counts(10, 3, 2, 1)
# Synced event
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=RealmAuditLog.USER_REACTIVATED,
event_time=end_time, extra_data='data')
send_analytics_to_remote_server()
check_counts(12, 3, 2, 2)
(realm_count_data,
installation_count_data) = build_analytics_data(RealmCount.objects.all(),
InstallationCount.objects.all())
installation_count_data,
realmauditlog_data) = build_analytics_data(RealmCount.objects.all(),
InstallationCount.objects.all(),
RealmAuditLog.objects.all())
result = self.api_post(self.server_uuid,
'/api/v1/remotes/server/analytics',
{'realm_counts': ujson.dumps(realm_count_data),
'installation_counts': ujson.dumps(installation_count_data)},
'installation_counts': ujson.dumps(installation_count_data),
'realmauditlog_rows': ujson.dumps(realmauditlog_data)},
subdomain="")
self.assert_json_error(result, "Data is out of order.")
@ -381,7 +414,8 @@ class AnalyticsBouncerTest(BouncerTestCase):
self.server_uuid,
'/api/v1/remotes/server/analytics',
{'realm_counts': ujson.dumps(realm_count_data),
'installation_counts': ujson.dumps(installation_count_data)},
'installation_counts': ujson.dumps(installation_count_data),
'realmauditlog_rows': ujson.dumps(realmauditlog_data)},
subdomain="")
self.assert_json_error(result, "Invalid data.")
@ -407,6 +441,73 @@ class AnalyticsBouncerTest(BouncerTestCase):
log_warning.assert_called_once()
self.assertEqual(RemoteRealmCount.objects.count(), 0)
# Servers on Zulip 2.0.6 and earlier only send realm_counts and installation_counts data,
# and don't send realmauditlog_rows. Make sure that continues to work.
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
@mock.patch('zerver.lib.push_notifications.requests.request')
def test_old_two_table_format(self, mock_request: Any) -> None:
mock_request.side_effect = self.bounce_request
# Send fixture generated with Zulip 2.0 code
send_to_push_bouncer('POST', 'server/analytics', {
'realm_counts': '[{"id":1,"property":"invites_sent::day","subgroup":null,"end_time":574300800.0,"value":5,"realm":2}]', # lint:ignore
'installation_counts': '[]',
'version': '"2.0.6+git"'})
self.assertEqual(mock_request.call_count, 1)
self.assertEqual(RemoteRealmCount.objects.count(), 1)
self.assertEqual(RemoteInstallationCount.objects.count(), 0)
self.assertEqual(RemoteRealmAuditLog.objects.count(), 0)
# Make sure we aren't sending data we don't mean to, even if we don't store it.
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
@mock.patch('zerver.lib.push_notifications.requests.request')
def test_only_sending_intended_realmauditlog_data(self, mock_request: Any) -> None:
mock_request.side_effect = self.bounce_request
user = self.example_user('hamlet')
# Event type in SYNCED_BILLING_EVENTS -- should be included
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=RealmAuditLog.USER_REACTIVATED,
event_time=self.TIME_ZERO, extra_data='data')
# Event type not in SYNCED_BILLING_EVENTS -- should not be included
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=RealmAuditLog.REALM_LOGO_CHANGED,
event_time=self.TIME_ZERO, extra_data='data')
def check_for_unwanted_data(*args: Any) -> Any:
if check_for_unwanted_data.first_call: # type: ignore
check_for_unwanted_data.first_call = False # type: ignore
else:
# Test that we're respecting SYNCED_BILLING_EVENTS
self.assertIn('"event_type":{}'.format(RealmAuditLog.USER_REACTIVATED), str(args))
self.assertNotIn('"event_type":{}'.format(RealmAuditLog.REALM_LOGO_CHANGED), str(args))
# Test that we're respecting REALMAUDITLOG_PUSHED_FIELDS
self.assertIn('backfilled', str(args))
self.assertNotIn('modified_user', str(args))
return send_to_push_bouncer(*args)
# send_analytics_to_remote_server calls send_to_push_bouncer twice.
# We need to distinguish the first and second calls.
check_for_unwanted_data.first_call = True # type: ignore
with mock.patch('zerver.lib.remote_server.send_to_push_bouncer',
side_effect=check_for_unwanted_data):
send_analytics_to_remote_server()
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL='https://push.zulip.org.example.com')
@mock.patch('zerver.lib.push_notifications.requests.request')
def test_realmauditlog_data_mapping(self, mock_request: Any) -> None:
mock_request.side_effect = self.bounce_request
user = self.example_user('hamlet')
log_entry = RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, backfilled=True,
event_type=RealmAuditLog.USER_REACTIVATED, event_time=self.TIME_ZERO, extra_data='data')
send_analytics_to_remote_server()
remote_log_entry = RemoteRealmAuditLog.objects.order_by('id').last()
self.assertEqual(remote_log_entry.server.uuid, self.server_uuid)
self.assertEqual(remote_log_entry.remote_id, log_entry.id)
self.assertEqual(remote_log_entry.event_time, self.TIME_ZERO)
self.assertEqual(remote_log_entry.backfilled, True)
self.assertEqual(remote_log_entry.extra_data, 'data')
self.assertEqual(remote_log_entry.event_type, RealmAuditLog.USER_REACTIVATED)
class PushNotificationTest(BouncerTestCase):
def setUp(self) -> None:
super().setUp()

View File

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.24 on 2019-10-03 00:10
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('zilencer', '0017_installationcount_indexes'),
]
operations = [
migrations.CreateModel(
name='RemoteRealmAuditLog',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('realm_id', models.IntegerField(db_index=True)),
('remote_id', models.IntegerField(db_index=True)),
('event_time', models.DateTimeField(db_index=True)),
('backfilled', models.BooleanField(default=False)),
('extra_data', models.TextField(null=True)),
('event_type', models.PositiveSmallIntegerField()),
('server', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zilencer.RemoteZulipServer')),
],
),
]

View File

@ -2,7 +2,7 @@ import datetime
from django.db import models
from zerver.models import AbstractPushDeviceToken
from zerver.models import AbstractPushDeviceToken, AbstractRealmAuditLog
from analytics.models import BaseCount
def get_remote_server_by_uuid(uuid: str) -> 'RemoteZulipServer':
@ -35,6 +35,19 @@ class RemotePushDeviceToken(AbstractPushDeviceToken):
def __str__(self) -> str:
return "<RemotePushDeviceToken %s %s>" % (self.server, self.user_id)
class RemoteRealmAuditLog(AbstractRealmAuditLog):
"""Synced audit data from a remote Zulip server, used primarily for
billing. See RealmAuditLog and AbstractRealmAuditLog for details.
"""
server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE) # type: RemoteZulipServer
realm_id = models.IntegerField(db_index=True) # type: int
# The remote_id field lets us deduplicate data from the remote server
remote_id = models.IntegerField(db_index=True) # type: int
def __str__(self) -> str:
return "<RemoteRealmAuditLog: %s %s %s %s>" % (
self.server, self.event_type, self.event_time, self.id)
class RemoteInstallationCount(BaseCount):
server = models.ForeignKey(RemoteZulipServer, on_delete=models.CASCADE) # type: RemoteZulipServer
# The remote_id field lets us deduplicate data from the remote server

View File

@ -19,7 +19,7 @@ v1_api_and_json_patterns = [
# Push signup doesn't use the REST API, since there's no auth.
url('^remotes/server/register$', zilencer.views.register_remote_server),
# For receiving InstallationCount data and similar analytics.
# For receiving table data used in analytics and billing
url('^remotes/server/analytics$', rest_dispatch,
{'POST': 'zilencer.views.remote_server_post_analytics'}),
url('^remotes/server/analytics/status$', rest_dispatch,

View File

@ -20,11 +20,11 @@ from zerver.lib.request import REQ, has_request_variables
from zerver.lib.response import json_error, json_success
from zerver.lib.validator import check_int, check_string, \
check_capped_string, check_string_fixed_length, check_float, check_none_or, \
check_dict_only, check_list
check_dict_only, check_list, check_bool
from zerver.models import UserProfile
from zerver.views.push_notifications import validate_token
from zilencer.models import RemotePushDeviceToken, RemoteZulipServer, \
RemoteRealmCount, RemoteInstallationCount
RemoteRealmCount, RemoteInstallationCount, RemoteRealmAuditLog
def validate_entity(entity: Union[UserProfile, RemoteZulipServer]) -> RemoteZulipServer:
if not isinstance(entity, RemoteZulipServer):
@ -192,11 +192,22 @@ def remote_server_post_analytics(request: HttpRequest,
('end_time', check_float),
('subgroup', check_none_or(check_string)),
('value', check_int),
])))) -> HttpResponse:
]))),
realmauditlog_rows: Optional[List[Dict[str, Any]]]=REQ(
validator=check_list(check_dict_only([
('id', check_int),
('realm', check_int),
('event_time', check_float),
('backfilled', check_bool),
('extra_data', check_none_or(check_string)),
('event_type', check_int),
])), default=None)) -> HttpResponse:
server = validate_entity(entity)
validate_incoming_table_data(server, RemoteRealmCount, realm_counts, True)
validate_incoming_table_data(server, RemoteInstallationCount, installation_counts, True)
if realmauditlog_rows is not None:
validate_incoming_table_data(server, RemoteRealmAuditLog, realmauditlog_rows)
row_objects = [RemoteRealmCount(
property=row['property'],
@ -217,6 +228,17 @@ def remote_server_post_analytics(request: HttpRequest,
value=row['value']) for row in installation_counts]
batch_create_table_data(server, RemoteInstallationCount, row_objects)
if realmauditlog_rows is not None:
row_objects = [RemoteRealmAuditLog(
realm_id=row['realm'],
remote_id=row['id'],
server=server,
event_time=datetime.datetime.fromtimestamp(row['event_time'], tz=timezone_utc),
backfilled=row['backfilled'],
extra_data=row['extra_data'],
event_type=row['event_type']) for row in realmauditlog_rows]
batch_create_table_data(server, RemoteRealmAuditLog, row_objects)
return json_success()
def get_last_id_from_server(server: RemoteZulipServer, model: Any) -> int:
@ -234,5 +256,7 @@ def remote_server_check_analytics(request: HttpRequest,
'last_realm_count_id': get_last_id_from_server(server, RemoteRealmCount),
'last_installation_count_id': get_last_id_from_server(
server, RemoteInstallationCount),
'last_realmauditlog_id': get_last_id_from_server(
server, RemoteRealmAuditLog),
}
return json_success(result)