2016-10-08 02:27:50 +02:00
|
|
|
from django.db import models
|
2016-07-29 21:52:45 +02:00
|
|
|
from django.test import TestCase
|
2016-10-08 02:27:50 +02:00
|
|
|
from django.utils import timezone
|
2016-07-29 21:52:45 +02:00
|
|
|
|
|
|
|
from analytics.lib.interval import TimeInterval
|
|
|
|
from analytics.lib.counts import CountStat, process_count_stat, \
|
|
|
|
zerver_count_user_by_realm, zerver_count_message_by_user, \
|
|
|
|
zerver_count_message_by_stream, zerver_count_stream_by_realm, \
|
|
|
|
zerver_count_message_by_huddle
|
2016-10-08 02:27:50 +02:00
|
|
|
from analytics.models import BaseCount, InstallationCount, RealmCount, \
|
|
|
|
UserCount, StreamCount
|
|
|
|
|
|
|
|
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
|
|
|
|
get_user_profile_by_email, get_client
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-08 02:27:50 +02:00
|
|
|
from datetime import datetime, timedelta
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-08 02:27:50 +02:00
|
|
|
from typing import Any, Type, Optional
|
2016-07-29 21:52:45 +02:00
|
|
|
from six import text_type
|
|
|
|
|
2016-10-07 01:29:57 +02:00
|
|
|
class AnalyticsTestCase(TestCase):
    """Shared base for the analytics tests.

    Provides a set of fixed reference times, a default realm created in
    setUp, and lightweight helpers for creating users, streams and messages
    and for asserting on rows of the analytics count tables.
    """
    MINUTE = timedelta(minutes=1)
    HOUR = 60 * MINUTE
    DAY = 24 * HOUR
    # Fixed, timezone-aware reference instant used by every test.
    TIME_ZERO = datetime(2042, 3, 14, tzinfo=timezone.utc)
    TIME_LAST_HOUR = TIME_ZERO - HOUR

    def setUp(self):
        # type: () -> None
        """Create the default realm, dated two days before TIME_ZERO."""
        realm_created_at = self.TIME_ZERO - 2*self.DAY
        self.default_realm = Realm.objects.create(
            domain='analytics.test',
            name='Realm Test',
            date_created=realm_created_at)

    def process_last_hour(self, stat):
        # type: (CountStat) -> None
        """Run process_count_stat for `stat` with both range bounds at TIME_ZERO."""
        # Both bounds end up as the first and last arguments of
        # lib.interval.timeinterval_range. That range is inclusive, so
        # passing TIME_ZERO twice is intentional.
        range_bound = self.TIME_ZERO
        process_count_stat(stat, range_bound, range_bound)

    # Lightweight creation of users, streams, and messages
    def create_user(self, email, **kwargs):
        # type: (str, **Any) -> UserProfile
        """Create a UserProfile for `email`.

        Any field not supplied through kwargs falls back to a test default;
        values given by the caller always take precedence.
        """
        fallbacks = {
            'date_joined': self.TIME_LAST_HOUR,
            'full_name': 'full_name',
            'short_name': 'short_name',
            'pointer': -1,
            'last_pointer_updater': 'seems unused?',
            'realm': self.default_realm,
            'api_key': '42'}
        for field, fallback in fallbacks.items():
            kwargs.setdefault(field, fallback)
        return UserProfile.objects.create(email=email, **kwargs)

    def create_stream(self, **kwargs):
        # type: (**Any) -> Stream
        """Create a Stream, filling unspecified fields with test defaults."""
        fallbacks = {
            'name': 'stream name',
            'realm': self.default_realm,
            'date_created': self.TIME_LAST_HOUR}
        for field, fallback in fallbacks.items():
            kwargs.setdefault(field, fallback)
        return Stream.objects.create(**kwargs)

    def create_message(self, sender, recipient, **kwargs):
        # type: (UserProfile, Recipient, **Any) -> Message
        """Create a Message from `sender` to `recipient`.

        Fields not supplied through kwargs fall back to test defaults.
        """
        # The client lookup happens unconditionally, even when the caller
        # passes its own 'sending_client'.
        fallbacks = {
            'sender': sender,
            'recipient': recipient,
            'subject': 'subject',
            'content': 'hi',
            'pub_date': self.TIME_LAST_HOUR,
            'sending_client': get_client("website")}
        for field, fallback in fallbacks.items():
            kwargs.setdefault(field, fallback)
        return Message.objects.create(**kwargs)

    # Note that this doesn't work for InstallationCount, since InstallationCount has no realm_id
    # kwargs should only ever be a UserProfile or Stream.
    def assertCountEquals(self, value, property, interval = 'hour', end_time = TIME_ZERO,
                          table = RealmCount, realm = None, **kwargs):
        # type: (int, text_type, str, datetime, Type[BaseCount], Optional[Realm], **models.Model) -> None
        """Assert that the first matching row of `table` has the given `value`.

        Rows are filtered on realm (self.default_realm when `realm` is None),
        property, interval and end_time, and then on any extra kwargs.
        """
        target_realm = self.default_realm if realm is None else realm
        candidate_rows = table.objects.filter(realm=target_realm,
                                              property=property,
                                              interval=interval,
                                              end_time=end_time)
        matching_values = candidate_rows.filter(**kwargs).values_list('value', flat=True)
        self.assertEqual(matching_values[0], value)
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-08 02:27:50 +02:00
|
|
|
class TestDataCollectors(AnalyticsTestCase):
    """Tests for the count-stat collectors run through process_count_stat.

    Each test builds a small set of users, streams or messages with the
    helpers inherited from AnalyticsTestCase, processes one or more
    CountStats, and checks the rows written to the analytics count tables.
    """

    def test_human_and_bot_count_by_realm(self):
        # type: () -> None
        # Two bots and one human are created; each stat should count only
        # the users matching its filter.
        stats = [
            CountStat('test_active_humans', zerver_count_user_by_realm,
                      {'is_bot': False, 'is_active': True}, 'hour', 'hour'),
            CountStat('test_active_bots', zerver_count_user_by_realm,
                      {'is_bot': True, 'is_active': True}, 'hour', 'hour')]

        # TODO these dates should probably be explicit, since the default args
        # for the commands are timezone.now() dependent.
        self.create_user('email1-bot', is_bot=True)
        self.create_user('email2-bot', is_bot=True)
        self.create_user('email3-human', is_bot=False)

        for stat in stats:
            self.process_last_hour(stat)

        self.assertCountEquals(1, 'test_active_humans')
        self.assertCountEquals(2, 'test_active_bots')

    # test users added in last hour
    def test_add_new_users(self):
        # type: () -> None
        stat = CountStat('add_new_user_test', zerver_count_user_by_realm, {}, 'hour', 'hour')

        # add new users to realm in last hour
        self.create_user('email1')
        self.create_user('email2')

        # add a new user before an hour
        self.create_user('email3', date_joined=self.TIME_ZERO - 2*self.HOUR)

        # check if user added before the hour is not included
        self.process_last_hour(stat)
        # do_update is writing the stat.property to all zerver tables

        self.assertCountEquals(2, 'add_new_user_test')

    def test_analytics_stat_write(self):
        # type: () -> None
        # might change if we refactor count_query

        stat = CountStat('test_stat_write', zerver_count_stream_by_realm,
                         {'invite_only': False}, 'hour', 'hour')

        # add some stuff to zerver_*
        self.create_stream(name='stream1')
        self.create_stream(name='stream2')
        self.create_stream(name='stream3')

        # run do_pull_from_zerver
        self.process_last_hour(stat)

        # check analytics_* values are correct
        self.assertCountEquals(3, 'test_stat_write')

    # test if process count does nothing if count already processed
    def test_process_count(self):
        # type: () -> None
        # add some active and inactive users that are human
        self.create_user('email1', is_bot=False, is_active=False)
        self.create_user('email2', is_bot=False, is_active=False)
        self.create_user('email3', is_bot=False, is_active=True)

        # run stat to pull active humans
        stat = CountStat('active_humans', zerver_count_user_by_realm,
                         {'is_bot': False, 'is_active': True}, 'hour', 'hour')

        self.process_last_hour(stat)
        self.assertCountEquals(1, 'active_humans')

        # run command again
        self.process_last_hour(stat)

        # check that row is same as before
        self.assertCountEquals(1, 'active_humans')

    # test management commands
    def test_update_analytics_tables(self):
        # type: () -> None
        stat = CountStat('test_messages_sent', zerver_count_message_by_user, {}, 'hour', 'hour')

        user1 = self.create_user('email1')
        user2 = self.create_user('email2')
        recipient = Recipient.objects.create(type_id=user2.id, type=Recipient.PERSONAL)
        self.create_message(user1, recipient)

        # run command
        self.process_last_hour(stat)
        usercount_row = UserCount.objects.filter(
            realm=self.default_realm, interval='hour',
            property='test_messages_sent').values_list('value', flat=True)[0]
        # Use a unittest assertion rather than a bare `assert`: it is not
        # stripped under `python -O` and reports both values on failure.
        self.assertEqual(usercount_row, 1)

        # run command with dates before message creation
        process_count_stat(stat, range_start=self.TIME_ZERO - 2*self.HOUR,
                           range_end=self.TIME_LAST_HOUR)

        # check no earlier rows created, old ones still there
        self.assertFalse(UserCount.objects.filter(end_time__lt=self.TIME_ZERO - 2*self.HOUR).exists())
        self.assertCountEquals(1, 'test_messages_sent', table=UserCount, user=user1)

    def test_do_aggregate(self):
        # type: () -> None

        # write some entries to analytics.usercount with smallest interval as day
        stat = CountStat('test_messages_aggregate', zerver_count_message_by_user, {}, 'day', 'hour')

        # write some messages
        user1 = self.create_user('email1')
        user2 = self.create_user('email2')
        recipient = Recipient.objects.create(type_id=user2.id, type=Recipient.PERSONAL)

        self.create_message(user1, recipient)
        self.create_message(user1, recipient)
        self.create_message(user1, recipient)

        # run command
        self.process_last_hour(stat)

        # check no rows for hour interval on usercount granularity
        self.assertFalse(UserCount.objects.filter(realm=self.default_realm, interval='hour').exists())

        # see if aggregated correctly to realmcount and installationcount
        self.assertCountEquals(3, 'test_messages_aggregate', interval='day')

        # InstallationCount is queried directly because assertCountEquals
        # always filters on realm. assertEqual replaces the deprecated
        # assertEquals alias.
        installation_values = InstallationCount.objects.filter(
            interval='day',
            property='test_messages_aggregate').values_list('value', flat=True)
        self.assertEqual(installation_values[0], 3)

    def test_message_to_stream_aggregation(self):
        # type: () -> None
        stat = CountStat('test_messages_to_stream', zerver_count_message_by_stream, {}, 'hour', 'hour')

        # write some messages
        user = self.create_user('email')
        stream = self.create_stream(date_created=self.TIME_ZERO - 2*self.HOUR)

        recipient = Recipient.objects.create(type_id=stream.id, type=Recipient.STREAM)

        self.create_message(user, recipient=recipient)

        # run command
        self.process_last_hour(stat)

        self.assertCountEquals(1, 'test_messages_to_stream', table=StreamCount)

    def test_count_before_realm_creation(self):
        # type: () -> None
        stat = CountStat('test_active_humans', zerver_count_user_by_realm,
                         {'is_bot': False, 'is_active': True}, 'hour', 'hour')

        realm = Realm.objects.create(domain='domain', name='name', date_created=self.TIME_ZERO)
        self.create_user('email', realm=realm)

        # run count prior to realm creation
        process_count_stat(stat, range_start=self.TIME_ZERO - 2*self.HOUR,
                           range_end=self.TIME_LAST_HOUR)

        self.assertFalse(RealmCount.objects.filter(realm=realm).exists())

    def test_empty_counts_in_realm(self):
        # type: () -> None

        # test that rows with empty counts are returned if realm exists
        stat = CountStat('test_active_humans', zerver_count_user_by_realm,
                         {'is_bot': False, 'is_active': True}, 'hour', 'hour')

        self.create_user('email')

        process_count_stat(stat, range_start=self.TIME_ZERO - 2*self.HOUR,
                           range_end=self.TIME_ZERO)
        self.assertCountEquals(0, 'test_active_humans', end_time=self.TIME_ZERO - 2*self.HOUR)
        self.assertCountEquals(0, 'test_active_humans', end_time=self.TIME_LAST_HOUR)
        self.assertCountEquals(1, 'test_active_humans', end_time=self.TIME_ZERO)
|