2016-10-08 02:27:50 +02:00
|
|
|
from django.db import models
|
2016-07-29 21:52:45 +02:00
|
|
|
from django.test import TestCase
|
2016-10-08 02:27:50 +02:00
|
|
|
from django.utils import timezone
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-11 02:23:42 +02:00
|
|
|
from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
|
2016-07-29 21:52:45 +02:00
|
|
|
zerver_count_user_by_realm, zerver_count_message_by_user, \
|
2016-10-12 23:40:48 +02:00
|
|
|
zerver_count_message_by_stream, zerver_count_stream_by_realm, \
|
|
|
|
do_fill_count_stat_at_hour, ZerverCountQuery
|
2016-10-08 02:27:50 +02:00
|
|
|
from analytics.models import BaseCount, InstallationCount, RealmCount, \
|
2016-10-12 23:40:48 +02:00
|
|
|
UserCount, StreamCount, FillState, get_fill_state, installation_epoch
|
2016-10-08 02:27:50 +02:00
|
|
|
|
|
|
|
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
|
|
|
|
get_user_profile_by_email, get_client
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-08 02:27:50 +02:00
|
|
|
from datetime import datetime, timedelta
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-08 02:27:50 +02:00
|
|
|
from typing import Any, Type, Optional
|
2016-07-29 21:52:45 +02:00
|
|
|
from six import text_type
|
|
|
|
|
2016-10-07 01:29:57 +02:00
|
|
|
class AnalyticsTestCase(TestCase):
    """Base class for the analytics tests.

    Provides time constants anchored at TIME_ZERO, a default realm created in
    setUp, lightweight factories for users, streams and messages, and an
    assertion helper for rows in the count tables.
    """
    MINUTE = timedelta(minutes=1)
    HOUR = timedelta(hours=1)
    DAY = timedelta(days=1)
    TIME_ZERO = datetime(2042, 3, 14, tzinfo=timezone.utc)
    TIME_LAST_HOUR = TIME_ZERO - HOUR

    def setUp(self):
        # type: () -> None
        # Each test gets a realm created two days before TIME_ZERO.
        realm_created = self.TIME_ZERO - 2*self.DAY
        self.default_realm = Realm.objects.create(
            string_id='realmtest',
            name='Realm Test',
            domain='analytics.test',
            date_created=realm_created)

    # Lightweight creation of users, streams, and messages
    def create_user(self, email, **kwargs):
        # type: (str, **Any) -> UserProfile
        """Create and return a UserProfile with the given email.

        Any field not supplied in kwargs falls back to the test defaults
        below (joined at TIME_LAST_HOUR, member of the default realm).
        """
        fallbacks = {
            'date_joined': self.TIME_LAST_HOUR,
            'full_name': 'full_name',
            'short_name': 'short_name',
            'pointer': -1,
            'last_pointer_updater': 'seems unused?',
            'realm': self.default_realm,
            'api_key': '42'}
        for field, fallback in fallbacks.items():
            kwargs.setdefault(field, fallback)
        return UserProfile.objects.create(email=email, **kwargs)

    def create_stream(self, **kwargs):
        # type: (**Any) -> Stream
        """Create and return a Stream.

        Fields not supplied in kwargs default to a fixed name, the default
        realm, and a creation time of TIME_LAST_HOUR.
        """
        fallbacks = {
            'name': 'stream name',
            'realm': self.default_realm,
            'date_created': self.TIME_LAST_HOUR}
        for field, fallback in fallbacks.items():
            kwargs.setdefault(field, fallback)
        return Stream.objects.create(**kwargs)

    def create_message(self, sender, recipient, **kwargs):
        # type: (UserProfile, Recipient, **Any) -> Message
        """Create and return a Message from sender to recipient.

        Fields not supplied in kwargs default to a fixed subject and content,
        a pub_date of TIME_LAST_HOUR, and the "website" client.
        """
        fallbacks = {
            'sender': sender,
            'recipient': recipient,
            'subject': 'subject',
            'content': 'hi',
            'pub_date': self.TIME_LAST_HOUR,
            'sending_client': get_client("website")}
        for field, fallback in fallbacks.items():
            kwargs.setdefault(field, fallback)
        return Message.objects.create(**kwargs)

    # This helper filters on realm, so it cannot be used with
    # InstallationCount, which has no realm_id.
    # kwargs should only ever be a UserProfile or Stream.
    def assertCountEquals(self, table, property, value, end_time = TIME_ZERO, interval = CountStat.HOUR,
                          realm = None, **kwargs):
        # type: (Type[BaseCount], text_type, int, datetime, str, Optional[Realm], **models.Model) -> None
        """Assert that the first matching row in table has the given value.

        Rows are matched on realm (the default realm when realm is None),
        property, interval and end_time, then narrowed further by kwargs.
        """
        if realm is None:
            realm = self.default_realm
        matching_rows = table.objects.filter(
            realm=realm, property=property, interval=interval, end_time=end_time)
        stored_values = matching_rows.filter(**kwargs).values_list('value', flat=True)
        self.assertEqual(stored_values[0], value)
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-08 03:23:24 +02:00
|
|
|
# Tests management commands, backfilling, adding new stats, etc
|
|
|
|
class TestUpdateAnalyticsCounts(AnalyticsTestCase):
    # Checks that do_fill_count_stat_at_hour writes the expected rows into the
    # analytics count tables.

    def test_analytics_stat_write(self):
        # type: () -> None
        # might change if we refactor count_query
        stat = CountStat('test_stat_write', zerver_count_stream_by_realm,
                         {'invite_only': False}, None, CountStat.HOUR, False)

        # add some stuff to zerver_*
        self.create_stream(name='stream1')
        self.create_stream(name='stream2')
        self.create_stream(name='stream3')

        # run do_fill_count_stat_at_hour
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)

        # check analytics_* values are correct
        self.assertCountEquals(RealmCount, 'test_stat_write', 3)

    def test_update_analytics_tables(self):
        # type: () -> None
        stat = CountStat('test_messages_sent', zerver_count_message_by_user, {}, None, CountStat.HOUR, False)

        # one personal message from user1 to user2, sent at TIME_LAST_HOUR
        user1 = self.create_user('email1')
        user2 = self.create_user('email2')
        recipient = Recipient.objects.create(type_id=user2.id, type=Recipient.PERSONAL)
        self.create_message(user1, recipient)

        # run command
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
        usercount_row = UserCount.objects.filter(realm=self.default_realm, interval=CountStat.HOUR,
                                                 property='test_messages_sent').values_list(
                                                     'value', flat=True)[0]
        # Use the unittest assertion rather than a bare assert: it is not
        # stripped under `python -O` and reports both values on failure.
        self.assertEqual(usercount_row, 1)

        # run command with date before message creation
        do_fill_count_stat_at_hour(stat, self.TIME_LAST_HOUR)

        # check no earlier rows created, old ones still there
        self.assertFalse(UserCount.objects.filter(end_time__lt = self.TIME_LAST_HOUR).exists())
        self.assertCountEquals(UserCount, 'test_messages_sent', 1, user = user1)
|
2016-10-08 03:23:24 +02:00
|
|
|
|
|
|
|
class TestProcessCountStat(AnalyticsTestCase):
    # Tests for process_count_stat and its FillState bookkeeping, plus a few
    # direct checks of do_fill_count_stat_at_hour.

    def make_dummy_count_stat(self, current_time):
        # type: (datetime) -> CountStat
        """Build an hourly CountStat named 'test stat' around a fixed INSERT.

        The query inserts a single analytics_realmcount row whose end_time is
        current_time.
        """
        insert_sql = """INSERT INTO analytics_realmcount (realm_id, property, end_time, interval, value)
                                VALUES (222, 'test stat', '%(end_time)s','hour', 22)""" % {'end_time': current_time}
        zerver_query = ZerverCountQuery(Recipient, UserCount, insert_sql)
        return CountStat('test stat', zerver_query, {}, None, CountStat.HOUR, False)

    def assertFillStateEquals(self, end_time, state = FillState.DONE, property = None):
        # type: (datetime, int, Optional[text_type]) -> None
        """Assert the fill state recorded for property has this end_time and state.

        When property is None, the dummy stat's property is used.
        """
        dummy_stat = self.make_dummy_count_stat(end_time)
        stat_property = dummy_stat.property if property is None else property
        fill_state = get_fill_state(stat_property)
        self.assertEqual(fill_state['end_time'], end_time)
        self.assertEqual(fill_state['state'], state)

    def _hourly_installation_rows(self, count_stat):
        # type: (CountStat) -> int
        """Return the number of hourly InstallationCount rows for count_stat's property."""
        return InstallationCount.objects.filter(property = count_stat.property,
                                                interval = CountStat.HOUR).count()

    def test_process_stat(self):
        # type: () -> None
        # process new stat
        current_time = installation_epoch() + self.HOUR
        count_stat = self.make_dummy_count_stat(current_time)
        process_count_stat(count_stat, current_time)
        self.assertFillStateEquals(current_time)
        self.assertEqual(self._hourly_installation_rows(count_stat), 1)

        # dirty stat
        FillState.objects.filter(property=count_stat.property).update(state=FillState.STARTED)
        process_count_stat(count_stat, current_time)
        self.assertFillStateEquals(current_time)
        self.assertEqual(self._hourly_installation_rows(count_stat), 1)

        # clean stat, no update
        process_count_stat(count_stat, current_time)
        self.assertFillStateEquals(current_time)
        self.assertEqual(self._hourly_installation_rows(count_stat), 1)

        # clean stat, with update
        current_time = current_time + self.HOUR
        count_stat = self.make_dummy_count_stat(current_time)
        process_count_stat(count_stat, current_time)
        self.assertFillStateEquals(current_time)
        self.assertEqual(self._hourly_installation_rows(count_stat), 2)

    # test users added in last hour
    def test_add_new_users(self):
        # type: () -> None
        stat = CountStat('add_new_user_test', zerver_count_user_by_realm, {}, None, CountStat.HOUR, False)

        # two users joining at the default time, TIME_LAST_HOUR
        for email in ('email1', 'email2'):
            self.create_user(email)

        # one user who joined two hours before TIME_ZERO
        self.create_user('email3', date_joined=self.TIME_ZERO - 2*self.HOUR)

        # the user who joined earlier must not be included in the count
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)

        self.assertCountEquals(RealmCount, 'add_new_user_test', 2)

    def test_count_before_realm_creation(self):
        # type: () -> None
        stat = CountStat('test_active_humans', zerver_count_user_by_realm,
                         {'is_bot': False, 'is_active': True}, None, CountStat.HOUR, False)

        # a realm created at TIME_ZERO, with one user in it
        new_realm = Realm.objects.create(string_id='string_id', name='name', domain='domain',
                                         date_created=self.TIME_ZERO)
        self.create_user('email', realm=new_realm)

        # run count prior to realm creation
        do_fill_count_stat_at_hour(stat, self.TIME_LAST_HOUR)
        self.assertFalse(RealmCount.objects.filter(realm=new_realm).exists())

    def test_empty_counts_in_realm(self):
        # type: () -> None
        # the default realm has no users here, so filling the stat must leave
        # no RealmCount rows for it
        stat = CountStat('test_active_humans', zerver_count_user_by_realm,
                         {'is_bot': False, 'is_active': True}, None, CountStat.HOUR, False)
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
        default_realm_rows = RealmCount.objects.filter(realm=self.default_realm)
        self.assertFalse(default_realm_rows.exists())

    def test_empty_message_aggregates(self):
        # type: () -> None
        # with no messages and no users, filling 'messages_sent' must leave no
        # RealmCount rows for the default realm
        stat = COUNT_STATS['messages_sent']
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
        default_realm_rows = RealmCount.objects.filter(realm=self.default_realm)
        self.assertFalse(default_realm_rows.exists())
|
2016-10-11 02:23:42 +02:00
|
|
|
|
2016-10-08 03:23:24 +02:00
|
|
|
class TestAggregates(AnalyticsTestCase):
    """Placeholder test case: currently defines no tests of its own."""
|
|
|
|
|
|
|
|
class TestXByYQueries(AnalyticsTestCase):
    # Tests for the individual zerver_count_X_by_Y queries.

    def test_message_to_stream_aggregation(self):
        # type: () -> None
        stat = CountStat('test_messages_to_stream', zerver_count_message_by_stream,
                         {}, None, CountStat.HOUR, False)

        # one message sent to a stream created two hours before TIME_ZERO
        sender = self.create_user('email')
        target_stream = self.create_stream(date_created=self.TIME_ZERO - 2*self.HOUR)

        stream_recipient = Recipient(type_id=target_stream.id, type=Recipient.STREAM)
        stream_recipient.save()

        self.create_message(sender, stream_recipient)

        # fill the stat for the hour ending at TIME_ZERO
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)

        self.assertCountEquals(StreamCount, 'test_messages_to_stream', 1)
|
2016-10-08 03:23:24 +02:00
|
|
|
|
|
|
|
class TestCountStats(AnalyticsTestCase):
    # Tests of concrete CountStat definitions built on the count queries.

    def test_human_and_bot_count_by_realm(self):
        # type: () -> None
        # one stat per kind of user; both only count active users
        stat_specs = [('test_active_humans', False), ('test_active_bots', True)]
        stats = [
            CountStat(stat_name, zerver_count_user_by_realm,
                      {'is_bot': bot_flag, 'is_active': True}, None,
                      CountStat.HOUR, False)
            for stat_name, bot_flag in stat_specs]

        # two bots and one human, all in the default realm
        new_users = [('email1-bot', True), ('email2-bot', True), ('email3-human', False)]
        for email, bot_flag in new_users:
            self.create_user(email, is_bot=bot_flag)

        for stat in stats:
            do_fill_count_stat_at_hour(stat, self.TIME_ZERO)

        self.assertCountEquals(RealmCount, 'test_active_humans', 1)
        self.assertCountEquals(RealmCount, 'test_active_bots', 2)
|