analytics: Fix aggregation to RealmCount for realms with no users.

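Previously, if a realm had no users (or no streams),
do_aggregate_to_summary_table would fail to add a RealmCount row with value 0
for that realm. This commit fixes the issue by aggregating via a LEFT JOIN from
zerver_realm (with COALESCE supplying 0 when nothing matches), and also
simplifies the do_aggregate_to_summary_table logic.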
This commit is contained in:
umkay 2016-10-10 17:23:42 -07:00 committed by Tim Abbott
parent 52b56cca65
commit 7e2340155d
2 changed files with 56 additions and 34 deletions

View File

@ -40,49 +40,58 @@ def process_count_stat(stat, range_start, range_end):
# aggregate to summary tables # aggregate to summary tables
for interval in ['hour', 'day', 'gauge']: for interval in ['hour', 'day', 'gauge']:
for time_interval in timeinterval_range(range_start, range_end, interval, stat.frequency): for time_interval in timeinterval_range(range_start, range_end, interval, stat.frequency):
analytics_table = stat.zerver_count_query.analytics_table
if stat.smallest_interval in subintervals(interval): if stat.smallest_interval in subintervals(interval):
if analytics_table in (UserCount, StreamCount): do_aggregate_to_summary_table(stat, time_interval)
do_aggregate_to_summary_table(stat, time_interval, analytics_table, RealmCount)
do_aggregate_to_summary_table(stat, time_interval, RealmCount, InstallationCount)
def do_aggregate_to_summary_table(stat, time_interval):
# type: (CountStat, TimeInterval) -> None
if InstallationCount.objects.filter(property = stat.property,
end_time = time_interval.end,
interval = time_interval.interval).exists():
return
cursor = connection.cursor()
# There are only two summary tables at the moment: RealmCount and InstallationCount. # Aggregate into RealmCount
# Will have to generalize this a bit if more are added analytics_table = stat.zerver_count_query.analytics_table
def do_aggregate_to_summary_table(stat, time_interval, from_table, to_table): if analytics_table in (UserCount, StreamCount):
# type: (CountStat, TimeInterval, Type[BaseCount], Type[BaseCount]) -> None realmcount_query = """
if to_table == RealmCount: INSERT INTO analytics_realmcount
id_cols = 'realm_id,' (realm_id, value, property, end_time, interval)
group_by = 'GROUP BY realm_id' SELECT
elif to_table == InstallationCount: zerver_realm.id, COALESCE(sum(%(analytics_table)s.value), 0), '%(property)s', %%(end_time)s, '%(interval)s'
id_cols = '' FROM zerver_realm
group_by = '' LEFT JOIN %(analytics_table)s
else: ON
raise ValueError("%s is not a summary table" % (to_table,)) (
%(analytics_table)s.realm_id = zerver_realm.id AND
%(analytics_table)s.property = '%(property)s' AND
%(analytics_table)s.end_time = %%(end_time)s AND
%(analytics_table)s.interval = '%(interval)s'
)
GROUP BY zerver_realm.id
""" % {'analytics_table' : analytics_table._meta.db_table,
'property' : stat.property,
'interval' : time_interval.interval}
if to_table.objects.filter(property = stat.property, cursor.execute(realmcount_query, {'end_time': time_interval.end})
end_time = time_interval.end,
interval = time_interval.interval).exists():
return
query = """ # Aggregate into InstallationCount
INSERT INTO %(to_table)s (%(id_cols)s value, property, end_time, interval) installationcount_query = """
SELECT %(id_cols)s COALESCE (sum(value), 0), '%(property)s', %%(end_time)s, '%(interval)s' INSERT INTO analytics_installationcount
FROM %(from_table)s WHERE (value, property, end_time, interval)
SELECT
COALESCE(sum(value), 0), '%(property)s', %%(end_time)s, '%(interval)s'
FROM analytics_realmcount
WHERE
( (
property = '%(property)s' AND property = '%(property)s' AND
end_time = %%(end_time)s AND end_time = %%(end_time)s AND
interval = '%(interval)s' interval = '%(interval)s'
) )
%(group_by)s """ % {'property': stat.property,
""" % {'to_table': to_table._meta.db_table, 'interval': time_interval.interval}
'id_cols' : id_cols,
'from_table' : from_table._meta.db_table, cursor.execute(installationcount_query, {'end_time': time_interval.end})
'property' : stat.property,
'interval' : time_interval.interval,
'group_by' : group_by}
cursor = connection.cursor()
cursor.execute(query, {'end_time': time_interval.end})
cursor.close() cursor.close()
def do_aggregate_hour_to_day(stat, time_interval): def do_aggregate_hour_to_day(stat, time_interval):

View File

@ -3,7 +3,7 @@ from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from analytics.lib.interval import TimeInterval from analytics.lib.interval import TimeInterval
from analytics.lib.counts import CountStat, process_count_stat, \ from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
zerver_count_user_by_realm, zerver_count_message_by_user, \ zerver_count_user_by_realm, zerver_count_message_by_user, \
zerver_count_message_by_stream, zerver_count_stream_by_realm, \ zerver_count_message_by_stream, zerver_count_stream_by_realm, \
zerver_count_message_by_huddle zerver_count_message_by_huddle
@ -229,6 +229,19 @@ class TestProcessCountStat(AnalyticsTestCase):
self.assertCountEquals(RealmCount, 'test_active_humans', 0, end_time = self.TIME_LAST_HOUR) self.assertCountEquals(RealmCount, 'test_active_humans', 0, end_time = self.TIME_LAST_HOUR)
self.assertCountEquals(RealmCount, 'test_active_humans', 1, end_time = self.TIME_ZERO) self.assertCountEquals(RealmCount, 'test_active_humans', 1, end_time = self.TIME_ZERO)
def test_empty_message_aggregates(self):
# type: () -> None
# Check that processing a stat still writes rows to RealmCount, with
# value 0, in the event that we have no messages and no users.
stat = COUNT_STATS['messages_sent']
# Process the range from two hours before TIME_ZERO up to TIME_ZERO.
process_count_stat(stat, range_start=self.TIME_ZERO - 2 * self.HOUR,
range_end=self.TIME_ZERO)
# Expect a 'messages_sent' count of 0 in RealmCount at each of these end
# times (TIME_LAST_HOUR is defined outside this view; presumably
# TIME_ZERO - HOUR -- confirm against AnalyticsTestCase).
self.assertCountEquals(RealmCount, 'messages_sent', 0, end_time=self.TIME_ZERO - 2 * self.HOUR)
self.assertCountEquals(RealmCount, 'messages_sent', 0, end_time=self.TIME_LAST_HOUR)
self.assertCountEquals(RealmCount, 'messages_sent', 0, end_time=self.TIME_ZERO)
class TestAggregates(AnalyticsTestCase): class TestAggregates(AnalyticsTestCase):
pass pass