counts.py: Rename DataCollector-level operations to be more generic.

We're about to use these operations for DependentCountStats, which will run
SQL queries on the analytics tables instead of the zerver tables.
This commit is contained in:
Rishi Gupta 2017-04-05 18:30:36 -07:00 committed by Tim Abbott
parent 47cf1d15ba
commit 1e8d2b984d
2 changed files with 12 additions and 12 deletions

View File

@ -218,7 +218,7 @@ def do_drop_all_analytics_tables():
## DataCollector-level operations ##
def do_pull_from_zerver(property, start_time, end_time, query, group_by):
def do_pull_by_sql_query(property, start_time, end_time, query, group_by):
# type: (str, datetime, datetime, str, Optional[Tuple[models.Model, str]]) -> int
if group_by is None:
subgroup = 'NULL'
@ -239,11 +239,11 @@ def do_pull_from_zerver(property, start_time, end_time, query, group_by):
cursor.close()
return rowcount
def zerver_data_collector(output_table, query, group_by):
def sql_data_collector(output_table, query, group_by):
# type: (Type[BaseCount], str, Optional[Tuple[models.Model, str]]) -> DataCollector
def pull_function(property, start_time, end_time):
# type: (str, datetime, datetime) -> int
return do_pull_from_zerver(property, start_time, end_time, query, group_by)
return do_pull_by_sql_query(property, start_time, end_time, query, group_by)
return DataCollector(output_table, pull_function)
def do_pull_minutes_active(property, start_time, end_time):
@ -428,22 +428,22 @@ count_stream_by_realm_query = """
count_stats_ = [
CountStat('messages_sent:is_bot:hour',
zerver_data_collector(UserCount, count_message_by_user_query, (UserProfile, 'is_bot')),
sql_data_collector(UserCount, count_message_by_user_query, (UserProfile, 'is_bot')),
CountStat.HOUR),
CountStat('messages_sent:message_type:day',
zerver_data_collector(UserCount, count_message_type_by_user_query, None), CountStat.DAY),
sql_data_collector(UserCount, count_message_type_by_user_query, None), CountStat.DAY),
CountStat('messages_sent:client:day',
zerver_data_collector(UserCount, count_message_by_user_query, (Message, 'sending_client_id')),
sql_data_collector(UserCount, count_message_by_user_query, (Message, 'sending_client_id')),
CountStat.DAY),
CountStat('messages_in_stream:is_bot:day',
zerver_data_collector(StreamCount, count_message_by_stream_query, (UserProfile, 'is_bot')),
sql_data_collector(StreamCount, count_message_by_stream_query, (UserProfile, 'is_bot')),
CountStat.DAY),
# Sanity check on the bottom two stats. This is only an approximation:
# e.g. if a user is deactivated between the end of the day and when this
# stat is run, they won't be counted.
CountStat('active_users:is_bot:day',
zerver_data_collector(RealmCount, count_user_by_realm_query, (UserProfile, 'is_bot')),
sql_data_collector(RealmCount, count_user_by_realm_query, (UserProfile, 'is_bot')),
CountStat.DAY, interval=TIMEDELTA_MAX),
# In RealmCount, 'active_humans_audit::day' should be the partial sum sequence
# of 'active_users_log:is_bot:day', for any realm that started after the
@ -451,14 +451,14 @@ count_stats_ = [
# 'active_users_audit:is_bot:day' is the canonical record of which users were
# active on which days (in the UserProfile.is_active sense).
CountStat('active_users_audit:is_bot:day',
zerver_data_collector(UserCount, check_realmauditlog_by_user_query, (UserProfile, 'is_bot')),
sql_data_collector(UserCount, check_realmauditlog_by_user_query, (UserProfile, 'is_bot')),
CountStat.DAY),
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
# The minutes=15 part is due to the 15 minutes added in
# zerver.lib.actions.do_update_user_activity_interval.
CountStat('15day_actives::day',
zerver_data_collector(UserCount, check_useractivityinterval_by_user_query, None),
sql_data_collector(UserCount, check_useractivityinterval_by_user_query, None),
CountStat.DAY, interval=timedelta(days=15)-timedelta(minutes=15)),
CountStat('minutes_active::day', DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY)
]

View File

@ -8,7 +8,7 @@ from django.utils import timezone
from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
do_fill_count_stat_at_hour, do_increment_logging_stat, DataCollector, \
zerver_data_collector, LoggingCountStat, do_aggregate_to_summary_table, \
sql_data_collector, LoggingCountStat, do_aggregate_to_summary_table, \
do_drop_all_analytics_tables
from analytics.models import BaseCount, InstallationCount, RealmCount, \
UserCount, StreamCount, FillState, Anomaly, installation_epoch
@ -157,7 +157,7 @@ class TestProcessCountStat(AnalyticsTestCase):
# type: (datetime) -> CountStat
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
VALUES (1, 'test stat', '%(end_time)s', 22)""" % {'end_time': current_time}
stat = CountStat('test stat', zerver_data_collector(UserCount, dummy_query, None), CountStat.HOUR)
stat = CountStat('test stat', sql_data_collector(UserCount, dummy_query, None), CountStat.HOUR)
return stat
def assertFillStateEquals(self, end_time, state=FillState.DONE, property=None):