2016-07-29 21:52:45 +02:00
|
|
|
from django.db import connection, models
|
analytics: Simplify frequency and measurement interval options.
Change the CountStat object to take an is_gauge variable instead of a
smallest_interval variable. Previously, (smallest_interval, frequency)
could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour),
(day, day), or (day, gauge).
The current change is equivalent to excluding (hour, day) and (day, hour)
from the list above.
This change, along with other recent changes, allows us to simplify how we
handle time intervals. This commit also removes the TimeInterval object.
2016-10-14 00:15:46 +02:00
|
|
|
from django.utils import timezone
|
2016-10-13 22:52:39 +02:00
|
|
|
from django.conf import settings
|
2016-07-29 21:52:45 +02:00
|
|
|
from datetime import timedelta, datetime
|
|
|
|
|
|
|
|
from analytics.models import InstallationCount, RealmCount, \
|
2016-10-12 23:40:48 +02:00
|
|
|
UserCount, StreamCount, BaseCount, FillState, get_fill_state, installation_epoch
|
2016-07-29 21:52:45 +02:00
|
|
|
from zerver.models import Realm, UserProfile, Message, Stream, models
|
analytics: Simplify frequency and measurement interval options.
Change the CountStat object to take an is_gauge variable instead of a
smallest_interval variable. Previously, (smallest_interval, frequency)
could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour),
(day, day), or (day, gauge).
The current change is equivalent to excluding (hour, day) and (day, hour)
from the list above.
This change, along with other recent changes, allows us to simplify how we
handle time intervals. This commit also removes the TimeInterval object.
2016-10-14 00:15:46 +02:00
|
|
|
from zerver.lib.timestamp import floor_to_day
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-26 00:41:57 +02:00
|
|
|
from typing import Any, Dict, Optional, Type, Tuple
|
2016-07-29 21:52:45 +02:00
|
|
|
from six import text_type
|
|
|
|
|
2016-10-13 22:52:39 +02:00
|
|
|
import logging
|
|
|
|
import time
|
|
|
|
|
|
|
|
## Logging setup ##
# One format string is shared by the root logger configuration and by the
# dedicated analytics log file at settings.ANALYTICS_LOG_PATH.
log_format = '%(asctime)s %(levelname)-8s %(message)s'
logging.basicConfig(format=log_format)

logger = logging.getLogger("zulip.management")

# File handler that records every analytics run.
formatter = logging.Formatter(log_format)
file_handler = logging.FileHandler(settings.ANALYTICS_LOG_PATH)
file_handler.setFormatter(formatter)

logger.setLevel(logging.INFO)
logger.addHandler(file_handler)
|
|
|
|
|
analytics: Simplify frequency and measurement interval options.
Change the CountStat object to take an is_gauge variable instead of a
smallest_interval variable. Previously, (smallest_interval, frequency)
could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour),
(day, day), or (day, gauge).
The current change is equivalent to excluding (hour, day) and (day, hour)
from the list above.
This change, along with other recent changes, allows us to simplify how we
handle time intervals. This commit also removes the TimeInterval object.
2016-10-14 00:15:46 +02:00
|
|
|
# Start of the counting window for gauge stats: early enough that a gauge
# covers everything up to its end_time. (The year is that of the first post
# office in Boston.)
MIN_TIME = datetime(1639, 1, 1, tzinfo=timezone.utc)
|
|
|
|
|
2016-07-29 21:52:45 +02:00
|
|
|
class CountStat(object):
    """Describes one analytics statistic: what to count and how often.

    Attributes set by the constructor:
      property           -- name of the stat; stored in the `property` column
                            of the analytics tables.
      zerver_count_query -- ZerverCountQuery that produces the raw counts.
      filter_args        -- column -> value equality filters that are ANDed
                            into the query's join condition.
      group_by           -- None, or a (model class, column name) pair whose
                            value is stored in the `subgroup` column.
      frequency          -- how often the stat is computed: HOUR or DAY.
      interval           -- the window each row covers: GAUGE when is_gauge
                            is true, otherwise the same as frequency.
    """
    HOUR = 'hour'
    DAY = 'day'
    FREQUENCIES = frozenset([HOUR, DAY])
    # Allowed intervals are HOUR, DAY, and GAUGE
    GAUGE = 'gauge'

    def __init__(self, property, zerver_count_query, filter_args, group_by, frequency, is_gauge):
        # type: (text_type, ZerverCountQuery, Dict[str, bool], Optional[Tuple[Type[models.Model], str]], str, bool) -> None
        """Validate `frequency` and record the stat's configuration.

        Raises ValueError if `frequency` is not one of FREQUENCIES.
        """
        self.property = property
        self.zerver_count_query = zerver_count_query
        # might have to do something different for bitfields
        self.filter_args = filter_args
        self.group_by = group_by
        if frequency not in self.FREQUENCIES:
            raise ValueError("Unknown frequency: %s" % (frequency,))
        self.frequency = frequency
        self.interval = self.GAUGE if is_gauge else frequency

    def __repr__(self):
        # type: () -> str
        # Identify the stat by name so log lines and debugger output are
        # readable instead of showing an object address.
        return "<CountStat: %s>" % (self.property,)
|
2016-07-29 21:52:45 +02:00
|
|
|
|
|
|
|
class ZerverCountQuery(object):
    """Bundles a raw SQL counting query with the tables it involves.

    zerver_table    -- model whose db table name is substituted into the
                       query and used to qualify filter columns.
    analytics_table -- analytics model the query inserts into.
    query           -- SQL template; see do_pull_from_zerver for the
                       placeholders that get filled in.
    """

    def __init__(self, zerver_table, analytics_table, query):
        # type: (Type[models.Model], Type[BaseCount], text_type) -> None
        self.zerver_table, self.analytics_table, self.query = \
            zerver_table, analytics_table, query
|
|
|
|
|
2016-10-12 23:40:48 +02:00
|
|
|
def process_count_stat(stat, fill_to_time):
    # type: (CountStat, datetime) -> None
    """Fill in `stat` hour by hour until it is filled through `fill_to_time`.

    Progress is tracked in the stat's FillState row. If there is no row yet,
    one is created at installation_epoch(). If the row is in state STARTED
    (an earlier run was interrupted mid-hour), the rows for that hour are
    deleted and the hour is redone.

    Raises ValueError if the stored state is neither STARTED nor DONE.
    """
    one_hour = timedelta(hours=1)
    fill_state = get_fill_state(stat.property)

    if fill_state is None:
        filled_through = installation_epoch()
        FillState.objects.create(property=stat.property,
                                 end_time=filled_through,
                                 state=FillState.DONE)
        logger.info("INITIALIZED %s %s" % (stat.property, filled_through))
    elif fill_state['state'] == FillState.STARTED:
        interrupted_hour = fill_state['end_time']
        logger.info("UNDO START %s %s" % (stat.property, interrupted_hour))
        do_delete_count_stat_at_hour(stat, interrupted_hour)
        # Roll back to the last hour that is known to be complete.
        filled_through = interrupted_hour - one_hour
        FillState.objects.filter(property=stat.property) \
            .update(end_time=filled_through, state=FillState.DONE)
        logger.info("UNDO DONE %s" % (stat.property,))
    elif fill_state['state'] == FillState.DONE:
        filled_through = fill_state['end_time']
    else:
        raise ValueError("Unknown value for FillState.state: %s." % fill_state['state'])

    next_hour = filled_through + one_hour
    while next_hour <= fill_to_time:
        logger.info("START %s %s %s" % (stat.property, stat.interval, next_hour))
        started_at = time.time()
        # Mark the hour as in progress before writing any counts, so an
        # interrupted run can be detected and undone next time.
        FillState.objects.filter(property=stat.property) \
            .update(end_time=next_hour, state=FillState.STARTED)
        do_fill_count_stat_at_hour(stat, next_hour)
        FillState.objects.filter(property=stat.property).update(state=FillState.DONE)
        finished_at = time.time()
        next_hour = next_hour + one_hour
        logger.info("DONE %s %s (%dms)" % (stat.property, stat.interval,
                                           (finished_at - started_at) * 1000))
|
2016-10-12 23:40:48 +02:00
|
|
|
|
analytics: Simplify frequency and measurement interval options.
Change the CountStat object to take an is_gauge variable instead of a
smallest_interval variable. Previously, (smallest_interval, frequency)
could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour),
(day, day), or (day, gauge).
The current change is equivalent to excluding (hour, day) and (day, hour)
from the list above.
This change, along with other recent changes, allows us to simplify how we
handle time intervals. This commit also removes the TimeInterval object.
2016-10-14 00:15:46 +02:00
|
|
|
# end_time must be timezone aware and fall exactly on an hour boundary.
# Nothing here checks that; callers have to guarantee it.
def do_fill_count_stat_at_hour(stat, end_time):
    # type: (CountStat, datetime) -> None
    """Compute `stat` for the window ending at `end_time` and roll it up.

    Daily stats are skipped unless `end_time` is on a day boundary. The
    window is one hour or one day long for HOUR and DAY intervals, and
    starts at MIN_TIME for any other interval (i.e. GAUGE).
    """
    if stat.frequency == CountStat.DAY:
        if floor_to_day(end_time) != end_time:
            return

    window_lengths = {
        CountStat.HOUR: timedelta(hours=1),
        CountStat.DAY: timedelta(days=1),
    }
    window = window_lengths.get(stat.interval)
    if window is None:
        # stat.interval == CountStat.GAUGE
        start_time = MIN_TIME
    else:
        start_time = end_time - window

    do_pull_from_zerver(stat, start_time, end_time, stat.interval)
    do_aggregate_to_summary_table(stat, end_time, stat.interval)
|
2016-07-29 21:52:45 +02:00
|
|
|
|
2016-10-12 23:40:48 +02:00
|
|
|
def do_delete_count_stat_at_hour(stat, end_time):
    # type: (CountStat, datetime) -> None
    """Delete every row for `stat` at `end_time` from all four count tables."""
    for count_table in (UserCount, StreamCount, RealmCount, InstallationCount):
        rows = count_table.objects.filter(property=stat.property, end_time=end_time)
        rows.delete()
|
|
|
|
|
analytics: Simplify frequency and measurement interval options.
Change the CountStat object to take an is_gauge variable instead of a
smallest_interval variable. Previously, (smallest_interval, frequency)
could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour),
(day, day), or (day, gauge).
The current change is equivalent to excluding (hour, day) and (day, hour)
from the list above.
This change, along with other recent changes, allows us to simplify how we
handle time intervals. This commit also removes the TimeInterval object.
2016-10-14 00:15:46 +02:00
|
|
|
def do_aggregate_to_summary_table(stat, end_time, interval):
    # type: (CountStat, datetime, str) -> None
    """Roll the rows for `stat` at `end_time` up into the summary tables.

    If the stat's query writes to UserCount or StreamCount, those rows are
    first summed per realm and subgroup into analytics_realmcount. The
    analytics_realmcount rows are then summed per subgroup into
    analytics_installationcount.

    The property name, interval and table name are interpolated into the SQL
    text, so they must come from trusted code (the CountStat definitions),
    never from user input. end_time is passed as a query parameter.
    """
    cursor = connection.cursor()
    # try/finally so the cursor is released even if one of the queries fails.
    try:
        # Aggregate into RealmCount
        analytics_table = stat.zerver_count_query.analytics_table
        if analytics_table in (UserCount, StreamCount):
            realmcount_query = """
                INSERT INTO analytics_realmcount
                    (realm_id, value, property, subgroup, end_time, interval)
                SELECT
                    zerver_realm.id, COALESCE(sum(%(analytics_table)s.value), 0), '%(property)s',
                    %(analytics_table)s.subgroup, %%(end_time)s, '%(interval)s'
                FROM zerver_realm
                JOIN %(analytics_table)s
                ON
                (
                    %(analytics_table)s.realm_id = zerver_realm.id AND
                    %(analytics_table)s.property = '%(property)s' AND
                    %(analytics_table)s.end_time = %%(end_time)s AND
                    %(analytics_table)s.interval = '%(interval)s'
                )
                GROUP BY zerver_realm.id, %(analytics_table)s.subgroup
            """ % {'analytics_table': analytics_table._meta.db_table,
                   'property': stat.property,
                   'interval': interval}
            start = time.time()
            cursor.execute(realmcount_query, {'end_time': end_time})
            end = time.time()
            logger.info("%s RealmCount aggregation (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount))

        # Aggregate into InstallationCount
        installationcount_query = """
            INSERT INTO analytics_installationcount
                (value, property, subgroup, end_time, interval)
            SELECT
                sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s, '%(interval)s'
            FROM analytics_realmcount
            WHERE
            (
                property = '%(property)s' AND
                end_time = %%(end_time)s AND
                interval = '%(interval)s'
            ) GROUP BY analytics_realmcount.subgroup
        """ % {'property': stat.property,
               'interval': interval}
        start = time.time()
        cursor.execute(installationcount_query, {'end_time': end_time})
        end = time.time()
        logger.info("%s InstallationCount aggregation (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount))
    finally:
        cursor.close()
|
|
|
|
|
2016-10-25 21:01:21 +02:00
|
|
|
# This is the only method that hits the prod databases directly.
def do_pull_from_zerver(stat, start_time, end_time, interval):
    # type: (CountStat, datetime, datetime, str) -> None
    """Run the stat's counting query for the window [start_time, end_time).

    The query template is filled in with the zerver table name, the property
    name, the interval, the equality filters from stat.filter_args, and the
    subgroup column derived from stat.group_by. The two window bounds are
    passed as query parameters (time_start, time_end).

    Everything interpolated into the SQL text comes from the CountStat
    definition, which must therefore be trusted code, not user input.
    """
    zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
    join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value)
                         for key, value in stat.filter_args.items())
    if stat.group_by is None:
        subgroup = 'NULL'
        group_by_clause = ''
    else:
        subgroup = '%s.%s' % (stat.group_by[0]._meta.db_table, stat.group_by[1])
        group_by_clause = ', ' + subgroup

    # We do string replacement here because passing join_args as a param
    # may result in problems when running cursor.execute; we do
    # the string formatting prior so that cursor.execute runs it as sql
    query_ = stat.zerver_count_query.query % {'zerver_table': zerver_table,
                                              'property': stat.property,
                                              'interval': interval,
                                              'join_args': join_args,
                                              'subgroup': subgroup,
                                              'group_by_clause': group_by_clause}
    cursor = connection.cursor()
    # try/finally so the cursor is released even if the query fails.
    try:
        start = time.time()
        cursor.execute(query_, {'time_start': start_time, 'time_end': end_time})
        end = time.time()
        logger.info("%s do_pull_from_zerver (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount))
    finally:
        cursor.close()
|
|
|
|
|
|
|
|
# Counts, per realm, the users who joined within [time_start, time_end).
# Single-% placeholders are filled in by do_pull_from_zerver; the doubled
# %%(time_start)s / %%(time_end)s survive that step and are bound as query
# parameters by cursor.execute.
count_user_by_realm_query = """
    INSERT INTO analytics_realmcount
        (realm_id, value, property, subgroup, end_time, interval)
    SELECT
        zerver_realm.id, count(%(zerver_table)s),'%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
    FROM zerver_realm
    JOIN zerver_userprofile
    ON
    (
        zerver_userprofile.realm_id = zerver_realm.id AND
        zerver_userprofile.date_joined >= %%(time_start)s AND
        zerver_userprofile.date_joined < %%(time_end)s
        %(join_args)s
    )
    WHERE
        zerver_realm.date_created < %%(time_end)s
    GROUP BY zerver_realm.id %(group_by_clause)s
"""
zerver_count_user_by_realm = ZerverCountQuery(
    UserProfile, RealmCount, count_user_by_realm_query)
|
|
|
|
|
|
|
|
# Counts, per user, the messages sent within [time_start, time_end).
# The join on sender_id is specific to the Message table, so this template
# cannot be reused for other zerver tables as written.
count_message_by_user_query = """
    INSERT INTO analytics_usercount
        (user_id, realm_id, value, property, subgroup, end_time, interval)
    SELECT
        zerver_userprofile.id, zerver_userprofile.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
    FROM zerver_userprofile
    JOIN zerver_message
    ON
    (
        zerver_message.sender_id = zerver_userprofile.id AND
        zerver_message.pub_date >= %%(time_start)s AND
        zerver_message.pub_date < %%(time_end)s
        %(join_args)s
    )
    WHERE
        zerver_userprofile.date_joined < %%(time_end)s
    GROUP BY zerver_userprofile.id %(group_by_clause)s
"""
zerver_count_message_by_user = ZerverCountQuery(
    Message, UserCount, count_message_by_user_query)
|
|
|
|
|
|
|
|
# Counts, per stream, the messages sent to it within [time_start, time_end).
# Single-% placeholders are filled in by do_pull_from_zerver; the doubled
# %%(time_start)s / %%(time_end)s survive that step and are bound as query
# parameters by cursor.execute.
#
# zerver_userprofile is joined via the message's sender so that stats may
# group by a sender column (e.g. group_by=(UserProfile, 'is_bot'), which
# makes %(subgroup)s expand to zerver_userprofile.is_bot). Without that
# join such a stat references a table that is not in the FROM clause.
count_message_by_stream_query = """
    INSERT INTO analytics_streamcount
        (stream_id, realm_id, value, property, subgroup, end_time, interval)
    SELECT
        zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
    FROM zerver_stream
    JOIN zerver_recipient
    ON
    (
        zerver_recipient.type = 2 AND
        zerver_stream.id = zerver_recipient.type_id
    )
    JOIN zerver_message
    ON
    (
        zerver_message.recipient_id = zerver_recipient.id AND
        zerver_message.pub_date >= %%(time_start)s AND
        zerver_message.pub_date < %%(time_end)s AND
        zerver_stream.date_created < %%(time_end)s
        %(join_args)s
    )
    JOIN zerver_userprofile
    ON
    (
        zerver_userprofile.id = zerver_message.sender_id
    )
    GROUP BY zerver_stream.id %(group_by_clause)s
"""
|
|
|
|
# Pairs the per-stream message query with Message as its zerver table and
# StreamCount as the analytics table it is registered against.
zerver_count_message_by_stream = ZerverCountQuery(Message, StreamCount, count_message_by_stream_query)
|
|
|
|
|
|
|
|
# Counts, per realm, the streams created within [time_start, time_end).
# Single-% placeholders are filled in by do_pull_from_zerver; the doubled
# %%(time_start)s / %%(time_end)s survive that step and are bound as query
# parameters by cursor.execute.
count_stream_by_realm_query = """
    INSERT INTO analytics_realmcount
        (realm_id, value, property, subgroup, end_time, interval)
    SELECT
        zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
    FROM zerver_realm
    JOIN zerver_stream
    ON
    (
        zerver_stream.realm_id = zerver_realm.id AND
        zerver_stream.date_created >= %%(time_start)s AND
        zerver_stream.date_created < %%(time_end)s
        %(join_args)s
    )
    WHERE
        zerver_realm.date_created < %%(time_end)s
    GROUP BY zerver_realm.id %(group_by_clause)s
"""
zerver_count_stream_by_realm = ZerverCountQuery(
    Stream, RealmCount, count_stream_by_realm_query)
|
|
|
|
|
2016-11-01 00:17:29 +01:00
|
|
|
# This query departs from the count_X_by_Y_query conventions in two ways.
# One, the X table is not specified by the query name; MessageType is not a
# zerver table. Two, it ignores the %(subgroup)s / %(group_by_clause)s values
# derived from the CountStat; instead it fills the subgroup column with the
# 'message_type' computed in the subquery.
#
# The property column is taken from %(property)s so that the inserted rows
# carry the name the stat is registered under; the aggregation and undo
# steps look rows up by that name.
#
# zerver_stream is LEFT JOINed, and only for stream recipients (type = 2):
# for other recipient types type_id is not a stream id, so an inner join
# would drop those messages unless an unrelated stream happened to share
# the id.
count_message_type_by_user_query = """
    INSERT INTO analytics_usercount
        (realm_id, user_id, value, property, subgroup, end_time, interval)
    SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s, '%(interval)s'
    FROM
    (
        SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
        CASE WHEN
                  zerver_recipient.type != 2 THEN 'private_message'
             WHEN
                  zerver_stream.invite_only = TRUE THEN 'private_stream'
             ELSE 'public_stream'
        END
        message_type

        FROM zerver_userprofile
        JOIN zerver_message
        ON
            zerver_message.sender_id = zerver_userprofile.id AND
            zerver_message.pub_date >= %%(time_start)s AND
            zerver_message.pub_date < %%(time_end)s
            %(join_args)s
        JOIN zerver_recipient
        ON
            zerver_recipient.id = zerver_message.recipient_id
        LEFT JOIN zerver_stream
        ON
            zerver_recipient.type = 2 AND
            zerver_stream.id = zerver_recipient.type_id
        GROUP BY zerver_userprofile.realm_id, zerver_userprofile.id, zerver_recipient.type, zerver_stream.invite_only
    ) AS subquery
    GROUP BY realm_id, id, message_type
"""
|
|
|
|
# Pairs the message-type query with Message as its zerver table and
# UserCount as the analytics table it is registered against.
zerver_count_message_type_by_user = ZerverCountQuery(Message, UserCount, count_message_type_by_user_query)
|
|
|
|
|
2016-07-29 21:52:45 +02:00
|
|
|
# Every stat the analytics system knows about. Each is listed once and the
# lookup table below is keyed by the stat's own property name.
_count_stats = [
    CountStat('active_users:is_bot', zerver_count_user_by_realm,
              {'is_active': True}, (UserProfile, 'is_bot'), CountStat.DAY, True),
    CountStat('messages_sent', zerver_count_message_by_user,
              {}, None, CountStat.HOUR, False),
    CountStat('messages_sent:is_bot', zerver_count_message_by_user,
              {}, (UserProfile, 'is_bot'), CountStat.DAY, False),
    CountStat('messages_sent:message_type', zerver_count_message_type_by_user,
              {}, None, CountStat.DAY, False),
    CountStat('messages_sent:client', zerver_count_message_by_user,
              {}, (Message, 'sending_client_id'), CountStat.HOUR, False),
    CountStat('messages_sent_to_stream:is_bot', zerver_count_message_by_stream,
              {}, (UserProfile, 'is_bot'), CountStat.HOUR, False),
]

COUNT_STATS = {stat.property: stat for stat in _count_stats}
|