2013-10-17 00:12:30 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from __future__ import absolute_import
|
2016-01-24 03:56:05 +01:00
|
|
|
from __future__ import division
|
2013-10-17 00:12:30 +02:00
|
|
|
|
2013-10-18 17:40:16 +02:00
|
|
|
from zerver.models import UserProfile, UserActivity, UserActivityInterval, Message
|
2013-10-17 00:12:30 +02:00
|
|
|
|
|
|
|
from django.utils.timezone import utc
|
2016-06-27 14:33:29 +02:00
|
|
|
from typing import Any, Dict, List, Sequence, Set
|
2016-03-22 06:52:36 +01:00
|
|
|
|
|
|
|
from datetime import datetime, timedelta
|
2013-10-17 00:12:30 +02:00
|
|
|
|
|
|
|
from itertools import chain
|
2015-11-01 17:15:05 +01:00
|
|
|
from six.moves import range
|
2016-03-11 10:57:29 +01:00
|
|
|
import six
|
2013-10-17 00:12:30 +02:00
|
|
|
|
|
|
|
def median(data):
    # type: (Sequence[float]) -> float
    """Return the median of ``data``.

    The values are sorted into a new list, so the argument itself is not
    modified.  With an odd number of values the middle one is returned
    unchanged; with an even number the two middle values are averaged.
    """
    ordered = sorted(data)
    middle, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle]) / 2.0
|
|
|
|
|
2013-10-18 17:40:16 +02:00
|
|
|
# Base queryset used to decide who counts as a sender: messages whose
# sending client name contains "mirror" or "API" are excluded.
users_who_sent_query = (
    Message.objects.select_related("sender")
    .exclude(sending_client__name__contains="mirror")
    .exclude(sending_client__name__contains="API")
)
|
|
|
|
|
|
|
|
def active_users():
    # type: () -> Sequence[UserProfile]
    """Return the users we want to count towards various statistics.

    These are the profiles with ``is_bot=False`` and ``is_active=True``,
    fetched with ``select_related()``.
    """
    human_profiles = UserProfile.objects.filter(is_active=True, is_bot=False)
    return human_profiles.select_related()
|
2013-10-17 00:12:30 +02:00
|
|
|
|
2013-10-18 17:40:16 +02:00
|
|
|
def users_who_sent_between(begin, end):
    # type: (datetime, datetime) -> Set[int]
    """Return the ids of users who sent a message between ``begin`` and ``end``.

    Only messages matched by ``users_who_sent_query`` are considered, and
    both bounds are exclusive (``pub_date__gt`` / ``pub_date__lt``).
    """
    in_window = users_who_sent_query.filter(pub_date__lt=end, pub_date__gt=begin)
    rows = in_window.values("sender__id")
    return {row["sender__id"] for row in rows}
|
2013-10-17 00:12:30 +02:00
|
|
|
|
2013-10-18 17:40:16 +02:00
|
|
|
def users_who_sent_ever():
    # type: () -> Set[int]
    """Return the ids of every sender found in ``users_who_sent_query``."""
    rows = users_who_sent_query.values("sender__id")
    return {row["sender__id"] for row in rows}
|
|
|
|
|
|
|
|
def active_users_to_measure():
    # type: () -> List[UserProfile]
    """Return the users from ``active_users()`` who have ever sent a message."""
    ever_sent = users_who_sent_ever()
    measured = []  # type: List[UserProfile]
    for profile in active_users():
        if profile.id in ever_sent:
            measured.append(profile)
    return measured
|
|
|
|
|
|
|
|
def active_users_who_sent_between(begin, end):
    # type: (datetime, datetime) -> List[UserProfile]
    """Return the users from ``active_users()`` who sent a message in the window.

    The window is whatever ``users_who_sent_between(begin, end)`` covers.
    """
    window_senders = users_who_sent_between(begin, end)
    matching = []  # type: List[UserProfile]
    for profile in active_users():
        if profile.id in window_senders:
            matching.append(profile)
    return matching
|
2013-10-17 00:12:30 +02:00
|
|
|
|
|
|
|
# Return the amount of Zulip usage for this user between the two
|
|
|
|
# given dates
|
|
|
|
def seconds_usage_between(user_profile, begin, end):
    # type: (UserProfile, datetime, datetime) -> timedelta
    """Return how long ``user_profile`` was active between ``begin`` and ``end``.

    Every UserActivityInterval of the user that overlaps the window is
    clipped to [begin, end] and the clipped lengths are added up.  Despite
    the function name, the result is a ``timedelta``, not a number of seconds.
    """
    overlapping = UserActivityInterval.objects.filter(
        user_profile=user_profile,
        start__lte=end,
        end__gte=begin,
    )
    clipped_lengths = (
        min(end, interval.end) - max(begin, interval.start)
        for interval in overlapping
    )
    return sum(clipped_lengths, timedelta(0))
|
|
|
|
|
|
|
|
# Return a list of how many seconds each user has been engaging with the app on a given day
|
|
|
|
def seconds_active_during_day(day):
    # type: (datetime) -> List[float]
    """Return, per measured user, the seconds of activity on ``day``.

    The day runs from 00:00:00 to 23:59:59 with ``tzinfo`` replaced by UTC.
    One value is produced for each user in ``active_users_to_measure()``,
    in that order.
    """
    day_start = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc)
    day_end = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc)

    seconds_per_user = []  # type: List[float]
    for profile in active_users_to_measure():
        usage = seconds_usage_between(profile, day_start, day_end)
        seconds_per_user.append(usage.total_seconds())
    return seconds_per_user
|
|
|
|
|
2013-10-18 17:40:16 +02:00
|
|
|
def users_active_nosend_during_day(day):
    # type: (datetime) -> List[UserProfile]
    """Return the measured users who were active on ``day`` but sent nothing.

    A user is "active" when at least one of their UserActivityInterval rows
    overlaps the day (00:00:00 to 23:59:59, ``tzinfo`` replaced by UTC).
    Users whose id is in ``users_who_sent_between`` for that same window are
    left out.  The order of ``active_users_to_measure()`` is preserved.
    """
    begin_day = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc)
    end_day = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc)
    candidates = active_users_to_measure()
    today_senders = users_who_sent_between(begin_day, end_day)

    def was_active(user_profile):
        # type: (UserProfile) -> bool
        # exists() asks the database for a yes/no answer instead of loading
        # every overlapping interval row just to check the count is non-zero.
        return UserActivityInterval.objects.filter(user_profile=user_profile,
                                                   end__gte=begin_day,
                                                   start__lte=end_day).exists()

    # Test sender membership first: it is an in-memory set lookup, so users
    # who sent a message never cost an interval query.
    return [u for u in candidates
            if u.id not in today_senders and was_active(u)]
|
2013-10-18 17:40:16 +02:00
|
|
|
|
|
|
|
def calculate_stats(data, all_users):
    # type: (Sequence[float], Sequence[UserProfile]) -> Dict[str, Any]
    """Summarise a list of per-user activity durations given in seconds.

    ``data`` holds one duration per sample; ``all_users`` is only used for
    its length.  A sample counts as "active" when it is greater than 1.

    Returns ``{"# data points": 0}`` for empty ``data``.  Otherwise returns a
    dict with:
      'active users'  - number of samples greater than 1
      'total users'   - len(all_users)
      'mean'          - sum of *all* samples floor-divided by the number of
                        active samples, formatted via str(timedelta)
      'median'        - median of the active samples, formatted the same way
      '# data points' - len(data)

    When there are samples but none is active, mean and median are reported
    as zero rather than raising ZeroDivisionError / IndexError.
    """
    if len(data) == 0:
        return {"# data points": 0}

    active_samples = [x for x in data if x > 1]
    active_user_count = len(active_samples)

    if active_user_count == 0:
        # Nobody was active: there is nothing to divide by and no median.
        mean_data = 0.0  # type: float
        median_data = 0.0  # type: float
    else:
        mean_data = sum(data) // active_user_count
        median_data = median(active_samples)

    return {'active users': active_user_count,
            'total users': len(all_users),
            'mean': str(timedelta(seconds=mean_data)),
            'median': str(timedelta(seconds=median_data)),
            '# data points': len(data)}
|
2013-10-17 00:12:30 +02:00
|
|
|
|
|
|
|
# Return an info dict {mean: , median} containing the mean/median seconds users were active on a given day
|
|
|
|
def activity_averages_during_day(day):
    # type: (datetime) -> Dict[str, Any]
    """Return the ``calculate_stats`` summary of user activity on ``day``.

    The samples come from ``seconds_active_during_day(day)`` and the user
    population from ``active_users_to_measure()``.
    """
    measured_users = active_users_to_measure()
    return calculate_stats(seconds_active_during_day(day), all_users=measured_users)
|
2013-10-17 00:12:30 +02:00
|
|
|
|
|
|
|
# Returns an info dict {mean: , median} with engagement numbers for all users according
|
2015-09-20 09:25:27 +02:00
|
|
|
# to active_users_to_measure.
|
2013-10-17 00:12:30 +02:00
|
|
|
def activity_averages_between(begin, end, by_day=True):
    # type: (datetime, datetime, bool) -> Dict[str, Any]
    """Return engagement statistics for the days from ``begin`` up to ``end``.

    One day is examined for each of the ``(end - begin).days`` whole days
    starting at ``begin``; Saturdays and Sundays are skipped.

    With ``by_day`` true, the result maps ``str(day)`` to that day's
    ``calculate_stats`` dict.  Otherwise the samples of all examined days are
    pooled and a single ``calculate_stats`` dict is returned.
    """
    measured_users = active_users_to_measure()
    per_day = {}  # type: Dict[datetime, List[float]]
    weekend = (5, 6)  # datetime.weekday(): Saturday is 5, Sunday is 6

    for offset in range((end - begin).days):
        current = begin + timedelta(days=offset)
        if current.weekday() not in weekend:
            per_day[current] = seconds_active_during_day(current)

    if not by_day:
        pooled = list(chain.from_iterable(per_day.values()))  # type: ignore # chain.from_iterable needs overload
        return calculate_stats(pooled, all_users=measured_users)

    return {
        str(current): calculate_stats(samples, all_users=measured_users)
        for current, samples in six.iteritems(per_day)
    }
|