zulip/zerver/lib/statistics.py

138 lines
5.3 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from zerver.models import UserProfile, UserActivity, UserActivityInterval, Message
from django.utils.timezone import utc
from typing import Any, Dict, List, Sequence, Set
from datetime import datetime, timedelta
from itertools import chain
from six.moves import range
import six
def median(data):
    # type: (Sequence[float]) -> float
    """Return the median of data.

    The input is sorted into a new list (the argument is not modified).
    For an odd number of values the middle value is returned as-is; for
    an even number the two middle values are averaged as a float.
    An empty input raises IndexError.
    """
    ordered = sorted(data)
    middle, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[middle]
    # Even count: average the two values that straddle the midpoint.
    return (ordered[middle - 1] + ordered[middle]) / 2.0
# Base queryset over messages, excluding any whose sending client name
# contains "mirror" or "API". The users_who_sent_* helpers below build
# on it.
users_who_sent_query = (
    Message.objects.select_related("sender")
    .exclude(sending_client__name__contains="mirror")
    .exclude(sending_client__name__contains="API")
)
def active_users():
    # type: () -> Sequence[UserProfile]
    """Return the users counted towards the statistics in this module.

    Only profiles with is_bot=False and is_active=True are included;
    select_related() is applied to the resulting queryset.
    """
    counted_profiles = UserProfile.objects.filter(is_bot=False, is_active=True)
    return counted_profiles.select_related()
def users_who_sent_between(begin, end):
    # type: (datetime, datetime) -> Set[int]
    """Return the ids of senders with a counted message between begin and end.

    Both bounds are exclusive (pub_date > begin and pub_date < end).
    Messages are drawn from users_who_sent_query.
    """
    in_window = users_who_sent_query.filter(pub_date__gt=begin, pub_date__lt=end)
    return {row["sender__id"] for row in in_window.values("sender__id")}
def users_who_sent_ever():
    # type: () -> Set[int]
    """Return the ids of all senders appearing in users_who_sent_query."""
    sender_rows = users_who_sent_query.values("sender__id")
    return {row["sender__id"] for row in sender_rows}
def active_users_to_measure():
    # type: () -> List[UserProfile]
    """Return the active users who have sent at least one counted message.

    The result keeps the order in which active_users() yields profiles.
    """
    sender_ids = users_who_sent_ever()
    measured = []  # type: List[UserProfile]
    for profile in active_users():
        if profile.id in sender_ids:
            measured.append(profile)
    return measured
def active_users_who_sent_between(begin, end):
    # type: (datetime, datetime) -> List[UserProfile]
    """Return the active users who sent a counted message between begin and end.

    The window bounds are those of users_who_sent_between().
    """
    sender_ids = users_who_sent_between(begin, end)
    candidates = active_users()
    return [profile for profile in candidates if profile.id in sender_ids]
def seconds_usage_between(user_profile, begin, end):
    # type: (UserProfile, datetime, datetime) -> timedelta
    """Return the amount of Zulip usage for this user between the two dates.

    Every UserActivityInterval of the user that overlaps [begin, end] is
    clipped to that window, and the clipped lengths are summed. The
    result is a timedelta (timedelta(0) when nothing overlaps).
    """
    overlapping = UserActivityInterval.objects.filter(
        user_profile=user_profile, end__gte=begin, start__lte=end)
    clipped_lengths = (
        min(end, row.end) - max(begin, row.start) for row in overlapping
    )
    return sum(clipped_lengths, timedelta(0))
def seconds_active_during_day(day):
    # type: (datetime) -> List[float]
    """Return how many seconds each measured user engaged with the app on a day.

    The window runs from 00:00:00 to 23:59:59 of the given date, with
    tzinfo replaced by UTC. One value is returned per user from
    active_users_to_measure(), in that order.
    """
    window_start = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc)
    # Same date and tzinfo as window_start, moved to the last whole second.
    window_end = window_start.replace(hour=23, minute=59, second=59)
    measured = active_users_to_measure()
    return [
        seconds_usage_between(profile, window_start, window_end).total_seconds()
        for profile in measured
    ]
def users_active_nosend_during_day(day):
    # type: (datetime) -> List[UserProfile]
    """Return the measured users who were active on a day but sent nothing.

    A user counts as active when at least one UserActivityInterval of
    theirs overlaps the window 00:00:00-23:59:59 of the given date
    (tzinfo replaced by UTC). Users whose id is returned by
    users_who_sent_between() for that window are left out.
    """
    begin_day = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc)
    end_day = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc)
    measured_users = active_users_to_measure()
    today_senders = users_who_sent_between(begin_day, end_day)

    quiet_users = []  # type: List[UserProfile]
    for user_profile in measured_users:
        # Senders are excluded from the result anyway, so skip their
        # per-user interval query entirely.
        if user_profile.id in today_senders:
            continue
        # exists() asks the database only whether a row matches, instead
        # of loading every interval just to count them.
        has_activity = UserActivityInterval.objects.filter(
            user_profile=user_profile,
            end__gte=begin_day,
            start__lte=end_day).exists()
        if has_activity:
            quiet_users.append(user_profile)
    return quiet_users
def calculate_stats(data, all_users):
    # type: (Sequence[float], Sequence[UserProfile]) -> Dict[str, Any]
    """Summarise per-user activity values into a stats dict.

    data holds one number per data point (the callers in this file pass
    seconds of activity); a value greater than 1 counts as an active
    user. all_users is only used for its length.

    Returns {"# data points": 0} for empty data. Otherwise returns a
    dict with 'active users', 'total users', 'mean', 'median' and
    '# data points', where mean and median are str(timedelta) values.
    The mean is the sum of all data floor-divided by the active count;
    the median is taken over the active values only. When no value is
    above 1, mean and median are reported as a zero duration instead of
    raising ZeroDivisionError / IndexError.
    """
    if len(data) == 0:
        return {"# data points": 0}

    active_values = [x for x in data if x > 1]
    active_user_count = len(active_values)
    if active_user_count == 0:
        # Nobody was active: there is nothing to divide by or take a
        # median of, so report zero durations.
        mean_data = 0  # type: float
        median_data = 0  # type: float
    else:
        mean_data = sum(data) // active_user_count
        median_data = median(active_values)

    return {'active users': active_user_count,
            'total users': len(all_users),
            'mean': str(timedelta(seconds=mean_data)),
            'median': str(timedelta(seconds=median_data)),
            '# data points': len(data)}
def activity_averages_during_day(day):
    # type: (datetime) -> Dict[str, Any]
    """Return the calculate_stats() info dict for a single day.

    The dict summarises (mean, median, counts) the seconds each measured
    user was active on the given day.
    """
    measured = active_users_to_measure()
    per_user_seconds = seconds_active_during_day(day)
    return calculate_stats(per_user_seconds, all_users=measured)
def activity_averages_between(begin, end, by_day=True):
    # type: (datetime, datetime, bool) -> Dict[str, Any]
    """Return engagement numbers for the users from active_users_to_measure.

    Days are stepped from begin in whole-day increments for
    (end - begin).days steps; Saturdays and Sundays are skipped.

    With by_day true the result maps str(day) to that day's
    calculate_stats() dict. Otherwise all days' values are pooled and a
    single calculate_stats() dict is returned.
    """
    measured = active_users_to_measure()
    day_count = (end - begin).days
    per_day = {}  # type: Dict[datetime, List[float]]
    for offset in range(day_count):
        current = begin + timedelta(days=offset)
        # weekday() 5 and 6 are Saturday and Sunday: ignore weekends.
        if current.weekday() not in (5, 6):
            per_day[current] = seconds_active_during_day(current)

    if not by_day:
        pooled = list(chain.from_iterable(per_day.values()))  # type: ignore # chain.from_iterable needs overload
        return calculate_stats(pooled, all_users=measured)

    return {
        str(current): calculate_stats(values, all_users=measured)
        for current, values in six.iteritems(per_day)
    }