zulip/analytics/views/installation_activity.py

615 lines
19 KiB
Python
Raw Normal View History

2017-11-16 00:55:49 +01:00
import itertools
import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
2017-11-16 00:55:49 +01:00
from django.conf import settings
from django.db import connection
from django.http import HttpRequest, HttpResponse
2017-11-16 00:55:49 +01:00
from django.shortcuts import render
from django.template import loader
from django.utils.timezone import now as timezone_now
from jinja2 import Markup as mark_safe
from psycopg2.sql import SQL, Composable, Literal
from analytics.lib.counts import COUNT_STATS
from analytics.views.activity_common import (
dictfetchall,
format_date_for_activity_reports,
make_table,
realm_activity_link,
realm_stats_link,
remote_installation_stats_link,
)
from analytics.views.support import get_plan_name
from zerver.decorator import require_server_admin
from zerver.lib.request import has_request_variables
from zerver.lib.timestamp import timestamp_to_datetime
from zerver.models import Realm, UserActivityInterval, UserProfile
if settings.BILLING_ENABLED:
from corporate.lib.stripe import (
estimate_annual_recurring_revenue_by_realm,
get_realms_to_default_discount_dict,
)
def get_realm_day_counts() -> Dict[str, Dict[str, str]]:
query = SQL(
"""
select
2017-01-08 19:42:32 +01:00
r.string_id,
(now()::date - date_sent::date) age,
count(*) cnt
from zerver_message m
join zerver_userprofile up on up.id = m.sender_id
join zerver_realm r on r.id = up.realm_id
join zerver_client c on c.id = m.sending_client_id
where
(not up.is_bot)
and
date_sent > now()::date - interval '8 day'
and
c.name not in ('zephyr_mirror', 'ZulipMonitoring')
group by
2017-01-08 19:42:32 +01:00
r.string_id,
age
order by
2017-01-08 19:42:32 +01:00
r.string_id,
age
"""
)
cursor = connection.cursor()
cursor.execute(query)
rows = dictfetchall(cursor)
cursor.close()
python: Convert assignment type annotations to Python 3.6 style. This commit was split by tabbott; this piece covers the vast majority of files in Zulip, but excludes scripts/, tools/, and puppet/ to help ensure we at least show the right error messages for Xenial systems. We can likely further refine the remaining pieces with some testing. Generated by com2ann, with whitespace fixes and various manual fixes for runtime issues: - invoiced_through: Optional[LicenseLedger] = models.ForeignKey( + invoiced_through: Optional["LicenseLedger"] = models.ForeignKey( -_apns_client: Optional[APNsClient] = None +_apns_client: Optional["APNsClient"] = None - notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) - signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) + notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) + signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) - author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE) + author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE) - bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL) + bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL) - default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) - default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) + default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) + default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) -descriptors_by_handler_id: Dict[int, ClientDescriptor] = {} +descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {} -worker_classes: Dict[str, Type[QueueProcessingWorker]] = {} -queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {} +worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {} +queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {} -AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None +AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
counts: Dict[str, Dict[int, int]] = defaultdict(dict)
for row in rows:
counts[row["string_id"]][row["age"]] = row["cnt"]
result = {}
2017-01-08 19:42:32 +01:00
for string_id in counts:
raw_cnts = [counts[string_id].get(age, 0) for age in range(8)]
min_cnt = min(raw_cnts[1:])
max_cnt = max(raw_cnts[1:])
def format_count(cnt: int, style: Optional[str] = None) -> str:
if style is not None:
good_bad = style
elif cnt == min_cnt:
good_bad = "bad"
elif cnt == max_cnt:
good_bad = "good"
else:
good_bad = "neutral"
return f'<td class="number {good_bad}">{cnt}</td>'
cnts = format_count(raw_cnts[0], "neutral") + "".join(map(format_count, raw_cnts[1:]))
2017-01-08 19:42:32 +01:00
result[string_id] = dict(cnts=cnts)
return result
def realm_summary_table(realm_minutes: Dict[str, float]) -> str:
now = timezone_now()
query = SQL(
"""
SELECT
2017-01-08 19:42:32 +01:00
realm.string_id,
realm.date_created,
realm.plan_type,
coalesce(wau_table.value, 0) wau_count,
coalesce(dau_table.value, 0) dau_count,
coalesce(user_count_table.value, 0) user_profile_count,
coalesce(bot_count_table.value, 0) bot_count
FROM
zerver_realm as realm
LEFT OUTER JOIN (
SELECT
value _14day_active_humans,
realm_id
from
analytics_realmcount
WHERE
property = 'realm_active_humans::day'
AND end_time = %(realm_active_humans_end_time)s
) as _14day_active_humans_table ON realm.id = _14day_active_humans_table.realm_id
LEFT OUTER JOIN (
SELECT
value,
realm_id
from
analytics_realmcount
WHERE
property = '7day_actives::day'
AND end_time = %(seven_day_actives_end_time)s
) as wau_table ON realm.id = wau_table.realm_id
LEFT OUTER JOIN (
SELECT
value,
realm_id
from
analytics_realmcount
WHERE
property = '1day_actives::day'
AND end_time = %(one_day_actives_end_time)s
) as dau_table ON realm.id = dau_table.realm_id
LEFT OUTER JOIN (
SELECT
value,
realm_id
from
analytics_realmcount
WHERE
property = 'active_users_audit:is_bot:day'
AND subgroup = 'false'
AND end_time = %(active_users_audit_end_time)s
) as user_count_table ON realm.id = user_count_table.realm_id
LEFT OUTER JOIN (
SELECT
value,
realm_id
from
analytics_realmcount
WHERE
property = 'active_users_audit:is_bot:day'
AND subgroup = 'true'
AND end_time = %(active_users_audit_end_time)s
) as bot_count_table ON realm.id = bot_count_table.realm_id
WHERE
_14day_active_humans IS NOT NULL
or realm.plan_type = 3
ORDER BY
dau_count DESC,
string_id ASC
"""
)
cursor = connection.cursor()
cursor.execute(
query,
{
"realm_active_humans_end_time": COUNT_STATS[
"realm_active_humans::day"
].last_successful_fill(),
"seven_day_actives_end_time": COUNT_STATS["7day_actives::day"].last_successful_fill(),
"one_day_actives_end_time": COUNT_STATS["1day_actives::day"].last_successful_fill(),
"active_users_audit_end_time": COUNT_STATS[
"active_users_audit:is_bot:day"
].last_successful_fill(),
},
)
rows = dictfetchall(cursor)
cursor.close()
# Fetch all the realm administrator users
realm_owners: Dict[str, List[str]] = defaultdict(list)
for up in UserProfile.objects.select_related("realm").filter(
role=UserProfile.ROLE_REALM_OWNER,
is_active=True,
):
realm_owners[up.realm.string_id].append(up.delivery_email)
for row in rows:
row["date_created_day"] = row["date_created"].strftime("%Y-%m-%d")
row["age_days"] = int((now - row["date_created"]).total_seconds() / 86400)
row["is_new"] = row["age_days"] < 12 * 7
row["realm_owner_emails"] = ", ".join(realm_owners[row["string_id"]])
# get messages sent per day
counts = get_realm_day_counts()
for row in rows:
try:
row["history"] = counts[row["string_id"]]["cnts"]
2017-01-08 16:58:30 +01:00
except Exception:
row["history"] = ""
# estimate annual subscription revenue
total_arr = 0
if settings.BILLING_ENABLED:
estimated_arrs = estimate_annual_recurring_revenue_by_realm()
realms_to_default_discount = get_realms_to_default_discount_dict()
for row in rows:
row["plan_type_string"] = get_plan_name(row["plan_type"])
string_id = row["string_id"]
if string_id in estimated_arrs:
row["arr"] = estimated_arrs[string_id]
if row["plan_type"] == Realm.STANDARD:
row["effective_rate"] = 100 - int(realms_to_default_discount.get(string_id, 0))
elif row["plan_type"] == Realm.STANDARD_FREE:
row["effective_rate"] = 0
elif row["plan_type"] == Realm.LIMITED and string_id in realms_to_default_discount:
row["effective_rate"] = 100 - int(realms_to_default_discount[string_id])
else:
row["effective_rate"] = ""
total_arr += sum(estimated_arrs.values())
# augment data with realm_minutes
total_hours = 0.0
for row in rows:
string_id = row["string_id"]
2017-01-08 19:42:32 +01:00
minutes = realm_minutes.get(string_id, 0.0)
hours = minutes / 60.0
total_hours += hours
row["hours"] = str(int(hours))
try:
row["hours_per_user"] = "{:.1f}".format(hours / row["dau_count"])
2017-01-08 16:58:30 +01:00
except Exception:
pass
# formatting
for row in rows:
row["stats_link"] = realm_stats_link(row["string_id"])
row["string_id"] = realm_activity_link(row["string_id"])
# Count active sites
def meets_goal(row: Dict[str, int]) -> bool:
return row["dau_count"] >= 5
num_active_sites = len(list(filter(meets_goal, rows)))
# create totals
total_dau_count = 0
total_user_profile_count = 0
total_bot_count = 0
total_wau_count = 0
for row in rows:
total_dau_count += int(row["dau_count"])
total_user_profile_count += int(row["user_profile_count"])
total_bot_count += int(row["bot_count"])
total_wau_count += int(row["wau_count"])
total_row = dict(
string_id="Total",
plan_type_string="",
effective_rate="",
arr=total_arr,
stats_link="",
date_created_day="",
realm_owner_emails="",
dau_count=total_dau_count,
user_profile_count=total_user_profile_count,
bot_count=total_bot_count,
hours=int(total_hours),
wau_count=total_wau_count,
)
rows.insert(0, total_row)
content = loader.render_to_string(
"analytics/realm_summary_table.html",
dict(
rows=rows,
num_active_sites=num_active_sites,
utctime=now.strftime("%Y-%m-%d %H:%MZ"),
billing_enabled=settings.BILLING_ENABLED,
),
)
return content
def user_activity_intervals() -> Tuple[mark_safe, Dict[str, float]]:
day_end = timestamp_to_datetime(time.time())
day_start = day_end - timedelta(hours=24)
output = "Per-user online duration for the last 24 hours:\n"
total_duration = timedelta(0)
all_intervals = (
UserActivityInterval.objects.filter(
end__gte=day_start,
start__lte=day_end,
)
.select_related(
"user_profile",
"user_profile__realm",
)
.only(
"start",
"end",
"user_profile__delivery_email",
"user_profile__realm__string_id",
)
.order_by(
"user_profile__realm__string_id",
"user_profile__delivery_email",
)
)
2017-01-08 19:42:32 +01:00
by_string_id = lambda row: row.user_profile.realm.string_id
by_email = lambda row: row.user_profile.delivery_email
realm_minutes = {}
2017-01-08 19:42:32 +01:00
for string_id, realm_intervals in itertools.groupby(all_intervals, by_string_id):
realm_duration = timedelta(0)
output += f"<hr>{string_id}\n"
for email, intervals in itertools.groupby(realm_intervals, by_email):
duration = timedelta(0)
for interval in intervals:
start = max(day_start, interval.start)
end = min(day_end, interval.end)
duration += end - start
total_duration += duration
realm_duration += duration
output += f" {email:<37}{duration}\n"
2017-01-08 19:42:32 +01:00
realm_minutes[string_id] = realm_duration.total_seconds() / 60
output += f"\nTotal duration: {total_duration}\n"
output += f"\nTotal duration in minutes: {total_duration.total_seconds() / 60.}\n"
output += f"Total duration amortized to a month: {total_duration.total_seconds() * 30. / 60.}"
content = mark_safe("<pre>" + output + "</pre>")
return content, realm_minutes
def ad_hoc_queries() -> List[Dict[str, str]]:
def get_page(
query: Composable, cols: Sequence[str], title: str, totals_columns: Sequence[int] = []
) -> Dict[str, str]:
cursor = connection.cursor()
cursor.execute(query)
rows = cursor.fetchall()
rows = list(map(list, rows))
cursor.close()
def fix_rows(
i: int, fixup_func: Union[Callable[[Realm], mark_safe], Callable[[datetime], str]]
) -> None:
for row in rows:
row[i] = fixup_func(row[i])
total_row = []
for i, col in enumerate(cols):
if col == "Realm":
fix_rows(i, realm_activity_link)
elif col in ["Last time", "Last visit"]:
fix_rows(i, format_date_for_activity_reports)
elif col == "Hostname":
for row in rows:
row[i] = remote_installation_stats_link(row[0], row[i])
if len(totals_columns) > 0:
if i == 0:
total_row.append("Total")
elif i in totals_columns:
total_row.append(str(sum(row[i] for row in rows if row[i] is not None)))
else:
total_row.append("")
if len(totals_columns) > 0:
rows.insert(0, total_row)
content = make_table(title, cols, rows)
return dict(
content=content,
title=title,
)
pages = []
###
for mobile_type in ["Android", "ZulipiOS"]:
title = f"{mobile_type} usage"
query = SQL(
"""
select
2017-01-08 19:42:32 +01:00
realm.string_id,
up.id user_id,
client.name,
sum(count) as hits,
max(last_visit) as last_time
from zerver_useractivity ua
join zerver_client client on client.id = ua.client_id
join zerver_userprofile up on up.id = ua.user_profile_id
join zerver_realm realm on realm.id = up.realm_id
where
client.name like {mobile_type}
2017-01-08 19:42:32 +01:00
group by string_id, up.id, client.name
having max(last_visit) > now() - interval '2 week'
2017-01-08 19:42:32 +01:00
order by string_id, up.id, client.name
"""
).format(
mobile_type=Literal(mobile_type),
)
cols = [
"Realm",
"User id",
"Name",
"Hits",
"Last time",
]
pages.append(get_page(query, cols, title))
###
title = "Desktop users"
query = SQL(
"""
select
2017-01-08 19:42:32 +01:00
realm.string_id,
client.name,
sum(count) as hits,
max(last_visit) as last_time
from zerver_useractivity ua
join zerver_client client on client.id = ua.client_id
join zerver_userprofile up on up.id = ua.user_profile_id
join zerver_realm realm on realm.id = up.realm_id
where
client.name like 'desktop%%'
2017-01-08 19:42:32 +01:00
group by string_id, client.name
having max(last_visit) > now() - interval '2 week'
2017-01-08 19:42:32 +01:00
order by string_id, client.name
"""
)
cols = [
"Realm",
"Client",
"Hits",
"Last time",
]
pages.append(get_page(query, cols, title))
###
title = "Integrations by realm"
query = SQL(
"""
select
2017-01-08 19:42:32 +01:00
realm.string_id,
case
when query like '%%external%%' then split_part(query, '/', 5)
else client.name
end client_name,
sum(count) as hits,
max(last_visit) as last_time
from zerver_useractivity ua
join zerver_client client on client.id = ua.client_id
join zerver_userprofile up on up.id = ua.user_profile_id
join zerver_realm realm on realm.id = up.realm_id
where
(query in ('send_message_backend', '/api/v1/send_message')
and client.name not in ('Android', 'ZulipiOS')
and client.name not like 'test: Zulip%%'
)
or
query like '%%external%%'
2017-01-08 19:42:32 +01:00
group by string_id, client_name
having max(last_visit) > now() - interval '2 week'
2017-01-08 19:42:32 +01:00
order by string_id, client_name
"""
)
cols = [
"Realm",
"Client",
"Hits",
"Last time",
]
pages.append(get_page(query, cols, title))
###
title = "Integrations by client"
query = SQL(
"""
select
case
when query like '%%external%%' then split_part(query, '/', 5)
else client.name
end client_name,
2017-01-08 19:42:32 +01:00
realm.string_id,
sum(count) as hits,
max(last_visit) as last_time
from zerver_useractivity ua
join zerver_client client on client.id = ua.client_id
join zerver_userprofile up on up.id = ua.user_profile_id
join zerver_realm realm on realm.id = up.realm_id
where
(query in ('send_message_backend', '/api/v1/send_message')
and client.name not in ('Android', 'ZulipiOS')
and client.name not like 'test: Zulip%%'
)
or
query like '%%external%%'
2017-01-08 19:42:32 +01:00
group by client_name, string_id
having max(last_visit) > now() - interval '2 week'
2017-01-08 19:42:32 +01:00
order by client_name, string_id
"""
)
cols = [
"Client",
"Realm",
"Hits",
"Last time",
]
pages.append(get_page(query, cols, title))
title = "Remote Zulip servers"
query = SQL(
"""
with icount as (
select
server_id,
max(value) as max_value,
max(end_time) as max_end_time
from zilencer_remoteinstallationcount
where
property='active_users:is_bot:day'
and subgroup='false'
group by server_id
),
remote_push_devices as (
select server_id, count(distinct(user_id)) as push_user_count from zilencer_remotepushdevicetoken
group by server_id
)
select
rserver.id,
rserver.hostname,
rserver.contact_email,
max_value,
push_user_count,
max_end_time
from zilencer_remotezulipserver rserver
left join icount on icount.server_id = rserver.id
left join remote_push_devices on remote_push_devices.server_id = rserver.id
order by max_value DESC NULLS LAST, push_user_count DESC NULLS LAST
"""
)
cols = [
"ID",
"Hostname",
"Contact email",
"Analytics users",
"Mobile users",
"Last update time",
]
pages.append(get_page(query, cols, title, totals_columns=[3, 4]))
return pages
@require_server_admin
@has_request_variables
def get_installation_activity(request: HttpRequest) -> HttpResponse:
duration_content, realm_minutes = user_activity_intervals()
python: Convert assignment type annotations to Python 3.6 style. This commit was split by tabbott; this piece covers the vast majority of files in Zulip, but excludes scripts/, tools/, and puppet/ to help ensure we at least show the right error messages for Xenial systems. We can likely further refine the remaining pieces with some testing. Generated by com2ann, with whitespace fixes and various manual fixes for runtime issues: - invoiced_through: Optional[LicenseLedger] = models.ForeignKey( + invoiced_through: Optional["LicenseLedger"] = models.ForeignKey( -_apns_client: Optional[APNsClient] = None +_apns_client: Optional["APNsClient"] = None - notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) - signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) + notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) + signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE) - author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE) + author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE) - bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL) + bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL) - default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) - default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) + default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) + default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE) -descriptors_by_handler_id: Dict[int, ClientDescriptor] = {} +descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {} -worker_classes: Dict[str, Type[QueueProcessingWorker]] = {} -queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {} +worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {} +queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {} -AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None +AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
counts_content: str = realm_summary_table(realm_minutes)
data = [
("Counts", counts_content),
("Durations", duration_content),
]
for page in ad_hoc_queries():
data.append((page["title"], page["content"]))
title = "Activity"
return render(
request,
"analytics/activity.html",
context=dict(data=data, title=title, is_home=True),
)