analytics: Remove unused analytics management commands.

Unlike stream_stats, I'm not aware of any of these having been used in
the last few years, and it's basically just really bad subsets of the
data in /activity, which also doesn't require shell access to use.

These haven't had real work or usage, AFAIK, since 2013.
This commit is contained in:
Tim Abbott 2020-07-24 11:30:18 -07:00 committed by Tim Abbott
parent 49d06b9d69
commit bcab06509a
5 changed files with 0 additions and 414 deletions

View File

@ -1,82 +0,0 @@
import datetime
import logging
import time
from typing import Any, Dict
from django.core.management.base import BaseCommand, CommandParser
from zerver.lib.timestamp import timestamp_to_datetime
from zerver.models import Message, Recipient
def compute_stats(log_level: int) -> None:
logger = logging.getLogger()
logger.setLevel(log_level)
one_week_ago = timestamp_to_datetime(time.time()) - datetime.timedelta(weeks=1)
mit_query = Message.objects.filter(sender__realm__string_id="zephyr",
recipient__type=Recipient.STREAM,
date_sent__gt=one_week_ago)
for bot_sender_start in ["imap.", "rcmd.", "sys."]:
mit_query = mit_query.exclude(sender__email__startswith=(bot_sender_start))
# Filtering for "/" covers tabbott/extra@ and all the daemon/foo bots.
mit_query = mit_query.exclude(sender__email__contains=("/"))
mit_query = mit_query.exclude(sender__email__contains=("aim.com"))
mit_query = mit_query.exclude(
sender__email__in=["rss@mit.edu", "bash@mit.edu", "apache@mit.edu",
"bitcoin@mit.edu", "lp@mit.edu", "clocks@mit.edu",
"root@mit.edu", "nagios@mit.edu",
"www-data|local-realm@mit.edu"])
user_counts: Dict[str, Dict[str, int]] = {}
for m in mit_query.select_related("sending_client", "sender"):
email = m.sender.email
user_counts.setdefault(email, {})
user_counts[email].setdefault(m.sending_client.name, 0)
user_counts[email][m.sending_client.name] += 1
total_counts: Dict[str, int] = {}
total_user_counts: Dict[str, int] = {}
for email, counts in user_counts.items():
total_user_counts.setdefault(email, 0)
for client_name, count in counts.items():
total_counts.setdefault(client_name, 0)
total_counts[client_name] += count
total_user_counts[email] += count
logging.debug("%40s | %10s | %s", "User", "Messages", "Percentage Zulip")
top_percents: Dict[int, float] = {}
for size in [10, 25, 50, 100, 200, len(total_user_counts.keys())]:
top_percents[size] = 0.0
for i, email in enumerate(sorted(total_user_counts.keys(),
key=lambda x: -total_user_counts[x])):
percent_zulip = round(100 - (user_counts[email].get("zephyr_mirror", 0)) * 100. /
total_user_counts[email], 1)
for size in top_percents.keys():
top_percents.setdefault(size, 0)
if i < size:
top_percents[size] += (percent_zulip * 1.0 / size)
logging.debug("%40s | %10s | %s%%", email, total_user_counts[email],
percent_zulip)
logging.info("")
for size in sorted(top_percents.keys()):
logging.info("Top %6s | %s%%", size, round(top_percents[size], 1))
grand_total = sum(total_counts.values())
print(grand_total)
logging.info("%15s | %s", "Client", "Percentage")
for client in total_counts.keys():
logging.info("%15s | %s%%", client, round(100. * total_counts[client] / grand_total, 1))
class Command(BaseCommand):
help = "Compute statistics on MIT Zephyr usage."
def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument('--verbose', default=False, action='store_true')
def handle(self, *args: Any, **options: Any) -> None:
level = logging.INFO
if options["verbose"]:
level = logging.DEBUG
compute_stats(level)

View File

@ -1,56 +0,0 @@
import datetime
from typing import Any, Dict
from django.core.management.base import BaseCommand, CommandParser
from zerver.lib.statistics import seconds_usage_between
from zerver.models import UserProfile
def analyze_activity(options: Dict[str, Any]) -> None:
day_start = datetime.datetime.strptime(options["date"], "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc)
day_end = day_start + datetime.timedelta(days=options["duration"])
user_profile_query = UserProfile.objects.all()
if options["realm"]:
user_profile_query = user_profile_query.filter(realm__string_id=options["realm"])
print("Per-user online duration:\n")
total_duration = datetime.timedelta(0)
for user_profile in user_profile_query:
duration = seconds_usage_between(user_profile, day_start, day_end)
if duration == datetime.timedelta(0):
continue
total_duration += duration
print(f"{user_profile.email:<37}{duration}")
print(f"\nTotal Duration: {total_duration}")
print(f"\nTotal Duration in minutes: {total_duration.total_seconds() / 60.}")
print(f"Total Duration amortized to a month: {total_duration.total_seconds() * 30. / 60.}")
class Command(BaseCommand):
help = """Report analytics of user activity on a per-user and realm basis.
This command aggregates user activity data that is collected by each user using Zulip. It attempts
to approximate how much each user has been using Zulip per day, measured by recording each 15 minute
period where some activity has occurred (mouse move or keyboard activity).
It will correctly not count server-initiated reloads in the activity statistics.
The duration flag can be used to control how many days to show usage duration for
Usage: ./manage.py analyze_user_activity [--realm=zulip] [--date=2013-09-10] [--duration=1]
By default, if no date is selected 2013-09-10 is used. If no realm is provided, information
is shown for all realms"""
def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument('--realm', action='store')
parser.add_argument('--date', action='store', default="2013-09-06")
parser.add_argument('--duration', action='store', default=1, type=int,
help="How many days to show usage information for")
def handle(self, *args: Any, **options: Any) -> None:
analyze_activity(options)

View File

@ -1,74 +0,0 @@
import datetime
from argparse import ArgumentParser
from typing import Any, Optional
from django.db.models import Count, QuerySet
from django.utils.timezone import now as timezone_now
from zerver.lib.management import ZulipBaseCommand
from zerver.models import UserActivity
class Command(ZulipBaseCommand):
help = """Report rough client activity globally, for a realm, or for a user
Usage examples:
./manage.py client_activity --target server
./manage.py client_activity --target realm --realm zulip
./manage.py client_activity --target user --user hamlet@zulip.com --realm zulip"""
def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument('--target', dest='target', required=True, type=str,
help="'server' will calculate client activity of the entire server. "
"'realm' will calculate client activity of realm. "
"'user' will calculate client activity of the user.")
parser.add_argument('--user', dest='user', type=str,
help="The email address of the user you want to calculate activity.")
self.add_realm_args(parser)
def compute_activity(self, user_activity_objects: QuerySet) -> None:
# Report data from the past week.
#
# This is a rough report of client activity because we inconsistently
# register activity from various clients; think of it as telling you
# approximately how many people from a group have used a particular
# client recently. For example, this might be useful to get a sense of
# how popular different versions of a desktop client are.
#
# Importantly, this does NOT tell you anything about the relative
# volumes of requests from clients.
threshold = timezone_now() - datetime.timedelta(days=7)
client_counts = user_activity_objects.filter(
last_visit__gt=threshold).values("client__name").annotate(
count=Count('client__name'))
total = 0
counts = []
for client_type in client_counts:
count = client_type["count"]
client = client_type["client__name"]
total += count
counts.append((count, client))
counts.sort()
for count in counts:
print(f"{count[1]:>25} {count[0]:15}")
print("Total:", total)
def handle(self, *args: Any, **options: Optional[str]) -> None:
realm = self.get_realm(options)
if options["user"] is None:
if options["target"] == "server" and realm is None:
# Report global activity.
self.compute_activity(UserActivity.objects.all())
elif options["target"] == "realm" and realm is not None:
self.compute_activity(UserActivity.objects.filter(user_profile__realm=realm))
else:
self.print_help("./manage.py", "client_activity")
elif options["target"] == "user":
user_profile = self.get_user(options["user"], realm)
self.compute_activity(UserActivity.objects.filter(user_profile=user_profile))
else:
self.print_help("./manage.py", "client_activity")

View File

@ -1,160 +0,0 @@
import datetime
from argparse import ArgumentParser
from typing import Any, List
from django.core.management.base import BaseCommand, CommandError
from django.db.models import Count
from django.utils.timezone import now as timezone_now
from zerver.models import (
Message,
Realm,
Recipient,
Stream,
Subscription,
UserActivity,
UserMessage,
UserProfile,
get_realm,
)
MOBILE_CLIENT_LIST = ["Android", "ios"]
HUMAN_CLIENT_LIST = MOBILE_CLIENT_LIST + ["website"]
human_messages = Message.objects.filter(sending_client__name__in=HUMAN_CLIENT_LIST)
class Command(BaseCommand):
help = "Generate statistics on realm activity."
def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument('realms', metavar='<realm>', type=str, nargs='*',
help="realm to generate statistics for")
def active_users(self, realm: Realm) -> List[UserProfile]:
# Has been active (on the website, for now) in the last 7 days.
activity_cutoff = timezone_now() - datetime.timedelta(days=7)
return [activity.user_profile for activity in (
UserActivity.objects.filter(user_profile__realm=realm,
user_profile__is_active=True,
last_visit__gt=activity_cutoff,
query="/json/users/me/pointer",
client__name="website"))]
def messages_sent_by(self, user: UserProfile, days_ago: int) -> int:
sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
return human_messages.filter(sender=user, date_sent__gt=sent_time_cutoff).count()
def total_messages(self, realm: Realm, days_ago: int) -> int:
sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
return Message.objects.filter(sender__realm=realm, date_sent__gt=sent_time_cutoff).count()
def human_messages(self, realm: Realm, days_ago: int) -> int:
sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, date_sent__gt=sent_time_cutoff).count()
def api_messages(self, realm: Realm, days_ago: int) -> int:
return (self.total_messages(realm, days_ago) - self.human_messages(realm, days_ago))
def stream_messages(self, realm: Realm, days_ago: int) -> int:
sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, date_sent__gt=sent_time_cutoff,
recipient__type=Recipient.STREAM).count()
def private_messages(self, realm: Realm, days_ago: int) -> int:
sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, date_sent__gt=sent_time_cutoff).exclude(
recipient__type=Recipient.STREAM).exclude(recipient__type=Recipient.HUDDLE).count()
def group_private_messages(self, realm: Realm, days_ago: int) -> int:
sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, date_sent__gt=sent_time_cutoff).exclude(
recipient__type=Recipient.STREAM).exclude(recipient__type=Recipient.PERSONAL).count()
def report_percentage(self, numerator: float, denominator: float, text: str) -> None:
if not denominator:
fraction = 0.0
else:
fraction = numerator / float(denominator)
print(f"{fraction * 100:.2f}% of", text)
def handle(self, *args: Any, **options: Any) -> None:
if options['realms']:
try:
realms = [get_realm(string_id) for string_id in options['realms']]
except Realm.DoesNotExist as e:
raise CommandError(e)
else:
realms = Realm.objects.all()
for realm in realms:
print(realm.string_id)
user_profiles = UserProfile.objects.filter(realm=realm, is_active=True)
active_users = self.active_users(realm)
num_active = len(active_users)
print(f"{num_active} active users ({len(user_profiles)} total)")
streams = Stream.objects.filter(realm=realm).extra(
tables=['zerver_subscription', 'zerver_recipient'],
where=['zerver_subscription.recipient_id = zerver_recipient.id',
'zerver_recipient.type = 2',
'zerver_recipient.type_id = zerver_stream.id',
'zerver_subscription.active = true']).annotate(count=Count("name"))
print(f"{streams.count()} streams")
for days_ago in (1, 7, 30):
print(f"In last {days_ago} days, users sent:")
sender_quantities = [self.messages_sent_by(user, days_ago) for user in user_profiles]
for quantity in sorted(sender_quantities, reverse=True):
print(quantity, end=' ')
print("")
print(f"{self.stream_messages(realm, days_ago)} stream messages")
print(f"{self.private_messages(realm, days_ago)} one-on-one private messages")
print(f"{self.api_messages(realm, days_ago)} messages sent via the API")
print(f"{self.group_private_messages(realm, days_ago)} group private messages")
num_notifications_enabled = len([x for x in active_users if x.enable_desktop_notifications])
self.report_percentage(num_notifications_enabled, num_active,
"active users have desktop notifications enabled")
num_enter_sends = len([x for x in active_users if x.enter_sends])
self.report_percentage(num_enter_sends, num_active,
"active users have enter-sends")
all_message_count = human_messages.filter(sender__realm=realm).count()
multi_paragraph_message_count = human_messages.filter(
sender__realm=realm, content__contains="\n\n").count()
self.report_percentage(multi_paragraph_message_count, all_message_count,
"all messages are multi-paragraph")
# Starred messages
starrers = UserMessage.objects.filter(user_profile__in=user_profiles,
flags=UserMessage.flags.starred).values(
"user_profile").annotate(count=Count("user_profile"))
print("{} users have starred {} messages".format(
len(starrers), sum([elt["count"] for elt in starrers])))
active_user_subs = Subscription.objects.filter(
user_profile__in=user_profiles, active=True)
# Streams not in home view
non_home_view = active_user_subs.filter(is_muted=True).values(
"user_profile").annotate(count=Count("user_profile"))
print("{} users have {} streams not in home view".format(
len(non_home_view), sum([elt["count"] for elt in non_home_view])))
# Code block markup
markup_messages = human_messages.filter(
sender__realm=realm, content__contains="~~~").values(
"sender").annotate(count=Count("sender"))
print("{} users have used code block markup on {} messages".format(
len(markup_messages), sum([elt["count"] for elt in markup_messages])))
# Notifications for stream messages
notifications = active_user_subs.filter(desktop_notifications=True).values(
"user_profile").annotate(count=Count("user_profile"))
print("{} users receive desktop notifications for {} streams".format(
len(notifications), sum([elt["count"] for elt in notifications])))
print("")

View File

@ -1,42 +0,0 @@
import datetime
from argparse import ArgumentParser
from typing import Any
from django.core.management.base import BaseCommand, CommandError
from django.utils.timezone import now as timezone_now
from zerver.models import Message, Realm, Stream, UserProfile, get_realm
class Command(BaseCommand):
help = "Generate statistics on user activity."
def add_arguments(self, parser: ArgumentParser) -> None:
parser.add_argument('realms', metavar='<realm>', type=str, nargs='*',
help="realm to generate statistics for")
def messages_sent_by(self, user: UserProfile, week: int) -> int:
start = timezone_now() - datetime.timedelta(days=(week + 1)*7)
end = timezone_now() - datetime.timedelta(days=week*7)
return Message.objects.filter(sender=user, date_sent__gt=start, date_sent__lte=end).count()
def handle(self, *args: Any, **options: Any) -> None:
if options['realms']:
try:
realms = [get_realm(string_id) for string_id in options['realms']]
except Realm.DoesNotExist as e:
raise CommandError(e)
else:
realms = Realm.objects.all()
for realm in realms:
print(realm.string_id)
user_profiles = UserProfile.objects.filter(realm=realm, is_active=True)
print(f"{len(user_profiles)} users")
print(f"{len(Stream.objects.filter(realm=realm))} streams")
for user_profile in user_profiles:
print(f"{user_profile.email:>35}", end=' ')
for week in range(10):
print(f"{self.messages_sent_by(user_profile, week):5}", end=' ')
print("")