analytics: Remove `active_users` and `active_users_log` metrics.

Both of these are inaccurate, not currently used anywhere, and have
been superseded by the `active_users_audit` metric.
This commit is contained in:
Alex Vandiver 2024-06-03 16:29:44 +00:00 committed by Tim Abbott
parent 0100440a86
commit 09e9c75ec6
8 changed files with 33 additions and 180 deletions

View File

@ -678,34 +678,6 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn:
).format(**kwargs, realm_clause=realm_clause) ).format(**kwargs, realm_clause=realm_clause)
# Hardcodes the query needed by active_users:is_bot:day, since that is
# currently the only stat that uses this.
def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn:
if realm is None:
realm_clause: Composable = SQL("")
else:
realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
return lambda kwargs: SQL(
"""
INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time)
SELECT
zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
FROM zerver_realm
JOIN zerver_userprofile
ON
zerver_realm.id = zerver_userprofile.realm_id
WHERE
zerver_realm.date_created < %(time_end)s AND
zerver_userprofile.date_joined >= %(time_start)s AND
zerver_userprofile.date_joined < %(time_end)s AND
{realm_clause}
zerver_userprofile.is_active = TRUE
GROUP BY zerver_realm.id {group_by_clause}
"""
).format(**kwargs, realm_clause=realm_clause)
# Hardcodes the query needed for active_users_audit:is_bot:day. # Hardcodes the query needed for active_users_audit:is_bot:day.
# Assumes that a user cannot have two RealmAuditLog entries with the # Assumes that a user cannot have two RealmAuditLog entries with the
# same event_time and event_type in [RealmAuditLog.USER_CREATED, # same event_time and event_type in [RealmAuditLog.USER_CREATED,
@ -878,10 +850,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
), ),
CountStat.DAY, CountStat.DAY,
), ),
# Number of users stats # Counts the number of active users in the UserProfile.is_active sense.
# Stats that count the number of active users in the UserProfile.is_active sense.
# 'active_users_audit:is_bot:day' is the canonical record of which users were
# active on which days (in the UserProfile.is_active sense).
# Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected. # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
CountStat( CountStat(
"active_users_audit:is_bot:day", "active_users_audit:is_bot:day",
@ -890,29 +859,6 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
), ),
CountStat.DAY, CountStat.DAY,
), ),
# Important note: LoggingCountStat objects aren't passed the
# Realm argument, because by nature they have a logging
# structure, not a pull-from-database structure, so there's no
# way to compute them for a single realm after the fact (the
# use case for passing a Realm argument).
# Sanity check on 'active_users_audit:is_bot:day', and a archetype for future LoggingCountStats.
# In RealmCount, 'active_users_audit:is_bot:day' should be the partial
# sum sequence of 'active_users_log:is_bot:day', for any realm that
# started after the latter stat was introduced.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY),
# Another sanity check on 'active_users_audit:is_bot:day'. Is only an
# approximation, e.g. if a user is deactivated between the end of the
# day and when this stat is run, they won't be counted. However, is the
# simplest of the three to inspect by hand.
CountStat(
"active_users:is_bot:day",
sql_data_collector(
RealmCount, count_user_by_realm_query(realm), (UserProfile, "is_bot")
),
CountStat.DAY,
interval=TIMEDELTA_MAX,
),
CountStat( CountStat(
"upload_quota_used_bytes::day", "upload_quota_used_bytes::day",
sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None), sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None),

View File

@ -0,0 +1,26 @@
from django.db import migrations
REMOVED_COUNTS = (
"active_users_log:is_bot:day",
"active_users:is_bot:day",
)
class Migration(migrations.Migration):
elidable = True
dependencies = [
("analytics", "0018_remove_usercount_active_users_audit"),
]
operations = [
migrations.RunSQL(
[
("DELETE FROM analytics_realmcount WHERE property IN %s", (REMOVED_COUNTS,)),
(
"DELETE FROM analytics_installationcount WHERE property IN %s",
(REMOVED_COUNTS,),
),
]
)
]

View File

@ -543,34 +543,6 @@ class TestCountStats(AnalyticsTestCase):
# This huddle should not show up anywhere # This huddle should not show up anywhere
self.create_huddle_with_recipient() self.create_huddle_with_recipient()
def test_active_users_by_is_bot(self) -> None:
stat = COUNT_STATS["active_users:is_bot:day"]
self.current_property = stat.property
# To be included
self.create_user(is_bot=True)
self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR)
self.create_user(is_bot=False)
# To be excluded
self.create_user(is_active=False)
do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
self.assertTableState(
RealmCount,
["value", "subgroup", "realm"],
[
[2, "true"],
[1, "false"],
[3, "false", self.second_realm],
[1, "false", self.no_message_realm],
],
)
self.assertTableState(InstallationCount, ["value", "subgroup"], [[2, "true"], [5, "false"]])
self.assertTableState(UserCount, [], [])
self.assertTableState(StreamCount, [], [])
def test_upload_quota_used_bytes(self) -> None: def test_upload_quota_used_bytes(self) -> None:
stat = COUNT_STATS["upload_quota_used_bytes::day"] stat = COUNT_STATS["upload_quota_used_bytes::day"]
self.current_property = stat.property self.current_property = stat.property
@ -606,31 +578,6 @@ class TestCountStats(AnalyticsTestCase):
], ],
) )
def test_active_users_by_is_bot_for_realm_constraint(self) -> None:
# For single Realm
COUNT_STATS = get_count_stats(self.default_realm)
stat = COUNT_STATS["active_users:is_bot:day"]
self.current_property = stat.property
# To be included
self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR)
self.create_user(is_bot=False)
# To be excluded
self.create_user(
email="test@second.analytics",
realm=self.second_realm,
date_joined=self.TIME_ZERO - 2 * self.DAY,
)
do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
self.assertTableState(RealmCount, ["value", "subgroup"], [[1, "true"], [1, "false"]])
# No aggregation to InstallationCount with realm constraint
self.assertTableState(InstallationCount, ["value", "subgroup"], [])
self.assertTableState(UserCount, [], [])
self.assertTableState(StreamCount, [], [])
def test_messages_sent_by_is_bot(self) -> None: def test_messages_sent_by_is_bot(self) -> None:
stat = COUNT_STATS["messages_sent:is_bot:hour"] stat = COUNT_STATS["messages_sent:is_bot:hour"]
self.current_property = stat.property self.current_property = stat.property
@ -1419,46 +1366,6 @@ class TestLoggingCountStats(AnalyticsTestCase):
self.assertTableState(UserCount, ["property", "value"], [["user test", 1]]) self.assertTableState(UserCount, ["property", "value"], [["user test", 1]])
self.assertTableState(StreamCount, ["property", "value"], [["stream test", 1]]) self.assertTableState(StreamCount, ["property", "value"], [["stream test", 1]])
def test_active_users_log_by_is_bot(self) -> None:
property = "active_users_log:is_bot:day"
user = do_create_user(
"email", "password", self.default_realm, "full_name", acting_user=None
)
self.assertEqual(
1,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_deactivate_user(user, acting_user=None)
self.assertEqual(
0,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_activate_mirror_dummy_user(user, acting_user=None)
self.assertEqual(
1,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_deactivate_user(user, acting_user=None)
self.assertEqual(
0,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_reactivate_user(user, acting_user=None)
self.assertEqual(
1,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL="https://push.zulip.org.example.com") @override_settings(PUSH_NOTIFICATION_BOUNCER_URL="https://push.zulip.org.example.com")
def test_mobile_pushes_received_count(self) -> None: def test_mobile_pushes_received_count(self) -> None:
self.server_uuid = "6cde5f7a-1f7e-4978-9716-49f69ebfc9fe" self.server_uuid = "6cde5f7a-1f7e-4978-9716-49f69ebfc9fe"

View File

@ -35,9 +35,10 @@ The Zulip analytics system is built around collecting time series data in a
set of database tables. Each of these tables has the following fields: set of database tables. Each of these tables has the following fields:
- property: A human readable string uniquely identifying a `CountStat` - property: A human readable string uniquely identifying a `CountStat`
object. Example: `"active_users:is_bot:hour"` or `"messages_sent:client:day"`. object. Example: `"active_users_audit:is_bot:hour"` or
`"messages_sent:client:day"`.
- subgroup: Almost all `CountStat` objects are further sliced by subgroup. For - subgroup: Almost all `CountStat` objects are further sliced by subgroup. For
`"active_users:is_bot:day"`, this column will be `False` for measurements of `"active_users_audit:is_bot:day"`, this column will be `False` for measurements of
humans, and `True` for measurements of bots. For `"messages_sent:client:day"`, humans, and `True` for measurements of bots. For `"messages_sent:client:day"`,
this column is the client_id of the client under consideration. this column is the client_id of the client under consideration.
- end_time: A datetime indicating the end of a time interval. It will be on - end_time: A datetime indicating the end of a time interval. It will be on
@ -45,7 +46,7 @@ set of database tables. Each of these tables has the following fields:
frequency. The time interval is determined by the `CountStat`. frequency. The time interval is determined by the `CountStat`.
- various "id" fields: Foreign keys into `Realm`, `UserProfile`, `Stream`, or - various "id" fields: Foreign keys into `Realm`, `UserProfile`, `Stream`, or
nothing. E.g. the `RealmCount` table has a foreign key into `Realm`. nothing. E.g. the `RealmCount` table has a foreign key into `Realm`.
- value: The integer counts. For `"active_users:is_bot:hour"` in the - value: The integer counts. For `"active_users_audit:is_bot:hour"` in the
`RealmCount` table, this is the number of active humans or bots (depending `RealmCount` table, this is the number of active humans or bots (depending
on subgroup) in a particular realm at a particular `end_time`. For on subgroup) in a particular realm at a particular `end_time`. For
`"messages_sent:client:day"` in the `UserCount` table, this is the number of `"messages_sent:client:day"` in the `UserCount` table, this is the number of

View File

@ -8,7 +8,6 @@ from django.utils.timezone import now as timezone_now
from django.utils.translation import gettext as _ from django.utils.translation import gettext as _
from django.utils.translation import override as override_language from django.utils.translation import override as override_language
from analytics.lib.counts import COUNT_STATS, do_increment_logging_stat
from confirmation import settings as confirmation_settings from confirmation import settings as confirmation_settings
from zerver.actions.message_send import ( from zerver.actions.message_send import (
internal_send_huddle_message, internal_send_huddle_message,
@ -514,12 +513,6 @@ def do_create_user(
realm_creation_audit_log.acting_user = user_profile realm_creation_audit_log.acting_user = user_profile
realm_creation_audit_log.save(update_fields=["acting_user"]) realm_creation_audit_log.save(update_fields=["acting_user"])
do_increment_logging_stat(
user_profile.realm,
COUNT_STATS["active_users_log:is_bot:day"],
user_profile.is_bot,
event_time,
)
if settings.BILLING_ENABLED: if settings.BILLING_ENABLED:
billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm)
billing_session.update_license_ledger_if_needed(event_time) billing_session.update_license_ledger_if_needed(event_time)
@ -623,12 +616,6 @@ def do_activate_mirror_dummy_user(
}, },
) )
maybe_enqueue_audit_log_upload(user_profile.realm) maybe_enqueue_audit_log_upload(user_profile.realm)
do_increment_logging_stat(
user_profile.realm,
COUNT_STATS["active_users_log:is_bot:day"],
user_profile.is_bot,
event_time,
)
if settings.BILLING_ENABLED: if settings.BILLING_ENABLED:
billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm)
billing_session.update_license_ledger_if_needed(event_time) billing_session.update_license_ledger_if_needed(event_time)
@ -677,12 +664,6 @@ def do_reactivate_user(user_profile: UserProfile, *, acting_user: Optional[UserP
) )
bot_owner_changed = True bot_owner_changed = True
do_increment_logging_stat(
user_profile.realm,
COUNT_STATS["active_users_log:is_bot:day"],
user_profile.is_bot,
event_time,
)
if settings.BILLING_ENABLED: if settings.BILLING_ENABLED:
billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm)
billing_session.update_license_ledger_if_needed(event_time) billing_session.update_license_ledger_if_needed(event_time)

View File

@ -7,7 +7,6 @@ from django.conf import settings
from django.db import transaction from django.db import transaction
from django.utils.timezone import now as timezone_now from django.utils.timezone import now as timezone_now
from analytics.lib.counts import COUNT_STATS, do_increment_logging_stat
from zerver.actions.user_groups import ( from zerver.actions.user_groups import (
do_send_user_group_members_update_event, do_send_user_group_members_update_event,
update_users_in_full_members_system_group, update_users_in_full_members_system_group,
@ -360,13 +359,6 @@ def do_deactivate_user(
}, },
) )
maybe_enqueue_audit_log_upload(user_profile.realm) maybe_enqueue_audit_log_upload(user_profile.realm)
do_increment_logging_stat(
user_profile.realm,
COUNT_STATS["active_users_log:is_bot:day"],
user_profile.is_bot,
event_time,
increment=-1,
)
if settings.BILLING_ENABLED: if settings.BILLING_ENABLED:
billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm)
billing_session.update_license_ledger_if_needed(event_time) billing_session.update_license_ledger_if_needed(event_time)

View File

@ -936,7 +936,7 @@ class LoginTest(ZulipTestCase):
# seem to be any O(N) behavior. Some of the cache hits are related # seem to be any O(N) behavior. Some of the cache hits are related
# to sending messages, such as getting the welcome bot, looking up # to sending messages, such as getting the welcome bot, looking up
# the alert words for a realm, etc. # the alert words for a realm, etc.
with self.assert_database_query_count(91), self.assert_memcached_count(14): with self.assert_database_query_count(90), self.assert_memcached_count(14):
with self.captureOnCommitCallbacks(execute=True): with self.captureOnCommitCallbacks(execute=True):
self.register(self.nonreg_email("test"), "test") self.register(self.nonreg_email("test"), "test")

View File

@ -908,7 +908,7 @@ class QueryCountTest(ZulipTestCase):
prereg_user = PreregistrationUser.objects.get(email="fred@zulip.com") prereg_user = PreregistrationUser.objects.get(email="fred@zulip.com")
with self.assert_database_query_count(81): with self.assert_database_query_count(80):
with self.assert_memcached_count(19): with self.assert_memcached_count(19):
with self.capture_send_event_calls(expected_num_events=10) as events: with self.capture_send_event_calls(expected_num_events=10) as events:
fred = do_create_user( fred = do_create_user(