diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py index b209a0ff34..a872446f9e 100644 --- a/analytics/lib/counts.py +++ b/analytics/lib/counts.py @@ -678,34 +678,6 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn: ).format(**kwargs, realm_clause=realm_clause) -# Hardcodes the query needed by active_users:is_bot:day, since that is -# currently the only stat that uses this. -def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn: - if realm is None: - realm_clause: Composable = SQL("") - else: - realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL( - """ - INSERT INTO analytics_realmcount - (realm_id, value, property, subgroup, end_time) - SELECT - zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s - FROM zerver_realm - JOIN zerver_userprofile - ON - zerver_realm.id = zerver_userprofile.realm_id - WHERE - zerver_realm.date_created < %(time_end)s AND - zerver_userprofile.date_joined >= %(time_start)s AND - zerver_userprofile.date_joined < %(time_end)s AND - {realm_clause} - zerver_userprofile.is_active = TRUE - GROUP BY zerver_realm.id {group_by_clause} -""" - ).format(**kwargs, realm_clause=realm_clause) - - # Hardcodes the query needed for active_users_audit:is_bot:day. # Assumes that a user cannot have two RealmAuditLog entries with the # same event_time and event_type in [RealmAuditLog.USER_CREATED, @@ -878,10 +850,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]: ), CountStat.DAY, ), - # Number of users stats - # Stats that count the number of active users in the UserProfile.is_active sense. - # 'active_users_audit:is_bot:day' is the canonical record of which users were - # active on which days (in the UserProfile.is_active sense). + # Counts the number of active users in the UserProfile.is_active sense. # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected. CountStat( "active_users_audit:is_bot:day", @@ -890,29 +859,6 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]: ), CountStat.DAY, ), - # Important note: LoggingCountStat objects aren't passed the - # Realm argument, because by nature they have a logging - # structure, not a pull-from-database structure, so there's no - # way to compute them for a single realm after the fact (the - # use case for passing a Realm argument). - # Sanity check on 'active_users_audit:is_bot:day', and a archetype for future LoggingCountStats. - # In RealmCount, 'active_users_audit:is_bot:day' should be the partial - # sum sequence of 'active_users_log:is_bot:day', for any realm that - # started after the latter stat was introduced. - # Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER. - LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY), - # Another sanity check on 'active_users_audit:is_bot:day'. Is only an - # approximation, e.g. if a user is deactivated between the end of the - # day and when this stat is run, they won't be counted. However, is the - # simplest of the three to inspect by hand. - CountStat( - "active_users:is_bot:day", - sql_data_collector( - RealmCount, count_user_by_realm_query(realm), (UserProfile, "is_bot") - ), - CountStat.DAY, - interval=TIMEDELTA_MAX, - ), CountStat( "upload_quota_used_bytes::day", sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None), diff --git a/analytics/migrations/0019_remove_unused_counts.py b/analytics/migrations/0019_remove_unused_counts.py new file mode 100644 index 0000000000..56d3d8a7ec --- /dev/null +++ b/analytics/migrations/0019_remove_unused_counts.py @@ -0,0 +1,26 @@ +from django.db import migrations + +REMOVED_COUNTS = ( + "active_users_log:is_bot:day", + "active_users:is_bot:day", +) + + +class Migration(migrations.Migration): + elidable = True + + dependencies = [ + ("analytics", "0018_remove_usercount_active_users_audit"), + ] + + operations = [ + migrations.RunSQL( + [ + ("DELETE FROM analytics_realmcount WHERE property IN %s", (REMOVED_COUNTS,)), + ( + "DELETE FROM analytics_installationcount WHERE property IN %s", + (REMOVED_COUNTS,), + ), + ] + ) + ] diff --git a/analytics/tests/test_counts.py b/analytics/tests/test_counts.py index c1ceafaec3..ec0ab80cf1 100644 --- a/analytics/tests/test_counts.py +++ b/analytics/tests/test_counts.py @@ -543,34 +543,6 @@ class TestCountStats(AnalyticsTestCase): # This huddle should not show up anywhere self.create_huddle_with_recipient() - def test_active_users_by_is_bot(self) -> None: - stat = COUNT_STATS["active_users:is_bot:day"] - self.current_property = stat.property - - # To be included - self.create_user(is_bot=True) - self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR) - self.create_user(is_bot=False) - - # To be excluded - self.create_user(is_active=False) - - do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - - self.assertTableState( - RealmCount, - ["value", "subgroup", "realm"], - [ - [2, "true"], - [1, "false"], - [3, "false", self.second_realm], - [1, "false", self.no_message_realm], - ], - ) - self.assertTableState(InstallationCount, ["value", "subgroup"], [[2, "true"], [5, "false"]]) - self.assertTableState(UserCount, [], []) - self.assertTableState(StreamCount, [], []) - def test_upload_quota_used_bytes(self) -> None: stat = COUNT_STATS["upload_quota_used_bytes::day"] self.current_property = stat.property @@ -606,31 +578,6 @@ class TestCountStats(AnalyticsTestCase): ], ) - def test_active_users_by_is_bot_for_realm_constraint(self) -> None: - # For single Realm - - COUNT_STATS = get_count_stats(self.default_realm) - stat = COUNT_STATS["active_users:is_bot:day"] - self.current_property = stat.property - - # To be included - self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR) - self.create_user(is_bot=False) - - # To be excluded - self.create_user( - email="test@second.analytics", - realm=self.second_realm, - date_joined=self.TIME_ZERO - 2 * self.DAY, - ) - - do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(RealmCount, ["value", "subgroup"], [[1, "true"], [1, "false"]]) - # No aggregation to InstallationCount with realm constraint - self.assertTableState(InstallationCount, ["value", "subgroup"], []) - self.assertTableState(UserCount, [], []) - self.assertTableState(StreamCount, [], []) - def test_messages_sent_by_is_bot(self) -> None: stat = COUNT_STATS["messages_sent:is_bot:hour"] self.current_property = stat.property @@ -1419,46 +1366,6 @@ class TestLoggingCountStats(AnalyticsTestCase): self.assertTableState(UserCount, ["property", "value"], [["user test", 1]]) self.assertTableState(StreamCount, ["property", "value"], [["stream test", 1]]) - def test_active_users_log_by_is_bot(self) -> None: - property = "active_users_log:is_bot:day" - user = do_create_user( - "email", "password", self.default_realm, "full_name", acting_user=None - ) - self.assertEqual( - 1, - RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[ - "value__sum" - ], - ) - do_deactivate_user(user, acting_user=None) - self.assertEqual( - 0, - RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[ - "value__sum" - ], - ) - do_activate_mirror_dummy_user(user, acting_user=None) - self.assertEqual( - 1, - RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[ - "value__sum" - ], - ) - do_deactivate_user(user, acting_user=None) - self.assertEqual( - 0, - RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[ - "value__sum" - ], - ) - do_reactivate_user(user, acting_user=None) - self.assertEqual( - 1, - RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[ - "value__sum" - ], - ) - @override_settings(PUSH_NOTIFICATION_BOUNCER_URL="https://push.zulip.org.example.com") def test_mobile_pushes_received_count(self) -> None: self.server_uuid = "6cde5f7a-1f7e-4978-9716-49f69ebfc9fe" diff --git a/docs/subsystems/analytics.md b/docs/subsystems/analytics.md index 4ae08df098..175cefe2b8 100644 --- a/docs/subsystems/analytics.md +++ b/docs/subsystems/analytics.md @@ -35,9 +35,10 @@ The Zulip analytics system is built around collecting time series data in a set of database tables. Each of these tables has the following fields: - property: A human readable string uniquely identifying a `CountStat` - object. Example: `"active_users:is_bot:hour"` or `"messages_sent:client:day"`. + object. Example: `"active_users_audit:is_bot:hour"` or + `"messages_sent:client:day"`. - subgroup: Almost all `CountStat` objects are further sliced by subgroup. For - `"active_users:is_bot:day"`, this column will be `False` for measurements of + `"active_users_audit:is_bot:day"`, this column will be `False` for measurements of humans, and `True` for measurements of bots. For `"messages_sent:client:day"`, this column is the client_id of the client under consideration. - end_time: A datetime indicating the end of a time interval. It will be on @@ -45,7 +46,7 @@ set of database tables. Each of these tables has the following fields: frequency. The time interval is determined by the `CountStat`. - various "id" fields: Foreign keys into `Realm`, `UserProfile`, `Stream`, or nothing. E.g. the `RealmCount` table has a foreign key into `Realm`. -- value: The integer counts. For `"active_users:is_bot:hour"` in the +- value: The integer counts. For `"active_users_audit:is_bot:hour"` in the `RealmCount` table, this is the number of active humans or bots (depending on subgroup) in a particular realm at a particular `end_time`. For `"messages_sent:client:day"` in the `UserCount` table, this is the number of diff --git a/zerver/actions/create_user.py b/zerver/actions/create_user.py index 41c7a6db66..0006098a65 100644 --- a/zerver/actions/create_user.py +++ b/zerver/actions/create_user.py @@ -8,7 +8,6 @@ from django.utils.timezone import now as timezone_now from django.utils.translation import gettext as _ from django.utils.translation import override as override_language -from analytics.lib.counts import COUNT_STATS, do_increment_logging_stat from confirmation import settings as confirmation_settings from zerver.actions.message_send import ( internal_send_huddle_message, @@ -514,12 +513,6 @@ def do_create_user( realm_creation_audit_log.acting_user = user_profile realm_creation_audit_log.save(update_fields=["acting_user"]) - do_increment_logging_stat( - user_profile.realm, - COUNT_STATS["active_users_log:is_bot:day"], - user_profile.is_bot, - event_time, - ) if settings.BILLING_ENABLED: billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session.update_license_ledger_if_needed(event_time) @@ -623,12 +616,6 @@ def do_activate_mirror_dummy_user( }, ) maybe_enqueue_audit_log_upload(user_profile.realm) - do_increment_logging_stat( - user_profile.realm, - COUNT_STATS["active_users_log:is_bot:day"], - user_profile.is_bot, - event_time, - ) if settings.BILLING_ENABLED: billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session.update_license_ledger_if_needed(event_time) @@ -677,12 +664,6 @@ def do_reactivate_user(user_profile: UserProfile, *, acting_user: Optional[UserP ) bot_owner_changed = True - do_increment_logging_stat( - user_profile.realm, - COUNT_STATS["active_users_log:is_bot:day"], - user_profile.is_bot, - event_time, - ) if settings.BILLING_ENABLED: billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session.update_license_ledger_if_needed(event_time) diff --git a/zerver/actions/users.py b/zerver/actions/users.py index 3103529d35..a5c3197fbc 100644 --- a/zerver/actions/users.py +++ b/zerver/actions/users.py @@ -7,7 +7,6 @@ from django.conf import settings from django.db import transaction from django.utils.timezone import now as timezone_now -from analytics.lib.counts import COUNT_STATS, do_increment_logging_stat from zerver.actions.user_groups import ( do_send_user_group_members_update_event, update_users_in_full_members_system_group, @@ -360,13 +359,6 @@ def do_deactivate_user( }, ) maybe_enqueue_audit_log_upload(user_profile.realm) - do_increment_logging_stat( - user_profile.realm, - COUNT_STATS["active_users_log:is_bot:day"], - user_profile.is_bot, - event_time, - increment=-1, - ) if settings.BILLING_ENABLED: billing_session = RealmBillingSession(user=user_profile, realm=user_profile.realm) billing_session.update_license_ledger_if_needed(event_time) diff --git a/zerver/tests/test_signup.py b/zerver/tests/test_signup.py index 95c0571765..990133e646 100644 --- a/zerver/tests/test_signup.py +++ b/zerver/tests/test_signup.py @@ -936,7 +936,7 @@ class LoginTest(ZulipTestCase): # seem to be any O(N) behavior. Some of the cache hits are related # to sending messages, such as getting the welcome bot, looking up # the alert words for a realm, etc. - with self.assert_database_query_count(91), self.assert_memcached_count(14): + with self.assert_database_query_count(90), self.assert_memcached_count(14): with self.captureOnCommitCallbacks(execute=True): self.register(self.nonreg_email("test"), "test") diff --git a/zerver/tests/test_users.py b/zerver/tests/test_users.py index fefecc3f67..f0dc6c516c 100644 --- a/zerver/tests/test_users.py +++ b/zerver/tests/test_users.py @@ -908,7 +908,7 @@ class QueryCountTest(ZulipTestCase): prereg_user = PreregistrationUser.objects.get(email="fred@zulip.com") - with self.assert_database_query_count(81): + with self.assert_database_query_count(80): with self.assert_memcached_count(19): with self.capture_send_event_calls(expected_num_events=10) as events: fred = do_create_user(