mirror of https://github.com/zulip/zulip.git
analytics: Rewrite realm_active_humans::day query.
This makes it no longer dependent on active_users_audit:is_bot:day, which subsequent commits will make a RealmCount, not UserCount, query. This folds the same behaviour of `active_users_audit` directly into the query; however, only running over active users, using the index from the earlier commit, and using the new `DISTINCT ON` formulation make this a fast query compared to `active_users_audit:is_bot:day` + the old `realm_active_humans::day`.
This commit is contained in:
parent
e638ae44a8
commit
195defb031
|
@ -783,29 +783,45 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn:
|
|||
INSERT INTO analytics_realmcount
|
||||
(realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
usercount1.realm_id, count(*), %(property)s, NULL, %(time_end)s
|
||||
active_usercount.realm_id, count(*), %(property)s, NULL, %(time_end)s
|
||||
FROM (
|
||||
SELECT realm_id, user_id
|
||||
FROM analytics_usercount
|
||||
SELECT
|
||||
realm_id,
|
||||
user_id
|
||||
FROM
|
||||
analytics_usercount
|
||||
WHERE
|
||||
property = 'active_users_audit:is_bot:day' AND
|
||||
subgroup = 'false' AND
|
||||
property = '15day_actives::day'
|
||||
{realm_clause}
|
||||
end_time = %(time_end)s
|
||||
) usercount1
|
||||
AND end_time = %(time_end)s
|
||||
) active_usercount
|
||||
JOIN zerver_userprofile ON active_usercount.user_id = zerver_userprofile.id
|
||||
JOIN (
|
||||
SELECT realm_id, user_id
|
||||
FROM analytics_usercount
|
||||
SELECT DISTINCT ON (modified_user_id)
|
||||
modified_user_id, event_type
|
||||
FROM
|
||||
zerver_realmauditlog
|
||||
WHERE
|
||||
property = '15day_actives::day' AND
|
||||
{realm_clause}
|
||||
end_time = %(time_end)s
|
||||
) usercount2
|
||||
ON
|
||||
usercount1.user_id = usercount2.user_id
|
||||
GROUP BY usercount1.realm_id
|
||||
event_type IN ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated})
|
||||
AND event_time < %(time_end)s
|
||||
ORDER BY
|
||||
modified_user_id,
|
||||
event_time DESC
|
||||
) last_user_event ON last_user_event.modified_user_id = active_usercount.user_id
|
||||
WHERE
|
||||
NOT zerver_userprofile.is_bot
|
||||
AND event_type IN ({user_created}, {user_activated}, {user_reactivated})
|
||||
GROUP BY
|
||||
active_usercount.realm_id
|
||||
"""
|
||||
).format(**kwargs, realm_clause=realm_clause)
|
||||
).format(
|
||||
**kwargs,
|
||||
user_created=Literal(RealmAuditLog.USER_CREATED),
|
||||
user_activated=Literal(RealmAuditLog.USER_ACTIVATED),
|
||||
user_deactivated=Literal(RealmAuditLog.USER_DEACTIVATED),
|
||||
user_reactivated=Literal(RealmAuditLog.USER_REACTIVATED),
|
||||
realm_clause=realm_clause,
|
||||
)
|
||||
|
||||
|
||||
# Currently unused and untested
|
||||
|
@ -951,7 +967,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
|
|||
"realm_active_humans::day",
|
||||
sql_data_collector(RealmCount, count_realm_active_humans_query(realm), None),
|
||||
CountStat.DAY,
|
||||
dependencies=["active_users_audit:is_bot:day", "15day_actives::day"],
|
||||
dependencies=["15day_actives::day"],
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@ from datetime import datetime, timedelta, timezone
|
|||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type
|
||||
from unittest import mock
|
||||
|
||||
import orjson
|
||||
import time_machine
|
||||
from django.apps import apps
|
||||
from django.db import models
|
||||
|
@ -2081,18 +2080,6 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
|||
self.stat = COUNT_STATS["realm_active_humans::day"]
|
||||
self.current_property = self.stat.property
|
||||
|
||||
def mark_audit_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None:
|
||||
if end_time is None:
|
||||
end_time = self.TIME_ZERO
|
||||
UserCount.objects.create(
|
||||
user=user,
|
||||
realm=user.realm,
|
||||
property="active_users_audit:is_bot:day",
|
||||
subgroup=orjson.dumps(user.is_bot).decode(),
|
||||
end_time=end_time,
|
||||
value=1,
|
||||
)
|
||||
|
||||
def mark_15day_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None:
|
||||
if end_time is None:
|
||||
end_time = self.TIME_ZERO
|
||||
|
@ -2100,38 +2087,35 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
|||
user=user, realm=user.realm, property="15day_actives::day", end_time=end_time, value=1
|
||||
)
|
||||
|
||||
def test_basic_boolean_logic(self) -> None:
|
||||
def test_basic_logic(self) -> None:
|
||||
user = self.create_user()
|
||||
self.mark_audit_active(user, end_time=self.TIME_ZERO - self.DAY)
|
||||
self.mark_15day_active(user, end_time=self.TIME_ZERO)
|
||||
self.mark_audit_active(user, end_time=self.TIME_ZERO + self.DAY)
|
||||
self.mark_15day_active(user, end_time=self.TIME_ZERO + self.DAY)
|
||||
|
||||
for i in [-1, 0, 1]:
|
||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY)
|
||||
self.assertTableState(RealmCount, ["value", "end_time"], [[1, self.TIME_ZERO + self.DAY]])
|
||||
self.assertTableState(
|
||||
RealmCount, ["value", "end_time"], [[1, self.TIME_ZERO], [1, self.TIME_ZERO + self.DAY]]
|
||||
)
|
||||
|
||||
def test_bots_not_counted(self) -> None:
|
||||
bot = self.create_user(is_bot=True)
|
||||
self.mark_audit_active(bot)
|
||||
self.mark_15day_active(bot)
|
||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||
self.assertTableState(RealmCount, [], [])
|
||||
|
||||
def test_multiple_users_realms_and_times(self) -> None:
|
||||
user1 = self.create_user()
|
||||
user2 = self.create_user()
|
||||
user1 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY)
|
||||
user2 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY)
|
||||
second_realm = do_create_realm(string_id="second", name="second")
|
||||
user3 = self.create_user(realm=second_realm)
|
||||
user4 = self.create_user(realm=second_realm)
|
||||
user5 = self.create_user(realm=second_realm)
|
||||
user3 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY, realm=second_realm)
|
||||
user4 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY, realm=second_realm)
|
||||
user5 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY, realm=second_realm)
|
||||
|
||||
for user in [user1, user2, user3, user4, user5]:
|
||||
self.mark_audit_active(user)
|
||||
self.mark_15day_active(user)
|
||||
for user in [user1, user3, user4]:
|
||||
self.mark_audit_active(user, end_time=self.TIME_ZERO - self.DAY)
|
||||
self.mark_15day_active(user, end_time=self.TIME_ZERO - self.DAY)
|
||||
for user in [user1, user2, user3, user4, user5]:
|
||||
self.mark_15day_active(user)
|
||||
|
||||
for i in [-1, 0, 1]:
|
||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY)
|
||||
|
@ -2139,17 +2123,14 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
|||
RealmCount,
|
||||
["value", "realm", "end_time"],
|
||||
[
|
||||
[2, self.default_realm, self.TIME_ZERO],
|
||||
[3, second_realm, self.TIME_ZERO],
|
||||
[1, self.default_realm, self.TIME_ZERO - self.DAY],
|
||||
[2, second_realm, self.TIME_ZERO - self.DAY],
|
||||
[2, self.default_realm, self.TIME_ZERO],
|
||||
[3, second_realm, self.TIME_ZERO],
|
||||
],
|
||||
)
|
||||
|
||||
# Check that adding spurious entries doesn't make a difference
|
||||
self.mark_audit_active(user1, end_time=self.TIME_ZERO + self.DAY)
|
||||
self.mark_15day_active(user2, end_time=self.TIME_ZERO + self.DAY)
|
||||
self.mark_15day_active(user2, end_time=self.TIME_ZERO - self.DAY)
|
||||
self.create_user()
|
||||
third_realm = do_create_realm(string_id="third", name="third")
|
||||
self.create_user(realm=third_realm)
|
||||
|
@ -2162,10 +2143,10 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
|||
RealmCount,
|
||||
["value", "realm", "end_time"],
|
||||
[
|
||||
[2, self.default_realm, self.TIME_ZERO],
|
||||
[3, second_realm, self.TIME_ZERO],
|
||||
[1, self.default_realm, self.TIME_ZERO - self.DAY],
|
||||
[2, second_realm, self.TIME_ZERO - self.DAY],
|
||||
[2, self.default_realm, self.TIME_ZERO],
|
||||
[3, second_realm, self.TIME_ZERO],
|
||||
],
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue