mirror of https://github.com/zulip/zulip.git
analytics: Rewrite realm_active_humans::day query.
This makes it no longer dependent on `active_users_audit:is_bot:day`, which subsequent commits will make a RealmCount, not UserCount, query. This folds the same behaviour of `active_users_audit` directly into the query; however, running only over active users, using the index from the earlier commit, and using the new `DISTINCT ON` formulation together make this a fast query compared to `active_users_audit:is_bot:day` + the old `realm_active_humans::day`.
This commit is contained in:
parent
e638ae44a8
commit
195defb031
|
@ -783,29 +783,45 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
usercount1.realm_id, count(*), %(property)s, NULL, %(time_end)s
|
active_usercount.realm_id, count(*), %(property)s, NULL, %(time_end)s
|
||||||
FROM (
|
FROM (
|
||||||
SELECT realm_id, user_id
|
SELECT
|
||||||
FROM analytics_usercount
|
realm_id,
|
||||||
WHERE
|
user_id
|
||||||
property = 'active_users_audit:is_bot:day' AND
|
FROM
|
||||||
subgroup = 'false' AND
|
analytics_usercount
|
||||||
{realm_clause}
|
WHERE
|
||||||
end_time = %(time_end)s
|
property = '15day_actives::day'
|
||||||
) usercount1
|
{realm_clause}
|
||||||
|
AND end_time = %(time_end)s
|
||||||
|
) active_usercount
|
||||||
|
JOIN zerver_userprofile ON active_usercount.user_id = zerver_userprofile.id
|
||||||
JOIN (
|
JOIN (
|
||||||
SELECT realm_id, user_id
|
SELECT DISTINCT ON (modified_user_id)
|
||||||
FROM analytics_usercount
|
modified_user_id, event_type
|
||||||
WHERE
|
FROM
|
||||||
property = '15day_actives::day' AND
|
zerver_realmauditlog
|
||||||
{realm_clause}
|
WHERE
|
||||||
end_time = %(time_end)s
|
event_type IN ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated})
|
||||||
) usercount2
|
AND event_time < %(time_end)s
|
||||||
ON
|
ORDER BY
|
||||||
usercount1.user_id = usercount2.user_id
|
modified_user_id,
|
||||||
GROUP BY usercount1.realm_id
|
event_time DESC
|
||||||
|
) last_user_event ON last_user_event.modified_user_id = active_usercount.user_id
|
||||||
|
WHERE
|
||||||
|
NOT zerver_userprofile.is_bot
|
||||||
|
AND event_type IN ({user_created}, {user_activated}, {user_reactivated})
|
||||||
|
GROUP BY
|
||||||
|
active_usercount.realm_id
|
||||||
"""
|
"""
|
||||||
).format(**kwargs, realm_clause=realm_clause)
|
).format(
|
||||||
|
**kwargs,
|
||||||
|
user_created=Literal(RealmAuditLog.USER_CREATED),
|
||||||
|
user_activated=Literal(RealmAuditLog.USER_ACTIVATED),
|
||||||
|
user_deactivated=Literal(RealmAuditLog.USER_DEACTIVATED),
|
||||||
|
user_reactivated=Literal(RealmAuditLog.USER_REACTIVATED),
|
||||||
|
realm_clause=realm_clause,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Currently unused and untested
|
# Currently unused and untested
|
||||||
|
@ -951,7 +967,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
|
||||||
"realm_active_humans::day",
|
"realm_active_humans::day",
|
||||||
sql_data_collector(RealmCount, count_realm_active_humans_query(realm), None),
|
sql_data_collector(RealmCount, count_realm_active_humans_query(realm), None),
|
||||||
CountStat.DAY,
|
CountStat.DAY,
|
||||||
dependencies=["active_users_audit:is_bot:day", "15day_actives::day"],
|
dependencies=["15day_actives::day"],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@ from datetime import datetime, timedelta, timezone
|
||||||
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type
|
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import orjson
|
|
||||||
import time_machine
|
import time_machine
|
||||||
from django.apps import apps
|
from django.apps import apps
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
@ -2081,18 +2080,6 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
||||||
self.stat = COUNT_STATS["realm_active_humans::day"]
|
self.stat = COUNT_STATS["realm_active_humans::day"]
|
||||||
self.current_property = self.stat.property
|
self.current_property = self.stat.property
|
||||||
|
|
||||||
def mark_audit_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None:
|
|
||||||
if end_time is None:
|
|
||||||
end_time = self.TIME_ZERO
|
|
||||||
UserCount.objects.create(
|
|
||||||
user=user,
|
|
||||||
realm=user.realm,
|
|
||||||
property="active_users_audit:is_bot:day",
|
|
||||||
subgroup=orjson.dumps(user.is_bot).decode(),
|
|
||||||
end_time=end_time,
|
|
||||||
value=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
def mark_15day_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None:
|
def mark_15day_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None:
|
||||||
if end_time is None:
|
if end_time is None:
|
||||||
end_time = self.TIME_ZERO
|
end_time = self.TIME_ZERO
|
||||||
|
@ -2100,38 +2087,35 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
||||||
user=user, realm=user.realm, property="15day_actives::day", end_time=end_time, value=1
|
user=user, realm=user.realm, property="15day_actives::day", end_time=end_time, value=1
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_basic_boolean_logic(self) -> None:
|
def test_basic_logic(self) -> None:
|
||||||
user = self.create_user()
|
user = self.create_user()
|
||||||
self.mark_audit_active(user, end_time=self.TIME_ZERO - self.DAY)
|
|
||||||
self.mark_15day_active(user, end_time=self.TIME_ZERO)
|
self.mark_15day_active(user, end_time=self.TIME_ZERO)
|
||||||
self.mark_audit_active(user, end_time=self.TIME_ZERO + self.DAY)
|
|
||||||
self.mark_15day_active(user, end_time=self.TIME_ZERO + self.DAY)
|
self.mark_15day_active(user, end_time=self.TIME_ZERO + self.DAY)
|
||||||
|
|
||||||
for i in [-1, 0, 1]:
|
for i in [-1, 0, 1]:
|
||||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY)
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY)
|
||||||
self.assertTableState(RealmCount, ["value", "end_time"], [[1, self.TIME_ZERO + self.DAY]])
|
self.assertTableState(
|
||||||
|
RealmCount, ["value", "end_time"], [[1, self.TIME_ZERO], [1, self.TIME_ZERO + self.DAY]]
|
||||||
|
)
|
||||||
|
|
||||||
def test_bots_not_counted(self) -> None:
|
def test_bots_not_counted(self) -> None:
|
||||||
bot = self.create_user(is_bot=True)
|
bot = self.create_user(is_bot=True)
|
||||||
self.mark_audit_active(bot)
|
|
||||||
self.mark_15day_active(bot)
|
self.mark_15day_active(bot)
|
||||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
self.assertTableState(RealmCount, [], [])
|
self.assertTableState(RealmCount, [], [])
|
||||||
|
|
||||||
def test_multiple_users_realms_and_times(self) -> None:
|
def test_multiple_users_realms_and_times(self) -> None:
|
||||||
user1 = self.create_user()
|
user1 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY)
|
||||||
user2 = self.create_user()
|
user2 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY)
|
||||||
second_realm = do_create_realm(string_id="second", name="second")
|
second_realm = do_create_realm(string_id="second", name="second")
|
||||||
user3 = self.create_user(realm=second_realm)
|
user3 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY, realm=second_realm)
|
||||||
user4 = self.create_user(realm=second_realm)
|
user4 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY, realm=second_realm)
|
||||||
user5 = self.create_user(realm=second_realm)
|
user5 = self.create_user(date_joined=self.TIME_ZERO - 2 * self.DAY, realm=second_realm)
|
||||||
|
|
||||||
for user in [user1, user2, user3, user4, user5]:
|
|
||||||
self.mark_audit_active(user)
|
|
||||||
self.mark_15day_active(user)
|
|
||||||
for user in [user1, user3, user4]:
|
for user in [user1, user3, user4]:
|
||||||
self.mark_audit_active(user, end_time=self.TIME_ZERO - self.DAY)
|
|
||||||
self.mark_15day_active(user, end_time=self.TIME_ZERO - self.DAY)
|
self.mark_15day_active(user, end_time=self.TIME_ZERO - self.DAY)
|
||||||
|
for user in [user1, user2, user3, user4, user5]:
|
||||||
|
self.mark_15day_active(user)
|
||||||
|
|
||||||
for i in [-1, 0, 1]:
|
for i in [-1, 0, 1]:
|
||||||
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY)
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY)
|
||||||
|
@ -2139,17 +2123,14 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
||||||
RealmCount,
|
RealmCount,
|
||||||
["value", "realm", "end_time"],
|
["value", "realm", "end_time"],
|
||||||
[
|
[
|
||||||
[2, self.default_realm, self.TIME_ZERO],
|
|
||||||
[3, second_realm, self.TIME_ZERO],
|
|
||||||
[1, self.default_realm, self.TIME_ZERO - self.DAY],
|
[1, self.default_realm, self.TIME_ZERO - self.DAY],
|
||||||
[2, second_realm, self.TIME_ZERO - self.DAY],
|
[2, second_realm, self.TIME_ZERO - self.DAY],
|
||||||
|
[2, self.default_realm, self.TIME_ZERO],
|
||||||
|
[3, second_realm, self.TIME_ZERO],
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check that adding spurious entries doesn't make a difference
|
# Check that adding spurious entries doesn't make a difference
|
||||||
self.mark_audit_active(user1, end_time=self.TIME_ZERO + self.DAY)
|
|
||||||
self.mark_15day_active(user2, end_time=self.TIME_ZERO + self.DAY)
|
|
||||||
self.mark_15day_active(user2, end_time=self.TIME_ZERO - self.DAY)
|
|
||||||
self.create_user()
|
self.create_user()
|
||||||
third_realm = do_create_realm(string_id="third", name="third")
|
third_realm = do_create_realm(string_id="third", name="third")
|
||||||
self.create_user(realm=third_realm)
|
self.create_user(realm=third_realm)
|
||||||
|
@ -2162,10 +2143,10 @@ class TestRealmActiveHumans(AnalyticsTestCase):
|
||||||
RealmCount,
|
RealmCount,
|
||||||
["value", "realm", "end_time"],
|
["value", "realm", "end_time"],
|
||||||
[
|
[
|
||||||
[2, self.default_realm, self.TIME_ZERO],
|
|
||||||
[3, second_realm, self.TIME_ZERO],
|
|
||||||
[1, self.default_realm, self.TIME_ZERO - self.DAY],
|
[1, self.default_realm, self.TIME_ZERO - self.DAY],
|
||||||
[2, second_realm, self.TIME_ZERO - self.DAY],
|
[2, second_realm, self.TIME_ZERO - self.DAY],
|
||||||
|
[2, self.default_realm, self.TIME_ZERO],
|
||||||
|
[3, second_realm, self.TIME_ZERO],
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue