mirror of https://github.com/zulip/zulip.git
queue_processors: Process user_activity in one query.
This leads to significant speedups. In a test, with 100 random unique event classes, the old code processed a batch of 100 rows (on average 66-ish unique in the batch) in 0.45 seconds. Doing this in a single query processes the same batch in 0.0076 seconds.
This commit is contained in:
parent
1249b5929e
commit
37fa181e5f
|
@ -2,7 +2,7 @@ from datetime import datetime
|
|||
|
||||
from zerver.lib.queue import queue_json_publish
|
||||
from zerver.lib.timestamp import datetime_to_timestamp
|
||||
from zerver.models import UserActivity, UserActivityInterval, UserProfile
|
||||
from zerver.models import UserActivityInterval, UserProfile
|
||||
|
||||
|
||||
def do_update_user_activity_interval(user_profile: UserProfile, log_time: datetime) -> None:
|
||||
|
@ -29,22 +29,6 @@ def do_update_user_activity_interval(user_profile: UserProfile, log_time: dateti
|
|||
)
|
||||
|
||||
|
||||
def do_update_user_activity(
|
||||
user_profile_id: int, client_id: int, query: str, count: int, log_time: datetime
|
||||
) -> None:
|
||||
(activity, created) = UserActivity.objects.get_or_create(
|
||||
user_profile_id=user_profile_id,
|
||||
client_id=client_id,
|
||||
query=query,
|
||||
defaults={"last_visit": log_time, "count": count},
|
||||
)
|
||||
|
||||
if not created:
|
||||
activity.count += count
|
||||
activity.last_visit = log_time
|
||||
activity.save(update_fields=["last_visit", "count"])
|
||||
|
||||
|
||||
def update_user_activity_interval(user_profile: UserProfile, log_time: datetime) -> None:
|
||||
event = {"user_profile_id": user_profile.id, "time": datetime_to_timestamp(log_time)}
|
||||
queue_json_publish("user_activity_interval", event)
|
||||
|
|
|
@ -33,7 +33,7 @@ from zerver.actions.realm_settings import (
|
|||
do_set_realm_authentication_methods,
|
||||
)
|
||||
from zerver.actions.scheduled_messages import check_schedule_message
|
||||
from zerver.actions.user_activity import do_update_user_activity, do_update_user_activity_interval
|
||||
from zerver.actions.user_activity import do_update_user_activity_interval
|
||||
from zerver.actions.user_status import do_update_user_status
|
||||
from zerver.actions.user_topics import do_set_user_topic_visibility_policy
|
||||
from zerver.actions.users import do_deactivate_user
|
||||
|
@ -76,6 +76,7 @@ from zerver.models import (
|
|||
ScheduledMessage,
|
||||
Stream,
|
||||
Subscription,
|
||||
UserActivity,
|
||||
UserGroup,
|
||||
UserGroupMembership,
|
||||
UserMessage,
|
||||
|
@ -1862,9 +1863,20 @@ class SingleUserExportTest(ExportFile):
|
|||
stream_id = last_recipient.type_id
|
||||
self.assertEqual(stream_id, get_stream("Scotland", realm).id)
|
||||
|
||||
do_update_user_activity(cordelia.id, client.id, "/some/endpoint", 2, now)
|
||||
do_update_user_activity(cordelia.id, client.id, "/some/endpoint", 3, now)
|
||||
do_update_user_activity(othello.id, client.id, "/bogus", 20, now)
|
||||
UserActivity.objects.create(
|
||||
user_profile_id=cordelia.id,
|
||||
client_id=client.id,
|
||||
query="/some/endpoint",
|
||||
count=5,
|
||||
last_visit=now,
|
||||
)
|
||||
UserActivity.objects.create(
|
||||
user_profile_id=othello.id,
|
||||
client_id=client.id,
|
||||
query="/bogus",
|
||||
count=20,
|
||||
last_visit=now,
|
||||
)
|
||||
|
||||
@checker
|
||||
def zerver_useractivity(records: List[Record]) -> None:
|
||||
|
|
|
@ -80,37 +80,143 @@ def simulated_queue_client(client: FakeClient) -> Iterator[None]:
|
|||
|
||||
|
||||
class WorkerTest(ZulipTestCase):
|
||||
def test_UserActivityWorker(self) -> None:
|
||||
def test_useractivity_worker(self) -> None:
|
||||
fake_client = FakeClient()
|
||||
|
||||
user = self.example_user("hamlet")
|
||||
other_user = self.example_user("iago")
|
||||
UserActivity.objects.filter(
|
||||
user_profile=user.id,
|
||||
client=get_client("ios"),
|
||||
user_profile__in=[user.id, other_user.id],
|
||||
).delete()
|
||||
|
||||
data = dict(
|
||||
user_profile_id=user.id,
|
||||
client_id=get_client("ios").id,
|
||||
time=time.time(),
|
||||
query="send_message",
|
||||
# Enqueue two events for the same user/client/query
|
||||
now = time.time()
|
||||
for event_time in [now, now + 10]:
|
||||
fake_client.enqueue(
|
||||
"user_activity",
|
||||
dict(
|
||||
user_profile_id=user.id,
|
||||
client_id=get_client("ios").id,
|
||||
time=event_time,
|
||||
query="send_message",
|
||||
),
|
||||
)
|
||||
# And a third event for a different query
|
||||
fake_client.enqueue(
|
||||
"user_activity",
|
||||
dict(
|
||||
user_profile_id=user.id,
|
||||
client_id=get_client("ios").id,
|
||||
time=now + 15,
|
||||
query="get_events",
|
||||
),
|
||||
)
|
||||
# One for a different client
|
||||
fake_client.enqueue(
|
||||
"user_activity",
|
||||
dict(
|
||||
user_profile_id=user.id,
|
||||
client_id=get_client("website").id,
|
||||
time=now + 20,
|
||||
query="get_events",
|
||||
),
|
||||
)
|
||||
fake_client.enqueue("user_activity", data)
|
||||
|
||||
# Now process the event a second time and confirm count goes
|
||||
# up. Ideally, we'd use an event with a slightly newer
|
||||
# time, but it's not really important.
|
||||
fake_client.enqueue("user_activity", data)
|
||||
# And one for a different user
|
||||
fake_client.enqueue(
|
||||
"user_activity",
|
||||
dict(
|
||||
user_profile_id=other_user.id,
|
||||
client_id=get_client("ios").id,
|
||||
time=now + 25,
|
||||
query="get_events",
|
||||
),
|
||||
)
|
||||
|
||||
# Run the worker; this will produce a single upsert statement
|
||||
with simulated_queue_client(fake_client):
|
||||
worker = queue_processors.UserActivityWorker()
|
||||
worker.setup()
|
||||
worker.start()
|
||||
activity_records = UserActivity.objects.filter(
|
||||
user_profile=user.id,
|
||||
client=get_client("ios"),
|
||||
with self.assert_database_query_count(1):
|
||||
worker.start()
|
||||
|
||||
activity_records = UserActivity.objects.filter(
|
||||
user_profile__in=[user.id, other_user.id],
|
||||
).order_by("last_visit")
|
||||
self.assert_length(activity_records, 4)
|
||||
|
||||
self.assertEqual(activity_records[0].query, "send_message")
|
||||
self.assertEqual(activity_records[0].client.name, "ios")
|
||||
self.assertEqual(activity_records[0].count, 2)
|
||||
self.assertEqual(
|
||||
activity_records[0].last_visit, datetime.fromtimestamp(now + 10, tz=timezone.utc)
|
||||
)
|
||||
|
||||
self.assertEqual(activity_records[1].query, "get_events")
|
||||
self.assertEqual(activity_records[1].client.name, "ios")
|
||||
self.assertEqual(activity_records[1].count, 1)
|
||||
self.assertEqual(
|
||||
activity_records[1].last_visit, datetime.fromtimestamp(now + 15, tz=timezone.utc)
|
||||
)
|
||||
|
||||
self.assertEqual(activity_records[2].query, "get_events")
|
||||
self.assertEqual(activity_records[2].client.name, "website")
|
||||
self.assertEqual(activity_records[2].count, 1)
|
||||
self.assertEqual(
|
||||
activity_records[2].last_visit, datetime.fromtimestamp(now + 20, tz=timezone.utc)
|
||||
)
|
||||
|
||||
self.assertEqual(activity_records[3].query, "get_events")
|
||||
self.assertEqual(activity_records[3].user_profile, other_user)
|
||||
self.assertEqual(activity_records[3].count, 1)
|
||||
self.assertEqual(
|
||||
activity_records[3].last_visit, datetime.fromtimestamp(now + 25, tz=timezone.utc)
|
||||
)
|
||||
|
||||
# Add 3 more events which stack atop the existing send_message
|
||||
# to test the update part, and a new "home_real" event to show
|
||||
# the insert part of the upsert.
|
||||
for event_time in [now + 30, now + 35, now + 40]:
|
||||
fake_client.enqueue(
|
||||
"user_activity",
|
||||
dict(
|
||||
user_profile_id=user.id,
|
||||
client_id=get_client("ios").id,
|
||||
time=event_time,
|
||||
query="send_message",
|
||||
),
|
||||
)
|
||||
self.assert_length(activity_records, 1)
|
||||
self.assertEqual(activity_records[0].count, 2)
|
||||
fake_client.enqueue(
|
||||
"user_activity",
|
||||
dict(
|
||||
user_profile_id=user.id,
|
||||
client_id=get_client("ios").id,
|
||||
time=now + 45,
|
||||
query="home_real",
|
||||
),
|
||||
)
|
||||
# Run the worker again; this will insert one row and update the other
|
||||
with simulated_queue_client(fake_client):
|
||||
worker = queue_processors.UserActivityWorker()
|
||||
worker.setup()
|
||||
with self.assert_database_query_count(1):
|
||||
worker.start()
|
||||
activity_records = UserActivity.objects.filter(
|
||||
user_profile__in=[user.id, other_user.id],
|
||||
).order_by("last_visit")
|
||||
self.assert_length(activity_records, 5)
|
||||
self.assertEqual(activity_records[3].query, "send_message")
|
||||
self.assertEqual(activity_records[3].client.name, "ios")
|
||||
self.assertEqual(activity_records[3].count, 5)
|
||||
self.assertEqual(
|
||||
activity_records[3].last_visit, datetime.fromtimestamp(now + 40, tz=timezone.utc)
|
||||
)
|
||||
self.assertEqual(activity_records[4].query, "home_real")
|
||||
self.assertEqual(activity_records[4].client.name, "ios")
|
||||
self.assertEqual(activity_records[4].count, 1)
|
||||
self.assertEqual(
|
||||
activity_records[4].last_visit, datetime.fromtimestamp(now + 45, tz=timezone.utc)
|
||||
)
|
||||
|
||||
def test_missed_message_worker(self) -> None:
|
||||
cordelia = self.example_user("cordelia")
|
||||
|
|
|
@ -42,6 +42,7 @@ from django.db.utils import IntegrityError
|
|||
from django.utils.timezone import now as timezone_now
|
||||
from django.utils.translation import gettext as _
|
||||
from django.utils.translation import override as override_language
|
||||
from psycopg2.sql import SQL, Literal
|
||||
from returns.curry import partial
|
||||
from sentry_sdk import add_breadcrumb, configure_scope
|
||||
from typing_extensions import override
|
||||
|
@ -53,7 +54,7 @@ from zerver.actions.message_flags import do_mark_stream_messages_as_read
|
|||
from zerver.actions.message_send import internal_send_private_message, render_incoming_message
|
||||
from zerver.actions.presence import do_update_user_presence
|
||||
from zerver.actions.realm_export import notify_realm_export
|
||||
from zerver.actions.user_activity import do_update_user_activity, do_update_user_activity_interval
|
||||
from zerver.actions.user_activity import do_update_user_activity_interval
|
||||
from zerver.context_processors import common_context
|
||||
from zerver.lib.bot_lib import EmbeddedBotHandler, EmbeddedBotQuitError, get_bot_handler
|
||||
from zerver.lib.context_managers import lockfile
|
||||
|
@ -541,19 +542,35 @@ class UserActivityWorker(LoopQueueProcessingWorker):
|
|||
if key_tuple not in uncommitted_events:
|
||||
uncommitted_events[key_tuple] = (1, event["time"])
|
||||
else:
|
||||
count, time = uncommitted_events[key_tuple]
|
||||
uncommitted_events[key_tuple] = (count + 1, max(time, event["time"]))
|
||||
count, event_time = uncommitted_events[key_tuple]
|
||||
uncommitted_events[key_tuple] = (count + 1, max(event_time, event["time"]))
|
||||
|
||||
# Then we insert the updates into the database.
|
||||
#
|
||||
# TODO: Doing these updates in sequence individually is likely
|
||||
# inefficient; the idealized version would do some sort of
|
||||
# bulk insert_or_update query.
|
||||
for key_tuple in uncommitted_events:
|
||||
(user_profile_id, client_id, query) = key_tuple
|
||||
count, time = uncommitted_events[key_tuple]
|
||||
log_time = timestamp_to_datetime(time)
|
||||
do_update_user_activity(user_profile_id, client_id, query, count, log_time)
|
||||
rows = []
|
||||
for key_tuple, value_tuple in uncommitted_events.items():
|
||||
user_profile_id, client_id, query = key_tuple
|
||||
count, event_time = value_tuple
|
||||
rows.append(
|
||||
SQL("({},{},{},{},to_timestamp({}))").format(
|
||||
Literal(user_profile_id),
|
||||
Literal(client_id),
|
||||
Literal(query),
|
||||
Literal(count),
|
||||
Literal(event_time),
|
||||
)
|
||||
)
|
||||
|
||||
# Perform a single bulk UPSERT for all of the rows
|
||||
sql_query = SQL(
|
||||
"""
|
||||
INSERT INTO zerver_useractivity(user_profile_id, client_id, query, count, last_visit)
|
||||
VALUES {rows}
|
||||
ON CONFLICT (user_profile_id, client_id, query) DO UPDATE SET
|
||||
count = zerver_useractivity.count + excluded.count,
|
||||
last_visit = greatest(zerver_useractivity.last_visit, excluded.last_visit)
|
||||
"""
|
||||
).format(rows=SQL(", ").join(rows))
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute(sql_query)
|
||||
|
||||
|
||||
@assign_queue("user_activity_interval")
|
||||
|
|
Loading…
Reference in New Issue