presence: Disable live presence updates in larger realms.

As discussed in the comment, this is a critical scalability
optimization for organizations with thousands of users.

With substantial comment updates by tabbott.
This commit is contained in:
Mateusz Mandera 2021-05-05 13:15:13 +02:00 committed by Tim Abbott
parent 638ffb36a4
commit cc96f02947
3 changed files with 62 additions and 12 deletions

View File

@ -4899,6 +4899,30 @@ def do_update_user_activity(
def send_presence_changed(user_profile: UserProfile, presence: UserPresence) -> None:
# Most presence data is sent to clients in the main presence
# endpoint in response to the user's own presence; this results
# data that is 1-2 minutes stale for who is online. The flaw with
# this plan is when a user comes back online and then immediately
# sends a message, recipients may still see that user as offline!
# We solve that by sending an immediate presence update clients.
#
# See https://zulip.readthedocs.io/en/latest/subsystems/presence.html for
# internals documentation on presence.
user_ids = active_user_ids(user_profile.realm_id)
if len(user_ids) > settings.USER_LIMIT_FOR_SENDING_PRESENCE_UPDATE_EVENTS:
# These immediate presence generate quadratic work for Tornado
# (linear number of users in each event and the frequency of
# users coming online grows linearly with userbase too). In
# organizations with thousands of users, this can overload
# Tornado, especially if much of the realm comes online at the
# same time.
#
# The utility of these live-presence updates goes down as
# organizations get bigger (since one is much less likely to
# be paying attention to the sidebar); so beyond a limit, we
# stop sending them at all.
return
presence_dict = presence.to_dict()
event = dict(
type="presence",
@ -4907,7 +4931,7 @@ def send_presence_changed(user_profile: UserProfile, presence: UserPresence) ->
server_timestamp=time.time(),
presence={presence_dict["client"]: presence_dict},
)
send_event(user_profile.realm, event, active_user_ids(user_profile.realm_id))
send_event(user_profile.realm, event, user_ids)
def consolidate_client(client: Client) -> Client:
@ -4965,16 +4989,6 @@ def do_update_user_presence(
presence.save(update_fields=update_fields)
if not user_profile.realm.presence_disabled and (created or became_online):
# Push event to all users in the realm so they see the new user
# appear in the presence list immediately, or the newly online
# user without delay. Note that we won't send an update here for a
# timestamp update, because we rely on the browser to ping us every 50
# seconds for realm-wide status updates, and those updates should have
# recent timestamps, which means the browser won't think active users
# have gone idle. If we were more aggressive in this function about
# sending timestamp updates, we could eliminate the ping responses, but
# that's not a high priority for now, considering that most of our non-MIT
# realms are pretty small.
send_presence_changed(user_profile, presence)

View File

@ -5,9 +5,15 @@ from unittest import mock
import orjson
from django.conf import settings
from django.http import HttpRequest, HttpResponse
from django.utils.timezone import now as timezone_now
from version import API_FEATURE_LEVEL, ZULIP_VERSION
from zerver.lib.actions import check_send_message, do_change_user_role, do_set_realm_property
from zerver.lib.actions import (
check_send_message,
do_change_user_role,
do_set_realm_property,
do_update_user_presence,
)
from zerver.lib.event_schema import check_restart_event
from zerver.lib.events import fetch_initial_state_data, get_raw_user_data
from zerver.lib.test_classes import ZulipTestCase
@ -16,6 +22,7 @@ from zerver.lib.users import get_api_key
from zerver.models import (
Realm,
UserMessage,
UserPresence,
UserProfile,
flush_per_request_caches,
get_client,
@ -1111,3 +1118,28 @@ class TestGetRawUserDataSystemBotRealm(ZulipTestCase):
bot_profile = get_system_bot(bot_email)
self.assertTrue(bot_profile.id in result)
self.assertTrue(result[bot_profile.id]["is_cross_realm_bot"])
class TestUserPresenceUpdatesDisabled(ZulipTestCase):
def test_presence_events_diabled_on_larger_realm(self) -> None:
# First check that normally the mocked function gets called.
with mock.patch("zerver.lib.actions.send_event") as mock_send_event:
do_update_user_presence(
self.example_user("cordelia"),
get_client("website"),
timezone_now(),
UserPresence.ACTIVE,
)
mock_send_event.assert_called_once()
# Now check that if the realm has more than the USER_LIMIT_FOR_SENDING_PRESENCE_UPDATE_EVENTS
# amount of active users, send_event doesn't get called.
with mock.patch("zerver.lib.actions.send_event") as mock_send_event:
with self.settings(USER_LIMIT_FOR_SENDING_PRESENCE_UPDATE_EVENTS=1):
do_update_user_presence(
self.example_user("hamlet"),
get_client("website"),
timezone_now(),
UserPresence.ACTIVE,
)
mock_send_event.assert_not_called()

View File

@ -411,6 +411,10 @@ STAGING_ERROR_NOTIFICATIONS = False
# default_settings, since it likely isn't usefully user-configurable.
OFFLINE_THRESHOLD_SECS = 5 * 60
# Specifies the number of active users in the realm
# above which sending of presence update events will be disabled.
USER_LIMIT_FOR_SENDING_PRESENCE_UPDATE_EVENTS = 100
# How many days deleted messages data should be kept before being
# permanently deleted.
ARCHIVED_DATA_VACUUMING_DELAY_DAYS = 7