presence: Backend implementation of the last_update_id API.

This builds on top of 016880f54d which
maintains correct .last_update_id for UserPresence objects; now we add
the related API changes to utilize it.
This commit is contained in:
Mateusz Mandera 2024-06-05 21:36:22 +02:00 committed by Tim Abbott
parent a83dc572df
commit 512f4d1476
12 changed files with 340 additions and 28 deletions

View File

@ -20,6 +20,29 @@ format used by the Zulip server that they are interacting with.
## Changes in Zulip 9.0
**Feature level 263**:
* `POST /users/me/presence`: A new `last_update_id`
parameter can be given, instructing
the server to only fetch presence data with `last_update_id`
greater than the value provided. The server also provides
a `presence_last_update_id` field in the response, which
tells the client the greatest `last_update_id` of the fetched
presence data. This can then be used as the value in the
aforementioned parameter to avoid re-fetching of already known
data when polling the endpoint next time.
Additionally, the client specifying the `last_update_id`
implies it uses the modern API format, so
`slim_presence=true` will be assumed by the server.
* [`POST /register`](/api/register-queue): The response now also
includes a `presence_last_update_id` field, with the same
meaning as described above for `/users/me/presence`.
In the same way, the retrieved value can be passed when
querying `/users/me/presence` to avoid re-fetching of already
known data.
**Feature level 262**:
* [`GET /users/{user_id}/status`](/api/get-user-status): Added a new

View File

@ -33,7 +33,7 @@ DESKTOP_WARNING_VERSION = "5.9.3"
# Changes should be accompanied by documentation explaining what the
# new level means in api_docs/changelog.md, as well as "**Changes**"
# entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 262
API_FEATURE_LEVEL = 263
# Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump

View File

@ -123,6 +123,7 @@ def fetch_initial_state_data(
user_avatar_url_field_optional: bool = False,
user_settings_object: bool = False,
slim_presence: bool = False,
presence_last_update_id_fetched_by_client: Optional[int] = None,
include_subscribers: bool = True,
include_streams: bool = True,
spectator_requested_language: Optional[str] = None,
@ -229,11 +230,23 @@ def fetch_initial_state_data(
state["muted_users"] = [] if user_profile is None else get_user_mutes(user_profile)
if want("presence"):
state["presences"] = (
{}
if user_profile is None
else get_presences_for_realm(realm, slim_presence, user_profile)
if presence_last_update_id_fetched_by_client is not None:
# This param being submitted by the client, means they want to use
# the modern API.
slim_presence = True
if user_profile is not None:
presences, presence_last_update_id_fetched_by_server = get_presences_for_realm(
realm,
slim_presence,
last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client,
requesting_user_profile=user_profile,
)
state["presences"] = presences
state["presence_last_update_id"] = presence_last_update_id_fetched_by_server
else:
state["presences"] = {}
# Send server_timestamp, to match the format of `GET /presence` requests.
state["server_timestamp"] = time.time()
@ -1304,6 +1317,15 @@ def apply_event(
else:
raise AssertionError("Unexpected event type {type}/{op}".format(**event))
elif event["type"] == "presence":
# Note: Fetch_initial_state_data includes
# a presence_last_update_id value, reflecting the Max .last_update_id
# value of the UserPresence objects in the data. Events don't carry
# information about the last_update_id of the UserPresence object
# to which they correspond, so we don't (and can't) attempt to update that initial
# presence data here.
# This means that the state resulting from fetch_initial_state + apply_events will not
# match the state of a hypothetical fetch_initial_state fetch that included the fully
# updated data. This is intended and not a bug.
if slim_presence:
user_key = str(event["user_id"])
else:
@ -1559,6 +1581,7 @@ def do_events_register(
apply_markdown: bool = True,
client_gravatar: bool = False,
slim_presence: bool = False,
presence_last_update_id_fetched_by_client: Optional[int] = None,
event_types: Optional[Sequence[str]] = None,
queue_lifespan_secs: int = 0,
all_public_streams: bool = False,
@ -1608,8 +1631,9 @@ def do_events_register(
user_avatar_url_field_optional=user_avatar_url_field_optional,
user_settings_object=user_settings_object,
user_list_incomplete=user_list_incomplete,
# slim_presence is a noop, because presence is not included.
# These presence params are a noop, because presence is not included.
slim_presence=True,
presence_last_update_id_fetched_by_client=None,
# Force include_subscribers=False for security reasons.
include_subscribers=include_subscribers,
# Force include_streams=False for security reasons.
@ -1656,6 +1680,7 @@ def do_events_register(
user_avatar_url_field_optional=user_avatar_url_field_optional,
user_settings_object=user_settings_object,
slim_presence=slim_presence,
presence_last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client,
include_subscribers=include_subscribers,
include_streams=include_streams,
pronouns_field_type_supported=pronouns_field_type_supported,

View File

@ -169,6 +169,7 @@ def build_page_params_for_home_page_load(
apply_markdown=True,
client_gravatar=True,
slim_presence=True,
presence_last_update_id_fetched_by_client=-1,
client_capabilities=client_capabilities,
narrow=narrow,
include_streams=False,

View File

@ -1,7 +1,7 @@
import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, Dict, Mapping, Optional, Sequence
from typing import Any, Dict, Mapping, Optional, Sequence, Tuple
from django.conf import settings
from django.utils.timezone import now as timezone_now
@ -147,14 +147,25 @@ def get_presence_for_user(
def get_presence_dict_by_realm(
realm: Realm, slim_presence: bool = False, requesting_user_profile: Optional[UserProfile] = None
) -> Dict[str, Dict[str, Any]]:
realm: Realm,
slim_presence: bool = False,
last_update_id_fetched_by_client: Optional[int] = None,
requesting_user_profile: Optional[UserProfile] = None,
) -> Tuple[Dict[str, Dict[str, Any]], int]:
two_weeks_ago = timezone_now() - timedelta(weeks=2)
kwargs: Dict[str, object] = dict()
if last_update_id_fetched_by_client is not None:
kwargs["last_update_id__gt"] = last_update_id_fetched_by_client
query = UserPresence.objects.filter(
realm_id=realm.id,
last_connected_time__gte=two_weeks_ago,
user_profile__is_active=True,
user_profile__is_bot=False,
# We can consider tweaking this value when last_update_id is being used,
# to potentially fetch more data since such a client is expected to only
# do it once and then only do small, incremental fetches.
last_connected_time__gte=two_weeks_ago,
**kwargs,
)
if settings.CAN_ACCESS_ALL_USERS_GROUP_LIMITS_PRESENCE and not check_user_can_access_all_users(
@ -172,26 +183,67 @@ def get_presence_dict_by_realm(
"user_profile_id",
"user_profile__enable_offline_push_notifications",
"user_profile__date_joined",
"last_update_id",
)
)
# Get max last_update_id from the list.
if presence_rows:
last_update_id_fetched_by_server: Optional[int] = max(
row["last_update_id"] for row in presence_rows
)
elif last_update_id_fetched_by_client is not None:
# If there are no results, that means that are no new updates to presence
# since what the client has last seen. Therefore, returning the same
# last_update_id that the client provided is correct.
last_update_id_fetched_by_server = last_update_id_fetched_by_client
else:
# If the client didn't specify a last_update_id, we return -1 to indicate
# the lack of any data fetched, while sticking to the convention of
# returning an integer.
last_update_id_fetched_by_server = -1
return get_presence_dicts_for_rows(presence_rows, slim_presence)
assert last_update_id_fetched_by_server is not None
return get_presence_dicts_for_rows(
presence_rows, slim_presence
), last_update_id_fetched_by_server
def get_presences_for_realm(
realm: Realm, slim_presence: bool, requesting_user_profile: UserProfile
) -> Dict[str, Dict[str, Dict[str, Any]]]:
realm: Realm,
slim_presence: bool,
last_update_id_fetched_by_client: Optional[int],
requesting_user_profile: UserProfile,
) -> Tuple[Dict[str, Dict[str, Dict[str, Any]]], Optional[int]]:
if realm.presence_disabled:
# Return an empty dict if presence is disabled in this realm
return defaultdict(dict)
return defaultdict(dict), None
return get_presence_dict_by_realm(realm, slim_presence, requesting_user_profile)
return get_presence_dict_by_realm(
realm,
slim_presence,
last_update_id_fetched_by_client,
requesting_user_profile=requesting_user_profile,
)
def get_presence_response(
requesting_user_profile: UserProfile, slim_presence: bool
requesting_user_profile: UserProfile,
slim_presence: bool,
last_update_id_fetched_by_client: Optional[int] = None,
) -> Dict[str, Any]:
realm = requesting_user_profile.realm
server_timestamp = time.time()
presences = get_presences_for_realm(realm, slim_presence, requesting_user_profile)
return dict(presences=presences, server_timestamp=server_timestamp)
presences, last_update_id_fetched_by_server = get_presences_for_realm(
realm,
slim_presence,
last_update_id_fetched_by_client,
requesting_user_profile=requesting_user_profile,
)
response_dict = dict(
presences=presences,
server_timestamp=server_timestamp,
presence_last_update_id=last_update_id_fetched_by_server,
)
return response_dict

View File

@ -30,6 +30,13 @@ class UserPresence(models.Model):
# queries to fetch all presence data for a given realm.
realm = models.ForeignKey(Realm, on_delete=CASCADE)
# The sequence ID within this realm for the last update to this user's presence;
# these IDs are generated by the PresenceSequence table and an important part
# of how we send incremental presence updates efficiently.
# To put it simply, every time we update a UserPresence row in a realm,
# the row gets last_update_id equal to 1 more than the previously updated
# row in that realm.
# This allows us to order UserPresence rows by when they were last updated.
last_update_id = models.PositiveBigIntegerField(db_index=True, default=0)
# The last time the user had a client connected to Zulip,
@ -77,6 +84,17 @@ class UserPresence(models.Model):
class PresenceSequence(models.Model):
"""
This table is used to generate last_update_id values in the UserPresence table.
It serves as a per-realm sequence generator, while also facilitating
locking to avoid concurrency issues with setting last_update_id values.
Every realm has its unique row in this table, and when a UserPresence in the realm
is being updated, this row get locked against other UserPresence updates in the realm
to ensure sequential processing and set last_update_id values correctly.
"""
realm = models.OneToOneField(Realm, on_delete=CASCADE)
last_update_id = models.PositiveBigIntegerField()

View File

@ -13156,6 +13156,20 @@ paths:
either the user's ID or the user's Zulip API email.
additionalProperties:
$ref: "#/components/schemas/Presence"
presence_last_update_id:
type: integer
description: |
Present if `presence` is present in `fetch_event_types`.
Provides the `last_update_id` value of the latest presence data fetched by
the server and included in the response in `presences`. This can be used
as the value of the `presence_last_update_id` parameter when polling
for presence data at the /users/me/presence endpoint to tell the server
to only fetch the relevant newer data in order to skip redundant
already-known presence information.
**Changes**: New in Zulip 9.0 (feature level 263).
server_timestamp:
type: number
description: |

View File

@ -425,6 +425,11 @@ class BaseAction(ZulipTestCase):
state["realm_bots"] = {u["email"]: u for u in state["realm_bots"]}
# Since time is different for every call, just fix the value
state["server_timestamp"] = 0
if "presence_last_update_id" in state:
# We don't adjust presence_last_update_id via apply_events,
# since events don't carry the relevant information.
# Fix the value just like server_timestamp.
state["presence_last_update_id"] = 0
normalize(state1)
normalize(state2)

View File

@ -114,6 +114,7 @@ class HomeTest(ZulipTestCase):
"password_min_guesses",
"password_min_length",
"presences",
"presence_last_update_id",
"queue_id",
"realm_add_custom_emoji_policy",
"realm_allow_edit_history",

View File

@ -37,23 +37,28 @@ class UserPresenceModelTests(ZulipTestCase):
user_profile = self.example_user("hamlet")
email = user_profile.email
presence_dct = get_presence_dict_by_realm(user_profile.realm)
presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm)
self.assert_length(presence_dct, 0)
self.assertEqual(last_update_id, -1)
self.login_user(user_profile)
result = self.client_post("/json/users/me/presence", {"status": "active"})
self.assert_json_success(result)
actual_last_update_id = UserPresence.objects.all().latest("last_update_id").last_update_id
slim_presence = False
presence_dct = get_presence_dict_by_realm(user_profile.realm, slim_presence)
presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm, slim_presence)
self.assert_length(presence_dct, 1)
self.assertEqual(presence_dct[email]["website"]["status"], "active")
self.assertEqual(last_update_id, actual_last_update_id)
slim_presence = True
presence_dct = get_presence_dict_by_realm(user_profile.realm, slim_presence)
presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm, slim_presence)
self.assert_length(presence_dct, 1)
info = presence_dct[str(user_profile.id)]
self.assertEqual(set(info.keys()), {"active_timestamp", "idle_timestamp"})
self.assertEqual(last_update_id, actual_last_update_id)
def back_date(num_weeks: int) -> None:
user_presence = UserPresence.objects.get(user_profile=user_profile)
@ -64,20 +69,73 @@ class UserPresenceModelTests(ZulipTestCase):
# Simulate the presence being a week old first. Nothing should change.
back_date(num_weeks=1)
presence_dct = get_presence_dict_by_realm(user_profile.realm)
presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm)
self.assert_length(presence_dct, 1)
self.assertEqual(last_update_id, actual_last_update_id)
# If the UserPresence row is three weeks old, we ignore it.
back_date(num_weeks=3)
presence_dct = get_presence_dict_by_realm(user_profile.realm)
presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm)
self.assert_length(presence_dct, 0)
self.assertEqual(last_update_id, -1)
# If the values are set to "never", ignore it just like for sufficiently old presence rows.
UserPresence.objects.filter(id=user_profile.id).update(
last_active_time=None, last_connected_time=None
)
presence_dct = get_presence_dict_by_realm(user_profile.realm)
presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm)
self.assert_length(presence_dct, 0)
self.assertEqual(last_update_id, -1)
def test_last_update_id_logic(self) -> None:
slim_presence = True
UserPresence.objects.all().delete()
user_profile = self.example_user("hamlet")
presence_dct, last_update_id = get_presence_dict_by_realm(
user_profile.realm, slim_presence, last_update_id_fetched_by_client=-1
)
self.assert_length(presence_dct, 0)
self.assertEqual(last_update_id, -1)
self.login_user(user_profile)
result = self.client_post("/json/users/me/presence", {"status": "active"})
self.assert_json_success(result)
actual_last_update_id = UserPresence.objects.all().latest("last_update_id").last_update_id
presence_dct, last_update_id = get_presence_dict_by_realm(
user_profile.realm, slim_presence, last_update_id_fetched_by_client=-1
)
self.assert_length(presence_dct, 1)
self.assertEqual(last_update_id, actual_last_update_id)
# Now pass last_update_id as of this latest fetch. The server should only query for data
# updated after that. There's no such data, so we get no presence data back and the
# returned last_update_id remains the same.
presence_dct, last_update_id = get_presence_dict_by_realm(
user_profile.realm,
slim_presence,
last_update_id_fetched_by_client=actual_last_update_id,
)
self.assert_length(presence_dct, 0)
self.assertEqual(last_update_id, actual_last_update_id)
# Now generate a new update in the realm.
iago = self.example_user("iago")
self.login_user(iago)
result = self.client_post("/json/users/me/presence", {"status": "active"})
# There's a new update now, so we can expect it to be fetched; and no older data.
presence_dct, last_update_id = get_presence_dict_by_realm(
user_profile.realm,
slim_presence,
last_update_id_fetched_by_client=actual_last_update_id,
)
self.assert_length(presence_dct, 1)
self.assertEqual(presence_dct.keys(), {str(iago.id)})
# last_update_id is incremented due to the new update.
self.assertEqual(last_update_id, actual_last_update_id + 1)
def test_pushable_always_false(self) -> None:
# This field was never used by clients of the legacy API, so we
@ -92,7 +150,7 @@ class UserPresenceModelTests(ZulipTestCase):
self.assert_json_success(result)
def pushable() -> bool:
presence_dct = get_presence_dict_by_realm(user_profile.realm)
presence_dct, _ = get_presence_dict_by_realm(user_profile.realm)
self.assert_length(presence_dct, 1)
return presence_dct[email]["website"]["pushable"]
@ -139,6 +197,108 @@ class UserPresenceTests(ZulipTestCase):
result = self.client_post("/json/users/me/presence", {"status": "foo"})
self.assert_json_error(result, "Invalid status: foo")
def test_last_update_id_api(self) -> None:
UserPresence.objects.all().delete()
hamlet = self.example_user("hamlet")
othello = self.example_user("othello")
self.login_user(hamlet)
params = dict(status="idle", last_update_id=-1)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(set(json["presences"].keys()), {str(hamlet.id)})
# In tests, the presence update is processed immediately rather than in the background
# in the queue worker, so we see it reflected immediately.
# In production, our presence update may be processed with some delay, so the last_update_id
# might not include it yet. In such a case, we'd see the original value of -1 returned,
# due to there being no new data to return.
last_update_id = UserPresence.objects.latest("last_update_id").last_update_id
self.assertEqual(json["presence_last_update_id"], last_update_id)
# Briefly test that we include presence_last_update_id in the response
# also in the legacy format API with slim_presence=False.
# Re-doing an idle status so soon doesn't cause updates
# so this doesn't mutate any state.
params = dict(status="idle", slim_presence="false")
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(json["presence_last_update_id"], last_update_id)
self.login_user(othello)
params = dict(status="idle", last_update_id=-1)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(set(json["presences"].keys()), {str(hamlet.id), str(othello.id)})
self.assertEqual(json["presence_last_update_id"], last_update_id + 1)
last_update_id += 1
# Immediately sending an idle status again doesn't cause updates, so the server
# doesn't have any new data since last_update_id to return.
params = dict(status="idle", last_update_id=last_update_id)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(set(json["presences"].keys()), set())
# No new data, so the last_update_id is returned back.
self.assertEqual(json["presence_last_update_id"], last_update_id)
# hamlet sends an active status. othello will next check presence and we'll
# want to verify he gets hamlet's update and nothing else.
self.login_user(hamlet)
params = dict(status="active", last_update_id=-1)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
# Make sure UserPresence.last_update_id is incremented.
self.assertEqual(
UserPresence.objects.latest("last_update_id").last_update_id, last_update_id + 1
)
# Now othello checks presence and should get hamlet's update.
self.login_user(othello)
params = dict(status="idle", last_update_id=last_update_id)
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(set(json["presences"].keys()), {str(hamlet.id)})
self.assertEqual(json["presence_last_update_id"], last_update_id + 1)
def test_last_update_id_api_no_data_edge_cases(self) -> None:
hamlet = self.example_user("hamlet")
self.login_user(hamlet)
UserPresence.objects.all().delete()
params = dict(status="idle", last_update_id=-1)
# Make do_update_user_presence a noop. This simulates a production-like environment
# where the update is processed in a queue worker, so hamlet may not see his update
# reflected back to him in the response. Therefore it is as if there is no presence
# data.
# In such a situation, he should get his last_update_id=-1 back.
with mock.patch("zerver.worker.user_presence.do_update_user_presence"):
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(set(json["presences"].keys()), set())
self.assertEqual(json["presence_last_update_id"], -1)
self.assertFalse(UserPresence.objects.exists())
# Now check the same, but hamlet doesn't pass last_update_id at all,
# like an old slim_presence client would due to an implementation
# prior to the introduction of last_update_id.
params = dict(status="idle")
with mock.patch("zerver.worker.user_presence.do_update_user_presence"):
result = self.client_post("/json/users/me/presence", params)
json = self.assert_json_success(result)
self.assertEqual(set(json["presences"].keys()), set())
# When there's no data and the client didn't provide a last_update_id
# value that we could reflect back to it, we fall back to -1.
self.assertEqual(json["presence_last_update_id"], -1)
self.assertFalse(UserPresence.objects.exists())
def test_set_idle(self) -> None:
client = "website"

View File

@ -145,6 +145,7 @@ def events_register_backend(
apply_markdown,
client_gravatar,
slim_presence,
None,
event_types,
queue_lifespan_secs,
all_public_streams,

View File

@ -154,7 +154,13 @@ def update_active_status_backend(
ping_only: Json[bool] = False,
new_user_input: Json[bool] = False,
slim_presence: Json[bool] = False,
last_update_id: Optional[Json[int]] = None,
) -> HttpResponse:
if last_update_id is not None:
# This param being submitted by the client, means they want to use
# the modern API.
slim_presence = True
status_val = UserPresence.status_from_string(status)
if status_val is None:
raise JsonableError(_("Invalid status: {status}").format(status=status))
@ -166,7 +172,9 @@ def update_active_status_backend(
if ping_only:
ret: Dict[str, Any] = {}
else:
ret = get_presence_response(user_profile, slim_presence)
ret = get_presence_response(
user_profile, slim_presence, last_update_id_fetched_by_client=last_update_id
)
if user_profile.realm.is_zephyr_mirror_realm:
# In zephyr mirroring realms, users can't see the presence of other
@ -190,4 +198,8 @@ def get_statuses_for_realm(request: HttpRequest, user_profile: UserProfile) -> H
# This isn't used by the web app; it's available for API use by
# bots and other clients. We may want to add slim_presence
# support for it (or just migrate its API wholesale) later.
return json_success(request, data=get_presence_response(user_profile, slim_presence=False))
data = get_presence_response(user_profile, slim_presence=False)
# We're not interested in the last_update_id field in this context.
data.pop("presence_last_update_id", None)
return json_success(request, data=data)