diff --git a/api_docs/changelog.md b/api_docs/changelog.md index 8007bbac64..fea33cafa9 100644 --- a/api_docs/changelog.md +++ b/api_docs/changelog.md @@ -20,6 +20,29 @@ format used by the Zulip server that they are interacting with. ## Changes in Zulip 9.0 +**Feature level 263**: + +* `POST /users/me/presence`: A new `last_update_id` + parameter can be given, instructing + the server to only fetch presence data with `last_update_id` + greater than the value provided. The server also provides + a `presence_last_update_id` field in the response, which + tells the client the greatest `last_update_id` of the fetched + presence data. This can then be used as the value in the + aforementioned parameter to avoid re-fetching of already known + data when polling the endpoint next time. + Additionally, the client specifying the `last_update_id` + implies it uses the modern API format, so + `slim_presence=true` will be assumed by the server. + + +* [`POST /register`](/api/register-queue): The response now also + includes a `presence_last_update_id` field, with the same + meaning as described above for `/users/me/presence`. + In the same way, the retrieved value can be passed when + querying `/users/me/presence` to avoid re-fetching of already + known data. + **Feature level 262**: * [`GET /users/{user_id}/status`](/api/get-user-status): Added a new diff --git a/version.py b/version.py index 1a3f1b56a2..a2ead205cb 100644 --- a/version.py +++ b/version.py @@ -33,7 +33,7 @@ DESKTOP_WARNING_VERSION = "5.9.3" # Changes should be accompanied by documentation explaining what the # new level means in api_docs/changelog.md, as well as "**Changes**" # entries in the endpoint's documentation in `zulip.yaml`. -API_FEATURE_LEVEL = 262 +API_FEATURE_LEVEL = 263 # Bump the minor PROVISION_VERSION to indicate that folks should provision # only when going from an old version of the code to a newer version. Bump diff --git a/zerver/lib/events.py b/zerver/lib/events.py index b33368e2f2..983ac42456 100644 --- a/zerver/lib/events.py +++ b/zerver/lib/events.py @@ -123,6 +123,7 @@ def fetch_initial_state_data( user_avatar_url_field_optional: bool = False, user_settings_object: bool = False, slim_presence: bool = False, + presence_last_update_id_fetched_by_client: Optional[int] = None, include_subscribers: bool = True, include_streams: bool = True, spectator_requested_language: Optional[str] = None, @@ -229,11 +230,23 @@ def fetch_initial_state_data( state["muted_users"] = [] if user_profile is None else get_user_mutes(user_profile) if want("presence"): - state["presences"] = ( - {} - if user_profile is None - else get_presences_for_realm(realm, slim_presence, user_profile) - ) + if presence_last_update_id_fetched_by_client is not None: + # This param being submitted by the client, means they want to use + # the modern API. + slim_presence = True + + if user_profile is not None: + presences, presence_last_update_id_fetched_by_server = get_presences_for_realm( + realm, + slim_presence, + last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client, + requesting_user_profile=user_profile, + ) + state["presences"] = presences + state["presence_last_update_id"] = presence_last_update_id_fetched_by_server + else: + state["presences"] = {} + # Send server_timestamp, to match the format of `GET /presence` requests. state["server_timestamp"] = time.time() @@ -1304,6 +1317,15 @@ def apply_event( else: raise AssertionError("Unexpected event type {type}/{op}".format(**event)) elif event["type"] == "presence": + # Note: Fetch_initial_state_data includes + # a presence_last_update_id value, reflecting the Max .last_update_id + # value of the UserPresence objects in the data. Events don't carry + # information about the last_update_id of the UserPresence object + # to which they correspond, so we don't (and can't) attempt to update that initial + # presence data here. + # This means that the state resulting from fetch_initial_state + apply_events will not + # match the state of a hypothetical fetch_initial_state fetch that included the fully + # updated data. This is intended and not a bug. if slim_presence: user_key = str(event["user_id"]) else: @@ -1559,6 +1581,7 @@ def do_events_register( apply_markdown: bool = True, client_gravatar: bool = False, slim_presence: bool = False, + presence_last_update_id_fetched_by_client: Optional[int] = None, event_types: Optional[Sequence[str]] = None, queue_lifespan_secs: int = 0, all_public_streams: bool = False, @@ -1608,8 +1631,9 @@ def do_events_register( user_avatar_url_field_optional=user_avatar_url_field_optional, user_settings_object=user_settings_object, user_list_incomplete=user_list_incomplete, - # slim_presence is a noop, because presence is not included. + # These presence params are a noop, because presence is not included. slim_presence=True, + presence_last_update_id_fetched_by_client=None, # Force include_subscribers=False for security reasons. include_subscribers=include_subscribers, # Force include_streams=False for security reasons. @@ -1656,6 +1680,7 @@ def do_events_register( user_avatar_url_field_optional=user_avatar_url_field_optional, user_settings_object=user_settings_object, slim_presence=slim_presence, + presence_last_update_id_fetched_by_client=presence_last_update_id_fetched_by_client, include_subscribers=include_subscribers, include_streams=include_streams, pronouns_field_type_supported=pronouns_field_type_supported, diff --git a/zerver/lib/home.py b/zerver/lib/home.py index 028d0fe3e4..4981d77a26 100644 --- a/zerver/lib/home.py +++ b/zerver/lib/home.py @@ -169,6 +169,7 @@ def build_page_params_for_home_page_load( apply_markdown=True, client_gravatar=True, slim_presence=True, + presence_last_update_id_fetched_by_client=-1, client_capabilities=client_capabilities, narrow=narrow, include_streams=False, diff --git a/zerver/lib/presence.py b/zerver/lib/presence.py index 9cd9fa2d5f..00b19beda5 100644 --- a/zerver/lib/presence.py +++ b/zerver/lib/presence.py @@ -1,7 +1,7 @@ import time from collections import defaultdict from datetime import datetime, timedelta -from typing import Any, Dict, Mapping, Optional, Sequence +from typing import Any, Dict, Mapping, Optional, Sequence, Tuple from django.conf import settings from django.utils.timezone import now as timezone_now @@ -147,14 +147,25 @@ def get_presence_for_user( def get_presence_dict_by_realm( - realm: Realm, slim_presence: bool = False, requesting_user_profile: Optional[UserProfile] = None -) -> Dict[str, Dict[str, Any]]: + realm: Realm, + slim_presence: bool = False, + last_update_id_fetched_by_client: Optional[int] = None, + requesting_user_profile: Optional[UserProfile] = None, +) -> Tuple[Dict[str, Dict[str, Any]], int]: two_weeks_ago = timezone_now() - timedelta(weeks=2) + kwargs: Dict[str, object] = dict() + if last_update_id_fetched_by_client is not None: + kwargs["last_update_id__gt"] = last_update_id_fetched_by_client + query = UserPresence.objects.filter( realm_id=realm.id, - last_connected_time__gte=two_weeks_ago, user_profile__is_active=True, user_profile__is_bot=False, + # We can consider tweaking this value when last_update_id is being used, + # to potentially fetch more data since such a client is expected to only + # do it once and then only do small, incremental fetches. + last_connected_time__gte=two_weeks_ago, + **kwargs, ) if settings.CAN_ACCESS_ALL_USERS_GROUP_LIMITS_PRESENCE and not check_user_can_access_all_users( @@ -172,26 +183,67 @@ def get_presence_dict_by_realm( "user_profile_id", "user_profile__enable_offline_push_notifications", "user_profile__date_joined", + "last_update_id", ) ) + # Get max last_update_id from the list. + if presence_rows: + last_update_id_fetched_by_server: Optional[int] = max( + row["last_update_id"] for row in presence_rows + ) + elif last_update_id_fetched_by_client is not None: + # If there are no results, that means that are no new updates to presence + # since what the client has last seen. Therefore, returning the same + # last_update_id that the client provided is correct. + last_update_id_fetched_by_server = last_update_id_fetched_by_client + else: + # If the client didn't specify a last_update_id, we return -1 to indicate + # the lack of any data fetched, while sticking to the convention of + # returning an integer. + last_update_id_fetched_by_server = -1 - return get_presence_dicts_for_rows(presence_rows, slim_presence) + assert last_update_id_fetched_by_server is not None + return get_presence_dicts_for_rows( + presence_rows, slim_presence + ), last_update_id_fetched_by_server def get_presences_for_realm( - realm: Realm, slim_presence: bool, requesting_user_profile: UserProfile -) -> Dict[str, Dict[str, Dict[str, Any]]]: + realm: Realm, + slim_presence: bool, + last_update_id_fetched_by_client: Optional[int], + requesting_user_profile: UserProfile, +) -> Tuple[Dict[str, Dict[str, Dict[str, Any]]], Optional[int]]: if realm.presence_disabled: # Return an empty dict if presence is disabled in this realm - return defaultdict(dict) + return defaultdict(dict), None - return get_presence_dict_by_realm(realm, slim_presence, requesting_user_profile) + return get_presence_dict_by_realm( + realm, + slim_presence, + last_update_id_fetched_by_client, + requesting_user_profile=requesting_user_profile, + ) def get_presence_response( - requesting_user_profile: UserProfile, slim_presence: bool + requesting_user_profile: UserProfile, + slim_presence: bool, + last_update_id_fetched_by_client: Optional[int] = None, ) -> Dict[str, Any]: realm = requesting_user_profile.realm server_timestamp = time.time() - presences = get_presences_for_realm(realm, slim_presence, requesting_user_profile) - return dict(presences=presences, server_timestamp=server_timestamp) + presences, last_update_id_fetched_by_server = get_presences_for_realm( + realm, + slim_presence, + last_update_id_fetched_by_client, + requesting_user_profile=requesting_user_profile, + ) + + response_dict = dict( + presences=presences, + server_timestamp=server_timestamp, + presence_last_update_id=last_update_id_fetched_by_server, + ) + + return response_dict diff --git a/zerver/models/presence.py b/zerver/models/presence.py index e4adbc5222..a83daf83ed 100644 --- a/zerver/models/presence.py +++ b/zerver/models/presence.py @@ -30,6 +30,13 @@ class UserPresence(models.Model): # queries to fetch all presence data for a given realm. realm = models.ForeignKey(Realm, on_delete=CASCADE) + # The sequence ID within this realm for the last update to this user's presence; + # these IDs are generated by the PresenceSequence table and an important part + # of how we send incremental presence updates efficiently. + # To put it simply, every time we update a UserPresence row in a realm, + # the row gets last_update_id equal to 1 more than the previously updated + # row in that realm. + # This allows us to order UserPresence rows by when they were last updated. last_update_id = models.PositiveBigIntegerField(db_index=True, default=0) # The last time the user had a client connected to Zulip, @@ -77,6 +84,17 @@ class UserPresence(models.Model): class PresenceSequence(models.Model): + """ + This table is used to generate last_update_id values in the UserPresence table. + + It serves as a per-realm sequence generator, while also facilitating + locking to avoid concurrency issues with setting last_update_id values. + + Every realm has its unique row in this table, and when a UserPresence in the realm + is being updated, this row get locked against other UserPresence updates in the realm + to ensure sequential processing and set last_update_id values correctly. + """ + realm = models.OneToOneField(Realm, on_delete=CASCADE) last_update_id = models.PositiveBigIntegerField() diff --git a/zerver/openapi/zulip.yaml b/zerver/openapi/zulip.yaml index 9a49c0886f..6f6e3844f6 100644 --- a/zerver/openapi/zulip.yaml +++ b/zerver/openapi/zulip.yaml @@ -13156,6 +13156,20 @@ paths: either the user's ID or the user's Zulip API email. additionalProperties: $ref: "#/components/schemas/Presence" + presence_last_update_id: + type: integer + description: | + Present if `presence` is present in `fetch_event_types`. + + Provides the `last_update_id` value of the latest presence data fetched by + the server and included in the response in `presences`. This can be used + as the value of the `presence_last_update_id` parameter when polling + for presence data at the /users/me/presence endpoint to tell the server + to only fetch the relevant newer data in order to skip redundant + already-known presence information. + + **Changes**: New in Zulip 9.0 (feature level 263). + server_timestamp: type: number description: | diff --git a/zerver/tests/test_events.py b/zerver/tests/test_events.py index 7cfc78123a..d017c4979d 100644 --- a/zerver/tests/test_events.py +++ b/zerver/tests/test_events.py @@ -425,6 +425,11 @@ class BaseAction(ZulipTestCase): state["realm_bots"] = {u["email"]: u for u in state["realm_bots"]} # Since time is different for every call, just fix the value state["server_timestamp"] = 0 + if "presence_last_update_id" in state: + # We don't adjust presence_last_update_id via apply_events, + # since events don't carry the relevant information. + # Fix the value just like server_timestamp. + state["presence_last_update_id"] = 0 normalize(state1) normalize(state2) diff --git a/zerver/tests/test_home.py b/zerver/tests/test_home.py index c4e575cb82..fd2edef9b9 100644 --- a/zerver/tests/test_home.py +++ b/zerver/tests/test_home.py @@ -114,6 +114,7 @@ class HomeTest(ZulipTestCase): "password_min_guesses", "password_min_length", "presences", + "presence_last_update_id", "queue_id", "realm_add_custom_emoji_policy", "realm_allow_edit_history", diff --git a/zerver/tests/test_presence.py b/zerver/tests/test_presence.py index 84a63328f2..39dbba21c4 100644 --- a/zerver/tests/test_presence.py +++ b/zerver/tests/test_presence.py @@ -37,23 +37,28 @@ class UserPresenceModelTests(ZulipTestCase): user_profile = self.example_user("hamlet") email = user_profile.email - presence_dct = get_presence_dict_by_realm(user_profile.realm) + presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm) self.assert_length(presence_dct, 0) + self.assertEqual(last_update_id, -1) self.login_user(user_profile) result = self.client_post("/json/users/me/presence", {"status": "active"}) self.assert_json_success(result) + actual_last_update_id = UserPresence.objects.all().latest("last_update_id").last_update_id + slim_presence = False - presence_dct = get_presence_dict_by_realm(user_profile.realm, slim_presence) + presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm, slim_presence) self.assert_length(presence_dct, 1) self.assertEqual(presence_dct[email]["website"]["status"], "active") + self.assertEqual(last_update_id, actual_last_update_id) slim_presence = True - presence_dct = get_presence_dict_by_realm(user_profile.realm, slim_presence) + presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm, slim_presence) self.assert_length(presence_dct, 1) info = presence_dct[str(user_profile.id)] self.assertEqual(set(info.keys()), {"active_timestamp", "idle_timestamp"}) + self.assertEqual(last_update_id, actual_last_update_id) def back_date(num_weeks: int) -> None: user_presence = UserPresence.objects.get(user_profile=user_profile) @@ -64,20 +69,73 @@ class UserPresenceModelTests(ZulipTestCase): # Simulate the presence being a week old first. Nothing should change. back_date(num_weeks=1) - presence_dct = get_presence_dict_by_realm(user_profile.realm) + presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm) self.assert_length(presence_dct, 1) + self.assertEqual(last_update_id, actual_last_update_id) # If the UserPresence row is three weeks old, we ignore it. back_date(num_weeks=3) - presence_dct = get_presence_dict_by_realm(user_profile.realm) + presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm) self.assert_length(presence_dct, 0) + self.assertEqual(last_update_id, -1) # If the values are set to "never", ignore it just like for sufficiently old presence rows. UserPresence.objects.filter(id=user_profile.id).update( last_active_time=None, last_connected_time=None ) - presence_dct = get_presence_dict_by_realm(user_profile.realm) + presence_dct, last_update_id = get_presence_dict_by_realm(user_profile.realm) self.assert_length(presence_dct, 0) + self.assertEqual(last_update_id, -1) + + def test_last_update_id_logic(self) -> None: + slim_presence = True + UserPresence.objects.all().delete() + + user_profile = self.example_user("hamlet") + presence_dct, last_update_id = get_presence_dict_by_realm( + user_profile.realm, slim_presence, last_update_id_fetched_by_client=-1 + ) + self.assert_length(presence_dct, 0) + self.assertEqual(last_update_id, -1) + + self.login_user(user_profile) + result = self.client_post("/json/users/me/presence", {"status": "active"}) + self.assert_json_success(result) + + actual_last_update_id = UserPresence.objects.all().latest("last_update_id").last_update_id + + presence_dct, last_update_id = get_presence_dict_by_realm( + user_profile.realm, slim_presence, last_update_id_fetched_by_client=-1 + ) + self.assert_length(presence_dct, 1) + self.assertEqual(last_update_id, actual_last_update_id) + + # Now pass last_update_id as of this latest fetch. The server should only query for data + # updated after that. There's no such data, so we get no presence data back and the + # returned last_update_id remains the same. + presence_dct, last_update_id = get_presence_dict_by_realm( + user_profile.realm, + slim_presence, + last_update_id_fetched_by_client=actual_last_update_id, + ) + self.assert_length(presence_dct, 0) + self.assertEqual(last_update_id, actual_last_update_id) + + # Now generate a new update in the realm. + iago = self.example_user("iago") + self.login_user(iago) + result = self.client_post("/json/users/me/presence", {"status": "active"}) + + # There's a new update now, so we can expect it to be fetched; and no older data. + presence_dct, last_update_id = get_presence_dict_by_realm( + user_profile.realm, + slim_presence, + last_update_id_fetched_by_client=actual_last_update_id, + ) + self.assert_length(presence_dct, 1) + self.assertEqual(presence_dct.keys(), {str(iago.id)}) + # last_update_id is incremented due to the new update. + self.assertEqual(last_update_id, actual_last_update_id + 1) def test_pushable_always_false(self) -> None: # This field was never used by clients of the legacy API, so we @@ -92,7 +150,7 @@ class UserPresenceModelTests(ZulipTestCase): self.assert_json_success(result) def pushable() -> bool: - presence_dct = get_presence_dict_by_realm(user_profile.realm) + presence_dct, _ = get_presence_dict_by_realm(user_profile.realm) self.assert_length(presence_dct, 1) return presence_dct[email]["website"]["pushable"] @@ -139,6 +197,108 @@ class UserPresenceTests(ZulipTestCase): result = self.client_post("/json/users/me/presence", {"status": "foo"}) self.assert_json_error(result, "Invalid status: foo") + def test_last_update_id_api(self) -> None: + UserPresence.objects.all().delete() + + hamlet = self.example_user("hamlet") + othello = self.example_user("othello") + + self.login_user(hamlet) + + params = dict(status="idle", last_update_id=-1) + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + self.assertEqual(set(json["presences"].keys()), {str(hamlet.id)}) + + # In tests, the presence update is processed immediately rather than in the background + # in the queue worker, so we see it reflected immediately. + # In production, our presence update may be processed with some delay, so the last_update_id + # might not include it yet. In such a case, we'd see the original value of -1 returned, + # due to there being no new data to return. + last_update_id = UserPresence.objects.latest("last_update_id").last_update_id + self.assertEqual(json["presence_last_update_id"], last_update_id) + + # Briefly test that we include presence_last_update_id in the response + # also in the legacy format API with slim_presence=False. + # Re-doing an idle status so soon doesn't cause updates + # so this doesn't mutate any state. + params = dict(status="idle", slim_presence="false") + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + self.assertEqual(json["presence_last_update_id"], last_update_id) + + self.login_user(othello) + params = dict(status="idle", last_update_id=-1) + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + self.assertEqual(set(json["presences"].keys()), {str(hamlet.id), str(othello.id)}) + self.assertEqual(json["presence_last_update_id"], last_update_id + 1) + + last_update_id += 1 + # Immediately sending an idle status again doesn't cause updates, so the server + # doesn't have any new data since last_update_id to return. + params = dict(status="idle", last_update_id=last_update_id) + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + self.assertEqual(set(json["presences"].keys()), set()) + # No new data, so the last_update_id is returned back. + self.assertEqual(json["presence_last_update_id"], last_update_id) + + # hamlet sends an active status. othello will next check presence and we'll + # want to verify he gets hamlet's update and nothing else. + self.login_user(hamlet) + params = dict(status="active", last_update_id=-1) + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + + # Make sure UserPresence.last_update_id is incremented. + self.assertEqual( + UserPresence.objects.latest("last_update_id").last_update_id, last_update_id + 1 + ) + + # Now othello checks presence and should get hamlet's update. + self.login_user(othello) + params = dict(status="idle", last_update_id=last_update_id) + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + self.assertEqual(set(json["presences"].keys()), {str(hamlet.id)}) + self.assertEqual(json["presence_last_update_id"], last_update_id + 1) + + def test_last_update_id_api_no_data_edge_cases(self) -> None: + hamlet = self.example_user("hamlet") + + self.login_user(hamlet) + + UserPresence.objects.all().delete() + + params = dict(status="idle", last_update_id=-1) + # Make do_update_user_presence a noop. This simulates a production-like environment + # where the update is processed in a queue worker, so hamlet may not see his update + # reflected back to him in the response. Therefore it is as if there is no presence + # data. + # In such a situation, he should get his last_update_id=-1 back. + with mock.patch("zerver.worker.user_presence.do_update_user_presence"): + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + + self.assertEqual(set(json["presences"].keys()), set()) + self.assertEqual(json["presence_last_update_id"], -1) + self.assertFalse(UserPresence.objects.exists()) + + # Now check the same, but hamlet doesn't pass last_update_id at all, + # like an old slim_presence client would due to an implementation + # prior to the introduction of last_update_id. + params = dict(status="idle") + with mock.patch("zerver.worker.user_presence.do_update_user_presence"): + result = self.client_post("/json/users/me/presence", params) + json = self.assert_json_success(result) + self.assertEqual(set(json["presences"].keys()), set()) + + # When there's no data and the client didn't provide a last_update_id + # value that we could reflect back to it, we fall back to -1. + self.assertEqual(json["presence_last_update_id"], -1) + self.assertFalse(UserPresence.objects.exists()) + def test_set_idle(self) -> None: client = "website" diff --git a/zerver/views/events_register.py b/zerver/views/events_register.py index 520b340d39..8b793a7e57 100644 --- a/zerver/views/events_register.py +++ b/zerver/views/events_register.py @@ -145,6 +145,7 @@ def events_register_backend( apply_markdown, client_gravatar, slim_presence, + None, event_types, queue_lifespan_secs, all_public_streams, diff --git a/zerver/views/presence.py b/zerver/views/presence.py index 67a5f330a3..b25cd3c6f3 100644 --- a/zerver/views/presence.py +++ b/zerver/views/presence.py @@ -154,7 +154,13 @@ def update_active_status_backend( ping_only: Json[bool] = False, new_user_input: Json[bool] = False, slim_presence: Json[bool] = False, + last_update_id: Optional[Json[int]] = None, ) -> HttpResponse: + if last_update_id is not None: + # This param being submitted by the client, means they want to use + # the modern API. + slim_presence = True + status_val = UserPresence.status_from_string(status) if status_val is None: raise JsonableError(_("Invalid status: {status}").format(status=status)) @@ -166,7 +172,9 @@ def update_active_status_backend( if ping_only: ret: Dict[str, Any] = {} else: - ret = get_presence_response(user_profile, slim_presence) + ret = get_presence_response( + user_profile, slim_presence, last_update_id_fetched_by_client=last_update_id + ) if user_profile.realm.is_zephyr_mirror_realm: # In zephyr mirroring realms, users can't see the presence of other @@ -190,4 +198,8 @@ def get_statuses_for_realm(request: HttpRequest, user_profile: UserProfile) -> H # This isn't used by the web app; it's available for API use by # bots and other clients. We may want to add slim_presence # support for it (or just migrate its API wholesale) later. - return json_success(request, data=get_presence_response(user_profile, slim_presence=False)) + data = get_presence_response(user_profile, slim_presence=False) + + # We're not interested in the last_update_id field in this context. + data.pop("presence_last_update_id", None) + return json_success(request, data=data)