From 3f726e25e4f391c79f9b7402553effe73a82d55d Mon Sep 17 00:00:00 2001 From: Aman Agrawal Date: Wed, 2 Oct 2024 04:17:20 +0000 Subject: [PATCH] message_fetch: Add message_ids parameter to /messages request. This allows us to fetch messages for a list of message ids in a single request. --- api_docs/changelog.md | 5 ++ version.py | 2 +- zerver/lib/narrow.py | 94 +++++++++++++-------- zerver/openapi/zulip.yaml | 77 ++++++++++++----- zerver/tests/test_message_fetch.py | 129 +++++++++++++++++++++++++---- zerver/tests/test_openapi.py | 8 +- zerver/views/message_fetch.py | 75 +++++++++++++---- 7 files changed, 302 insertions(+), 88 deletions(-) diff --git a/api_docs/changelog.md b/api_docs/changelog.md index ff7bf8cff4..ce6bf3c2ff 100644 --- a/api_docs/changelog.md +++ b/api_docs/changelog.md @@ -20,6 +20,11 @@ format used by the Zulip server that they are interacting with. ## Changes in Zulip 10.0 +**Feature level 300** + +* [`GET /messages`](/api/get-message): Added a new message_ids parameter, + as an alternative method of specifying which messages to fetch. + **Feature level 299** * `PATCH /realm`, [`POST /register`](/api/register-queue), diff --git a/version.py b/version.py index bcda3a271f..e0ddf97537 100644 --- a/version.py +++ b/version.py @@ -34,7 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3" # new level means in api_docs/changelog.md, as well as "**Changes**" # entries in the endpoint's documentation in `zulip.yaml`. -API_FEATURE_LEVEL = 299 # Last bumped for org level group create/manage permissions. +API_FEATURE_LEVEL = 300 # Last bumped for GET /messages API changes # Bump the minor PROVISION_VERSION to indicate that folks should provision # only when going from an old version of the code to a newer version. Bump diff --git a/zerver/lib/narrow.py b/zerver/lib/narrow.py index 2170a390e6..fe6ca5879a 100644 --- a/zerver/lib/narrow.py +++ b/zerver/lib/narrow.py @@ -1334,7 +1334,7 @@ def post_process_limited_query( @dataclass class FetchedMessages(LimitedMessages[Row]): - anchor: int + anchor: int | None include_history: bool is_search: bool @@ -1349,6 +1349,7 @@ def fetch_messages( include_anchor: bool, num_before: int, num_after: int, + client_requested_message_ids: list[int] | None = None, ) -> FetchedMessages: include_history = ok_to_include_history(narrow, user_profile, is_web_public_query) if include_history: @@ -1372,6 +1373,8 @@ def fetch_messages( need_message = True need_user_message = True + # get_base_query_for_search and ok_to_include_history are responsible for ensuring + # that we only include messages the user has access to. query: SelectBase query, inner_msg_id_col = get_base_query_for_search( realm_id=realm.id, @@ -1389,47 +1392,70 @@ def fetch_messages( is_web_public_query=is_web_public_query, ) + anchored_to_left = False + anchored_to_right = False + first_visible_message_id = get_first_visible_message_id(realm) with get_sqlalchemy_connection() as sa_conn: - if anchor is None: - # `anchor=None` corresponds to the anchor="first_unread" parameter. - anchor = find_first_unread_anchor( - sa_conn, - user_profile, - narrow, + if client_requested_message_ids is not None: + query = query.filter(inner_msg_id_col.in_(client_requested_message_ids)) + else: + if anchor is None: + # `anchor=None` corresponds to the anchor="first_unread" parameter. + anchor = find_first_unread_anchor( + sa_conn, + user_profile, + narrow, + ) + + anchored_to_left = anchor == 0 + + # Set value that will be used to short circuit the after_query + # altogether and avoid needless conditions in the before_query. + anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID + if anchored_to_right: + num_after = 0 + + query = limit_query_to_range( + query=query, + num_before=num_before, + num_after=num_after, + anchor=anchor, + include_anchor=include_anchor, + anchored_to_left=anchored_to_left, + anchored_to_right=anchored_to_right, + id_col=inner_msg_id_col, + first_visible_message_id=first_visible_message_id, ) - anchored_to_left = anchor == 0 + main_query = query.subquery() + query = ( + select(*main_query.c) + .select_from(main_query) + .order_by(column("message_id", Integer).asc()) + ) - # Set value that will be used to short circuit the after_query - # altogether and avoid needless conditions in the before_query. - anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID - if anchored_to_right: - num_after = 0 - - first_visible_message_id = get_first_visible_message_id(realm) - - query = limit_query_to_range( - query=query, - num_before=num_before, - num_after=num_after, - anchor=anchor, - include_anchor=include_anchor, - anchored_to_left=anchored_to_left, - anchored_to_right=anchored_to_right, - id_col=inner_msg_id_col, - first_visible_message_id=first_visible_message_id, - ) - - main_query = query.subquery() - query = ( - select(*main_query.c) - .select_from(main_query) - .order_by(column("message_id", Integer).asc()) - ) # This is a hack to tag the query we use for testing query = query.prefix_with("/* get_messages */") rows = list(sa_conn.execute(query).fetchall()) + if client_requested_message_ids is not None: + # We don't need to do any post-processing in this case. + if first_visible_message_id > 0: + visible_rows = [r for r in rows if r[0] >= first_visible_message_id] + else: + visible_rows = rows + return FetchedMessages( + rows=visible_rows, + found_anchor=False, + found_newest=False, + found_oldest=False, + history_limited=False, + anchor=None, + include_history=include_history, + is_search=is_search, + ) + + assert anchor is not None query_info = post_process_limited_query( rows=rows, num_before=num_before, diff --git a/zerver/openapi/zulip.yaml b/zerver/openapi/zulip.yaml index 24c96f9948..e78485ec37 100644 --- a/zerver/openapi/zulip.yaml +++ b/zerver/openapi/zulip.yaml @@ -6479,33 +6479,37 @@ paths: summary: Get messages tags: ["messages"] description: | - Fetch user's message history from a Zulip server. + This endpoint is the primary way to fetch a messages. It is used by all official + Zulip clients (e.g. the web, desktop, mobile, and terminal clients) as well as + many bots, API clients, backup scripts, etc. - This endpoint is the primary way to fetch a user's message history - from a Zulip server. It is useful both for Zulip clients (e.g. the - web, desktop, mobile, and terminal clients) as well as bots, API - clients, backup scripts, etc. + Most queries will specify a [narrow filter](/api/get-messages#parameter-narrow), + to fetch the messages matching any supported [search + query](/help/search-for-messages). If not specified, it will return messages + corresponding to the user's [combined feed](/help/combined-feed). There are two + ways to specify which messages matching the narrow filter to fetch: + + - A range of messages, described by an `anchor` message ID (or a string-format + specification of how the server should computer an anchor to use) and a maximum + number of messages in each direction from that anchor. + + - A rarely used variant (`message_ids`) where the client specifies the message IDs + to fetch. + + The server returns the matching messages, sorted by message ID, as well as some + metadata that makes it easy for a client to determine whether there are more + messages matching the query that were not returned due to the `num_before` and + `num_after` limits. Note that a user's message history does not contain messages sent to channels before they [subscribe](/api/subscribe), and newly created bot users are not usually subscribed to any channels. - By specifying a [narrow filter](/api/get-messages#parameter-narrow), - you can use this endpoint to fetch the messages matching any search - query that is supported by Zulip's powerful full-text search backend. + We recommend requesting at most 1000 messages in a batch, to avoid generating very + large HTTP responses. A maximum of 5000 messages can be obtained per request; + attempting to exceed this will result in an error. - In either case, you specify an `anchor` message (or ask the server to - calculate the first unread message for you and use that as the - anchor), as well as a number of messages before and after the anchor - message. The server returns those messages, sorted by message ID, as - well as some metadata that makes it easy for a client to determine - whether there are more messages matching the query that were not - returned due to the `num_before` and `num_after` limits. - - We recommend setting `num_before` and `num_after` to no more than 1000, to - avoid generating very large HTTP responses. A maximum of 5000 messages - can be obtained per request; attempting to exceed this will result in an - error. + **Changes**: The `message_ids` option is new in Zulip 10.0 (feature level 300). x-curl-examples-parameters: oneOf: - type: exclude @@ -6515,6 +6519,7 @@ paths: - apply_markdown - use_first_unread_anchor - include_anchor + - message_ids parameters: - name: anchor in: query @@ -6554,20 +6559,22 @@ paths: in: query description: | The number of messages with IDs less than the anchor to retrieve. + Required if `message_ids` is not provided. schema: type: integer minimum: 0 example: 4 - required: true + required: false - name: num_after in: query description: | The number of messages with IDs greater than the anchor to retrieve. + Required if `message_ids` is not provided. schema: type: integer minimum: 0 example: 8 - required: true + required: false - name: narrow in: query description: | @@ -6648,6 +6655,26 @@ paths: type: boolean default: false example: true + - name: message_ids + in: query + description: | + A list of message IDs to fetch. The server will return messages corresponding to the + subset of the requested message IDs that exist and the current user has access to, + potentially filtered by the narrow (if that parameter is provided). + + It is an error to pass this parameter as well as any of the parameters involved in + specifying a range of messages: `anchor`, `include_anchor`, `use_first_unread_anchor`, + `num_before`, and `num_after`. + + **Changes**: New in Zulip 10.0 (feature level 300). Previously, there was + no way to request a specific set of messages IDs. + content: + application/json: + schema: + type: array + items: + type: integer + example: [1, 2, 3] responses: "200": description: Success. @@ -6657,6 +6684,10 @@ paths: allOf: - $ref: "#/components/schemas/JsonSuccessBase" - additionalProperties: false + required: + - result + - msg + - messages properties: result: {} msg: {} @@ -6666,6 +6697,8 @@ paths: description: | The same `anchor` specified in the request (or the computed one, if `use_first_unread_anchor` is `true`). + + Only present if `message_ids` is not provided. found_newest: type: boolean description: | diff --git a/zerver/tests/test_message_fetch.py b/zerver/tests/test_message_fetch.py index c614968670..6c59a8faaa 100644 --- a/zerver/tests/test_message_fetch.py +++ b/zerver/tests/test_message_fetch.py @@ -2085,6 +2085,121 @@ class GetOldMessagesTest(ZulipTestCase): result, "Invalid narrow operator: unknown web-public channel Scotland", status_code=400 ) + def test_get_message_ids(self) -> None: + self.login("iago") + self.subscribe(self.example_user("iago"), "Verona") + msg1 = self.send_stream_message(self.example_user("iago"), "Verona") + msg2 = self.send_stream_message(self.example_user("iago"), "Verona") + result = self.client_get( + "/json/messages", + { + "message_ids": orjson.dumps([msg1, msg2]).decode(), + }, + ) + + self.assert_json_success(result) + messages = orjson.loads(result.content)["messages"] + self.assert_length(messages, 2) + fetched_message_ids = [message["id"] for message in messages] + self.assertEqual(fetched_message_ids.sort(), [msg1, msg2].sort()) + + def test_get_message_ids_web_public(self) -> None: + self.login("iago") + self.subscribe(self.example_user("iago"), "Rome") + self.logout() + msg1 = self.send_stream_message(self.example_user("iago"), "Rome") + msg2 = self.send_stream_message(self.example_user("iago"), "Rome") + result = self.client_get( + "/json/messages", + { + "message_ids": orjson.dumps([msg1, msg2]).decode(), + "narrow": orjson.dumps([dict(operator="channels", operand="web-public")]).decode(), + }, + ) + + self.assert_json_success(result) + messages = orjson.loads(result.content)["messages"] + self.assert_length(messages, 2) + fetched_message_ids = [message["id"] for message in messages] + self.assertEqual(fetched_message_ids.sort(), [msg1, msg2].sort()) + + def test_message_fetch_with_mutually_exclusive_parameters(self) -> None: + mutually_exclusive_params_with_message_ids = ["num_before", "num_after", "anchor"] + for param in mutually_exclusive_params_with_message_ids: + result = self.client_get( + "/json/messages", + { + "message_ids": orjson.dumps([1, 2]).decode(), + param: 1, + }, + ) + error_msg = "Unsupported parameter combination: num_before, num_after, anchor, message_ids, include_anchor, use_first_unread_anchor" + self.assert_json_error(result, error_msg) + + def test_message_fetch_for_inaccessible_message_ids(self) -> None: + # Add new channels + realm = get_realm("zulip") + channel_dicts: list[StreamDict] = [ + { + "name": "private-channel", + "description": "Private channel with non-public history", + "invite_only": True, + }, + { + "name": "private-channel-with-history", + "description": "Private channel with public history", + "invite_only": True, + "history_public_to_subscribers": True, + }, + ] + create_streams_if_needed(realm, channel_dicts) + + iago = self.example_user("iago") + self.login("iago") + message_ids = [] + for stream_name in ["private-channel", "private-channel-with-history"]: + self.subscribe(iago, stream_name) + message_ids.append(self.send_stream_message(iago, stream_name)) + self.logout() + + self.login("hamlet") + result = self.client_get( + "/json/messages", + { + "message_ids": orjson.dumps(message_ids).decode(), + }, + ) + self.assert_json_success(result) + messages = orjson.loads(result.content)["messages"] + self.assert_length(messages, 0) + + self.logout() + self.login("iago") + result = self.client_get( + "/json/messages", + { + "message_ids": orjson.dumps(message_ids).decode(), + }, + ) + self.assert_json_success(result) + messages = orjson.loads(result.content)["messages"] + self.assert_length(messages, 2) + + # These messages are not accessible if they are after first_visible_message_id. + realm = get_realm("zulip") + realm.first_visible_message_id = max(message_ids) + 1 + realm.save(update_fields=["first_visible_message_id"]) + + result = self.client_get( + "/json/messages", + { + "message_ids": orjson.dumps(message_ids).decode(), + }, + ) + self.assert_json_success(result) + messages = orjson.loads(result.content)["messages"] + self.assert_length(messages, 0) + def setup_web_public_test(self, num_web_public_message: int = 1) -> None: """ Send N+2 messages, N in a web-public channel, then one in a non-web-public channel @@ -3682,20 +3797,6 @@ class GetOldMessagesTest(ZulipTestCase): self.assertEqual(data["history_limited"], False) messages_matches_ids(messages, message_ids[6:9]) - def test_missing_params(self) -> None: - """ - anchor, num_before, and num_after are all required - POST parameters for get_messages. - """ - self.login("hamlet") - - required_args: tuple[tuple[str, int], ...] = (("num_before", 1), ("num_after", 1)) - - for i in range(len(required_args)): - post_params = dict(required_args[:i] + required_args[i + 1 :]) - result = self.client_get("/json/messages", post_params) - self.assert_json_error(result, f"Missing '{required_args[i][0]}' argument") - def test_get_messages_limits(self) -> None: """ A call to GET /json/messages requesting more than diff --git a/zerver/tests/test_openapi.py b/zerver/tests/test_openapi.py index c193ae7818..1e64dd0ea2 100644 --- a/zerver/tests/test_openapi.py +++ b/zerver/tests/test_openapi.py @@ -789,7 +789,9 @@ class TestCurlExampleGeneration(ZulipTestCase): self.curl_example("/endpoint", "BREW") # see: HTCPCP def test_generate_and_render_curl_with_array_example(self) -> None: - generated_curl_example = self.curl_example("/messages", "GET", exclude=["use_first_unread_anchor"]) + generated_curl_example = self.curl_example( + "/messages", "GET", exclude=["use_first_unread_anchor", "message_ids"] + ) expected_curl_example = [ "```curl", "curl -sSX GET -G http://localhost:9991/api/v1/messages \\", @@ -862,7 +864,9 @@ class TestCurlExampleGeneration(ZulipTestCase): def test_generate_and_render_curl_example_with_excludes(self) -> None: generated_curl_example = self.curl_example( - "/messages", "GET", exclude=["client_gravatar", "apply_markdown", "use_first_unread_anchor"] + "/messages", + "GET", + exclude=["client_gravatar", "apply_markdown", "use_first_unread_anchor", "message_ids"], ) expected_curl_example = [ "```curl", diff --git a/zerver/views/message_fetch.py b/zerver/views/message_fetch.py index bec4b69687..7e8eb83955 100644 --- a/zerver/views/message_fetch.py +++ b/zerver/views/message_fetch.py @@ -12,7 +12,11 @@ from sqlalchemy.sql import and_, column, join, literal, literal_column, select, from sqlalchemy.types import Integer, Text from zerver.context_processors import get_valid_realm_from_request -from zerver.lib.exceptions import JsonableError, MissingAuthenticationError +from zerver.lib.exceptions import ( + IncompatibleParametersError, + JsonableError, + MissingAuthenticationError, +) from zerver.lib.message import get_first_visible_message_id, messages_for_ids from zerver.lib.narrow import ( NarrowParameter, @@ -106,19 +110,47 @@ def get_messages_backend( *, anchor_val: Annotated[str | None, ApiParamConfig("anchor")] = None, include_anchor: Json[bool] = True, - num_before: Json[NonNegativeInt], - num_after: Json[NonNegativeInt], + num_before: Json[NonNegativeInt] = 0, + num_after: Json[NonNegativeInt] = 0, narrow: Json[list[NarrowParameter] | None] = None, use_first_unread_anchor_val: Annotated[ Json[bool], ApiParamConfig("use_first_unread_anchor") ] = False, client_gravatar: Json[bool] = True, apply_markdown: Json[bool] = True, + client_requested_message_ids: Annotated[ + Json[list[NonNegativeInt] | None], ApiParamConfig("message_ids") + ] = None, ) -> HttpResponse: + # User has to either provide message_ids or both num_before and num_after. + if ( + num_before or num_after or anchor_val is not None or use_first_unread_anchor_val + ) and client_requested_message_ids is not None: + raise IncompatibleParametersError( + [ + "num_before", + "num_after", + "anchor", + "message_ids", + "include_anchor", + "use_first_unread_anchor", + ] + ) + elif client_requested_message_ids is not None: + include_anchor = False + + anchor = None + if client_requested_message_ids is None: + anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val) + realm = get_valid_realm_from_request(request) - anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val) narrow = update_narrow_terms_containing_with_operator(realm, maybe_user_profile, narrow) - if num_before + num_after > MAX_MESSAGES_PER_FETCH: + + num_of_messages_requested = num_before + num_after + if client_requested_message_ids is not None: + num_of_messages_requested = len(client_requested_message_ids) + + if num_of_messages_requested > MAX_MESSAGES_PER_FETCH: raise JsonableError( _("Too many messages requested (maximum {max_messages}).").format( max_messages=MAX_MESSAGES_PER_FETCH, @@ -212,6 +244,7 @@ def get_messages_backend( include_anchor=include_anchor, num_before=num_before, num_after=num_after, + client_requested_message_ids=client_requested_message_ids, ) anchor = query_info.anchor @@ -274,16 +307,28 @@ def get_messages_backend( realm=realm, ) - ret = dict( - messages=message_list, - result="success", - msg="", - found_anchor=query_info.found_anchor, - found_oldest=query_info.found_oldest, - found_newest=query_info.found_newest, - history_limited=query_info.history_limited, - anchor=anchor, - ) + if client_requested_message_ids is not None: + ret = dict( + messages=message_list, + result="success", + msg="", + history_limited=query_info.history_limited, + found_anchor=False, + found_oldest=False, + found_newest=False, + ) + else: + ret = dict( + messages=message_list, + result="success", + msg="", + found_anchor=query_info.found_anchor, + found_oldest=query_info.found_oldest, + found_newest=query_info.found_newest, + history_limited=query_info.history_limited, + anchor=anchor, + ) + return json_success(request, data=ret)