message_fetch: Add message_ids parameter to /messages request.

This allows us to fetch messages for a list of message ids in a
single request.
This commit is contained in:
Aman Agrawal 2024-10-02 04:17:20 +00:00 committed by Tim Abbott
parent c16459ca3c
commit 3f726e25e4
7 changed files with 302 additions and 88 deletions

View File

@ -20,6 +20,11 @@ format used by the Zulip server that they are interacting with.
## Changes in Zulip 10.0 ## Changes in Zulip 10.0
**Feature level 300**
* [`GET /messages`](/api/get-message): Added a new message_ids parameter,
as an alternative method of specifying which messages to fetch.
**Feature level 299** **Feature level 299**
* `PATCH /realm`, [`POST /register`](/api/register-queue), * `PATCH /realm`, [`POST /register`](/api/register-queue),

View File

@ -34,7 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3"
# new level means in api_docs/changelog.md, as well as "**Changes**" # new level means in api_docs/changelog.md, as well as "**Changes**"
# entries in the endpoint's documentation in `zulip.yaml`. # entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 299 # Last bumped for org level group create/manage permissions. API_FEATURE_LEVEL = 300 # Last bumped for GET /messages API changes
# Bump the minor PROVISION_VERSION to indicate that folks should provision # Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump # only when going from an old version of the code to a newer version. Bump

View File

@ -1334,7 +1334,7 @@ def post_process_limited_query(
@dataclass @dataclass
class FetchedMessages(LimitedMessages[Row]): class FetchedMessages(LimitedMessages[Row]):
anchor: int anchor: int | None
include_history: bool include_history: bool
is_search: bool is_search: bool
@ -1349,6 +1349,7 @@ def fetch_messages(
include_anchor: bool, include_anchor: bool,
num_before: int, num_before: int,
num_after: int, num_after: int,
client_requested_message_ids: list[int] | None = None,
) -> FetchedMessages: ) -> FetchedMessages:
include_history = ok_to_include_history(narrow, user_profile, is_web_public_query) include_history = ok_to_include_history(narrow, user_profile, is_web_public_query)
if include_history: if include_history:
@ -1372,6 +1373,8 @@ def fetch_messages(
need_message = True need_message = True
need_user_message = True need_user_message = True
# get_base_query_for_search and ok_to_include_history are responsible for ensuring
# that we only include messages the user has access to.
query: SelectBase query: SelectBase
query, inner_msg_id_col = get_base_query_for_search( query, inner_msg_id_col = get_base_query_for_search(
realm_id=realm.id, realm_id=realm.id,
@ -1389,47 +1392,70 @@ def fetch_messages(
is_web_public_query=is_web_public_query, is_web_public_query=is_web_public_query,
) )
anchored_to_left = False
anchored_to_right = False
first_visible_message_id = get_first_visible_message_id(realm)
with get_sqlalchemy_connection() as sa_conn: with get_sqlalchemy_connection() as sa_conn:
if anchor is None: if client_requested_message_ids is not None:
# `anchor=None` corresponds to the anchor="first_unread" parameter. query = query.filter(inner_msg_id_col.in_(client_requested_message_ids))
anchor = find_first_unread_anchor( else:
sa_conn, if anchor is None:
user_profile, # `anchor=None` corresponds to the anchor="first_unread" parameter.
narrow, anchor = find_first_unread_anchor(
sa_conn,
user_profile,
narrow,
)
anchored_to_left = anchor == 0
# Set value that will be used to short circuit the after_query
# altogether and avoid needless conditions in the before_query.
anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID
if anchored_to_right:
num_after = 0
query = limit_query_to_range(
query=query,
num_before=num_before,
num_after=num_after,
anchor=anchor,
include_anchor=include_anchor,
anchored_to_left=anchored_to_left,
anchored_to_right=anchored_to_right,
id_col=inner_msg_id_col,
first_visible_message_id=first_visible_message_id,
) )
anchored_to_left = anchor == 0 main_query = query.subquery()
query = (
select(*main_query.c)
.select_from(main_query)
.order_by(column("message_id", Integer).asc())
)
# Set value that will be used to short circuit the after_query
# altogether and avoid needless conditions in the before_query.
anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID
if anchored_to_right:
num_after = 0
first_visible_message_id = get_first_visible_message_id(realm)
query = limit_query_to_range(
query=query,
num_before=num_before,
num_after=num_after,
anchor=anchor,
include_anchor=include_anchor,
anchored_to_left=anchored_to_left,
anchored_to_right=anchored_to_right,
id_col=inner_msg_id_col,
first_visible_message_id=first_visible_message_id,
)
main_query = query.subquery()
query = (
select(*main_query.c)
.select_from(main_query)
.order_by(column("message_id", Integer).asc())
)
# This is a hack to tag the query we use for testing # This is a hack to tag the query we use for testing
query = query.prefix_with("/* get_messages */") query = query.prefix_with("/* get_messages */")
rows = list(sa_conn.execute(query).fetchall()) rows = list(sa_conn.execute(query).fetchall())
if client_requested_message_ids is not None:
# We don't need to do any post-processing in this case.
if first_visible_message_id > 0:
visible_rows = [r for r in rows if r[0] >= first_visible_message_id]
else:
visible_rows = rows
return FetchedMessages(
rows=visible_rows,
found_anchor=False,
found_newest=False,
found_oldest=False,
history_limited=False,
anchor=None,
include_history=include_history,
is_search=is_search,
)
assert anchor is not None
query_info = post_process_limited_query( query_info = post_process_limited_query(
rows=rows, rows=rows,
num_before=num_before, num_before=num_before,

View File

@ -6479,33 +6479,37 @@ paths:
summary: Get messages summary: Get messages
tags: ["messages"] tags: ["messages"]
description: | description: |
Fetch user's message history from a Zulip server. This endpoint is the primary way to fetch a messages. It is used by all official
Zulip clients (e.g. the web, desktop, mobile, and terminal clients) as well as
many bots, API clients, backup scripts, etc.
This endpoint is the primary way to fetch a user's message history Most queries will specify a [narrow filter](/api/get-messages#parameter-narrow),
from a Zulip server. It is useful both for Zulip clients (e.g. the to fetch the messages matching any supported [search
web, desktop, mobile, and terminal clients) as well as bots, API query](/help/search-for-messages). If not specified, it will return messages
clients, backup scripts, etc. corresponding to the user's [combined feed](/help/combined-feed). There are two
ways to specify which messages matching the narrow filter to fetch:
- A range of messages, described by an `anchor` message ID (or a string-format
specification of how the server should computer an anchor to use) and a maximum
number of messages in each direction from that anchor.
- A rarely used variant (`message_ids`) where the client specifies the message IDs
to fetch.
The server returns the matching messages, sorted by message ID, as well as some
metadata that makes it easy for a client to determine whether there are more
messages matching the query that were not returned due to the `num_before` and
`num_after` limits.
Note that a user's message history does not contain messages sent to Note that a user's message history does not contain messages sent to
channels before they [subscribe](/api/subscribe), and newly created channels before they [subscribe](/api/subscribe), and newly created
bot users are not usually subscribed to any channels. bot users are not usually subscribed to any channels.
By specifying a [narrow filter](/api/get-messages#parameter-narrow), We recommend requesting at most 1000 messages in a batch, to avoid generating very
you can use this endpoint to fetch the messages matching any search large HTTP responses. A maximum of 5000 messages can be obtained per request;
query that is supported by Zulip's powerful full-text search backend. attempting to exceed this will result in an error.
In either case, you specify an `anchor` message (or ask the server to **Changes**: The `message_ids` option is new in Zulip 10.0 (feature level 300).
calculate the first unread message for you and use that as the
anchor), as well as a number of messages before and after the anchor
message. The server returns those messages, sorted by message ID, as
well as some metadata that makes it easy for a client to determine
whether there are more messages matching the query that were not
returned due to the `num_before` and `num_after` limits.
We recommend setting `num_before` and `num_after` to no more than 1000, to
avoid generating very large HTTP responses. A maximum of 5000 messages
can be obtained per request; attempting to exceed this will result in an
error.
x-curl-examples-parameters: x-curl-examples-parameters:
oneOf: oneOf:
- type: exclude - type: exclude
@ -6515,6 +6519,7 @@ paths:
- apply_markdown - apply_markdown
- use_first_unread_anchor - use_first_unread_anchor
- include_anchor - include_anchor
- message_ids
parameters: parameters:
- name: anchor - name: anchor
in: query in: query
@ -6554,20 +6559,22 @@ paths:
in: query in: query
description: | description: |
The number of messages with IDs less than the anchor to retrieve. The number of messages with IDs less than the anchor to retrieve.
Required if `message_ids` is not provided.
schema: schema:
type: integer type: integer
minimum: 0 minimum: 0
example: 4 example: 4
required: true required: false
- name: num_after - name: num_after
in: query in: query
description: | description: |
The number of messages with IDs greater than the anchor to retrieve. The number of messages with IDs greater than the anchor to retrieve.
Required if `message_ids` is not provided.
schema: schema:
type: integer type: integer
minimum: 0 minimum: 0
example: 8 example: 8
required: true required: false
- name: narrow - name: narrow
in: query in: query
description: | description: |
@ -6648,6 +6655,26 @@ paths:
type: boolean type: boolean
default: false default: false
example: true example: true
- name: message_ids
in: query
description: |
A list of message IDs to fetch. The server will return messages corresponding to the
subset of the requested message IDs that exist and the current user has access to,
potentially filtered by the narrow (if that parameter is provided).
It is an error to pass this parameter as well as any of the parameters involved in
specifying a range of messages: `anchor`, `include_anchor`, `use_first_unread_anchor`,
`num_before`, and `num_after`.
**Changes**: New in Zulip 10.0 (feature level 300). Previously, there was
no way to request a specific set of messages IDs.
content:
application/json:
schema:
type: array
items:
type: integer
example: [1, 2, 3]
responses: responses:
"200": "200":
description: Success. description: Success.
@ -6657,6 +6684,10 @@ paths:
allOf: allOf:
- $ref: "#/components/schemas/JsonSuccessBase" - $ref: "#/components/schemas/JsonSuccessBase"
- additionalProperties: false - additionalProperties: false
required:
- result
- msg
- messages
properties: properties:
result: {} result: {}
msg: {} msg: {}
@ -6666,6 +6697,8 @@ paths:
description: | description: |
The same `anchor` specified in the request (or the computed one, if The same `anchor` specified in the request (or the computed one, if
`use_first_unread_anchor` is `true`). `use_first_unread_anchor` is `true`).
Only present if `message_ids` is not provided.
found_newest: found_newest:
type: boolean type: boolean
description: | description: |

View File

@ -2085,6 +2085,121 @@ class GetOldMessagesTest(ZulipTestCase):
result, "Invalid narrow operator: unknown web-public channel Scotland", status_code=400 result, "Invalid narrow operator: unknown web-public channel Scotland", status_code=400
) )
def test_get_message_ids(self) -> None:
self.login("iago")
self.subscribe(self.example_user("iago"), "Verona")
msg1 = self.send_stream_message(self.example_user("iago"), "Verona")
msg2 = self.send_stream_message(self.example_user("iago"), "Verona")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps([msg1, msg2]).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 2)
fetched_message_ids = [message["id"] for message in messages]
self.assertEqual(fetched_message_ids.sort(), [msg1, msg2].sort())
def test_get_message_ids_web_public(self) -> None:
self.login("iago")
self.subscribe(self.example_user("iago"), "Rome")
self.logout()
msg1 = self.send_stream_message(self.example_user("iago"), "Rome")
msg2 = self.send_stream_message(self.example_user("iago"), "Rome")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps([msg1, msg2]).decode(),
"narrow": orjson.dumps([dict(operator="channels", operand="web-public")]).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 2)
fetched_message_ids = [message["id"] for message in messages]
self.assertEqual(fetched_message_ids.sort(), [msg1, msg2].sort())
def test_message_fetch_with_mutually_exclusive_parameters(self) -> None:
mutually_exclusive_params_with_message_ids = ["num_before", "num_after", "anchor"]
for param in mutually_exclusive_params_with_message_ids:
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps([1, 2]).decode(),
param: 1,
},
)
error_msg = "Unsupported parameter combination: num_before, num_after, anchor, message_ids, include_anchor, use_first_unread_anchor"
self.assert_json_error(result, error_msg)
def test_message_fetch_for_inaccessible_message_ids(self) -> None:
# Add new channels
realm = get_realm("zulip")
channel_dicts: list[StreamDict] = [
{
"name": "private-channel",
"description": "Private channel with non-public history",
"invite_only": True,
},
{
"name": "private-channel-with-history",
"description": "Private channel with public history",
"invite_only": True,
"history_public_to_subscribers": True,
},
]
create_streams_if_needed(realm, channel_dicts)
iago = self.example_user("iago")
self.login("iago")
message_ids = []
for stream_name in ["private-channel", "private-channel-with-history"]:
self.subscribe(iago, stream_name)
message_ids.append(self.send_stream_message(iago, stream_name))
self.logout()
self.login("hamlet")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps(message_ids).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 0)
self.logout()
self.login("iago")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps(message_ids).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 2)
# These messages are not accessible if they are after first_visible_message_id.
realm = get_realm("zulip")
realm.first_visible_message_id = max(message_ids) + 1
realm.save(update_fields=["first_visible_message_id"])
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps(message_ids).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 0)
def setup_web_public_test(self, num_web_public_message: int = 1) -> None: def setup_web_public_test(self, num_web_public_message: int = 1) -> None:
""" """
Send N+2 messages, N in a web-public channel, then one in a non-web-public channel Send N+2 messages, N in a web-public channel, then one in a non-web-public channel
@ -3682,20 +3797,6 @@ class GetOldMessagesTest(ZulipTestCase):
self.assertEqual(data["history_limited"], False) self.assertEqual(data["history_limited"], False)
messages_matches_ids(messages, message_ids[6:9]) messages_matches_ids(messages, message_ids[6:9])
def test_missing_params(self) -> None:
"""
anchor, num_before, and num_after are all required
POST parameters for get_messages.
"""
self.login("hamlet")
required_args: tuple[tuple[str, int], ...] = (("num_before", 1), ("num_after", 1))
for i in range(len(required_args)):
post_params = dict(required_args[:i] + required_args[i + 1 :])
result = self.client_get("/json/messages", post_params)
self.assert_json_error(result, f"Missing '{required_args[i][0]}' argument")
def test_get_messages_limits(self) -> None: def test_get_messages_limits(self) -> None:
""" """
A call to GET /json/messages requesting more than A call to GET /json/messages requesting more than

View File

@ -789,7 +789,9 @@ class TestCurlExampleGeneration(ZulipTestCase):
self.curl_example("/endpoint", "BREW") # see: HTCPCP self.curl_example("/endpoint", "BREW") # see: HTCPCP
def test_generate_and_render_curl_with_array_example(self) -> None: def test_generate_and_render_curl_with_array_example(self) -> None:
generated_curl_example = self.curl_example("/messages", "GET", exclude=["use_first_unread_anchor"]) generated_curl_example = self.curl_example(
"/messages", "GET", exclude=["use_first_unread_anchor", "message_ids"]
)
expected_curl_example = [ expected_curl_example = [
"```curl", "```curl",
"curl -sSX GET -G http://localhost:9991/api/v1/messages \\", "curl -sSX GET -G http://localhost:9991/api/v1/messages \\",
@ -862,7 +864,9 @@ class TestCurlExampleGeneration(ZulipTestCase):
def test_generate_and_render_curl_example_with_excludes(self) -> None: def test_generate_and_render_curl_example_with_excludes(self) -> None:
generated_curl_example = self.curl_example( generated_curl_example = self.curl_example(
"/messages", "GET", exclude=["client_gravatar", "apply_markdown", "use_first_unread_anchor"] "/messages",
"GET",
exclude=["client_gravatar", "apply_markdown", "use_first_unread_anchor", "message_ids"],
) )
expected_curl_example = [ expected_curl_example = [
"```curl", "```curl",

View File

@ -12,7 +12,11 @@ from sqlalchemy.sql import and_, column, join, literal, literal_column, select,
from sqlalchemy.types import Integer, Text from sqlalchemy.types import Integer, Text
from zerver.context_processors import get_valid_realm_from_request from zerver.context_processors import get_valid_realm_from_request
from zerver.lib.exceptions import JsonableError, MissingAuthenticationError from zerver.lib.exceptions import (
IncompatibleParametersError,
JsonableError,
MissingAuthenticationError,
)
from zerver.lib.message import get_first_visible_message_id, messages_for_ids from zerver.lib.message import get_first_visible_message_id, messages_for_ids
from zerver.lib.narrow import ( from zerver.lib.narrow import (
NarrowParameter, NarrowParameter,
@ -106,19 +110,47 @@ def get_messages_backend(
*, *,
anchor_val: Annotated[str | None, ApiParamConfig("anchor")] = None, anchor_val: Annotated[str | None, ApiParamConfig("anchor")] = None,
include_anchor: Json[bool] = True, include_anchor: Json[bool] = True,
num_before: Json[NonNegativeInt], num_before: Json[NonNegativeInt] = 0,
num_after: Json[NonNegativeInt], num_after: Json[NonNegativeInt] = 0,
narrow: Json[list[NarrowParameter] | None] = None, narrow: Json[list[NarrowParameter] | None] = None,
use_first_unread_anchor_val: Annotated[ use_first_unread_anchor_val: Annotated[
Json[bool], ApiParamConfig("use_first_unread_anchor") Json[bool], ApiParamConfig("use_first_unread_anchor")
] = False, ] = False,
client_gravatar: Json[bool] = True, client_gravatar: Json[bool] = True,
apply_markdown: Json[bool] = True, apply_markdown: Json[bool] = True,
client_requested_message_ids: Annotated[
Json[list[NonNegativeInt] | None], ApiParamConfig("message_ids")
] = None,
) -> HttpResponse: ) -> HttpResponse:
# User has to either provide message_ids or both num_before and num_after.
if (
num_before or num_after or anchor_val is not None or use_first_unread_anchor_val
) and client_requested_message_ids is not None:
raise IncompatibleParametersError(
[
"num_before",
"num_after",
"anchor",
"message_ids",
"include_anchor",
"use_first_unread_anchor",
]
)
elif client_requested_message_ids is not None:
include_anchor = False
anchor = None
if client_requested_message_ids is None:
anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val)
realm = get_valid_realm_from_request(request) realm = get_valid_realm_from_request(request)
anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val)
narrow = update_narrow_terms_containing_with_operator(realm, maybe_user_profile, narrow) narrow = update_narrow_terms_containing_with_operator(realm, maybe_user_profile, narrow)
if num_before + num_after > MAX_MESSAGES_PER_FETCH:
num_of_messages_requested = num_before + num_after
if client_requested_message_ids is not None:
num_of_messages_requested = len(client_requested_message_ids)
if num_of_messages_requested > MAX_MESSAGES_PER_FETCH:
raise JsonableError( raise JsonableError(
_("Too many messages requested (maximum {max_messages}).").format( _("Too many messages requested (maximum {max_messages}).").format(
max_messages=MAX_MESSAGES_PER_FETCH, max_messages=MAX_MESSAGES_PER_FETCH,
@ -212,6 +244,7 @@ def get_messages_backend(
include_anchor=include_anchor, include_anchor=include_anchor,
num_before=num_before, num_before=num_before,
num_after=num_after, num_after=num_after,
client_requested_message_ids=client_requested_message_ids,
) )
anchor = query_info.anchor anchor = query_info.anchor
@ -274,16 +307,28 @@ def get_messages_backend(
realm=realm, realm=realm,
) )
ret = dict( if client_requested_message_ids is not None:
messages=message_list, ret = dict(
result="success", messages=message_list,
msg="", result="success",
found_anchor=query_info.found_anchor, msg="",
found_oldest=query_info.found_oldest, history_limited=query_info.history_limited,
found_newest=query_info.found_newest, found_anchor=False,
history_limited=query_info.history_limited, found_oldest=False,
anchor=anchor, found_newest=False,
) )
else:
ret = dict(
messages=message_list,
result="success",
msg="",
found_anchor=query_info.found_anchor,
found_oldest=query_info.found_oldest,
found_newest=query_info.found_newest,
history_limited=query_info.history_limited,
anchor=anchor,
)
return json_success(request, data=ret) return json_success(request, data=ret)