message_fetch: Add message_ids parameter to /messages request.

This allows us to fetch messages for a list of message ids in a
single request.
This commit is contained in:
Aman Agrawal 2024-10-02 04:17:20 +00:00 committed by Tim Abbott
parent c16459ca3c
commit 3f726e25e4
7 changed files with 302 additions and 88 deletions

View File

@ -20,6 +20,11 @@ format used by the Zulip server that they are interacting with.
## Changes in Zulip 10.0
**Feature level 300**
* [`GET /messages`](/api/get-message): Added a new message_ids parameter,
as an alternative method of specifying which messages to fetch.
**Feature level 299**
* `PATCH /realm`, [`POST /register`](/api/register-queue),

View File

@ -34,7 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3"
# new level means in api_docs/changelog.md, as well as "**Changes**"
# entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 299 # Last bumped for org level group create/manage permissions.
API_FEATURE_LEVEL = 300 # Last bumped for GET /messages API changes
# Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump

View File

@ -1334,7 +1334,7 @@ def post_process_limited_query(
@dataclass
class FetchedMessages(LimitedMessages[Row]):
anchor: int
anchor: int | None
include_history: bool
is_search: bool
@ -1349,6 +1349,7 @@ def fetch_messages(
include_anchor: bool,
num_before: int,
num_after: int,
client_requested_message_ids: list[int] | None = None,
) -> FetchedMessages:
include_history = ok_to_include_history(narrow, user_profile, is_web_public_query)
if include_history:
@ -1372,6 +1373,8 @@ def fetch_messages(
need_message = True
need_user_message = True
# get_base_query_for_search and ok_to_include_history are responsible for ensuring
# that we only include messages the user has access to.
query: SelectBase
query, inner_msg_id_col = get_base_query_for_search(
realm_id=realm.id,
@ -1389,47 +1392,70 @@ def fetch_messages(
is_web_public_query=is_web_public_query,
)
anchored_to_left = False
anchored_to_right = False
first_visible_message_id = get_first_visible_message_id(realm)
with get_sqlalchemy_connection() as sa_conn:
if anchor is None:
# `anchor=None` corresponds to the anchor="first_unread" parameter.
anchor = find_first_unread_anchor(
sa_conn,
user_profile,
narrow,
if client_requested_message_ids is not None:
query = query.filter(inner_msg_id_col.in_(client_requested_message_ids))
else:
if anchor is None:
# `anchor=None` corresponds to the anchor="first_unread" parameter.
anchor = find_first_unread_anchor(
sa_conn,
user_profile,
narrow,
)
anchored_to_left = anchor == 0
# Set value that will be used to short circuit the after_query
# altogether and avoid needless conditions in the before_query.
anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID
if anchored_to_right:
num_after = 0
query = limit_query_to_range(
query=query,
num_before=num_before,
num_after=num_after,
anchor=anchor,
include_anchor=include_anchor,
anchored_to_left=anchored_to_left,
anchored_to_right=anchored_to_right,
id_col=inner_msg_id_col,
first_visible_message_id=first_visible_message_id,
)
anchored_to_left = anchor == 0
main_query = query.subquery()
query = (
select(*main_query.c)
.select_from(main_query)
.order_by(column("message_id", Integer).asc())
)
# Set value that will be used to short circuit the after_query
# altogether and avoid needless conditions in the before_query.
anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID
if anchored_to_right:
num_after = 0
first_visible_message_id = get_first_visible_message_id(realm)
query = limit_query_to_range(
query=query,
num_before=num_before,
num_after=num_after,
anchor=anchor,
include_anchor=include_anchor,
anchored_to_left=anchored_to_left,
anchored_to_right=anchored_to_right,
id_col=inner_msg_id_col,
first_visible_message_id=first_visible_message_id,
)
main_query = query.subquery()
query = (
select(*main_query.c)
.select_from(main_query)
.order_by(column("message_id", Integer).asc())
)
# This is a hack to tag the query we use for testing
query = query.prefix_with("/* get_messages */")
rows = list(sa_conn.execute(query).fetchall())
if client_requested_message_ids is not None:
# We don't need to do any post-processing in this case.
if first_visible_message_id > 0:
visible_rows = [r for r in rows if r[0] >= first_visible_message_id]
else:
visible_rows = rows
return FetchedMessages(
rows=visible_rows,
found_anchor=False,
found_newest=False,
found_oldest=False,
history_limited=False,
anchor=None,
include_history=include_history,
is_search=is_search,
)
assert anchor is not None
query_info = post_process_limited_query(
rows=rows,
num_before=num_before,

View File

@ -6479,33 +6479,37 @@ paths:
summary: Get messages
tags: ["messages"]
description: |
Fetch user's message history from a Zulip server.
This endpoint is the primary way to fetch a messages. It is used by all official
Zulip clients (e.g. the web, desktop, mobile, and terminal clients) as well as
many bots, API clients, backup scripts, etc.
This endpoint is the primary way to fetch a user's message history
from a Zulip server. It is useful both for Zulip clients (e.g. the
web, desktop, mobile, and terminal clients) as well as bots, API
clients, backup scripts, etc.
Most queries will specify a [narrow filter](/api/get-messages#parameter-narrow),
to fetch the messages matching any supported [search
query](/help/search-for-messages). If not specified, it will return messages
corresponding to the user's [combined feed](/help/combined-feed). There are two
ways to specify which messages matching the narrow filter to fetch:
- A range of messages, described by an `anchor` message ID (or a string-format
specification of how the server should computer an anchor to use) and a maximum
number of messages in each direction from that anchor.
- A rarely used variant (`message_ids`) where the client specifies the message IDs
to fetch.
The server returns the matching messages, sorted by message ID, as well as some
metadata that makes it easy for a client to determine whether there are more
messages matching the query that were not returned due to the `num_before` and
`num_after` limits.
Note that a user's message history does not contain messages sent to
channels before they [subscribe](/api/subscribe), and newly created
bot users are not usually subscribed to any channels.
By specifying a [narrow filter](/api/get-messages#parameter-narrow),
you can use this endpoint to fetch the messages matching any search
query that is supported by Zulip's powerful full-text search backend.
We recommend requesting at most 1000 messages in a batch, to avoid generating very
large HTTP responses. A maximum of 5000 messages can be obtained per request;
attempting to exceed this will result in an error.
In either case, you specify an `anchor` message (or ask the server to
calculate the first unread message for you and use that as the
anchor), as well as a number of messages before and after the anchor
message. The server returns those messages, sorted by message ID, as
well as some metadata that makes it easy for a client to determine
whether there are more messages matching the query that were not
returned due to the `num_before` and `num_after` limits.
We recommend setting `num_before` and `num_after` to no more than 1000, to
avoid generating very large HTTP responses. A maximum of 5000 messages
can be obtained per request; attempting to exceed this will result in an
error.
**Changes**: The `message_ids` option is new in Zulip 10.0 (feature level 300).
x-curl-examples-parameters:
oneOf:
- type: exclude
@ -6515,6 +6519,7 @@ paths:
- apply_markdown
- use_first_unread_anchor
- include_anchor
- message_ids
parameters:
- name: anchor
in: query
@ -6554,20 +6559,22 @@ paths:
in: query
description: |
The number of messages with IDs less than the anchor to retrieve.
Required if `message_ids` is not provided.
schema:
type: integer
minimum: 0
example: 4
required: true
required: false
- name: num_after
in: query
description: |
The number of messages with IDs greater than the anchor to retrieve.
Required if `message_ids` is not provided.
schema:
type: integer
minimum: 0
example: 8
required: true
required: false
- name: narrow
in: query
description: |
@ -6648,6 +6655,26 @@ paths:
type: boolean
default: false
example: true
- name: message_ids
in: query
description: |
A list of message IDs to fetch. The server will return messages corresponding to the
subset of the requested message IDs that exist and the current user has access to,
potentially filtered by the narrow (if that parameter is provided).
It is an error to pass this parameter as well as any of the parameters involved in
specifying a range of messages: `anchor`, `include_anchor`, `use_first_unread_anchor`,
`num_before`, and `num_after`.
**Changes**: New in Zulip 10.0 (feature level 300). Previously, there was
no way to request a specific set of messages IDs.
content:
application/json:
schema:
type: array
items:
type: integer
example: [1, 2, 3]
responses:
"200":
description: Success.
@ -6657,6 +6684,10 @@ paths:
allOf:
- $ref: "#/components/schemas/JsonSuccessBase"
- additionalProperties: false
required:
- result
- msg
- messages
properties:
result: {}
msg: {}
@ -6666,6 +6697,8 @@ paths:
description: |
The same `anchor` specified in the request (or the computed one, if
`use_first_unread_anchor` is `true`).
Only present if `message_ids` is not provided.
found_newest:
type: boolean
description: |

View File

@ -2085,6 +2085,121 @@ class GetOldMessagesTest(ZulipTestCase):
result, "Invalid narrow operator: unknown web-public channel Scotland", status_code=400
)
def test_get_message_ids(self) -> None:
self.login("iago")
self.subscribe(self.example_user("iago"), "Verona")
msg1 = self.send_stream_message(self.example_user("iago"), "Verona")
msg2 = self.send_stream_message(self.example_user("iago"), "Verona")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps([msg1, msg2]).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 2)
fetched_message_ids = [message["id"] for message in messages]
self.assertEqual(fetched_message_ids.sort(), [msg1, msg2].sort())
def test_get_message_ids_web_public(self) -> None:
self.login("iago")
self.subscribe(self.example_user("iago"), "Rome")
self.logout()
msg1 = self.send_stream_message(self.example_user("iago"), "Rome")
msg2 = self.send_stream_message(self.example_user("iago"), "Rome")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps([msg1, msg2]).decode(),
"narrow": orjson.dumps([dict(operator="channels", operand="web-public")]).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 2)
fetched_message_ids = [message["id"] for message in messages]
self.assertEqual(fetched_message_ids.sort(), [msg1, msg2].sort())
def test_message_fetch_with_mutually_exclusive_parameters(self) -> None:
mutually_exclusive_params_with_message_ids = ["num_before", "num_after", "anchor"]
for param in mutually_exclusive_params_with_message_ids:
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps([1, 2]).decode(),
param: 1,
},
)
error_msg = "Unsupported parameter combination: num_before, num_after, anchor, message_ids, include_anchor, use_first_unread_anchor"
self.assert_json_error(result, error_msg)
def test_message_fetch_for_inaccessible_message_ids(self) -> None:
# Add new channels
realm = get_realm("zulip")
channel_dicts: list[StreamDict] = [
{
"name": "private-channel",
"description": "Private channel with non-public history",
"invite_only": True,
},
{
"name": "private-channel-with-history",
"description": "Private channel with public history",
"invite_only": True,
"history_public_to_subscribers": True,
},
]
create_streams_if_needed(realm, channel_dicts)
iago = self.example_user("iago")
self.login("iago")
message_ids = []
for stream_name in ["private-channel", "private-channel-with-history"]:
self.subscribe(iago, stream_name)
message_ids.append(self.send_stream_message(iago, stream_name))
self.logout()
self.login("hamlet")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps(message_ids).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 0)
self.logout()
self.login("iago")
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps(message_ids).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 2)
# These messages are not accessible if they are after first_visible_message_id.
realm = get_realm("zulip")
realm.first_visible_message_id = max(message_ids) + 1
realm.save(update_fields=["first_visible_message_id"])
result = self.client_get(
"/json/messages",
{
"message_ids": orjson.dumps(message_ids).decode(),
},
)
self.assert_json_success(result)
messages = orjson.loads(result.content)["messages"]
self.assert_length(messages, 0)
def setup_web_public_test(self, num_web_public_message: int = 1) -> None:
"""
Send N+2 messages, N in a web-public channel, then one in a non-web-public channel
@ -3682,20 +3797,6 @@ class GetOldMessagesTest(ZulipTestCase):
self.assertEqual(data["history_limited"], False)
messages_matches_ids(messages, message_ids[6:9])
def test_missing_params(self) -> None:
"""
anchor, num_before, and num_after are all required
POST parameters for get_messages.
"""
self.login("hamlet")
required_args: tuple[tuple[str, int], ...] = (("num_before", 1), ("num_after", 1))
for i in range(len(required_args)):
post_params = dict(required_args[:i] + required_args[i + 1 :])
result = self.client_get("/json/messages", post_params)
self.assert_json_error(result, f"Missing '{required_args[i][0]}' argument")
def test_get_messages_limits(self) -> None:
"""
A call to GET /json/messages requesting more than

View File

@ -789,7 +789,9 @@ class TestCurlExampleGeneration(ZulipTestCase):
self.curl_example("/endpoint", "BREW") # see: HTCPCP
def test_generate_and_render_curl_with_array_example(self) -> None:
generated_curl_example = self.curl_example("/messages", "GET", exclude=["use_first_unread_anchor"])
generated_curl_example = self.curl_example(
"/messages", "GET", exclude=["use_first_unread_anchor", "message_ids"]
)
expected_curl_example = [
"```curl",
"curl -sSX GET -G http://localhost:9991/api/v1/messages \\",
@ -862,7 +864,9 @@ class TestCurlExampleGeneration(ZulipTestCase):
def test_generate_and_render_curl_example_with_excludes(self) -> None:
generated_curl_example = self.curl_example(
"/messages", "GET", exclude=["client_gravatar", "apply_markdown", "use_first_unread_anchor"]
"/messages",
"GET",
exclude=["client_gravatar", "apply_markdown", "use_first_unread_anchor", "message_ids"],
)
expected_curl_example = [
"```curl",

View File

@ -12,7 +12,11 @@ from sqlalchemy.sql import and_, column, join, literal, literal_column, select,
from sqlalchemy.types import Integer, Text
from zerver.context_processors import get_valid_realm_from_request
from zerver.lib.exceptions import JsonableError, MissingAuthenticationError
from zerver.lib.exceptions import (
IncompatibleParametersError,
JsonableError,
MissingAuthenticationError,
)
from zerver.lib.message import get_first_visible_message_id, messages_for_ids
from zerver.lib.narrow import (
NarrowParameter,
@ -106,19 +110,47 @@ def get_messages_backend(
*,
anchor_val: Annotated[str | None, ApiParamConfig("anchor")] = None,
include_anchor: Json[bool] = True,
num_before: Json[NonNegativeInt],
num_after: Json[NonNegativeInt],
num_before: Json[NonNegativeInt] = 0,
num_after: Json[NonNegativeInt] = 0,
narrow: Json[list[NarrowParameter] | None] = None,
use_first_unread_anchor_val: Annotated[
Json[bool], ApiParamConfig("use_first_unread_anchor")
] = False,
client_gravatar: Json[bool] = True,
apply_markdown: Json[bool] = True,
client_requested_message_ids: Annotated[
Json[list[NonNegativeInt] | None], ApiParamConfig("message_ids")
] = None,
) -> HttpResponse:
# User has to either provide message_ids or both num_before and num_after.
if (
num_before or num_after or anchor_val is not None or use_first_unread_anchor_val
) and client_requested_message_ids is not None:
raise IncompatibleParametersError(
[
"num_before",
"num_after",
"anchor",
"message_ids",
"include_anchor",
"use_first_unread_anchor",
]
)
elif client_requested_message_ids is not None:
include_anchor = False
anchor = None
if client_requested_message_ids is None:
anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val)
realm = get_valid_realm_from_request(request)
anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val)
narrow = update_narrow_terms_containing_with_operator(realm, maybe_user_profile, narrow)
if num_before + num_after > MAX_MESSAGES_PER_FETCH:
num_of_messages_requested = num_before + num_after
if client_requested_message_ids is not None:
num_of_messages_requested = len(client_requested_message_ids)
if num_of_messages_requested > MAX_MESSAGES_PER_FETCH:
raise JsonableError(
_("Too many messages requested (maximum {max_messages}).").format(
max_messages=MAX_MESSAGES_PER_FETCH,
@ -212,6 +244,7 @@ def get_messages_backend(
include_anchor=include_anchor,
num_before=num_before,
num_after=num_after,
client_requested_message_ids=client_requested_message_ids,
)
anchor = query_info.anchor
@ -274,16 +307,28 @@ def get_messages_backend(
realm=realm,
)
ret = dict(
messages=message_list,
result="success",
msg="",
found_anchor=query_info.found_anchor,
found_oldest=query_info.found_oldest,
found_newest=query_info.found_newest,
history_limited=query_info.history_limited,
anchor=anchor,
)
if client_requested_message_ids is not None:
ret = dict(
messages=message_list,
result="success",
msg="",
history_limited=query_info.history_limited,
found_anchor=False,
found_oldest=False,
found_newest=False,
)
else:
ret = dict(
messages=message_list,
result="success",
msg="",
found_anchor=query_info.found_anchor,
found_oldest=query_info.found_oldest,
found_newest=query_info.found_newest,
history_limited=query_info.history_limited,
anchor=anchor,
)
return json_success(request, data=ret)