2022-10-21 19:29:15 +02:00
|
|
|
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2024-04-10 19:04:52 +02:00
|
|
|
from django.conf import settings
|
2020-08-04 19:33:43 +02:00
|
|
|
from django.contrib.auth.models import AnonymousUser
|
2024-04-10 19:04:52 +02:00
|
|
|
from django.db import connection, transaction
|
2016-06-06 00:32:39 +02:00
|
|
|
from django.http import HttpRequest, HttpResponse
|
2023-08-16 02:51:03 +02:00
|
|
|
from django.utils.html import escape as escape_html
|
2021-04-16 00:57:30 +02:00
|
|
|
from django.utils.translation import gettext as _
|
2024-06-09 14:52:19 +02:00
|
|
|
from pydantic import Json, NonNegativeInt
|
2022-10-21 19:29:15 +02:00
|
|
|
from sqlalchemy.sql import and_, column, join, literal, literal_column, select, table
|
2022-09-09 02:20:21 +02:00
|
|
|
from sqlalchemy.types import Integer, Text
|
2024-06-09 14:52:19 +02:00
|
|
|
from typing_extensions import Annotated
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2020-09-01 13:56:15 +02:00
|
|
|
from zerver.context_processors import get_valid_realm_from_request
|
2022-09-09 02:20:21 +02:00
|
|
|
from zerver.lib.exceptions import JsonableError, MissingAuthenticationError
|
2020-06-22 23:25:37 +02:00
|
|
|
from zerver.lib.message import get_first_visible_message_id, messages_for_ids
|
2022-09-09 02:20:21 +02:00
|
|
|
from zerver.lib.narrow import (
|
2024-06-09 14:52:19 +02:00
|
|
|
NarrowParameter,
|
2022-09-09 02:20:21 +02:00
|
|
|
OptionalNarrowListT,
|
2023-08-05 23:13:30 +02:00
|
|
|
add_narrow_conditions,
|
2022-11-10 00:35:52 +01:00
|
|
|
fetch_messages,
|
2022-09-09 02:20:21 +02:00
|
|
|
is_spectator_compatible,
|
|
|
|
is_web_public_narrow,
|
2022-10-19 04:19:19 +02:00
|
|
|
parse_anchor_value,
|
2022-09-09 02:20:21 +02:00
|
|
|
)
|
2024-06-09 14:52:19 +02:00
|
|
|
from zerver.lib.request import RequestNotes
|
2021-06-30 18:35:50 +02:00
|
|
|
from zerver.lib.response import json_success
|
2016-07-19 08:12:35 +02:00
|
|
|
from zerver.lib.sqlalchemy_utils import get_sqlalchemy_connection
|
2024-04-15 21:40:37 +02:00
|
|
|
from zerver.lib.topic import DB_TOPIC_NAME, MATCH_TOPIC
|
|
|
|
from zerver.lib.topic_sqlalchemy import topic_column_sa
|
2024-06-09 14:52:19 +02:00
|
|
|
from zerver.lib.typed_endpoint import ApiParamConfig, typed_endpoint
|
2021-10-26 09:15:16 +02:00
|
|
|
from zerver.models import UserMessage, UserProfile
|
2016-06-24 02:26:09 +02:00
|
|
|
|
2018-09-09 14:54:52 +02:00
|
|
|
# Upper bound on num_before + num_after that get_messages_backend will
# accept in a single request; larger requests are rejected with a
# JsonableError.
MAX_MESSAGES_PER_FETCH = 5000
|
2017-02-23 05:50:15 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-24 03:47:28 +02:00
|
|
|
def highlight_string(text: str, locs: Iterable[Tuple[int, int]]) -> str:
    """Wrap matched spans of ``text`` in ``<span class="highlight">`` markup.

    ``locs`` yields ``(offset, length)`` pairs indexing into ``text``; they
    are consumed in the order given (presumably ascending and
    non-overlapping -- confirm against the caller).

    While walking the text we track whether the most recent angle bracket
    seen was ``<`` without a closing ``>``.  If that is the case once a
    match has been scanned, the match is copied through unmodified rather
    than wrapped.  This state is carried over from one span to the next.
    """
    open_span = '<span class="highlight">'
    close_span = "</span>"
    pieces: List[str] = []
    cursor = 0
    inside_tag = False

    for offset, length in locs:
        span_end = offset + length
        before = text[cursor:offset]
        matched = text[offset:span_end]

        # Update the open-tag state over everything consumed so far for
        # this span, including the match itself.
        for ch in before + matched:
            if ch == "<":
                inside_tag = True
            elif ch == ">":
                inside_tag = False

        pieces.append(before)
        if inside_tag:
            pieces.append(matched)
        else:
            pieces.extend((open_span, matched, close_span))
        cursor = span_end

    # Whatever follows the last match is passed through untouched.
    pieces.append(text[cursor:])
    return "".join(pieces)
|
2013-11-26 00:41:24 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_search_fields(
    rendered_content: str,
    topic_name: str,
    content_matches: Iterable[Tuple[int, int]],
    topic_matches: Iterable[Tuple[int, int]],
) -> Dict[str, str]:
    """Build the highlighted search-result fields for one message.

    ``rendered_content`` is highlighted as given; ``topic_name`` is
    HTML-escaped first and the escaped text is what gets highlighted
    (so ``topic_matches`` presumably index into the escaped topic --
    confirm against the query that produces them).

    Returns a dict with the highlighted content under ``"match_content"``
    and the highlighted topic under the ``MATCH_TOPIC`` key.
    """
    highlighted_content = highlight_string(rendered_content, content_matches)
    highlighted_topic = highlight_string(escape_html(topic_name), topic_matches)
    return {
        "match_content": highlighted_content,
        MATCH_TOPIC: highlighted_topic,
    }
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-03-05 10:59:50 +01:00
|
|
|
def clean_narrow_for_web_public_api(narrow: OptionalNarrowListT) -> OptionalNarrowListT:
    """Return ``narrow`` without any non-negated ``in:home`` term.

    ``None`` is passed through unchanged.  Otherwise a new list is
    returned; the input list is not modified.
    """
    if narrow is None:
        return None

    # Remove {'operator': 'in', 'operand': 'home', 'negated': False} from narrow.
    # This is to allow spectators to access all messages. The narrow should still pass
    # is_web_public_narrow check after this change.
    kept_terms = []
    for term in narrow:
        if term["operator"] == "in" and term["operand"] == "home" and not term["negated"]:
            continue
        kept_terms.append(term)
    return kept_terms
|
|
|
|
|
|
|
|
|
2024-06-09 14:52:19 +02:00
|
|
|
@typed_endpoint
def get_messages_backend(
    request: HttpRequest,
    maybe_user_profile: Union[UserProfile, AnonymousUser],
    *,
    anchor_val: Annotated[Optional[str], ApiParamConfig("anchor")] = None,
    include_anchor: Json[bool] = True,
    num_before: Json[NonNegativeInt],
    num_after: Json[NonNegativeInt],
    narrow: Json[Optional[List[NarrowParameter]]] = None,
    use_first_unread_anchor_val: Annotated[
        Json[bool], ApiParamConfig("use_first_unread_anchor")
    ] = False,
    client_gravatar: Json[bool] = True,
    apply_markdown: Json[bool] = True,
) -> HttpResponse:
    """Fetch a window of messages around an anchor, optionally narrowed.

    Handles both authenticated users and unauthenticated (web-public)
    requests.  For unauthenticated requests the realm must allow
    web-public stream access and the narrow must pass the
    is_web_public_narrow and is_spectator_compatible checks; otherwise
    MissingAuthenticationError is raised.

    Raises JsonableError if num_before + num_after exceeds
    MAX_MESSAGES_PER_FETCH, or if include_anchor is false while both
    num_before and num_after are positive.

    The response carries the message list along with the resolved anchor
    and the found_anchor / found_oldest / found_newest / history_limited
    values reported by fetch_messages.
    """
    anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val)
    if num_before + num_after > MAX_MESSAGES_PER_FETCH:
        raise JsonableError(
            _("Too many messages requested (maximum {max_messages}).").format(
                max_messages=MAX_MESSAGES_PER_FETCH,
            )
        )
    if num_before > 0 and num_after > 0 and not include_anchor:
        raise JsonableError(_("The anchor can only be excluded at an end of the range"))

    # Convert the validated narrow terms to plain dicts; an empty narrow
    # is treated the same as no narrow at all.
    if narrow is not None and len(narrow) > 0:
        narrow_parameter_list: OptionalNarrowListT = [x.model_dump() for x in narrow]
    else:
        narrow_parameter_list = None

    realm = get_valid_realm_from_request(request)
    if not maybe_user_profile.is_authenticated:
        # If user is not authenticated, clients must include
        # `streams:web-public` in their narrow query to indicate this
        # is a web-public query.  This helps differentiate between
        # cases of web-public queries (where we should return the
        # web-public results only) and clients with buggy
        # authentication code (where we should return an auth error).
        #
        # GetOldMessagesTest.test_unauthenticated_* tests ensure
        # that we are not leaking any secure data (direct messages and
        # non-web-public stream messages) via this path.
        if not realm.allow_web_public_streams_access():
            raise MissingAuthenticationError
        narrow_parameter_list = clean_narrow_for_web_public_api(narrow_parameter_list)
        if not is_web_public_narrow(narrow_parameter_list):
            raise MissingAuthenticationError
        assert narrow_parameter_list is not None
        if not is_spectator_compatible(narrow_parameter_list):
            raise MissingAuthenticationError

        # We use None to indicate unauthenticated requests as it's more
        # readable than using AnonymousUser, and the lack of Django
        # stubs means that mypy can't check AnonymousUser well.
        user_profile: Optional[UserProfile] = None
        is_web_public_query = True
    else:
        assert isinstance(maybe_user_profile, UserProfile)
        user_profile = maybe_user_profile
        assert user_profile is not None
        is_web_public_query = False

    assert realm is not None

    if is_web_public_query:
        # client_gravatar here is just the user-requested value. "finalize_payload" function
        # is responsible for sending avatar_url based on each individual sender's
        # email_address_visibility setting.
        client_gravatar = False

    if narrow_parameter_list is not None:
        # Add some metadata to our logging data for narrows
        verbose_operators = []
        for term in narrow_parameter_list:
            # "is" terms are logged together with their operand; every
            # other term is logged by operator name only.
            if term["operator"] == "is":
                verbose_operators.append("is:" + term["operand"])
            else:
                verbose_operators.append(term["operator"])
        log_data = RequestNotes.get_notes(request).log_data
        assert log_data is not None
        log_data["extra"] = "[{}]".format(",".join(verbose_operators))

    with transaction.atomic(durable=True):
        # We're about to perform a search, and then get results from
        # it; this is done across multiple queries.  To prevent race
        # conditions, we want the messages returned to be consistent
        # with the version of the messages that was searched, to
        # prevent changes which happened between them from leaking to
        # clients who should not be able to see the new values, and
        # when messages are deleted in between.  We set up
        # repeatable-read isolation for this transaction, so that we
        # prevent both phantom reads and non-repeatable reads.
        #
        # In a read-only repeatable-read transaction, it is not
        # possible to encounter deadlocks or need retries due to
        # serialization errors.
        #
        # You can only set the isolation level before any queries in
        # the transaction, meaning it must be the top-most
        # transaction, which durable=True establishes.  Except in
        # tests, where durable=True is a lie, because there is an
        # outer transaction for each test.  We thus skip this command
        # in tests, since it would fail.
        if not settings.TEST_SUITE:  # nocoverage
            cursor = connection.cursor()
            cursor.execute("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ READ ONLY")

        query_info = fetch_messages(
            narrow=narrow_parameter_list,
            user_profile=user_profile,
            realm=realm,
            is_web_public_query=is_web_public_query,
            anchor=anchor,
            include_anchor=include_anchor,
            num_before=num_before,
            num_after=num_after,
        )

        anchor = query_info.anchor
        include_history = query_info.include_history
        is_search = query_info.is_search
        rows = query_info.rows

        # The following is a little messy, but ensures that the code paths
        # are similar regardless of the value of include_history.  The
        # 'user_messages' dictionary maps each message to the user's
        # UserMessage object for that message, which we will attach to the
        # rendered message dict before returning it.  We attempt to
        # bulk-fetch rendered message dicts from remote cache using the
        # 'messages' list.
        message_ids: List[int] = []
        user_message_flags: Dict[int, List[str]] = {}
        if is_web_public_query:
            # For spectators, we treat all historical messages as read.
            for row in rows:
                message_id = row[0]
                message_ids.append(message_id)
                user_message_flags[message_id] = ["read"]
        elif include_history:
            assert user_profile is not None
            message_ids = [row[0] for row in rows]

            # TODO: This could be done with an outer join instead of two queries
            um_rows = UserMessage.objects.filter(
                user_profile=user_profile, message_id__in=message_ids
            )
            user_message_flags = {um.message_id: um.flags_list() for um in um_rows}

            # Messages with no UserMessage row for this user get
            # synthetic flags marking them as read and historical.
            for message_id in message_ids:
                if message_id not in user_message_flags:
                    user_message_flags[message_id] = ["read", "historical"]
        else:
            # Here each row carries the message id first and the flags
            # value second.
            for row in rows:
                message_id = row[0]
                flags = row[1]
                user_message_flags[message_id] = UserMessage.flags_list_for_flags(flags)
                message_ids.append(message_id)

        search_fields: Dict[int, Dict[str, str]] = {}
        if is_search:
            for row in rows:
                message_id = row[0]
                # For search queries the last four columns of each row
                # are unpacked as the inputs to get_search_fields.
                (topic_name, rendered_content, content_matches, topic_matches) = row[-4:]
                search_fields[message_id] = get_search_fields(
                    rendered_content, topic_name, content_matches, topic_matches
                )

        message_list = messages_for_ids(
            message_ids=message_ids,
            user_message_flags=user_message_flags,
            search_fields=search_fields,
            apply_markdown=apply_markdown,
            client_gravatar=client_gravatar,
            allow_edit_history=realm.allow_edit_history,
            user_profile=user_profile,
            realm=realm,
        )

    ret = dict(
        messages=message_list,
        result="success",
        msg="",
        found_anchor=query_info.found_anchor,
        found_oldest=query_info.found_oldest,
        found_newest=query_info.found_newest,
        history_limited=query_info.history_limited,
        anchor=anchor,
    )
    return json_success(request, data=ret)
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-06-09 14:52:19 +02:00
|
|
|
@typed_endpoint
def messages_in_narrow_backend(
    request: HttpRequest,
    user_profile: UserProfile,
    *,
    msg_ids: Json[List[int]],
    narrow: Json[List[NarrowParameter]],
) -> HttpResponse:
    """Report which of ``msg_ids`` match ``narrow`` for this user.

    Message ids below the realm's first visible message id are dropped
    before querying.  The response's ``"messages"`` dict maps each
    matching message id (as a string) to its highlighted search fields.
    """
    term_dicts: OptionalNarrowListT = [term.model_dump() for term in narrow]

    oldest_visible_id = get_first_visible_message_id(user_profile.realm)
    msg_ids = [candidate for candidate in msg_ids if candidate >= oldest_visible_id]

    # This query is limited to messages the user has access to because they
    # actually received them, as reflected in `zerver_usermessage`.
    received_by_user = and_(
        column("user_profile_id", Integer) == literal(user_profile.id),
        column("message_id", Integer).in_(msg_ids),
    )
    usermessage_with_message = join(
        table("zerver_usermessage"),
        table("zerver_message"),
        literal_column("zerver_usermessage.message_id", Integer)
        == literal_column("zerver_message.id", Integer),
    )
    query = (
        select(column("message_id", Integer))
        .where(received_by_user)
        .select_from(usermessage_with_message)
    )

    query, is_search = add_narrow_conditions(
        user_profile=user_profile,
        inner_msg_id_col=column("message_id", Integer),
        query=query,
        narrow=term_dicts,
        is_web_public_query=False,
        realm=user_profile.realm,
    )

    if not is_search:
        # `add_narrow_conditions` adds the following columns only if narrow has search operands.
        query = query.add_columns(topic_column_sa(), column("rendered_content", Text))

    fields_by_message_id = {}
    with get_sqlalchemy_connection() as sa_conn:
        for result_row in sa_conn.execute(query).mappings():
            matched_id = row_key = str(result_row["message_id"])
            topic_name: str = result_row[DB_TOPIC_NAME]
            rendered_content: str = result_row["rendered_content"]
            # The match-offset columns are only present for search
            # narrows; default to "no matches" otherwise.
            content_matches = result_row.get("content_matches", [])
            topic_matches = result_row.get("topic_matches", [])
            fields_by_message_id[row_key] = get_search_fields(
                rendered_content,
                topic_name,
                content_matches,
                topic_matches,
            )

    return json_success(request, data={"messages": fields_by_message_id})
|