2022-09-09 02:20:21 +02:00
|
|
|
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2020-08-04 19:33:43 +02:00
|
|
|
from django.contrib.auth.models import AnonymousUser
|
2016-06-06 00:32:39 +02:00
|
|
|
from django.http import HttpRequest, HttpResponse
|
2013-11-26 00:41:24 +01:00
|
|
|
from django.utils.html import escape as escape_html
|
2021-04-16 00:57:30 +02:00
|
|
|
from django.utils.translation import gettext as _
|
2021-08-21 01:07:28 +02:00
|
|
|
from sqlalchemy.engine import Connection, Row
|
2020-06-11 00:54:34 +02:00
|
|
|
from sqlalchemy.sql import (
|
|
|
|
ColumnElement,
|
2020-11-16 22:52:27 +01:00
|
|
|
Select,
|
2020-06-11 00:54:34 +02:00
|
|
|
and_,
|
|
|
|
column,
|
|
|
|
join,
|
|
|
|
literal,
|
|
|
|
literal_column,
|
|
|
|
select,
|
|
|
|
table,
|
|
|
|
union_all,
|
|
|
|
)
|
2022-02-10 03:15:46 +01:00
|
|
|
from sqlalchemy.sql.selectable import SelectBase
|
2022-09-09 02:20:21 +02:00
|
|
|
from sqlalchemy.types import Integer, Text
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2020-09-01 13:56:15 +02:00
|
|
|
from zerver.context_processors import get_valid_realm_from_request
|
2022-09-09 02:20:21 +02:00
|
|
|
from zerver.lib.exceptions import JsonableError, MissingAuthenticationError
|
2020-06-22 23:25:37 +02:00
|
|
|
from zerver.lib.message import get_first_visible_message_id, messages_for_ids
|
2022-09-09 02:20:21 +02:00
|
|
|
from zerver.lib.narrow import (
|
|
|
|
NarrowBuilder,
|
|
|
|
OptionalNarrowListT,
|
|
|
|
add_narrow_conditions,
|
|
|
|
exclude_muting_conditions,
|
|
|
|
get_base_query_for_search,
|
|
|
|
is_spectator_compatible,
|
|
|
|
is_web_public_narrow,
|
|
|
|
narrow_parameter,
|
|
|
|
)
|
2021-08-21 19:24:20 +02:00
|
|
|
from zerver.lib.request import REQ, RequestNotes, has_request_variables
|
2021-06-30 18:35:50 +02:00
|
|
|
from zerver.lib.response import json_success
|
2016-07-19 08:12:35 +02:00
|
|
|
from zerver.lib.sqlalchemy_utils import get_sqlalchemy_connection
|
2022-09-09 02:20:21 +02:00
|
|
|
from zerver.lib.streams import can_access_stream_history_by_id, can_access_stream_history_by_name
|
|
|
|
from zerver.lib.topic import DB_TOPIC_NAME, MATCH_TOPIC, topic_column_sa
|
2013-12-12 18:36:32 +01:00
|
|
|
from zerver.lib.utils import statsd
|
2022-09-09 02:20:21 +02:00
|
|
|
from zerver.lib.validator import check_bool, check_int, check_list, to_non_negative_int
|
|
|
|
from zerver.models import Realm, UserMessage, UserProfile
|
2016-06-24 02:26:09 +02:00
|
|
|
|
2017-02-23 05:50:15 +01:00
|
|
|
LARGER_THAN_MAX_MESSAGE_ID = 10000000000000000
|
2018-09-09 14:54:52 +02:00
|
|
|
MAX_MESSAGES_PER_FETCH = 5000
|
2017-02-23 05:50:15 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-04-24 03:47:28 +02:00
|
|
|
def highlight_string(text: str, locs: Iterable[Tuple[int, int]]) -> str:
|
2017-11-04 05:23:22 +01:00
|
|
|
highlight_start = '<span class="highlight">'
|
2021-02-12 08:20:45 +01:00
|
|
|
highlight_stop = "</span>"
|
2013-11-26 00:41:24 +01:00
|
|
|
pos = 0
|
2021-02-12 08:20:45 +01:00
|
|
|
result = ""
|
2017-04-06 15:59:56 +02:00
|
|
|
in_tag = False
|
2017-10-31 19:03:12 +01:00
|
|
|
|
2013-11-26 00:41:24 +01:00
|
|
|
for loc in locs:
|
|
|
|
(offset, length) = loc
|
2017-10-31 19:03:12 +01:00
|
|
|
|
|
|
|
prefix_start = pos
|
|
|
|
prefix_end = offset
|
|
|
|
match_start = offset
|
|
|
|
match_end = offset + length
|
|
|
|
|
2019-08-28 11:06:38 +02:00
|
|
|
prefix = text[prefix_start:prefix_end]
|
|
|
|
match = text[match_start:match_end]
|
2017-10-31 19:03:12 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
for character in prefix + match:
|
2021-02-12 08:20:45 +01:00
|
|
|
if character == "<":
|
2017-04-06 15:59:56 +02:00
|
|
|
in_tag = True
|
2021-02-12 08:20:45 +01:00
|
|
|
elif character == ">":
|
2017-04-06 15:59:56 +02:00
|
|
|
in_tag = False
|
2022-02-12 03:39:06 +01:00
|
|
|
if in_tag:
|
2017-10-31 19:03:12 +01:00
|
|
|
result += prefix
|
|
|
|
result += match
|
2017-04-06 15:59:56 +02:00
|
|
|
else:
|
2017-10-31 19:03:12 +01:00
|
|
|
result += prefix
|
2017-04-06 15:59:56 +02:00
|
|
|
result += highlight_start
|
2017-10-31 19:03:12 +01:00
|
|
|
result += match
|
2017-04-06 15:59:56 +02:00
|
|
|
result += highlight_stop
|
2017-10-31 19:03:12 +01:00
|
|
|
pos = match_end
|
|
|
|
|
2019-08-28 11:06:38 +02:00
|
|
|
result += text[pos:]
|
2016-08-25 08:00:52 +02:00
|
|
|
return result
|
2013-11-26 00:41:24 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def get_search_fields(
|
|
|
|
rendered_content: str,
|
|
|
|
topic_name: str,
|
|
|
|
content_matches: Iterable[Tuple[int, int]],
|
|
|
|
topic_matches: Iterable[Tuple[int, int]],
|
|
|
|
) -> Dict[str, str]:
|
2018-11-09 17:25:57 +01:00
|
|
|
return {
|
2021-02-12 08:20:45 +01:00
|
|
|
"match_content": highlight_string(rendered_content, content_matches),
|
2018-11-09 17:25:57 +01:00
|
|
|
MATCH_TOPIC: highlight_string(escape_html(topic_name), topic_matches),
|
|
|
|
}
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def ok_to_include_history(
|
|
|
|
narrow: OptionalNarrowListT, user_profile: Optional[UserProfile], is_web_public_query: bool
|
|
|
|
) -> bool:
|
2014-02-13 16:24:06 +01:00
|
|
|
# There are occasions where we need to find Message rows that
|
|
|
|
# have no corresponding UserMessage row, because the user is
|
|
|
|
# reading a public stream that might include messages that
|
|
|
|
# were sent while the user was not subscribed, but which they are
|
|
|
|
# allowed to see. We have to be very careful about constructing
|
|
|
|
# queries in those situations, so this function should return True
|
|
|
|
# only if we are 100% sure that we're gonna add a clause to the
|
|
|
|
# query that narrows to a particular public stream on the user's realm.
|
|
|
|
# If we screw this up, then we can get into a nasty situation of
|
|
|
|
# polluting our narrow results with messages from other realms.
|
2020-08-04 19:33:43 +02:00
|
|
|
|
|
|
|
# For web-public queries, we are always returning history. The
|
|
|
|
# analogues of the below stream access checks for whether streams
|
|
|
|
# have is_web_public set and banning is operators in this code
|
|
|
|
# path are done directly in NarrowBuilder.
|
|
|
|
if is_web_public_query:
|
|
|
|
assert user_profile is None
|
|
|
|
return True
|
|
|
|
|
|
|
|
assert user_profile is not None
|
|
|
|
|
2013-12-12 18:36:32 +01:00
|
|
|
include_history = False
|
|
|
|
if narrow is not None:
|
2014-02-10 21:45:53 +01:00
|
|
|
for term in narrow:
|
2021-02-12 08:20:45 +01:00
|
|
|
if term["operator"] == "stream" and not term.get("negated", False):
|
|
|
|
operand: Union[str, int] = term["operand"]
|
2019-08-07 17:32:19 +02:00
|
|
|
if isinstance(operand, str):
|
|
|
|
include_history = can_access_stream_history_by_name(user_profile, operand)
|
|
|
|
else:
|
|
|
|
include_history = can_access_stream_history_by_id(user_profile, operand)
|
2021-02-12 08:19:30 +01:00
|
|
|
elif (
|
2021-02-12 08:20:45 +01:00
|
|
|
term["operator"] == "streams"
|
|
|
|
and term["operand"] == "public"
|
|
|
|
and not term.get("negated", False)
|
2021-02-12 08:19:30 +01:00
|
|
|
and user_profile.can_access_public_streams()
|
|
|
|
):
|
2019-08-13 20:20:36 +02:00
|
|
|
include_history = True
|
2014-01-14 22:53:28 +01:00
|
|
|
# Disable historical messages if the user is narrowing on anything
|
|
|
|
# that's a property on the UserMessage table. There cannot be
|
|
|
|
# historical messages in these cases anyway.
|
2014-02-10 21:45:53 +01:00
|
|
|
for term in narrow:
|
2021-02-12 08:20:45 +01:00
|
|
|
if term["operator"] == "is":
|
2013-12-12 18:36:32 +01:00
|
|
|
include_history = False
|
|
|
|
|
2014-02-13 16:24:06 +01:00
|
|
|
return include_history
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def find_first_unread_anchor(
|
|
|
|
sa_conn: Connection, user_profile: Optional[UserProfile], narrow: OptionalNarrowListT
|
|
|
|
) -> int:
|
2020-08-04 19:33:43 +02:00
|
|
|
# For anonymous web users, all messages are treated as read, and so
|
|
|
|
# always return LARGER_THAN_MAX_MESSAGE_ID.
|
|
|
|
if user_profile is None:
|
|
|
|
return LARGER_THAN_MAX_MESSAGE_ID
|
|
|
|
|
2018-04-05 22:17:50 +02:00
|
|
|
# We always need UserMessage in our query, because it has the unread
|
|
|
|
# flag for the user.
|
|
|
|
need_user_message = True
|
|
|
|
|
2018-04-06 17:57:20 +02:00
|
|
|
# Because we will need to call exclude_muting_conditions, unless
|
|
|
|
# the user hasn't muted anything, we will need to include Message
|
|
|
|
# in our query. It may be worth eventually adding an optimization
|
|
|
|
# for the case of a user who hasn't muted anything to avoid the
|
|
|
|
# join in that case, but it's low priority.
|
2018-04-05 22:17:50 +02:00
|
|
|
need_message = True
|
|
|
|
|
|
|
|
query, inner_msg_id_col = get_base_query_for_search(
|
|
|
|
user_profile=user_profile,
|
|
|
|
need_message=need_message,
|
|
|
|
need_user_message=need_user_message,
|
|
|
|
)
|
|
|
|
|
|
|
|
query, is_search = add_narrow_conditions(
|
|
|
|
user_profile=user_profile,
|
|
|
|
inner_msg_id_col=inner_msg_id_col,
|
|
|
|
query=query,
|
|
|
|
narrow=narrow,
|
2020-08-04 19:33:43 +02:00
|
|
|
is_web_public_query=False,
|
|
|
|
realm=user_profile.realm,
|
2018-04-05 22:17:50 +02:00
|
|
|
)
|
|
|
|
|
2020-11-16 22:52:27 +01:00
|
|
|
condition = column("flags", Integer).op("&")(UserMessage.flags.read.mask) == 0
|
2018-04-05 14:52:02 +02:00
|
|
|
|
|
|
|
# We exclude messages on muted topics when finding the first unread
|
|
|
|
# message in this narrow
|
|
|
|
muting_conditions = exclude_muting_conditions(user_profile, narrow)
|
|
|
|
if muting_conditions:
|
|
|
|
condition = and_(condition, *muting_conditions)
|
|
|
|
|
|
|
|
first_unread_query = query.where(condition)
|
|
|
|
first_unread_query = first_unread_query.order_by(inner_msg_id_col.asc()).limit(1)
|
|
|
|
first_unread_result = list(sa_conn.execute(first_unread_query).fetchall())
|
|
|
|
if len(first_unread_result) > 0:
|
|
|
|
anchor = first_unread_result[0][0]
|
|
|
|
else:
|
|
|
|
anchor = LARGER_THAN_MAX_MESSAGE_ID
|
|
|
|
|
|
|
|
return anchor
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def parse_anchor_value(anchor_val: Optional[str], use_first_unread_anchor: bool) -> Optional[int]:
|
2020-01-29 03:29:15 +01:00
|
|
|
"""Given the anchor and use_first_unread_anchor parameters passed by
|
|
|
|
the client, computes what anchor value the client requested,
|
|
|
|
handling backwards-compatibility and the various string-valued
|
|
|
|
fields. We encode use_first_unread_anchor as anchor=None.
|
|
|
|
"""
|
|
|
|
if use_first_unread_anchor:
|
|
|
|
# Backwards-compatibility: Before we added support for the
|
|
|
|
# special string-typed anchor values, clients would pass
|
|
|
|
# anchor=None and use_first_unread_anchor=True to indicate
|
|
|
|
# what is now expressed as anchor="first_unread".
|
2020-01-28 06:37:25 +01:00
|
|
|
return None
|
2020-01-29 03:29:15 +01:00
|
|
|
if anchor_val is None:
|
|
|
|
# Throw an exception if neither an anchor argument not
|
|
|
|
# use_first_unread_anchor was specified.
|
|
|
|
raise JsonableError(_("Missing 'anchor' argument."))
|
2020-01-28 06:37:25 +01:00
|
|
|
if anchor_val == "oldest":
|
|
|
|
return 0
|
|
|
|
if anchor_val == "newest":
|
|
|
|
return LARGER_THAN_MAX_MESSAGE_ID
|
2020-01-29 03:29:15 +01:00
|
|
|
if anchor_val == "first_unread":
|
|
|
|
return None
|
2020-01-28 06:37:25 +01:00
|
|
|
try:
|
|
|
|
# We don't use `.isnumeric()` to support negative numbers for
|
|
|
|
# anchor. We don't recommend it in the API (if you want the
|
|
|
|
# very first message, use 0 or 1), but it used to be supported
|
2021-05-14 00:16:30 +02:00
|
|
|
# and was used by the web app, so we need to continue
|
2020-01-28 06:37:25 +01:00
|
|
|
# supporting it for backwards-compatibility
|
|
|
|
anchor = int(anchor_val)
|
|
|
|
if anchor < 0:
|
|
|
|
return 0
|
2020-11-28 20:30:02 +01:00
|
|
|
elif anchor > LARGER_THAN_MAX_MESSAGE_ID:
|
|
|
|
return LARGER_THAN_MAX_MESSAGE_ID
|
2020-01-28 06:37:25 +01:00
|
|
|
return anchor
|
|
|
|
except ValueError:
|
|
|
|
raise JsonableError(_("Invalid anchor"))
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2014-02-13 16:24:06 +01:00
|
|
|
@has_request_variables
|
2021-02-12 08:19:30 +01:00
|
|
|
def get_messages_backend(
|
|
|
|
request: HttpRequest,
|
|
|
|
maybe_user_profile: Union[UserProfile, AnonymousUser],
|
2021-04-07 21:53:14 +02:00
|
|
|
anchor_val: Optional[str] = REQ("anchor", default=None),
|
2021-02-12 08:19:30 +01:00
|
|
|
num_before: int = REQ(converter=to_non_negative_int),
|
|
|
|
num_after: int = REQ(converter=to_non_negative_int),
|
2021-02-12 08:20:45 +01:00
|
|
|
narrow: OptionalNarrowListT = REQ("narrow", converter=narrow_parameter, default=None),
|
2021-02-12 08:19:30 +01:00
|
|
|
use_first_unread_anchor_val: bool = REQ(
|
2021-04-07 22:00:44 +02:00
|
|
|
"use_first_unread_anchor", json_validator=check_bool, default=False
|
2021-02-12 08:19:30 +01:00
|
|
|
),
|
2021-08-05 19:48:43 +02:00
|
|
|
client_gravatar: bool = REQ(json_validator=check_bool, default=True),
|
2021-04-07 22:00:44 +02:00
|
|
|
apply_markdown: bool = REQ(json_validator=check_bool, default=True),
|
2021-02-12 08:19:30 +01:00
|
|
|
) -> HttpResponse:
|
2020-01-29 03:29:15 +01:00
|
|
|
anchor = parse_anchor_value(anchor_val, use_first_unread_anchor_val)
|
2018-09-09 14:54:52 +02:00
|
|
|
if num_before + num_after > MAX_MESSAGES_PER_FETCH:
|
2021-06-30 18:35:50 +02:00
|
|
|
raise JsonableError(
|
2021-02-12 08:19:30 +01:00
|
|
|
_("Too many messages requested (maximum {}).").format(
|
|
|
|
MAX_MESSAGES_PER_FETCH,
|
|
|
|
)
|
|
|
|
)
|
2014-02-13 16:24:06 +01:00
|
|
|
|
2021-10-03 14:16:07 +02:00
|
|
|
realm = get_valid_realm_from_request(request)
|
2020-08-04 19:33:43 +02:00
|
|
|
if not maybe_user_profile.is_authenticated:
|
|
|
|
# If user is not authenticated, clients must include
|
|
|
|
# `streams:web-public` in their narrow query to indicate this
|
|
|
|
# is a web-public query. This helps differentiate between
|
|
|
|
# cases of web-public queries (where we should return the
|
|
|
|
# web-public results only) and clients with buggy
|
|
|
|
# authentication code (where we should return an auth error).
|
2020-10-07 13:56:30 +02:00
|
|
|
#
|
|
|
|
# GetOldMessagesTest.test_unauthenticated_* tests ensure
|
|
|
|
# that we are not leaking any secure data (private messages and
|
2022-04-28 05:15:11 +02:00
|
|
|
# non-web-public stream messages) via this path.
|
2021-10-03 14:16:07 +02:00
|
|
|
if not realm.allow_web_public_streams_access():
|
|
|
|
raise MissingAuthenticationError()
|
2020-08-04 19:33:43 +02:00
|
|
|
if not is_web_public_narrow(narrow):
|
2020-08-31 09:31:50 +02:00
|
|
|
raise MissingAuthenticationError()
|
2020-08-04 19:33:43 +02:00
|
|
|
assert narrow is not None
|
2021-09-04 04:03:07 +02:00
|
|
|
if not is_spectator_compatible(narrow):
|
2020-08-31 09:31:50 +02:00
|
|
|
raise MissingAuthenticationError()
|
2020-08-04 19:33:43 +02:00
|
|
|
|
|
|
|
# We use None to indicate unauthenticated requests as it's more
|
|
|
|
# readable than using AnonymousUser, and the lack of Django
|
|
|
|
# stubs means that mypy can't check AnonymousUser well.
|
|
|
|
user_profile: Optional[UserProfile] = None
|
|
|
|
is_web_public_query = True
|
|
|
|
else:
|
|
|
|
assert isinstance(maybe_user_profile, UserProfile)
|
|
|
|
user_profile = maybe_user_profile
|
|
|
|
assert user_profile is not None
|
|
|
|
is_web_public_query = False
|
|
|
|
|
|
|
|
assert realm is not None
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
if (
|
|
|
|
is_web_public_query
|
|
|
|
or realm.email_address_visibility != Realm.EMAIL_ADDRESS_VISIBILITY_EVERYONE
|
|
|
|
):
|
2019-02-05 07:12:37 +01:00
|
|
|
# If email addresses are only available to administrators,
|
|
|
|
# clients cannot compute gravatars, so we force-set it to false.
|
|
|
|
client_gravatar = False
|
|
|
|
|
2020-08-04 19:33:43 +02:00
|
|
|
include_history = ok_to_include_history(narrow, user_profile, is_web_public_query)
|
2018-04-05 22:32:30 +02:00
|
|
|
if include_history:
|
2017-06-30 02:24:05 +02:00
|
|
|
# The initial query in this case doesn't use `zerver_usermessage`,
|
|
|
|
# and isn't yet limited to messages the user is entitled to see!
|
|
|
|
#
|
|
|
|
# This is OK only because we've made sure this is a narrow that
|
2020-08-04 19:33:43 +02:00
|
|
|
# will cause us to limit the query appropriately elsewhere.
|
2017-06-30 02:24:05 +02:00
|
|
|
# See `ok_to_include_history` for details.
|
2020-08-04 19:33:43 +02:00
|
|
|
#
|
|
|
|
# Note that is_web_public_query=True goes here, since
|
|
|
|
# include_history is semantically correct for is_web_public_query.
|
2018-04-05 21:56:27 +02:00
|
|
|
need_message = True
|
|
|
|
need_user_message = False
|
2018-04-05 22:32:30 +02:00
|
|
|
elif narrow is None:
|
2018-04-05 21:56:27 +02:00
|
|
|
# We need to limit to messages the user has received, but we don't actually
|
|
|
|
# need any fields from Message
|
|
|
|
need_message = False
|
|
|
|
need_user_message = True
|
2013-12-12 18:36:32 +01:00
|
|
|
else:
|
2018-04-05 21:56:27 +02:00
|
|
|
need_message = True
|
|
|
|
need_user_message = True
|
|
|
|
|
2022-02-10 03:15:46 +01:00
|
|
|
query: SelectBase
|
2018-04-05 21:56:27 +02:00
|
|
|
query, inner_msg_id_col = get_base_query_for_search(
|
|
|
|
user_profile=user_profile,
|
|
|
|
need_message=need_message,
|
|
|
|
need_user_message=need_user_message,
|
|
|
|
)
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2018-04-05 20:42:37 +02:00
|
|
|
query, is_search = add_narrow_conditions(
|
|
|
|
user_profile=user_profile,
|
|
|
|
inner_msg_id_col=inner_msg_id_col,
|
|
|
|
query=query,
|
|
|
|
narrow=narrow,
|
2020-08-04 19:33:43 +02:00
|
|
|
realm=realm,
|
|
|
|
is_web_public_query=is_web_public_query,
|
2018-04-05 20:42:37 +02:00
|
|
|
)
|
2013-12-12 22:50:49 +01:00
|
|
|
|
2013-12-12 18:36:32 +01:00
|
|
|
if narrow is not None:
|
2013-12-12 22:50:49 +01:00
|
|
|
# Add some metadata to our logging data for narrows
|
2013-12-12 18:36:32 +01:00
|
|
|
verbose_operators = []
|
2014-02-10 21:45:53 +01:00
|
|
|
for term in narrow:
|
2021-02-12 08:20:45 +01:00
|
|
|
if term["operator"] == "is":
|
|
|
|
verbose_operators.append("is:" + term["operand"])
|
2013-12-12 18:36:32 +01:00
|
|
|
else:
|
2021-02-12 08:20:45 +01:00
|
|
|
verbose_operators.append(term["operator"])
|
2021-08-21 19:24:20 +02:00
|
|
|
log_data = RequestNotes.get_notes(request).log_data
|
2021-07-09 10:06:04 +02:00
|
|
|
assert log_data is not None
|
|
|
|
log_data["extra"] = "[{}]".format(",".join(verbose_operators))
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2022-02-10 04:59:48 +01:00
|
|
|
with get_sqlalchemy_connection() as sa_conn:
|
|
|
|
if anchor is None:
|
|
|
|
# `anchor=None` corresponds to the anchor="first_unread" parameter.
|
|
|
|
anchor = find_first_unread_anchor(
|
|
|
|
sa_conn,
|
|
|
|
user_profile,
|
|
|
|
narrow,
|
|
|
|
)
|
2020-08-04 19:33:43 +02:00
|
|
|
|
2022-02-10 04:59:48 +01:00
|
|
|
anchored_to_left = anchor == 0
|
|
|
|
|
|
|
|
# Set value that will be used to short circuit the after_query
|
|
|
|
# altogether and avoid needless conditions in the before_query.
|
|
|
|
anchored_to_right = anchor >= LARGER_THAN_MAX_MESSAGE_ID
|
|
|
|
if anchored_to_right:
|
|
|
|
num_after = 0
|
|
|
|
|
|
|
|
first_visible_message_id = get_first_visible_message_id(realm)
|
|
|
|
|
|
|
|
query = limit_query_to_range(
|
|
|
|
query=query,
|
|
|
|
num_before=num_before,
|
|
|
|
num_after=num_after,
|
|
|
|
anchor=anchor,
|
|
|
|
anchored_to_left=anchored_to_left,
|
|
|
|
anchored_to_right=anchored_to_right,
|
|
|
|
id_col=inner_msg_id_col,
|
|
|
|
first_visible_message_id=first_visible_message_id,
|
|
|
|
)
|
2017-02-22 23:39:40 +01:00
|
|
|
|
2022-02-10 04:59:48 +01:00
|
|
|
main_query = query.subquery()
|
|
|
|
query = (
|
|
|
|
select(*main_query.c)
|
|
|
|
.select_from(main_query)
|
|
|
|
.order_by(column("message_id", Integer).asc())
|
|
|
|
)
|
|
|
|
# This is a hack to tag the query we use for testing
|
|
|
|
query = query.prefix_with("/* get_messages */")
|
|
|
|
rows = list(sa_conn.execute(query).fetchall())
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2018-03-15 11:21:36 +01:00
|
|
|
query_info = post_process_limited_query(
|
|
|
|
rows=rows,
|
|
|
|
num_before=num_before,
|
|
|
|
num_after=num_after,
|
|
|
|
anchor=anchor,
|
|
|
|
anchored_to_left=anchored_to_left,
|
|
|
|
anchored_to_right=anchored_to_right,
|
2018-09-19 14:23:02 +02:00
|
|
|
first_visible_message_id=first_visible_message_id,
|
2018-03-15 11:21:36 +01:00
|
|
|
)
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
rows = query_info["rows"]
|
2018-03-15 11:21:36 +01:00
|
|
|
|
2013-12-12 18:36:32 +01:00
|
|
|
# The following is a little messy, but ensures that the code paths
|
|
|
|
# are similar regardless of the value of include_history. The
|
|
|
|
# 'user_messages' dictionary maps each message to the user's
|
|
|
|
# UserMessage object for that message, which we will attach to the
|
|
|
|
# rendered message dict before returning it. We attempt to
|
2016-03-31 03:39:51 +02:00
|
|
|
# bulk-fetch rendered message dicts from remote cache using the
|
2013-12-12 18:36:32 +01:00
|
|
|
# 'messages' list.
|
python: Convert assignment type annotations to Python 3.6 style.
This commit was split by tabbott; this piece covers the vast majority
of files in Zulip, but excludes scripts/, tools/, and puppet/ to help
ensure we at least show the right error messages for Xenial systems.
We can likely further refine the remaining pieces with some testing.
Generated by com2ann, with whitespace fixes and various manual fixes
for runtime issues:
- invoiced_through: Optional[LicenseLedger] = models.ForeignKey(
+ invoiced_through: Optional["LicenseLedger"] = models.ForeignKey(
-_apns_client: Optional[APNsClient] = None
+_apns_client: Optional["APNsClient"] = None
- notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- signup_notifications_stream: Optional[Stream] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
+ signup_notifications_stream: Optional["Stream"] = models.ForeignKey('Stream', related_name='+', null=True, blank=True, on_delete=CASCADE)
- author: Optional[UserProfile] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
+ author: Optional["UserProfile"] = models.ForeignKey('UserProfile', blank=True, null=True, on_delete=CASCADE)
- bot_owner: Optional[UserProfile] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+ bot_owner: Optional["UserProfile"] = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
- default_sending_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
- default_events_register_stream: Optional[Stream] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_sending_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
+ default_events_register_stream: Optional["Stream"] = models.ForeignKey('zerver.Stream', null=True, related_name='+', on_delete=CASCADE)
-descriptors_by_handler_id: Dict[int, ClientDescriptor] = {}
+descriptors_by_handler_id: Dict[int, "ClientDescriptor"] = {}
-worker_classes: Dict[str, Type[QueueProcessingWorker]] = {}
-queues: Dict[str, Dict[str, Type[QueueProcessingWorker]]] = {}
+worker_classes: Dict[str, Type["QueueProcessingWorker"]] = {}
+queues: Dict[str, Dict[str, Type["QueueProcessingWorker"]]] = {}
-AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional[LDAPSearch] = None
+AUTH_LDAP_REVERSE_EMAIL_SEARCH: Optional["LDAPSearch"] = None
Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
2020-04-22 01:09:50 +02:00
|
|
|
message_ids: List[int] = []
|
|
|
|
user_message_flags: Dict[int, List[str]] = {}
|
2020-08-04 19:33:43 +02:00
|
|
|
if is_web_public_query:
|
2021-06-15 18:03:32 +02:00
|
|
|
# For spectators, we treat all historical messages as read.
|
2020-08-04 19:33:43 +02:00
|
|
|
for row in rows:
|
|
|
|
message_id = row[0]
|
|
|
|
message_ids.append(message_id)
|
|
|
|
user_message_flags[message_id] = ["read"]
|
|
|
|
elif include_history:
|
|
|
|
assert user_profile is not None
|
2018-03-14 12:38:04 +01:00
|
|
|
message_ids = [row[0] for row in rows]
|
2013-12-10 23:32:29 +01:00
|
|
|
|
|
|
|
# TODO: This could be done with an outer join instead of two queries
|
2021-04-22 16:23:09 +02:00
|
|
|
um_rows = UserMessage.objects.filter(user_profile=user_profile, message_id__in=message_ids)
|
2017-11-07 16:18:42 +01:00
|
|
|
user_message_flags = {um.message_id: um.flags_list() for um in um_rows}
|
|
|
|
|
2017-11-07 17:12:27 +01:00
|
|
|
for message_id in message_ids:
|
|
|
|
if message_id not in user_message_flags:
|
2013-12-10 23:32:29 +01:00
|
|
|
user_message_flags[message_id] = ["read", "historical"]
|
2013-12-12 18:36:32 +01:00
|
|
|
else:
|
2018-03-14 12:38:04 +01:00
|
|
|
for row in rows:
|
2013-12-10 23:32:29 +01:00
|
|
|
message_id = row[0]
|
|
|
|
flags = row[1]
|
2017-11-07 18:40:39 +01:00
|
|
|
user_message_flags[message_id] = UserMessage.flags_list_for_flags(flags)
|
2013-12-10 23:32:29 +01:00
|
|
|
message_ids.append(message_id)
|
|
|
|
|
2020-09-02 08:14:51 +02:00
|
|
|
search_fields: Dict[int, Dict[str, str]] = {}
|
2017-11-07 17:12:27 +01:00
|
|
|
if is_search:
|
2018-03-14 12:38:04 +01:00
|
|
|
for row in rows:
|
2017-11-07 17:12:27 +01:00
|
|
|
message_id = row[0]
|
2018-11-09 17:19:17 +01:00
|
|
|
(topic_name, rendered_content, content_matches, topic_matches) = row[-4:]
|
2022-09-08 03:13:07 +02:00
|
|
|
search_fields[message_id] = get_search_fields(
|
|
|
|
rendered_content, topic_name, content_matches, topic_matches
|
|
|
|
)
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2017-11-07 17:36:29 +01:00
|
|
|
message_list = messages_for_ids(
|
|
|
|
message_ids=message_ids,
|
|
|
|
user_message_flags=user_message_flags,
|
|
|
|
search_fields=search_fields,
|
|
|
|
apply_markdown=apply_markdown,
|
|
|
|
client_gravatar=client_gravatar,
|
2020-08-04 19:33:43 +02:00
|
|
|
allow_edit_history=realm.allow_edit_history,
|
2017-11-07 17:36:29 +01:00
|
|
|
)
|
2017-10-10 09:22:21 +02:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
statsd.incr("loaded_old_messages", len(message_list))
|
2018-03-15 11:43:51 +01:00
|
|
|
|
|
|
|
ret = dict(
|
|
|
|
messages=message_list,
|
2021-02-12 08:20:45 +01:00
|
|
|
result="success",
|
|
|
|
msg="",
|
|
|
|
found_anchor=query_info["found_anchor"],
|
|
|
|
found_oldest=query_info["found_oldest"],
|
|
|
|
found_newest=query_info["found_newest"],
|
|
|
|
history_limited=query_info["history_limited"],
|
2018-03-15 11:43:51 +01:00
|
|
|
anchor=anchor,
|
|
|
|
)
|
2022-01-31 13:44:02 +01:00
|
|
|
return json_success(request, data=ret)
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def limit_query_to_range(
|
|
|
|
query: Select,
|
|
|
|
num_before: int,
|
|
|
|
num_after: int,
|
|
|
|
anchor: int,
|
|
|
|
anchored_to_left: bool,
|
|
|
|
anchored_to_right: bool,
|
2022-06-26 10:03:34 +02:00
|
|
|
id_col: ColumnElement[Integer],
|
2021-02-12 08:19:30 +01:00
|
|
|
first_visible_message_id: int,
|
2022-02-10 03:15:46 +01:00
|
|
|
) -> SelectBase:
|
2021-02-12 08:19:30 +01:00
|
|
|
"""
|
2018-03-13 14:29:39 +01:00
|
|
|
This code is actually generic enough that we could move it to a
|
|
|
|
library, but our only caller for now is message search.
|
2021-02-12 08:19:30 +01:00
|
|
|
"""
|
2018-03-13 14:29:39 +01:00
|
|
|
need_before_query = (not anchored_to_left) and (num_before > 0)
|
|
|
|
need_after_query = (not anchored_to_right) and (num_after > 0)
|
|
|
|
|
|
|
|
need_both_sides = need_before_query and need_after_query
|
|
|
|
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
# The semantics of our flags are as follows:
|
|
|
|
#
|
|
|
|
# num_after = number of rows < anchor
|
|
|
|
# num_after = number of rows > anchor
|
|
|
|
#
|
|
|
|
# But we also want the row where id == anchor (if it exists),
|
|
|
|
# and we don't want to union up to 3 queries. So in some cases
|
|
|
|
# we do things like `after_limit = num_after + 1` to grab the
|
|
|
|
# anchor row in the "after" query.
|
|
|
|
#
|
|
|
|
# Note that in some cases, if the anchor row isn't found, we
|
|
|
|
# actually may fetch an extra row at one of the extremes.
|
2018-03-13 14:29:39 +01:00
|
|
|
if need_both_sides:
|
|
|
|
before_anchor = anchor - 1
|
2018-09-19 14:23:02 +02:00
|
|
|
after_anchor = max(anchor, first_visible_message_id)
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
before_limit = num_before
|
|
|
|
after_limit = num_after + 1
|
2018-03-13 14:29:39 +01:00
|
|
|
elif need_before_query:
|
|
|
|
before_anchor = anchor
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
before_limit = num_before
|
|
|
|
if not anchored_to_right:
|
|
|
|
before_limit += 1
|
|
|
|
elif need_after_query:
|
2018-09-19 14:23:02 +02:00
|
|
|
after_anchor = max(anchor, first_visible_message_id)
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
after_limit = num_after + 1
|
2018-03-13 14:29:39 +01:00
|
|
|
|
|
|
|
if need_before_query:
|
|
|
|
before_query = query
|
|
|
|
|
|
|
|
if not anchored_to_right:
|
|
|
|
before_query = before_query.where(id_col <= before_anchor)
|
|
|
|
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
before_query = before_query.order_by(id_col.desc())
|
|
|
|
before_query = before_query.limit(before_limit)
|
2018-03-13 14:29:39 +01:00
|
|
|
|
|
|
|
if need_after_query:
|
|
|
|
after_query = query
|
|
|
|
|
|
|
|
if not anchored_to_left:
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
after_query = after_query.where(id_col >= after_anchor)
|
2018-03-13 14:29:39 +01:00
|
|
|
|
search: Make `num_after`/`num_after` more consistent.
We now consistently set our query limits so that we get at
least `num_after` rows such that id > anchor. (Obviously, the
caveat is that if there aren't enough rows that fulfill the
query, we'll return the full set of rows, but that may be less
than `num_after`.) Likewise for `num_before`.
Before this change, we would sometimes return one too few rows
for narrow queries.
Now, we're still a bit broken, but in a more consistent way. If
we have a query that does not match the anchor row (which could
be true even for a non-narrow query), but which does match lots
of rows after the anchor, we'll return `num_after + 1` rows
on the right hand side, whether or not the query has narrow
parameters.
The off-by-one semantics here have probably been moot all along,
since our windows are approximate to begin with. If we set
num_after to 100, its just a rough performance optimization to
begin with, so it doesn't matter whether we return 99 or 101 rows,
as long as we set the anchor correctly on the subsequent query.
We will make the results more rigorous in a follow up commit.
2018-03-14 13:22:16 +01:00
|
|
|
after_query = after_query.order_by(id_col.asc())
|
|
|
|
after_query = after_query.limit(after_limit)
|
2018-03-13 14:29:39 +01:00
|
|
|
|
|
|
|
if need_both_sides:
|
2020-11-17 03:14:36 +01:00
|
|
|
return union_all(before_query.self_group(), after_query.self_group())
|
2018-03-13 14:29:39 +01:00
|
|
|
elif need_before_query:
|
2020-11-17 03:14:36 +01:00
|
|
|
return before_query
|
2018-03-13 14:29:39 +01:00
|
|
|
elif need_after_query:
|
2020-11-17 03:14:36 +01:00
|
|
|
return after_query
|
2018-03-13 14:29:39 +01:00
|
|
|
else:
|
|
|
|
# If we don't have either a before_query or after_query, it's because
|
|
|
|
# some combination of num_before/num_after/anchor are zero or
|
|
|
|
# use_first_unread_anchor logic found no unread messages.
|
|
|
|
#
|
|
|
|
# The most likely reason is somebody is doing an id search, so searching
|
|
|
|
# for something like `message_id = 42` is exactly what we want. In other
|
|
|
|
# cases, which could possibly be buggy API clients, at least we will
|
|
|
|
# return at most one row here.
|
2020-11-17 03:14:36 +01:00
|
|
|
return query.where(id_col == anchor)
|
2018-03-13 14:29:39 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def post_process_limited_query(
|
2021-08-21 01:07:28 +02:00
|
|
|
rows: Sequence[Union[Row, Sequence[Any]]],
|
2021-02-12 08:19:30 +01:00
|
|
|
num_before: int,
|
|
|
|
num_after: int,
|
|
|
|
anchor: int,
|
|
|
|
anchored_to_left: bool,
|
|
|
|
anchored_to_right: bool,
|
|
|
|
first_visible_message_id: int,
|
|
|
|
) -> Dict[str, Any]:
|
2018-03-15 11:20:55 +01:00
|
|
|
# Our queries may have fetched extra rows if they added
|
|
|
|
# "headroom" to the limits, but we want to truncate those
|
|
|
|
# rows.
|
|
|
|
#
|
|
|
|
# Also, in cases where we had non-zero values of num_before or
|
|
|
|
# num_after, we want to know found_oldest and found_newest, so
|
|
|
|
# that the clients will know that they got complete results.
|
|
|
|
|
2018-09-19 14:23:02 +02:00
|
|
|
if first_visible_message_id > 0:
|
2021-08-21 01:07:28 +02:00
|
|
|
visible_rows: Sequence[Union[Row, Sequence[Any]]] = [
|
2020-11-16 22:52:27 +01:00
|
|
|
r for r in rows if r[0] >= first_visible_message_id
|
|
|
|
]
|
2018-09-19 14:23:02 +02:00
|
|
|
else:
|
|
|
|
visible_rows = rows
|
|
|
|
|
|
|
|
rows_limited = len(visible_rows) != len(rows)
|
|
|
|
|
2018-03-15 11:20:55 +01:00
|
|
|
if anchored_to_right:
|
|
|
|
num_after = 0
|
2018-09-19 14:23:02 +02:00
|
|
|
before_rows = visible_rows[:]
|
2020-11-16 22:52:27 +01:00
|
|
|
anchor_rows = []
|
|
|
|
after_rows = []
|
2018-03-15 11:20:55 +01:00
|
|
|
else:
|
2018-09-19 14:23:02 +02:00
|
|
|
before_rows = [r for r in visible_rows if r[0] < anchor]
|
|
|
|
anchor_rows = [r for r in visible_rows if r[0] == anchor]
|
|
|
|
after_rows = [r for r in visible_rows if r[0] > anchor]
|
2018-03-15 11:20:55 +01:00
|
|
|
|
|
|
|
if num_before:
|
2021-02-12 08:19:30 +01:00
|
|
|
before_rows = before_rows[-1 * num_before :]
|
2018-03-15 11:20:55 +01:00
|
|
|
|
|
|
|
if num_after:
|
|
|
|
after_rows = after_rows[:num_after]
|
|
|
|
|
2020-11-16 22:52:27 +01:00
|
|
|
visible_rows = [*before_rows, *anchor_rows, *after_rows]
|
2018-03-15 11:20:55 +01:00
|
|
|
|
|
|
|
found_anchor = len(anchor_rows) == 1
|
|
|
|
found_oldest = anchored_to_left or (len(before_rows) < num_before)
|
|
|
|
found_newest = anchored_to_right or (len(after_rows) < num_after)
|
2018-09-19 14:23:02 +02:00
|
|
|
# BUG: history_limited is incorrect False in the event that we had
|
|
|
|
# to bump `anchor` up due to first_visible_message_id, and there
|
|
|
|
# were actually older messages. This may be a rare event in the
|
|
|
|
# context where history_limited is relevant, because it can only
|
|
|
|
# happen in one-sided queries with no num_before (see tests tagged
|
|
|
|
# BUG in PostProcessTest for examples), and we don't generally do
|
|
|
|
# those from the UI, so this might be OK for now.
|
|
|
|
#
|
|
|
|
# The correct fix for this probably involves e.g. making a
|
|
|
|
# `before_query` when we increase `anchor` just to confirm whether
|
|
|
|
# messages were hidden.
|
|
|
|
history_limited = rows_limited and found_oldest
|
2018-03-15 11:20:55 +01:00
|
|
|
|
|
|
|
return dict(
|
2018-09-19 14:23:02 +02:00
|
|
|
rows=visible_rows,
|
2018-03-15 11:20:55 +01:00
|
|
|
found_anchor=found_anchor,
|
|
|
|
found_newest=found_newest,
|
|
|
|
found_oldest=found_oldest,
|
2018-09-19 14:23:02 +02:00
|
|
|
history_limited=history_limited,
|
2018-03-15 11:20:55 +01:00
|
|
|
)
|
2020-08-04 19:33:43 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2013-12-12 18:36:32 +01:00
|
|
|
@has_request_variables
|
2021-02-12 08:19:30 +01:00
|
|
|
def messages_in_narrow_backend(
|
|
|
|
request: HttpRequest,
|
|
|
|
user_profile: UserProfile,
|
2021-04-07 22:00:44 +02:00
|
|
|
msg_ids: List[int] = REQ(json_validator=check_list(check_int)),
|
2021-02-12 08:19:30 +01:00
|
|
|
narrow: OptionalNarrowListT = REQ(converter=narrow_parameter),
|
|
|
|
) -> HttpResponse:
|
2016-07-30 01:27:56 +02:00
|
|
|
|
2018-01-02 18:33:28 +01:00
|
|
|
first_visible_message_id = get_first_visible_message_id(user_profile.realm)
|
|
|
|
msg_ids = [message_id for message_id in msg_ids if message_id >= first_visible_message_id]
|
2017-06-30 02:24:05 +02:00
|
|
|
# This query is limited to messages the user has access to because they
|
|
|
|
# actually received them, as reflected in `zerver_usermessage`.
|
2022-02-10 04:13:15 +01:00
|
|
|
query = (
|
|
|
|
select(column("message_id", Integer), topic_column_sa(), column("rendered_content", Text))
|
|
|
|
.where(
|
|
|
|
and_(
|
|
|
|
column("user_profile_id", Integer) == literal(user_profile.id),
|
|
|
|
column("message_id", Integer).in_(msg_ids),
|
|
|
|
)
|
|
|
|
)
|
|
|
|
.select_from(
|
|
|
|
join(
|
|
|
|
table("zerver_usermessage"),
|
|
|
|
table("zerver_message"),
|
|
|
|
literal_column("zerver_usermessage.message_id", Integer)
|
|
|
|
== literal_column("zerver_message.id", Integer),
|
|
|
|
)
|
|
|
|
)
|
2021-02-12 08:19:30 +01:00
|
|
|
)
|
2013-12-10 23:32:29 +01:00
|
|
|
|
2020-11-16 22:52:27 +01:00
|
|
|
builder = NarrowBuilder(user_profile, column("message_id", Integer), user_profile.realm)
|
2017-03-19 01:46:35 +01:00
|
|
|
if narrow is not None:
|
|
|
|
for term in narrow:
|
|
|
|
query = builder.add_term(query, term)
|
2013-12-12 18:36:32 +01:00
|
|
|
|
2020-09-02 08:14:51 +02:00
|
|
|
search_fields = {}
|
2022-02-10 04:59:48 +01:00
|
|
|
with get_sqlalchemy_connection() as sa_conn:
|
|
|
|
for row in sa_conn.execute(query).fetchall():
|
|
|
|
message_id = row._mapping["message_id"]
|
|
|
|
topic_name = row._mapping[DB_TOPIC_NAME]
|
|
|
|
rendered_content = row._mapping["rendered_content"]
|
|
|
|
if "content_matches" in row._mapping:
|
|
|
|
content_matches = row._mapping["content_matches"]
|
|
|
|
topic_matches = row._mapping["topic_matches"]
|
|
|
|
else:
|
|
|
|
content_matches = topic_matches = []
|
|
|
|
search_fields[str(message_id)] = get_search_fields(
|
|
|
|
rendered_content,
|
|
|
|
topic_name,
|
|
|
|
content_matches,
|
|
|
|
topic_matches,
|
|
|
|
)
|
2013-12-10 23:32:29 +01:00
|
|
|
|
2022-01-31 13:44:02 +01:00
|
|
|
return json_success(request, data={"messages": search_fields})
|