mirror of https://github.com/zulip/zulip.git
1629 lines
71 KiB
Python
1629 lines
71 KiB
Python
from django.utils.translation import ugettext as _
|
|
from django.utils.timezone import now as timezone_now
|
|
from django.conf import settings
|
|
from django.core import validators
|
|
from django.core.exceptions import ValidationError
|
|
from django.db import connection, IntegrityError
|
|
from django.http import HttpRequest, HttpResponse
|
|
from typing import Dict, List, Set, Any, Iterable, \
|
|
Optional, Tuple, Union, Sequence, cast
|
|
from zerver.lib.exceptions import JsonableError, ErrorCode
|
|
from zerver.lib.html_diff import highlight_html_differences
|
|
from zerver.decorator import has_request_variables, \
|
|
REQ, to_non_negative_int
|
|
from django.utils.html import escape as escape_html
|
|
from zerver.lib import bugdown
|
|
from zerver.lib.zcommand import process_zcommands
|
|
from zerver.lib.actions import recipient_for_user_profiles, do_update_message_flags, \
|
|
compute_irc_user_fullname, compute_jabber_user_fullname, \
|
|
create_mirror_user_if_needed, check_send_message, do_update_message, \
|
|
extract_recipients, truncate_body, render_incoming_message, do_delete_messages, \
|
|
do_mark_all_as_read, do_mark_stream_messages_as_read, \
|
|
get_user_info_for_message_updates, check_schedule_message
|
|
from zerver.lib.addressee import get_user_profiles, get_user_profiles_by_ids
|
|
from zerver.lib.queue import queue_json_publish
|
|
from zerver.lib.message import (
|
|
access_message,
|
|
messages_for_ids,
|
|
render_markdown,
|
|
get_first_visible_message_id,
|
|
)
|
|
from zerver.lib.response import json_success, json_error
|
|
from zerver.lib.sqlalchemy_utils import get_sqlalchemy_connection
|
|
from zerver.lib.streams import access_stream_by_id, get_public_streams_queryset, \
|
|
can_access_stream_history_by_name, can_access_stream_history_by_id, \
|
|
get_stream_by_narrow_operand_access_unchecked
|
|
from zerver.lib.timestamp import datetime_to_timestamp, convert_to_UTC
|
|
from zerver.lib.timezone import get_timezone
|
|
from zerver.lib.topic import (
|
|
topic_column_sa,
|
|
topic_match_sa,
|
|
user_message_exists_for_topic,
|
|
DB_TOPIC_NAME,
|
|
LEGACY_PREV_TOPIC,
|
|
MATCH_TOPIC,
|
|
REQ_topic,
|
|
)
|
|
from zerver.lib.topic_mutes import exclude_topic_mutes
|
|
from zerver.lib.utils import statsd
|
|
from zerver.lib.validator import \
|
|
check_list, check_int, check_dict, check_string, check_bool, \
|
|
check_string_or_int_list, check_string_or_int
|
|
from zerver.lib.zephyr import compute_mit_user_fullname
|
|
from zerver.models import Message, UserProfile, Stream, Subscription, Client,\
|
|
Realm, RealmDomain, Recipient, UserMessage, bulk_get_recipients, get_personal_recipient, \
|
|
email_to_domain, get_realm, get_active_streams, get_user_including_cross_realm, \
|
|
get_user_by_id_in_realm_including_cross_realm, get_stream_recipient
|
|
|
|
from sqlalchemy import func
|
|
from sqlalchemy.sql import select, join, column, literal_column, literal, and_, \
|
|
or_, not_, union_all, alias, Selectable, ColumnElement, table
|
|
|
|
from dateutil.parser import parse as dateparser
|
|
import re
|
|
import ujson
|
|
import datetime
|
|
|
|
LARGER_THAN_MAX_MESSAGE_ID = 10000000000000000
|
|
MAX_MESSAGES_PER_FETCH = 5000
|
|
|
|
class BadNarrowOperator(JsonableError):
|
|
code = ErrorCode.BAD_NARROW
|
|
data_fields = ['desc']
|
|
|
|
def __init__(self, desc: str) -> None:
|
|
self.desc = desc # type: str
|
|
|
|
@staticmethod
|
|
def msg_format() -> str:
|
|
return _('Invalid narrow operator: {desc}')
|
|
|
|
# TODO: Should be Select, but sqlalchemy stubs are busted
|
|
Query = Any
|
|
|
|
# TODO: should be Callable[[ColumnElement], ColumnElement], but sqlalchemy stubs are busted
|
|
ConditionTransform = Any
|
|
|
|
OptionalNarrowListT = Optional[List[Dict[str, Any]]]
|
|
|
|
# When you add a new operator to this, also update zerver/lib/narrow.py
|
|
class NarrowBuilder:
|
|
'''
|
|
Build up a SQLAlchemy query to find messages matching a narrow.
|
|
'''
|
|
|
|
# This class has an important security invariant:
|
|
#
|
|
# None of these methods ever *add* messages to a query's result.
|
|
#
|
|
# That is, the `add_term` method, and its helpers the `by_*` methods,
|
|
# are passed a Query object representing a query for messages; they may
|
|
# call some methods on it, and then they return a resulting Query
|
|
# object. Things these methods may do to the queries they handle
|
|
# include
|
|
# * add conditions to filter out rows (i.e., messages), with `query.where`
|
|
# * add columns for more information on the same message, with `query.column`
|
|
# * add a join for more information on the same message
|
|
#
|
|
# Things they may not do include
|
|
# * anything that would pull in additional rows, or information on
|
|
# other messages.
|
|
|
|
def __init__(self, user_profile: UserProfile, msg_id_column: str) -> None:
|
|
self.user_profile = user_profile
|
|
self.msg_id_column = msg_id_column
|
|
self.user_realm = user_profile.realm
|
|
|
|
def add_term(self, query: Query, term: Dict[str, Any]) -> Query:
|
|
"""
|
|
Extend the given query to one narrowed by the given term, and return the result.
|
|
|
|
This method satisfies an important security property: the returned
|
|
query never includes a message that the given query didn't. In
|
|
particular, if the given query will only find messages that a given
|
|
user can legitimately see, then so will the returned query.
|
|
"""
|
|
# To maintain the security property, we hold all the `by_*`
|
|
# methods to the same criterion. See the class's block comment
|
|
# for details.
|
|
|
|
# We have to be careful here because we're letting users call a method
|
|
# by name! The prefix 'by_' prevents it from colliding with builtin
|
|
# Python __magic__ stuff.
|
|
operator = term['operator']
|
|
operand = term['operand']
|
|
|
|
negated = term.get('negated', False)
|
|
|
|
method_name = 'by_' + operator.replace('-', '_')
|
|
method = getattr(self, method_name, None)
|
|
if method is None:
|
|
raise BadNarrowOperator('unknown operator ' + operator)
|
|
|
|
if negated:
|
|
maybe_negate = not_
|
|
else:
|
|
maybe_negate = lambda cond: cond
|
|
|
|
return method(query, operand, maybe_negate)
|
|
|
|
def by_has(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if operand not in ['attachment', 'image', 'link']:
|
|
raise BadNarrowOperator("unknown 'has' operand " + operand)
|
|
col_name = 'has_' + operand
|
|
cond = column(col_name)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_in(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if operand == 'home':
|
|
conditions = exclude_muting_conditions(self.user_profile, [])
|
|
return query.where(and_(*conditions))
|
|
elif operand == 'all':
|
|
return query
|
|
|
|
raise BadNarrowOperator("unknown 'in' operand " + operand)
|
|
|
|
def by_is(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if operand == 'private':
|
|
cond = column("flags").op("&")(UserMessage.flags.is_private.mask) != 0
|
|
return query.where(maybe_negate(cond))
|
|
elif operand == 'starred':
|
|
cond = column("flags").op("&")(UserMessage.flags.starred.mask) != 0
|
|
return query.where(maybe_negate(cond))
|
|
elif operand == 'unread':
|
|
cond = column("flags").op("&")(UserMessage.flags.read.mask) == 0
|
|
return query.where(maybe_negate(cond))
|
|
elif operand == 'mentioned':
|
|
cond1 = column("flags").op("&")(UserMessage.flags.mentioned.mask) != 0
|
|
cond2 = column("flags").op("&")(UserMessage.flags.wildcard_mentioned.mask) != 0
|
|
cond = or_(cond1, cond2)
|
|
return query.where(maybe_negate(cond))
|
|
elif operand == 'alerted':
|
|
cond = column("flags").op("&")(UserMessage.flags.has_alert_word.mask) != 0
|
|
return query.where(maybe_negate(cond))
|
|
raise BadNarrowOperator("unknown 'is' operand " + operand)
|
|
|
|
_alphanum = frozenset(
|
|
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
|
|
|
|
def _pg_re_escape(self, pattern: str) -> str:
|
|
"""
|
|
Escape user input to place in a regex
|
|
|
|
Python's re.escape escapes unicode characters in a way which postgres
|
|
fails on, '\u03bb' to '\\\u03bb'. This function will correctly escape
|
|
them for postgres, '\u03bb' to '\\u03bb'.
|
|
"""
|
|
s = list(pattern)
|
|
for i, c in enumerate(s):
|
|
if c not in self._alphanum:
|
|
if ord(c) >= 128:
|
|
# convert the character to hex postgres regex will take
|
|
# \uXXXX
|
|
s[i] = '\\u{:0>4x}'.format(ord(c))
|
|
else:
|
|
s[i] = '\\' + c
|
|
return ''.join(s)
|
|
|
|
def by_stream(self, query: Query, operand: Union[str, int], maybe_negate: ConditionTransform) -> Query:
|
|
try:
|
|
# Because you can see your own message history for
|
|
# private streams you are no longer subscribed to, we
|
|
# need get_stream_by_narrow_operand_access_unchecked here.
|
|
stream = get_stream_by_narrow_operand_access_unchecked(operand, self.user_profile.realm)
|
|
except Stream.DoesNotExist:
|
|
raise BadNarrowOperator('unknown stream ' + str(operand))
|
|
|
|
if self.user_profile.realm.is_zephyr_mirror_realm:
|
|
# MIT users expect narrowing to "social" to also show messages to
|
|
# /^(un)*social(.d)*$/ (unsocial, ununsocial, social.d, ...).
|
|
|
|
# In `ok_to_include_history`, we assume that a non-negated
|
|
# `stream` term for a public stream will limit the query to
|
|
# that specific stream. So it would be a bug to hit this
|
|
# codepath after relying on this term there. But all streams in
|
|
# a Zephyr realm are private, so that doesn't happen.
|
|
assert(not stream.is_public())
|
|
|
|
m = re.search(r'^(?:un)*(.+?)(?:\.d)*$', stream.name, re.IGNORECASE)
|
|
# Since the regex has a `.+` in it and "" is invalid as a
|
|
# stream name, this will always match
|
|
assert(m is not None)
|
|
base_stream_name = m.group(1)
|
|
|
|
matching_streams = get_active_streams(self.user_profile.realm).filter(
|
|
name__iregex=r'^(un)*%s(\.d)*$' % (self._pg_re_escape(base_stream_name),))
|
|
matching_stream_ids = [matching_stream.id for matching_stream in matching_streams]
|
|
recipients_map = bulk_get_recipients(Recipient.STREAM, matching_stream_ids)
|
|
cond = column("recipient_id").in_([recipient.id for recipient in recipients_map.values()])
|
|
return query.where(maybe_negate(cond))
|
|
|
|
recipient = get_stream_recipient(stream.id)
|
|
cond = column("recipient_id") == recipient.id
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_streams(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if operand == 'public':
|
|
# Get all both subscribed and non subscribed public streams
|
|
# but exclude any private subscribed streams.
|
|
public_streams_queryset = get_public_streams_queryset(self.user_profile.realm)
|
|
recipient_ids = Recipient.objects.filter(
|
|
type=Recipient.STREAM,
|
|
type_id__in=public_streams_queryset).values_list('id', flat=True)
|
|
cond = column("recipient_id").in_(recipient_ids)
|
|
return query.where(maybe_negate(cond))
|
|
raise BadNarrowOperator('unknown streams operand ' + operand)
|
|
|
|
def by_topic(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if self.user_profile.realm.is_zephyr_mirror_realm:
|
|
# MIT users expect narrowing to topic "foo" to also show messages to /^foo(.d)*$/
|
|
# (foo, foo.d, foo.d.d, etc)
|
|
m = re.search(r'^(.*?)(?:\.d)*$', operand, re.IGNORECASE)
|
|
# Since the regex has a `.*` in it, this will always match
|
|
assert(m is not None)
|
|
base_topic = m.group(1)
|
|
|
|
# Additionally, MIT users expect the empty instance and
|
|
# instance "personal" to be the same.
|
|
if base_topic in ('', 'personal', '(instance "")'):
|
|
cond = or_(
|
|
topic_match_sa(""),
|
|
topic_match_sa(".d"),
|
|
topic_match_sa(".d.d"),
|
|
topic_match_sa(".d.d.d"),
|
|
topic_match_sa(".d.d.d.d"),
|
|
topic_match_sa("personal"),
|
|
topic_match_sa("personal.d"),
|
|
topic_match_sa("personal.d.d"),
|
|
topic_match_sa("personal.d.d.d"),
|
|
topic_match_sa("personal.d.d.d.d"),
|
|
topic_match_sa('(instance "")'),
|
|
topic_match_sa('(instance "").d'),
|
|
topic_match_sa('(instance "").d.d'),
|
|
topic_match_sa('(instance "").d.d.d'),
|
|
topic_match_sa('(instance "").d.d.d.d'),
|
|
)
|
|
else:
|
|
# We limit `.d` counts, since postgres has much better
|
|
# query planning for this than they do for a regular
|
|
# expression (which would sometimes table scan).
|
|
cond = or_(
|
|
topic_match_sa(base_topic),
|
|
topic_match_sa(base_topic + ".d"),
|
|
topic_match_sa(base_topic + ".d.d"),
|
|
topic_match_sa(base_topic + ".d.d.d"),
|
|
topic_match_sa(base_topic + ".d.d.d.d"),
|
|
)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
cond = topic_match_sa(operand)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_sender(self, query: Query, operand: Union[str, int], maybe_negate: ConditionTransform) -> Query:
|
|
try:
|
|
if isinstance(operand, str):
|
|
sender = get_user_including_cross_realm(operand, self.user_realm)
|
|
else:
|
|
sender = get_user_by_id_in_realm_including_cross_realm(operand, self.user_realm)
|
|
except UserProfile.DoesNotExist:
|
|
raise BadNarrowOperator('unknown user ' + str(operand))
|
|
|
|
cond = column("sender_id") == literal(sender.id)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_near(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
return query
|
|
|
|
def by_id(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if not str(operand).isdigit():
|
|
raise BadNarrowOperator("Invalid message ID")
|
|
cond = self.msg_id_column == literal(operand)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_pm_with(self, query: Query, operand: Union[str, Iterable[int]],
|
|
maybe_negate: ConditionTransform) -> Query:
|
|
|
|
try:
|
|
if isinstance(operand, str):
|
|
email_list = operand.split(",")
|
|
user_profiles = get_user_profiles(
|
|
emails=email_list,
|
|
realm=self.user_realm
|
|
)
|
|
else:
|
|
"""
|
|
This is where we handle passing a list of user IDs for the narrow, which is the
|
|
preferred/cleaner API.
|
|
"""
|
|
user_profiles = get_user_profiles_by_ids(
|
|
user_ids=operand,
|
|
realm=self.user_realm
|
|
)
|
|
|
|
recipient = recipient_for_user_profiles(user_profiles=user_profiles,
|
|
forwarded_mirror_message=False,
|
|
forwarder_user_profile=None,
|
|
sender=self.user_profile,
|
|
allow_deactivated=True)
|
|
except (JsonableError, ValidationError):
|
|
raise BadNarrowOperator('unknown user in ' + str(operand))
|
|
|
|
# Group DM
|
|
if recipient.type == Recipient.HUDDLE:
|
|
cond = column("recipient_id") == recipient.id
|
|
return query.where(maybe_negate(cond))
|
|
|
|
# 1:1 PM
|
|
other_participant = None
|
|
|
|
# Find if another person is in PM
|
|
for user in user_profiles:
|
|
if user.id != self.user_profile.id:
|
|
other_participant = user
|
|
|
|
# PM with another person
|
|
if other_participant:
|
|
# We need bidirectional messages PM with another person.
|
|
# But Recipient.PERSONAL objects only encode the person who
|
|
# received the message, and not the other participant in
|
|
# the thread (the sender), we need to do a somewhat
|
|
# complex query to get messages between these two users
|
|
# with either of them as the sender.
|
|
self_recipient = get_personal_recipient(self.user_profile.id)
|
|
cond = or_(and_(column("sender_id") == other_participant.id,
|
|
column("recipient_id") == self_recipient.id),
|
|
and_(column("sender_id") == self.user_profile.id,
|
|
column("recipient_id") == recipient.id))
|
|
return query.where(maybe_negate(cond))
|
|
|
|
# PM with self
|
|
cond = and_(column("sender_id") == self.user_profile.id,
|
|
column("recipient_id") == recipient.id)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_group_pm_with(self, query: Query, operand: Union[str, int],
|
|
maybe_negate: ConditionTransform) -> Query:
|
|
try:
|
|
if isinstance(operand, str):
|
|
narrow_profile = get_user_including_cross_realm(operand, self.user_realm)
|
|
else:
|
|
narrow_profile = get_user_by_id_in_realm_including_cross_realm(operand, self.user_realm)
|
|
except UserProfile.DoesNotExist:
|
|
raise BadNarrowOperator('unknown user ' + str(operand))
|
|
|
|
self_recipient_ids = [
|
|
recipient_tuple['recipient_id'] for recipient_tuple
|
|
in Subscription.objects.filter(
|
|
user_profile=self.user_profile,
|
|
recipient__type=Recipient.HUDDLE
|
|
).values("recipient_id")]
|
|
narrow_recipient_ids = [
|
|
recipient_tuple['recipient_id'] for recipient_tuple
|
|
in Subscription.objects.filter(
|
|
user_profile=narrow_profile,
|
|
recipient__type=Recipient.HUDDLE
|
|
).values("recipient_id")]
|
|
|
|
recipient_ids = set(self_recipient_ids) & set(narrow_recipient_ids)
|
|
cond = column("recipient_id").in_(recipient_ids)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
def by_search(self, query: Query, operand: str, maybe_negate: ConditionTransform) -> Query:
|
|
if settings.USING_PGROONGA:
|
|
return self._by_search_pgroonga(query, operand, maybe_negate)
|
|
else:
|
|
return self._by_search_tsearch(query, operand, maybe_negate)
|
|
|
|
def _by_search_pgroonga(self, query: Query, operand: str,
|
|
maybe_negate: ConditionTransform) -> Query:
|
|
match_positions_character = func.pgroonga_match_positions_character
|
|
query_extract_keywords = func.pgroonga_query_extract_keywords
|
|
operand_escaped = func.escape_html(operand)
|
|
keywords = query_extract_keywords(operand_escaped)
|
|
query = query.column(match_positions_character(column("rendered_content"),
|
|
keywords).label("content_matches"))
|
|
query = query.column(match_positions_character(func.escape_html(topic_column_sa()),
|
|
keywords).label("topic_matches"))
|
|
condition = column("search_pgroonga").op("&@~")(operand_escaped)
|
|
return query.where(maybe_negate(condition))
|
|
|
|
def _by_search_tsearch(self, query: Query, operand: str,
|
|
maybe_negate: ConditionTransform) -> Query:
|
|
tsquery = func.plainto_tsquery(literal("zulip.english_us_search"), literal(operand))
|
|
ts_locs_array = func.ts_match_locs_array
|
|
query = query.column(ts_locs_array(literal("zulip.english_us_search"),
|
|
column("rendered_content"),
|
|
tsquery).label("content_matches"))
|
|
# We HTML-escape the topic in Postgres to avoid doing a server round-trip
|
|
query = query.column(ts_locs_array(literal("zulip.english_us_search"),
|
|
func.escape_html(topic_column_sa()),
|
|
tsquery).label("topic_matches"))
|
|
|
|
# Do quoted string matching. We really want phrase
|
|
# search here so we can ignore punctuation and do
|
|
# stemming, but there isn't a standard phrase search
|
|
# mechanism in Postgres
|
|
for term in re.findall(r'"[^"]+"|\S+', operand):
|
|
if term[0] == '"' and term[-1] == '"':
|
|
term = term[1:-1]
|
|
term = '%' + connection.ops.prep_for_like_query(term) + '%'
|
|
cond = or_(column("content").ilike(term),
|
|
topic_column_sa().ilike(term))
|
|
query = query.where(maybe_negate(cond))
|
|
|
|
cond = column("search_tsvector").op("@@")(tsquery)
|
|
return query.where(maybe_negate(cond))
|
|
|
|
# The offsets we get from PGroonga are counted in characters
|
|
# whereas the offsets from tsearch_extras are in bytes, so we
|
|
# have to account for both cases in the logic below.
|
|
def highlight_string(text: str, locs: Iterable[Tuple[int, int]]) -> str:
|
|
highlight_start = '<span class="highlight">'
|
|
highlight_stop = '</span>'
|
|
pos = 0
|
|
result = ''
|
|
in_tag = False
|
|
|
|
text_utf8 = text.encode('utf8')
|
|
|
|
for loc in locs:
|
|
(offset, length) = loc
|
|
|
|
# These indexes are in byte space for tsearch,
|
|
# and they are in string space for pgroonga.
|
|
prefix_start = pos
|
|
prefix_end = offset
|
|
match_start = offset
|
|
match_end = offset + length
|
|
|
|
if settings.USING_PGROONGA:
|
|
prefix = text[prefix_start:prefix_end]
|
|
match = text[match_start:match_end]
|
|
else:
|
|
prefix = text_utf8[prefix_start:prefix_end].decode()
|
|
match = text_utf8[match_start:match_end].decode()
|
|
|
|
for character in (prefix + match):
|
|
if character == '<':
|
|
in_tag = True
|
|
elif character == '>':
|
|
in_tag = False
|
|
if in_tag:
|
|
result += prefix
|
|
result += match
|
|
else:
|
|
result += prefix
|
|
result += highlight_start
|
|
result += match
|
|
result += highlight_stop
|
|
pos = match_end
|
|
|
|
if settings.USING_PGROONGA:
|
|
final_frag = text[pos:]
|
|
else:
|
|
final_frag = text_utf8[pos:].decode()
|
|
|
|
result += final_frag
|
|
return result
|
|
|
|
def get_search_fields(rendered_content: str, topic_name: str, content_matches: Iterable[Tuple[int, int]],
|
|
topic_matches: Iterable[Tuple[int, int]]) -> Dict[str, str]:
|
|
return {
|
|
'match_content': highlight_string(rendered_content, content_matches),
|
|
MATCH_TOPIC: highlight_string(escape_html(topic_name), topic_matches),
|
|
}
|
|
|
|
def narrow_parameter(json: str) -> OptionalNarrowListT:
|
|
|
|
data = ujson.loads(json)
|
|
if not isinstance(data, list):
|
|
raise ValueError("argument is not a list")
|
|
if len(data) == 0:
|
|
# The "empty narrow" should be None, and not []
|
|
return None
|
|
|
|
def convert_term(elem: Union[Dict[str, Any], List[str]]) -> Dict[str, Any]:
|
|
|
|
# We have to support a legacy tuple format.
|
|
if isinstance(elem, list):
|
|
if (len(elem) != 2 or any(not isinstance(x, str) for x in elem)):
|
|
raise ValueError("element is not a string pair")
|
|
return dict(operator=elem[0], operand=elem[1])
|
|
|
|
if isinstance(elem, dict):
|
|
# Make sure to sync this list to frontend also when adding a new operator.
|
|
# that supports user IDs. Relevant code is located in static/js/message_fetch.js
|
|
# in handle_operators_supporting_id_based_api function where you will need to update
|
|
# operators_supporting_id, or operators_supporting_ids array.
|
|
operators_supporting_id = ['sender', 'group-pm-with', 'stream']
|
|
operators_supporting_ids = ['pm-with']
|
|
|
|
operator = elem.get('operator', '')
|
|
if operator in operators_supporting_id:
|
|
operand_validator = check_string_or_int
|
|
elif operator in operators_supporting_ids:
|
|
operand_validator = check_string_or_int_list
|
|
else:
|
|
operand_validator = check_string
|
|
|
|
validator = check_dict([
|
|
('operator', check_string),
|
|
('operand', operand_validator),
|
|
])
|
|
|
|
error = validator('elem', elem)
|
|
if error:
|
|
raise JsonableError(error)
|
|
|
|
# whitelist the fields we care about for now
|
|
return dict(
|
|
operator=elem['operator'],
|
|
operand=elem['operand'],
|
|
negated=elem.get('negated', False),
|
|
)
|
|
|
|
raise ValueError("element is not a dictionary")
|
|
|
|
return list(map(convert_term, data))
|
|
|
|
def ok_to_include_history(narrow: OptionalNarrowListT, user_profile: UserProfile) -> bool:
|
|
# There are occasions where we need to find Message rows that
|
|
# have no corresponding UserMessage row, because the user is
|
|
# reading a public stream that might include messages that
|
|
# were sent while the user was not subscribed, but which they are
|
|
# allowed to see. We have to be very careful about constructing
|
|
# queries in those situations, so this function should return True
|
|
# only if we are 100% sure that we're gonna add a clause to the
|
|
# query that narrows to a particular public stream on the user's realm.
|
|
# If we screw this up, then we can get into a nasty situation of
|
|
# polluting our narrow results with messages from other realms.
|
|
include_history = False
|
|
if narrow is not None:
|
|
for term in narrow:
|
|
if term['operator'] == "stream" and not term.get('negated', False):
|
|
operand = term['operand'] # type: Union[str, int]
|
|
if isinstance(operand, str):
|
|
include_history = can_access_stream_history_by_name(user_profile, operand)
|
|
else:
|
|
include_history = can_access_stream_history_by_id(user_profile, operand)
|
|
elif (term['operator'] == "streams" and term['operand'] == "public"
|
|
and not term.get('negated', False) and user_profile.can_access_public_streams()):
|
|
include_history = True
|
|
# Disable historical messages if the user is narrowing on anything
|
|
# that's a property on the UserMessage table. There cannot be
|
|
# historical messages in these cases anyway.
|
|
for term in narrow:
|
|
if term['operator'] == "is":
|
|
include_history = False
|
|
|
|
return include_history
|
|
|
|
def get_stream_from_narrow_access_unchecked(narrow: OptionalNarrowListT, realm: Realm) -> Optional[Stream]:
|
|
if narrow is not None:
|
|
for term in narrow:
|
|
if term['operator'] == 'stream':
|
|
return get_stream_by_narrow_operand_access_unchecked(term['operand'], realm)
|
|
return None
|
|
|
|
def exclude_muting_conditions(user_profile: UserProfile,
|
|
narrow: OptionalNarrowListT) -> List[Selectable]:
|
|
conditions = []
|
|
stream_id = None
|
|
try:
|
|
# Note: It is okay here to not check access to stream
|
|
# because we are only using the stream id to exclude data,
|
|
# not to include results.
|
|
stream = get_stream_from_narrow_access_unchecked(narrow, user_profile.realm)
|
|
if stream is not None:
|
|
stream_id = stream.id
|
|
except Stream.DoesNotExist:
|
|
pass
|
|
|
|
if stream_id is None:
|
|
rows = Subscription.objects.filter(
|
|
user_profile=user_profile,
|
|
active=True,
|
|
is_muted=True,
|
|
recipient__type=Recipient.STREAM
|
|
).values('recipient_id')
|
|
muted_recipient_ids = [row['recipient_id'] for row in rows]
|
|
if len(muted_recipient_ids) > 0:
|
|
# Only add the condition if we have muted streams to simplify/avoid warnings.
|
|
condition = not_(column("recipient_id").in_(muted_recipient_ids))
|
|
conditions.append(condition)
|
|
|
|
conditions = exclude_topic_mutes(conditions, user_profile, stream_id)
|
|
|
|
return conditions
|
|
|
|
def get_base_query_for_search(user_profile: UserProfile,
|
|
need_message: bool,
|
|
need_user_message: bool) -> Tuple[Query, ColumnElement]:
|
|
if need_message and need_user_message:
|
|
query = select([column("message_id"), column("flags")],
|
|
column("user_profile_id") == literal(user_profile.id),
|
|
join(table("zerver_usermessage"), table("zerver_message"),
|
|
literal_column("zerver_usermessage.message_id") ==
|
|
literal_column("zerver_message.id")))
|
|
inner_msg_id_col = column("message_id")
|
|
return (query, inner_msg_id_col)
|
|
|
|
if need_user_message:
|
|
query = select([column("message_id"), column("flags")],
|
|
column("user_profile_id") == literal(user_profile.id),
|
|
table("zerver_usermessage"))
|
|
inner_msg_id_col = column("message_id")
|
|
return (query, inner_msg_id_col)
|
|
|
|
else:
|
|
assert(need_message)
|
|
query = select([column("id").label("message_id")],
|
|
None,
|
|
table("zerver_message"))
|
|
inner_msg_id_col = literal_column("zerver_message.id")
|
|
return (query, inner_msg_id_col)
|
|
|
|
def add_narrow_conditions(user_profile: UserProfile,
|
|
inner_msg_id_col: ColumnElement,
|
|
query: Query,
|
|
narrow: OptionalNarrowListT) -> Tuple[Query, bool]:
|
|
is_search = False # for now
|
|
|
|
if narrow is None:
|
|
return (query, is_search)
|
|
|
|
# Build the query for the narrow
|
|
builder = NarrowBuilder(user_profile, inner_msg_id_col)
|
|
search_operands = []
|
|
|
|
# As we loop through terms, builder does most of the work to extend
|
|
# our query, but we need to collect the search operands and handle
|
|
# them after the loop.
|
|
for term in narrow:
|
|
if term['operator'] == 'search':
|
|
search_operands.append(term['operand'])
|
|
else:
|
|
query = builder.add_term(query, term)
|
|
|
|
if search_operands:
|
|
is_search = True
|
|
query = query.column(topic_column_sa()).column(column("rendered_content"))
|
|
search_term = dict(
|
|
operator='search',
|
|
operand=' '.join(search_operands)
|
|
)
|
|
query = builder.add_term(query, search_term)
|
|
|
|
return (query, is_search)
|
|
|
|
def find_first_unread_anchor(sa_conn: Any,
|
|
user_profile: UserProfile,
|
|
narrow: OptionalNarrowListT) -> int:
|
|
# We always need UserMessage in our query, because it has the unread
|
|
# flag for the user.
|
|
need_user_message = True
|
|
|
|
# Because we will need to call exclude_muting_conditions, unless
|
|
# the user hasn't muted anything, we will need to include Message
|
|
# in our query. It may be worth eventually adding an optimization
|
|
# for the case of a user who hasn't muted anything to avoid the
|
|
# join in that case, but it's low priority.
|
|
need_message = True
|
|
|
|
query, inner_msg_id_col = get_base_query_for_search(
|
|
user_profile=user_profile,
|
|
need_message=need_message,
|
|
need_user_message=need_user_message,
|
|
)
|
|
|
|
query, is_search = add_narrow_conditions(
|
|
user_profile=user_profile,
|
|
inner_msg_id_col=inner_msg_id_col,
|
|
query=query,
|
|
narrow=narrow,
|
|
)
|
|
|
|
condition = column("flags").op("&")(UserMessage.flags.read.mask) == 0
|
|
|
|
# We exclude messages on muted topics when finding the first unread
|
|
# message in this narrow
|
|
muting_conditions = exclude_muting_conditions(user_profile, narrow)
|
|
if muting_conditions:
|
|
condition = and_(condition, *muting_conditions)
|
|
|
|
# The mobile app uses narrow=[] and use_first_unread_anchor=True to
|
|
# determine what messages to show when you first load the app.
|
|
# Unfortunately, this means that if you have a years-old unread
|
|
# message, the mobile app could get stuck in the past.
|
|
#
|
|
# To fix this, we enforce that the "first unread anchor" must be on or
|
|
# after the user's current pointer location. Since the pointer
|
|
# location refers to the latest the user has read in the home view,
|
|
# we'll only apply this logic in the home view (ie, when narrow is
|
|
# empty).
|
|
if not narrow:
|
|
pointer_condition = inner_msg_id_col >= user_profile.pointer
|
|
condition = and_(condition, pointer_condition)
|
|
|
|
first_unread_query = query.where(condition)
|
|
first_unread_query = first_unread_query.order_by(inner_msg_id_col.asc()).limit(1)
|
|
first_unread_result = list(sa_conn.execute(first_unread_query).fetchall())
|
|
if len(first_unread_result) > 0:
|
|
anchor = first_unread_result[0][0]
|
|
else:
|
|
anchor = LARGER_THAN_MAX_MESSAGE_ID
|
|
|
|
return anchor
|
|
|
|
@has_request_variables
|
|
def zcommand_backend(request: HttpRequest, user_profile: UserProfile,
|
|
command: str=REQ('command')) -> HttpResponse:
|
|
return json_success(process_zcommands(command, user_profile))
|
|
|
|
@has_request_variables
|
|
def get_messages_backend(request: HttpRequest, user_profile: UserProfile,
|
|
anchor: Optional[int]=REQ(converter=int, default=None),
|
|
num_before: int=REQ(converter=to_non_negative_int),
|
|
num_after: int=REQ(converter=to_non_negative_int),
|
|
narrow: OptionalNarrowListT=REQ('narrow', converter=narrow_parameter, default=None),
|
|
use_first_unread_anchor: bool=REQ(validator=check_bool, default=False),
|
|
client_gravatar: bool=REQ(validator=check_bool, default=False),
|
|
apply_markdown: bool=REQ(validator=check_bool, default=True)) -> HttpResponse:
|
|
if anchor is None and not use_first_unread_anchor:
|
|
return json_error(_("Missing 'anchor' argument (or set 'use_first_unread_anchor'=True)."))
|
|
if num_before + num_after > MAX_MESSAGES_PER_FETCH:
|
|
return json_error(_("Too many messages requested (maximum %s).")
|
|
% (MAX_MESSAGES_PER_FETCH,))
|
|
|
|
if user_profile.realm.email_address_visibility == Realm.EMAIL_ADDRESS_VISIBILITY_ADMINS:
|
|
# If email addresses are only available to administrators,
|
|
# clients cannot compute gravatars, so we force-set it to false.
|
|
client_gravatar = False
|
|
|
|
include_history = ok_to_include_history(narrow, user_profile)
|
|
if include_history:
|
|
# The initial query in this case doesn't use `zerver_usermessage`,
|
|
# and isn't yet limited to messages the user is entitled to see!
|
|
#
|
|
# This is OK only because we've made sure this is a narrow that
|
|
# will cause us to limit the query appropriately later.
|
|
# See `ok_to_include_history` for details.
|
|
need_message = True
|
|
need_user_message = False
|
|
elif narrow is None:
|
|
# We need to limit to messages the user has received, but we don't actually
|
|
# need any fields from Message
|
|
need_message = False
|
|
need_user_message = True
|
|
else:
|
|
need_message = True
|
|
need_user_message = True
|
|
|
|
query, inner_msg_id_col = get_base_query_for_search(
|
|
user_profile=user_profile,
|
|
need_message=need_message,
|
|
need_user_message=need_user_message,
|
|
)
|
|
|
|
query, is_search = add_narrow_conditions(
|
|
user_profile=user_profile,
|
|
inner_msg_id_col=inner_msg_id_col,
|
|
query=query,
|
|
narrow=narrow,
|
|
)
|
|
|
|
if narrow is not None:
|
|
# Add some metadata to our logging data for narrows
|
|
verbose_operators = []
|
|
for term in narrow:
|
|
if term['operator'] == "is":
|
|
verbose_operators.append("is:" + term['operand'])
|
|
else:
|
|
verbose_operators.append(term['operator'])
|
|
request._log_data['extra'] = "[%s]" % (",".join(verbose_operators),)
|
|
|
|
sa_conn = get_sqlalchemy_connection()
|
|
|
|
if use_first_unread_anchor:
|
|
anchor = find_first_unread_anchor(
|
|
sa_conn,
|
|
user_profile,
|
|
narrow,
|
|
)
|
|
|
|
# Hint to mypy that anchor is now unconditionally an integer,
|
|
# since its inference engine can't figure that out.
|
|
assert anchor is not None
|
|
anchored_to_left = (anchor == 0)
|
|
|
|
# Set value that will be used to short circuit the after_query
|
|
# altogether and avoid needless conditions in the before_query.
|
|
anchored_to_right = (anchor == LARGER_THAN_MAX_MESSAGE_ID)
|
|
if anchored_to_right:
|
|
num_after = 0
|
|
|
|
first_visible_message_id = get_first_visible_message_id(user_profile.realm)
|
|
query = limit_query_to_range(
|
|
query=query,
|
|
num_before=num_before,
|
|
num_after=num_after,
|
|
anchor=anchor,
|
|
anchored_to_left=anchored_to_left,
|
|
anchored_to_right=anchored_to_right,
|
|
id_col=inner_msg_id_col,
|
|
first_visible_message_id=first_visible_message_id,
|
|
)
|
|
|
|
main_query = alias(query)
|
|
query = select(main_query.c, None, main_query).order_by(column("message_id").asc())
|
|
# This is a hack to tag the query we use for testing
|
|
query = query.prefix_with("/* get_messages */")
|
|
rows = list(sa_conn.execute(query).fetchall())
|
|
|
|
query_info = post_process_limited_query(
|
|
rows=rows,
|
|
num_before=num_before,
|
|
num_after=num_after,
|
|
anchor=anchor,
|
|
anchored_to_left=anchored_to_left,
|
|
anchored_to_right=anchored_to_right,
|
|
first_visible_message_id=first_visible_message_id,
|
|
)
|
|
|
|
rows = query_info['rows']
|
|
|
|
# The following is a little messy, but ensures that the code paths
|
|
# are similar regardless of the value of include_history. The
|
|
# 'user_messages' dictionary maps each message to the user's
|
|
# UserMessage object for that message, which we will attach to the
|
|
# rendered message dict before returning it. We attempt to
|
|
# bulk-fetch rendered message dicts from remote cache using the
|
|
# 'messages' list.
|
|
message_ids = [] # type: List[int]
|
|
user_message_flags = {} # type: Dict[int, List[str]]
|
|
if include_history:
|
|
message_ids = [row[0] for row in rows]
|
|
|
|
# TODO: This could be done with an outer join instead of two queries
|
|
um_rows = UserMessage.objects.filter(user_profile=user_profile,
|
|
message__id__in=message_ids)
|
|
user_message_flags = {um.message_id: um.flags_list() for um in um_rows}
|
|
|
|
for message_id in message_ids:
|
|
if message_id not in user_message_flags:
|
|
user_message_flags[message_id] = ["read", "historical"]
|
|
else:
|
|
for row in rows:
|
|
message_id = row[0]
|
|
flags = row[1]
|
|
user_message_flags[message_id] = UserMessage.flags_list_for_flags(flags)
|
|
message_ids.append(message_id)
|
|
|
|
search_fields = dict() # type: Dict[int, Dict[str, str]]
|
|
if is_search:
|
|
for row in rows:
|
|
message_id = row[0]
|
|
(topic_name, rendered_content, content_matches, topic_matches) = row[-4:]
|
|
|
|
try:
|
|
search_fields[message_id] = get_search_fields(rendered_content, topic_name,
|
|
content_matches, topic_matches)
|
|
except UnicodeDecodeError as err: # nocoverage
|
|
# No coverage for this block since it should be
|
|
# impossible, and we plan to remove it once we've
|
|
# debugged the case that makes it happen.
|
|
raise Exception(str(err), message_id, narrow)
|
|
|
|
message_list = messages_for_ids(
|
|
message_ids=message_ids,
|
|
user_message_flags=user_message_flags,
|
|
search_fields=search_fields,
|
|
apply_markdown=apply_markdown,
|
|
client_gravatar=client_gravatar,
|
|
allow_edit_history=user_profile.realm.allow_edit_history,
|
|
)
|
|
|
|
statsd.incr('loaded_old_messages', len(message_list))
|
|
|
|
ret = dict(
|
|
messages=message_list,
|
|
result='success',
|
|
msg='',
|
|
found_anchor=query_info['found_anchor'],
|
|
found_oldest=query_info['found_oldest'],
|
|
found_newest=query_info['found_newest'],
|
|
history_limited=query_info['history_limited'],
|
|
anchor=anchor,
|
|
)
|
|
return json_success(ret)
|
|
|
|
def limit_query_to_range(query: Query,
|
|
num_before: int,
|
|
num_after: int,
|
|
anchor: int,
|
|
anchored_to_left: bool,
|
|
anchored_to_right: bool,
|
|
id_col: ColumnElement,
|
|
first_visible_message_id: int) -> Query:
|
|
'''
|
|
This code is actually generic enough that we could move it to a
|
|
library, but our only caller for now is message search.
|
|
'''
|
|
need_before_query = (not anchored_to_left) and (num_before > 0)
|
|
need_after_query = (not anchored_to_right) and (num_after > 0)
|
|
|
|
need_both_sides = need_before_query and need_after_query
|
|
|
|
# The semantics of our flags are as follows:
|
|
#
|
|
# num_after = number of rows < anchor
|
|
# num_after = number of rows > anchor
|
|
#
|
|
# But we also want the row where id == anchor (if it exists),
|
|
# and we don't want to union up to 3 queries. So in some cases
|
|
# we do things like `after_limit = num_after + 1` to grab the
|
|
# anchor row in the "after" query.
|
|
#
|
|
# Note that in some cases, if the anchor row isn't found, we
|
|
# actually may fetch an extra row at one of the extremes.
|
|
if need_both_sides:
|
|
before_anchor = anchor - 1
|
|
after_anchor = max(anchor, first_visible_message_id)
|
|
before_limit = num_before
|
|
after_limit = num_after + 1
|
|
elif need_before_query:
|
|
before_anchor = anchor
|
|
before_limit = num_before
|
|
if not anchored_to_right:
|
|
before_limit += 1
|
|
elif need_after_query:
|
|
after_anchor = max(anchor, first_visible_message_id)
|
|
after_limit = num_after + 1
|
|
|
|
if need_before_query:
|
|
before_query = query
|
|
|
|
if not anchored_to_right:
|
|
before_query = before_query.where(id_col <= before_anchor)
|
|
|
|
before_query = before_query.order_by(id_col.desc())
|
|
before_query = before_query.limit(before_limit)
|
|
|
|
if need_after_query:
|
|
after_query = query
|
|
|
|
if not anchored_to_left:
|
|
after_query = after_query.where(id_col >= after_anchor)
|
|
|
|
after_query = after_query.order_by(id_col.asc())
|
|
after_query = after_query.limit(after_limit)
|
|
|
|
if need_both_sides:
|
|
query = union_all(before_query.self_group(), after_query.self_group())
|
|
elif need_before_query:
|
|
query = before_query
|
|
elif need_after_query:
|
|
query = after_query
|
|
else:
|
|
# If we don't have either a before_query or after_query, it's because
|
|
# some combination of num_before/num_after/anchor are zero or
|
|
# use_first_unread_anchor logic found no unread messages.
|
|
#
|
|
# The most likely reason is somebody is doing an id search, so searching
|
|
# for something like `message_id = 42` is exactly what we want. In other
|
|
# cases, which could possibly be buggy API clients, at least we will
|
|
# return at most one row here.
|
|
query = query.where(id_col == anchor)
|
|
|
|
return query
|
|
|
|
def post_process_limited_query(rows: List[Any],
|
|
num_before: int,
|
|
num_after: int,
|
|
anchor: int,
|
|
anchored_to_left: bool,
|
|
anchored_to_right: bool,
|
|
first_visible_message_id: int) -> Dict[str, Any]:
|
|
# Our queries may have fetched extra rows if they added
|
|
# "headroom" to the limits, but we want to truncate those
|
|
# rows.
|
|
#
|
|
# Also, in cases where we had non-zero values of num_before or
|
|
# num_after, we want to know found_oldest and found_newest, so
|
|
# that the clients will know that they got complete results.
|
|
|
|
if first_visible_message_id > 0:
|
|
visible_rows = [r for r in rows if r[0] >= first_visible_message_id]
|
|
else:
|
|
visible_rows = rows
|
|
|
|
rows_limited = len(visible_rows) != len(rows)
|
|
|
|
if anchored_to_right:
|
|
num_after = 0
|
|
before_rows = visible_rows[:]
|
|
anchor_rows = [] # type: List[Any]
|
|
after_rows = [] # type: List[Any]
|
|
else:
|
|
before_rows = [r for r in visible_rows if r[0] < anchor]
|
|
anchor_rows = [r for r in visible_rows if r[0] == anchor]
|
|
after_rows = [r for r in visible_rows if r[0] > anchor]
|
|
|
|
if num_before:
|
|
before_rows = before_rows[-1 * num_before:]
|
|
|
|
if num_after:
|
|
after_rows = after_rows[:num_after]
|
|
|
|
visible_rows = before_rows + anchor_rows + after_rows
|
|
|
|
found_anchor = len(anchor_rows) == 1
|
|
found_oldest = anchored_to_left or (len(before_rows) < num_before)
|
|
found_newest = anchored_to_right or (len(after_rows) < num_after)
|
|
# BUG: history_limited is incorrect False in the event that we had
|
|
# to bump `anchor` up due to first_visible_message_id, and there
|
|
# were actually older messages. This may be a rare event in the
|
|
# context where history_limited is relevant, because it can only
|
|
# happen in one-sided queries with no num_before (see tests tagged
|
|
# BUG in PostProcessTest for examples), and we don't generally do
|
|
# those from the UI, so this might be OK for now.
|
|
#
|
|
# The correct fix for this probably involves e.g. making a
|
|
# `before_query` when we increase `anchor` just to confirm whether
|
|
# messages were hidden.
|
|
history_limited = rows_limited and found_oldest
|
|
|
|
return dict(
|
|
rows=visible_rows,
|
|
found_anchor=found_anchor,
|
|
found_newest=found_newest,
|
|
found_oldest=found_oldest,
|
|
history_limited=history_limited,
|
|
)
|
|
|
|
@has_request_variables
|
|
def update_message_flags(request: HttpRequest, user_profile: UserProfile,
|
|
messages: List[int]=REQ(validator=check_list(check_int)),
|
|
operation: str=REQ('op'), flag: str=REQ()) -> HttpResponse:
|
|
|
|
count = do_update_message_flags(user_profile, request.client, operation, flag, messages)
|
|
|
|
target_count_str = str(len(messages))
|
|
log_data_str = "[%s %s/%s] actually %s" % (operation, flag, target_count_str, count)
|
|
request._log_data["extra"] = log_data_str
|
|
|
|
return json_success({'result': 'success',
|
|
'messages': messages,
|
|
'msg': ''})
|
|
|
|
@has_request_variables
|
|
def mark_all_as_read(request: HttpRequest, user_profile: UserProfile) -> HttpResponse:
|
|
count = do_mark_all_as_read(user_profile, request.client)
|
|
|
|
log_data_str = "[%s updated]" % (count,)
|
|
request._log_data["extra"] = log_data_str
|
|
|
|
return json_success({'result': 'success',
|
|
'msg': ''})
|
|
|
|
@has_request_variables
|
|
def mark_stream_as_read(request: HttpRequest,
|
|
user_profile: UserProfile,
|
|
stream_id: int=REQ(validator=check_int)) -> HttpResponse:
|
|
stream, recipient, sub = access_stream_by_id(user_profile, stream_id)
|
|
count = do_mark_stream_messages_as_read(user_profile, request.client, stream)
|
|
|
|
log_data_str = "[%s updated]" % (count,)
|
|
request._log_data["extra"] = log_data_str
|
|
|
|
return json_success({'result': 'success',
|
|
'msg': ''})
|
|
|
|
@has_request_variables
|
|
def mark_topic_as_read(request: HttpRequest,
|
|
user_profile: UserProfile,
|
|
stream_id: int=REQ(validator=check_int),
|
|
topic_name: str=REQ()) -> HttpResponse:
|
|
stream, recipient, sub = access_stream_by_id(user_profile, stream_id)
|
|
|
|
if topic_name:
|
|
topic_exists = user_message_exists_for_topic(
|
|
user_profile=user_profile,
|
|
recipient=recipient,
|
|
topic_name=topic_name,
|
|
)
|
|
|
|
if not topic_exists:
|
|
raise JsonableError(_('No such topic \'%s\'') % (topic_name,))
|
|
|
|
count = do_mark_stream_messages_as_read(user_profile, request.client, stream, topic_name)
|
|
|
|
log_data_str = "[%s updated]" % (count,)
|
|
request._log_data["extra"] = log_data_str
|
|
|
|
return json_success({'result': 'success',
|
|
'msg': ''})
|
|
|
|
class InvalidMirrorInput(Exception):
|
|
pass
|
|
|
|
def create_mirrored_message_users(request: HttpRequest, user_profile: UserProfile,
|
|
recipients: Iterable[str]) -> UserProfile:
|
|
if "sender" not in request.POST:
|
|
raise InvalidMirrorInput("No sender")
|
|
|
|
sender_email = request.POST["sender"].strip().lower()
|
|
referenced_users = set([sender_email])
|
|
if request.POST['type'] == 'private':
|
|
for email in recipients:
|
|
referenced_users.add(email.lower())
|
|
|
|
if request.client.name == "zephyr_mirror":
|
|
user_check = same_realm_zephyr_user
|
|
fullname_function = compute_mit_user_fullname
|
|
elif request.client.name == "irc_mirror":
|
|
user_check = same_realm_irc_user
|
|
fullname_function = compute_irc_user_fullname
|
|
elif request.client.name in ("jabber_mirror", "JabberMirror"):
|
|
user_check = same_realm_jabber_user
|
|
fullname_function = compute_jabber_user_fullname
|
|
else:
|
|
raise InvalidMirrorInput("Unrecognized mirroring client")
|
|
|
|
for email in referenced_users:
|
|
# Check that all referenced users are in our realm:
|
|
if not user_check(user_profile, email):
|
|
raise InvalidMirrorInput("At least one user cannot be mirrored")
|
|
|
|
# Create users for the referenced users, if needed.
|
|
for email in referenced_users:
|
|
create_mirror_user_if_needed(user_profile.realm, email, fullname_function)
|
|
|
|
sender = get_user_including_cross_realm(sender_email, user_profile.realm)
|
|
return sender
|
|
|
|
def same_realm_zephyr_user(user_profile: UserProfile, email: str) -> bool:
|
|
#
|
|
# Are the sender and recipient both addresses in the same Zephyr
|
|
# mirroring realm? We have to handle this specially, inferring
|
|
# the domain from the e-mail address, because the recipient may
|
|
# not existing in Zulip and we may need to make a stub Zephyr
|
|
# mirroring user on the fly.
|
|
try:
|
|
validators.validate_email(email)
|
|
except ValidationError:
|
|
return False
|
|
|
|
domain = email_to_domain(email)
|
|
|
|
# Assumes allow_subdomains=False for all RealmDomain's corresponding to
|
|
# these realms.
|
|
return user_profile.realm.is_zephyr_mirror_realm and \
|
|
RealmDomain.objects.filter(realm=user_profile.realm, domain=domain).exists()
|
|
|
|
def same_realm_irc_user(user_profile: UserProfile, email: str) -> bool:
|
|
# Check whether the target email address is an IRC user in the
|
|
# same realm as user_profile, i.e. if the domain were example.com,
|
|
# the IRC user would need to be username@irc.example.com
|
|
try:
|
|
validators.validate_email(email)
|
|
except ValidationError:
|
|
return False
|
|
|
|
domain = email_to_domain(email).replace("irc.", "")
|
|
|
|
# Assumes allow_subdomains=False for all RealmDomain's corresponding to
|
|
# these realms.
|
|
return RealmDomain.objects.filter(realm=user_profile.realm, domain=domain).exists()
|
|
|
|
def same_realm_jabber_user(user_profile: UserProfile, email: str) -> bool:
|
|
try:
|
|
validators.validate_email(email)
|
|
except ValidationError:
|
|
return False
|
|
|
|
# If your Jabber users have a different email domain than the
|
|
# Zulip users, this is where you would do any translation.
|
|
domain = email_to_domain(email)
|
|
|
|
# Assumes allow_subdomains=False for all RealmDomain's corresponding to
|
|
# these realms.
|
|
return RealmDomain.objects.filter(realm=user_profile.realm, domain=domain).exists()
|
|
|
|
def handle_deferred_message(sender: UserProfile, client: Client,
|
|
message_type_name: str,
|
|
message_to: Union[Sequence[str], Sequence[int]],
|
|
topic_name: Optional[str],
|
|
message_content: str, delivery_type: str,
|
|
defer_until: str, tz_guess: Optional[str],
|
|
forwarder_user_profile: UserProfile,
|
|
realm: Optional[Realm]) -> HttpResponse:
|
|
deliver_at = None
|
|
local_tz = 'UTC'
|
|
if tz_guess:
|
|
local_tz = tz_guess
|
|
elif sender.timezone:
|
|
local_tz = sender.timezone
|
|
try:
|
|
deliver_at = dateparser(defer_until)
|
|
except ValueError:
|
|
return json_error(_("Invalid time format"))
|
|
|
|
deliver_at_usertz = deliver_at
|
|
if deliver_at_usertz.tzinfo is None:
|
|
user_tz = get_timezone(local_tz)
|
|
# Since mypy is not able to recognize localize and normalize as attributes of tzinfo we use ignore.
|
|
deliver_at_usertz = user_tz.normalize(user_tz.localize(deliver_at)) # type: ignore # Reason in comment on previous line.
|
|
deliver_at = convert_to_UTC(deliver_at_usertz)
|
|
|
|
if deliver_at <= timezone_now():
|
|
return json_error(_("Time must be in the future."))
|
|
|
|
check_schedule_message(sender, client, message_type_name, message_to,
|
|
topic_name, message_content, delivery_type,
|
|
deliver_at, realm=realm,
|
|
forwarder_user_profile=forwarder_user_profile)
|
|
return json_success({"deliver_at": str(deliver_at_usertz)})
|
|
|
|
# We do not @require_login for send_message_backend, since it is used
|
|
# both from the API and the web service. Code calling
|
|
# send_message_backend should either check the API key or check that
|
|
# the user is logged in.
|
|
@has_request_variables
|
|
def send_message_backend(request: HttpRequest, user_profile: UserProfile,
|
|
message_type_name: str=REQ('type'),
|
|
message_to: Union[Sequence[int], Sequence[str]]=REQ(
|
|
'to', type=Union[List[int], List[str]],
|
|
converter=extract_recipients, default=[]),
|
|
forged: bool=REQ(default=False,
|
|
documentation_pending=True),
|
|
topic_name: Optional[str]=REQ_topic(),
|
|
message_content: str=REQ('content'),
|
|
widget_content: Optional[str]=REQ(default=None,
|
|
documentation_pending=True),
|
|
realm_str: Optional[str]=REQ('realm_str', default=None,
|
|
documentation_pending=True),
|
|
local_id: Optional[str]=REQ(default=None,
|
|
documentation_pending=True),
|
|
queue_id: Optional[str]=REQ(default=None,
|
|
documentation_pending=True),
|
|
delivery_type: Optional[str]=REQ('delivery_type', default='send_now',
|
|
documentation_pending=True),
|
|
defer_until: Optional[str]=REQ('deliver_at', default=None,
|
|
documentation_pending=True),
|
|
tz_guess: Optional[str]=REQ('tz_guess', default=None,
|
|
documentation_pending=True)
|
|
) -> HttpResponse:
|
|
client = request.client
|
|
is_super_user = request.user.is_api_super_user
|
|
if forged and not is_super_user:
|
|
return json_error(_("User not authorized for this query"))
|
|
|
|
realm = None
|
|
if realm_str and realm_str != user_profile.realm.string_id:
|
|
if not is_super_user:
|
|
# The email gateway bot needs to be able to send messages in
|
|
# any realm.
|
|
return json_error(_("User not authorized for this query"))
|
|
try:
|
|
realm = get_realm(realm_str)
|
|
except Realm.DoesNotExist:
|
|
return json_error(_("Unknown organization '%s'") % (realm_str,))
|
|
|
|
if client.name in ["zephyr_mirror", "irc_mirror", "jabber_mirror", "JabberMirror"]:
|
|
# Here's how security works for mirroring:
|
|
#
|
|
# For private messages, the message must be (1) both sent and
|
|
# received exclusively by users in your realm, and (2)
|
|
# received by the forwarding user.
|
|
#
|
|
# For stream messages, the message must be (1) being forwarded
|
|
# by an API superuser for your realm and (2) being sent to a
|
|
# mirrored stream.
|
|
#
|
|
# The security checks are split between the below code
|
|
# (especially create_mirrored_message_users which checks the
|
|
# same-realm constraint) and recipient_for_emails (which
|
|
# checks that PMs are received by the forwarding user)
|
|
if "sender" not in request.POST:
|
|
return json_error(_("Missing sender"))
|
|
if message_type_name != "private" and not is_super_user:
|
|
return json_error(_("User not authorized for this query"))
|
|
|
|
# For now, mirroring only works with recipient emails, not for
|
|
# recipient user IDs.
|
|
if not all(isinstance(to_item, str) for to_item in message_to):
|
|
return json_error(_("Mirroring not allowed with recipient user IDs"))
|
|
|
|
# We need this manual cast so that mypy doesn't complain about
|
|
# create_mirrored_message_users not being able to accept a Sequence[int]
|
|
# type parameter.
|
|
message_to = cast(Sequence[str], message_to)
|
|
|
|
try:
|
|
mirror_sender = create_mirrored_message_users(request, user_profile, message_to)
|
|
except InvalidMirrorInput:
|
|
return json_error(_("Invalid mirrored message"))
|
|
|
|
if client.name == "zephyr_mirror" and not user_profile.realm.is_zephyr_mirror_realm:
|
|
return json_error(_("Zephyr mirroring is not allowed in this organization"))
|
|
sender = mirror_sender
|
|
else:
|
|
sender = user_profile
|
|
|
|
if (delivery_type == 'send_later' or delivery_type == 'remind') and defer_until is None:
|
|
return json_error(_("Missing deliver_at in a request for delayed message delivery"))
|
|
|
|
if (delivery_type == 'send_later' or delivery_type == 'remind') and defer_until is not None:
|
|
return handle_deferred_message(sender, client, message_type_name,
|
|
message_to, topic_name, message_content,
|
|
delivery_type, defer_until, tz_guess,
|
|
forwarder_user_profile=user_profile,
|
|
realm=realm)
|
|
|
|
ret = check_send_message(sender, client, message_type_name, message_to,
|
|
topic_name, message_content, forged=forged,
|
|
forged_timestamp = request.POST.get('time'),
|
|
forwarder_user_profile=user_profile, realm=realm,
|
|
local_id=local_id, sender_queue_id=queue_id,
|
|
widget_content=widget_content)
|
|
return json_success({"id": ret})
|
|
|
|
def fill_edit_history_entries(message_history: List[Dict[str, Any]], message: Message) -> None:
|
|
"""This fills out the message edit history entries from the database,
|
|
which are designed to have the minimum data possible, to instead
|
|
have the current topic + content as of that time, plus data on
|
|
whatever changed. This makes it much simpler to do future
|
|
processing.
|
|
|
|
Note that this mutates what is passed to it, which is sorta a bad pattern.
|
|
"""
|
|
prev_content = message.content
|
|
prev_rendered_content = message.rendered_content
|
|
prev_topic = message.topic_name()
|
|
|
|
# Make sure that the latest entry in the history corresponds to the
|
|
# message's last edit time
|
|
if len(message_history) > 0:
|
|
assert message.last_edit_time is not None
|
|
assert(datetime_to_timestamp(message.last_edit_time) ==
|
|
message_history[0]['timestamp'])
|
|
|
|
for entry in message_history:
|
|
entry['topic'] = prev_topic
|
|
if LEGACY_PREV_TOPIC in entry:
|
|
prev_topic = entry[LEGACY_PREV_TOPIC]
|
|
entry['prev_topic'] = prev_topic
|
|
del entry[LEGACY_PREV_TOPIC]
|
|
|
|
entry['content'] = prev_content
|
|
entry['rendered_content'] = prev_rendered_content
|
|
if 'prev_content' in entry:
|
|
del entry['prev_rendered_content_version']
|
|
prev_content = entry['prev_content']
|
|
prev_rendered_content = entry['prev_rendered_content']
|
|
assert prev_rendered_content is not None
|
|
entry['content_html_diff'] = highlight_html_differences(
|
|
prev_rendered_content,
|
|
entry['rendered_content'],
|
|
message.id)
|
|
|
|
message_history.append(dict(
|
|
topic = prev_topic,
|
|
content = prev_content,
|
|
rendered_content = prev_rendered_content,
|
|
timestamp = datetime_to_timestamp(message.pub_date),
|
|
user_id = message.sender_id,
|
|
))
|
|
|
|
@has_request_variables
|
|
def get_message_edit_history(request: HttpRequest, user_profile: UserProfile,
|
|
message_id: int=REQ(converter=to_non_negative_int,
|
|
path_only=True)) -> HttpResponse:
|
|
if not user_profile.realm.allow_edit_history:
|
|
return json_error(_("Message edit history is disabled in this organization"))
|
|
message, ignored_user_message = access_message(user_profile, message_id)
|
|
|
|
# Extract the message edit history from the message
|
|
if message.edit_history is not None:
|
|
message_edit_history = ujson.loads(message.edit_history)
|
|
else:
|
|
message_edit_history = []
|
|
|
|
# Fill in all the extra data that will make it usable
|
|
fill_edit_history_entries(message_edit_history, message)
|
|
return json_success({"message_history": reversed(message_edit_history)})
|
|
|
|
@has_request_variables
|
|
def update_message_backend(request: HttpRequest, user_profile: UserMessage,
|
|
message_id: int=REQ(converter=to_non_negative_int),
|
|
topic_name: Optional[str]=REQ_topic(),
|
|
propagate_mode: Optional[str]=REQ(default="change_one"),
|
|
content: Optional[str]=REQ(default=None)) -> HttpResponse:
|
|
|
|
if not user_profile.realm.allow_message_editing:
|
|
return json_error(_("Your organization has turned off message editing"))
|
|
|
|
message, ignored_user_message = access_message(user_profile, message_id)
|
|
is_no_topic_msg = (message.topic_name() == "(no topic)")
|
|
|
|
# You only have permission to edit a message if:
|
|
# you change this value also change those two parameters in message_edit.js.
|
|
# 1. You sent it, OR:
|
|
# 2. This is a topic-only edit for a (no topic) message, OR:
|
|
# 3. This is a topic-only edit and you are an admin, OR:
|
|
# 4. This is a topic-only edit and your realm allows users to edit topics.
|
|
if message.sender == user_profile:
|
|
pass
|
|
elif (content is None) and (is_no_topic_msg or
|
|
user_profile.is_realm_admin or
|
|
user_profile.realm.allow_community_topic_editing):
|
|
pass
|
|
else:
|
|
raise JsonableError(_("You don't have permission to edit this message"))
|
|
|
|
# If there is a change to the content, check that it hasn't been too long
|
|
# Allow an extra 20 seconds since we potentially allow editing 15 seconds
|
|
# past the limit, and in case there are network issues, etc. The 15 comes
|
|
# from (min_seconds_to_edit + seconds_left_buffer) in message_edit.js; if
|
|
# you change this value also change those two parameters in message_edit.js.
|
|
edit_limit_buffer = 20
|
|
if content is not None and user_profile.realm.message_content_edit_limit_seconds > 0:
|
|
deadline_seconds = user_profile.realm.message_content_edit_limit_seconds + edit_limit_buffer
|
|
if (timezone_now() - message.pub_date) > datetime.timedelta(seconds=deadline_seconds):
|
|
raise JsonableError(_("The time limit for editing this message has passed"))
|
|
|
|
# If there is a change to the topic, check that the user is allowed to
|
|
# edit it and that it has not been too long. If this is not the user who
|
|
# sent the message, they are not the admin, and the time limit for editing
|
|
# topics is passed, raise an error.
|
|
if content is None and message.sender != user_profile and not user_profile.is_realm_admin and \
|
|
not is_no_topic_msg:
|
|
deadline_seconds = Realm.DEFAULT_COMMUNITY_TOPIC_EDITING_LIMIT_SECONDS + edit_limit_buffer
|
|
if (timezone_now() - message.pub_date) > datetime.timedelta(seconds=deadline_seconds):
|
|
raise JsonableError(_("The time limit for editing this message has passed"))
|
|
|
|
if topic_name is None and content is None:
|
|
return json_error(_("Nothing to change"))
|
|
if topic_name is not None:
|
|
topic_name = topic_name.strip()
|
|
if topic_name == "":
|
|
raise JsonableError(_("Topic can't be empty"))
|
|
rendered_content = None
|
|
links_for_embed = set() # type: Set[str]
|
|
prior_mention_user_ids = set() # type: Set[int]
|
|
mention_user_ids = set() # type: Set[int]
|
|
if content is not None:
|
|
content = content.strip()
|
|
if content == "":
|
|
content = "(deleted)"
|
|
content = truncate_body(content)
|
|
|
|
user_info = get_user_info_for_message_updates(message.id)
|
|
prior_mention_user_ids = user_info['mention_user_ids']
|
|
|
|
# We render the message using the current user's realm; since
|
|
# the cross-realm bots never edit messages, this should be
|
|
# always correct.
|
|
# Note: If rendering fails, the called code will raise a JsonableError.
|
|
rendered_content = render_incoming_message(message,
|
|
content,
|
|
user_info['message_user_ids'],
|
|
user_profile.realm)
|
|
links_for_embed |= message.links_for_preview
|
|
|
|
mention_user_ids = message.mentions_user_ids
|
|
|
|
number_changed = do_update_message(user_profile, message, topic_name,
|
|
propagate_mode, content, rendered_content,
|
|
prior_mention_user_ids,
|
|
mention_user_ids)
|
|
|
|
# Include the number of messages changed in the logs
|
|
request._log_data['extra'] = "[%s]" % (number_changed,)
|
|
if links_for_embed and bugdown.url_embed_preview_enabled(message):
|
|
event_data = {
|
|
'message_id': message.id,
|
|
'message_content': message.content,
|
|
# The choice of `user_profile.realm_id` rather than
|
|
# `sender.realm_id` must match the decision made in the
|
|
# `render_incoming_message` call earlier in this function.
|
|
'message_realm_id': user_profile.realm_id,
|
|
'urls': links_for_embed}
|
|
queue_json_publish('embed_links', event_data)
|
|
return json_success()
|
|
|
|
|
|
def validate_can_delete_message(user_profile: UserProfile, message: Message) -> None:
|
|
if user_profile.is_realm_admin:
|
|
# Admin can delete any message, any time.
|
|
return
|
|
if message.sender != user_profile:
|
|
# Users can only delete messages sent by them.
|
|
raise JsonableError(_("You don't have permission to delete this message"))
|
|
if not user_profile.realm.allow_message_deleting:
|
|
# User can not delete message, if message deleting is not allowed in realm.
|
|
raise JsonableError(_("You don't have permission to delete this message"))
|
|
|
|
deadline_seconds = user_profile.realm.message_content_delete_limit_seconds
|
|
if deadline_seconds == 0:
|
|
# 0 for no time limit to delete message
|
|
return
|
|
if (timezone_now() - message.pub_date) > datetime.timedelta(seconds=deadline_seconds):
|
|
# User can not delete message after deadline time of realm
|
|
raise JsonableError(_("The time limit for deleting this message has passed"))
|
|
return
|
|
|
|
@has_request_variables
|
|
def delete_message_backend(request: HttpRequest, user_profile: UserProfile,
|
|
message_id: int=REQ(converter=to_non_negative_int,
|
|
path_only=True)) -> HttpResponse:
|
|
message, ignored_user_message = access_message(user_profile, message_id)
|
|
validate_can_delete_message(user_profile, message)
|
|
try:
|
|
do_delete_messages(user_profile, [message])
|
|
except (Message.DoesNotExist, IntegrityError):
|
|
raise JsonableError(_("Message already deleted"))
|
|
return json_success()
|
|
|
|
@has_request_variables
|
|
def json_fetch_raw_message(request: HttpRequest, user_profile: UserProfile,
|
|
message_id: int=REQ(converter=to_non_negative_int)) -> HttpResponse:
|
|
(message, user_message) = access_message(user_profile, message_id)
|
|
return json_success({"raw_content": message.content})
|
|
|
|
@has_request_variables
|
|
def render_message_backend(request: HttpRequest, user_profile: UserProfile,
|
|
content: str=REQ()) -> HttpResponse:
|
|
message = Message()
|
|
message.sender = user_profile
|
|
message.content = content
|
|
message.sending_client = request.client
|
|
|
|
rendered_content = render_markdown(message, content, realm=user_profile.realm)
|
|
return json_success({"rendered": rendered_content})
|
|
|
|
@has_request_variables
|
|
def messages_in_narrow_backend(request: HttpRequest, user_profile: UserProfile,
|
|
msg_ids: List[int]=REQ(validator=check_list(check_int)),
|
|
narrow: OptionalNarrowListT=REQ(converter=narrow_parameter)
|
|
) -> HttpResponse:
|
|
|
|
first_visible_message_id = get_first_visible_message_id(user_profile.realm)
|
|
msg_ids = [message_id for message_id in msg_ids if message_id >= first_visible_message_id]
|
|
# This query is limited to messages the user has access to because they
|
|
# actually received them, as reflected in `zerver_usermessage`.
|
|
query = select([column("message_id"), topic_column_sa(), column("rendered_content")],
|
|
and_(column("user_profile_id") == literal(user_profile.id),
|
|
column("message_id").in_(msg_ids)),
|
|
join(table("zerver_usermessage"), table("zerver_message"),
|
|
literal_column("zerver_usermessage.message_id") ==
|
|
literal_column("zerver_message.id")))
|
|
|
|
builder = NarrowBuilder(user_profile, column("message_id"))
|
|
if narrow is not None:
|
|
for term in narrow:
|
|
query = builder.add_term(query, term)
|
|
|
|
sa_conn = get_sqlalchemy_connection()
|
|
query_result = list(sa_conn.execute(query).fetchall())
|
|
|
|
search_fields = dict()
|
|
for row in query_result:
|
|
message_id = row['message_id']
|
|
topic_name = row[DB_TOPIC_NAME]
|
|
rendered_content = row['rendered_content']
|
|
|
|
if 'content_matches' in row:
|
|
content_matches = row['content_matches']
|
|
topic_matches = row['topic_matches']
|
|
search_fields[message_id] = get_search_fields(rendered_content, topic_name,
|
|
content_matches, topic_matches)
|
|
else:
|
|
search_fields[message_id] = {
|
|
'match_content': rendered_content,
|
|
MATCH_TOPIC: escape_html(topic_name),
|
|
}
|
|
|
|
return json_success({"messages": search_fields})
|