mirror of https://github.com/zulip/zulip.git
1197 lines
44 KiB
Python
1197 lines
44 KiB
Python
import copy
|
|
import datetime
|
|
import zlib
|
|
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
|
|
|
|
import ahocorasick
|
|
import orjson
|
|
from django.db import connection
|
|
from django.db.models import Sum
|
|
from django.utils.timezone import now as timezone_now
|
|
from django.utils.translation import ugettext as _
|
|
from psycopg2.sql import SQL
|
|
from typing_extensions import TypedDict
|
|
|
|
from analytics.lib.counts import COUNT_STATS, RealmCount
|
|
from zerver.lib.avatar import get_avatar_field
|
|
from zerver.lib.cache import (
|
|
cache_with_key,
|
|
generic_bulk_cached_fetch,
|
|
to_dict_cache_key,
|
|
to_dict_cache_key_id,
|
|
)
|
|
from zerver.lib.display_recipient import (
|
|
DisplayRecipientT,
|
|
UserDisplayRecipient,
|
|
bulk_fetch_display_recipients,
|
|
)
|
|
from zerver.lib.markdown import MentionData, markdown_convert, topic_links
|
|
from zerver.lib.markdown import version as markdown_version
|
|
from zerver.lib.request import JsonableError
|
|
from zerver.lib.stream_subscription import get_stream_subscriptions_for_user
|
|
from zerver.lib.timestamp import datetime_to_timestamp
|
|
from zerver.lib.topic import DB_TOPIC_NAME, MESSAGE__TOPIC, TOPIC_LINKS, TOPIC_NAME
|
|
from zerver.lib.topic_mutes import build_topic_mute_checker, topic_is_muted
|
|
from zerver.models import (
|
|
MAX_MESSAGE_LENGTH,
|
|
MAX_TOPIC_NAME_LENGTH,
|
|
Message,
|
|
Reaction,
|
|
Realm,
|
|
Recipient,
|
|
Stream,
|
|
SubMessage,
|
|
Subscription,
|
|
UserMessage,
|
|
UserProfile,
|
|
get_display_recipient_by_id,
|
|
get_user_profile_by_id,
|
|
get_usermessage_by_message_id,
|
|
query_for_ids,
|
|
)
|
|
|
|
# Type alias: a mapping from an integer key to a list of strings.
# NOTE(review): presumably user id -> that user's alert words; confirm
# against the callers, since nothing in this part of the file uses it.
RealmAlertWord = Dict[int, List[str]]
|
|
|
|
class RawUnreadMessagesResult(TypedDict):
    """Un-aggregated unread-message state, as built by get_raw_unread_data().

    The three *_dict fields are keyed by message id.
    """
    # message id -> dict(sender_id=<the other participant of the PM>)
    pm_dict: Dict[int, Any]
    # message id -> dict(stream_id=..., topic=..., sender_id=...)
    stream_dict: Dict[int, Any]
    # message id -> dict(user_ids_string=<comma-joined sorted user ids>)
    huddle_dict: Dict[int, Any]
    # ids of unread messages that count as mentions for the user
    mentions: Set[int]
    # stream ids of the user's active, muted stream subscriptions
    muted_stream_ids: List[int]
    # ids of unread stream messages that are in neither a muted stream
    # nor a muted topic
    unmuted_stream_msgs: Set[int]
|
|
|
|
class UnreadMessagesResult(TypedDict):
    """Aggregated unread-message summary produced by aggregate_unread_data()."""
    # one entry per `sender_id`, each carrying `unread_message_ids`
    pms: List[Dict[str, Any]]
    # one entry per (`stream_id`, `topic`), each carrying
    # `unread_message_ids` and `sender_ids`
    streams: List[Dict[str, Any]]
    # one entry per `user_ids_string`, each carrying `unread_message_ids`
    huddles: List[Dict[str, Any]]
    # message ids taken from the raw `mentions` set
    mentions: List[int]
    # number of PMs + unmuted stream messages + huddle messages
    count: int
|
|
|
|
# We won't try to fetch more unread message IDs from the database than
# this limit.  The limit is super high, in large part because it means
# client-side code mostly doesn't need to think about the case that a
# user has more older unread messages that were cut off.
#
# get_raw_unread_data() applies this limit after ordering by descending
# message id, so it is the oldest unread messages that get dropped.
MAX_UNREAD_MESSAGES = 50000
|
|
|
|
def truncate_content(content: str, max_length: int, truncation_message: str) -> str:
    """Return `content` shortened to at most `max_length` characters.

    Content that already fits is returned unchanged.  Otherwise the tail
    is cut off and `truncation_message` is appended, so that the result is
    exactly `max_length` characters long.

    If `truncation_message` itself is longer than `max_length`, there is
    no room for the marker; the content is then hard-cut to `max_length`
    characters.  (Previously a negative slice index was computed in this
    case, which could yield a result longer than `max_length`.)
    """
    if len(content) <= max_length:
        return content

    keep = max_length - len(truncation_message)
    if keep < 0:
        # The marker does not fit; never exceed the requested limit.
        return content[:max(max_length, 0)]

    return content[:keep] + truncation_message
|
|
|
|
def truncate_body(body: str) -> str:
    """Shorten a message body to MAX_MESSAGE_LENGTH, marking the cut."""
    return truncate_content(
        content=body,
        max_length=MAX_MESSAGE_LENGTH,
        truncation_message="\n[message truncated]",
    )
|
|
|
|
def truncate_topic(topic: str) -> str:
    """Shorten a topic name to MAX_TOPIC_NAME_LENGTH, ending it with "..."."""
    return truncate_content(
        content=topic,
        max_length=MAX_TOPIC_NAME_LENGTH,
        truncation_message="...",
    )
|
|
|
|
def messages_for_ids(message_ids: List[int],
                     user_message_flags: Dict[int, List[str]],
                     search_fields: Dict[int, Dict[str, str]],
                     apply_markdown: bool,
                     client_gravatar: bool,
                     allow_edit_history: bool) -> List[Dict[str, Any]]:
    """Build the client-facing message dicts for `message_ids`, in order.

    The per-message dicts are obtained through generic_bulk_cached_fetch
    (falling back to MessageDict.get_raw_db_rows), then decorated with the
    user's flags and any search fields, and finally post-processed by
    MessageDict.post_process_dicts.
    """

    def row_id(row: Dict[str, Any]) -> int:
        return row['id']

    dicts_by_id = generic_bulk_cached_fetch(
        to_dict_cache_key_id,
        MessageDict.get_raw_db_rows,
        message_ids,
        id_fetcher=row_id,
        cache_transformer=MessageDict.build_dict_from_raw_db_row,
        extractor=extract_message_dict,
        setter=stringify_message_dict)

    message_list: List[Dict[str, Any]] = []

    for message_id in message_ids:
        payload = dicts_by_id[message_id]
        payload["flags"] = user_message_flags[message_id]

        if message_id in search_fields:
            payload.update(search_fields[message_id])

        # Make sure that we never send message edit history to clients
        # in realms with allow_edit_history disabled.
        if not allow_edit_history:
            payload.pop("edit_history", None)

        message_list.append(payload)

    MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)

    return message_list
|
|
|
|
def sew_messages_and_reactions(messages: List[Dict[str, Any]],
                               reactions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Attach each reaction to the message it belongs to.

    Every message dict is mutated in place to carry a `reactions` list
    (empty when it has none).  Returns the messages, one per distinct id.
    """
    # Start every message off with an empty list.
    for msg in messages:
        msg['reactions'] = []

    # Index by id so that each reaction finds its message directly.
    by_id = dict((msg['id'], msg) for msg in messages)

    for reaction in reactions:
        owner = by_id[reaction['message_id']]
        owner['reactions'].append(reaction)

    return list(by_id.values())
|
|
|
|
|
|
def sew_messages_and_submessages(messages: List[Dict[str, Any]],
                                 submessages: List[Dict[str, Any]]) -> None:
    """Attach each submessage to its message, mutating `messages` in place.

    Every message gets a `submessages` list.  Submessages whose
    `message_id` matches none of the given messages are ignored.
    """
    for msg in messages:
        msg['submessages'] = []

    by_id = {msg['id']: msg for msg in messages}

    for submessage in submessages:
        owner = by_id.get(submessage['message_id'])
        if owner is not None:
            owner['submessages'].append(submessage)
|
|
|
|
def extract_message_dict(message_bytes: bytes) -> Dict[str, Any]:
    """Inverse of stringify_message_dict: zlib-decompress, then parse JSON."""
    raw_json = zlib.decompress(message_bytes)
    return orjson.loads(raw_json)
|
|
|
|
def stringify_message_dict(message_dict: Dict[str, Any]) -> bytes:
    """Serialize a message dict to JSON and zlib-compress the result."""
    raw_json = orjson.dumps(message_dict)
    return zlib.compress(raw_json)
|
|
|
|
@cache_with_key(to_dict_cache_key, timeout=3600*24)
def message_to_dict_json(message: Message, realm_id: Optional[int]=None) -> bytes:
    """Return the encoded (see stringify_message_dict) dict for one message.

    The result is cached under to_dict_cache_key with a 24-hour timeout.
    """
    encoded_by_id = MessageDict.to_dict_uncached([message], realm_id)
    return encoded_by_id[message.id]
|
|
|
|
def save_message_rendered_content(message: Message, content: str) -> str:
    """Render `content` for `message`, store the result on it, and return it.

    Besides the rendered content, the current markdown version is recorded
    on the message before message.save_rendered_content() is called.
    """
    html = render_markdown(message, content, realm=message.get_realm())

    message.rendered_content = html
    message.rendered_content_version = markdown_version
    message.save_rendered_content()

    return html
|
|
|
|
class MessageDict:
    """Static helpers for building the dict representation of messages.

    A message dict goes through three stages in this class:
    build_message_dict() creates the cacheable dict, the bulk_hydrate_*
    methods add sender and recipient details, and finalize_payload() /
    _finalize_payload() shape the dict for one particular client and strip
    the internal-only fields.
    """

    @staticmethod
    def wide_dict(message: Message, realm_id: Optional[int]=None) -> Dict[str, Any]:
        '''
        Return the hydrated, but not finalized, dict for `message`.

        The next two lines get the cacheable field related
        to our message object, with the side effect of
        populating the cache.
        '''
        json = message_to_dict_json(message, realm_id)
        obj = extract_message_dict(json)

        '''
        The steps below are similar to what we do in
        post_process_dicts(), except we don't call finalize_payload(),
        since that step happens later in the queue
        processor.
        '''
        MessageDict.bulk_hydrate_sender_info([obj])
        MessageDict.bulk_hydrate_recipient_info([obj])

        return obj

    @staticmethod
    def post_process_dicts(objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool) -> None:
        '''
        NOTE: This function mutates the objects in
              the `objs` list, rather than making
              shallow copies.  It might be safer to
              make shallow copies here, but performance
              is somewhat important here, as we are
              often fetching several messages.
        '''
        MessageDict.bulk_hydrate_sender_info(objs)
        MessageDict.bulk_hydrate_recipient_info(objs)

        for obj in objs:
            MessageDict._finalize_payload(obj, apply_markdown, client_gravatar)

    @staticmethod
    def finalize_payload(obj: Dict[str, Any],
                         apply_markdown: bool,
                         client_gravatar: bool,
                         keep_rendered_content: bool=False) -> Dict[str, Any]:
        '''
        Make a shallow copy of the incoming dict to avoid
        mutation-related bugs.  This function is often
        called when we're sending out message events to
        multiple clients, who often want the final dictionary
        to have different shapes here based on the parameters.
        '''
        new_obj = copy.copy(obj)

        # Next call our worker, which mutates the record in place.
        MessageDict._finalize_payload(
            new_obj,
            apply_markdown=apply_markdown,
            client_gravatar=client_gravatar,
            keep_rendered_content=keep_rendered_content,
        )
        return new_obj

    @staticmethod
    def _finalize_payload(obj: Dict[str, Any], apply_markdown: bool, client_gravatar: bool,
                          keep_rendered_content: bool=False) -> None:
        """Shape a hydrated message dict for one client, in place.

        Sets `avatar_url` and `content_type`; with `apply_markdown` the
        `content` field is replaced by the rendered content.  The fields
        that were only needed for hydration are then deleted, so `obj`
        must already have been through both bulk_hydrate_* steps.
        """
        MessageDict.set_sender_avatar(obj, client_gravatar)
        if apply_markdown:
            obj['content_type'] = 'text/html'
            obj['content'] = obj['rendered_content']
        else:
            obj['content_type'] = 'text/x-markdown'

        if not keep_rendered_content:
            del obj['rendered_content']
        # The sender_* fields below are consumed by set_sender_avatar() above.
        del obj['sender_realm_id']
        del obj['sender_avatar_source']
        del obj['sender_delivery_email']
        del obj['sender_avatar_version']

        # These are consumed by hydrate_recipient_info().
        del obj['recipient_type']
        del obj['recipient_type_id']
        del obj['sender_is_mirror_dummy']

    @staticmethod
    def sew_submessages_and_reactions_to_msgs(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Fetch the submessage and reaction rows for `messages` and attach them.

        The message dicts are mutated in place; the return value is whatever
        sew_messages_and_reactions() returns for them.
        """
        msg_ids = [msg['id'] for msg in messages]
        submessages = SubMessage.get_raw_db_rows(msg_ids)
        sew_messages_and_submessages(messages, submessages)

        reactions = Reaction.get_raw_db_rows(msg_ids)
        return sew_messages_and_reactions(messages, reactions)

    @staticmethod
    def to_dict_uncached(messages: List[Message], realm_id: Optional[int]=None) -> Dict[int, bytes]:
        """Build the message dicts without consulting the cache.

        Returns a mapping from message id to the dict encoded by
        stringify_message_dict().
        """
        messages_dict = MessageDict.to_dict_uncached_helper(messages, realm_id)
        encoded_messages = {msg['id']: stringify_message_dict(msg) for msg in messages_dict}
        return encoded_messages

    @staticmethod
    def to_dict_uncached_helper(messages: List[Message],
                                realm_id: Optional[int]=None) -> List[Dict[str, Any]]:
        """Build un-encoded message dicts from already-fetched Message objects."""
        # Near duplicate of the build_message_dict + get_raw_db_rows
        # code path that accepts already fetched Message objects
        # rather than message IDs.

        def get_rendering_realm_id(message: Message) -> int:
            # realm_id can differ among users, currently only possible
            # with cross realm bots.
            if realm_id is not None:
                return realm_id
            if message.recipient.type == Recipient.STREAM:
                # NOTE: this is one Stream lookup per stream message.
                return Stream.objects.get(id=message.recipient.type_id).realm_id
            return message.sender.realm_id

        # Mirror the keys of the .values() rows from get_raw_db_rows(), plus
        # the extra `rendering_realm_id` key.
        message_rows = [{
            'id': message.id,
            DB_TOPIC_NAME: message.topic_name(),
            "date_sent": message.date_sent,
            "last_edit_time": message.last_edit_time,
            "edit_history": message.edit_history,
            "content": message.content,
            "rendered_content": message.rendered_content,
            "rendered_content_version": message.rendered_content_version,
            "recipient_id": message.recipient.id,
            "recipient__type": message.recipient.type,
            "recipient__type_id": message.recipient.type_id,
            "rendering_realm_id": get_rendering_realm_id(message),
            "sender_id": message.sender.id,
            "sending_client__name": message.sending_client.name,
            "sender__realm_id": message.sender.realm_id,
        } for message in messages]

        MessageDict.sew_submessages_and_reactions_to_msgs(message_rows)
        return [MessageDict.build_dict_from_raw_db_row(row) for row in message_rows]

    @staticmethod
    def get_raw_db_rows(needed_ids: List[int]) -> List[Dict[str, Any]]:
        """Fetch the raw rows, with submessages and reactions, for `needed_ids`."""
        # This is a special purpose function optimized for
        # callers like get_messages_backend().
        fields = [
            'id',
            DB_TOPIC_NAME,
            'date_sent',
            'last_edit_time',
            'edit_history',
            'content',
            'rendered_content',
            'rendered_content_version',
            'recipient_id',
            'recipient__type',
            'recipient__type_id',
            'sender_id',
            'sending_client__name',
            'sender__realm_id',
        ]
        messages = Message.objects.filter(id__in=needed_ids).values(*fields)
        return MessageDict.sew_submessages_and_reactions_to_msgs(messages)

    @staticmethod
    def build_dict_from_raw_db_row(row: Dict[str, Any]) -> Dict[str, Any]:
        '''
        row is a row from a .values() call, and it needs to have
        all the relevant fields populated
        '''
        return MessageDict.build_message_dict(
            message_id = row['id'],
            last_edit_time = row['last_edit_time'],
            edit_history = row['edit_history'],
            content = row['content'],
            topic_name = row[DB_TOPIC_NAME],
            date_sent = row['date_sent'],
            rendered_content = row['rendered_content'],
            rendered_content_version = row['rendered_content_version'],
            sender_id = row['sender_id'],
            sender_realm_id = row['sender__realm_id'],
            sending_client_name = row['sending_client__name'],
            # Rows from get_raw_db_rows() carry no rendering realm; fall
            # back to the sender's realm for those.
            rendering_realm_id = row.get('rendering_realm_id', row['sender__realm_id']),
            recipient_id = row['recipient_id'],
            recipient_type = row['recipient__type'],
            recipient_type_id = row['recipient__type_id'],
            reactions=row['reactions'],
            submessages=row['submessages'],
        )

    @staticmethod
    def build_message_dict(
            message_id: int,
            last_edit_time: Optional[datetime.datetime],
            edit_history: Optional[str],
            content: str,
            topic_name: str,
            date_sent: datetime.datetime,
            rendered_content: Optional[str],
            rendered_content_version: Optional[int],
            sender_id: int,
            sender_realm_id: int,
            sending_client_name: str,
            rendering_realm_id: int,
            recipient_id: int,
            recipient_type: int,
            recipient_type_id: int,
            reactions: List[Dict[str, Any]],
            submessages: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Assemble the cacheable message dict from plain field values.

        The result still contains internal fields (recipient_type,
        sender_realm_id, rendered_content, ...) that _finalize_payload()
        later removes.  If the stored rendering is missing or out of date,
        the message is loaded from the database, re-rendered and saved.
        """

        obj = dict(
            id                = message_id,
            sender_id         = sender_id,
            content           = content,
            recipient_type_id = recipient_type_id,
            recipient_type    = recipient_type,
            recipient_id      = recipient_id,
            timestamp         = datetime_to_timestamp(date_sent),
            client            = sending_client_name)

        obj[TOPIC_NAME] = topic_name
        obj['sender_realm_id'] = sender_realm_id

        # Render topic_links with the stream's realm instead of the
        # sender's realm; this is important for messages sent by
        # cross-realm bots like NOTIFICATION_BOT.
        obj[TOPIC_LINKS] = topic_links(rendering_realm_id, topic_name)

        if last_edit_time is not None:
            obj['last_edit_timestamp'] = datetime_to_timestamp(last_edit_time)
            assert edit_history is not None
            obj['edit_history'] = orjson.loads(edit_history)

        if Message.need_to_render_content(rendered_content, rendered_content_version, markdown_version):
            # We really shouldn't be rendering objects in this method, but there is
            # a scenario where we upgrade the version of Markdown and fail to run
            # management commands to re-render historical messages, and then we
            # need to have side effects.  This method is optimized to not need full
            # blown ORM objects, but the Markdown renderer is unfortunately highly
            # coupled to Message, and we also need to persist the new rendered content.
            # If we don't have a message object passed in, we get one here.  The cost
            # of going to the DB here should be overshadowed by the cost of rendering
            # and updating the row.
            # TODO: see #1379 to eliminate Markdown dependencies
            message = Message.objects.select_related().get(id=message_id)

            assert message is not None  # Hint for mypy.
            # It's unfortunate that we need to have side effects on the message
            # in some cases.
            rendered_content = save_message_rendered_content(message, content)

        if rendered_content is not None:
            obj['rendered_content'] = rendered_content
        else:
            obj['rendered_content'] = ('<p>[Zulip note: Sorry, we could not ' +
                                       'understand the formatting of your message]</p>')

        if rendered_content is not None:
            obj['is_me_message'] = Message.is_status_message(content, rendered_content)
        else:
            obj['is_me_message'] = False

        obj['reactions'] = [ReactionDict.build_dict_from_raw_db_row(reaction)
                            for reaction in reactions]
        obj['submessages'] = submessages
        return obj

    @staticmethod
    def bulk_hydrate_sender_info(objs: List[Dict[str, Any]]) -> None:
        """Add the sender_* fields to every dict in `objs`, in place.

        All distinct senders are looked up through a single query_for_ids()
        call.
        """

        sender_ids = list({
            obj['sender_id']
            for obj in objs
        })

        if not sender_ids:
            return

        query = UserProfile.objects.values(
            'id',
            'full_name',
            'delivery_email',
            'email',
            'realm__string_id',
            'avatar_source',
            'avatar_version',
            'is_mirror_dummy',
        )

        rows = query_for_ids(query, sender_ids, 'zerver_userprofile.id')

        sender_dict = {
            row['id']: row
            for row in rows
        }

        for obj in objs:
            sender_id = obj['sender_id']
            user_row = sender_dict[sender_id]
            obj['sender_full_name'] = user_row['full_name']
            obj['sender_email'] = user_row['email']
            obj['sender_delivery_email'] = user_row['delivery_email']
            obj['sender_realm_str'] = user_row['realm__string_id']
            obj['sender_avatar_source'] = user_row['avatar_source']
            obj['sender_avatar_version'] = user_row['avatar_version']
            obj['sender_is_mirror_dummy'] = user_row['is_mirror_dummy']

    @staticmethod
    def hydrate_recipient_info(obj: Dict[str, Any], display_recipient: DisplayRecipientT) -> None:
        '''
        This method hydrates recipient info with things
        like full names and emails of senders. Eventually
        our clients should be able to hydrate these fields
        themselves with info they already have on users.

        Sets `display_recipient` and `type` on `obj`, plus `stream_id`
        for stream messages.  The sender_* fields must already be present.
        '''

        recipient_type = obj['recipient_type']
        recipient_type_id = obj['recipient_type_id']
        sender_is_mirror_dummy = obj['sender_is_mirror_dummy']
        sender_email = obj['sender_email']
        sender_full_name = obj['sender_full_name']
        sender_id = obj['sender_id']

        if recipient_type == Recipient.STREAM:
            display_type = "stream"
        elif recipient_type in (Recipient.HUDDLE, Recipient.PERSONAL):
            assert not isinstance(display_recipient, str)
            display_type = "private"
            if len(display_recipient) == 1:
                # add the sender in if this isn't a message between
                # someone and themself, preserving ordering
                recip: UserDisplayRecipient = {
                    'email': sender_email,
                    'full_name': sender_full_name,
                    'id': sender_id,
                    'is_mirror_dummy': sender_is_mirror_dummy,
                }
                # Equal emails mean sender and recipient are the same user,
                # so the single-entry list is left as it is.
                if recip['email'] < display_recipient[0]['email']:
                    display_recipient = [recip, display_recipient[0]]
                elif recip['email'] > display_recipient[0]['email']:
                    display_recipient = [display_recipient[0], recip]
        else:
            raise AssertionError(f"Invalid recipient type {recipient_type}")

        obj['display_recipient'] = display_recipient
        obj['type'] = display_type
        if obj['type'] == 'stream':
            obj['stream_id'] = recipient_type_id

    @staticmethod
    def bulk_hydrate_recipient_info(objs: List[Dict[str, Any]]) -> None:
        """Run hydrate_recipient_info() on every dict in `objs`.

        The display recipients are fetched with one
        bulk_fetch_display_recipients() call.
        """
        recipient_tuples = {  # We use set to eliminate duplicate tuples.
            (
                obj['recipient_id'],
                obj['recipient_type'],
                obj['recipient_type_id'],
            ) for obj in objs
        }
        display_recipients = bulk_fetch_display_recipients(recipient_tuples)

        for obj in objs:
            MessageDict.hydrate_recipient_info(obj, display_recipients[obj['recipient_id']])

    @staticmethod
    def set_sender_avatar(obj: Dict[str, Any], client_gravatar: bool) -> None:
        """Set obj['avatar_url'] from the sender_* fields, via get_avatar_field()."""
        sender_id = obj['sender_id']
        sender_realm_id = obj['sender_realm_id']
        sender_delivery_email = obj['sender_delivery_email']
        sender_avatar_source = obj['sender_avatar_source']
        sender_avatar_version = obj['sender_avatar_version']

        obj['avatar_url'] = get_avatar_field(
            user_id=sender_id,
            realm_id=sender_realm_id,
            email=sender_delivery_email,
            avatar_source=sender_avatar_source,
            avatar_version=sender_avatar_version,
            medium=False,
            client_gravatar=client_gravatar,
        )
|
|
|
|
class ReactionDict:
    """Helper for turning a raw reaction row into its API representation."""

    @staticmethod
    def build_dict_from_raw_db_row(row: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one reaction row (from a `.values()` query) to a dict."""
        reaction = {
            field: row[field]
            for field in ('emoji_name', 'emoji_code', 'reaction_type')
        }

        # TODO: We plan to remove this redundant user dictionary once
        # clients are updated to support accessing use user_id.  See
        # https://github.com/zulip/zulip/pull/14711 for details.
        #
        # When we do that, we can likely update the `.values()` query to
        # not fetch the extra user_profile__* fields from the database
        # as a small performance optimization.
        reaction['user'] = {
            'email': row['user_profile__email'],
            'id': row['user_profile__id'],
            'full_name': row['user_profile__full_name'],
        }
        reaction['user_id'] = row['user_profile__id']
        return reaction
|
|
|
|
|
|
def access_message(user_profile: UserProfile, message_id: int) -> Tuple[Message, Optional[UserMessage]]:
    """Fetch a message by ID, provided the user is allowed to see it.

    A user may access a message that either:
    (1) they received or have previously accessed via starring
        (i.e. they have a UserMessage row for it), or
    (2) was sent to a public stream in their realm.

    The same boring error is raised whether the message does not exist
    or is merely inaccessible, so that no information leaks.
    """
    try:
        message = Message.objects.select_related().get(id=message_id)
    except Message.DoesNotExist:
        raise JsonableError(_("Invalid message(s)"))

    user_message = get_usermessage_by_message_id(user_profile, message_id)

    if not has_message_access(user_profile, message, user_message):
        raise JsonableError(_("Invalid message(s)"))

    return (message, user_message)
|
|
|
|
def has_message_access(user_profile: UserProfile, message: Message,
                       user_message: Optional[UserMessage]) -> bool:
    """Decide whether `user_profile` may read `message`.

    Having a UserMessage row is sufficient.  Without one, only stream
    messages in the user's own realm can be accessible, and only when the
    stream's history is public to subscribers; for non-public streams and
    for guest users an active subscription is additionally required.
    """
    if user_message is not None:
        return True

    if message.recipient.type != Recipient.STREAM:
        # You can't access private messages you didn't receive
        return False

    stream = Stream.objects.get(id=message.recipient.type_id)
    if stream.realm != user_profile.realm:
        # You can't access public stream messages in other realms
        return False

    if not stream.is_history_public_to_subscribers():
        # You can't access messages you didn't directly receive
        # unless history is public to subscribers.
        return False

    # Two cases need an active subscription: an invite-only stream whose
    # history is available to subscribers, and any stream when the user is
    # a guest (guests don't get automatic access to public stream
    # messages).
    if not stream.is_public() or user_profile.is_guest:
        return Subscription.objects.filter(user_profile=user_profile, active=True,
                                           recipient=message.recipient).exists()

    # Otherwise, the message was sent to a public stream in your realm.
    return True
|
|
|
|
def bulk_access_messages(user_profile: UserProfile, messages: Sequence[Message]) -> List[Message]:
    """Return the subset of `messages` the user may access, in input order.

    Each message is checked individually: its UserMessage row is looked
    up and has_message_access() is applied.
    """
    return [
        message
        for message in messages
        if has_message_access(
            user_profile,
            message,
            get_usermessage_by_message_id(user_profile, message.id),
        )
    ]
|
|
|
|
def bulk_access_messages_expect_usermessage(
        user_profile_id: int, message_ids: Sequence[int]) -> List[int]:
    '''
    Faster, potentially stricter variant of bulk_access_messages.

    Yields the subset of `message_ids` for which the user has a
    UserMessage row, using a single query.

    Only use this when the user is expected to have a UserMessage row
    for every message in `message_ids`: a message without such a row is
    omitted even if the user could access it (e.g. because it went to a
    public stream).

    See also: `access_message`, `bulk_access_messages`.
    '''
    # NOTE(review): the value returned is the values_list() result itself
    # rather than a materialized list -- confirm that callers are fine with
    # that, given the List[int] annotation.
    user_rows = UserMessage.objects.filter(
        user_profile_id=user_profile_id,
        message_id__in=message_ids,
    )
    return user_rows.values_list('message_id', flat=True)
|
|
|
|
def render_markdown(message: Message,
                    content: str,
                    realm: Optional[Realm]=None,
                    realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                    mention_data: Optional[MentionData]=None,
                    email_gateway: bool=False) -> str:
    '''
    Thin wrapper around do_render_markdown.

    Fills in the realm from the message when none is given, and reads the
    `is_bot` and `translate_emoticons` settings from the sender's profile.
    '''
    if realm is None:
        realm = message.get_realm()

    sender = get_user_profile_by_id(message.sender_id)

    return do_render_markdown(
        message=message,
        content=content,
        realm=realm,
        realm_alert_words_automaton=realm_alert_words_automaton,
        sent_by_bot=sender.is_bot,
        translate_emoticons=sender.translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
|
|
|
|
def do_render_markdown(message: Message,
                       content: str,
                       realm: Realm,
                       sent_by_bot: bool,
                       translate_emoticons: bool,
                       realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                       mention_data: Optional[MentionData]=None,
                       email_gateway: bool=False) -> str:
    """Return HTML for given Markdown.  Markdown may add properties to the
    message object such as `mentions_user_ids`, `mentions_user_group_ids`, and
    `mentions_wildcard`.  These are only on this Django object and are not
    saved in the database.
    """
    # Reset the per-render bookkeeping attributes before converting.
    message.mentions_wildcard = False
    for set_attribute in (
        'mentions_user_ids',
        'mentions_user_group_ids',
        'alert_words',
        'links_for_preview',
        'user_ids_with_alert_words',
    ):
        setattr(message, set_attribute, set())

    # DO MAIN WORK HERE -- call markdown_convert to convert
    return markdown_convert(
        content,
        realm_alert_words_automaton=realm_alert_words_automaton,
        message=message,
        message_realm=realm,
        sent_by_bot=sent_by_bot,
        translate_emoticons=translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
|
|
|
|
def huddle_users(recipient_id: int) -> str:
    """Return the huddle's user ids as a comma-joined string, in ascending order."""
    display_recipient: DisplayRecipientT = get_display_recipient_by_id(
        recipient_id, Recipient.HUDDLE, None,
    )

    # str is for streams.
    assert not isinstance(display_recipient, str)

    ordered_ids: List[int] = sorted(member['id'] for member in display_recipient)
    return ','.join(map(str, ordered_ids))
|
|
|
|
def aggregate_message_dict(input_dict: Dict[int, Dict[str, Any]],
                           lookup_fields: List[str],
                           collect_senders: bool) -> List[Dict[str, Any]]:
    '''
    Group unread messages by the values of `lookup_fields`.

    A concrete example might help explain the inputs here:

    input_dict = {
        1002: dict(stream_id=5, topic='foo', sender_id=40),
        1003: dict(stream_id=5, topic='foo', sender_id=41),
        1004: dict(stream_id=6, topic='baz', sender_id=99),
    }

    lookup_fields = ['stream_id', 'topic']

    The first message falls into the bucket with key (5, 'foo'), and so
    does the second.  With collect_senders=True the result is:

    [
        dict(stream_id=5, topic='foo',
             unread_message_ids=[1002, 1003],
             sender_ids=[40, 41],
        ),
        dict(stream_id=6, topic='baz',
             unread_message_ids=[1004],
             sender_ids=[99],
        ),
    ]

    Buckets are returned in ascending key order; the message ids (and
    sender ids, when collected) inside each bucket are sorted as well.
    '''
    buckets: Dict[Tuple[Any, ...], Dict[str, Any]] = {}

    for message_id, attrs in input_dict.items():
        key = tuple(attrs[field] for field in lookup_fields)

        bucket = buckets.get(key)
        if bucket is None:
            # First message for this key: start a new bucket.
            bucket = {field: attrs[field] for field in lookup_fields}
            bucket['unread_message_ids'] = []
            if collect_senders:
                bucket['sender_ids'] = set()
            buckets[key] = bucket

        bucket['unread_message_ids'].append(message_id)
        if collect_senders:
            bucket['sender_ids'].add(attrs['sender_id'])

    result: List[Dict[str, Any]] = []
    for key in sorted(buckets):
        bucket = buckets[key]
        bucket['unread_message_ids'].sort()
        if collect_senders:
            bucket['sender_ids'] = sorted(bucket['sender_ids'])
        result.append(bucket)

    return result
|
|
|
|
def get_inactive_recipient_ids(user_profile: UserProfile) -> List[int]:
    """Return the recipient ids of the user's inactive stream subscriptions."""
    inactive_subs = get_stream_subscriptions_for_user(user_profile).filter(
        active=False,
    )
    return [
        row['recipient_id']
        for row in inactive_subs.values('recipient_id')
    ]
|
|
|
|
def get_muted_stream_ids(user_profile: UserProfile) -> List[int]:
    """Return the stream ids of the user's active, muted stream subscriptions."""
    muted_subs = get_stream_subscriptions_for_user(user_profile).filter(
        active=True,
        is_muted=True,
    )
    return [
        row['recipient__type_id']
        for row in muted_subs.values('recipient__type_id')
    ]
|
|
|
|
def get_starred_message_ids(user_profile: UserProfile) -> List[int]:
    """Return up to 10000 ids of the user's starred messages, lowest ids first."""
    starred_rows = UserMessage.objects.filter(
        user_profile=user_profile,
    ).extra(
        where=[UserMessage.where_starred()],
    )
    ordered_ids = starred_rows.order_by(
        'message_id',
    ).values_list('message_id', flat=True)

    return list(ordered_ids[0:10000])
|
|
|
|
def get_raw_unread_data(user_profile: UserProfile) -> RawUnreadMessagesResult:
    """Collect the user's unread messages, bucketed by conversation type.

    At most MAX_UNREAD_MESSAGES rows are considered (the ones with the
    highest message ids), and messages in streams the user has an inactive
    subscription to are excluded.  Each message lands in exactly one of
    `pm_dict`, `stream_dict` or `huddle_dict`; stream messages that are in
    neither a muted stream nor a muted topic are additionally recorded in
    `unmuted_stream_msgs`.  `mentions` holds messages flagged as mentioning
    the user, plus wildcard mentions that are not muted.
    """

    excluded_recipient_ids = get_inactive_recipient_ids(user_profile)

    user_msgs = UserMessage.objects.filter(
        user_profile=user_profile,
    ).exclude(
        message__recipient_id__in=excluded_recipient_ids,
    ).extra(
        where=[UserMessage.where_unread()],
    ).values(
        'message_id',
        'message__sender_id',
        MESSAGE__TOPIC,
        'message__recipient_id',
        'message__recipient__type',
        'message__recipient__type_id',
        'flags',
    ).order_by("-message_id")

    # Limit unread messages for performance reasons.
    user_msgs = list(user_msgs[:MAX_UNREAD_MESSAGES])

    # The query is newest-first so that the limit keeps the newest rows;
    # process them oldest-first.
    rows = list(reversed(user_msgs))

    muted_stream_ids = get_muted_stream_ids(user_profile)
    # Membership is tested once per row (up to MAX_UNREAD_MESSAGES rows),
    # so use a set for the lookups; the list itself is still what we return.
    muted_stream_id_set = set(muted_stream_ids)

    topic_mute_checker = build_topic_mute_checker(user_profile)

    def is_row_muted(stream_id: int, recipient_id: int, topic: str) -> bool:
        if stream_id in muted_stream_id_set:
            return True

        if topic_mute_checker(recipient_id, topic):
            return True

        return False

    # Several unread messages often share a huddle, so look each one up once.
    huddle_cache: Dict[int, str] = {}

    def get_huddle_users(recipient_id: int) -> str:
        if recipient_id in huddle_cache:
            return huddle_cache[recipient_id]

        user_ids_string = huddle_users(recipient_id)
        huddle_cache[recipient_id] = user_ids_string
        return user_ids_string

    pm_dict: Dict[int, Any] = {}
    stream_dict: Dict[int, Any] = {}
    unmuted_stream_msgs: Set[int] = set()
    huddle_dict: Dict[int, Any] = {}
    mentions: Set[int] = set()

    for row in rows:
        message_id = row['message_id']
        msg_type = row['message__recipient__type']
        recipient_id = row['message__recipient_id']
        sender_id = row['message__sender_id']

        if msg_type == Recipient.STREAM:
            stream_id = row['message__recipient__type_id']
            topic = row[MESSAGE__TOPIC]
            stream_dict[message_id] = dict(
                stream_id=stream_id,
                topic=topic,
                sender_id=sender_id,
            )
            if not is_row_muted(stream_id, recipient_id, topic):
                unmuted_stream_msgs.add(message_id)

        elif msg_type == Recipient.PERSONAL:
            if sender_id == user_profile.id:
                other_user_id = row['message__recipient__type_id']
            else:
                other_user_id = sender_id

            # The `sender_id` field here is misnamed.  It's really
            # just the other participant in a PM conversation.  For
            # most unread PM messages, the other user is also the sender,
            # but that's not true for certain messages sent from the
            # API.  Unfortunately, it's difficult now to rename the
            # field without breaking mobile.
            pm_dict[message_id] = dict(
                sender_id=other_user_id,
            )

        elif msg_type == Recipient.HUDDLE:
            user_ids_string = get_huddle_users(recipient_id)
            huddle_dict[message_id] = dict(
                user_ids_string=user_ids_string,
            )

        # TODO: Add support for alert words here as well.
        is_mentioned = (row['flags'] & UserMessage.flags.mentioned) != 0
        is_wildcard_mentioned = (row['flags'] & UserMessage.flags.wildcard_mentioned) != 0
        if is_mentioned:
            mentions.add(message_id)
        if is_wildcard_mentioned:
            if msg_type == Recipient.STREAM:
                stream_id = row['message__recipient__type_id']
                topic = row[MESSAGE__TOPIC]
                # Wildcard mentions only count when the stream/topic is
                # not muted.
                if not is_row_muted(stream_id, recipient_id, topic):
                    mentions.add(message_id)
            else:  # nocoverage # TODO: Test wildcard mentions in PMs.
                mentions.add(message_id)

    return dict(
        pm_dict=pm_dict,
        stream_dict=stream_dict,
        muted_stream_ids=muted_stream_ids,
        unmuted_stream_msgs=unmuted_stream_msgs,
        huddle_dict=huddle_dict,
        mentions=mentions,
    )
|
|
|
|
def aggregate_unread_data(raw_data: RawUnreadMessagesResult) -> UnreadMessagesResult:
    """Collapse raw per-message unread data into its grouped summary form.

    The private, stream and huddle dicts in ``raw_data`` are each passed
    to aggregate_message_dict together with the fields that identify a
    conversation of that kind.  ``mentions`` is returned as a list, and
    ``count`` is the number of private, huddle and unmuted stream
    messages.
    """
    private_rows = raw_data['pm_dict']
    stream_rows = raw_data['stream_dict']
    unmuted_stream_ids = raw_data['unmuted_stream_msgs']
    huddle_rows = raw_data['huddle_dict']
    mentioned_ids = list(raw_data['mentions'])

    # Every stream row is grouped below, but only the unmuted ones are
    # counted towards the total.
    total = len(private_rows) + len(unmuted_stream_ids) + len(huddle_rows)

    summary: UnreadMessagesResult = dict(
        pms=aggregate_message_dict(
            input_dict=private_rows,
            lookup_fields=['sender_id'],
            collect_senders=False,
        ),
        streams=aggregate_message_dict(
            input_dict=stream_rows,
            lookup_fields=['stream_id', 'topic'],
            collect_senders=True,
        ),
        huddles=aggregate_message_dict(
            input_dict=huddle_rows,
            lookup_fields=['user_ids_string'],
            collect_senders=False,
        ),
        mentions=mentioned_ids,
        count=total,
    )
    return summary
|
|
|
|
def apply_unread_message_event(user_profile: UserProfile,
                               state: RawUnreadMessagesResult,
                               message: Dict[str, Any],
                               flags: List[str]) -> None:
    """Record a newly arrived unread message in ``state`` (mutated in place).

    ``message`` is an event-style message dict.  Depending on its type:

    * stream messages are stored in ``state['stream_dict']`` and, unless
      the stream or topic is muted, also in ``state['unmuted_stream_msgs']``;
    * private messages with at most one participant other than
      ``user_profile`` are stored in ``state['pm_dict']``;
    * private messages with more participants are stored in
      ``state['huddle_dict']`` under a sorted, comma-joined id string.

    ``flags`` holds the flag names for the message; 'mentioned' and
    'wildcard_mentioned' decide whether the id is added to
    ``state['mentions']``.

    Raises AssertionError if ``message['type']`` is neither 'stream' nor
    'private'.
    """
    message_id = message['id']
    if message['type'] == 'stream':
        message_type = 'stream'
    elif message['type'] == 'private':
        others = [
            recip for recip in message['display_recipient']
            if recip['id'] != user_profile.id
        ]
        message_type = 'private' if len(others) <= 1 else 'huddle'
    else:
        raise AssertionError("Invalid message type {}".format(message['type']))

    sender_id = message['sender_id']

    if message_type == 'stream':
        stream_id = message['stream_id']
        topic = message[TOPIC_NAME]
        state['stream_dict'][message_id] = dict(
            stream_id=stream_id,
            topic=topic,
            sender_id=sender_id,
        )

        if stream_id not in state['muted_stream_ids']:
            # This next check hits the database.
            if not topic_is_muted(user_profile, stream_id, topic):
                state['unmuted_stream_msgs'].add(message_id)

    elif message_type == 'private':
        # With no other participant this is a message to oneself.
        if len(others) == 1:
            other_id = others[0]['id']
        else:
            other_id = user_profile.id

        # The `sender_id` field here is misnamed: it holds the other
        # participant in the conversation, not necessarily the sender.
        state['pm_dict'][message_id] = dict(
            sender_id=other_id,
        )

    else:
        user_ids = sorted(obj['id'] for obj in message['display_recipient'])
        state['huddle_dict'][message_id] = dict(
            user_ids_string=','.join(str(uid) for uid in user_ids),
        )

    if 'mentioned' in flags:
        state['mentions'].add(message_id)
    if 'wildcard_mentioned' in flags:
        # Wildcard mentions in muted streams/topics are suppressed, but
        # private and huddle messages cannot be muted this way, so they
        # always count -- matching how the initial unread data is built.
        if message_type != 'stream' or message_id in state['unmuted_stream_msgs']:
            state['mentions'].add(message_id)
|
|
|
|
def remove_message_id_from_unread_mgs(state: RawUnreadMessagesResult,
                                      message_id: int) -> None:
    """Drop ``message_id`` from every part of a raw unread-messages structure.

    This is the inverse of apply_unread_message_event, used once a
    message has been read or deleted.  ``state`` is mutated in place;
    an id that is not present is silently ignored.
    """
    # Set-valued entries.
    state['mentions'].discard(message_id)
    state['unmuted_stream_msgs'].discard(message_id)

    # Per-type dicts keyed by message id.
    for unread_rows in (state['pm_dict'], state['stream_dict'], state['huddle_dict']):
        unread_rows.pop(message_id, None)
|
|
|
|
def estimate_recent_messages(realm: Realm, hours: int) -> int:
    """Sum the 'messages_sent:is_bot:hour' counts recorded for ``realm``.

    Only RealmCount rows whose end_time is later than ``hours`` hours
    before now are included.  Returns 0 when the aggregate is empty.
    """
    stat = COUNT_STATS['messages_sent:is_bot:hour']
    cutoff = timezone_now() - datetime.timedelta(hours=hours)
    recent_rows = RealmCount.objects.filter(
        property=stat.property,
        end_time__gt=cutoff,
        realm=realm,
    )
    total = recent_rows.aggregate(Sum('value'))['value__sum']
    return total or 0
|
|
|
|
def get_first_visible_message_id(realm: Realm) -> int:
    """Return the ``first_visible_message_id`` currently stored on ``realm``."""
    first_visible_id = realm.first_visible_message_id
    return first_visible_id
|
|
|
|
def maybe_update_first_visible_message_id(realm: Realm, lookback_hours: int) -> None:
    """Refresh the realm's first visible message id when it may have moved.

    The update runs only if the realm has a message visibility limit
    and estimate_recent_messages reports at least one message in the
    last ``lookback_hours`` hours.
    """
    recent_messages_count = estimate_recent_messages(realm, lookback_hours)
    if realm.message_visibility_limit is None:
        return
    if recent_messages_count <= 0:
        return
    update_first_visible_message_id(realm)
|
|
|
|
def update_first_visible_message_id(realm: Realm) -> None:
    """Recompute ``realm.first_visible_message_id`` and save it.

    With no visibility limit the value is 0.  Otherwise it is the id of
    the limit-th newest message whose sender belongs to the realm, or 0
    when the realm has fewer messages than the limit.
    """
    limit = realm.message_visibility_limit
    if limit is None:
        realm.first_visible_message_id = 0
    else:
        newest_first = Message.objects.filter(sender__realm=realm).values('id').order_by('-id')
        try:
            # Index limit - 1 of the newest-first ordering is the oldest
            # message that is still within the limit.
            cutoff_id = newest_first[limit - 1]["id"]
        except IndexError:
            cutoff_id = 0
        realm.first_visible_message_id = cutoff_id
    realm.save(update_fields=["first_visible_message_id"])
|
|
|
|
|
|
def get_recent_conversations_recipient_id(user_profile: UserProfile,
                                          recipient_id: int,
                                          sender_id: int) -> int:
    """Helper for doing lookups of the recipient_id that
    get_recent_private_conversations would have used to record that
    message in its data structure.

    When the message was addressed to ``user_profile``'s own recipient,
    the conversation is recorded under the sender's recipient_id, which
    is looked up from the database.  Otherwise ``recipient_id`` is
    returned unchanged.
    """
    # Compare against the user's recipient id, not the user id: the two
    # are different id spaces, and get_recent_private_conversations
    # keys its data on recipient ids.
    my_recipient_id = user_profile.recipient_id
    if recipient_id == my_recipient_id:
        return UserProfile.objects.values_list('recipient_id', flat=True).get(id=sender_id)
    return recipient_id
|
|
|
|
def get_recent_private_conversations(user_profile: UserProfile) -> Dict[int, Dict[str, Any]]:
    """Return the user's recent private conversations, keyed by recipient_id.

    Each value is a dict with ``max_message_id`` (the newest message id
    seen for that conversation) and ``user_ids`` (the sorted ids of the
    other participants; empty for PMs with yourself).

    The hand-written SQL is carefully optimized and designed to use the
    UserMessage index on private messages.  It is complicated by how
    1:1 private messages are stored: the message is recorded against
    the recipient_id of whichever user was the recipient, so for
    messages sent directly to us the other party has to be derived
    from the sender_id instead.  The same pattern appears in
    zerver/lib/events.py.

    Ideally this would be written with the Django ORM, but even without
    the UNION ALL that does not seem possible, because the equivalent
    Django syntax (for the first part of this query):

        message_data = UserMessage.objects.select_related("message__recipient_id").filter(
            user_profile=user_profile,
        ).extra(
            where=[UserMessage.where_private()]
        ).order_by("-message_id")[:1000].values(
            "message__recipient_id").annotate(last_message_id=Max("message_id"))

    does not properly nest the GROUP BY (from .annotate) with the slicing.

    The result is a plain dictionary so that callers can modify it
    conveniently; it is converted into its final form by post_process.
    """
    RECENT_CONVERSATIONS_LIMIT = 1000

    my_recipient_id = user_profile.recipient_id

    # First branch: our private messages stored under some other
    # recipient.  Second branch: messages addressed to our own
    # recipient, attributed to the sender's recipient instead.
    # NOTE(review): 2048 is presumably the is_private flag bit -- confirm
    # against UserMessage.flags.
    query = SQL('''
    SELECT
        subquery.recipient_id, MAX(subquery.message_id)
    FROM (
        (SELECT
            um.message_id AS message_id,
            m.recipient_id AS recipient_id
        FROM
            zerver_usermessage um
        JOIN
            zerver_message m
        ON
            um.message_id = m.id
        WHERE
            um.user_profile_id=%(user_profile_id)s AND
            um.flags & 2048 <> 0 AND
            m.recipient_id <> %(my_recipient_id)s
        ORDER BY message_id DESC
        LIMIT %(conversation_limit)s)
        UNION ALL
        (SELECT
            m.id AS message_id,
            sender_profile.recipient_id AS recipient_id
        FROM
            zerver_message m
        JOIN
            zerver_userprofile sender_profile
        ON
            m.sender_id = sender_profile.id
        WHERE
            m.recipient_id=%(my_recipient_id)s
        ORDER BY message_id DESC
        LIMIT %(conversation_limit)s)
    ) AS subquery
    GROUP BY subquery.recipient_id
    ''')

    query_params = {
        "user_profile_id": user_profile.id,
        "conversation_limit": RECENT_CONVERSATIONS_LIMIT,
        "my_recipient_id": my_recipient_id,
    }
    with connection.cursor() as cursor:
        cursor.execute(query, query_params)
        rows = cursor.fetchall()

    # Each row is (recipient_id, max_message_id) for a party we have had
    # a recent (group?) private message conversation with, including PMs
    # with yourself (those end up with an empty list of user_ids).
    recipient_map: Dict[int, Dict[str, Any]] = {
        recipient_id: dict(max_message_id=max_message_id, user_ids=[])
        for recipient_id, max_message_id in rows
    }

    # Now map each recipient_id to the user IDs subscribed to it,
    # leaving ourselves out.
    other_participants = Subscription.objects.filter(
        recipient_id__in=recipient_map.keys(),
    ).exclude(
        user_profile_id=user_profile.id,
    ).values_list("recipient_id", "user_profile_id")
    for recipient_id, participant_id in other_participants:
        recipient_map[recipient_id]['user_ids'].append(participant_id)

    # Sort to prevent test flakes and client bugs.
    for conversation in recipient_map.values():
        conversation['user_ids'].sort()

    return recipient_map
|