2016-10-04 15:52:26 +02:00
|
|
|
import datetime
|
|
|
|
import ujson
|
|
|
|
import zlib
|
2019-02-11 15:19:38 +01:00
|
|
|
import ahocorasick
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2016-10-12 02:14:08 +02:00
|
|
|
from django.utils.translation import ugettext as _
|
2017-07-16 09:41:38 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2019-03-20 04:15:58 +01:00
|
|
|
from django.db import connection
|
2018-01-22 21:50:22 +01:00
|
|
|
from django.db.models import Sum
|
|
|
|
|
|
|
|
from analytics.lib.counts import COUNT_STATS, RealmCount
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2017-10-10 05:23:53 +02:00
|
|
|
from zerver.lib.avatar import get_avatar_field
|
2016-10-04 15:52:26 +02:00
|
|
|
import zerver.lib.bugdown as bugdown
|
2017-11-07 17:36:29 +01:00
|
|
|
from zerver.lib.cache import (
|
|
|
|
cache_with_key,
|
|
|
|
generic_bulk_cached_fetch,
|
|
|
|
to_dict_cache_key,
|
|
|
|
to_dict_cache_key_id,
|
|
|
|
)
|
2019-08-18 00:40:35 +02:00
|
|
|
from zerver.lib.display_recipient import UserDisplayRecipient, DisplayRecipientT, \
|
2019-08-18 00:24:46 +02:00
|
|
|
bulk_fetch_display_recipients
|
2016-10-12 02:14:08 +02:00
|
|
|
from zerver.lib.request import JsonableError
|
2017-10-29 17:11:11 +01:00
|
|
|
from zerver.lib.stream_subscription import (
|
|
|
|
get_stream_subscriptions_for_user,
|
|
|
|
)
|
2016-10-04 15:52:26 +02:00
|
|
|
from zerver.lib.timestamp import datetime_to_timestamp
|
2018-11-08 15:33:44 +01:00
|
|
|
from zerver.lib.topic import (
|
|
|
|
DB_TOPIC_NAME,
|
2018-11-08 15:37:37 +01:00
|
|
|
MESSAGE__TOPIC,
|
2018-11-08 15:33:44 +01:00
|
|
|
TOPIC_LINKS,
|
|
|
|
TOPIC_NAME,
|
|
|
|
)
|
Simplify how we apply events for unread messages.
The logic to apply events to page_params['unread_msgs'] was
complicated due to the aggregated data structures that we pass
down to the client.
Now we defer the aggregation logic until after we apply the
events. This leads to some simplifications in that codepath,
as well as some performance enhancements.
The intermediate data structure has sets and dictionaries that
generally are keyed by message_id, so most message-related
updates are O(1) in nature.
Also, by waiting to compute the counts until the end, it's a
bit less messy to try to keep track of increments/decrements.
Instead, we just update the dictionaries and sets during the
event-apply phase.
This change also fixes some corner cases:
* We now respect mutes when updating counts.
* For message updates, instead of bluntly updating
the whole topic bucket, we update individual
message ids.
Unfortunately, this change doesn't seem to address the pesky
test that fails sporadically on Travis, related to mention
updates. It will change the symptom, slightly, though.
2017-10-05 00:34:19 +02:00
|
|
|
from zerver.lib.topic_mutes import (
|
|
|
|
build_topic_mute_checker,
|
|
|
|
topic_is_muted,
|
|
|
|
)
|
2016-10-04 15:52:26 +02:00
|
|
|
|
|
|
|
from zerver.models import (
|
|
|
|
get_display_recipient_by_id,
|
2017-02-03 23:21:56 +01:00
|
|
|
get_user_profile_by_id,
|
2017-10-10 20:10:57 +02:00
|
|
|
query_for_ids,
|
2016-10-04 15:52:26 +02:00
|
|
|
Message,
|
2017-01-18 23:19:18 +01:00
|
|
|
Realm,
|
2016-10-04 15:52:26 +02:00
|
|
|
Recipient,
|
2016-10-12 02:14:08 +02:00
|
|
|
Stream,
|
2018-02-11 14:09:17 +01:00
|
|
|
SubMessage,
|
2017-08-09 02:22:00 +02:00
|
|
|
Subscription,
|
2016-10-04 18:32:46 +02:00
|
|
|
UserProfile,
|
2016-10-12 02:14:08 +02:00
|
|
|
UserMessage,
|
2018-07-27 11:47:07 +02:00
|
|
|
Reaction,
|
|
|
|
get_usermessage_by_message_id,
|
2016-10-04 15:52:26 +02:00
|
|
|
)
|
|
|
|
|
2019-08-18 00:40:35 +02:00
|
|
|
from typing import Any, Dict, List, Optional, Set, Tuple, Sequence
|
2019-08-06 01:29:34 +02:00
|
|
|
from typing_extensions import TypedDict
|
2016-10-04 18:32:46 +02:00
|
|
|
|
2018-05-11 01:40:23 +02:00
|
|
|
# Alias for a mapping from an integer id to a list of alert-word strings.
# NOTE(review): the key is presumably a user id -- confirm against callers.
RealmAlertWords = Dict[int, List[str]]

# Raw (not yet aggregated) unread-message data.  The per-type dictionaries
# are keyed by int (presumably message id -- TODO confirm); 'mentions' and
# 'unmuted_stream_msgs' are sets of ints.
RawUnreadMessagesResult = TypedDict('RawUnreadMessagesResult', {
    'pm_dict': Dict[int, Any],
    'stream_dict': Dict[int, Any],
    'huddle_dict': Dict[int, Any],
    'mentions': Set[int],
    'muted_stream_ids': List[int],
    'unmuted_stream_msgs': Set[int],
})

# List-based unread-message data together with an overall 'count'.
# NOTE(review): presumably the aggregated form derived from
# RawUnreadMessagesResult -- confirm against the aggregation code.
UnreadMessagesResult = TypedDict('UnreadMessagesResult', {
    'pms': List[Dict[str, Any]],
    'streams': List[Dict[str, Any]],
    'huddles': List[Dict[str, Any]],
    'mentions': List[int],
    'count': int,
})

# We won't try to fetch more unread message IDs from the database than
# this limit.  The limit is super high, in large part because it means
# client-side code mostly doesn't need to think about the case that a
# user has more older unread messages that were cut off.
MAX_UNREAD_MESSAGES = 50000
|
2017-08-01 18:28:56 +02:00
|
|
|
|
2017-11-07 17:36:29 +01:00
|
|
|
def messages_for_ids(message_ids: List[int],
                     user_message_flags: Dict[int, List[str]],
                     search_fields: Dict[int, Dict[str, str]],
                     apply_markdown: bool,
                     client_gravatar: bool,
                     allow_edit_history: bool) -> List[Dict[str, Any]]:
    """Return the message dicts for message_ids, in the order given.

    The dicts are obtained through generic_bulk_cached_fetch (cache keys
    from to_dict_cache_key_id, database rows from
    MessageDict.get_raw_db_rows).  Each dict is then given the caller's
    "flags", merged with its search_fields entry when one exists, and
    finally passed through MessageDict.post_process_dicts.

    Raises KeyError if a message id is missing from the fetched dicts or
    from user_message_flags.
    """
    def fetch_row_id(row: Dict[str, Any]) -> int:
        return row['id']

    fetched = generic_bulk_cached_fetch(
        to_dict_cache_key_id,
        MessageDict.get_raw_db_rows,
        message_ids,
        id_fetcher=fetch_row_id,
        cache_transformer=MessageDict.build_dict_from_raw_db_row,
        extractor=extract_message_dict,
        setter=stringify_message_dict,
    )

    result = []  # type: List[Dict[str, Any]]
    for message_id in message_ids:
        payload = fetched[message_id]
        payload["flags"] = user_message_flags[message_id]
        if message_id in search_fields:
            payload.update(search_fields[message_id])
        if not allow_edit_history:
            # Make sure that we never send message edit history to clients
            # in realms with allow_edit_history disabled.
            payload.pop("edit_history", None)
        result.append(payload)

    MessageDict.post_process_dicts(result, apply_markdown, client_gravatar)
    return result
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def sew_messages_and_reactions(messages: List[Dict[str, Any]],
                               reactions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Given an iterable of messages and reactions, stitch reactions
    into messages.

    Every message dict gets a 'reactions' list (mutated in place), and
    each reaction row is appended to the message whose 'id' equals the
    reaction's 'message_id'.  Reaction rows that refer to a message not
    present in `messages` are ignored, matching the behavior of
    sew_messages_and_submessages.

    Returns the messages as a list, de-duplicated by 'id'.
    """
    # Add all messages with empty reaction item
    for message in messages:
        message['reactions'] = []

    # Convert list of messages into dictionary to make reaction stitching easy
    converted_messages = {message['id']: message for message in messages}

    for reaction in reactions:
        target = converted_messages.get(reaction['message_id'])
        if target is None:
            # The reaction belongs to a message we were not given; skip
            # it rather than failing the whole batch with a KeyError.
            continue
        target['reactions'].append(reaction)

    return list(converted_messages.values())
|
|
|
|
|
|
|
|
|
2018-02-11 14:09:17 +01:00
|
|
|
def sew_messages_and_submessages(messages: List[Dict[str, Any]],
                                 submessages: List[Dict[str, Any]]) -> None:
    """Attach submessage rows to their messages, in place.

    Every message dict gets a 'submessages' list; each submessage row is
    appended to the message whose 'id' equals the row's 'message_id'.
    Rows whose message is not in `messages` are skipped.
    """
    # This is super similar to sew_messages_and_reactions.
    for msg in messages:
        msg['submessages'] = []

    messages_by_id = {msg['id']: msg for msg in messages}

    for row in submessages:
        owner = messages_by_id.get(row['message_id'])
        if owner is not None:
            owner['submessages'].append(row)
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def extract_message_dict(message_bytes: bytes) -> Dict[str, Any]:
    """Inverse of stringify_message_dict: zlib-decompress the bytes,
    decode them as UTF-8 and parse the JSON into a dict.
    """
    json_text = zlib.decompress(message_bytes).decode("utf-8")
    return ujson.loads(json_text)
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def stringify_message_dict(message_dict: Dict[str, Any]) -> bytes:
    """Serialize a message dict to JSON, encode it and zlib-compress it;
    extract_message_dict reverses this.
    """
    json_bytes = ujson.dumps(message_dict).encode()
    return zlib.compress(json_bytes)
|
2016-10-04 15:52:26 +02:00
|
|
|
|
|
|
|
@cache_with_key(to_dict_cache_key, timeout=24 * 3600)
def message_to_dict_json(message: Message) -> bytes:
    """Return the serialized dict for `message` as produced by
    MessageDict.to_dict_uncached.

    The function is wrapped by cache_with_key, keyed with
    to_dict_cache_key and given a timeout of 24 * 3600 (presumably
    seconds, i.e. one day).
    """
    serialized = MessageDict.to_dict_uncached(message)
    return serialized
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2018-08-09 17:50:43 +02:00
|
|
|
def save_message_rendered_content(message: Message, content: str) -> str:
    """Render `content` for `message`, store the result on the message
    and return it.

    Sets message.rendered_content and message.rendered_content_version
    (to bugdown.version), then calls message.save_rendered_content().
    """
    realm = message.get_realm()
    freshly_rendered = render_markdown(message, content, realm=realm)

    message.rendered_content = freshly_rendered
    message.rendered_content_version = bugdown.version
    message.save_rendered_content()

    return freshly_rendered
|
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class MessageDict:
|
2017-10-20 21:34:05 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def wide_dict(message: Message) -> Dict[str, Any]:
|
2017-10-20 21:34:05 +02:00
|
|
|
'''
|
|
|
|
The next two lines get the cachable field related
|
|
|
|
to our message object, with the side effect of
|
|
|
|
populating the cache.
|
|
|
|
'''
|
|
|
|
json = message_to_dict_json(message)
|
|
|
|
obj = extract_message_dict(json)
|
|
|
|
|
|
|
|
'''
|
|
|
|
The steps below are similar to what we do in
|
|
|
|
post_process_dicts(), except we don't call finalize_payload(),
|
|
|
|
since that step happens later in the queue
|
|
|
|
processor.
|
|
|
|
'''
|
|
|
|
MessageDict.bulk_hydrate_sender_info([obj])
|
2019-08-07 00:18:13 +02:00
|
|
|
MessageDict.bulk_hydrate_recipient_info([obj])
|
2017-10-20 21:34:05 +02:00
|
|
|
|
|
|
|
return obj
|
|
|
|
|
2017-10-10 09:22:21 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def post_process_dicts(objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool) -> None:
|
2017-10-10 20:10:57 +02:00
|
|
|
MessageDict.bulk_hydrate_sender_info(objs)
|
2019-08-07 00:18:13 +02:00
|
|
|
MessageDict.bulk_hydrate_recipient_info(objs)
|
2017-10-10 20:10:57 +02:00
|
|
|
|
2017-10-10 09:22:21 +02:00
|
|
|
for obj in objs:
|
2017-10-31 03:02:23 +01:00
|
|
|
MessageDict.finalize_payload(obj, apply_markdown, client_gravatar)
|
2017-10-14 02:01:20 +02:00
|
|
|
|
2017-10-20 21:27:26 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def finalize_payload(obj: Dict[str, Any],
|
|
|
|
apply_markdown: bool,
|
2019-11-05 20:35:47 +01:00
|
|
|
client_gravatar: bool,
|
|
|
|
keep_rendered_content: bool=False) -> None:
|
2017-10-31 03:02:23 +01:00
|
|
|
MessageDict.set_sender_avatar(obj, client_gravatar)
|
2017-10-20 21:27:26 +02:00
|
|
|
if apply_markdown:
|
|
|
|
obj['content_type'] = 'text/html'
|
|
|
|
obj['content'] = obj['rendered_content']
|
|
|
|
else:
|
|
|
|
obj['content_type'] = 'text/x-markdown'
|
|
|
|
|
2019-11-05 20:35:47 +01:00
|
|
|
if not keep_rendered_content:
|
|
|
|
del obj['rendered_content']
|
2017-10-20 21:27:26 +02:00
|
|
|
del obj['sender_realm_id']
|
|
|
|
del obj['sender_avatar_source']
|
2019-11-05 20:23:58 +01:00
|
|
|
del obj['sender_delivery_email']
|
2017-10-20 21:27:26 +02:00
|
|
|
del obj['sender_avatar_version']
|
|
|
|
|
|
|
|
del obj['recipient_type']
|
|
|
|
del obj['recipient_type_id']
|
|
|
|
del obj['sender_is_mirror_dummy']
|
2017-10-14 15:44:59 +02:00
|
|
|
|
2016-10-04 15:52:26 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def to_dict_uncached(message: Message) -> bytes:
|
2017-10-20 20:29:49 +02:00
|
|
|
dct = MessageDict.to_dict_uncached_helper(message)
|
2016-10-04 15:52:26 +02:00
|
|
|
return stringify_message_dict(dct)
|
|
|
|
|
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def to_dict_uncached_helper(message: Message) -> Dict[str, Any]:
|
2016-10-04 15:52:26 +02:00
|
|
|
return MessageDict.build_message_dict(
|
2017-01-24 07:06:13 +01:00
|
|
|
message = message,
|
|
|
|
message_id = message.id,
|
|
|
|
last_edit_time = message.last_edit_time,
|
|
|
|
edit_history = message.edit_history,
|
|
|
|
content = message.content,
|
2018-11-08 15:33:44 +01:00
|
|
|
topic_name = message.topic_name(),
|
2019-08-28 02:43:19 +02:00
|
|
|
date_sent = message.date_sent,
|
2017-01-24 07:06:13 +01:00
|
|
|
rendered_content = message.rendered_content,
|
|
|
|
rendered_content_version = message.rendered_content_version,
|
|
|
|
sender_id = message.sender.id,
|
|
|
|
sender_realm_id = message.sender.realm_id,
|
|
|
|
sending_client_name = message.sending_client.name,
|
|
|
|
recipient_id = message.recipient.id,
|
|
|
|
recipient_type = message.recipient.type,
|
|
|
|
recipient_type_id = message.recipient.type_id,
|
2018-02-11 14:09:17 +01:00
|
|
|
reactions = Reaction.get_raw_db_rows([message.id]),
|
|
|
|
submessages = SubMessage.get_raw_db_rows([message.id]),
|
2016-10-04 15:52:26 +02:00
|
|
|
)
|
|
|
|
|
2017-10-14 16:14:54 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_raw_db_rows(needed_ids: List[int]) -> List[Dict[str, Any]]:
|
2017-10-14 16:14:54 +02:00
|
|
|
# This is a special purpose function optimized for
|
|
|
|
# callers like get_messages_backend().
|
|
|
|
fields = [
|
|
|
|
'id',
|
2018-11-08 15:33:44 +01:00
|
|
|
DB_TOPIC_NAME,
|
2019-08-28 02:43:19 +02:00
|
|
|
'date_sent',
|
2017-10-14 16:14:54 +02:00
|
|
|
'last_edit_time',
|
|
|
|
'edit_history',
|
|
|
|
'content',
|
|
|
|
'rendered_content',
|
|
|
|
'rendered_content_version',
|
|
|
|
'recipient_id',
|
|
|
|
'recipient__type',
|
|
|
|
'recipient__type_id',
|
|
|
|
'sender_id',
|
|
|
|
'sending_client__name',
|
Remove sender-related fields from message cache.
This change makes the cache entries smaller for message
dictionaries. It also ensures we get valid data put into
message dictionaries if, for example, the sender's avatar
changes.
After this change, all of the attributes for a message
sender are only fetched during post-processing with two
exceptions:
* We get sender_id for "free" from the message,
and it's the primary key that we need to figure
out which data to fetch in post-processing.
* We need sender_realm_id to be able to cache topic
links, and a sender's realm id will never change,
so it's not a concern for invalidating cache rows.
All the other attributes are either likely to change (e.g.
sender avatar_version) and/or impact the size of cache
entries more severely than the two small id fields above.
This change should improve our overall system performance
by reducing the amount of memory used by every N message
rows we cache, and typically N will be in the thousands or
so on a large realm.
The other major implication of this change is that when
a user changes their avatar, and then later messages that
the user sent are fetched, all of the fields that go into
computing the avatar url will be pulled from the database,
not from cache.
2017-10-14 18:28:43 +02:00
|
|
|
'sender__realm_id',
|
2017-10-14 16:14:54 +02:00
|
|
|
]
|
|
|
|
messages = Message.objects.filter(id__in=needed_ids).values(*fields)
|
|
|
|
|
2018-02-11 14:09:17 +01:00
|
|
|
submessages = SubMessage.get_raw_db_rows(needed_ids)
|
|
|
|
sew_messages_and_submessages(messages, submessages)
|
|
|
|
|
2017-10-14 16:14:54 +02:00
|
|
|
reactions = Reaction.get_raw_db_rows(needed_ids)
|
|
|
|
return sew_messages_and_reactions(messages, reactions)
|
|
|
|
|
2016-10-04 15:52:26 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def build_dict_from_raw_db_row(row: Dict[str, Any]) -> Dict[str, Any]:
|
2016-10-04 15:52:26 +02:00
|
|
|
'''
|
|
|
|
row is a row from a .values() call, and it needs to have
|
|
|
|
all the relevant fields populated
|
|
|
|
'''
|
|
|
|
return MessageDict.build_message_dict(
|
2017-01-24 07:06:13 +01:00
|
|
|
message = None,
|
|
|
|
message_id = row['id'],
|
|
|
|
last_edit_time = row['last_edit_time'],
|
|
|
|
edit_history = row['edit_history'],
|
|
|
|
content = row['content'],
|
2018-11-08 15:33:44 +01:00
|
|
|
topic_name = row[DB_TOPIC_NAME],
|
2019-08-28 02:43:19 +02:00
|
|
|
date_sent = row['date_sent'],
|
2017-01-24 07:06:13 +01:00
|
|
|
rendered_content = row['rendered_content'],
|
|
|
|
rendered_content_version = row['rendered_content_version'],
|
|
|
|
sender_id = row['sender_id'],
|
Remove sender-related fields from message cache.
This change makes the cache entries smaller for message
dictionaries. It also ensures we get valid data put into
message dictionaries if, for example, the sender's avatar
changes.
After this change, all of the attributes for a message
sender are only fetched during post-processing with two
exceptions:
* We get sender_id for "free" from the message,
and it's the primary key that we need to figure
out which data to fetch in post-processing.
* We need sender_realm_id to be able to cache topic
links, and a sender's realm id will never change,
so it's not a concern for invalidating cache rows.
All the other attributes are either likely to change (e.g.
sender avatar_version) and/or impact the size of cache
entries more severely than the two small id fields above.
This change should improve our overall system performance
by reducing the amount of memory used by every N message
rows we cache, and typically N will be in the thousands or
so on a large realm.
The other major implication of this change is that when
a user changes their avatar, and then later messages that
the user sent are fetched, all of the fields that go into
computing the avatar url will be pulled from the database,
not from cache.
2017-10-14 18:28:43 +02:00
|
|
|
sender_realm_id = row['sender__realm_id'],
|
2017-01-24 07:06:13 +01:00
|
|
|
sending_client_name = row['sending_client__name'],
|
|
|
|
recipient_id = row['recipient_id'],
|
|
|
|
recipient_type = row['recipient__type'],
|
|
|
|
recipient_type_id = row['recipient__type_id'],
|
2018-02-11 14:09:17 +01:00
|
|
|
reactions=row['reactions'],
|
|
|
|
submessages=row['submessages'],
|
2016-10-04 15:52:26 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def build_message_dict(
|
2018-03-12 02:47:49 +01:00
|
|
|
message: Optional[Message],
|
|
|
|
message_id: int,
|
|
|
|
last_edit_time: Optional[datetime.datetime],
|
2018-05-11 01:40:23 +02:00
|
|
|
edit_history: Optional[str],
|
|
|
|
content: str,
|
2018-11-08 15:33:44 +01:00
|
|
|
topic_name: str,
|
2019-08-28 02:43:19 +02:00
|
|
|
date_sent: datetime.datetime,
|
2018-05-11 01:40:23 +02:00
|
|
|
rendered_content: Optional[str],
|
2018-03-12 02:47:49 +01:00
|
|
|
rendered_content_version: Optional[int],
|
|
|
|
sender_id: int,
|
|
|
|
sender_realm_id: int,
|
2018-05-11 01:40:23 +02:00
|
|
|
sending_client_name: str,
|
2018-03-12 02:47:49 +01:00
|
|
|
recipient_id: int,
|
|
|
|
recipient_type: int,
|
|
|
|
recipient_type_id: int,
|
2018-02-11 14:09:17 +01:00
|
|
|
reactions: List[Dict[str, Any]],
|
|
|
|
submessages: List[Dict[str, Any]]
|
2018-03-12 02:47:49 +01:00
|
|
|
) -> Dict[str, Any]:
|
2016-10-04 15:52:26 +02:00
|
|
|
|
|
|
|
obj = dict(
|
|
|
|
id = message_id,
|
|
|
|
sender_id = sender_id,
|
2017-10-20 20:29:49 +02:00
|
|
|
content = content,
|
2017-10-10 08:12:03 +02:00
|
|
|
recipient_type_id = recipient_type_id,
|
|
|
|
recipient_type = recipient_type,
|
2016-10-04 15:52:26 +02:00
|
|
|
recipient_id = recipient_id,
|
2019-08-28 02:43:19 +02:00
|
|
|
timestamp = datetime_to_timestamp(date_sent),
|
2016-10-04 15:52:26 +02:00
|
|
|
client = sending_client_name)
|
|
|
|
|
2018-11-08 15:33:44 +01:00
|
|
|
obj[TOPIC_NAME] = topic_name
|
2017-10-14 02:01:20 +02:00
|
|
|
obj['sender_realm_id'] = sender_realm_id
|
|
|
|
|
2019-09-26 13:41:46 +02:00
|
|
|
# Render topic_links with the stream's realm instead of the
|
|
|
|
# user's realm; this is important for messages sent by
|
|
|
|
# cross-realm bots like NOTIFICATION_BOT.
|
|
|
|
#
|
|
|
|
# TODO: We could potentially avoid this database query in
|
|
|
|
# common cases by optionally passing through the
|
|
|
|
# stream_realm_id through the code path from do_send_messages
|
|
|
|
# (where we've already fetched the data). It would involve
|
|
|
|
# somewhat messy plumbing, but would probably be worth it.
|
|
|
|
rendering_realm_id = sender_realm_id
|
|
|
|
if message and recipient_type == Recipient.STREAM:
|
|
|
|
rendering_realm_id = Stream.objects.get(id=recipient_type_id).realm_id
|
|
|
|
|
|
|
|
obj[TOPIC_LINKS] = bugdown.topic_links(rendering_realm_id, topic_name)
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2017-01-24 06:11:49 +01:00
|
|
|
if last_edit_time is not None:
|
2016-10-04 15:52:26 +02:00
|
|
|
obj['last_edit_timestamp'] = datetime_to_timestamp(last_edit_time)
|
2017-05-24 21:28:26 +02:00
|
|
|
assert edit_history is not None
|
2016-10-04 15:52:26 +02:00
|
|
|
obj['edit_history'] = ujson.loads(edit_history)
|
|
|
|
|
2017-10-20 20:29:49 +02:00
|
|
|
if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version):
|
|
|
|
if message is None:
|
|
|
|
# We really shouldn't be rendering objects in this method, but there is
|
|
|
|
# a scenario where we upgrade the version of bugdown and fail to run
|
|
|
|
# management commands to re-render historical messages, and then we
|
|
|
|
# need to have side effects. This method is optimized to not need full
|
|
|
|
# blown ORM objects, but the bugdown renderer is unfortunately highly
|
|
|
|
# coupled to Message, and we also need to persist the new rendered content.
|
|
|
|
# If we don't have a message object passed in, we get one here. The cost
|
|
|
|
# of going to the DB here should be overshadowed by the cost of rendering
|
|
|
|
# and updating the row.
|
|
|
|
# TODO: see #1379 to eliminate bugdown dependencies
|
|
|
|
message = Message.objects.select_related().get(id=message_id)
|
|
|
|
|
|
|
|
assert message is not None # Hint for mypy.
|
|
|
|
# It's unfortunate that we need to have side effects on the message
|
|
|
|
# in some cases.
|
2018-08-09 17:50:43 +02:00
|
|
|
rendered_content = save_message_rendered_content(message, content)
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2017-10-20 20:29:49 +02:00
|
|
|
if rendered_content is not None:
|
|
|
|
obj['rendered_content'] = rendered_content
|
2016-10-04 15:52:26 +02:00
|
|
|
else:
|
2017-11-03 03:12:25 +01:00
|
|
|
obj['rendered_content'] = ('<p>[Zulip note: Sorry, we could not ' +
|
|
|
|
'understand the formatting of your message]</p>')
|
2016-10-04 15:52:26 +02:00
|
|
|
|
2017-09-25 04:24:13 +02:00
|
|
|
if rendered_content is not None:
|
|
|
|
obj['is_me_message'] = Message.is_status_message(content, rendered_content)
|
|
|
|
else:
|
|
|
|
obj['is_me_message'] = False
|
|
|
|
|
2016-12-06 07:19:34 +01:00
|
|
|
obj['reactions'] = [ReactionDict.build_dict_from_raw_db_row(reaction)
|
|
|
|
for reaction in reactions]
|
2018-02-11 14:09:17 +01:00
|
|
|
obj['submessages'] = submessages
|
2016-10-04 15:52:26 +02:00
|
|
|
return obj
|
|
|
|
|
2017-10-10 20:10:57 +02:00
|
|
|
@staticmethod
|
2017-11-05 11:15:10 +01:00
|
|
|
def bulk_hydrate_sender_info(objs: List[Dict[str, Any]]) -> None:
|
2017-10-10 20:10:57 +02:00
|
|
|
|
|
|
|
sender_ids = list({
|
|
|
|
obj['sender_id']
|
|
|
|
for obj in objs
|
|
|
|
})
|
|
|
|
|
|
|
|
if not sender_ids:
|
|
|
|
return
|
|
|
|
|
|
|
|
query = UserProfile.objects.values(
|
|
|
|
'id',
|
|
|
|
'full_name',
|
|
|
|
'short_name',
|
2019-11-05 20:23:58 +01:00
|
|
|
'delivery_email',
|
Remove sender-related fields from message cache.
This change makes the cache entries smaller for message
dictionaries. It also ensures we get valid data put into
message dictionaries if, for example, the sender's avatar
changes.
After this change, all of the attributes for a message
sender are only fetched during post-processing with two
exceptions:
* We get sender_id for "free" from the message,
and it's the primary key that we need to figure
out which data to fetch in post-processing.
* We need sender_realm_id to be able to cache topic
links, and a sender's realm id will never change,
so it's not a concern for invalidating cache rows.
All the other attributes are either likely to change (e.g.
sender avatar_version) and/or impact the size of cache
entries more severely than the two small id fields above.
This change should improve our overall system performance
by reducing the amount of memory used by every N message
rows we cache, and typically N will be in the thousands or
so on a large realm.
The other major implication of this change is that when
a user changes their avatar, and then later messages that
the user sent are fetched, all of the fields that go into
computing the avatar url will be pulled from the database,
not from cache.
2017-10-14 18:28:43 +02:00
|
|
|
'email',
|
|
|
|
'realm__string_id',
|
|
|
|
'avatar_source',
|
|
|
|
'avatar_version',
|
|
|
|
'is_mirror_dummy',
|
2017-10-10 20:10:57 +02:00
|
|
|
)
|
|
|
|
|
Remove sender-related fields from message cache.
This change makes the cache entries smaller for message
dictionaries. It also ensures we get valid data put into
message dictionaries if, for example, the sender's avatar
changes.
After this change, all of the attributes for a message
sender are only fetched during post-processing with two
exceptions:
* We get sender_id for "free" from the message,
and it's the primary key that we need to figure
out which data to fetch in post-processing.
* We need sender_realm_id to be able to cache topic
links, and a sender's realm id will never change,
so it's not a concern for invalidating cache rows.
All the other attributes are either likely to change (e.g.
sender avatar_version) and/or impact the size of cache
entries more severely than the two small id fields above.
This change should improve our overall system performance
by reducing the amount of memory used by every N message
rows we cache, and typically N will be in the thousands or
so on a large realm.
The other major implication of this change is that when
a user changes their avatar, and then later messages that
the user sent are fetched, all of the fields that go into
computing the avatar url will be pulled from the database,
not from cache.
2017-10-14 18:28:43 +02:00
|
|
|
rows = query_for_ids(query, sender_ids, 'zerver_userprofile.id')
|
2017-10-10 20:10:57 +02:00
|
|
|
|
|
|
|
sender_dict = {
|
|
|
|
row['id']: row
|
|
|
|
for row in rows
|
|
|
|
}
|
|
|
|
|
|
|
|
for obj in objs:
|
|
|
|
sender_id = obj['sender_id']
|
|
|
|
user_row = sender_dict[sender_id]
|
|
|
|
obj['sender_full_name'] = user_row['full_name']
|
|
|
|
obj['sender_short_name'] = user_row['short_name']
|
Remove sender-related fields from message cache.
This change makes the cache entries smaller for message
dictionaries. It also ensures we get valid data put into
message dictionaries if, for example, the sender's avatar
changes.
After this change, all of the attributes for a message
sender are only fetched during post-processing with two
exceptions:
* We get sender_id for "free" from the message,
and it's the primary key that we need to figure
out which data to fetch in post-processing.
* We need sender_realm_id to be able to cache topic
links, and a sender's realm id will never change,
so it's not a concern for invalidating cache rows.
All the other attributes are either likely to change (e.g.
sender avatar_version) and/or impact the size of cache
entries more severely than the two small id fields above.
This change should improve our overall system performance
by reducing the amount of memory used by every N message
rows we cache, and typically N will be in the thousands or
so on a large realm.
The other major implication of this change is that when
a user changes their avatar, and then later messages that
the user sent are fetched, all of the fields that go into
computing the avatar url will be pulled from the database,
not from cache.
2017-10-14 18:28:43 +02:00
|
|
|
obj['sender_email'] = user_row['email']
|
2019-11-05 20:23:58 +01:00
|
|
|
obj['sender_delivery_email'] = user_row['delivery_email']
|
Remove sender-related fields from message cache.
This change makes the cache entries smaller for message
dictionaries. It also ensures we get valid data put into
message dictionaries if, for example, the sender's avatar
changes.
After this change, all of the attributes for a message
sender are only fetched during post-processing with two
exceptions:
* We get sender_id for "free" from the message,
and it's the primary key that we need to figure
out which data to fetch in post-processing.
* We need sender_realm_id to be able to cache topic
links, and a sender's realm id will never change,
so it's not a concern for invalidating cache rows.
All the other attributes are either likely to change (e.g.
sender avatar_version) and/or impact the size of cache
entries more severely than the two small id fields above.
This change should improve our overall system performance
by reducing the amount of memory used by every N message
rows we cache, and typically N will be in the thousands or
so on a large realm.
The other major implication of this change is that when
a user changes their avatar, and then later messages that
the user sent are fetched, all of the fields that go into
computing the avatar url will be pulled from the database,
not from cache.
2017-10-14 18:28:43 +02:00
|
|
|
obj['sender_realm_str'] = user_row['realm__string_id']
|
|
|
|
obj['sender_avatar_source'] = user_row['avatar_source']
|
|
|
|
obj['sender_avatar_version'] = user_row['avatar_version']
|
|
|
|
obj['sender_is_mirror_dummy'] = user_row['is_mirror_dummy']
|
2017-10-10 20:10:57 +02:00
|
|
|
|
2017-10-10 08:12:03 +02:00
|
|
|
@staticmethod
|
2019-08-18 00:40:35 +02:00
|
|
|
def hydrate_recipient_info(obj: Dict[str, Any], display_recipient: DisplayRecipientT) -> None:
|
2017-10-10 08:12:03 +02:00
|
|
|
'''
|
|
|
|
This method hyrdrates recipient info with things
|
|
|
|
like full names and emails of senders. Eventually
|
|
|
|
our clients should be able to hyrdrate these fields
|
|
|
|
themselves with info they already have on users.
|
|
|
|
'''
|
|
|
|
|
|
|
|
recipient_type = obj['recipient_type']
|
|
|
|
recipient_type_id = obj['recipient_type_id']
|
|
|
|
sender_is_mirror_dummy = obj['sender_is_mirror_dummy']
|
|
|
|
sender_email = obj['sender_email']
|
|
|
|
sender_full_name = obj['sender_full_name']
|
|
|
|
sender_short_name = obj['sender_short_name']
|
|
|
|
sender_id = obj['sender_id']
|
|
|
|
|
|
|
|
if recipient_type == Recipient.STREAM:
|
|
|
|
display_type = "stream"
|
|
|
|
elif recipient_type in (Recipient.HUDDLE, Recipient.PERSONAL):
|
2018-05-11 01:40:23 +02:00
|
|
|
assert not isinstance(display_recipient, str)
|
2017-10-10 08:12:03 +02:00
|
|
|
display_type = "private"
|
|
|
|
if len(display_recipient) == 1:
|
|
|
|
# add the sender in if this isn't a message between
|
|
|
|
# someone and themself, preserving ordering
|
|
|
|
recip = {'email': sender_email,
|
|
|
|
'full_name': sender_full_name,
|
|
|
|
'short_name': sender_short_name,
|
|
|
|
'id': sender_id,
|
2019-08-18 00:24:46 +02:00
|
|
|
'is_mirror_dummy': sender_is_mirror_dummy} # type: UserDisplayRecipient
|
2017-10-10 08:12:03 +02:00
|
|
|
if recip['email'] < display_recipient[0]['email']:
|
|
|
|
display_recipient = [recip, display_recipient[0]]
|
|
|
|
elif recip['email'] > display_recipient[0]['email']:
|
|
|
|
display_recipient = [display_recipient[0], recip]
|
|
|
|
else:
|
|
|
|
raise AssertionError("Invalid recipient type %s" % (recipient_type,))
|
|
|
|
|
|
|
|
obj['display_recipient'] = display_recipient
|
|
|
|
obj['type'] = display_type
|
|
|
|
if obj['type'] == 'stream':
|
|
|
|
obj['stream_id'] = recipient_type_id
|
2016-12-06 07:19:34 +01:00
|
|
|
|
2019-08-07 00:18:13 +02:00
|
|
|
@staticmethod
|
|
|
|
def bulk_hydrate_recipient_info(objs: List[Dict[str, Any]]) -> None:
|
|
|
|
recipient_tuples = set( # We use set to eliminate duplicate tuples.
|
|
|
|
(
|
|
|
|
obj['recipient_id'],
|
|
|
|
obj['recipient_type'],
|
|
|
|
obj['recipient_type_id']
|
|
|
|
) for obj in objs
|
|
|
|
)
|
|
|
|
display_recipients = bulk_fetch_display_recipients(recipient_tuples)
|
|
|
|
|
|
|
|
for obj in objs:
|
|
|
|
MessageDict.hydrate_recipient_info(obj, display_recipients[obj['recipient_id']])
|
|
|
|
|
2017-10-14 02:01:20 +02:00
|
|
|
@staticmethod
def set_sender_avatar(obj: Dict[str, Any], client_gravatar: bool) -> None:
    """Store the sender's avatar field on `obj` under 'avatar_url'.

    The value is whatever get_avatar_field returns for the sender_*
    entries already present in `obj`; `client_gravatar` is forwarded
    to get_avatar_field unchanged.
    """
    obj['avatar_url'] = get_avatar_field(
        user_id=obj['sender_id'],
        realm_id=obj['sender_realm_id'],
        email=obj['sender_delivery_email'],
        avatar_source=obj['sender_avatar_source'],
        avatar_version=obj['sender_avatar_version'],
        medium=False,
        client_gravatar=client_gravatar,
    )
|
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class ReactionDict:
    """Helpers for turning raw reaction rows into dictionaries."""

    @staticmethod
    def build_dict_from_raw_db_row(row: Dict[str, Any]) -> Dict[str, Any]:
        """Build a reaction dict from a raw row.

        Copies the emoji_name, emoji_code and reaction_type entries of
        `row` and nests the reacting user's email, id and full_name
        (read from the user_profile__* entries) under the 'user' key.
        """
        reaction = {
            field: row[field]
            for field in ('emoji_name', 'emoji_code', 'reaction_type')
        }  # type: Dict[str, Any]
        reaction['user'] = {
            'email': row['user_profile__email'],
            'id': row['user_profile__id'],
            'full_name': row['user_profile__full_name'],
        }
        return reaction
|
2016-12-06 07:19:34 +01:00
|
|
|
|
|
|
|
|
2018-07-27 11:47:07 +02:00
|
|
|
def access_message(user_profile: UserProfile, message_id: int) -> Tuple[Message, Optional[UserMessage]]:
    """Fetch a message by ID, enforcing that the user may access it.

    You can access a message by ID in our APIs that either:
    (1) You received or have previously accessed via starring
        (aka have a UserMessage row for).
    (2) Was sent to a public stream in your realm.

    Returns the (message, user_message) pair; user_message is whatever
    get_usermessage_by_message_id returned for this user and message.

    We produce consistent, boring error messages to avoid leaking any
    information from a security perspective: both a nonexistent message
    and a message the user may not access raise the same JsonableError.
    """
    try:
        message = Message.objects.select_related().get(id=message_id)
    except Message.DoesNotExist:
        raise JsonableError(_("Invalid message(s)"))

    user_message = get_usermessage_by_message_id(user_profile, message_id)

    if not has_message_access(user_profile, message, user_message):
        raise JsonableError(_("Invalid message(s)"))

    return (message, user_message)
|
|
|
|
|
|
|
|
def has_message_access(user_profile: UserProfile, message: Message,
                       user_message: Optional[UserMessage]) -> bool:
    """Decide whether `user_profile` may access `message`.

    `user_message` is the user's UserMessage row for the message, or
    None if there is none.  Having a row always grants access;
    otherwise access depends on the stream the message was sent to.
    """
    if user_message is not None:
        return True

    if message.recipient.type != Recipient.STREAM:
        # You can't access private messages you didn't receive
        return False

    stream = Stream.objects.get(id=message.recipient.type_id)
    if stream.realm != user_profile.realm:
        # You can't access public stream messages in other realms
        return False

    if not stream.is_history_public_to_subscribers():
        # You can't access messages you didn't directly receive
        # unless history is public to subscribers.
        return False

    if stream.is_public() and not user_profile.is_guest:
        # The message was sent to a public stream in your realm and
        # you are not a guest, so no subscription is required.
        return True

    # Either this is an invite-only stream whose history is available
    # to subscribers, or the user is a guest (guests don't get
    # automatic access to public stream messages).  In both cases
    # access requires an active subscription.
    return Subscription.objects.filter(user_profile=user_profile, active=True,
                                       recipient=message.recipient).exists()
|
2016-10-12 02:14:08 +02:00
|
|
|
|
2018-07-27 16:23:17 +02:00
|
|
|
def bulk_access_messages(user_profile: UserProfile, messages: Sequence[Message]) -> List[Message]:
    """Return the messages from `messages` that the user can access.

    For every message, the user's UserMessage row is looked up with
    get_usermessage_by_message_id and the decision is delegated to
    has_message_access.  Input order is preserved.
    """
    return [
        message
        for message in messages
        if has_message_access(
            user_profile,
            message,
            get_usermessage_by_message_id(user_profile, message.id),
        )
    ]
|
|
|
|
|
2019-02-14 02:01:42 +01:00
|
|
|
def bulk_access_messages_expect_usermessage(
        user_profile_id: int, message_ids: Sequence[int]) -> List[int]:
    '''
    Like bulk_access_messages, but faster and potentially stricter.

    Returns a subset of `message_ids` containing only messages the
    user can access.  Makes O(1) database queries.

    Use this function only when the user is expected to have a
    UserMessage row for every message in `message_ids`.  If a
    UserMessage row is missing, the message will be omitted even if
    the user has access (e.g. because it went to a public stream.)

    See also: `access_message`, `bulk_access_messages`.
    '''
    # Materialize the queryset so that callers really get the List[int]
    # promised by the signature, rather than a lazy queryset.
    return list(UserMessage.objects.filter(
        user_profile_id=user_profile_id,
        message_id__in=message_ids,
    ).values_list('message_id', flat=True))
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def render_markdown(message: Message,
                    content: str,
                    realm: Optional[Realm]=None,
                    realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                    user_ids: Optional[Set[int]]=None,
                    mention_data: Optional[bugdown.MentionData]=None,
                    email_gateway: Optional[bool]=False) -> str:
    '''
    This is basically just a wrapper for do_render_markdown.

    It fills in the arguments do_render_markdown requires: an empty
    set when `user_ids` is None, the message's own realm when `realm`
    is None, and the sender's is_bot / translate_emoticons settings.
    '''
    message_user_ids = set() if user_ids is None else user_ids  # type: Set[int]

    if realm is None:
        realm = message.get_realm()

    sender = get_user_profile_by_id(message.sender_id)

    return do_render_markdown(
        message=message,
        content=content,
        realm=realm,
        realm_alert_words_automaton=realm_alert_words_automaton,
        message_user_ids=message_user_ids,
        sent_by_bot=sender.is_bot,
        translate_emoticons=sender.translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway,
    )
|
|
|
|
|
|
|
|
def do_render_markdown(message: Message,
                       content: str,
                       realm: Realm,
                       message_user_ids: Set[int],
                       sent_by_bot: bool,
                       translate_emoticons: bool,
                       realm_alert_words_automaton: Optional[ahocorasick.Automaton]=None,
                       mention_data: Optional[bugdown.MentionData]=None,
                       email_gateway: Optional[bool]=False) -> str:
    """Return HTML for given markdown. Bugdown may add properties to the
    message object such as `mentions_user_ids`, `mentions_user_group_ids`, and
    `mentions_wildcard`.  These are only on this Django object and are not
    saved in the database.
    """

    # Reset the per-render attributes before handing the message to
    # bugdown; every set-valued attribute gets its own fresh set.
    message.mentions_wildcard = False
    for set_attribute in ('mentions_user_ids',
                          'mentions_user_group_ids',
                          'alert_words',
                          'links_for_preview',
                          'user_ids_with_alert_words'):
        setattr(message, set_attribute, set())

    # DO MAIN WORK HERE -- call bugdown to convert
    return bugdown.convert(
        content,
        realm_alert_words_automaton=realm_alert_words_automaton,
        message=message,
        message_realm=realm,
        sent_by_bot=sent_by_bot,
        translate_emoticons=translate_emoticons,
        mention_data=mention_data,
        email_gateway=email_gateway
    )
|
2017-05-23 03:02:01 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def huddle_users(recipient_id: int) -> str:
    """Return the huddle's user ids as a sorted, comma-separated string."""
    display_recipient = get_display_recipient_by_id(
        recipient_id,
        Recipient.HUDDLE,
        None,
    )  # type: DisplayRecipientT

    # str is for streams.
    assert not isinstance(display_recipient, str)

    sorted_user_ids = sorted(obj['id'] for obj in display_recipient)  # type: List[int]
    return ','.join(str(uid) for uid in sorted_user_ids)
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def aggregate_message_dict(input_dict: Dict[int, Dict[str, Any]],
                           lookup_fields: List[str],
                           collect_senders: bool) -> List[Dict[str, Any]]:
    '''
    Group per-message attribute dicts into buckets keyed by the values
    of `lookup_fields`.

    Each bucket carries the lookup field values, a sorted list of
    `unread_message_ids`, and (only when `collect_senders` is true) a
    sorted list of distinct `sender_ids`.  The buckets are returned
    sorted by their lookup key.

    A concrete example might help explain the inputs here:

    input_dict = {
        1002: dict(stream_id=5, topic='foo', sender_id=40),
        1003: dict(stream_id=5, topic='foo', sender_id=41),
        1004: dict(stream_id=6, topic='baz', sender_id=99),
    }

    lookup_fields = ['stream_id', 'topic']

    The first time through the loop:
        attribute_dict = dict(stream_id=5, topic='foo', sender_id=40)
        lookup_key = (5, 'foo')

    lookup_dict = {
        (5, 'foo'): dict(stream_id=5, topic='foo',
                         unread_message_ids=[1002, 1003],
                         sender_ids=[40, 41],
                         ),
        ...
    }

    result = [
        dict(stream_id=5, topic='foo',
             unread_message_ids=[1002, 1003],
             sender_ids=[40, 41],
             ),
        ...
    ]
    '''
    lookup_dict = dict()  # type: Dict[Tuple[Any, ...], Dict[str, Any]]

    for message_id, attribute_dict in input_dict.items():
        lookup_key = tuple(attribute_dict[f] for f in lookup_fields)

        bucket = lookup_dict.get(lookup_key)
        if bucket is None:
            # First message for this key: start a new bucket.
            bucket = {f: attribute_dict[f] for f in lookup_fields}
            bucket['unread_message_ids'] = []
            if collect_senders:
                # Collected as a set to de-duplicate; converted to a
                # sorted list once all messages have been seen.
                bucket['sender_ids'] = set()
            lookup_dict[lookup_key] = bucket

        bucket['unread_message_ids'].append(message_id)
        if collect_senders:
            bucket['sender_ids'].add(attribute_dict['sender_id'])

    for bucket in lookup_dict.values():
        bucket['unread_message_ids'].sort()
        if collect_senders:
            bucket['sender_ids'] = sorted(bucket['sender_ids'])

    return [lookup_dict[key] for key in sorted(lookup_dict)]
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_inactive_recipient_ids(user_profile: UserProfile) -> List[int]:
    """Return the recipient ids of the user's inactive stream subscriptions."""
    inactive_subscriptions = get_stream_subscriptions_for_user(user_profile).filter(
        active=False,
    )
    return [
        row['recipient_id']
        for row in inactive_subscriptions.values('recipient_id')
    ]
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_muted_stream_ids(user_profile: UserProfile) -> List[int]:
    """Return the stream ids of the user's active subscriptions that are muted.

    The ids are read from the `recipient__type_id` column of the
    matching subscription rows.
    """
    muted_subscriptions = get_stream_subscriptions_for_user(user_profile).filter(
        active=True,
        is_muted=True,
    )
    return [
        row['recipient__type_id']
        for row in muted_subscriptions.values('recipient__type_id')
    ]
|
2017-08-23 22:45:50 +02:00
|
|
|
|
2018-08-14 23:57:20 +02:00
|
|
|
def get_starred_message_ids(user_profile: UserProfile) -> List[int]:
    """Return the ids of the user's starred messages in ascending order.

    At most the first 10000 ids (lowest message ids first) are returned.
    """
    starred_rows = UserMessage.objects.filter(
        user_profile=user_profile,
    ).extra(
        where=[UserMessage.where_starred()]
    )
    ordered_ids = starred_rows.order_by('message_id').values_list('message_id', flat=True)
    return list(ordered_ids[0:10000])
|
2018-08-14 23:57:20 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_raw_unread_data(user_profile: UserProfile) -> RawUnreadMessagesResult:
    """Collect the user's unread messages into message-id-keyed structures.

    The result contains:
    * pm_dict: message_id -> dict(sender_id) for personal messages
    * stream_dict: message_id -> dict(stream_id, topic, sender_id)
    * huddle_dict: message_id -> dict(user_ids_string)
    * muted_stream_ids: as returned by get_muted_stream_ids
    * unmuted_stream_msgs: ids of stream messages that are in neither a
      muted stream nor a muted topic
    * mentions: ids of messages flagged `mentioned`, plus messages
      flagged `wildcard_mentioned` unless they are muted stream messages

    Messages whose recipient is one of the user's inactive stream
    subscriptions are excluded, and only the MAX_UNREAD_MESSAGES
    newest unread messages are considered.
    """
    excluded_recipient_ids = get_inactive_recipient_ids(user_profile)

    unread_query = UserMessage.objects.filter(
        user_profile=user_profile
    ).exclude(
        message__recipient_id__in=excluded_recipient_ids
    ).extra(
        where=[UserMessage.where_unread()]
    ).values(
        'message_id',
        'message__sender_id',
        MESSAGE__TOPIC,
        'message__recipient_id',
        'message__recipient__type',
        'message__recipient__type_id',
        'flags',
    ).order_by("-message_id")

    # Limit unread messages for performance reasons.  The query is
    # newest-first so that the limit keeps the most recent messages;
    # afterwards we flip the rows back into ascending message_id order.
    newest_first = list(unread_query[:MAX_UNREAD_MESSAGES])
    rows = newest_first[::-1]

    muted_stream_ids = get_muted_stream_ids(user_profile)
    topic_mute_checker = build_topic_mute_checker(user_profile)

    def is_row_muted(stream_id: int, recipient_id: int, topic: str) -> bool:
        # A stream message is muted if either its stream or its topic is.
        if stream_id in muted_stream_ids:
            return True
        return bool(topic_mute_checker(recipient_id, topic))

    huddle_cache = {}  # type: Dict[int, str]

    def get_huddle_users(recipient_id: int) -> str:
        # Memoize huddle_users so each huddle is looked up only once.
        if recipient_id not in huddle_cache:
            huddle_cache[recipient_id] = huddle_users(recipient_id)
        return huddle_cache[recipient_id]

    pm_dict = {}
    stream_dict = {}
    unmuted_stream_msgs = set()
    huddle_dict = {}
    mentions = set()

    for row in rows:
        message_id = row['message_id']
        msg_type = row['message__recipient__type']
        recipient_id = row['message__recipient_id']
        sender_id = row['message__sender_id']

        if msg_type == Recipient.STREAM:
            stream_id = row['message__recipient__type_id']
            topic = row[MESSAGE__TOPIC]
            stream_dict[message_id] = dict(
                stream_id=stream_id,
                topic=topic,
                sender_id=sender_id,
            )
            if not is_row_muted(stream_id, recipient_id, topic):
                unmuted_stream_msgs.add(message_id)

        elif msg_type == Recipient.PERSONAL:
            pm_dict[message_id] = dict(
                sender_id=sender_id,
            )

        elif msg_type == Recipient.HUDDLE:
            huddle_dict[message_id] = dict(
                user_ids_string=get_huddle_users(recipient_id),
            )

        # TODO: Add support for alert words here as well.
        flags = row['flags']
        is_mentioned = (flags & UserMessage.flags.mentioned) != 0
        is_wildcard_mentioned = (flags & UserMessage.flags.wildcard_mentioned) != 0

        if is_mentioned:
            mentions.add(message_id)

        if is_wildcard_mentioned:
            if msg_type == Recipient.STREAM:
                # Wildcard mentions in muted streams/topics don't count.
                stream_id = row['message__recipient__type_id']
                topic = row[MESSAGE__TOPIC]
                if not is_row_muted(stream_id, recipient_id, topic):
                    mentions.add(message_id)
            else:  # nocoverage # TODO: Test wildcard mentions in PMs.
                mentions.add(message_id)

    return dict(
        pm_dict=pm_dict,
        stream_dict=stream_dict,
        muted_stream_ids=muted_stream_ids,
        unmuted_stream_msgs=unmuted_stream_msgs,
        huddle_dict=huddle_dict,
        mentions=mentions,
    )
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def aggregate_unread_data(raw_data: RawUnreadMessagesResult) -> UnreadMessagesResult:
    """Aggregate raw, message-id-keyed unread data into buckets.

    Personal messages are grouped by sender, stream messages by
    (stream_id, topic) with their sender ids collected, and huddle
    messages by user_ids_string.  `count` is the number of personal,
    unmuted stream and huddle messages combined.
    """
    pm_dict = raw_data['pm_dict']
    stream_dict = raw_data['stream_dict']
    unmuted_stream_msgs = raw_data['unmuted_stream_msgs']
    huddle_dict = raw_data['huddle_dict']
    mentions = list(raw_data['mentions'])

    # Muted stream messages are deliberately left out of the total.
    count = sum(len(unread) for unread in (pm_dict, unmuted_stream_msgs, huddle_dict))

    pm_objects = aggregate_message_dict(
        input_dict=pm_dict,
        lookup_fields=['sender_id'],
        collect_senders=False,
    )

    stream_objects = aggregate_message_dict(
        input_dict=stream_dict,
        lookup_fields=['stream_id', 'topic'],
        collect_senders=True,
    )

    huddle_objects = aggregate_message_dict(
        input_dict=huddle_dict,
        lookup_fields=['user_ids_string'],
        collect_senders=False,
    )

    result = dict(
        pms=pm_objects,
        streams=stream_objects,
        huddles=huddle_objects,
        mentions=mentions,
        count=count)  # type: UnreadMessagesResult

    return result
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def apply_unread_message_event(user_profile: UserProfile,
                               state: RawUnreadMessagesResult,
                               message: Dict[str, Any],
                               flags: List[str]) -> None:
    """Record a new unread message in the raw unread-messages `state`.

    The message is added to stream_dict, pm_dict or huddle_dict
    depending on its type; a private message counts as a huddle when
    its display_recipient lists more than one user other than the
    sender.  Stream messages in neither a muted stream nor a muted
    topic are added to unmuted_stream_msgs, and `mentions` is updated
    from `flags`.

    Raises AssertionError for a message type other than 'stream' or
    'private'.
    """
    message_id = message['id']

    if message['type'] not in ('stream', 'private'):
        raise AssertionError("Invalid message type %s" % (message['type'],))

    if message['type'] == 'stream':
        sender_id = message['sender_id']
        stream_id = message['stream_id']
        topic = message[TOPIC_NAME]
        state['stream_dict'][message_id] = dict(
            stream_id=stream_id,
            topic=topic,
            sender_id=sender_id,
        )

        # The topic check hits the database, so it only runs when the
        # stream itself is not muted.
        if (stream_id not in state['muted_stream_ids']
                and not topic_is_muted(user_profile, stream_id, topic)):
            state['unmuted_stream_msgs'].add(message_id)
    else:
        display_recipient = message['display_recipient']
        others = [
            recip for recip in display_recipient
            if recip['id'] != message['sender_id']
        ]
        sender_id = message['sender_id']

        if len(others) <= 1:
            # One-on-one private message (or a message to oneself).
            state['pm_dict'][message_id] = dict(
                sender_id=sender_id,
            )
        else:
            sorted_user_ids = sorted(obj['id'] for obj in display_recipient)
            user_ids_string = ','.join(str(uid) for uid in sorted_user_ids)
            state['huddle_dict'][message_id] = dict(
                user_ids_string=user_ids_string,
            )

    if 'mentioned' in flags:
        state['mentions'].add(message_id)

    # Wildcard mentions only count for unmuted stream messages here.
    if 'wildcard_mentioned' in flags and message_id in state['unmuted_stream_msgs']:
        state['mentions'].add(message_id)
|
2018-01-02 18:33:28 +01:00
|
|
|
|
2019-08-03 02:33:42 +02:00
|
|
|
def remove_message_id_from_unread_mgs(state: RawUnreadMessagesResult,
                                      message_id: int) -> None:
    """Forget message_id in a raw_unread_msgs data structure.

    This undoes what apply_unread_message_event records: it is used
    when a message has been read or deleted.  The structure is
    modified in place, and ids that are not present are ignored.
    """
    # These entries support discard(), which is a no-op for ids that
    # are not present.
    state['mentions'].discard(message_id)
    state['unmuted_stream_msgs'].discard(message_id)

    # These entries are keyed by message id; the None default keeps
    # pop() from raising KeyError when the id is absent.
    state['huddle_dict'].pop(message_id, None)
    state['stream_dict'].pop(message_id, None)
    state['pm_dict'].pop(message_id, None)
|
|
|
|
|
2018-01-22 21:50:22 +01:00
|
|
|
def estimate_recent_messages(realm: Realm, hours: int) -> int:
    """Sum the 'messages_sent:is_bot:hour' RealmCount values for realm
    whose end_time is later than `hours` hours ago.

    Returns 0 when the aggregate comes back empty (None).
    """
    hourly_stat = COUNT_STATS['messages_sent:is_bot:hour']
    cutoff = timezone_now() - datetime.timedelta(hours=hours)
    recent_rows = RealmCount.objects.filter(
        property=hourly_stat.property,
        end_time__gt=cutoff,
        realm=realm,
    )
    total = recent_rows.aggregate(Sum('value'))['value__sum']
    # aggregate() yields None for 'value__sum' when no rows match.
    return total or 0
|
2018-01-04 13:49:39 +01:00
|
|
|
|
2018-01-22 21:50:22 +01:00
|
|
|
def get_first_visible_message_id(realm: Realm) -> int:
    """Return the first_visible_message_id currently stored on realm."""
    first_visible = realm.first_visible_message_id
    return first_visible
|
2018-01-22 21:50:22 +01:00
|
|
|
|
|
|
|
def maybe_update_first_visible_message_id(realm: Realm, lookback_hours: int) -> None:
    """Recompute realm.first_visible_message_id only when it could have moved.

    The recomputation runs when the realm has a message_visibility_limit
    and estimate_recent_messages reports at least one message in the
    last `lookback_hours` hours; otherwise nothing is changed.
    """
    recent_messages_count = estimate_recent_messages(realm, lookback_hours)

    if realm.message_visibility_limit is None:
        # No limit configured for this realm.
        return
    if recent_messages_count <= 0:
        # No recent messages counted.
        return

    update_first_visible_message_id(realm)
|
|
|
|
|
|
|
|
def update_first_visible_message_id(realm: Realm) -> None:
    """Recompute and save realm.first_visible_message_id.

    With no message_visibility_limit the value is 0.  Otherwise it is
    the id of the limit-th newest message sent by a user in the realm,
    or 0 when the realm has fewer messages than the limit.
    """
    limit = realm.message_visibility_limit
    if limit is None:
        new_first_visible_id = 0
    else:
        # Newest-first ids; the queryset is lazy, so nothing is
        # fetched until it is indexed below.
        newest_first_ids = Message.objects.filter(
            sender__realm=realm).values('id').order_by('-id')
        try:
            new_first_visible_id = newest_first_ids[limit - 1]["id"]
        except IndexError:
            # Fewer than `limit` messages exist.
            new_first_visible_id = 0

    realm.first_visible_message_id = new_first_visible_id
    realm.save(update_fields=["first_visible_message_id"])
|
2019-03-20 04:15:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
def get_recent_conversations_recipient_id(user_profile: UserProfile,
                                          recipient_id: int,
                                          sender_id: int) -> int:
    """Return the recipient_id under which get_recent_private_conversations
    would have filed a message.

    When the message was addressed to user_profile's own personal
    recipient, the sender's personal recipient id is returned instead;
    in every other case recipient_id is returned unchanged.
    """
    own_recipient = Recipient.objects.get(type=Recipient.PERSONAL,
                                          type_id=user_profile.id)
    if recipient_id != own_recipient.id:
        return recipient_id

    # Addressed to our own personal recipient: key by the sender instead.
    sender_recipient = Recipient.objects.get(type=Recipient.PERSONAL,
                                             type_id=sender_id)
    return sender_recipient.id
|
|
|
|
|
|
|
|
def get_recent_private_conversations(user_profile: UserProfile) -> Dict[int, Dict[str, Any]]:
    """This function uses some carefully optimized SQL queries, designed
    to use the UserMessage index on private_messages.  It is
    significantly complicated by the fact that for 1:1 private
    messages, we store the message against a recipient_id of whichever
    user was the recipient, and thus for 1:1 private messages sent
    directly to us, we need to look up the other user from the
    sender_id on those messages.  You'll see that pattern repeated
    both here and also in zerver/lib/events.py.

    Ideally, we would write these queries using Django, but even
    without the UNION ALL, that seems to not be possible, because the
    equivalent Django syntax (for the first part of this query):

        message_data = UserMessage.objects.select_related("message__recipient_id").filter(
            user_profile=user_profile,
        ).extra(
            where=[UserMessage.where_private()]
        ).order_by("-message_id")[:1000].values(
            "message__recipient_id").annotate(last_message_id=Max("message_id"))

    does not properly nest the GROUP BY (from .annotate) with the slicing.

    We return a dictionary structure for convenient modification
    below; this structure is converted into its final form by
    post_process.

    The returned dictionary maps each recipient_id to a dict with keys
    'max_message_id' (the largest message id found for that recipient)
    and 'user_ids' (ids of the other users subscribed to that
    recipient; user_profile itself is excluded).
    """
    RECENT_CONVERSATIONS_LIMIT = 1000

    recipient_map = {}
    my_recipient_id = Recipient.objects.get(type=Recipient.PERSONAL,
                                            type_id=user_profile.id).id

    # The query is the UNION ALL of two halves, each capped at
    # RECENT_CONVERSATIONS_LIMIT rows:
    #   * messages whose recipient is not our own personal recipient,
    #     keyed by the message's recipient_id;
    #   * messages addressed to our own personal recipient, keyed by
    #     the sender's recipient row instead.
    # NOTE(review): `um.flags & 2048` is presumably the is_private flag
    # bit (cf. UserMessage.where_private() in the docstring), and
    # `r.type = 1` presumably Recipient.PERSONAL -- confirm against the
    # model definitions.
    #
    # All interpolated values use %d, which only accepts numbers, so
    # nothing other than a number can end up in the SQL text.
    query = '''
    SELECT
        subquery.recipient_id, MAX(subquery.message_id)
    FROM (
        (SELECT
            um.message_id AS message_id,
            m.recipient_id AS recipient_id
        FROM
            zerver_usermessage um
        JOIN
            zerver_message m
        ON
            um.message_id = m.id
        WHERE
            um.user_profile_id=%(user_profile_id)d AND
            um.flags & 2048 <> 0 AND
            m.recipient_id <> %(my_recipient_id)d
        ORDER BY message_id DESC
        LIMIT %(conversation_limit)d)
        UNION ALL
        (SELECT
            um.message_id AS message_id,
            r.id AS recipient_id
        FROM
            zerver_usermessage um
        JOIN
            zerver_message m
        ON
            um.message_id = m.id
        JOIN
            zerver_recipient r
        ON
            r.type = 1 AND
            r.type_id = m.sender_id
        WHERE
            um.user_profile_id=%(user_profile_id)d AND
            um.flags & 2048 <> 0 AND
            m.recipient_id=%(my_recipient_id)d
        ORDER BY message_id DESC
        LIMIT %(conversation_limit)d)
    ) AS subquery
    GROUP BY subquery.recipient_id
    ''' % dict(
        user_profile_id=user_profile.id,
        conversation_limit=RECENT_CONVERSATIONS_LIMIT,
        my_recipient_id=my_recipient_id,
    )

    # Use the cursor as a context manager so that it is closed even if
    # execute() or fetchall() raises.
    with connection.cursor() as cursor:
        cursor.execute(query)
        rows = cursor.fetchall()

    # The resulting rows will be (recipient_id, max_message_id)
    # objects for all parties we've had recent (group?) private
    # message conversations with, including PMs with yourself (those
    # will generate an empty list of user_ids).
    for recipient_id, max_message_id in rows:
        recipient_map[recipient_id] = dict(
            max_message_id=max_message_id,
            user_ids=[],
        )

    # Now we need to map all the recipient_id objects to lists of user IDs
    for (recipient_id, user_profile_id) in Subscription.objects.filter(
            recipient_id__in=recipient_map.keys()).exclude(
                user_profile_id=user_profile.id).values_list(
                    "recipient_id", "user_profile_id"):
        recipient_map[recipient_id]['user_ids'].append(user_profile_id)
    return recipient_map
|