2021-06-13 00:51:30 +02:00
|
|
|
import functools
|
2017-09-14 19:47:22 +02:00
|
|
|
import re
|
2021-12-28 10:02:27 +01:00
|
|
|
from dataclasses import dataclass
|
2021-06-13 00:51:30 +02:00
|
|
|
from typing import Dict, List, Match, Optional, Set, Tuple
|
|
|
|
|
2023-11-03 07:10:19 +01:00
|
|
|
from django.conf import settings
|
2021-06-13 00:51:30 +02:00
|
|
|
from django.db.models import Q
|
|
|
|
|
2023-12-02 08:54:36 +01:00
|
|
|
from zerver.lib.users import get_inaccessible_user_ids
|
2023-12-15 03:57:04 +01:00
|
|
|
from zerver.models import UserGroup, UserProfile
|
|
|
|
from zerver.models.streams import get_linkable_streams
|
2017-09-14 19:47:22 +02:00
|
|
|
|
2022-06-13 06:02:57 +02:00
|
|
|
# Negative lookbehind: a mention may only begin at the start of the text or
# directly after whitespace or one of the characters ' " ( { [ / <
BEFORE_MENTION_ALLOWED_REGEX = r"(?<![^\s\'\"\(\{\[\/<])"

# Matches @**text** and the silent form @_**text**. The "silent" group
# captures the optional underscore; the "match" group captures the text
# between the double asterisks (any run of characters other than "*").
MENTIONS_RE = re.compile(rf"{BEFORE_MENTION_ALLOWED_REGEX}@(?P<silent>_?)(\*\*(?P<match>[^\*]+)\*\*)")

# Same shape as MENTIONS_RE, but with single asterisks: @*text* and @_*text*.
USER_GROUP_MENTIONS_RE = re.compile(rf"{BEFORE_MENTION_ALLOWED_REGEX}@(?P<silent>_?)(\*(?P<match>[^\*]+)\*)")
|
2013-06-28 16:02:58 +02:00
|
|
|
|
2023-05-30 08:42:09 +02:00
|
|
|
# Mention texts that are treated as topic wildcard mentions.
topic_wildcards = frozenset({"topic"})
# Mention texts that are treated as stream wildcard mentions.
stream_wildcards = frozenset({"all", "everyone", "stream"})
|
2013-06-28 16:02:58 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-12-28 10:02:27 +01:00
|
|
|
@dataclass
class FullNameInfo:
    """The subset of a user's fields that mention handling keeps around."""

    # Primary key of the user row.
    id: int
    # Full name exactly as stored on the user row.
    full_name: str
    # Value of the user's is_active flag.
    is_active: bool
|
2021-12-27 19:17:49 +01:00
|
|
|
|
|
|
|
|
2021-12-28 14:06:14 +01:00
|
|
|
@dataclass
class UserFilter:
    """Criteria for looking up one mentioned user, by id and/or full name."""

    id: Optional[int]
    full_name: Optional[str]

    def Q(self) -> Q:
        """Build the query condition for this filter.

        The full name, when given, is compared case-insensitively; the id,
        when given, must match exactly. When both are given, both must hold.

        Raises AssertionError if neither field is set.
        """
        conditions = {}
        if self.full_name is not None:
            conditions["full_name__iexact"] = self.full_name
        if self.id is not None:
            conditions["id"] = self.id

        if not conditions:
            raise AssertionError("totally empty filter makes no sense")

        # Inside the method body the name Q resolves to the module-level
        # Q class, not to this method.
        return Q(**conditions)
|
|
|
|
|
|
|
|
|
2023-05-30 07:03:12 +02:00
|
|
|
@dataclass
class MentionText:
    """Classification of the text captured from a single mention."""

    # The captured text; None when the mention is a wildcard.
    text: Optional[str]
    # True when the captured text is one of the topic wildcards.
    is_topic_wildcard: bool
    # True when the captured text is one of the stream wildcards.
    is_stream_wildcard: bool
|
2023-05-30 07:03:12 +02:00
|
|
|
|
|
|
|
|
2023-05-30 07:23:36 +02:00
|
|
|
@dataclass
class PossibleMentions:
    """Summary of every mention-like pattern found in a message's content."""

    # Non-wildcard mention texts (names, or the extended name|id syntax).
    mention_texts: Set[str]
    # Whether at least one topic wildcard mention was found.
    message_has_topic_wildcards: bool
    # Whether at least one stream wildcard mention was found.
    message_has_stream_wildcards: bool
|
2023-05-30 07:23:36 +02:00
|
|
|
|
|
|
|
|
2021-12-29 13:52:27 +01:00
|
|
|
class MentionBackend:
    """Realm-scoped lookups of mentioned users and streams, with caching.

    Be careful about reuse: MentionBackend contains caches which are
    designed to only have the lifespan of a sender user (typically a
    single request).

    In particular, user_cache is not robust to message_sender changing
    within the lifetime of a single MentionBackend: cached entries were
    already filtered for the sender that caused them to be fetched, and
    the cache key does not include the sender.
    """

    def __init__(self, realm_id: int) -> None:
        self.realm_id = realm_id
        # Keyed by (user id, full name) exactly as returned by the database.
        self.user_cache: Dict[Tuple[int, str], FullNameInfo] = {}
        # Maps stream name to stream id.
        self.stream_cache: Dict[str, int] = {}

    def get_full_name_info_list(
        self, user_filters: List[UserFilter], message_sender: Optional[UserProfile]
    ) -> List[FullNameInfo]:
        """Resolve user filters to FullNameInfo objects.

        Filters that carry both an id and a full name are served from
        user_cache when possible; everything else is fetched with a single
        database query restricted to this realm plus the cross-realm bots.
        Rows whose id is reported by get_inaccessible_user_ids for
        message_sender are dropped before being cached or returned.

        The result lists cache hits first, followed by the database rows.
        """
        result: List[FullNameInfo] = []
        unseen_user_filters: List[UserFilter] = []

        # Try to get users from the user_cache first.
        # This loop populates two lists:
        #  - result holds the objects we pull from the cache
        #  - unseen_user_filters holds the filters that need to hit the DB
        for user_filter in user_filters:
            # We expect callers who take advantage of our user_cache to supply both
            # id and full_name in the user mentions in their messages.
            if user_filter.id is not None and user_filter.full_name is not None:
                user = self.user_cache.get((user_filter.id, user_filter.full_name), None)
                if user is not None:
                    result.append(user)
                    continue

            # Cache miss (or a partial filter): we have to go to the database.
            unseen_user_filters.append(user_filter)

        # Most of the time, we have to go to the database to get user info,
        # unless the loop above found everything in the cache.
        if unseen_user_filters:
            q_list = [user_filter.Q() for user_filter in unseen_user_filters]

            rows = (
                UserProfile.objects.filter(
                    Q(realm_id=self.realm_id) | Q(email__in=settings.CROSS_REALM_BOT_EMAILS),
                )
                .filter(
                    # A row qualifies if it satisfies any one of the filters.
                    functools.reduce(lambda a, b: a | b, q_list),
                )
                .only(
                    "id",
                    "full_name",
                    "is_active",
                )
            )

            possible_mention_user_ids = [row.id for row in rows]
            inaccessible_user_ids = get_inaccessible_user_ids(
                possible_mention_user_ids, message_sender
            )

            user_list = [
                FullNameInfo(id=row.id, full_name=row.full_name, is_active=row.is_active)
                for row in rows
                if row.id not in inaccessible_user_ids
            ]

            # We expect callers who take advantage of our cache to supply both
            # id and full_name in the user mentions in their messages.
            for user in user_list:
                self.user_cache[(user.id, user.full_name)] = user

            result += user_list

        return result

    def get_stream_name_map(self, stream_names: Set[str]) -> Dict[str, int]:
        """Map each given stream name that resolves to a stream to its id.

        Names already in stream_cache are answered from it; the rest are
        looked up among the realm's linkable streams in one query and then
        cached. Names that do not resolve are simply absent from the result.
        """
        if not stream_names:
            return {}

        result: Dict[str, int] = {}
        unseen_stream_names: List[str] = []

        for stream_name in stream_names:
            if stream_name in self.stream_cache:
                result[stream_name] = self.stream_cache[stream_name]
            else:
                unseen_stream_names.append(stream_name)

        if unseen_stream_names:
            # An exact-match membership lookup; equivalent to OR-ing one
            # Q(name=...) per name, and consistent with how user group
            # names are looked up elsewhere in this module.
            rows = (
                get_linkable_streams(
                    realm_id=self.realm_id,
                )
                .filter(name__in=unseen_stream_names)
                .values(
                    "id",
                    "name",
                )
            )

            for row in rows:
                self.stream_cache[row["name"]] = row["id"]
                result[row["name"]] = row["id"]

        return result
|
2021-12-30 14:25:46 +01:00
|
|
|
|
2021-12-29 13:52:27 +01:00
|
|
|
|
2023-06-06 09:23:01 +02:00
|
|
|
def user_mention_matches_topic_wildcard(mention: str) -> bool:
    """Return True if the mention text is one of the topic wildcards."""
    is_topic_wildcard = mention in topic_wildcards
    return is_topic_wildcard
|
|
|
|
|
|
|
|
|
2023-06-03 16:51:38 +02:00
|
|
|
def user_mention_matches_stream_wildcard(mention: str) -> bool:
    """Return True if the mention text is one of the stream wildcards."""
    is_stream_wildcard = mention in stream_wildcards
    return is_stream_wildcard
|
2017-09-14 19:47:22 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-05-30 07:03:12 +02:00
|
|
|
def extract_mention_text(m: Match[str]) -> MentionText:
    """Classify the "match" group of a mention regex match.

    Wildcard mentions are reported through the two flags with text set to
    None; any other mention carries its captured text with both flags False.
    The topic wildcard check takes precedence over the stream wildcard check.
    """
    captured = m.group("match")
    is_topic = captured in topic_wildcards
    is_stream = (not is_topic) and captured in stream_wildcards
    return MentionText(
        text=None if (is_topic or is_stream) else captured,
        is_topic_wildcard=is_topic,
        is_stream_wildcard=is_stream,
    )
|
2017-09-14 19:47:22 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-05-30 07:23:36 +02:00
|
|
|
def possible_mentions(content: str) -> PossibleMentions:
    """Scan content for user mention syntax and summarize what was found.

    Mention texts can either be names, or an extended name|id syntax;
    wildcard mentions are reported only through the two boolean flags.
    """
    found = [extract_mention_text(m) for m in MENTIONS_RE.finditer(content)]
    return PossibleMentions(
        mention_texts={item.text for item in found if item.text},
        message_has_topic_wildcards=any(item.is_topic_wildcard for item in found),
        message_has_stream_wildcards=any(item.is_stream_wildcard for item in found),
    )
|
2017-09-25 09:47:15 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-05-11 01:40:23 +02:00
|
|
|
def possible_user_group_mentions(content: str) -> Set[str]:
    """Return the distinct texts captured by user group mention syntax in content."""
    group_names: Set[str] = set()
    for found in USER_GROUP_MENTIONS_RE.finditer(content):
        group_names.add(found.group("match"))
    return group_names
|
2021-06-13 00:51:30 +02:00
|
|
|
|
|
|
|
|
2021-12-29 13:52:27 +01:00
|
|
|
def get_possible_mentions_info(
    mention_backend: MentionBackend, mention_texts: Set[str], message_sender: Optional[UserProfile]
) -> List[FullNameInfo]:
    """Turn raw mention texts into user filters and resolve them via the backend.

    Three syntaxes are recognized: name|id, |id and a bare name. Returns an
    empty list without consulting the backend when there are no mention texts.
    """
    if not mention_texts:
        return []

    name_re = r"(?P<full_name>.+)?\|(?P<mention_id>\d+)$"
    user_filters: List[UserFilter] = []

    for mention_text in mention_texts:
        parsed = re.match(name_re, mention_text)
        if parsed is None:
            # For **name** syntax.
            user_filters.append(UserFilter(full_name=mention_text, id=None))
            continue

        # The mention_id group always participates in a successful match.
        user_id = int(parsed.group("mention_id"))
        # full_name is absent for the **|id** syntax and present for **name|id**.
        name = parsed.group("full_name") or None
        user_filters.append(UserFilter(full_name=name, id=user_id))

    return mention_backend.get_full_name_info_list(user_filters, message_sender)
|
2021-06-13 00:51:30 +02:00
|
|
|
|
|
|
|
|
|
|
|
class MentionData:
    """Lookup tables for the users and user groups a message might mention."""

    def __init__(
        self, mention_backend: MentionBackend, content: str, message_sender: Optional[UserProfile]
    ) -> None:
        self.mention_backend = mention_backend

        mentions = possible_mentions(content)
        candidates = get_possible_mentions_info(
            mention_backend, mentions.mention_texts, message_sender
        )

        # Index the candidates both by lower-cased full name and by id; when
        # two candidates share a key, the later one wins.
        self.full_name_info = {}
        self.user_id_info = {}
        for candidate in candidates:
            self.full_name_info[candidate.full_name.lower()] = candidate
            self.user_id_info[candidate.id] = candidate

        self.init_user_group_data(realm_id=mention_backend.realm_id, content=content)

        self.has_stream_wildcards = mentions.message_has_stream_wildcards
        self.has_topic_wildcards = mentions.message_has_topic_wildcards

    def message_has_stream_wildcards(self) -> bool:
        """Whether the content contained a stream wildcard mention."""
        return self.has_stream_wildcards

    def message_has_topic_wildcards(self) -> bool:
        """Whether the content contained a topic wildcard mention."""
        return self.has_topic_wildcards

    def init_user_group_data(self, realm_id: int, content: str) -> None:
        """Load the non-system user groups named in content, with their direct members."""
        self.user_group_name_info: Dict[str, UserGroup] = {}
        self.user_group_members: Dict[int, List[int]] = {}

        mentioned_names = possible_user_group_mentions(content)
        if not mentioned_names:
            # Nothing looks like a group mention, so skip the query entirely.
            return

        matching_groups = UserGroup.objects.filter(
            realm_id=realm_id, name__in=mentioned_names, is_system_group=False
        ).prefetch_related("direct_members")
        for group in matching_groups:
            self.user_group_name_info[group.name.lower()] = group
            self.user_group_members[group.id] = [
                member.id for member in group.direct_members.all()
            ]

    def get_user_by_name(self, name: str) -> Optional[FullNameInfo]:
        # warning: get_user_by_name is not dependable if two
        # users of the same full name are mentioned. Use
        # get_user_by_id where possible.
        return self.full_name_info.get(name.lower())

    def get_user_by_id(self, id: int) -> Optional[FullNameInfo]:
        """Return the candidate with this id, or None if there is none."""
        return self.user_id_info.get(id)

    def get_user_ids(self) -> Set[int]:
        """
        Returns the user IDs that might have been mentioned by this
        content.  Note that because this data structure has not parsed
        the message and does not know about escaping/code blocks, this
        will overestimate the list of user ids.
        """
        return set(self.user_id_info)

    def get_user_group(self, name: str) -> Optional[UserGroup]:
        """Return the loaded group with this name (case-insensitive), or None."""
        return self.user_group_name_info.get(name.lower())

    def get_group_members(self, user_group_id: int) -> List[int]:
        """Return the direct member ids loaded for the group, or [] if unknown."""
        return self.user_group_members.get(user_group_id, [])

    def get_stream_name_map(self, stream_names: Set[str]) -> Dict[str, int]:
        """Delegate the stream name lookup to the mention backend."""
        return self.mention_backend.get_stream_name_map(stream_names)
|
2021-12-07 21:41:45 +01:00
|
|
|
|
|
|
|
|
|
|
|
def silent_mention_syntax_for_user(user_profile: UserProfile) -> str:
    """Return the silent mention syntax, in name|id form, for the given user."""
    silent_mention = f"@_**{user_profile.full_name}|{user_profile.id}**"
    return silent_mention
|