message: Do not include details of inaccessible users in message data.

This commit adds code to not include original details of senders like
name, email and avatar url in the message objects sent through events
and in the response of endpoint used to fetch messages.

This is the last major commit for the project to add support for
limiting guest access to an entire organization.

Fixes #10970.
This commit is contained in:
Sahil Batra 2023-11-08 09:23:05 +05:30 committed by Tim Abbott
parent 22d59f1132
commit 198568522a
10 changed files with 188 additions and 12 deletions

View File

@ -72,7 +72,7 @@ from zerver.lib.stream_subscription import (
num_subscribers_for_stream_id,
)
from zerver.lib.stream_topic import StreamTopicTarget
from zerver.lib.streams import access_stream_for_send_message, ensure_stream
from zerver.lib.streams import access_stream_for_send_message, ensure_stream, subscribed_to_stream
from zerver.lib.string_validation import check_stream_name
from zerver.lib.timestamp import timestamp_to_datetime
from zerver.lib.topic import participants_for_topic
@ -83,7 +83,9 @@ from zerver.lib.users import (
check_user_can_access_all_users,
get_accessible_user_ids,
get_subscribers_of_target_user_subscriptions,
get_user_ids_who_can_access_user,
get_users_involved_in_dms_with_target_users,
user_access_restricted_in_realm,
)
from zerver.lib.validator import check_widget_content
from zerver.lib.widget import do_widget_post_save_actions
@ -1104,6 +1106,7 @@ def do_send_messages(
muted_sender_user_ids=list(send_request.muted_sender_user_ids),
all_bot_user_ids=list(send_request.all_bot_user_ids),
disable_external_notifications=send_request.disable_external_notifications,
realm_host=send_request.realm.host,
)
if send_request.message.is_stream_message():
@ -1123,6 +1126,29 @@ def do_send_messages(
if send_request.stream.first_message_id is None:
send_request.stream.first_message_id = send_request.message.id
send_request.stream.save(update_fields=["first_message_id"])
# Performance note: This check can theoretically do
# database queries in a loop if many messages are being
# sent via a single do_send_messages call.
#
# This is not a practical concern at present, because our
# only use case for bulk-sending messages via this API
# endpoint is for direct messages bulk-sent by system
# bots; and for system bots,
# "user_access_restricted_in_realm" will always return
# False without doing any database queries at all.
if user_access_restricted_in_realm(
send_request.message.sender
) and not subscribed_to_stream(send_request.message.sender, send_request.stream.id):
user_ids_who_can_access_sender = get_user_ids_who_can_access_user(
send_request.message.sender
)
user_ids_receiving_event = {user["id"] for user in users}
user_ids_without_access_to_sender = user_ids_receiving_event - set(
user_ids_who_can_access_sender
)
event["user_ids_without_access_to_sender"] = user_ids_without_access_to_sender
if send_request.local_id is not None:
event["local_id"] = send_request.local_id
if send_request.sender_queue_id is not None:

View File

@ -120,6 +120,7 @@ def get_usable_missed_message_address(address: str) -> MissedMessageEmailAddress
mm_address = MissedMessageEmailAddress.objects.select_related(
"user_profile",
"user_profile__realm",
"user_profile__realm__can_access_all_users_group",
"message",
"message__sender",
"message__recipient",

View File

@ -2,6 +2,7 @@ import copy
import zlib
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from email.headerregistry import Address
from typing import (
Any,
Collection,
@ -28,7 +29,7 @@ from psycopg2.sql import SQL
from analytics.lib.counts import COUNT_STATS
from analytics.models import RealmCount
from zerver.lib.avatar import get_avatar_field
from zerver.lib.avatar import get_avatar_field, get_avatar_for_inaccessible_user
from zerver.lib.cache import (
cache_set_many,
cache_with_key,
@ -60,6 +61,7 @@ from zerver.lib.types import DisplayRecipientT, EditHistoryEvent, UserDisplayRec
from zerver.lib.url_preview.types import UrlEmbedData
from zerver.lib.user_groups import is_user_in_group
from zerver.lib.user_topics import build_get_topic_visibility_policy, get_topic_visibility_policy
from zerver.lib.users import get_inaccessible_user_ids
from zerver.models import (
MAX_TOPIC_NAME_LENGTH,
Message,
@ -74,6 +76,7 @@ from zerver.models import (
UserProfile,
UserTopic,
get_display_recipient_by_id,
get_fake_email_domain,
get_usermessage_by_message_id,
query_for_ids,
)
@ -245,6 +248,8 @@ def messages_for_ids(
apply_markdown: bool,
client_gravatar: bool,
allow_edit_history: bool,
user_profile: Optional[UserProfile],
realm: Realm,
) -> List[Dict[str, Any]]:
cache_transformer = MessageDict.build_dict_from_raw_db_row
id_fetcher = lambda row: row["id"]
@ -261,6 +266,9 @@ def messages_for_ids(
message_list: List[Dict[str, Any]] = []
sender_ids = [message_dicts[message_id]["sender_id"] for message_id in message_ids]
inaccessible_sender_ids = get_inaccessible_user_ids(sender_ids, user_profile)
for message_id in message_ids:
msg_dict = message_dicts[message_id]
flags = user_message_flags[message_id]
@ -278,9 +286,10 @@ def messages_for_ids(
# in realms with allow_edit_history disabled.
if "edit_history" in msg_dict and not allow_edit_history:
del msg_dict["edit_history"]
msg_dict["can_access_sender"] = msg_dict["sender_id"] not in inaccessible_sender_ids
message_list.append(msg_dict)
MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)
MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar, realm)
return message_list
@ -389,7 +398,10 @@ class MessageDict:
@staticmethod
def post_process_dicts(
objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool
objs: List[Dict[str, Any]],
apply_markdown: bool,
client_gravatar: bool,
realm: Realm,
) -> None:
"""
NOTE: This function mutates the objects in
@ -403,7 +415,15 @@ class MessageDict:
MessageDict.bulk_hydrate_recipient_info(objs)
for obj in objs:
MessageDict.finalize_payload(obj, apply_markdown, client_gravatar, skip_copy=True)
can_access_sender = obj.get("can_access_sender", True)
MessageDict.finalize_payload(
obj,
apply_markdown,
client_gravatar,
skip_copy=True,
can_access_sender=can_access_sender,
realm_host=realm.host,
)
@staticmethod
def finalize_payload(
@ -412,6 +432,8 @@ class MessageDict:
client_gravatar: bool,
keep_rendered_content: bool = False,
skip_copy: bool = False,
can_access_sender: bool = True,
realm_host: str = "",
) -> Dict[str, Any]:
"""
By default, we make a shallow copy of the incoming dict to avoid
@ -428,7 +450,20 @@ class MessageDict:
# here if the current user's role has access to the target user's email address.
client_gravatar = False
MessageDict.set_sender_avatar(obj, client_gravatar)
if not can_access_sender:
# Enforce inability to access details of inaccessible
# users. We should be able to remove the realm_host and
# can_access_user plumbing to this function if/when we
# shift the Zulip API to not send these denormalized
# fields about message senders favor of just sending the
# sender's user ID.
obj["sender_full_name"] = str(UserProfile.INACCESSIBLE_USER_NAME)
sender_id = obj["sender_id"]
obj["sender_email"] = Address(
username=f"user{sender_id}", domain=get_fake_email_domain(realm_host)
).addr_spec
MessageDict.set_sender_avatar(obj, client_gravatar, can_access_sender)
if apply_markdown:
obj["content_type"] = "text/html"
obj["content"] = obj["rendered_content"]
@ -446,6 +481,8 @@ class MessageDict:
del obj["recipient_type_id"]
del obj["sender_is_mirror_dummy"]
del obj["sender_email_address_visibility"]
if "can_access_sender" in obj:
del obj["can_access_sender"]
return obj
@staticmethod
@ -734,7 +771,13 @@ class MessageDict:
MessageDict.hydrate_recipient_info(obj, display_recipients[obj["recipient_id"]])
@staticmethod
def set_sender_avatar(obj: Dict[str, Any], client_gravatar: bool) -> None:
def set_sender_avatar(
obj: Dict[str, Any], client_gravatar: bool, can_access_sender: bool = True
) -> None:
if not can_access_sender:
obj["avatar_url"] = get_avatar_for_inaccessible_user()
return
sender_id = obj["sender_id"]
sender_realm_id = obj["sender_realm_id"]
sender_delivery_email = obj["sender_delivery_email"]

View File

@ -1059,6 +1059,35 @@ class NormalActionsTest(BaseAction):
state_change_expected=True,
)
def test_events_for_message_from_inaccessible_sender(self) -> None:
reset_email_visibility_to_everyone_in_zulip_realm()
self.set_up_db_for_testing_user_access()
othello = self.example_user("othello")
self.user_profile = self.example_user("polonius")
events = self.verify_action(
lambda: self.send_stream_message(
othello, "test_stream1", "hello 2", allow_unsubscribed_sender=True
),
)
check_message("events[0]", events[0])
message_obj = events[0]["message"]
self.assertEqual(message_obj["sender_full_name"], "Unknown user")
self.assertEqual(message_obj["sender_email"], f"user{othello.id}@zulip.testserver")
self.assertTrue(message_obj["avatar_url"].endswith("images/unknown-user-avatar.png"))
iago = self.example_user("iago")
events = self.verify_action(
lambda: self.send_stream_message(
iago, "test_stream1", "hello 2", allow_unsubscribed_sender=True
),
)
check_message("events[0]", events[0])
message_obj = events[0]["message"]
self.assertEqual(message_obj["sender_full_name"], iago.full_name)
self.assertEqual(message_obj["sender_email"], iago.delivery_email)
self.assertIsNone(message_obj["avatar_url"])
def test_add_reaction(self) -> None:
message_id = self.send_stream_message(self.example_user("hamlet"), "Verona", "hello")
message = Message.objects.get(id=message_id)

View File

@ -101,6 +101,7 @@ class MessageDictTest(ZulipTestCase):
[unhydrated_dict],
apply_markdown=apply_markdown,
client_gravatar=client_gravatar,
realm=get_realm("zulip"),
)
final_dict = unhydrated_dict
return final_dict
@ -180,7 +181,9 @@ class MessageDictTest(ZulipTestCase):
rows = list(MessageDict.get_raw_db_rows(ids))
objs = [MessageDict.build_dict_from_raw_db_row(row) for row in rows]
MessageDict.post_process_dicts(objs, apply_markdown=False, client_gravatar=False)
MessageDict.post_process_dicts(
objs, apply_markdown=False, client_gravatar=False, realm=realm
)
self.assert_length(rows, num_ids)
@ -422,6 +425,8 @@ class MessageHydrationTest(ZulipTestCase):
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
self.assert_length(messages, 2)
@ -438,6 +443,46 @@ class MessageHydrationTest(ZulipTestCase):
self.assertIn('class="user-mention"', new_message["content"])
self.assertEqual(new_message["flags"], ["mentioned"])
def test_message_for_ids_for_restricted_user_access(self) -> None:
self.set_up_db_for_testing_user_access()
hamlet = self.example_user("hamlet")
self.send_stream_message(
hamlet,
"test_stream1",
topic_name="test",
content="test message again",
)
realm = get_realm("zulip")
stream = get_stream("test_stream1", realm)
assert stream.recipient_id is not None
message_ids = Message.objects.filter(
recipient_id=stream.recipient_id, realm=realm
).values_list("id", flat=True)
self.assert_length(message_ids, 2)
user_message_flags = {
message_ids[0]: ["read", "historical"],
message_ids[1]: ["read"],
}
messages = messages_for_ids(
message_ids=list(message_ids),
user_message_flags=user_message_flags,
search_fields={},
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=self.example_user("polonius"),
realm=realm,
)
(inaccessible_sender_msg,) = (msg for msg in messages if msg["sender_id"] != hamlet.id)
self.assertEqual(inaccessible_sender_msg["sender_id"], self.example_user("othello").id)
self.assertEqual(inaccessible_sender_msg["sender_full_name"], "Unknown user")
self.assertTrue(
inaccessible_sender_msg["avatar_url"].endswith("images/unknown-user-avatar.png")
)
def test_display_recipient_up_to_date(self) -> None:
"""
This is a test for a bug where due to caching of message_dicts,
@ -473,6 +518,8 @@ class MessageHydrationTest(ZulipTestCase):
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
message = messages[0]
@ -516,6 +563,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase):
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
self._verify_display_recipient(messages[0]["display_recipient"], [hamlet, cordelia])
@ -537,6 +586,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase):
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
self.assertEqual(messages[0]["display_recipient"], "Verona")
@ -559,6 +610,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase):
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
self._verify_display_recipient(
@ -593,6 +646,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase):
apply_markdown=True,
client_gravatar=True,
allow_edit_history=False,
user_profile=cordelia,
realm=cordelia.realm,
)
self._verify_display_recipient(

View File

@ -55,7 +55,7 @@ class EditMessageTestCase(ZulipTestCase):
def check_message(self, msg_id: int, topic_name: str, content: str) -> None:
# Make sure we saved the message correctly to the DB.
msg = Message.objects.get(id=msg_id)
msg = Message.objects.select_related("realm").get(id=msg_id)
self.assertEqual(msg.topic_name(), topic_name)
self.assertEqual(msg.content, content)
@ -75,6 +75,8 @@ class EditMessageTestCase(ZulipTestCase):
apply_markdown=False,
client_gravatar=False,
allow_edit_history=True,
user_profile=None,
realm=msg.realm,
)
self.assert_length(queries, 1)

View File

@ -1652,7 +1652,12 @@ class StreamMessagesTest(ZulipTestCase):
message = most_recent_message(user_profile)
row = MessageDict.get_raw_db_rows([message.id])[0]
dct = MessageDict.build_dict_from_raw_db_row(row)
MessageDict.post_process_dicts([dct], apply_markdown=True, client_gravatar=False)
MessageDict.post_process_dicts(
[dct],
apply_markdown=True,
client_gravatar=False,
realm=user_profile.realm,
)
self.assertEqual(dct["display_recipient"], "Denmark")
stream = get_stream("Denmark", user_profile.realm)

View File

@ -1044,6 +1044,8 @@ def process_message_event(
muted_sender_user_ids = set(event_template.get("muted_sender_user_ids", []))
all_bot_user_ids = set(event_template.get("all_bot_user_ids", []))
disable_external_notifications = event_template.get("disable_external_notifications", False)
user_ids_without_access_to_sender = event_template.get("user_ids_without_access_to_sender", [])
realm_host = event_template.get("realm_host", "")
wide_dict: Dict[str, Any] = event_template["message_dict"]
@ -1062,11 +1064,15 @@ def process_message_event(
sending_client: str = wide_dict["client"]
@lru_cache(maxsize=None)
def get_client_payload(apply_markdown: bool, client_gravatar: bool) -> Dict[str, Any]:
def get_client_payload(
apply_markdown: bool, client_gravatar: bool, can_access_sender: bool
) -> Dict[str, Any]:
return MessageDict.finalize_payload(
wide_dict,
apply_markdown=apply_markdown,
client_gravatar=client_gravatar,
can_access_sender=can_access_sender,
realm_host=realm_host,
)
# Extra user-specific data to include
@ -1142,7 +1148,10 @@ def process_message_event(
# message data unnecessarily
continue
message_dict = get_client_payload(client.apply_markdown, client.client_gravatar)
can_access_sender = client.user_profile_id not in user_ids_without_access_to_sender
message_dict = get_client_payload(
client.apply_markdown, client.client_gravatar, can_access_sender
)
# Make sure Zephyr mirroring bots know whether stream is invite-only
if "mirror" in client.client_type_name and event_template.get("invite_only"):

View File

@ -195,8 +195,10 @@ def json_fetch_raw_message(
if not maybe_user_profile.is_authenticated:
realm = get_valid_realm_from_request(request)
message = access_web_public_message(realm, message_id)
user_profile = None
else:
(message, user_message) = access_message(maybe_user_profile, message_id)
user_profile = maybe_user_profile
flags = ["read"]
if not maybe_user_profile.is_authenticated:
@ -218,6 +220,8 @@ def json_fetch_raw_message(
apply_markdown=apply_markdown,
client_gravatar=True,
allow_edit_history=allow_edit_history,
user_profile=user_profile,
realm=message.realm,
)
response = dict(
message=message_dict_list[0],

View File

@ -219,6 +219,8 @@ def get_messages_backend(
apply_markdown=apply_markdown,
client_gravatar=client_gravatar,
allow_edit_history=realm.allow_edit_history,
user_profile=user_profile,
realm=realm,
)
ret = dict(