From 198568522abf897dcb16172d93799761af10b63d Mon Sep 17 00:00:00 2001 From: Sahil Batra Date: Wed, 8 Nov 2023 09:23:05 +0530 Subject: [PATCH] message: Do not include details of inaccessible users in message data. This commit adds code to not include original details of senders like name, email and avatar url in the message objects sent through events and in the response of endpoint used to fetch messages. This is the last major commit for the project to add support for limiting guest access to an entire organization. Fixes #10970. --- zerver/actions/message_send.py | 28 ++++++++++++++- zerver/lib/email_mirror.py | 1 + zerver/lib/message.py | 55 +++++++++++++++++++++++++---- zerver/tests/test_events.py | 29 ++++++++++++++++ zerver/tests/test_message_dict.py | 57 ++++++++++++++++++++++++++++++- zerver/tests/test_message_edit.py | 4 ++- zerver/tests/test_message_send.py | 7 +++- zerver/tornado/event_queue.py | 13 +++++-- zerver/views/message_edit.py | 4 +++ zerver/views/message_fetch.py | 2 ++ 10 files changed, 188 insertions(+), 12 deletions(-) diff --git a/zerver/actions/message_send.py b/zerver/actions/message_send.py index 079121555b..a607b25d9b 100644 --- a/zerver/actions/message_send.py +++ b/zerver/actions/message_send.py @@ -72,7 +72,7 @@ from zerver.lib.stream_subscription import ( num_subscribers_for_stream_id, ) from zerver.lib.stream_topic import StreamTopicTarget -from zerver.lib.streams import access_stream_for_send_message, ensure_stream +from zerver.lib.streams import access_stream_for_send_message, ensure_stream, subscribed_to_stream from zerver.lib.string_validation import check_stream_name from zerver.lib.timestamp import timestamp_to_datetime from zerver.lib.topic import participants_for_topic @@ -83,7 +83,9 @@ from zerver.lib.users import ( check_user_can_access_all_users, get_accessible_user_ids, get_subscribers_of_target_user_subscriptions, + get_user_ids_who_can_access_user, get_users_involved_in_dms_with_target_users, + user_access_restricted_in_realm, ) from zerver.lib.validator import check_widget_content from zerver.lib.widget import do_widget_post_save_actions @@ -1104,6 +1106,7 @@ def do_send_messages( muted_sender_user_ids=list(send_request.muted_sender_user_ids), all_bot_user_ids=list(send_request.all_bot_user_ids), disable_external_notifications=send_request.disable_external_notifications, + realm_host=send_request.realm.host, ) if send_request.message.is_stream_message(): @@ -1123,6 +1126,29 @@ def do_send_messages( if send_request.stream.first_message_id is None: send_request.stream.first_message_id = send_request.message.id send_request.stream.save(update_fields=["first_message_id"]) + + # Performance note: This check can theoretically do + # database queries in a loop if many messages are being + # sent via a single do_send_messages call. + # + # This is not a practical concern at present, because our + # only use case for bulk-sending messages via this API + # endpoint is for direct messages bulk-sent by system + # bots; and for system bots, + # "user_access_restricted_in_realm" will always return + # False without doing any database queries at all. + if user_access_restricted_in_realm( + send_request.message.sender + ) and not subscribed_to_stream(send_request.message.sender, send_request.stream.id): + user_ids_who_can_access_sender = get_user_ids_who_can_access_user( + send_request.message.sender + ) + user_ids_receiving_event = {user["id"] for user in users} + user_ids_without_access_to_sender = user_ids_receiving_event - set( + user_ids_who_can_access_sender + ) + event["user_ids_without_access_to_sender"] = user_ids_without_access_to_sender + if send_request.local_id is not None: event["local_id"] = send_request.local_id if send_request.sender_queue_id is not None: diff --git a/zerver/lib/email_mirror.py b/zerver/lib/email_mirror.py index 4042bd328d..7b02c86e19 100644 --- a/zerver/lib/email_mirror.py +++ b/zerver/lib/email_mirror.py @@ -120,6 +120,7 @@ def get_usable_missed_message_address(address: str) -> MissedMessageEmailAddress mm_address = MissedMessageEmailAddress.objects.select_related( "user_profile", "user_profile__realm", + "user_profile__realm__can_access_all_users_group", "message", "message__sender", "message__recipient", diff --git a/zerver/lib/message.py b/zerver/lib/message.py index 201fa6917f..a1c2fab199 100644 --- a/zerver/lib/message.py +++ b/zerver/lib/message.py @@ -2,6 +2,7 @@ import copy import zlib from dataclasses import dataclass, field from datetime import datetime, timedelta +from email.headerregistry import Address from typing import ( Any, Collection, @@ -28,7 +29,7 @@ from psycopg2.sql import SQL from analytics.lib.counts import COUNT_STATS from analytics.models import RealmCount -from zerver.lib.avatar import get_avatar_field +from zerver.lib.avatar import get_avatar_field, get_avatar_for_inaccessible_user from zerver.lib.cache import ( cache_set_many, cache_with_key, @@ -60,6 +61,7 @@ from zerver.lib.types import DisplayRecipientT, EditHistoryEvent, UserDisplayRec from zerver.lib.url_preview.types import UrlEmbedData from zerver.lib.user_groups import is_user_in_group from zerver.lib.user_topics import build_get_topic_visibility_policy, get_topic_visibility_policy +from zerver.lib.users import get_inaccessible_user_ids from zerver.models import ( MAX_TOPIC_NAME_LENGTH, Message, @@ -74,6 +76,7 @@ from zerver.models import ( UserProfile, UserTopic, get_display_recipient_by_id, + get_fake_email_domain, get_usermessage_by_message_id, query_for_ids, ) @@ -245,6 +248,8 @@ def messages_for_ids( apply_markdown: bool, client_gravatar: bool, allow_edit_history: bool, + user_profile: Optional[UserProfile], + realm: Realm, ) -> List[Dict[str, Any]]: cache_transformer = MessageDict.build_dict_from_raw_db_row id_fetcher = lambda row: row["id"] @@ -261,6 +266,9 @@ def messages_for_ids( message_list: List[Dict[str, Any]] = [] + sender_ids = [message_dicts[message_id]["sender_id"] for message_id in message_ids] + inaccessible_sender_ids = get_inaccessible_user_ids(sender_ids, user_profile) + for message_id in message_ids: msg_dict = message_dicts[message_id] flags = user_message_flags[message_id] @@ -278,9 +286,10 @@ def messages_for_ids( # in realms with allow_edit_history disabled. if "edit_history" in msg_dict and not allow_edit_history: del msg_dict["edit_history"] + msg_dict["can_access_sender"] = msg_dict["sender_id"] not in inaccessible_sender_ids message_list.append(msg_dict) - MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar) + MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar, realm) return message_list @@ -389,7 +398,10 @@ class MessageDict: @staticmethod def post_process_dicts( - objs: List[Dict[str, Any]], apply_markdown: bool, client_gravatar: bool + objs: List[Dict[str, Any]], + apply_markdown: bool, + client_gravatar: bool, + realm: Realm, ) -> None: """ NOTE: This function mutates the objects in @@ -403,7 +415,15 @@ class MessageDict: MessageDict.bulk_hydrate_recipient_info(objs) for obj in objs: - MessageDict.finalize_payload(obj, apply_markdown, client_gravatar, skip_copy=True) + can_access_sender = obj.get("can_access_sender", True) + MessageDict.finalize_payload( + obj, + apply_markdown, + client_gravatar, + skip_copy=True, + can_access_sender=can_access_sender, + realm_host=realm.host, + ) @staticmethod def finalize_payload( @@ -412,6 +432,8 @@ class MessageDict: client_gravatar: bool, keep_rendered_content: bool = False, skip_copy: bool = False, + can_access_sender: bool = True, + realm_host: str = "", ) -> Dict[str, Any]: """ By default, we make a shallow copy of the incoming dict to avoid @@ -428,7 +450,20 @@ class MessageDict: # here if the current user's role has access to the target user's email address. client_gravatar = False - MessageDict.set_sender_avatar(obj, client_gravatar) + if not can_access_sender: + # Enforce inability to access details of inaccessible + # users. We should be able to remove the realm_host and + # can_access_user plumbing to this function if/when we + # shift the Zulip API to not send these denormalized + # fields about message senders favor of just sending the + # sender's user ID. + obj["sender_full_name"] = str(UserProfile.INACCESSIBLE_USER_NAME) + sender_id = obj["sender_id"] + obj["sender_email"] = Address( + username=f"user{sender_id}", domain=get_fake_email_domain(realm_host) + ).addr_spec + + MessageDict.set_sender_avatar(obj, client_gravatar, can_access_sender) if apply_markdown: obj["content_type"] = "text/html" obj["content"] = obj["rendered_content"] @@ -446,6 +481,8 @@ class MessageDict: del obj["recipient_type_id"] del obj["sender_is_mirror_dummy"] del obj["sender_email_address_visibility"] + if "can_access_sender" in obj: + del obj["can_access_sender"] return obj @staticmethod @@ -734,7 +771,13 @@ class MessageDict: MessageDict.hydrate_recipient_info(obj, display_recipients[obj["recipient_id"]]) @staticmethod - def set_sender_avatar(obj: Dict[str, Any], client_gravatar: bool) -> None: + def set_sender_avatar( + obj: Dict[str, Any], client_gravatar: bool, can_access_sender: bool = True + ) -> None: + if not can_access_sender: + obj["avatar_url"] = get_avatar_for_inaccessible_user() + return + sender_id = obj["sender_id"] sender_realm_id = obj["sender_realm_id"] sender_delivery_email = obj["sender_delivery_email"] diff --git a/zerver/tests/test_events.py b/zerver/tests/test_events.py index 7c03dd7558..02e8bac37c 100644 --- a/zerver/tests/test_events.py +++ b/zerver/tests/test_events.py @@ -1059,6 +1059,35 @@ class NormalActionsTest(BaseAction): state_change_expected=True, ) + def test_events_for_message_from_inaccessible_sender(self) -> None: + reset_email_visibility_to_everyone_in_zulip_realm() + self.set_up_db_for_testing_user_access() + othello = self.example_user("othello") + self.user_profile = self.example_user("polonius") + + events = self.verify_action( + lambda: self.send_stream_message( + othello, "test_stream1", "hello 2", allow_unsubscribed_sender=True + ), + ) + check_message("events[0]", events[0]) + message_obj = events[0]["message"] + self.assertEqual(message_obj["sender_full_name"], "Unknown user") + self.assertEqual(message_obj["sender_email"], f"user{othello.id}@zulip.testserver") + self.assertTrue(message_obj["avatar_url"].endswith("images/unknown-user-avatar.png")) + + iago = self.example_user("iago") + events = self.verify_action( + lambda: self.send_stream_message( + iago, "test_stream1", "hello 2", allow_unsubscribed_sender=True + ), + ) + check_message("events[0]", events[0]) + message_obj = events[0]["message"] + self.assertEqual(message_obj["sender_full_name"], iago.full_name) + self.assertEqual(message_obj["sender_email"], iago.delivery_email) + self.assertIsNone(message_obj["avatar_url"]) + def test_add_reaction(self) -> None: message_id = self.send_stream_message(self.example_user("hamlet"), "Verona", "hello") message = Message.objects.get(id=message_id) diff --git a/zerver/tests/test_message_dict.py b/zerver/tests/test_message_dict.py index a64cd39855..291af89b79 100644 --- a/zerver/tests/test_message_dict.py +++ b/zerver/tests/test_message_dict.py @@ -101,6 +101,7 @@ class MessageDictTest(ZulipTestCase): [unhydrated_dict], apply_markdown=apply_markdown, client_gravatar=client_gravatar, + realm=get_realm("zulip"), ) final_dict = unhydrated_dict return final_dict @@ -180,7 +181,9 @@ class MessageDictTest(ZulipTestCase): rows = list(MessageDict.get_raw_db_rows(ids)) objs = [MessageDict.build_dict_from_raw_db_row(row) for row in rows] - MessageDict.post_process_dicts(objs, apply_markdown=False, client_gravatar=False) + MessageDict.post_process_dicts( + objs, apply_markdown=False, client_gravatar=False, realm=realm + ) self.assert_length(rows, num_ids) @@ -422,6 +425,8 @@ class MessageHydrationTest(ZulipTestCase): apply_markdown=True, client_gravatar=True, allow_edit_history=False, + user_profile=cordelia, + realm=cordelia.realm, ) self.assert_length(messages, 2) @@ -438,6 +443,46 @@ class MessageHydrationTest(ZulipTestCase): self.assertIn('class="user-mention"', new_message["content"]) self.assertEqual(new_message["flags"], ["mentioned"]) + def test_message_for_ids_for_restricted_user_access(self) -> None: + self.set_up_db_for_testing_user_access() + hamlet = self.example_user("hamlet") + self.send_stream_message( + hamlet, + "test_stream1", + topic_name="test", + content="test message again", + ) + realm = get_realm("zulip") + stream = get_stream("test_stream1", realm) + + assert stream.recipient_id is not None + message_ids = Message.objects.filter( + recipient_id=stream.recipient_id, realm=realm + ).values_list("id", flat=True) + self.assert_length(message_ids, 2) + + user_message_flags = { + message_ids[0]: ["read", "historical"], + message_ids[1]: ["read"], + } + + messages = messages_for_ids( + message_ids=list(message_ids), + user_message_flags=user_message_flags, + search_fields={}, + apply_markdown=True, + client_gravatar=True, + allow_edit_history=False, + user_profile=self.example_user("polonius"), + realm=realm, + ) + (inaccessible_sender_msg,) = (msg for msg in messages if msg["sender_id"] != hamlet.id) + self.assertEqual(inaccessible_sender_msg["sender_id"], self.example_user("othello").id) + self.assertEqual(inaccessible_sender_msg["sender_full_name"], "Unknown user") + self.assertTrue( + inaccessible_sender_msg["avatar_url"].endswith("images/unknown-user-avatar.png") + ) + def test_display_recipient_up_to_date(self) -> None: """ This is a test for a bug where due to caching of message_dicts, @@ -473,6 +518,8 @@ class MessageHydrationTest(ZulipTestCase): apply_markdown=True, client_gravatar=True, allow_edit_history=False, + user_profile=cordelia, + realm=cordelia.realm, ) message = messages[0] @@ -516,6 +563,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase): apply_markdown=True, client_gravatar=True, allow_edit_history=False, + user_profile=cordelia, + realm=cordelia.realm, ) self._verify_display_recipient(messages[0]["display_recipient"], [hamlet, cordelia]) @@ -537,6 +586,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase): apply_markdown=True, client_gravatar=True, allow_edit_history=False, + user_profile=cordelia, + realm=cordelia.realm, ) self.assertEqual(messages[0]["display_recipient"], "Verona") @@ -559,6 +610,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase): apply_markdown=True, client_gravatar=True, allow_edit_history=False, + user_profile=cordelia, + realm=cordelia.realm, ) self._verify_display_recipient( @@ -593,6 +646,8 @@ class TestMessageForIdsDisplayRecipientFetching(ZulipTestCase): apply_markdown=True, client_gravatar=True, allow_edit_history=False, + user_profile=cordelia, + realm=cordelia.realm, ) self._verify_display_recipient( diff --git a/zerver/tests/test_message_edit.py b/zerver/tests/test_message_edit.py index c17c7a8343..97673f9c32 100644 --- a/zerver/tests/test_message_edit.py +++ b/zerver/tests/test_message_edit.py @@ -55,7 +55,7 @@ class EditMessageTestCase(ZulipTestCase): def check_message(self, msg_id: int, topic_name: str, content: str) -> None: # Make sure we saved the message correctly to the DB. - msg = Message.objects.get(id=msg_id) + msg = Message.objects.select_related("realm").get(id=msg_id) self.assertEqual(msg.topic_name(), topic_name) self.assertEqual(msg.content, content) @@ -75,6 +75,8 @@ class EditMessageTestCase(ZulipTestCase): apply_markdown=False, client_gravatar=False, allow_edit_history=True, + user_profile=None, + realm=msg.realm, ) self.assert_length(queries, 1) diff --git a/zerver/tests/test_message_send.py b/zerver/tests/test_message_send.py index d79057191a..7b7d5babbf 100644 --- a/zerver/tests/test_message_send.py +++ b/zerver/tests/test_message_send.py @@ -1652,7 +1652,12 @@ class StreamMessagesTest(ZulipTestCase): message = most_recent_message(user_profile) row = MessageDict.get_raw_db_rows([message.id])[0] dct = MessageDict.build_dict_from_raw_db_row(row) - MessageDict.post_process_dicts([dct], apply_markdown=True, client_gravatar=False) + MessageDict.post_process_dicts( + [dct], + apply_markdown=True, + client_gravatar=False, + realm=user_profile.realm, + ) self.assertEqual(dct["display_recipient"], "Denmark") stream = get_stream("Denmark", user_profile.realm) diff --git a/zerver/tornado/event_queue.py b/zerver/tornado/event_queue.py index a9afd1108c..9a7452a21b 100644 --- a/zerver/tornado/event_queue.py +++ b/zerver/tornado/event_queue.py @@ -1044,6 +1044,8 @@ def process_message_event( muted_sender_user_ids = set(event_template.get("muted_sender_user_ids", [])) all_bot_user_ids = set(event_template.get("all_bot_user_ids", [])) disable_external_notifications = event_template.get("disable_external_notifications", False) + user_ids_without_access_to_sender = event_template.get("user_ids_without_access_to_sender", []) + realm_host = event_template.get("realm_host", "") wide_dict: Dict[str, Any] = event_template["message_dict"] @@ -1062,11 +1064,15 @@ def process_message_event( sending_client: str = wide_dict["client"] @lru_cache(maxsize=None) - def get_client_payload(apply_markdown: bool, client_gravatar: bool) -> Dict[str, Any]: + def get_client_payload( + apply_markdown: bool, client_gravatar: bool, can_access_sender: bool + ) -> Dict[str, Any]: return MessageDict.finalize_payload( wide_dict, apply_markdown=apply_markdown, client_gravatar=client_gravatar, + can_access_sender=can_access_sender, + realm_host=realm_host, ) # Extra user-specific data to include @@ -1142,7 +1148,10 @@ def process_message_event( # message data unnecessarily continue - message_dict = get_client_payload(client.apply_markdown, client.client_gravatar) + can_access_sender = client.user_profile_id not in user_ids_without_access_to_sender + message_dict = get_client_payload( + client.apply_markdown, client.client_gravatar, can_access_sender + ) # Make sure Zephyr mirroring bots know whether stream is invite-only if "mirror" in client.client_type_name and event_template.get("invite_only"): diff --git a/zerver/views/message_edit.py b/zerver/views/message_edit.py index de9a8c1786..76806d91bf 100644 --- a/zerver/views/message_edit.py +++ b/zerver/views/message_edit.py @@ -195,8 +195,10 @@ def json_fetch_raw_message( if not maybe_user_profile.is_authenticated: realm = get_valid_realm_from_request(request) message = access_web_public_message(realm, message_id) + user_profile = None else: (message, user_message) = access_message(maybe_user_profile, message_id) + user_profile = maybe_user_profile flags = ["read"] if not maybe_user_profile.is_authenticated: @@ -218,6 +220,8 @@ def json_fetch_raw_message( apply_markdown=apply_markdown, client_gravatar=True, allow_edit_history=allow_edit_history, + user_profile=user_profile, + realm=message.realm, ) response = dict( message=message_dict_list[0], diff --git a/zerver/views/message_fetch.py b/zerver/views/message_fetch.py index 6c5ac66eeb..1dbc333306 100644 --- a/zerver/views/message_fetch.py +++ b/zerver/views/message_fetch.py @@ -219,6 +219,8 @@ def get_messages_backend( apply_markdown=apply_markdown, client_gravatar=client_gravatar, allow_edit_history=realm.allow_edit_history, + user_profile=user_profile, + realm=realm, ) ret = dict(