import itertools from operator import itemgetter from typing import Any, Callable, Collection, Dict, Iterable, List, Mapping, Optional, Set, Tuple from django.core.exceptions import ValidationError from django.db import connection from django.db.models.query import QuerySet from django.utils.translation import gettext as _ from psycopg2.sql import SQL from zerver.lib.email_mirror_helpers import encode_email_address_helper from zerver.lib.exceptions import JsonableError from zerver.lib.stream_color import STREAM_ASSIGNMENT_COLORS from zerver.lib.stream_subscription import ( get_active_subscriptions_for_stream_id, get_stream_subscriptions_for_user, ) from zerver.lib.stream_traffic import get_average_weekly_stream_traffic, get_streams_traffic from zerver.lib.streams import get_web_public_streams_queryset, subscribed_to_stream from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.types import ( NeverSubscribedStreamDict, RawStreamDict, RawSubscriptionDict, SubscriptionInfo, SubscriptionStreamDict, ) from zerver.models import Realm, Stream, Subscription, UserProfile, get_active_streams def get_web_public_subs(realm: Realm) -> SubscriptionInfo: color_idx = 0 def get_next_color() -> str: nonlocal color_idx color = STREAM_ASSIGNMENT_COLORS[color_idx] color_idx = (color_idx + 1) % len(STREAM_ASSIGNMENT_COLORS) return color subscribed = [] for stream in get_web_public_streams_queryset(realm): # Add Stream fields. date_created = datetime_to_timestamp(stream.date_created) description = stream.description first_message_id = stream.first_message_id history_public_to_subscribers = stream.history_public_to_subscribers invite_only = stream.invite_only is_announcement_only = stream.stream_post_policy == Stream.STREAM_POST_POLICY_ADMINS is_web_public = stream.is_web_public message_retention_days = stream.message_retention_days name = stream.name rendered_description = stream.rendered_description stream_id = stream.id stream_post_policy = stream.stream_post_policy # Add versions of the Subscription fields based on a simulated # new user subscription set. audible_notifications = True color = get_next_color() desktop_notifications = True email_address = "" email_notifications = True in_home_view = True is_muted = False pin_to_top = False push_notifications = True stream_weekly_traffic = get_average_weekly_stream_traffic( stream.id, stream.date_created, {} ) wildcard_mentions_notify = True sub = SubscriptionStreamDict( audible_notifications=audible_notifications, color=color, date_created=date_created, description=description, desktop_notifications=desktop_notifications, email_address=email_address, email_notifications=email_notifications, first_message_id=first_message_id, history_public_to_subscribers=history_public_to_subscribers, in_home_view=in_home_view, invite_only=invite_only, is_announcement_only=is_announcement_only, is_muted=is_muted, is_web_public=is_web_public, message_retention_days=message_retention_days, name=name, pin_to_top=pin_to_top, push_notifications=push_notifications, rendered_description=rendered_description, stream_id=stream_id, stream_post_policy=stream_post_policy, stream_weekly_traffic=stream_weekly_traffic, wildcard_mentions_notify=wildcard_mentions_notify, ) subscribed.append(sub) return SubscriptionInfo( subscriptions=subscribed, unsubscribed=[], never_subscribed=[], ) def build_stream_dict_for_sub( user: UserProfile, sub_dict: RawSubscriptionDict, raw_stream_dict: RawStreamDict, recent_traffic: Dict[int, int], ) -> SubscriptionStreamDict: # Handle Stream.API_FIELDS date_created = datetime_to_timestamp(raw_stream_dict["date_created"]) description = raw_stream_dict["description"] first_message_id = raw_stream_dict["first_message_id"] history_public_to_subscribers = raw_stream_dict["history_public_to_subscribers"] invite_only = raw_stream_dict["invite_only"] is_web_public = raw_stream_dict["is_web_public"] message_retention_days = raw_stream_dict["message_retention_days"] name = raw_stream_dict["name"] rendered_description = raw_stream_dict["rendered_description"] stream_id = raw_stream_dict["id"] stream_post_policy = raw_stream_dict["stream_post_policy"] # Handle Subscription.API_FIELDS. color = sub_dict["color"] is_muted = sub_dict["is_muted"] pin_to_top = sub_dict["pin_to_top"] audible_notifications = sub_dict["audible_notifications"] desktop_notifications = sub_dict["desktop_notifications"] email_notifications = sub_dict["email_notifications"] push_notifications = sub_dict["push_notifications"] wildcard_mentions_notify = sub_dict["wildcard_mentions_notify"] # Backwards-compatibility for clients that haven't been # updated for the in_home_view => is_muted API migration. in_home_view = not is_muted # Backwards-compatibility for clients that haven't been # updated for the is_announcement_only -> stream_post_policy # migration. is_announcement_only = raw_stream_dict["stream_post_policy"] == Stream.STREAM_POST_POLICY_ADMINS # Add a few computed fields not directly from the data models. stream_weekly_traffic = get_average_weekly_stream_traffic( raw_stream_dict["id"], raw_stream_dict["date_created"], recent_traffic ) email_address = encode_email_address_helper( raw_stream_dict["name"], raw_stream_dict["email_token"], show_sender=True ) # Our caller may add a subscribers field. return SubscriptionStreamDict( audible_notifications=audible_notifications, color=color, date_created=date_created, description=description, desktop_notifications=desktop_notifications, email_address=email_address, email_notifications=email_notifications, first_message_id=first_message_id, history_public_to_subscribers=history_public_to_subscribers, in_home_view=in_home_view, invite_only=invite_only, is_announcement_only=is_announcement_only, is_muted=is_muted, is_web_public=is_web_public, message_retention_days=message_retention_days, name=name, pin_to_top=pin_to_top, push_notifications=push_notifications, rendered_description=rendered_description, stream_id=stream_id, stream_post_policy=stream_post_policy, stream_weekly_traffic=stream_weekly_traffic, wildcard_mentions_notify=wildcard_mentions_notify, ) def build_stream_dict_for_never_sub( raw_stream_dict: RawStreamDict, recent_traffic: Dict[int, int], ) -> NeverSubscribedStreamDict: date_created = datetime_to_timestamp(raw_stream_dict["date_created"]) description = raw_stream_dict["description"] first_message_id = raw_stream_dict["first_message_id"] history_public_to_subscribers = raw_stream_dict["history_public_to_subscribers"] invite_only = raw_stream_dict["invite_only"] is_web_public = raw_stream_dict["is_web_public"] message_retention_days = raw_stream_dict["message_retention_days"] name = raw_stream_dict["name"] rendered_description = raw_stream_dict["rendered_description"] stream_id = raw_stream_dict["id"] stream_post_policy = raw_stream_dict["stream_post_policy"] stream_weekly_traffic = get_average_weekly_stream_traffic( raw_stream_dict["id"], raw_stream_dict["date_created"], recent_traffic ) # Backwards-compatibility addition of removed field. is_announcement_only = raw_stream_dict["stream_post_policy"] == Stream.STREAM_POST_POLICY_ADMINS # Our caller may add a subscribers field. return NeverSubscribedStreamDict( date_created=date_created, description=description, first_message_id=first_message_id, history_public_to_subscribers=history_public_to_subscribers, invite_only=invite_only, is_announcement_only=is_announcement_only, is_web_public=is_web_public, message_retention_days=message_retention_days, name=name, rendered_description=rendered_description, stream_id=stream_id, stream_post_policy=stream_post_policy, stream_weekly_traffic=stream_weekly_traffic, ) def validate_user_access_to_subscribers( user_profile: Optional[UserProfile], stream: Stream ) -> None: """Validates whether the user can view the subscribers of a stream. Raises a JsonableError if: * The user and the stream are in different realms * The realm is MIT and the stream is not invite only. * The stream is invite only, requesting_user is passed, and that user does not subscribe to the stream. """ validate_user_access_to_subscribers_helper( user_profile, { "realm_id": stream.realm_id, "is_web_public": stream.is_web_public, "invite_only": stream.invite_only, }, # We use a lambda here so that we only compute whether the # user is subscribed if we have to lambda user_profile: subscribed_to_stream(user_profile, stream.id), ) def validate_user_access_to_subscribers_helper( user_profile: Optional[UserProfile], stream_dict: Mapping[str, Any], check_user_subscribed: Callable[[UserProfile], bool], ) -> None: """Helper for validate_user_access_to_subscribers that doesn't require a full stream object. This function is a bit hard to read, because it is carefully optimized for performance in the two code paths we call it from: * In `bulk_get_subscriber_user_ids`, we already know whether the user was subscribed via `sub_dict`, and so we want to avoid a database query at all (especially since it calls this in a loop); * In `validate_user_access_to_subscribers`, we want to only check if the user is subscribed when we absolutely have to, since it costs a database query. The `check_user_subscribed` argument is a function that reports whether the user is subscribed to the stream. Note also that we raise a ValidationError in cases where the caller is doing the wrong thing (maybe these should be AssertionErrors), and JsonableError for 400 type errors. """ if user_profile is None: raise ValidationError("Missing user to validate access for") if user_profile.realm_id != stream_dict["realm_id"]: raise ValidationError("Requesting user not in given realm") # Even guest users can access subscribers to web-public streams, # since they can freely become subscribers to these streams. if stream_dict["is_web_public"]: return # With the exception of web-public streams, a guest must # be subscribed to a stream (even a public one) in order # to see subscribers. if user_profile.is_guest: if check_user_subscribed(user_profile): return # We could explicitly handle the case where guests aren't # subscribed here in an `else` statement or we can fall # through to the subsequent logic. Tim prefers the latter. # Adding an `else` would ensure better code coverage. if not user_profile.can_access_public_streams() and not stream_dict["invite_only"]: raise JsonableError(_("Subscriber data is not available for this stream")) # Organization administrators can view subscribers for all streams. if user_profile.is_realm_admin: return if stream_dict["invite_only"] and not check_user_subscribed(user_profile): raise JsonableError(_("Unable to retrieve subscribers for private stream")) def bulk_get_subscriber_user_ids( stream_dicts: Collection[Mapping[str, Any]], user_profile: UserProfile, subscribed_stream_ids: Set[int], ) -> Dict[int, List[int]]: """sub_dict maps stream_id => whether the user is subscribed to that stream.""" target_stream_dicts = [] for stream_dict in stream_dicts: stream_id = stream_dict["id"] is_subscribed = stream_id in subscribed_stream_ids try: validate_user_access_to_subscribers_helper( user_profile, stream_dict, lambda user_profile: is_subscribed, ) except JsonableError: continue target_stream_dicts.append(stream_dict) recip_to_stream_id = {stream["recipient_id"]: stream["id"] for stream in target_stream_dicts} recipient_ids = sorted(stream["recipient_id"] for stream in target_stream_dicts) result: Dict[int, List[int]] = {stream["id"]: [] for stream in stream_dicts} if not recipient_ids: return result """ The raw SQL below leads to more than a 2x speedup when tested with 20k+ total subscribers. (For large realms with lots of default streams, this function deals with LOTS of data, so it is important to optimize.) """ query = SQL( """ SELECT zerver_subscription.recipient_id, zerver_subscription.user_profile_id FROM zerver_subscription WHERE zerver_subscription.recipient_id in %(recipient_ids)s AND zerver_subscription.active AND zerver_subscription.is_user_active ORDER BY zerver_subscription.recipient_id, zerver_subscription.user_profile_id """ ) cursor = connection.cursor() cursor.execute(query, {"recipient_ids": tuple(recipient_ids)}) rows = cursor.fetchall() cursor.close() """ Using groupby/itemgetter here is important for performance, at scale. It makes it so that all interpreter overhead is just O(N) in nature. """ for recip_id, recip_rows in itertools.groupby(rows, itemgetter(0)): user_profile_ids = [r[1] for r in recip_rows] stream_id = recip_to_stream_id[recip_id] result[stream_id] = list(user_profile_ids) return result def get_subscribers_query( stream: Stream, requesting_user: Optional[UserProfile] ) -> QuerySet[Subscription]: """Build a query to get the subscribers list for a stream, raising a JsonableError if: 'realm' is optional in stream. The caller can refine this query with select_related(), values(), etc. depending on whether it wants objects or just certain fields """ validate_user_access_to_subscribers(requesting_user, stream) return get_active_subscriptions_for_stream_id(stream.id, include_deactivated_users=False) # In general, it's better to avoid using .values() because it makes # the code pretty ugly, but in this case, it has significant # performance impact for loading / for users with large numbers of # subscriptions, so it's worth optimizing. def gather_subscriptions_helper( user_profile: UserProfile, include_subscribers: bool = True, ) -> SubscriptionInfo: realm = user_profile.realm all_streams: QuerySet[RawStreamDict] = get_active_streams(realm).values( *Stream.API_FIELDS, # The realm_id and recipient_id are generally not needed in the API. "realm_id", "recipient_id", # email_token isn't public to some users with access to # the stream, so doesn't belong in API_FIELDS. "email_token", ) recip_id_to_stream_id: Dict[int, int] = { stream["recipient_id"]: stream["id"] for stream in all_streams } all_streams_map: Dict[int, RawStreamDict] = {stream["id"]: stream for stream in all_streams} sub_dicts_query: Iterable[RawSubscriptionDict] = ( get_stream_subscriptions_for_user(user_profile) .values( *Subscription.API_FIELDS, "recipient_id", "active", ) .order_by("recipient_id") ) # We only care about subscriptions for active streams. sub_dicts: List[RawSubscriptionDict] = [ sub_dict for sub_dict in sub_dicts_query if recip_id_to_stream_id.get(sub_dict["recipient_id"]) ] def get_stream_id(sub_dict: RawSubscriptionDict) -> int: return recip_id_to_stream_id[sub_dict["recipient_id"]] traffic_stream_ids = {get_stream_id(sub_dict) for sub_dict in sub_dicts} recent_traffic = get_streams_traffic(stream_ids=traffic_stream_ids) # Okay, now we finally get to populating our main results, which # will be these three lists. subscribed: List[SubscriptionStreamDict] = [] unsubscribed: List[SubscriptionStreamDict] = [] never_subscribed: List[NeverSubscribedStreamDict] = [] sub_unsub_stream_ids = set() for sub_dict in sub_dicts: stream_id = get_stream_id(sub_dict) sub_unsub_stream_ids.add(stream_id) raw_stream_dict = all_streams_map[stream_id] stream_dict = build_stream_dict_for_sub( user=user_profile, sub_dict=sub_dict, raw_stream_dict=raw_stream_dict, recent_traffic=recent_traffic, ) # is_active is represented in this structure by which list we include it in. is_active = sub_dict["active"] if is_active: subscribed.append(stream_dict) else: unsubscribed.append(stream_dict) if user_profile.can_access_public_streams(): never_subscribed_stream_ids = set(all_streams_map) - sub_unsub_stream_ids else: web_public_stream_ids = {stream["id"] for stream in all_streams if stream["is_web_public"]} never_subscribed_stream_ids = web_public_stream_ids - sub_unsub_stream_ids never_subscribed_streams = [ all_streams_map[stream_id] for stream_id in never_subscribed_stream_ids ] for raw_stream_dict in never_subscribed_streams: is_public = not raw_stream_dict["invite_only"] if is_public or user_profile.is_realm_admin: slim_stream_dict = build_stream_dict_for_never_sub( raw_stream_dict=raw_stream_dict, recent_traffic=recent_traffic ) never_subscribed.append(slim_stream_dict) if include_subscribers: # The highly optimized bulk_get_subscriber_user_ids wants to know which # streams we are subscribed to, for validation purposes, and it uses that # info to know if it's allowed to find OTHER subscribers. subscribed_stream_ids = { get_stream_id(sub_dict) for sub_dict in sub_dicts if sub_dict["active"] } subscriber_map = bulk_get_subscriber_user_ids( all_streams, user_profile, subscribed_stream_ids, ) for lst in [subscribed, unsubscribed]: for stream_dict in lst: assert isinstance(stream_dict["stream_id"], int) stream_id = stream_dict["stream_id"] stream_dict["subscribers"] = subscriber_map[stream_id] for slim_stream_dict in never_subscribed: assert isinstance(slim_stream_dict["stream_id"], int) stream_id = slim_stream_dict["stream_id"] slim_stream_dict["subscribers"] = subscriber_map[stream_id] subscribed.sort(key=lambda x: x["name"]) unsubscribed.sort(key=lambda x: x["name"]) never_subscribed.sort(key=lambda x: x["name"]) return SubscriptionInfo( subscriptions=subscribed, unsubscribed=unsubscribed, never_subscribed=never_subscribed, ) def gather_subscriptions( user_profile: UserProfile, include_subscribers: bool = False, ) -> Tuple[List[SubscriptionStreamDict], List[SubscriptionStreamDict]]: helper_result = gather_subscriptions_helper( user_profile, include_subscribers=include_subscribers, ) subscribed = helper_result.subscriptions unsubscribed = helper_result.unsubscribed return (subscribed, unsubscribed)