cache: Eliminate get-stream-by-name cache.

We remove the cache functionality for the get_realm_stream function, and we also change it to return a thin Stream object (instead of calling select_related with no arguments). The main goal here is to remove code complexity, as we have been prone to at least one caching validation bug related to how Realm and UserGroup interact. That particular bug was more theoretical than practical in terms of its impact, to be clear. Even if we were to be perfectly disciplined about only caching thin stream objects and always making sure to delete cache entries when stream data changed, we would still be prone to ugly situations like having transactions get rolled back before we delete the cache entry. The do_deactivate_stream is a perfect example of where we have to consider the best time to unset the cache. If you unset it too early, then you are prone to races where somebody else churns the cache right before you update the database. If you set it too late, then you can have an invalid entry after a rollback or deadlock situation. If you just eliminate the cache as a moving part, that whole debate is moot. As the lack of test changes here indicates, we rarely fetch streams by name any more in critical sections of our code. The one place where we fetch by name is in loading the home page, but that is **only** when you specify a stream name. And, of course, that only causes about an extra millisecond of time.
2023-07-09 20:24:32 +00:00 · 2023-07-09 20:24:32 +00:00 · 89381a8072
parent 046e4c715b
commit 89381a8072
6 changed files with 2 additions and 50 deletions
--- a/zerver/actions/streams.py
+++ b/zerver/actions/streams.py
@ -17,11 +17,9 @@ from zerver.actions.default_streams import (
 )
 from zerver.actions.message_send import internal_send_stream_message
 from zerver.lib.cache import (
-    cache_delete,
    cache_delete_many,
    cache_set,
    display_recipient_cache_key,
-    get_stream_cache_key,
    to_dict_cache_key_id,
 )
 from zerver.lib.email_mirror_helpers import encode_email_address
@ -118,10 +116,6 @@ def do_deactivate_stream(stream: Stream, *, acting_user: Optional[UserProfile])
    for group in default_stream_groups_for_stream:
        do_remove_streams_from_default_stream_group(stream.realm, group, [stream])

-    # Remove the old stream information from remote cache.
-    old_cache_key = get_stream_cache_key(old_name, stream.realm_id)
-    cache_delete(old_cache_key)
-
    stream_dict = stream.to_dict()
    stream_dict.update(dict(name=old_name, invite_only=was_invite_only))
    event = dict(type="stream", op="delete", streams=[stream_dict])
@ -1102,13 +1096,6 @@ def do_rename_stream(stream: Stream, new_name: str, user_profile: UserProfile) -
    recipient_id: int = stream.recipient_id
    messages = Message.objects.filter(recipient_id=recipient_id).only("id")

-    # Update the display recipient and stream, which are easy single
-    # items to set.
-    old_cache_key = get_stream_cache_key(old_name, stream.realm_id)
-    new_cache_key = get_stream_cache_key(stream.name, stream.realm_id)
-    if old_cache_key != new_cache_key:
-        cache_delete(old_cache_key)
-        cache_set(new_cache_key, stream)
    cache_set(display_recipient_cache_key(recipient_id), stream.name)

    # Delete cache entries for everything else, which is cheaper and
--- a/zerver/lib/cache.py
+++ b/zerver/lib/cache.py
@ -531,10 +531,6 @@ def bot_dicts_in_realm_cache_key(realm_id: int) -> str:
    return f"bot_dicts_in_realm:{realm_id}"


-def get_stream_cache_key(stream_name: str, realm_id: int) -> str:
-    return f"stream_by_realm_and_name:{realm_id}:{make_safe_digest(stream_name.strip().lower())}"
-
-
 def delete_user_profile_caches(user_profiles: Iterable["UserProfile"]) -> None:
    # Imported here to avoid cyclic dependency.
    from zerver.lib.users import get_all_api_keys
@ -672,13 +668,6 @@ def flush_stream(
    from zerver.models import UserProfile

    stream = instance
-    items_for_remote_cache = {}
-
-    if update_fields is None:
-        cache_delete(get_stream_cache_key(stream.name, stream.realm_id))
-    else:
-        items_for_remote_cache[get_stream_cache_key(stream.name, stream.realm_id)] = (stream,)
-        cache_set_many(items_for_remote_cache)

    if (
        update_fields is None
--- a/zerver/lib/cache_helpers.py
+++ b/zerver/lib/cache_helpers.py
@ -17,7 +17,6 @@ from zerver.lib.cache import (
    cache_set_many,
    get_remote_cache_requests,
    get_remote_cache_time,
-    get_stream_cache_key,
    user_profile_by_api_key_cache_key,
    user_profile_cache_key,
 )
@ -27,7 +26,6 @@ from zerver.lib.users import get_all_api_keys
 from zerver.models import (
    Client,
    Huddle,
-    Stream,
    UserProfile,
    get_client_cache_key,
    huddle_hash_cache_key,
@ -46,10 +44,6 @@ def user_cache_items(
    # core serving path for lots of requests.


-def stream_cache_items(items_for_remote_cache: Dict[str, Tuple[Stream]], stream: Stream) -> None:
-    items_for_remote_cache[get_stream_cache_key(stream.name, stream.realm_id)] = (stream,)
-
-
 def client_cache_items(items_for_remote_cache: Dict[str, Tuple[Client]], client: Client) -> None:
    items_for_remote_cache[get_client_cache_key(client.name)] = (client,)

@ -87,18 +81,6 @@ def get_active_realm_ids() -> ValuesQuerySet[RealmCount, int]:
    )


-def get_streams() -> QuerySet[Stream]:
-    return (
-        Stream.objects.select_related()
-        .filter(realm__in=get_active_realm_ids())
-        .exclude(
-            # We filter out Zephyr realms, because they can easily
-            # have 10,000s of streams with only 1 subscriber.
-            is_in_zephyr_realm=True
-        )
-    )
-
-
 def get_users() -> QuerySet[UserProfile]:
    return UserProfile.objects.select_related().filter(
        long_term_idle=False, realm__in=get_active_realm_ids()
@ -121,7 +103,6 @@ cache_fillers: Dict[
        3600 * 24 * 7,
        10000,
    ),
-    "stream": (get_streams, stream_cache_items, 3600 * 24 * 7, 10000),
    "huddle": (
        lambda: Huddle.objects.select_related().all(),
        huddle_cache_items,
--- a/zerver/models.py
+++ b/zerver/models.py
@ -72,7 +72,6 @@ from zerver.lib.cache import (
    flush_used_upload_space_cache,
    flush_user_profile,
    get_realm_used_upload_space_cache_key,
-    get_stream_cache_key,
    realm_alert_words_automaton_cache_key,
    realm_alert_words_cache_key,
    realm_user_dict_fields,
@ -2844,9 +2843,8 @@ def get_client_remote_cache(name: str) -> Client:
    return client


-@cache_with_key(get_stream_cache_key, timeout=3600 * 24 * 7)
 def get_realm_stream(stream_name: str, realm_id: int) -> Stream:
-    return Stream.objects.select_related().get(name__iexact=stream_name.strip(), realm_id=realm_id)
+    return Stream.objects.get(name__iexact=stream_name.strip(), realm_id=realm_id)


 def get_active_streams(realm: Realm) -> QuerySet[Stream]:
--- a/zerver/tests/test_home.py
+++ b/zerver/tests/test_home.py
@ -255,7 +255,7 @@ class HomeTest(ZulipTestCase):
            set(result["Cache-Control"].split(", ")), {"must-revalidate", "no-store", "no-cache"}
        )

-        self.assert_length(cache_mock.call_args_list, 5)
+        self.assert_length(cache_mock.call_args_list, 4)

        html = result.content.decode()

--- a/zerver/tests/test_message_send.py
+++ b/zerver/tests/test_message_send.py
@ -31,7 +31,6 @@ from zerver.actions.streams import do_change_stream_post_policy
 from zerver.actions.user_groups import add_subgroups_to_user_group, check_add_user_group
 from zerver.actions.users import do_change_can_forge_sender, do_deactivate_user
 from zerver.lib.addressee import Addressee
-from zerver.lib.cache import cache_delete, get_stream_cache_key
 from zerver.lib.exceptions import JsonableError
 from zerver.lib.message import MessageDict, get_raw_unread_data, get_recent_private_conversations
 from zerver.lib.streams import create_stream_if_needed
@ -1466,7 +1465,6 @@ class StreamMessagesTest(ZulipTestCase):
        stream_name = "Denmark"
        topic_name = "foo"
        content = "whatever"
-        realm = sender.realm

        # To get accurate count of the queries, we should make sure that
        # caches don't come into play. If we count queries while caches are
@ -1474,7 +1472,6 @@ class StreamMessagesTest(ZulipTestCase):
        # persistent, so our test can also fail if cache is invalidated
        # during the course of the unit test.
        flush_per_request_caches()
-        cache_delete(get_stream_cache_key(stream_name, realm.id))
        with self.assert_database_query_count(13):
            check_send_stream_message(
                sender=sender,