cache: Eliminate get-stream-by-name cache.

We remove the cache functionality for the
get_realm_stream function, and we also change it to
return a thin Stream object (instead of calling
select_related with no arguments).

The main goal here is to remove code complexity, as we
have been prone to at least one caching validation bug
related to how Realm and UserGroup interact. That
particular bug was more theoretical than practical in
terms of its impact, to be clear.

Even if we were to be perfectly disciplined about only
caching thin stream objects and always making sure to
delete cache entries when stream data changed, we would
still be prone to ugly situations like having
transactions get rolled back before we delete the cache
entry. The do_deactivate_stream is a perfect example of
where we have to consider the best time to unset the
cache. If you unset it too early, then you are prone to
races where somebody else churns the cache right before
you update the database. If you set it too late, then
you can have an invalid entry after a rollback or
deadlock situation. If you just eliminate the cache as
a moving part, that whole debate is moot.

As the lack of test changes here indicates, we rarely
fetch streams by name any more in critical sections of
our code.

The one place where we fetch by name is in loading the
home page, but that is **only** when you specify a
stream name. And, of course, that only causes about an
extra millisecond of time.
This commit is contained in:
Steve Howell 2023-07-09 20:24:32 +00:00 committed by Tim Abbott
parent 046e4c715b
commit 89381a8072
6 changed files with 2 additions and 50 deletions

View File

@ -17,11 +17,9 @@ from zerver.actions.default_streams import (
)
from zerver.actions.message_send import internal_send_stream_message
from zerver.lib.cache import (
cache_delete,
cache_delete_many,
cache_set,
display_recipient_cache_key,
get_stream_cache_key,
to_dict_cache_key_id,
)
from zerver.lib.email_mirror_helpers import encode_email_address
@ -118,10 +116,6 @@ def do_deactivate_stream(stream: Stream, *, acting_user: Optional[UserProfile])
for group in default_stream_groups_for_stream:
do_remove_streams_from_default_stream_group(stream.realm, group, [stream])
# Remove the old stream information from remote cache.
old_cache_key = get_stream_cache_key(old_name, stream.realm_id)
cache_delete(old_cache_key)
stream_dict = stream.to_dict()
stream_dict.update(dict(name=old_name, invite_only=was_invite_only))
event = dict(type="stream", op="delete", streams=[stream_dict])
@ -1102,13 +1096,6 @@ def do_rename_stream(stream: Stream, new_name: str, user_profile: UserProfile) -
recipient_id: int = stream.recipient_id
messages = Message.objects.filter(recipient_id=recipient_id).only("id")
# Update the display recipient and stream, which are easy single
# items to set.
old_cache_key = get_stream_cache_key(old_name, stream.realm_id)
new_cache_key = get_stream_cache_key(stream.name, stream.realm_id)
if old_cache_key != new_cache_key:
cache_delete(old_cache_key)
cache_set(new_cache_key, stream)
cache_set(display_recipient_cache_key(recipient_id), stream.name)
# Delete cache entries for everything else, which is cheaper and

View File

@ -531,10 +531,6 @@ def bot_dicts_in_realm_cache_key(realm_id: int) -> str:
return f"bot_dicts_in_realm:{realm_id}"
def get_stream_cache_key(stream_name: str, realm_id: int) -> str:
return f"stream_by_realm_and_name:{realm_id}:{make_safe_digest(stream_name.strip().lower())}"
def delete_user_profile_caches(user_profiles: Iterable["UserProfile"]) -> None:
# Imported here to avoid cyclic dependency.
from zerver.lib.users import get_all_api_keys
@ -672,13 +668,6 @@ def flush_stream(
from zerver.models import UserProfile
stream = instance
items_for_remote_cache = {}
if update_fields is None:
cache_delete(get_stream_cache_key(stream.name, stream.realm_id))
else:
items_for_remote_cache[get_stream_cache_key(stream.name, stream.realm_id)] = (stream,)
cache_set_many(items_for_remote_cache)
if (
update_fields is None

View File

@ -17,7 +17,6 @@ from zerver.lib.cache import (
cache_set_many,
get_remote_cache_requests,
get_remote_cache_time,
get_stream_cache_key,
user_profile_by_api_key_cache_key,
user_profile_cache_key,
)
@ -27,7 +26,6 @@ from zerver.lib.users import get_all_api_keys
from zerver.models import (
Client,
Huddle,
Stream,
UserProfile,
get_client_cache_key,
huddle_hash_cache_key,
@ -46,10 +44,6 @@ def user_cache_items(
# core serving path for lots of requests.
def stream_cache_items(items_for_remote_cache: Dict[str, Tuple[Stream]], stream: Stream) -> None:
items_for_remote_cache[get_stream_cache_key(stream.name, stream.realm_id)] = (stream,)
def client_cache_items(items_for_remote_cache: Dict[str, Tuple[Client]], client: Client) -> None:
items_for_remote_cache[get_client_cache_key(client.name)] = (client,)
@ -87,18 +81,6 @@ def get_active_realm_ids() -> ValuesQuerySet[RealmCount, int]:
)
def get_streams() -> QuerySet[Stream]:
return (
Stream.objects.select_related()
.filter(realm__in=get_active_realm_ids())
.exclude(
# We filter out Zephyr realms, because they can easily
# have 10,000s of streams with only 1 subscriber.
is_in_zephyr_realm=True
)
)
def get_users() -> QuerySet[UserProfile]:
return UserProfile.objects.select_related().filter(
long_term_idle=False, realm__in=get_active_realm_ids()
@ -121,7 +103,6 @@ cache_fillers: Dict[
3600 * 24 * 7,
10000,
),
"stream": (get_streams, stream_cache_items, 3600 * 24 * 7, 10000),
"huddle": (
lambda: Huddle.objects.select_related().all(),
huddle_cache_items,

View File

@ -72,7 +72,6 @@ from zerver.lib.cache import (
flush_used_upload_space_cache,
flush_user_profile,
get_realm_used_upload_space_cache_key,
get_stream_cache_key,
realm_alert_words_automaton_cache_key,
realm_alert_words_cache_key,
realm_user_dict_fields,
@ -2844,9 +2843,8 @@ def get_client_remote_cache(name: str) -> Client:
return client
@cache_with_key(get_stream_cache_key, timeout=3600 * 24 * 7)
def get_realm_stream(stream_name: str, realm_id: int) -> Stream:
return Stream.objects.select_related().get(name__iexact=stream_name.strip(), realm_id=realm_id)
return Stream.objects.get(name__iexact=stream_name.strip(), realm_id=realm_id)
def get_active_streams(realm: Realm) -> QuerySet[Stream]:

View File

@ -255,7 +255,7 @@ class HomeTest(ZulipTestCase):
set(result["Cache-Control"].split(", ")), {"must-revalidate", "no-store", "no-cache"}
)
self.assert_length(cache_mock.call_args_list, 5)
self.assert_length(cache_mock.call_args_list, 4)
html = result.content.decode()

View File

@ -31,7 +31,6 @@ from zerver.actions.streams import do_change_stream_post_policy
from zerver.actions.user_groups import add_subgroups_to_user_group, check_add_user_group
from zerver.actions.users import do_change_can_forge_sender, do_deactivate_user
from zerver.lib.addressee import Addressee
from zerver.lib.cache import cache_delete, get_stream_cache_key
from zerver.lib.exceptions import JsonableError
from zerver.lib.message import MessageDict, get_raw_unread_data, get_recent_private_conversations
from zerver.lib.streams import create_stream_if_needed
@ -1466,7 +1465,6 @@ class StreamMessagesTest(ZulipTestCase):
stream_name = "Denmark"
topic_name = "foo"
content = "whatever"
realm = sender.realm
# To get accurate count of the queries, we should make sure that
# caches don't come into play. If we count queries while caches are
@ -1474,7 +1472,6 @@ class StreamMessagesTest(ZulipTestCase):
# persistent, so our test can also fail if cache is invalidated
# during the course of the unit test.
flush_per_request_caches()
cache_delete(get_stream_cache_key(stream_name, realm.id))
with self.assert_database_query_count(13):
check_send_stream_message(
sender=sender,