cache: Use DB for all bulk get-stream-by-name queries.

This changes bulk_get_streams so that it just uses the
database all the time.  Also, we avoid calling
select_related(), so that we just get back thin and
tidy Stream objects with simple queries.

About not caching any more:

It's actually pretty rare that we fetch streams by name
in the main application. It's usually API requests that
send in stream names to find more info about streams.

It also turns out that for large queries (>= ~30 rows
for my measurements) it's more efficent to hit the
database than memcached. The database is super fast at
scale; it's just the startup cost of having Django
construct the query, and then having the database do
query planning or whatever, that slows us down. I don't
know the exact bottleneck, but you can clearly measure
that one-row queries are slow (on the order of a full
millisecond or so) but the marginal cost of additional
rows is minimal assuming you have a decent index (20
microseconds per row on my droplet).

All the query-count changes in the tests revolve around
unsubscribing somebody from a stream, and that's a
particularly odd use case for bulk_get_streams, since
you generally unsubscribe from a single stream at a
time. If there are some use cases where you do want to
unsubscribe from multiple streams, we should move
toward passing in stream ids, at least from the
application. And even if we don't do that, our cost for
most queries is a couple milliseconds.
This commit is contained in:
Steve Howell 2023-07-11 12:24:06 +00:00 committed by Tim Abbott
parent adb548c7a2
commit 046e4c715b
2 changed files with 19 additions and 36 deletions

View File

@ -7,7 +7,6 @@ from datetime import timedelta
from email.headerregistry import Address
from typing import (
TYPE_CHECKING,
AbstractSet,
Any,
Callable,
Dict,
@ -15,7 +14,6 @@ from typing import (
List,
Optional,
Pattern,
Sequence,
Set,
Tuple,
TypedDict,
@ -63,7 +61,6 @@ from zerver.lib.cache import (
bot_dict_fields,
bot_dicts_in_realm_cache_key,
bot_profile_cache_key,
bulk_cached_fetch,
cache_delete,
cache_set,
cache_with_key,
@ -120,8 +117,6 @@ MAX_LANGUAGE_ID_LENGTH: int = 50
SECONDS_PER_DAY = 86400
STREAM_NAMES = TypeVar("STREAM_NAMES", Sequence[str], AbstractSet[str])
if TYPE_CHECKING:
# We use ModelBackend only for typing. Importing it otherwise causes circular dependency.
from django.contrib.auth.backends import ModelBackend
@ -2884,8 +2879,8 @@ def get_stream_by_id_in_realm(stream_id: int, realm: Realm) -> Stream:
return Stream.objects.select_related().get(id=stream_id, realm=realm)
def bulk_get_streams(realm: Realm, stream_names: STREAM_NAMES) -> Dict[str, Any]:
def fetch_streams_by_name(stream_names: List[str]) -> QuerySet[Stream]:
def bulk_get_streams(realm: Realm, stream_names: Set[str]) -> Dict[str, Any]:
def fetch_streams_by_name(stream_names: Set[str]) -> QuerySet[Stream]:
#
# This should be just
#
@ -2897,24 +2892,12 @@ def bulk_get_streams(realm: Realm, stream_names: STREAM_NAMES) -> Dict[str, Any]
where_clause = (
"upper(zerver_stream.name::text) IN (SELECT upper(name) FROM unnest(%s) AS name)"
)
return (
get_active_streams(realm)
.select_related()
.extra(where=[where_clause], params=(list(stream_names),))
)
return get_active_streams(realm).extra(where=[where_clause], params=(list(stream_names),))
def stream_name_to_cache_key(stream_name: str) -> str:
return get_stream_cache_key(stream_name, realm.id)
def stream_to_lower_name(stream: Stream) -> str:
return stream.name.lower()
return bulk_cached_fetch(
stream_name_to_cache_key,
fetch_streams_by_name,
[stream_name.lower() for stream_name in stream_names],
id_fetcher=stream_to_lower_name,
)
if not stream_names:
return {}
streams = list(fetch_streams_by_name(stream_names))
return {stream.name.lower(): stream for stream in streams}
def get_huddle_recipient(user_profile_ids: Set[int]) -> Recipient:

View File

@ -2418,7 +2418,7 @@ class StreamAdminTest(ZulipTestCase):
If you're not an admin, you can't remove other people from streams except your own bots.
"""
result = self.attempt_unsubscribe_of_principal(
query_count=7,
query_count=8,
target_users=[self.example_user("cordelia")],
is_realm_admin=False,
is_subbed=True,
@ -2433,7 +2433,7 @@ class StreamAdminTest(ZulipTestCase):
those you aren't on.
"""
result = self.attempt_unsubscribe_of_principal(
query_count=15,
query_count=16,
target_users=[self.example_user("cordelia")],
is_realm_admin=True,
is_subbed=True,
@ -2460,8 +2460,8 @@ class StreamAdminTest(ZulipTestCase):
for name in ["cordelia", "prospero", "iago", "hamlet", "outgoing_webhook_bot"]
]
result = self.attempt_unsubscribe_of_principal(
query_count=27,
cache_count=9,
query_count=28,
cache_count=8,
target_users=target_users,
is_realm_admin=True,
is_subbed=True,
@ -2478,7 +2478,7 @@ class StreamAdminTest(ZulipTestCase):
are on.
"""
result = self.attempt_unsubscribe_of_principal(
query_count=16,
query_count=17,
target_users=[self.example_user("cordelia")],
is_realm_admin=True,
is_subbed=True,
@ -2495,7 +2495,7 @@ class StreamAdminTest(ZulipTestCase):
streams you aren't on.
"""
result = self.attempt_unsubscribe_of_principal(
query_count=16,
query_count=17,
target_users=[self.example_user("cordelia")],
is_realm_admin=True,
is_subbed=False,
@ -2509,7 +2509,7 @@ class StreamAdminTest(ZulipTestCase):
def test_cant_remove_others_from_stream_legacy_emails(self) -> None:
result = self.attempt_unsubscribe_of_principal(
query_count=7,
query_count=8,
is_realm_admin=False,
is_subbed=True,
invite_only=False,
@ -2521,7 +2521,7 @@ class StreamAdminTest(ZulipTestCase):
def test_admin_remove_others_from_stream_legacy_emails(self) -> None:
result = self.attempt_unsubscribe_of_principal(
query_count=15,
query_count=16,
target_users=[self.example_user("cordelia")],
is_realm_admin=True,
is_subbed=True,
@ -2535,7 +2535,7 @@ class StreamAdminTest(ZulipTestCase):
def test_admin_remove_multiple_users_from_stream_legacy_emails(self) -> None:
result = self.attempt_unsubscribe_of_principal(
query_count=18,
query_count=19,
target_users=[self.example_user("cordelia"), self.example_user("prospero")],
is_realm_admin=True,
is_subbed=True,
@ -2553,7 +2553,7 @@ class StreamAdminTest(ZulipTestCase):
fails gracefully.
"""
result = self.attempt_unsubscribe_of_principal(
query_count=10,
query_count=11,
target_users=[self.example_user("cordelia")],
is_realm_admin=True,
is_subbed=False,
@ -2583,7 +2583,7 @@ class StreamAdminTest(ZulipTestCase):
webhook_bot = self.example_user("webhook_bot")
do_change_bot_owner(webhook_bot, bot_owner=other_user, acting_user=other_user)
result = self.attempt_unsubscribe_of_principal(
query_count=8,
query_count=9,
target_users=[webhook_bot],
is_realm_admin=False,
is_subbed=True,
@ -4873,7 +4873,7 @@ class SubscriptionAPITest(ZulipTestCase):
# The only known O(N) behavior here is that we call
# principal_to_user_profile for each of our users.
self.assert_length(cache_tries, 4)
self.assert_length(cache_tries, 3)
def test_subscriptions_add_for_principal(self) -> None:
"""