mirror of https://github.com/zulip/zulip.git
cache: Use DB for all bulk get-stream-by-name queries.
This changes bulk_get_streams so that it just uses the database all the time. Also, we avoid calling select_related(), so that we just get back thin and tidy Stream objects with simple queries. About not caching any more: It's actually pretty rare that we fetch streams by name in the main application. It's usually API requests that send in stream names to find more info about streams. It also turns out that for large queries (>= ~30 rows for my measurements) it's more efficent to hit the database than memcached. The database is super fast at scale; it's just the startup cost of having Django construct the query, and then having the database do query planning or whatever, that slows us down. I don't know the exact bottleneck, but you can clearly measure that one-row queries are slow (on the order of a full millisecond or so) but the marginal cost of additional rows is minimal assuming you have a decent index (20 microseconds per row on my droplet). All the query-count changes in the tests revolve around unsubscribing somebody from a stream, and that's a particularly odd use case for bulk_get_streams, since you generally unsubscribe from a single stream at a time. If there are some use cases where you do want to unsubscribe from multiple streams, we should move toward passing in stream ids, at least from the application. And even if we don't do that, our cost for most queries is a couple milliseconds.
This commit is contained in:
parent
adb548c7a2
commit
046e4c715b
|
@ -7,7 +7,6 @@ from datetime import timedelta
|
|||
from email.headerregistry import Address
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
AbstractSet,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
|
@ -15,7 +14,6 @@ from typing import (
|
|||
List,
|
||||
Optional,
|
||||
Pattern,
|
||||
Sequence,
|
||||
Set,
|
||||
Tuple,
|
||||
TypedDict,
|
||||
|
@ -63,7 +61,6 @@ from zerver.lib.cache import (
|
|||
bot_dict_fields,
|
||||
bot_dicts_in_realm_cache_key,
|
||||
bot_profile_cache_key,
|
||||
bulk_cached_fetch,
|
||||
cache_delete,
|
||||
cache_set,
|
||||
cache_with_key,
|
||||
|
@ -120,8 +117,6 @@ MAX_LANGUAGE_ID_LENGTH: int = 50
|
|||
|
||||
SECONDS_PER_DAY = 86400
|
||||
|
||||
STREAM_NAMES = TypeVar("STREAM_NAMES", Sequence[str], AbstractSet[str])
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# We use ModelBackend only for typing. Importing it otherwise causes circular dependency.
|
||||
from django.contrib.auth.backends import ModelBackend
|
||||
|
@ -2884,8 +2879,8 @@ def get_stream_by_id_in_realm(stream_id: int, realm: Realm) -> Stream:
|
|||
return Stream.objects.select_related().get(id=stream_id, realm=realm)
|
||||
|
||||
|
||||
def bulk_get_streams(realm: Realm, stream_names: STREAM_NAMES) -> Dict[str, Any]:
|
||||
def fetch_streams_by_name(stream_names: List[str]) -> QuerySet[Stream]:
|
||||
def bulk_get_streams(realm: Realm, stream_names: Set[str]) -> Dict[str, Any]:
|
||||
def fetch_streams_by_name(stream_names: Set[str]) -> QuerySet[Stream]:
|
||||
#
|
||||
# This should be just
|
||||
#
|
||||
|
@ -2897,24 +2892,12 @@ def bulk_get_streams(realm: Realm, stream_names: STREAM_NAMES) -> Dict[str, Any]
|
|||
where_clause = (
|
||||
"upper(zerver_stream.name::text) IN (SELECT upper(name) FROM unnest(%s) AS name)"
|
||||
)
|
||||
return (
|
||||
get_active_streams(realm)
|
||||
.select_related()
|
||||
.extra(where=[where_clause], params=(list(stream_names),))
|
||||
)
|
||||
return get_active_streams(realm).extra(where=[where_clause], params=(list(stream_names),))
|
||||
|
||||
def stream_name_to_cache_key(stream_name: str) -> str:
|
||||
return get_stream_cache_key(stream_name, realm.id)
|
||||
|
||||
def stream_to_lower_name(stream: Stream) -> str:
|
||||
return stream.name.lower()
|
||||
|
||||
return bulk_cached_fetch(
|
||||
stream_name_to_cache_key,
|
||||
fetch_streams_by_name,
|
||||
[stream_name.lower() for stream_name in stream_names],
|
||||
id_fetcher=stream_to_lower_name,
|
||||
)
|
||||
if not stream_names:
|
||||
return {}
|
||||
streams = list(fetch_streams_by_name(stream_names))
|
||||
return {stream.name.lower(): stream for stream in streams}
|
||||
|
||||
|
||||
def get_huddle_recipient(user_profile_ids: Set[int]) -> Recipient:
|
||||
|
|
|
@ -2418,7 +2418,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
If you're not an admin, you can't remove other people from streams except your own bots.
|
||||
"""
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=7,
|
||||
query_count=8,
|
||||
target_users=[self.example_user("cordelia")],
|
||||
is_realm_admin=False,
|
||||
is_subbed=True,
|
||||
|
@ -2433,7 +2433,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
those you aren't on.
|
||||
"""
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=15,
|
||||
query_count=16,
|
||||
target_users=[self.example_user("cordelia")],
|
||||
is_realm_admin=True,
|
||||
is_subbed=True,
|
||||
|
@ -2460,8 +2460,8 @@ class StreamAdminTest(ZulipTestCase):
|
|||
for name in ["cordelia", "prospero", "iago", "hamlet", "outgoing_webhook_bot"]
|
||||
]
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=27,
|
||||
cache_count=9,
|
||||
query_count=28,
|
||||
cache_count=8,
|
||||
target_users=target_users,
|
||||
is_realm_admin=True,
|
||||
is_subbed=True,
|
||||
|
@ -2478,7 +2478,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
are on.
|
||||
"""
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=16,
|
||||
query_count=17,
|
||||
target_users=[self.example_user("cordelia")],
|
||||
is_realm_admin=True,
|
||||
is_subbed=True,
|
||||
|
@ -2495,7 +2495,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
streams you aren't on.
|
||||
"""
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=16,
|
||||
query_count=17,
|
||||
target_users=[self.example_user("cordelia")],
|
||||
is_realm_admin=True,
|
||||
is_subbed=False,
|
||||
|
@ -2509,7 +2509,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
|
||||
def test_cant_remove_others_from_stream_legacy_emails(self) -> None:
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=7,
|
||||
query_count=8,
|
||||
is_realm_admin=False,
|
||||
is_subbed=True,
|
||||
invite_only=False,
|
||||
|
@ -2521,7 +2521,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
|
||||
def test_admin_remove_others_from_stream_legacy_emails(self) -> None:
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=15,
|
||||
query_count=16,
|
||||
target_users=[self.example_user("cordelia")],
|
||||
is_realm_admin=True,
|
||||
is_subbed=True,
|
||||
|
@ -2535,7 +2535,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
|
||||
def test_admin_remove_multiple_users_from_stream_legacy_emails(self) -> None:
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=18,
|
||||
query_count=19,
|
||||
target_users=[self.example_user("cordelia"), self.example_user("prospero")],
|
||||
is_realm_admin=True,
|
||||
is_subbed=True,
|
||||
|
@ -2553,7 +2553,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
fails gracefully.
|
||||
"""
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=10,
|
||||
query_count=11,
|
||||
target_users=[self.example_user("cordelia")],
|
||||
is_realm_admin=True,
|
||||
is_subbed=False,
|
||||
|
@ -2583,7 +2583,7 @@ class StreamAdminTest(ZulipTestCase):
|
|||
webhook_bot = self.example_user("webhook_bot")
|
||||
do_change_bot_owner(webhook_bot, bot_owner=other_user, acting_user=other_user)
|
||||
result = self.attempt_unsubscribe_of_principal(
|
||||
query_count=8,
|
||||
query_count=9,
|
||||
target_users=[webhook_bot],
|
||||
is_realm_admin=False,
|
||||
is_subbed=True,
|
||||
|
@ -4873,7 +4873,7 @@ class SubscriptionAPITest(ZulipTestCase):
|
|||
|
||||
# The only known O(N) behavior here is that we call
|
||||
# principal_to_user_profile for each of our users.
|
||||
self.assert_length(cache_tries, 4)
|
||||
self.assert_length(cache_tries, 3)
|
||||
|
||||
def test_subscriptions_add_for_principal(self) -> None:
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue