per-request caches: Add per_request_cache library.

We have historically cached two types of values
on a per-request basis inside of memory:

    * linkifiers
    * display recipients

Both of these caches were hand-written, and they
both actually cache values that are also in memcached,
so the per-request cache essentially only saves us
from a few memcached hits.

I think the linkifier per-request cache is a necessary
evil. It's an important part of message rendering, and
it's not super easy to structure the code to just get
a single value up front and pass it down the stack.

I'm not so sure we even need the display recipient
per-request cache any more, as we are generally pretty
smart now about hydrating recipient data in terms of
how the code is organized. But I haven't done thorough
research on that hypotheseis.

Fortunately, it's not rocket science to just write
a glorified memoize decorator and tie it into key
places in the code:

    * middleware
    * tests (e.g. asserting db counts)
    * queue processors

That's what I did in this commit.

This commit definitely reduces the amount of code
to maintain. I think it also gets us closer to
possibly phasing out this whole technique, but that
effort is beyond the scope of this PR. We could
add some instrumentation to the decorator to see
how often we get a non-trivial number of saved
round trips to memcached.

Note that when we flush linkifiers, we just use
a big hammer and flush the entire per-request
cache for linkifiers, since there is only ever
one realm in the cache.
This commit is contained in:
Steve Howell 2023-07-14 17:46:50 +00:00 committed by Tim Abbott
parent 751b8b5bb5
commit 51db22c86c
11 changed files with 55 additions and 52 deletions

View File

@ -240,10 +240,10 @@ We generally try to avoid in-process backend caching in Zulip's Django
codebase, because every Zulip production installation involves
multiple servers. We do have a few, however:
- `per_request_display_recipient_cache`: A cache flushed at the start
of every request; this simplifies correctly implementing our goal of
not repeatedly fetching the "display recipient" (e.g. stream name)
for each message in the `GET /messages` codebase.
- `@return_same_value_during_entire_request`: We use this decorator to
cache values in memory during the lifetime of a request. We use this
for linkifiers and display recipients. The middleware knows how to
flush the relevant in-memory caches at the start of a request.
- Caches of various data, like the `SourceMap` object, that are
expensive to construct, not needed for most requests, and don't
change once a Zulip server has been deployed in production.

View File

@ -0,0 +1,32 @@
from typing import Any, Callable, Dict, TypeVar
ReturnT = TypeVar("ReturnT")
FUNCTION_NAME_TO_PER_REQUEST_RESULT: Dict[str, Dict[int, Any]] = {}
def return_same_value_during_entire_request(f: Callable[..., ReturnT]) -> Callable[..., ReturnT]:
cache_key = f.__name__
assert cache_key not in FUNCTION_NAME_TO_PER_REQUEST_RESULT
FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {}
def wrapper(key: int, *args: Any) -> ReturnT:
if key in FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key]:
return FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key][key]
result = f(key, *args)
FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key][key] = result
return result
return wrapper
def flush_per_request_cache(cache_key: str) -> None:
if cache_key in FUNCTION_NAME_TO_PER_REQUEST_RESULT:
FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {}
def flush_per_request_caches() -> None:
for cache_key in FUNCTION_NAME_TO_PER_REQUEST_RESULT:
FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {}

View File

@ -55,6 +55,7 @@ from zerver.lib.cache import bounce_key_prefix_for_testing
from zerver.lib.initial_password import initial_password
from zerver.lib.message import access_message
from zerver.lib.notification_data import UserMessageNotificationsData
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.rate_limiter import bounce_redis_key_prefix_for_testing
from zerver.lib.sessions import get_session_dict_user
from zerver.lib.soft_deactivation import do_soft_deactivate_users
@ -99,7 +100,6 @@ from zerver.models import (
UserProfile,
UserStatus,
clear_supported_auth_backends_cache,
flush_per_request_caches,
get_realm,
get_realm_stream,
get_stream,

View File

@ -46,6 +46,7 @@ from zerver.lib.avatar import avatar_url
from zerver.lib.cache import get_cache_backend
from zerver.lib.db import Params, ParamsT, Query, TimeTrackingCursor
from zerver.lib.integrations import WEBHOOK_INTEGRATIONS
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.rate_limiter import RateLimitedIPAddr, rules
from zerver.lib.request import RequestNotes
from zerver.lib.upload.s3 import S3UploadBackend
@ -56,7 +57,6 @@ from zerver.models import (
Subscription,
UserMessage,
UserProfile,
flush_per_request_caches,
get_client,
get_realm,
get_stream,

View File

@ -32,6 +32,7 @@ from zerver.lib.debug import maybe_tracemalloc_listen
from zerver.lib.exceptions import ErrorCode, JsonableError, MissingAuthenticationError
from zerver.lib.html_to_text import get_content_description
from zerver.lib.markdown import get_markdown_requests, get_markdown_time
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.rate_limiter import RateLimitResult
from zerver.lib.request import REQ, RequestNotes, has_request_variables
from zerver.lib.response import (
@ -42,7 +43,7 @@ from zerver.lib.response import (
)
from zerver.lib.subdomains import get_subdomain
from zerver.lib.user_agent import parse_user_agent
from zerver.models import Realm, flush_per_request_caches, get_realm
from zerver.models import Realm, get_realm
ParamT = ParamSpec("ParamT")
logger = logging.getLogger("zulip.requests")

View File

@ -4,7 +4,6 @@ import hashlib
import secrets
import time
from collections import defaultdict
from contextlib import suppress
from datetime import timedelta
from email.headerregistry import Address
from typing import (
@ -83,6 +82,10 @@ from zerver.lib.cache import (
user_profile_cache_key,
)
from zerver.lib.exceptions import JsonableError, RateLimitedError
from zerver.lib.per_request_cache import (
flush_per_request_cache,
return_same_value_during_entire_request,
)
from zerver.lib.pysa import mark_sanitized
from zerver.lib.timestamp import datetime_to_timestamp
from zerver.lib.types import (
@ -183,17 +186,7 @@ def query_for_ids(
return query
# Doing 1000 remote cache requests to get_display_recipient is quite slow,
# so add a local cache as well as the remote cache.
#
# This local cache has a lifetime of just a single request; it is
# cleared inside `flush_per_request_caches` in our middleware. It
# could be replaced with smarter bulk-fetching logic that deduplicates
# queries for the same recipient; this is just a convenient way to
# write that code.
per_request_display_recipient_cache: Dict[int, List[UserDisplayRecipient]] = {}
@return_same_value_during_entire_request
def get_display_recipient_by_id(
recipient_id: int, recipient_type: int, recipient_type_id: Optional[int]
) -> List[UserDisplayRecipient]:
@ -205,10 +198,7 @@ def get_display_recipient_by_id(
# Have to import here, to avoid circular dependency.
from zerver.lib.display_recipient import get_display_recipient_remote_cache
if recipient_id not in per_request_display_recipient_cache:
result = get_display_recipient_remote_cache(recipient_id, recipient_type, recipient_type_id)
per_request_display_recipient_cache[recipient_id] = result
return per_request_display_recipient_cache[recipient_id]
return get_display_recipient_remote_cache(recipient_id, recipient_type, recipient_type_id)
def get_display_recipient(recipient: "Recipient") -> List[UserDisplayRecipient]:
@ -1310,22 +1300,9 @@ def get_linkifiers_cache_key(realm_id: int) -> str:
return f"{cache.KEY_PREFIX}:all_linkifiers_for_realm:{realm_id}"
# We have a per-process cache to avoid doing 1000 remote cache queries during page load
per_request_linkifiers_cache: Dict[int, List[LinkifierDict]] = {}
def realm_in_local_linkifiers_cache(realm_id: int) -> bool:
return realm_id in per_request_linkifiers_cache
def linkifiers_for_realm(realm_id: int) -> List[LinkifierDict]:
if not realm_in_local_linkifiers_cache(realm_id):
per_request_linkifiers_cache[realm_id] = linkifiers_for_realm_remote_cache(realm_id)
return per_request_linkifiers_cache[realm_id]
@return_same_value_during_entire_request
@cache_with_key(get_linkifiers_cache_key, timeout=3600 * 24 * 7)
def linkifiers_for_realm_remote_cache(realm_id: int) -> List[LinkifierDict]:
def linkifiers_for_realm(realm_id: int) -> List[LinkifierDict]:
return [
LinkifierDict(
pattern=linkifier.pattern,
@ -1339,21 +1316,13 @@ def linkifiers_for_realm_remote_cache(realm_id: int) -> List[LinkifierDict]:
def flush_linkifiers(*, instance: RealmFilter, **kwargs: object) -> None:
realm_id = instance.realm_id
cache_delete(get_linkifiers_cache_key(realm_id))
with suppress(KeyError):
per_request_linkifiers_cache.pop(realm_id)
flush_per_request_cache("linkifiers_for_realm")
post_save.connect(flush_linkifiers, sender=RealmFilter)
post_delete.connect(flush_linkifiers, sender=RealmFilter)
def flush_per_request_caches() -> None:
global per_request_display_recipient_cache
per_request_display_recipient_cache = {}
global per_request_linkifiers_cache
per_request_linkifiers_cache = {}
class RealmPlayground(models.Model):
"""Server side storage model to store playground information needed by our
'view code in playground' feature in code blocks.

View File

@ -54,6 +54,7 @@ from zerver.lib.mention import (
topic_wildcards,
)
from zerver.lib.message import render_markdown
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.tex import render_tex
from zerver.models import (
@ -63,7 +64,6 @@ from zerver.models import (
UserGroup,
UserMessage,
UserProfile,
flush_per_request_caches,
get_client,
get_realm,
get_stream,

View File

@ -6,6 +6,7 @@ from django.utils.timezone import now as timezone_now
from zerver.lib.cache import cache_delete, to_dict_cache_key_id
from zerver.lib.markdown import version as markdown_version
from zerver.lib.message import MessageDict, messages_for_ids, sew_messages_and_reactions
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import make_client
from zerver.lib.topic import TOPIC_LINKS
@ -18,7 +19,6 @@ from zerver.models import (
Recipient,
Stream,
UserProfile,
flush_per_request_caches,
get_display_recipient,
get_realm,
get_stream,

View File

@ -33,6 +33,7 @@ from zerver.actions.users import do_change_can_forge_sender, do_deactivate_user
from zerver.lib.addressee import Addressee
from zerver.lib.exceptions import JsonableError
from zerver.lib.message import MessageDict, get_raw_unread_data, get_recent_private_conversations
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.streams import create_stream_if_needed
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import (
@ -55,7 +56,6 @@ from zerver.models import (
UserGroup,
UserMessage,
UserProfile,
flush_per_request_caches,
get_or_create_huddle,
get_realm,
get_stream,

View File

@ -5,8 +5,9 @@ from django.views.decorators.csrf import csrf_exempt
from zerver.decorator import require_post
from zerver.lib.cache import get_cache_backend
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.response import json_success
from zerver.models import clear_client_cache, flush_per_request_caches
from zerver.models import clear_client_cache
ZULIP_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../")

View File

@ -68,6 +68,7 @@ from zerver.lib.email_notifications import MissedMessageData, handle_missedmessa
from zerver.lib.exceptions import RateLimitedError
from zerver.lib.export import export_realm_wrapper
from zerver.lib.outgoing_webhook import do_rest_call, get_outgoing_webhook_service_handler
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.push_notifications import (
clear_push_device_tokens,
handle_push_notification,
@ -99,7 +100,6 @@ from zerver.models import (
UserMessage,
UserProfile,
filter_to_valid_prereg_users,
flush_per_request_caches,
get_bot_services,
get_client,
get_system_bot,