From 51db22c86cb0e2b14ba856254871173c793206c0 Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Fri, 14 Jul 2023 17:46:50 +0000 Subject: [PATCH] per-request caches: Add per_request_cache library. We have historically cached two types of values on a per-request basis inside of memory: * linkifiers * display recipients Both of these caches were hand-written, and they both actually cache values that are also in memcached, so the per-request cache essentially only saves us from a few memcached hits. I think the linkifier per-request cache is a necessary evil. It's an important part of message rendering, and it's not super easy to structure the code to just get a single value up front and pass it down the stack. I'm not so sure we even need the display recipient per-request cache any more, as we are generally pretty smart now about hydrating recipient data in terms of how the code is organized. But I haven't done thorough research on that hypotheseis. Fortunately, it's not rocket science to just write a glorified memoize decorator and tie it into key places in the code: * middleware * tests (e.g. asserting db counts) * queue processors That's what I did in this commit. This commit definitely reduces the amount of code to maintain. I think it also gets us closer to possibly phasing out this whole technique, but that effort is beyond the scope of this PR. We could add some instrumentation to the decorator to see how often we get a non-trivial number of saved round trips to memcached. Note that when we flush linkifiers, we just use a big hammer and flush the entire per-request cache for linkifiers, since there is only ever one realm in the cache. --- docs/subsystems/caching.md | 8 ++--- zerver/lib/per_request_cache.py | 32 ++++++++++++++++++++ zerver/lib/test_classes.py | 2 +- zerver/lib/test_helpers.py | 2 +- zerver/middleware.py | 3 +- zerver/models.py | 49 ++++++------------------------- zerver/tests/test_markdown.py | 2 +- zerver/tests/test_message_dict.py | 2 +- zerver/tests/test_message_send.py | 2 +- zerver/views/development/cache.py | 3 +- zerver/worker/queue_processors.py | 2 +- 11 files changed, 55 insertions(+), 52 deletions(-) create mode 100644 zerver/lib/per_request_cache.py diff --git a/docs/subsystems/caching.md b/docs/subsystems/caching.md index 5ac31db49a..d1742d9c40 100644 --- a/docs/subsystems/caching.md +++ b/docs/subsystems/caching.md @@ -240,10 +240,10 @@ We generally try to avoid in-process backend caching in Zulip's Django codebase, because every Zulip production installation involves multiple servers. We do have a few, however: -- `per_request_display_recipient_cache`: A cache flushed at the start - of every request; this simplifies correctly implementing our goal of - not repeatedly fetching the "display recipient" (e.g. stream name) - for each message in the `GET /messages` codebase. +- `@return_same_value_during_entire_request`: We use this decorator to + cache values in memory during the lifetime of a request. We use this + for linkifiers and display recipients. The middleware knows how to + flush the relevant in-memory caches at the start of a request. - Caches of various data, like the `SourceMap` object, that are expensive to construct, not needed for most requests, and don't change once a Zulip server has been deployed in production. diff --git a/zerver/lib/per_request_cache.py b/zerver/lib/per_request_cache.py new file mode 100644 index 0000000000..e010c18695 --- /dev/null +++ b/zerver/lib/per_request_cache.py @@ -0,0 +1,32 @@ +from typing import Any, Callable, Dict, TypeVar + +ReturnT = TypeVar("ReturnT") + +FUNCTION_NAME_TO_PER_REQUEST_RESULT: Dict[str, Dict[int, Any]] = {} + + +def return_same_value_during_entire_request(f: Callable[..., ReturnT]) -> Callable[..., ReturnT]: + cache_key = f.__name__ + + assert cache_key not in FUNCTION_NAME_TO_PER_REQUEST_RESULT + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {} + + def wrapper(key: int, *args: Any) -> ReturnT: + if key in FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key]: + return FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key][key] + + result = f(key, *args) + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key][key] = result + return result + + return wrapper + + +def flush_per_request_cache(cache_key: str) -> None: + if cache_key in FUNCTION_NAME_TO_PER_REQUEST_RESULT: + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {} + + +def flush_per_request_caches() -> None: + for cache_key in FUNCTION_NAME_TO_PER_REQUEST_RESULT: + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {} diff --git a/zerver/lib/test_classes.py b/zerver/lib/test_classes.py index c470175069..131d2f078f 100644 --- a/zerver/lib/test_classes.py +++ b/zerver/lib/test_classes.py @@ -55,6 +55,7 @@ from zerver.lib.cache import bounce_key_prefix_for_testing from zerver.lib.initial_password import initial_password from zerver.lib.message import access_message from zerver.lib.notification_data import UserMessageNotificationsData +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.rate_limiter import bounce_redis_key_prefix_for_testing from zerver.lib.sessions import get_session_dict_user from zerver.lib.soft_deactivation import do_soft_deactivate_users @@ -99,7 +100,6 @@ from zerver.models import ( UserProfile, UserStatus, clear_supported_auth_backends_cache, - flush_per_request_caches, get_realm, get_realm_stream, get_stream, diff --git a/zerver/lib/test_helpers.py b/zerver/lib/test_helpers.py index 16029ca68e..7bbf5e870b 100644 --- a/zerver/lib/test_helpers.py +++ b/zerver/lib/test_helpers.py @@ -46,6 +46,7 @@ from zerver.lib.avatar import avatar_url from zerver.lib.cache import get_cache_backend from zerver.lib.db import Params, ParamsT, Query, TimeTrackingCursor from zerver.lib.integrations import WEBHOOK_INTEGRATIONS +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.rate_limiter import RateLimitedIPAddr, rules from zerver.lib.request import RequestNotes from zerver.lib.upload.s3 import S3UploadBackend @@ -56,7 +57,6 @@ from zerver.models import ( Subscription, UserMessage, UserProfile, - flush_per_request_caches, get_client, get_realm, get_stream, diff --git a/zerver/middleware.py b/zerver/middleware.py index 7124c893d1..04ca9d57b5 100644 --- a/zerver/middleware.py +++ b/zerver/middleware.py @@ -32,6 +32,7 @@ from zerver.lib.debug import maybe_tracemalloc_listen from zerver.lib.exceptions import ErrorCode, JsonableError, MissingAuthenticationError from zerver.lib.html_to_text import get_content_description from zerver.lib.markdown import get_markdown_requests, get_markdown_time +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.rate_limiter import RateLimitResult from zerver.lib.request import REQ, RequestNotes, has_request_variables from zerver.lib.response import ( @@ -42,7 +43,7 @@ from zerver.lib.response import ( ) from zerver.lib.subdomains import get_subdomain from zerver.lib.user_agent import parse_user_agent -from zerver.models import Realm, flush_per_request_caches, get_realm +from zerver.models import Realm, get_realm ParamT = ParamSpec("ParamT") logger = logging.getLogger("zulip.requests") diff --git a/zerver/models.py b/zerver/models.py index 50d0ca7aeb..f5d4314112 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -4,7 +4,6 @@ import hashlib import secrets import time from collections import defaultdict -from contextlib import suppress from datetime import timedelta from email.headerregistry import Address from typing import ( @@ -83,6 +82,10 @@ from zerver.lib.cache import ( user_profile_cache_key, ) from zerver.lib.exceptions import JsonableError, RateLimitedError +from zerver.lib.per_request_cache import ( + flush_per_request_cache, + return_same_value_during_entire_request, +) from zerver.lib.pysa import mark_sanitized from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.types import ( @@ -183,17 +186,7 @@ def query_for_ids( return query -# Doing 1000 remote cache requests to get_display_recipient is quite slow, -# so add a local cache as well as the remote cache. -# -# This local cache has a lifetime of just a single request; it is -# cleared inside `flush_per_request_caches` in our middleware. It -# could be replaced with smarter bulk-fetching logic that deduplicates -# queries for the same recipient; this is just a convenient way to -# write that code. -per_request_display_recipient_cache: Dict[int, List[UserDisplayRecipient]] = {} - - +@return_same_value_during_entire_request def get_display_recipient_by_id( recipient_id: int, recipient_type: int, recipient_type_id: Optional[int] ) -> List[UserDisplayRecipient]: @@ -205,10 +198,7 @@ def get_display_recipient_by_id( # Have to import here, to avoid circular dependency. from zerver.lib.display_recipient import get_display_recipient_remote_cache - if recipient_id not in per_request_display_recipient_cache: - result = get_display_recipient_remote_cache(recipient_id, recipient_type, recipient_type_id) - per_request_display_recipient_cache[recipient_id] = result - return per_request_display_recipient_cache[recipient_id] + return get_display_recipient_remote_cache(recipient_id, recipient_type, recipient_type_id) def get_display_recipient(recipient: "Recipient") -> List[UserDisplayRecipient]: @@ -1310,22 +1300,9 @@ def get_linkifiers_cache_key(realm_id: int) -> str: return f"{cache.KEY_PREFIX}:all_linkifiers_for_realm:{realm_id}" -# We have a per-process cache to avoid doing 1000 remote cache queries during page load -per_request_linkifiers_cache: Dict[int, List[LinkifierDict]] = {} - - -def realm_in_local_linkifiers_cache(realm_id: int) -> bool: - return realm_id in per_request_linkifiers_cache - - -def linkifiers_for_realm(realm_id: int) -> List[LinkifierDict]: - if not realm_in_local_linkifiers_cache(realm_id): - per_request_linkifiers_cache[realm_id] = linkifiers_for_realm_remote_cache(realm_id) - return per_request_linkifiers_cache[realm_id] - - +@return_same_value_during_entire_request @cache_with_key(get_linkifiers_cache_key, timeout=3600 * 24 * 7) -def linkifiers_for_realm_remote_cache(realm_id: int) -> List[LinkifierDict]: +def linkifiers_for_realm(realm_id: int) -> List[LinkifierDict]: return [ LinkifierDict( pattern=linkifier.pattern, @@ -1339,21 +1316,13 @@ def linkifiers_for_realm_remote_cache(realm_id: int) -> List[LinkifierDict]: def flush_linkifiers(*, instance: RealmFilter, **kwargs: object) -> None: realm_id = instance.realm_id cache_delete(get_linkifiers_cache_key(realm_id)) - with suppress(KeyError): - per_request_linkifiers_cache.pop(realm_id) + flush_per_request_cache("linkifiers_for_realm") post_save.connect(flush_linkifiers, sender=RealmFilter) post_delete.connect(flush_linkifiers, sender=RealmFilter) -def flush_per_request_caches() -> None: - global per_request_display_recipient_cache - per_request_display_recipient_cache = {} - global per_request_linkifiers_cache - per_request_linkifiers_cache = {} - - class RealmPlayground(models.Model): """Server side storage model to store playground information needed by our 'view code in playground' feature in code blocks. diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index 96e6733e45..eb6452d5ff 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -54,6 +54,7 @@ from zerver.lib.mention import ( topic_wildcards, ) from zerver.lib.message import render_markdown +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.test_classes import ZulipTestCase from zerver.lib.tex import render_tex from zerver.models import ( @@ -63,7 +64,6 @@ from zerver.models import ( UserGroup, UserMessage, UserProfile, - flush_per_request_caches, get_client, get_realm, get_stream, diff --git a/zerver/tests/test_message_dict.py b/zerver/tests/test_message_dict.py index 3ffe3554ea..a64cd39855 100644 --- a/zerver/tests/test_message_dict.py +++ b/zerver/tests/test_message_dict.py @@ -6,6 +6,7 @@ from django.utils.timezone import now as timezone_now from zerver.lib.cache import cache_delete, to_dict_cache_key_id from zerver.lib.markdown import version as markdown_version from zerver.lib.message import MessageDict, messages_for_ids, sew_messages_and_reactions +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_helpers import make_client from zerver.lib.topic import TOPIC_LINKS @@ -18,7 +19,6 @@ from zerver.models import ( Recipient, Stream, UserProfile, - flush_per_request_caches, get_display_recipient, get_realm, get_stream, diff --git a/zerver/tests/test_message_send.py b/zerver/tests/test_message_send.py index 18e6ad2cca..10b3b2947c 100644 --- a/zerver/tests/test_message_send.py +++ b/zerver/tests/test_message_send.py @@ -33,6 +33,7 @@ from zerver.actions.users import do_change_can_forge_sender, do_deactivate_user from zerver.lib.addressee import Addressee from zerver.lib.exceptions import JsonableError from zerver.lib.message import MessageDict, get_raw_unread_data, get_recent_private_conversations +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.streams import create_stream_if_needed from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_helpers import ( @@ -55,7 +56,6 @@ from zerver.models import ( UserGroup, UserMessage, UserProfile, - flush_per_request_caches, get_or_create_huddle, get_realm, get_stream, diff --git a/zerver/views/development/cache.py b/zerver/views/development/cache.py index e1b87c7724..94e6f663bf 100644 --- a/zerver/views/development/cache.py +++ b/zerver/views/development/cache.py @@ -5,8 +5,9 @@ from django.views.decorators.csrf import csrf_exempt from zerver.decorator import require_post from zerver.lib.cache import get_cache_backend +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.response import json_success -from zerver.models import clear_client_cache, flush_per_request_caches +from zerver.models import clear_client_cache ZULIP_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../") diff --git a/zerver/worker/queue_processors.py b/zerver/worker/queue_processors.py index 7f4cc5e9e8..20752d56a0 100644 --- a/zerver/worker/queue_processors.py +++ b/zerver/worker/queue_processors.py @@ -68,6 +68,7 @@ from zerver.lib.email_notifications import MissedMessageData, handle_missedmessa from zerver.lib.exceptions import RateLimitedError from zerver.lib.export import export_realm_wrapper from zerver.lib.outgoing_webhook import do_rest_call, get_outgoing_webhook_service_handler +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.push_notifications import ( clear_push_device_tokens, handle_push_notification, @@ -99,7 +100,6 @@ from zerver.models import ( UserMessage, UserProfile, filter_to_valid_prereg_users, - flush_per_request_caches, get_bot_services, get_client, get_system_bot,