diff --git a/docs/subsystems/caching.md b/docs/subsystems/caching.md index 5ac31db49a..d1742d9c40 100644 --- a/docs/subsystems/caching.md +++ b/docs/subsystems/caching.md @@ -240,10 +240,10 @@ We generally try to avoid in-process backend caching in Zulip's Django codebase, because every Zulip production installation involves multiple servers. We do have a few, however: -- `per_request_display_recipient_cache`: A cache flushed at the start - of every request; this simplifies correctly implementing our goal of - not repeatedly fetching the "display recipient" (e.g. stream name) - for each message in the `GET /messages` codebase. +- `@return_same_value_during_entire_request`: We use this decorator to + cache values in memory during the lifetime of a request. We use this + for linkifiers and display recipients. The middleware knows how to + flush the relevant in-memory caches at the start of a request. - Caches of various data, like the `SourceMap` object, that are expensive to construct, not needed for most requests, and don't change once a Zulip server has been deployed in production. diff --git a/zerver/lib/per_request_cache.py b/zerver/lib/per_request_cache.py new file mode 100644 index 0000000000..e010c18695 --- /dev/null +++ b/zerver/lib/per_request_cache.py @@ -0,0 +1,32 @@ +from typing import Any, Callable, Dict, TypeVar + +ReturnT = TypeVar("ReturnT") + +FUNCTION_NAME_TO_PER_REQUEST_RESULT: Dict[str, Dict[int, Any]] = {} + + +def return_same_value_during_entire_request(f: Callable[..., ReturnT]) -> Callable[..., ReturnT]: + cache_key = f.__name__ + + assert cache_key not in FUNCTION_NAME_TO_PER_REQUEST_RESULT + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {} + + def wrapper(key: int, *args: Any) -> ReturnT: + if key in FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key]: + return FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key][key] + + result = f(key, *args) + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key][key] = result + return result + + return wrapper + + +def flush_per_request_cache(cache_key: str) -> None: + if cache_key in FUNCTION_NAME_TO_PER_REQUEST_RESULT: + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {} + + +def flush_per_request_caches() -> None: + for cache_key in FUNCTION_NAME_TO_PER_REQUEST_RESULT: + FUNCTION_NAME_TO_PER_REQUEST_RESULT[cache_key] = {} diff --git a/zerver/lib/test_classes.py b/zerver/lib/test_classes.py index c470175069..131d2f078f 100644 --- a/zerver/lib/test_classes.py +++ b/zerver/lib/test_classes.py @@ -55,6 +55,7 @@ from zerver.lib.cache import bounce_key_prefix_for_testing from zerver.lib.initial_password import initial_password from zerver.lib.message import access_message from zerver.lib.notification_data import UserMessageNotificationsData +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.rate_limiter import bounce_redis_key_prefix_for_testing from zerver.lib.sessions import get_session_dict_user from zerver.lib.soft_deactivation import do_soft_deactivate_users @@ -99,7 +100,6 @@ from zerver.models import ( UserProfile, UserStatus, clear_supported_auth_backends_cache, - flush_per_request_caches, get_realm, get_realm_stream, get_stream, diff --git a/zerver/lib/test_helpers.py b/zerver/lib/test_helpers.py index 16029ca68e..7bbf5e870b 100644 --- a/zerver/lib/test_helpers.py +++ b/zerver/lib/test_helpers.py @@ -46,6 +46,7 @@ from zerver.lib.avatar import avatar_url from zerver.lib.cache import get_cache_backend from zerver.lib.db import Params, ParamsT, Query, TimeTrackingCursor from zerver.lib.integrations import WEBHOOK_INTEGRATIONS +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.rate_limiter import RateLimitedIPAddr, rules from zerver.lib.request import RequestNotes from zerver.lib.upload.s3 import S3UploadBackend @@ -56,7 +57,6 @@ from zerver.models import ( Subscription, UserMessage, UserProfile, - flush_per_request_caches, get_client, get_realm, get_stream, diff --git a/zerver/middleware.py b/zerver/middleware.py index 7124c893d1..04ca9d57b5 100644 --- a/zerver/middleware.py +++ b/zerver/middleware.py @@ -32,6 +32,7 @@ from zerver.lib.debug import maybe_tracemalloc_listen from zerver.lib.exceptions import ErrorCode, JsonableError, MissingAuthenticationError from zerver.lib.html_to_text import get_content_description from zerver.lib.markdown import get_markdown_requests, get_markdown_time +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.rate_limiter import RateLimitResult from zerver.lib.request import REQ, RequestNotes, has_request_variables from zerver.lib.response import ( @@ -42,7 +43,7 @@ from zerver.lib.response import ( ) from zerver.lib.subdomains import get_subdomain from zerver.lib.user_agent import parse_user_agent -from zerver.models import Realm, flush_per_request_caches, get_realm +from zerver.models import Realm, get_realm ParamT = ParamSpec("ParamT") logger = logging.getLogger("zulip.requests") diff --git a/zerver/models.py b/zerver/models.py index 50d0ca7aeb..f5d4314112 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -4,7 +4,6 @@ import hashlib import secrets import time from collections import defaultdict -from contextlib import suppress from datetime import timedelta from email.headerregistry import Address from typing import ( @@ -83,6 +82,10 @@ from zerver.lib.cache import ( user_profile_cache_key, ) from zerver.lib.exceptions import JsonableError, RateLimitedError +from zerver.lib.per_request_cache import ( + flush_per_request_cache, + return_same_value_during_entire_request, +) from zerver.lib.pysa import mark_sanitized from zerver.lib.timestamp import datetime_to_timestamp from zerver.lib.types import ( @@ -183,17 +186,7 @@ def query_for_ids( return query -# Doing 1000 remote cache requests to get_display_recipient is quite slow, -# so add a local cache as well as the remote cache. -# -# This local cache has a lifetime of just a single request; it is -# cleared inside `flush_per_request_caches` in our middleware. It -# could be replaced with smarter bulk-fetching logic that deduplicates -# queries for the same recipient; this is just a convenient way to -# write that code. -per_request_display_recipient_cache: Dict[int, List[UserDisplayRecipient]] = {} - - +@return_same_value_during_entire_request def get_display_recipient_by_id( recipient_id: int, recipient_type: int, recipient_type_id: Optional[int] ) -> List[UserDisplayRecipient]: @@ -205,10 +198,7 @@ def get_display_recipient_by_id( # Have to import here, to avoid circular dependency. from zerver.lib.display_recipient import get_display_recipient_remote_cache - if recipient_id not in per_request_display_recipient_cache: - result = get_display_recipient_remote_cache(recipient_id, recipient_type, recipient_type_id) - per_request_display_recipient_cache[recipient_id] = result - return per_request_display_recipient_cache[recipient_id] + return get_display_recipient_remote_cache(recipient_id, recipient_type, recipient_type_id) def get_display_recipient(recipient: "Recipient") -> List[UserDisplayRecipient]: @@ -1310,22 +1300,9 @@ def get_linkifiers_cache_key(realm_id: int) -> str: return f"{cache.KEY_PREFIX}:all_linkifiers_for_realm:{realm_id}" -# We have a per-process cache to avoid doing 1000 remote cache queries during page load -per_request_linkifiers_cache: Dict[int, List[LinkifierDict]] = {} - - -def realm_in_local_linkifiers_cache(realm_id: int) -> bool: - return realm_id in per_request_linkifiers_cache - - -def linkifiers_for_realm(realm_id: int) -> List[LinkifierDict]: - if not realm_in_local_linkifiers_cache(realm_id): - per_request_linkifiers_cache[realm_id] = linkifiers_for_realm_remote_cache(realm_id) - return per_request_linkifiers_cache[realm_id] - - +@return_same_value_during_entire_request @cache_with_key(get_linkifiers_cache_key, timeout=3600 * 24 * 7) -def linkifiers_for_realm_remote_cache(realm_id: int) -> List[LinkifierDict]: +def linkifiers_for_realm(realm_id: int) -> List[LinkifierDict]: return [ LinkifierDict( pattern=linkifier.pattern, @@ -1339,21 +1316,13 @@ def linkifiers_for_realm_remote_cache(realm_id: int) -> List[LinkifierDict]: def flush_linkifiers(*, instance: RealmFilter, **kwargs: object) -> None: realm_id = instance.realm_id cache_delete(get_linkifiers_cache_key(realm_id)) - with suppress(KeyError): - per_request_linkifiers_cache.pop(realm_id) + flush_per_request_cache("linkifiers_for_realm") post_save.connect(flush_linkifiers, sender=RealmFilter) post_delete.connect(flush_linkifiers, sender=RealmFilter) -def flush_per_request_caches() -> None: - global per_request_display_recipient_cache - per_request_display_recipient_cache = {} - global per_request_linkifiers_cache - per_request_linkifiers_cache = {} - - class RealmPlayground(models.Model): """Server side storage model to store playground information needed by our 'view code in playground' feature in code blocks. diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index 96e6733e45..eb6452d5ff 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -54,6 +54,7 @@ from zerver.lib.mention import ( topic_wildcards, ) from zerver.lib.message import render_markdown +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.test_classes import ZulipTestCase from zerver.lib.tex import render_tex from zerver.models import ( @@ -63,7 +64,6 @@ from zerver.models import ( UserGroup, UserMessage, UserProfile, - flush_per_request_caches, get_client, get_realm, get_stream, diff --git a/zerver/tests/test_message_dict.py b/zerver/tests/test_message_dict.py index 3ffe3554ea..a64cd39855 100644 --- a/zerver/tests/test_message_dict.py +++ b/zerver/tests/test_message_dict.py @@ -6,6 +6,7 @@ from django.utils.timezone import now as timezone_now from zerver.lib.cache import cache_delete, to_dict_cache_key_id from zerver.lib.markdown import version as markdown_version from zerver.lib.message import MessageDict, messages_for_ids, sew_messages_and_reactions +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_helpers import make_client from zerver.lib.topic import TOPIC_LINKS @@ -18,7 +19,6 @@ from zerver.models import ( Recipient, Stream, UserProfile, - flush_per_request_caches, get_display_recipient, get_realm, get_stream, diff --git a/zerver/tests/test_message_send.py b/zerver/tests/test_message_send.py index 18e6ad2cca..10b3b2947c 100644 --- a/zerver/tests/test_message_send.py +++ b/zerver/tests/test_message_send.py @@ -33,6 +33,7 @@ from zerver.actions.users import do_change_can_forge_sender, do_deactivate_user from zerver.lib.addressee import Addressee from zerver.lib.exceptions import JsonableError from zerver.lib.message import MessageDict, get_raw_unread_data, get_recent_private_conversations +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.streams import create_stream_if_needed from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_helpers import ( @@ -55,7 +56,6 @@ from zerver.models import ( UserGroup, UserMessage, UserProfile, - flush_per_request_caches, get_or_create_huddle, get_realm, get_stream, diff --git a/zerver/views/development/cache.py b/zerver/views/development/cache.py index e1b87c7724..94e6f663bf 100644 --- a/zerver/views/development/cache.py +++ b/zerver/views/development/cache.py @@ -5,8 +5,9 @@ from django.views.decorators.csrf import csrf_exempt from zerver.decorator import require_post from zerver.lib.cache import get_cache_backend +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.response import json_success -from zerver.models import clear_client_cache, flush_per_request_caches +from zerver.models import clear_client_cache ZULIP_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../") diff --git a/zerver/worker/queue_processors.py b/zerver/worker/queue_processors.py index 7f4cc5e9e8..20752d56a0 100644 --- a/zerver/worker/queue_processors.py +++ b/zerver/worker/queue_processors.py @@ -68,6 +68,7 @@ from zerver.lib.email_notifications import MissedMessageData, handle_missedmessa from zerver.lib.exceptions import RateLimitedError from zerver.lib.export import export_realm_wrapper from zerver.lib.outgoing_webhook import do_rest_call, get_outgoing_webhook_service_handler +from zerver.lib.per_request_cache import flush_per_request_caches from zerver.lib.push_notifications import ( clear_push_device_tokens, handle_push_notification, @@ -99,7 +100,6 @@ from zerver.models import ( UserMessage, UserProfile, filter_to_valid_prereg_users, - flush_per_request_caches, get_bot_services, get_client, get_system_bot,