2018-07-31 23:07:42 +02:00
|
|
|
# See https://zulip.readthedocs.io/en/latest/subsystems/caching.html for docs
|
2018-08-01 23:15:14 +02:00
|
|
|
import logging
|
2024-07-12 02:30:25 +02:00
|
|
|
from collections.abc import Callable, Iterable
|
2023-11-19 19:45:19 +01:00
|
|
|
from datetime import timedelta
|
2024-07-12 02:30:25 +02:00
|
|
|
from typing import Any
|
2020-06-11 00:54:34 +02:00
|
|
|
|
|
|
|
from django.conf import settings
|
|
|
|
from django.contrib.sessions.models import Session
|
2023-11-08 17:42:13 +01:00
|
|
|
from django.db import connection
|
2023-03-04 01:52:14 +01:00
|
|
|
from django.db.models import QuerySet
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2018-08-01 23:15:14 +02:00
|
|
|
|
2013-03-13 19:15:29 +01:00
|
|
|
# This file needs to be different from cache.py because cache.py
|
2013-07-29 23:03:31 +02:00
|
|
|
# cannot import anything from zerver.models or we'd have an import
|
2013-03-13 19:15:29 +01:00
|
|
|
# loop
|
2018-08-01 23:15:14 +02:00
|
|
|
from analytics.models import RealmCount
|
2020-06-11 00:54:34 +02:00
|
|
|
from zerver.lib.cache import (
|
|
|
|
cache_set_many,
|
|
|
|
get_remote_cache_requests,
|
|
|
|
get_remote_cache_time,
|
|
|
|
user_profile_by_api_key_cache_key,
|
2024-03-04 21:08:53 +01:00
|
|
|
user_profile_cache_key_id,
|
2020-06-11 00:54:34 +02:00
|
|
|
)
|
2022-07-27 23:39:07 +02:00
|
|
|
from zerver.lib.safe_session_cached_db import SessionStore
|
2020-07-01 00:31:28 +02:00
|
|
|
from zerver.lib.sessions import session_engine
|
2018-08-01 10:53:40 +02:00
|
|
|
from zerver.lib.users import get_all_api_keys
|
2023-12-15 04:33:19 +01:00
|
|
|
from zerver.models import Client, UserProfile
|
|
|
|
from zerver.models.clients import get_client_cache_key
|
2013-01-09 20:35:19 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def user_cache_items(
    items_for_remote_cache: dict[str, tuple[UserProfile]], user_profile: UserProfile
) -> None:
    """Add the remote cache entries for one user to ``items_for_remote_cache``.

    The dict is mutated in place.  One entry is added per API key
    returned by ``get_all_api_keys(user_profile)``, plus one entry keyed
    on the user's email and realm ID.  Every entry's value is the
    1-tuple ``(user_profile,)``.
    """
    cached_value = (user_profile,)

    # One entry for each API key that belongs to this user.
    items_for_remote_cache.update(
        (user_profile_by_api_key_cache_key(api_key), cached_value)
        for api_key in get_all_api_keys(user_profile)
    )

    # One entry for lookups by (email, realm).
    email_realm_key = user_profile_cache_key_id(user_profile.email, user_profile.realm_id)
    items_for_remote_cache[email_realm_key] = cached_value

    # Other user_profile caches exist, but none of them sit on the core
    # serving path for lots of requests, so they are not filled here.
|
2013-03-13 18:52:54 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:17 +02:00
|
|
|
def client_cache_items(items_for_remote_cache: dict[str, tuple[Client]], client: Client) -> None:
    """Add the remote cache entry for ``client`` to ``items_for_remote_cache``.

    The dict is mutated in place; the entry is keyed by
    ``get_client_cache_key(client.name)`` and holds the 1-tuple ``(client,)``.
    """
    cached_value = (client,)
    cache_key = get_client_cache_key(client.name)
    items_for_remote_cache[cache_key] = cached_value
|
2013-03-26 17:07:20 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2022-07-27 23:39:07 +02:00
|
|
|
def session_cache_items(
    items_for_remote_cache: dict[str, dict[str, object]], session: Session
) -> None:
    """Add the decoded data of ``session`` to ``items_for_remote_cache``.

    The dict is mutated in place.  Nothing is added unless
    ``settings.SESSION_ENGINE`` is ``"zerver.lib.safe_session_cached_db"``.
    """
    # With any other session engine there will be no store.cache_key
    # attribute, and in any case there is no need to fill the cache,
    # since it won't exist.
    if settings.SESSION_ENGINE == "zerver.lib.safe_session_cached_db":
        store = session_engine.SessionStore(session_key=session.session_key)
        assert isinstance(store, SessionStore)
        decoded_session = store.decode(session.session_data)
        items_for_remote_cache[store.cache_key] = decoded_session
|
2013-04-23 21:17:01 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-08-25 02:30:41 +02:00
|
|
|
def get_active_realm_ids() -> QuerySet[RealmCount, int]:
    """Return a queryset of the distinct IDs of recently active realms.

    A realm qualifies when it has a ``"1day_actives::day"`` RealmCount
    row with a positive value whose ``end_time`` falls within the last
    two days.

    For installations like Zulip Cloud hosting a lot of realms, it only
    makes sense to do cache-filling work for realms that have any
    currently active users/clients.  Otherwise, every single-user trial
    organization that has ever been created would cost us N streams
    worth of cache work (where N is the number of default streams for a
    new organization).
    """
    cutoff = timezone_now() - timedelta(days=2)
    recently_active_counts = RealmCount.objects.filter(
        end_time__gte=cutoff, property="1day_actives::day", value__gt=0
    )
    return recently_active_counts.distinct("realm_id").values_list("realm_id", flat=True)
|
|
|
|
|
2018-08-01 23:15:14 +02:00
|
|
|
|
2022-06-23 19:55:54 +02:00
|
|
|
def get_users() -> QuerySet[UserProfile]:
    """Return the users whose cache entries should be pre-filled.

    These are the users that are not long-term idle and whose realm is
    among ``get_active_realm_ids()``.  The ``realm`` and ``bot_owner``
    relations are loaded via ``select_related``.
    """
    users_with_relations = UserProfile.objects.select_related("realm", "bot_owner")
    return users_with_relations.filter(
        long_term_idle=False,
        realm__in=get_active_realm_ids(),
    )
|
|
|
|
|
2018-08-01 23:15:14 +02:00
|
|
|
|
2013-03-26 18:38:39 +01:00
|
|
|
# Registry of the caches that fill_remote_cache knows how to populate.
#
# Each value has the format:
#   (objects query, items filler function, timeout, batch size)
#
# The objects queries are stored as callables rather than evaluated
# here, to prevent Django from doing any setup for things we're
# unlikely to use (building the queries eagerly adds an extra 3ms or so
# to startup time for anything importing this file).
cache_fillers: dict[
    str, tuple[Callable[[], Iterable[Any]], Callable[[dict[str, Any], Any], None], int, int]
] = {
    "user": (
        get_users,
        user_cache_items,
        3600 * 24 * 7,
        10000,
    ),
    "client": (
        Client.objects.all,
        client_cache_items,
        3600 * 24 * 7,
        10000,
    ),
    "session": (
        Session.objects.all,
        session_cache_items,
        3600 * 24 * 7,
        10000,
    ),
}
|
2013-03-26 17:24:02 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-11-08 17:42:13 +01:00
|
|
|
class SQLQueryCounter:
    """Callable that counts the queries passed through it.

    Each call increments ``count`` and then forwards its arguments
    unchanged to the supplied ``execute`` callable.  In this file an
    instance is installed with ``connection.execute_wrapper``.
    """

    def __init__(self) -> None:
        # Number of times this instance has been called so far.
        self.count = 0

    def __call__(
        self,
        execute: Callable[[str, Any, bool, dict[str, Any]], Any],
        sql: str,
        params: Any,
        many: bool,
        context: dict[str, Any],
    ) -> Any:
        """Record one more query, then run it and return its result."""
        # Count before delegating, so a query that raises is still counted.
        self.count = self.count + 1
        result = execute(sql, params, many, context)
        return result
|
|
|
|
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def fill_remote_cache(cache: str) -> None:
    """Populate the named remote cache from the database and log a summary.

    ``cache`` must be a key of ``cache_fillers``; an unknown name raises
    ``KeyError``.  The registered query is iterated, each object is
    turned into cache entries by the registered filler function, and the
    entries are written with ``cache_set_many`` in batches of the
    registered batch size.

    Every batch is written with the timeout registered in
    ``cache_fillers``.  Previously that value was unpacked but never
    used, and full batches were written with a hard-coded one-day
    timeout while the final partial batch got seven days, so entries
    from the same fill expired at different times.
    """
    remote_cache_time_start = get_remote_cache_time()
    remote_cache_requests_start = get_remote_cache_requests()
    items_for_remote_cache: dict[str, Any] = {}
    (objects, items_filler, timeout, batch_size) = cache_fillers[cache]
    count = 0
    db_query_counter = SQLQueryCounter()
    with connection.execute_wrapper(db_query_counter):
        for obj in objects():
            items_filler(items_for_remote_cache, obj)
            count += 1
            if count % batch_size == 0:
                # Flush a full batch and start a fresh dict so the
                # pending entries stay bounded by the batch size.
                cache_set_many(items_for_remote_cache, timeout=timeout)
                items_for_remote_cache = {}
        # Flush whatever is left over from the last, partial batch.
        cache_set_many(items_for_remote_cache, timeout=timeout)
    logging.info(
        "Successfully populated %s cache: %d items, %d DB queries, %d memcached sets, %.2f seconds",
        cache,
        count,
        db_query_counter.count,
        get_remote_cache_requests() - remote_cache_requests_start,
        get_remote_cache_time() - remote_cache_time_start,
    )
|