docs: Refactor BS work with use of cache_with_key.

Refactor the potentially expensive work done by Beautiful Soup into a
function that is called by the alter_content function, so that we can
cache the result.  Saves a significant portion of the runtime of
loading of all of our /help/ and /api/ documentation pages (e.g. 12ms
for /api).

Fixes #11088.

Tweaked by tabbott to use the URL path as the cache key, clean up
argument structure, and use a clearer name for the function.
This commit is contained in:
Wyatt Hoodes 2019-01-26 20:18:19 -10:00 committed by Tim Abbott
parent 370cf1a2cb
commit 8eac361fb5
2 changed files with 13 additions and 2 deletions

View File

@ -7,6 +7,8 @@ from django.core.cache import caches
from django.conf import settings
from django.db.models import Q
from django.core.cache.backends.base import BaseCache
from django.http import HttpRequest
from django import template
from typing import cast, Any, Callable, Dict, Iterable, List, Optional, Union, Set, TypeVar, Tuple
@ -454,6 +456,9 @@ def to_dict_cache_key_id(message_id: int) -> str:
def to_dict_cache_key(message: 'Message') -> str:
return to_dict_cache_key_id(message.id)
def open_graph_description_cache_key(content: Any, request: HttpRequest) -> str:
return 'open_graph_description_path:%s' % (make_safe_digest(request.META['PATH_INFO']))
def flush_message(sender: Any, **kwargs: Any) -> None:
message = kwargs['instance']
cache_delete(to_dict_cache_key_id(message.id))

View File

@ -21,7 +21,8 @@ from django.utils.translation import ugettext as _
from django.views.csrf import csrf_failure as html_csrf_failure
from zerver.lib.bugdown import get_bugdown_requests, get_bugdown_time
from zerver.lib.cache import get_remote_cache_requests, get_remote_cache_time
from zerver.lib.cache import get_remote_cache_requests, get_remote_cache_time, \
cache_with_key, open_graph_description_cache_key
from zerver.lib.debug import maybe_tracemalloc_listen
from zerver.lib.db import reset_queries
from zerver.lib.exceptions import ErrorCode, JsonableError, RateLimited
@ -448,7 +449,8 @@ class SetRemoteAddrFromForwardedFor(MiddlewareMixin):
class FinalizeOpenGraphDescription(MiddlewareMixin):
def process_response(self, request: HttpRequest,
response: StreamingHttpResponse) -> StreamingHttpResponse:
def alter_content(content: bytes) -> bytes:
@cache_with_key(open_graph_description_cache_key, timeout=3600*24)
def get_content_description(content: bytes, request: HttpRequest) -> str:
str_content = content.decode("utf-8")
bs = BeautifulSoup(str_content, features='lxml')
# Skip any admonition (warning) blocks, since they're
@ -460,6 +462,10 @@ class FinalizeOpenGraphDescription(MiddlewareMixin):
# Find the first paragraph after that, and convert it from HTML to text.
first_paragraph_text = bs.find('p').text.replace('\n', ' ')
return first_paragraph_text
def alter_content(content: bytes) -> bytes:
first_paragraph_text = get_content_description(content, request)
return content.replace(request.placeholder_open_graph_description.encode("utf-8"),
first_paragraph_text.encode("utf-8"))