documentation: Move OpenGraph description updating out of middleware.

This middleware was highly-specific to a set of URLs, and pulled in a
beautifulsoup dependency for Tornado.  Move it closer to where it is
used, minimizing action at a distance, as well as trimming out a
dependency.
This commit is contained in:
Alex Vandiver 2024-04-16 16:27:55 +00:00 committed by Tim Abbott
parent 16a08a8b6b
commit 1424a2e748
6 changed files with 33 additions and 38 deletions

View File

@ -26,7 +26,6 @@ from django.conf import settings
from django.core.cache import caches
from django.core.cache.backends.base import BaseCache
from django.db.models import Q
from django.http import HttpRequest
from django_stubs_ext import QuerySetAny
from typing_extensions import ParamSpec
@ -684,10 +683,8 @@ def to_dict_cache_key(message: "Message", realm_id: Optional[int] = None) -> str
return to_dict_cache_key_id(message.id)
def open_graph_description_cache_key(content: bytes, request: HttpRequest) -> str:
return "open_graph_description_path:{}".format(
hashlib.sha1(request.META["PATH_INFO"].encode()).hexdigest()
)
def open_graph_description_cache_key(content: bytes, request_url: str) -> str:
return f"open_graph_description_path:{hashlib.sha1(request_url.encode()).hexdigest()}"
def flush_message(*, instance: "Message", **kwargs: object) -> None:

View File

@ -1,7 +1,6 @@
from typing import Mapping, Union
from bs4 import BeautifulSoup
from django.http import HttpRequest
from django.utils.html import escape
from zerver.lib.cache import cache_with_key, open_graph_description_cache_key
@ -35,5 +34,5 @@ def html_to_text(content: Union[str, bytes], tags: Mapping[str, str] = {"p": " |
@cache_with_key(open_graph_description_cache_key, timeout=3600 * 24)
def get_content_description(content: bytes, request: HttpRequest) -> str:
def get_content_description(content: bytes, request_url: str) -> str:
return html_to_text(content)

View File

@ -66,7 +66,6 @@ class RequestNotes(BaseNotes[HttpRequest, "RequestNotes"]):
ratelimits_applied: List[rate_limiter.RateLimitResult] = field(default_factory=list)
query: Optional[str] = None
error_format: Optional[str] = None
placeholder_open_graph_description: Optional[str] = None
saved_response: Optional[HttpResponse] = None
tornado_handler_id: Optional[int] = None
processed_parameters: Set[str] = field(default_factory=set)

View File

@ -28,7 +28,6 @@ from zerver.lib.cache import get_remote_cache_requests, get_remote_cache_time
from zerver.lib.db import reset_queries
from zerver.lib.debug import maybe_tracemalloc_listen
from zerver.lib.exceptions import ErrorCode, JsonableError, MissingAuthenticationError, WebhookError
from zerver.lib.html_to_text import get_content_description
from zerver.lib.markdown import get_markdown_requests, get_markdown_time
from zerver.lib.per_request_cache import flush_per_request_caches
from zerver.lib.rate_limiter import RateLimitResult
@ -669,28 +668,6 @@ class DetectProxyMisconfiguration(MiddlewareMixin):
raise ProxyMisconfigurationError(proxy_state_header)
def alter_content(request: HttpRequest, content: bytes) -> bytes:
first_paragraph_text = get_content_description(content, request)
placeholder_open_graph_description = RequestNotes.get_notes(
request
).placeholder_open_graph_description
assert placeholder_open_graph_description is not None
return content.replace(
placeholder_open_graph_description.encode(),
first_paragraph_text.encode(),
)
class FinalizeOpenGraphDescription(MiddlewareMixin):
def process_response(
self, request: HttpRequest, response: HttpResponseBase
) -> HttpResponseBase:
if RequestNotes.get_notes(request).placeholder_open_graph_description is not None:
assert isinstance(response, HttpResponse)
response.content = alter_content(request, response.content)
return response
def validate_scim_bearer_token(request: HttpRequest) -> bool:
"""
This function verifies the request is allowed to make SCIM requests on this subdomain,

View File

@ -3,11 +3,12 @@ import random
import re
from collections import OrderedDict
from dataclasses import dataclass
from typing import Any, Dict, Optional
from typing import Any, Callable, Dict, List, Optional
from django.conf import settings
from django.http import HttpRequest, HttpResponse, HttpResponseNotFound
from django.template import loader
from django.template.response import TemplateResponse
from django.views.generic import TemplateView
from lxml import html
from lxml.etree import Element, SubElement, XPath, _Element
@ -16,6 +17,7 @@ from typing_extensions import override
from zerver.context_processors import zulip_default_context
from zerver.decorator import add_google_analytics_context
from zerver.lib.html_to_text import get_content_description
from zerver.lib.integrations import (
CATEGORIES,
INTEGRATIONS,
@ -24,7 +26,7 @@ from zerver.lib.integrations import (
WebhookIntegration,
get_all_event_types_for_integration,
)
from zerver.lib.request import REQ, RequestNotes, has_request_variables
from zerver.lib.request import REQ, has_request_variables
from zerver.lib.subdomains import get_subdomain
from zerver.lib.templates import render_markdown_path
from zerver.models import Realm
@ -81,6 +83,15 @@ class MarkdownDirectoryView(ApiURLView):
help_view = False
api_doc_view = False
def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._post_render_callbacks: List[Callable[[HttpResponse], Optional[HttpResponse]]] = []
def add_post_render_callback(
self, callback: Callable[[HttpResponse], Optional[HttpResponse]]
) -> None:
self._post_render_callbacks.append(callback)
def get_path(self, article: str) -> DocumentationArticle:
http_status = 200
if article == "":
@ -223,11 +234,22 @@ class MarkdownDirectoryView(ApiURLView):
context["PAGE_TITLE"] = f"{article_title} | {title_base}"
else:
context["PAGE_TITLE"] = title_base
request_notes = RequestNotes.get_notes(self.request)
request_notes.placeholder_open_graph_description = (
placeholder_open_graph_description = (
f"REPLACEMENT_PAGE_DESCRIPTION_{int(2**24 * random.random())}"
)
context["PAGE_DESCRIPTION"] = request_notes.placeholder_open_graph_description
context["PAGE_DESCRIPTION"] = placeholder_open_graph_description
def update_description(response: HttpResponse) -> None:
if placeholder_open_graph_description.encode() in response.content:
first_paragraph_text = get_content_description(
response.content, context["PAGE_METADATA_URL"]
)
response.content = response.content.replace(
placeholder_open_graph_description.encode(),
first_paragraph_text.encode(),
)
self.add_post_render_callback(update_description)
# An "article" might require the api_url_context to be rendered
api_url_context: Dict[str, Any] = {}
@ -283,6 +305,9 @@ class MarkdownDirectoryView(ApiURLView):
documentation_article = self.get_path(article)
http_status = documentation_article.article_http_status
result = super().get(request, article=article)
assert isinstance(result, TemplateResponse)
for callback in self._post_render_callbacks:
result.add_post_render_callback(callback)
if http_status != 200:
result.status_code = http_status
return result

View File

@ -178,8 +178,6 @@ MIDDLEWARE = [
# Make sure 2FA middlewares come after authentication middleware.
"django_otp.middleware.OTPMiddleware", # Required by two factor auth.
"two_factor.middleware.threadlocals.ThreadLocals", # Required by Twilio
# Needs to be after CommonMiddleware, which sets Content-Length
"zerver.middleware.FinalizeOpenGraphDescription",
]
AUTH_USER_MODEL = "zerver.UserProfile"