From 9c3f38a56487d709233b61cc9037b9eae8339437 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Fri, 14 Dec 2018 14:28:00 -0800 Subject: [PATCH] docs: Automatically construct OpenAPI metadata for help center. This is somewhat hacky, in that in order to do what we're doing, we need to parse the HTML of the rendered page to extract the first paragraph to include in the open graph description field. But BeautifulSoup does a good job of it. This carries a nontrivial performance penalty for loading these pages, but overall /help/ is a low-traffic site compared to the main app, so it doesn't matter much. (As a sidenote, it wouldn't be a bad idea to cache this stuff). There's lots of things we can improve in this, largely through editing the articles, but we can deal with that over time. Thanks to Rishi for writing all the tests. --- templates/zerver/meta_tags.html | 14 +++++++- zerver/middleware.py | 28 +++++++++++++++ zerver/tests/test_middleware.py | 63 +++++++++++++++++++++++++++++++++ zerver/views/integrations.py | 21 ++++++++++- zproject/settings.py | 2 ++ 5 files changed, 126 insertions(+), 2 deletions(-) diff --git a/templates/zerver/meta_tags.html b/templates/zerver/meta_tags.html index 211aecaf64..c7efc8e0a2 100644 --- a/templates/zerver/meta_tags.html +++ b/templates/zerver/meta_tags.html @@ -4,16 +4,28 @@ {% else %} {% endif %} - + + +{% if OPEN_GRAPH_TITLE %} + + +{% else %} +{% endif %} +{% if OPEN_GRAPH_TITLE %} + + +{% else %} +{% endif %} + diff --git a/zerver/middleware.py b/zerver/middleware.py index 0ad829587b..18f46b39b6 100644 --- a/zerver/middleware.py +++ b/zerver/middleware.py @@ -6,6 +6,7 @@ import traceback from typing import Any, AnyStr, Callable, Dict, \ Iterable, List, MutableMapping, Optional +from bs4 import BeautifulSoup from django.conf import settings from django.contrib.sessions.backends.base import UpdateError from django.contrib.sessions.middleware import SessionMiddleware @@ -443,3 +444,30 @@ class SetRemoteAddrFromForwardedFor(MiddlewareMixin): # For NGINX reverse proxy servers, the client's IP will be the first one. real_ip = real_ip.split(",")[0].strip() request.META['REMOTE_ADDR'] = real_ip + +class FinalizeOpenGraphDescription(MiddlewareMixin): + def process_response(self, request: HttpRequest, + response: StreamingHttpResponse) -> StreamingHttpResponse: + def alter_content(content: bytes) -> bytes: + str_content = content.decode("utf-8") + bs = BeautifulSoup(str_content, features='lxml') + # Skip any admonition (warning) blocks, since they're + # usually something about users needing to be an + # organization administrator, and not useful for + # describing the page. + for tag in bs.find_all('div', class_="admonition"): + tag.clear() + + # Find the first paragraph after that, and convert it from HTML to text. + first_paragraph_text = bs.find('p').text.replace('\n', ' ') + return content.replace(request.placeholder_open_graph_description.encode("utf-8"), + first_paragraph_text.encode("utf-8")) + + def wrap_streaming_content(content: Iterable[bytes]) -> Iterable[bytes]: + for chunk in content: + yield alter_content(chunk) + + if getattr(request, "placeholder_open_graph_description", None) is not None: + assert not response.streaming + response.content = alter_content(response.content) + return response diff --git a/zerver/tests/test_middleware.py b/zerver/tests/test_middleware.py index 189bcc46d8..7ac2c0ec05 100644 --- a/zerver/tests/test_middleware.py +++ b/zerver/tests/test_middleware.py @@ -1,5 +1,6 @@ import time +from django.http import HttpResponse from django.test import override_settings from unittest.mock import Mock, patch from zerver.lib.test_classes import ZulipTestCase @@ -49,3 +50,65 @@ class SlowQueryTest(ZulipTestCase): write_log_line(self.log_data, path='/socket/open', method='SOCKET', remote_ip='123.456.789.012', email='unknown', client_name='?') mock_internal_send_message.assert_not_called() + +class OpenGraphTest(ZulipTestCase): + def check_title_and_description(self, path: str, title: str, description: str) -> None: + response = self.client_get(path) + self.assert_in_success_response([ + # Open graph + ''.format(title), + ''.format(description), + # Twitter + ''.format(title), + ''.format(description), + ], response) + + def test_admonition_and_link(self) -> None: + # disable-message-edit-history starts with an {!admin-only.md!}, and has a link + # in the first paragraph. + self.check_title_and_description( + '/help/disable-message-edit-history', + "Disable message edit history (Zulip Help Center)", + "By default, Zulip displays messages that have been edited with an EDITED tag, " + + "and users can view the edit history of a message.") + + def test_settings_tab(self) -> None: + # deactivate-your-account starts with {settings_tab|your-account} + self.check_title_and_description( + '/help/deactivate-your-account', + "Deactivate your account (Zulip Help Center)", + # Ideally, we'd grab the second and third paragraphs as well, if + # the first paragraph is this short + "Go to Your account.") + + def test_tabs(self) -> None: + # logging-out starts with {start_tabs} + self.check_title_and_description( + '/help/logging-out', + "Logging out (Zulip Help Center)", + # Ideally we'd do something better here + "") + + def test_index_pages(self) -> None: + self.check_title_and_description( + '/help/', + "Zulip Help Center", + ("Zulip is a group chat app. Its most distinctive characteristic is that " + "conversation within an organization is divided into “streams” and further " + "subdivided into “topics”, so that much finer-grained conversations are possible " + "than with IRC or other chat tools.")) + + self.check_title_and_description( + '/api/', + "Zulip API Documentation", + ("Zulip's APIs allow you to integrate other services with Zulip. This " + "guide should help you find the API you need:")) + + def test_nonexistent_page(self) -> None: + response = self.client_get('/help/not-a-real-page') + # Test that our open graph logic doesn't throw a 500 + self.assertEqual(response.status_code, 404) + self.assert_in_response( + # Probably we should make this "Zulip Help Center" + '', response) + self.assert_in_response('', response) diff --git a/zerver/views/integrations.py b/zerver/views/integrations.py index db107d320e..77cb928980 100644 --- a/zerver/views/integrations.py +++ b/zerver/views/integrations.py @@ -6,7 +6,9 @@ from django.http import HttpRequest, HttpResponse, HttpResponseNotFound from django.template import loader from django.shortcuts import render +import lxml import os +import random import re import ujson @@ -95,11 +97,29 @@ class MarkdownDirectoryView(ApiURLView): (sidebar_index, http_status_ignored) = self.get_path("include/sidebar_index") # We want the sliding/collapsing behavior for /help pages only sidebar_class = "sidebar slide" + title_base = "Zulip Help Center" else: context["page_is_api_center"] = True context["doc_root"] = "/api/" (sidebar_index, http_status_ignored) = self.get_path("sidebar_index") sidebar_class = "sidebar" + title_base = "Zulip API Documentation" + + # The following is a somewhat hacky approach to extract titles from articles. + # Hack: `context["article"] has a leading `/`, so we use + to add directories. + article_path = os.path.join(settings.DEPLOY_ROOT, 'templates') + context["article"] + if os.path.exists(article_path): + with open(article_path) as article_file: + first_line = article_file.readlines()[0] + # Strip the header and then use the first line to get the article title + article_title = first_line.strip().lstrip("# ") + if context["not_index_page"]: + context["OPEN_GRAPH_TITLE"] = "%s (%s)" % (article_title, title_base) + else: + context["OPEN_GRAPH_TITLE"] = title_base + self.request.placeholder_open_graph_description = ( + "REPLACMENT_OPEN_GRAPH_DESCRIPTION_%s" % (int(2**24 * random.random()),)) + context["OPEN_GRAPH_DESCRIPTION"] = self.request.placeholder_open_graph_description context["sidebar_index"] = sidebar_index context["sidebar_class"] = sidebar_class @@ -116,7 +136,6 @@ class MarkdownDirectoryView(ApiURLView): result.status_code = http_status return result - def add_integrations_context(context: Dict[str, Any]) -> None: alphabetical_sorted_categories = OrderedDict(sorted(CATEGORIES.items())) alphabetical_sorted_integration = OrderedDict(sorted(INTEGRATIONS.items())) diff --git a/zproject/settings.py b/zproject/settings.py index 4779fb7d95..099384431e 100644 --- a/zproject/settings.py +++ b/zproject/settings.py @@ -543,6 +543,8 @@ MIDDLEWARE = ( # Make sure 2FA middlewares come after authentication middleware. 'django_otp.middleware.OTPMiddleware', # Required by Two Factor auth. 'two_factor.middleware.threadlocals.ThreadLocals', # Required by Twilio + # Needs to be after CommonMiddleware, which sets Content-Length + 'zerver.middleware.FinalizeOpenGraphDescription', ) ANONYMOUS_USER_ID = None