docs: Automatically construct OpenAPI metadata for help center.

This is somewhat hacky, in that in order to do what we're doing, we
need to parse the HTML of the rendered page to extract the first
paragraph to include in the open graph description field.  But
BeautifulSoup does a good job of it.

This carries a nontrivial performance penalty for loading these pages,
but overall /help/ is a low-traffic site compared to the main app, so
it doesn't matter much.

(As a sidenote, it wouldn't be a bad idea to cache this stuff).

There's lots of things we can improve in this, largely through editing
the articles, but we can deal with that over time.

Thanks to Rishi for writing all the tests.
This commit is contained in:
Tim Abbott 2018-12-14 14:28:00 -08:00
parent 0fb90dcdb3
commit 9c3f38a564
5 changed files with 126 additions and 2 deletions

View File

@ -4,16 +4,28 @@
{% else %}
<meta name="robots" content="noindex,nofollow">
{% endif %}
<!-- Facebook Meta Tags -->
<!-- Open Graph / Facebook Meta Tags -->
<meta property="og:url" content="{{ realm_uri }}">
<meta property="og:type" content="website">
<meta property="og:site_name" content="Zulip">
{% if OPEN_GRAPH_TITLE %}
<meta property="og:title" content="{{ OPEN_GRAPH_TITLE }}">
<meta property="og:description" content="{{ OPEN_GRAPH_DESCRIPTION }}">
{% else %}
<meta property="og:title" content="The world's most productive team chat">
<meta property="og:description" content="Zulip combines the immediacy of real-time chat with an email threading model. With Zulip, you can catch up on important conversations while ignoring irrelevant ones.">
{% endif %}
<meta property="og:image" content="{{ realm_uri }}/static/images/logo/zulip-icon-128x128.png">
<!-- Twitter Meta Tags -->
<meta name="twitter:card" content="summary">
{% if OPEN_GRAPH_TITLE %}
<meta property="twitter:title" content="{{ OPEN_GRAPH_TITLE }}">
<meta name="twitter:description" content="{{ OPEN_GRAPH_DESCRIPTION }}">
{% else %}
<meta property="twitter:title" content="The world's most productive team chat">
<meta name="twitter:description" content="Zulip combines the immediacy of real-time chat with an email threading model. With Zulip, you can catch up on important conversations while ignoring irrelevant ones.">
{% endif %}
<meta name="twitter:image" content="{{ realm_uri }}/static/images/logo/zulip-icon-128x128.png">

View File

@ -6,6 +6,7 @@ import traceback
from typing import Any, AnyStr, Callable, Dict, \
Iterable, List, MutableMapping, Optional
from bs4 import BeautifulSoup
from django.conf import settings
from django.contrib.sessions.backends.base import UpdateError
from django.contrib.sessions.middleware import SessionMiddleware
@ -443,3 +444,30 @@ class SetRemoteAddrFromForwardedFor(MiddlewareMixin):
# For NGINX reverse proxy servers, the client's IP will be the first one.
real_ip = real_ip.split(",")[0].strip()
request.META['REMOTE_ADDR'] = real_ip
class FinalizeOpenGraphDescription(MiddlewareMixin):
def process_response(self, request: HttpRequest,
response: StreamingHttpResponse) -> StreamingHttpResponse:
def alter_content(content: bytes) -> bytes:
str_content = content.decode("utf-8")
bs = BeautifulSoup(str_content, features='lxml')
# Skip any admonition (warning) blocks, since they're
# usually something about users needing to be an
# organization administrator, and not useful for
# describing the page.
for tag in bs.find_all('div', class_="admonition"):
tag.clear()
# Find the first paragraph after that, and convert it from HTML to text.
first_paragraph_text = bs.find('p').text.replace('\n', ' ')
return content.replace(request.placeholder_open_graph_description.encode("utf-8"),
first_paragraph_text.encode("utf-8"))
def wrap_streaming_content(content: Iterable[bytes]) -> Iterable[bytes]:
for chunk in content:
yield alter_content(chunk)
if getattr(request, "placeholder_open_graph_description", None) is not None:
assert not response.streaming
response.content = alter_content(response.content)
return response

View File

@ -1,5 +1,6 @@
import time
from django.http import HttpResponse
from django.test import override_settings
from unittest.mock import Mock, patch
from zerver.lib.test_classes import ZulipTestCase
@ -49,3 +50,65 @@ class SlowQueryTest(ZulipTestCase):
write_log_line(self.log_data, path='/socket/open', method='SOCKET',
remote_ip='123.456.789.012', email='unknown', client_name='?')
mock_internal_send_message.assert_not_called()
class OpenGraphTest(ZulipTestCase):
def check_title_and_description(self, path: str, title: str, description: str) -> None:
response = self.client_get(path)
self.assert_in_success_response([
# Open graph
'<meta property="og:title" content="{}">'.format(title),
'<meta property="og:description" content="{}">'.format(description),
# Twitter
'<meta property="twitter:title" content="{}">'.format(title),
'<meta name="twitter:description" content="{}">'.format(description),
], response)
def test_admonition_and_link(self) -> None:
# disable-message-edit-history starts with an {!admin-only.md!}, and has a link
# in the first paragraph.
self.check_title_and_description(
'/help/disable-message-edit-history',
"Disable message edit history (Zulip Help Center)",
"By default, Zulip displays messages that have been edited with an EDITED tag, " +
"and users can view the edit history of a message.")
def test_settings_tab(self) -> None:
# deactivate-your-account starts with {settings_tab|your-account}
self.check_title_and_description(
'/help/deactivate-your-account',
"Deactivate your account (Zulip Help Center)",
# Ideally, we'd grab the second and third paragraphs as well, if
# the first paragraph is this short
"Go to Your account.")
def test_tabs(self) -> None:
# logging-out starts with {start_tabs}
self.check_title_and_description(
'/help/logging-out',
"Logging out (Zulip Help Center)",
# Ideally we'd do something better here
"")
def test_index_pages(self) -> None:
self.check_title_and_description(
'/help/',
"Zulip Help Center",
("Zulip is a group chat app. Its most distinctive characteristic is that "
"conversation within an organization is divided into “streams” and further "
"subdivided into “topics”, so that much finer-grained conversations are possible "
"than with IRC or other chat tools."))
self.check_title_and_description(
'/api/',
"Zulip API Documentation",
("Zulip's APIs allow you to integrate other services with Zulip. This "
"guide should help you find the API you need:"))
def test_nonexistent_page(self) -> None:
response = self.client_get('/help/not-a-real-page')
# Test that our open graph logic doesn't throw a 500
self.assertEqual(response.status_code, 404)
self.assert_in_response(
# Probably we should make this "Zulip Help Center"
'<meta property="og:title" content="No such article. (Zulip Help Center)">', response)
self.assert_in_response('<meta property="og:description" content="No such article.">', response)

View File

@ -6,7 +6,9 @@ from django.http import HttpRequest, HttpResponse, HttpResponseNotFound
from django.template import loader
from django.shortcuts import render
import lxml
import os
import random
import re
import ujson
@ -95,11 +97,29 @@ class MarkdownDirectoryView(ApiURLView):
(sidebar_index, http_status_ignored) = self.get_path("include/sidebar_index")
# We want the sliding/collapsing behavior for /help pages only
sidebar_class = "sidebar slide"
title_base = "Zulip Help Center"
else:
context["page_is_api_center"] = True
context["doc_root"] = "/api/"
(sidebar_index, http_status_ignored) = self.get_path("sidebar_index")
sidebar_class = "sidebar"
title_base = "Zulip API Documentation"
# The following is a somewhat hacky approach to extract titles from articles.
# Hack: `context["article"] has a leading `/`, so we use + to add directories.
article_path = os.path.join(settings.DEPLOY_ROOT, 'templates') + context["article"]
if os.path.exists(article_path):
with open(article_path) as article_file:
first_line = article_file.readlines()[0]
# Strip the header and then use the first line to get the article title
article_title = first_line.strip().lstrip("# ")
if context["not_index_page"]:
context["OPEN_GRAPH_TITLE"] = "%s (%s)" % (article_title, title_base)
else:
context["OPEN_GRAPH_TITLE"] = title_base
self.request.placeholder_open_graph_description = (
"REPLACMENT_OPEN_GRAPH_DESCRIPTION_%s" % (int(2**24 * random.random()),))
context["OPEN_GRAPH_DESCRIPTION"] = self.request.placeholder_open_graph_description
context["sidebar_index"] = sidebar_index
context["sidebar_class"] = sidebar_class
@ -116,7 +136,6 @@ class MarkdownDirectoryView(ApiURLView):
result.status_code = http_status
return result
def add_integrations_context(context: Dict[str, Any]) -> None:
alphabetical_sorted_categories = OrderedDict(sorted(CATEGORIES.items()))
alphabetical_sorted_integration = OrderedDict(sorted(INTEGRATIONS.items()))

View File

@ -543,6 +543,8 @@ MIDDLEWARE = (
# Make sure 2FA middlewares come after authentication middleware.
'django_otp.middleware.OTPMiddleware', # Required by Two Factor auth.
'two_factor.middleware.threadlocals.ThreadLocals', # Required by Twilio
# Needs to be after CommonMiddleware, which sets Content-Length
'zerver.middleware.FinalizeOpenGraphDescription',
)
ANONYMOUS_USER_ID = None