views: Add a /health healthcheck endpoint.

This endpoint verifies that the services that Zulip needs to function
are running, and Django can talk to them.  It is designed to be used
as a readiness probe[^1] for Zulip, either by Kubernetes, or some other
reverse-proxy load-balancer in front of Zulip.  Because of this, it
limits access to only localhost and the IP addresses of configured
reverse proxies.

Tests are limited because we cannot stop running services (which would
impact other concurrent tests), and there would be extremely limited
utility in mocking the very specific methods we're calling so that they
raise the exceptions that we're looking for.

[^1]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
This commit is contained in:
Alex Vandiver 2023-09-12 20:34:54 +00:00 committed by Tim Abbott
parent e60a4c4d01
commit 5ee4b642ad
7 changed files with 120 additions and 1 deletions

View File

@ -66,6 +66,14 @@ class zulip::app_frontend_base {
notify => Service['nginx'],
}
}
# Install the nginx snippet for the /health endpoint into the app.d
# include directory.  The content is rendered from the ERB template
# named below, the app.d directory resource must exist first, and the
# nginx service is notified whenever this file changes.
file { '/etc/nginx/zulip-include/app.d/healthcheck.conf':
require => File['/etc/nginx/zulip-include/app.d'],
owner => 'root',
group => 'root',
mode => '0644',
content => template('zulip/nginx/healthcheck.conf.template.erb'),
notify => Service['nginx'],
}
file { '/etc/nginx/zulip-include/upstreams':
require => Package[$zulip::common::nginx],

View File

@ -0,0 +1,12 @@
# Healthcheck endpoint: only reachable from localhost and from the
# configured load balancers; every other client is denied.
location /health {
# Loopback addresses (IPv4 and IPv6) are always allowed.
allow 127.0.0.1;
allow ::1;
# One "allow" line is emitted per entry in the template's
# @loadbalancers list.
<% @loadbalancers.each do |host| -%>
allow <%= host %>;
<% end -%>
# Anything not explicitly allowed above is rejected.
deny all;
include uwsgi_params;
}

View File

@ -42,6 +42,7 @@ class ErrorCode(Enum):
MOVE_MESSAGES_TIME_LIMIT_EXCEEDED = auto()
REACTION_ALREADY_EXISTS = auto()
REACTION_DOES_NOT_EXIST = auto()
SERVER_NOT_READY = auto()
class JsonableError(Exception):
@ -533,3 +534,8 @@ class ApiParamValidationError(JsonableError):
def __init__(self, msg: str, error_type: str) -> None:
super().__init__(msg)
self.error_type = error_type
class ServerNotReadyError(JsonableError):
    """Error raised when a service the server depends on is not usable.

    Reported to the client with the SERVER_NOT_READY error code and an
    HTTP 500 status.
    """

    # HTTP status sent with the JSON error response.
    http_status_code = 500
    # Machine-readable error code for this failure.
    code = ErrorCode.SERVER_NOT_READY

View File

@ -539,7 +539,7 @@ class HostDomainMiddleware(MiddlewareMixin):
#
# API authentication will end up checking for an invalid
# realm, and throw a JSON-format error if appropriate.
if request.path.startswith(("/static/", "/api/", "/json/")):
if request.path.startswith(("/static/", "/api/", "/json/")) or request.path == "/health":
return None
subdomain = get_subdomain(request)

View File

@ -0,0 +1,24 @@
from unittest import mock
from zerver.lib.exceptions import ServerNotReadyError
from zerver.lib.test_classes import ZulipTestCase
class HealthTest(ZulipTestCase):
    """Tests for the /health endpoint."""

    def test_healthy(self) -> None:
        """With the RabbitMQ check stubbed out, /health reports success."""
        # RabbitMQ is not actually used in tests, so the real check
        # fails unless it is mocked out.
        with mock.patch("zerver.views.health.check_rabbitmq"):
            response = self.client_get("/health")
        self.assert_json_success(response)

    def test_database_failure(self) -> None:
        """A failing database check yields a 500 JSON error and an ERROR log."""
        broken_database = mock.patch(
            "zerver.views.health.check_database",
            side_effect=ServerNotReadyError("Cannot query postgresql"),
        )
        with broken_database, self.assertLogs(level="ERROR") as captured:
            response = self.client_get("/health")
        self.assert_json_error(response, "Cannot query postgresql", status_code=500)
        first_log_entry = captured.output[0]
        self.assertIn(
            "zerver.lib.exceptions.ServerNotReadyError: Cannot query postgresql",
            first_log_entry,
        )

65
zerver/views/health.py Normal file
View File

@ -0,0 +1,65 @@
from django.db.migrations.recorder import MigrationRecorder
from django.http import HttpRequest, HttpResponse
from django.utils.crypto import get_random_string
from django.utils.translation import gettext as _
from pika import BlockingConnection
from zerver.lib.cache import cache_delete, cache_get, cache_set
from zerver.lib.exceptions import ServerNotReadyError
from zerver.lib.queue import get_queue_client
from zerver.lib.redis_utils import get_redis_client
from zerver.lib.response import json_success
def check_database() -> None:
    """Verify that the database can be queried and has migration records.

    Raises:
        ServerNotReadyError: if the query fails for any reason, or if no
            migration records exist.
    """
    try:
        has_migrations = MigrationRecorder.Migration.objects.exists()
    except Exception:  # nocoverage
        raise ServerNotReadyError(_("Cannot query postgresql"))
    if not has_migrations:
        raise ServerNotReadyError(_("Database is empty"))  # nocoverage
def check_rabbitmq() -> None:  # nocoverage
    """Verify that the queue client has a live RabbitMQ connection.

    Raises:
        ServerNotReadyError: if the queue client has no connection, or if
            obtaining or exercising the connection fails in any way.
    """
    try:
        connection = get_queue_client().connection
        if connection is not None:
            assert isinstance(connection, BlockingConnection)
            # Exercise the connection so a dead one surfaces as an error.
            connection.process_data_events()
    except Exception:
        raise ServerNotReadyError(_("Cannot query rabbitmq"))
    else:
        if connection is None:
            raise ServerNotReadyError(_("Cannot connect to rabbitmq"))
def check_redis() -> None:
    """Verify that Redis answers a ping.

    Raises:
        ServerNotReadyError: if creating the client or pinging fails.
    """
    try:
        redis_client = get_redis_client()
        redis_client.ping()
    except Exception:  # nocoverage
        raise ServerNotReadyError(_("Cannot query redis"))
def check_memcached() -> None:
    """Verify that memcached accepts a write and returns it on read.

    A random key/value pair is written, read back, compared, and then
    deleted again.

    Raises:
        ServerNotReadyError: "Cannot write to memcached" if the value read
            back is missing or differs from what was written; "Cannot
            query memcached" if any cache operation raises.
    """
    try:
        roundtrip_key = "health_check_" + get_random_string(32)
        roundtrip_value = get_random_string(32)
        cache_set(roundtrip_key, roundtrip_value)
        cached = cache_get(roundtrip_key)
        # NOTE(review): cache_get presumably returns None on a miss (as
        # Django's cache.get does) -- confirm against zerver.lib.cache.
        # Indexing None would raise TypeError and be misreported as a
        # query failure, so treat a miss explicitly as a failed write.
        if cached is None or cached[0] != roundtrip_value:
            raise ServerNotReadyError(_("Cannot write to memcached"))  # nocoverage
        cache_delete(roundtrip_key)
    except ServerNotReadyError:  # nocoverage
        raise
    except Exception:  # nocoverage
        raise ServerNotReadyError(_("Cannot query memcached"))
def health(request: HttpRequest) -> HttpResponse:
    """Readiness view: run every dependency check, then report success.

    The checks run in order (database, RabbitMQ, Redis, memcached); the
    first one that fails raises ServerNotReadyError, which propagates out
    of this view.
    """
    # The names are looked up at call time, so tests that patch an
    # individual check on this module still take effect.
    for run_check in (check_database, check_rabbitmq, check_redis, check_memcached):
        run_check()
    return json_success(request)

View File

@ -54,6 +54,7 @@ from zerver.views.documentation import IntegrationView, MarkdownDirectoryView, i
from zerver.views.drafts import create_drafts, delete_draft, edit_draft, fetch_drafts
from zerver.views.email_mirror import email_mirror_message
from zerver.views.events_register import events_register_backend
from zerver.views.health import health
from zerver.views.home import accounts_accept_terms, desktop_home, home
from zerver.views.hotspots import mark_hotspot_as_read
from zerver.views.invite import (
@ -836,6 +837,9 @@ urls += [
path("api/v1/", include(v1_api_mobile_patterns)),
]
# Healthcheck URL
urls += [path("health", health)]
# The sequence is important; if i18n URLs don't come first then
# reverse URL mapping points to i18n URLs which causes the frontend
# tests to fail