mirror of https://github.com/zulip/zulip.git
views: Add a /health healthcheck endpoint.
This endpoint verifies that the services that Zulip needs to function are running, and that Django can talk to them. It is designed to be used as a readiness probe[^1] for Zulip, either by Kubernetes, or by some other reverse-proxy load-balancer in front of Zulip. Because of this, it limits access to only localhost and the IP addresses of configured reverse proxies. Tests are limited because we cannot stop running services (which would impact other concurrent tests), and there would be extremely limited utility in mocking the very specific methods we're calling to raise the exceptions that we're looking for. [^1]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
This commit is contained in:
parent
e60a4c4d01
commit
5ee4b642ad
|
@ -66,6 +66,14 @@ class zulip::app_frontend_base {
|
||||||
notify => Service['nginx'],
|
notify => Service['nginx'],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
file { '/etc/nginx/zulip-include/app.d/healthcheck.conf':
|
||||||
|
require => File['/etc/nginx/zulip-include/app.d'],
|
||||||
|
owner => 'root',
|
||||||
|
group => 'root',
|
||||||
|
mode => '0644',
|
||||||
|
content => template('zulip/nginx/healthcheck.conf.template.erb'),
|
||||||
|
notify => Service['nginx'],
|
||||||
|
}
|
||||||
|
|
||||||
file { '/etc/nginx/zulip-include/upstreams':
|
file { '/etc/nginx/zulip-include/upstreams':
|
||||||
require => Package[$zulip::common::nginx],
|
require => Package[$zulip::common::nginx],
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
location /health {
|
||||||
|
allow 127.0.0.1;
|
||||||
|
allow ::1;
|
||||||
|
|
||||||
|
<% @loadbalancers.each do |host| -%>
|
||||||
|
allow <%= host %>;
|
||||||
|
<% end -%>
|
||||||
|
|
||||||
|
deny all;
|
||||||
|
|
||||||
|
include uwsgi_params;
|
||||||
|
}
|
|
@ -42,6 +42,7 @@ class ErrorCode(Enum):
|
||||||
MOVE_MESSAGES_TIME_LIMIT_EXCEEDED = auto()
|
MOVE_MESSAGES_TIME_LIMIT_EXCEEDED = auto()
|
||||||
REACTION_ALREADY_EXISTS = auto()
|
REACTION_ALREADY_EXISTS = auto()
|
||||||
REACTION_DOES_NOT_EXIST = auto()
|
REACTION_DOES_NOT_EXIST = auto()
|
||||||
|
SERVER_NOT_READY = auto()
|
||||||
|
|
||||||
|
|
||||||
class JsonableError(Exception):
|
class JsonableError(Exception):
|
||||||
|
@ -533,3 +534,8 @@ class ApiParamValidationError(JsonableError):
|
||||||
def __init__(self, msg: str, error_type: str) -> None:
|
def __init__(self, msg: str, error_type: str) -> None:
|
||||||
super().__init__(msg)
|
super().__init__(msg)
|
||||||
self.error_type = error_type
|
self.error_type = error_type
|
||||||
|
|
||||||
|
|
||||||
|
class ServerNotReadyError(JsonableError):
|
||||||
|
code = ErrorCode.SERVER_NOT_READY
|
||||||
|
http_status_code = 500
|
||||||
|
|
|
@ -539,7 +539,7 @@ class HostDomainMiddleware(MiddlewareMixin):
|
||||||
#
|
#
|
||||||
# API authentication will end up checking for an invalid
|
# API authentication will end up checking for an invalid
|
||||||
# realm, and throw a JSON-format error if appropriate.
|
# realm, and throw a JSON-format error if appropriate.
|
||||||
if request.path.startswith(("/static/", "/api/", "/json/")):
|
if request.path.startswith(("/static/", "/api/", "/json/")) or request.path == "/health":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
subdomain = get_subdomain(request)
|
subdomain = get_subdomain(request)
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from zerver.lib.exceptions import ServerNotReadyError
|
||||||
|
from zerver.lib.test_classes import ZulipTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class HealthTest(ZulipTestCase):
|
||||||
|
def test_healthy(self) -> None:
|
||||||
|
# We do not actually use rabbitmq in tests, so this fails
|
||||||
|
# unless it's mocked out.
|
||||||
|
with mock.patch("zerver.views.health.check_rabbitmq"):
|
||||||
|
result = self.client_get("/health")
|
||||||
|
self.assert_json_success(result)
|
||||||
|
|
||||||
|
def test_database_failure(self) -> None:
|
||||||
|
with mock.patch(
|
||||||
|
"zerver.views.health.check_database",
|
||||||
|
side_effect=ServerNotReadyError("Cannot query postgresql"),
|
||||||
|
), self.assertLogs(level="ERROR") as logs:
|
||||||
|
result = self.client_get("/health")
|
||||||
|
self.assert_json_error(result, "Cannot query postgresql", status_code=500)
|
||||||
|
self.assertIn(
|
||||||
|
"zerver.lib.exceptions.ServerNotReadyError: Cannot query postgresql", logs.output[0]
|
||||||
|
)
|
|
@ -0,0 +1,65 @@
|
||||||
|
from django.db.migrations.recorder import MigrationRecorder
|
||||||
|
from django.http import HttpRequest, HttpResponse
|
||||||
|
from django.utils.crypto import get_random_string
|
||||||
|
from django.utils.translation import gettext as _
|
||||||
|
from pika import BlockingConnection
|
||||||
|
|
||||||
|
from zerver.lib.cache import cache_delete, cache_get, cache_set
|
||||||
|
from zerver.lib.exceptions import ServerNotReadyError
|
||||||
|
from zerver.lib.queue import get_queue_client
|
||||||
|
from zerver.lib.redis_utils import get_redis_client
|
||||||
|
from zerver.lib.response import json_success
|
||||||
|
|
||||||
|
|
||||||
|
def check_database() -> None:
|
||||||
|
try:
|
||||||
|
if not MigrationRecorder.Migration.objects.exists():
|
||||||
|
raise ServerNotReadyError(_("Database is empty")) # nocoverage
|
||||||
|
except ServerNotReadyError: # nocoverage
|
||||||
|
raise
|
||||||
|
except Exception: # nocoverage
|
||||||
|
raise ServerNotReadyError(_("Cannot query postgresql"))
|
||||||
|
|
||||||
|
|
||||||
|
def check_rabbitmq() -> None: # nocoverage
|
||||||
|
try:
|
||||||
|
conn = get_queue_client().connection
|
||||||
|
if conn is None:
|
||||||
|
raise ServerNotReadyError(_("Cannot connect to rabbitmq"))
|
||||||
|
assert isinstance(conn, BlockingConnection)
|
||||||
|
conn.process_data_events()
|
||||||
|
except ServerNotReadyError:
|
||||||
|
raise
|
||||||
|
except Exception:
|
||||||
|
raise ServerNotReadyError(_("Cannot query rabbitmq"))
|
||||||
|
|
||||||
|
|
||||||
|
def check_redis() -> None:
|
||||||
|
try:
|
||||||
|
get_redis_client().ping()
|
||||||
|
except Exception: # nocoverage
|
||||||
|
raise ServerNotReadyError(_("Cannot query redis"))
|
||||||
|
|
||||||
|
|
||||||
|
def check_memcached() -> None:
|
||||||
|
try:
|
||||||
|
roundtrip_key = "health_check_" + get_random_string(32)
|
||||||
|
roundtrip_value = get_random_string(32)
|
||||||
|
cache_set(roundtrip_key, roundtrip_value)
|
||||||
|
got_value = cache_get(roundtrip_key)[0]
|
||||||
|
if got_value != roundtrip_value:
|
||||||
|
raise ServerNotReadyError(_("Cannot write to memcached")) # nocoverage
|
||||||
|
cache_delete(roundtrip_key)
|
||||||
|
except ServerNotReadyError: # nocoverage
|
||||||
|
raise
|
||||||
|
except Exception: # nocoverage
|
||||||
|
raise ServerNotReadyError(_("Cannot query memcached"))
|
||||||
|
|
||||||
|
|
||||||
|
def health(request: HttpRequest) -> HttpResponse:
|
||||||
|
check_database()
|
||||||
|
check_rabbitmq()
|
||||||
|
check_redis()
|
||||||
|
check_memcached()
|
||||||
|
|
||||||
|
return json_success(request)
|
|
@ -54,6 +54,7 @@ from zerver.views.documentation import IntegrationView, MarkdownDirectoryView, i
|
||||||
from zerver.views.drafts import create_drafts, delete_draft, edit_draft, fetch_drafts
|
from zerver.views.drafts import create_drafts, delete_draft, edit_draft, fetch_drafts
|
||||||
from zerver.views.email_mirror import email_mirror_message
|
from zerver.views.email_mirror import email_mirror_message
|
||||||
from zerver.views.events_register import events_register_backend
|
from zerver.views.events_register import events_register_backend
|
||||||
|
from zerver.views.health import health
|
||||||
from zerver.views.home import accounts_accept_terms, desktop_home, home
|
from zerver.views.home import accounts_accept_terms, desktop_home, home
|
||||||
from zerver.views.hotspots import mark_hotspot_as_read
|
from zerver.views.hotspots import mark_hotspot_as_read
|
||||||
from zerver.views.invite import (
|
from zerver.views.invite import (
|
||||||
|
@ -836,6 +837,9 @@ urls += [
|
||||||
path("api/v1/", include(v1_api_mobile_patterns)),
|
path("api/v1/", include(v1_api_mobile_patterns)),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Healthcheck URL
|
||||||
|
urls += [path("health", health)]
|
||||||
|
|
||||||
# The sequence is important; if i18n URLs don't come first then
|
# The sequence is important; if i18n URLs don't come first then
|
||||||
# reverse URL mapping points to i18n URLs which causes the frontend
|
# reverse URL mapping points to i18n URLs which causes the frontend
|
||||||
# tests to fail
|
# tests to fail
|
||||||
|
|
Loading…
Reference in New Issue