mirror of https://github.com/zulip/zulip.git
tornado: Move SIGTERM shutdown handler into a callback.
A SIGTERM can show up at any point in the ioloop, even in places which are not prepared to handle it. This results in the process ignoring the `sys.exit` which the SIGTERM handler calls, with an uncaught SystemExit exception: ``` 2021-11-09 15:37:49.368 ERR [tornado.application:9803] Uncaught exception Traceback (most recent call last): File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/http1connection.py", line 238, in _read_message delegate.finish() File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/httpserver.py", line 314, in finish self.delegate.finish() File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/routing.py", line 251, in finish self.delegate.finish() File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/web.py", line 2097, in finish self.execute() File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/web.py", line 2130, in execute **self.path_kwargs) File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/gen.py", line 307, in wrapper yielded = next(result) File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/tornado/web.py", line 1510, in _execute result = method(*self.path_args, **self.path_kwargs) File "/home/zulip/deployments/2021-11-08-05-10-23/zerver/tornado/handlers.py", line 150, in get request = self.convert_tornado_request_to_django_request() File "/home/zulip/deployments/2021-11-08-05-10-23/zerver/tornado/handlers.py", line 113, in convert_tornado_request_to_django_request request = WSGIRequest(environ) File "/home/zulip/deployments/2021-11-08-05-10-23/zulip-py3-venv/lib/python3.6/site-packages/django/core/handlers/wsgi.py", line 66, in __init__ script_name = get_script_name(environ) File "/home/zulip/deployments/2021-11-08-05-10-23/zerver/tornado/event_queue.py", line 611, in <lambda> signal.signal(signal.SIGTERM, lambda signum, stack: sys.exit(1)) SystemExit: 1 ``` Supervisor then terminates the process with a SIGKILL, which results in dropping data held in the tornado process, as it does not dump its queue. The only command which is safe to run in the signal handler is `ioloop.add_callback_from_signal`, which schedules the callback to run during the course of the normal ioloop. This callbacks does an orderly shutdown of the server and the ioloop before exiting.
This commit is contained in:
parent
847bf8207f
commit
bc5539d871
|
@ -104,7 +104,7 @@ class Command(BaseCommand):
|
||||||
from zerver.tornado.ioloop_logging import logging_data
|
from zerver.tornado.ioloop_logging import logging_data
|
||||||
|
|
||||||
logging_data["port"] = str(port)
|
logging_data["port"] = str(port)
|
||||||
setup_event_queue(port)
|
setup_event_queue(http_server, port)
|
||||||
add_client_gc_hook(missedmessage_hook)
|
add_client_gc_hook(missedmessage_hook)
|
||||||
setup_tornado_rabbitmq()
|
setup_tornado_rabbitmq()
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ from typing import (
|
||||||
List,
|
List,
|
||||||
Mapping,
|
Mapping,
|
||||||
MutableMapping,
|
MutableMapping,
|
||||||
|
NoReturn,
|
||||||
Optional,
|
Optional,
|
||||||
Sequence,
|
Sequence,
|
||||||
Set,
|
Set,
|
||||||
|
@ -603,12 +604,24 @@ def send_restart_events(immediate: bool = False) -> None:
|
||||||
client.add_event(event)
|
client.add_event(event)
|
||||||
|
|
||||||
|
|
||||||
def setup_event_queue(port: int) -> None:
|
def handle_sigterm(server: tornado.httpserver.HTTPServer) -> NoReturn:
|
||||||
|
logging.warning("Got SIGTERM, shutting down...")
|
||||||
|
server.stop()
|
||||||
|
tornado.ioloop.IOLoop.instance().stop()
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_event_queue(server: tornado.httpserver.HTTPServer, port: int) -> None:
|
||||||
|
ioloop = tornado.ioloop.IOLoop.instance()
|
||||||
|
|
||||||
if not settings.TEST_SUITE:
|
if not settings.TEST_SUITE:
|
||||||
load_event_queues(port)
|
load_event_queues(port)
|
||||||
atexit.register(dump_event_queues, port)
|
atexit.register(dump_event_queues, port)
|
||||||
# Make sure we dump event queues even if we exit via signal
|
# Make sure we dump event queues even if we exit via signal
|
||||||
signal.signal(signal.SIGTERM, lambda signum, stack: sys.exit(1))
|
signal.signal(
|
||||||
|
signal.SIGTERM,
|
||||||
|
lambda signum, frame: ioloop.add_callback_from_signal(handle_sigterm, server),
|
||||||
|
)
|
||||||
add_reload_hook(lambda: dump_event_queues(port))
|
add_reload_hook(lambda: dump_event_queues(port))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -617,7 +630,6 @@ def setup_event_queue(port: int) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Set up event queue garbage collection
|
# Set up event queue garbage collection
|
||||||
ioloop = tornado.ioloop.IOLoop.instance()
|
|
||||||
pc = tornado.ioloop.PeriodicCallback(
|
pc = tornado.ioloop.PeriodicCallback(
|
||||||
lambda: gc_event_queues(port), EVENT_QUEUE_GC_FREQ_MSECS, ioloop
|
lambda: gc_event_queues(port), EVENT_QUEUE_GC_FREQ_MSECS, ioloop
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue