zulip/scripts/lib/supervisor.py

76 lines
2.4 KiB
Python
Raw Normal View History

import socket
supervisor: Retry, with backoff, to connect to supervisor socket. If `zulip-puppet-apply` is run during an upgrade, it will immediately try to re-`stop-server` before running migrations; if the last step in the puppet application was to restart `supervisor`, it may not be listening on its UNIX socket yet. In such cases, `socket.connect()` throws a `FileNotFoundError`: ``` Traceback (most recent call last): File "./scripts/stop-server", line 53, in <module> services = list_supervisor_processes(services, only_running=True) File "./scripts/lib/supervisor.py", line 34, in list_supervisor_processes processes = rpc().supervisor.getAllProcessInfo() File "/usr/lib/python3.9/xmlrpc/client.py", line 1116, in __call__ return self.__send(self.__name, args) File "/usr/lib/python3.9/xmlrpc/client.py", line 1456, in __request response = self.__transport.request( File "/usr/lib/python3.9/xmlrpc/client.py", line 1160, in request return self.single_request(host, handler, request_body, verbose) File "/usr/lib/python3.9/xmlrpc/client.py", line 1172, in single_request http_conn = self.send_request(host, handler, request_body, verbose) File "/usr/lib/python3.9/xmlrpc/client.py", line 1285, in send_request self.send_content(connection, request_body) File "/usr/lib/python3.9/xmlrpc/client.py", line 1315, in send_content connection.endheaders(request_body) File "/usr/lib/python3.9/http/client.py", line 1250, in endheaders self._send_output(message_body, encode_chunked=encode_chunked) File "/usr/lib/python3.9/http/client.py", line 1010, in _send_output self.send(msg) File "/usr/lib/python3.9/http/client.py", line 950, in send self.connect() File "./scripts/lib/supervisor.py", line 10, in connect self.sock.connect(self.host) FileNotFoundError: [Errno 2] No such file or directory ``` Catch the `FileNotFoundError` and retry twice more, with backoff. 
If it fails repeatedly, point to `service supervisor status` for further debugging, as `FileNotFoundError` is rather misleading -- the file exists, it simply is not accepting connections.
2023-05-12 20:15:26 +02:00
import time
from http.client import HTTPConnection
from typing import Dict, List, Optional, Tuple, Union
from xmlrpc import client
class UnixStreamHTTPConnection(HTTPConnection):
    """HTTP connection carried over a UNIX domain socket instead of TCP.

    The ``host`` given to the constructor is used as the filesystem path of
    the socket to connect to.
    """

    # Total number of connection attempts made before giving up.
    connect_attempts = 3
    # Delay before the first retry, in seconds; doubled for each later retry.
    backoff_seconds = 1.0

    def connect(self) -> None:
        """Connect to the UNIX socket at ``self.host``, retrying with backoff.

        When supervisor has only just been (re)started -- for example as the
        last step of a puppet run during an upgrade -- it may not be
        listening on its socket yet, and ``socket.connect()`` raises
        ``FileNotFoundError``.  That error is retried, sleeping between
        attempts (never after the last one).

        Raises:
            Exception: if every attempt failed with ``FileNotFoundError``.
                The message points at ``service supervisor status`` because
                the bare ``FileNotFoundError`` is misleading; the original
                error is attached as ``__cause__``.
        """
        last_error: Optional[FileNotFoundError] = None
        for attempt in range(self.connect_attempts):
            if attempt > 0:
                # Backoff and retry: 1x, 2x, 4x, ... the base delay.
                time.sleep(self.backoff_seconds * 2 ** (attempt - 1))
            sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            try:
                sock.connect(self.host)
            except FileNotFoundError as err:
                # Do not leak the descriptor of a socket that never connected.
                sock.close()
                last_error = err
                continue
            except BaseException:
                sock.close()
                raise
            self.sock = sock
            return
        raise Exception(
            "Failed to connect to supervisor -- check that it is running, by running 'service supervisor status'"
        ) from last_error
class UnixStreamTransport(client.Transport):
    """XML-RPC transport that sends its requests over a UNIX domain socket."""

    def __init__(self, socket_path: str) -> None:
        """Remember ``socket_path``, the socket every connection will use."""
        super().__init__()
        self.socket_path = socket_path

    def make_connection(
        self, host: Union[Tuple[str, Dict[str, str]], str]
    ) -> UnixStreamHTTPConnection:
        """Return a new connection to the stored socket path.

        ``host`` is accepted for interface compatibility but is not used.
        """
        connection = UnixStreamHTTPConnection(self.socket_path)
        return connection
def rpc() -> client.ServerProxy:
    """Build an XML-RPC proxy that talks over /var/run/supervisor.sock."""
    # The URL is only a placeholder required by ServerProxy; the transport
    # always connects to the UNIX socket path it was given.
    transport = UnixStreamTransport("/var/run/supervisor.sock")
    return client.ServerProxy("http://localhost", transport=transport)
def list_supervisor_processes(
    filter_names: Optional[List[str]] = None, *, only_running: Optional[bool] = None
) -> List[str]:
    """Return the names of processes reported by ``getAllProcessInfo``.

    A process is named ``group:name``, or just ``name`` when its group and
    name are identical.

    Args:
        filter_names: When non-empty, keep only processes whose name equals
            one of these entries.  An entry ending in ``:*`` also matches
            every process in that group as well as the bare group name.
        only_running: ``None`` keeps every process.  ``True`` keeps only
            those whose state is ``RUNNING`` or ``STARTING``; ``False`` keeps
            only the others.

    Returns:
        The matching process names, in the order they were reported.
    """

    def is_selected(candidate: str) -> bool:
        # No filter (None or empty list) selects everything.
        if not filter_names:
            return True
        for pattern in filter_names:
            if candidate == pattern:
                return True
            # zulip-tornado:* matches zulip-tornado:9800 and zulip-tornado
            if pattern.endswith(":*") and (
                candidate == pattern[:-2] or candidate.startswith(pattern[:-1])
            ):
                return True
        return False

    process_info = rpc().supervisor.getAllProcessInfo()
    assert isinstance(process_info, list)

    selected: List[str] = []
    for info in process_info:
        group, short_name = info["group"], info["name"]
        full_name = short_name if group == short_name else f"{group}:{short_name}"
        if not is_selected(full_name):
            continue
        if only_running is not None:
            is_up = info["statename"] in ("RUNNING", "STARTING")
            if is_up != only_running:
                continue
        selected.append(full_name)
    return selected