From 0da62e7cda2c44582c01179798909654aa7e2cb8 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Fri, 12 May 2023 18:15:26 +0000 Subject: [PATCH] supervisor: Retry, with backoff, to connect to supervisor socket. If `zulip-puppet-apply` is run during an upgrade, it will immediately try to re-`stop-server` before running migrations; if the last step in the puppet application was to restart `supervisor`, it may not be listening on its UNIX socket yet. In such cases, `socket.connect()` throws a `FileNotFoundError`: ``` Traceback (most recent call last): File "./scripts/stop-server", line 53, in <module> services = list_supervisor_processes(services, only_running=True) File "./scripts/lib/supervisor.py", line 34, in list_supervisor_processes processes = rpc().supervisor.getAllProcessInfo() File "/usr/lib/python3.9/xmlrpc/client.py", line 1116, in __call__ return self.__send(self.__name, args) File "/usr/lib/python3.9/xmlrpc/client.py", line 1456, in __request response = self.__transport.request( File "/usr/lib/python3.9/xmlrpc/client.py", line 1160, in request return self.single_request(host, handler, request_body, verbose) File "/usr/lib/python3.9/xmlrpc/client.py", line 1172, in single_request http_conn = self.send_request(host, handler, request_body, verbose) File "/usr/lib/python3.9/xmlrpc/client.py", line 1285, in send_request self.send_content(connection, request_body) File "/usr/lib/python3.9/xmlrpc/client.py", line 1315, in send_content connection.endheaders(request_body) File "/usr/lib/python3.9/http/client.py", line 1250, in endheaders self._send_output(message_body, encode_chunked=encode_chunked) File "/usr/lib/python3.9/http/client.py", line 1010, in _send_output self.send(msg) File "/usr/lib/python3.9/http/client.py", line 950, in send self.connect() File "./scripts/lib/supervisor.py", line 10, in connect self.sock.connect(self.host) FileNotFoundError: [Errno 2] No such file or directory ``` Catch the `FileNotFoundError` and retry once more, with backoff. 
If it fails repeatedly, point to `service supervisor status` for further debugging, as `FileNotFoundError` is rather misleading -- the file exists, it simply is not accepting connections. --- scripts/lib/supervisor.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/lib/supervisor.py b/scripts/lib/supervisor.py index 3b0c1b7611..41998fee13 100644 --- a/scripts/lib/supervisor.py +++ b/scripts/lib/supervisor.py @@ -1,4 +1,5 @@ import socket +import time from http.client import HTTPConnection from typing import Dict, List, Optional, Tuple, Union from xmlrpc import client @@ -7,7 +8,19 @@ from xmlrpc import client class UnixStreamHTTPConnection(HTTPConnection): def connect(self) -> None: self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - self.sock.connect(self.host) + connected = False + for i in range(0, 2): + try: + self.sock.connect(self.host) + connected = True + break + except FileNotFoundError: + # Backoff and retry + time.sleep(2**i) + if not connected: + raise Exception( + "Failed to connect to supervisor -- check that it is running, by running 'service supervisor status'" + ) class UnixStreamTransport(client.Transport):