scripts: Add {start,stop,restart}-server support for postgresql role.

During the upgrade process of a postgresql-only Zulip installation
(`puppet_classes = zulip::profile::postgresql` in
`/etc/zulip/zulip.conf`), both `scripts/start-server` and
`scripts/stop-server` fail because they try to handle supervisor
services that are not available there (e.g. Tornado): only
`/etc/supervisor/conf.d/zulip/zulip_db.conf` is present, not
`/etc/supervisor/conf.d/zulip/zulip.conf`.

While this wasn't previously supported, it's a reasonable thing to
do, and it can be handled by just adding a few conditionals.
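
In short, the scripts now only drive the supervisor services whose
config files are actually installed on the host. A simplified sketch
of the approach (the exact service lists and ordering are in the
diffs below; the combined `services` flow here is only illustrative):

    import os
    import subprocess

    def has_application_server() -> bool:
        # Present only when the full application server is deployed.
        return os.path.exists("/etc/supervisor/conf.d/zulip/zulip.conf")

    services = []
    if has_application_server():
        # Django, Tornado, and the queue workers exist only on app servers.
        services += ["zulip-django", "zulip-tornado:*", "zulip-workers:*"]
    if os.path.exists("/etc/supervisor/conf.d/zulip/zulip_db.conf"):
        # The postgresql role only runs the full-text-search updater.
        services.append("process-fts-updates")

    subprocess.check_call(["supervisorctl", "stop", *services])
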
Author: Robert Imschweiler, 2021-04-27 20:48:19 +02:00 (committed by Tim Abbott)
parent 772500d1c6
commit 534d78232c
3 changed files with 71 additions and 60 deletions

scripts/lib/zulip_tools.py

@@ -599,6 +599,10 @@ def is_vagrant_env_host(path: str) -> bool:
    return ".vagrant" in os.listdir(path)


def has_application_server() -> bool:
    return os.path.exists("/etc/supervisor/conf.d/zulip/zulip.conf")


def deport(netloc: str) -> str:
    """Remove the port from a hostname:port string. Brackets on a literal
    IPv6 address are included."""

scripts/restart-server

@@ -16,6 +16,7 @@ from scripts.lib.zulip_tools import (
    WARNING,
    get_config_file,
    get_tornado_ports,
    has_application_server,
    overwrite_symlink,
)
@@ -62,35 +63,37 @@ if change_symlink:
config_file = get_config_file()
tornado_ports = get_tornado_ports(config_file)
workers = []
# Start by restarting the workers and similar processes, one at a
# time. Workers can always support processing events with old event
# contents, but cannot necessarily understand events enqueued by a
# newer Django process. Restarting them one at a time, rather than
# all-at-once, minimizes the downtime of each, and reduces startup
# contention.
#
# For "start" or less-graceful circumstances, we don't need to
# iterate; we'll stop all of them at once, and start them all later.
# In those cases, using the glob form is faster -- but if we do need
# to iterate, we need to expand the glob.
if action == "start" or args.less_graceful:
    workers = ["zulip-workers:*"]
else:
    worker_status = subprocess.run(
        ["supervisorctl", "status", "zulip-workers:*"],
        universal_newlines=True,
        stdout=subprocess.PIPE,
    )
    # `supervisorctl status` returns 3 if any are stopped, which is fine here.
    if worker_status.returncode not in (0, 3):
        worker_status.check_returncode()
    workers = [status_line.split()[0] for status_line in worker_status.stdout.splitlines()]
if has_application_server():
    # Start by restarting the workers and similar processes, one at a
    # time. Workers can always support processing events with old event
    # contents, but cannot necessarily understand events enqueued by a
    # newer Django process. Restarting them one at a time, rather than
    # all-at-once, minimizes the downtime of each, and reduces startup
    # contention.
    #
    # For "start" or less-graceful circumstances, we don't need to
    # iterate; we'll stop all of them at once, and start them all later.
    # In those cases, using the glob form is faster -- but if we do need
    # to iterate, we need to expand the glob.
    if action == "start" or args.less_graceful:
        workers.append("zulip-workers:*")
    else:
        worker_status = subprocess.run(
            ["supervisorctl", "status", "zulip-workers:*"],
            universal_newlines=True,
            stdout=subprocess.PIPE,
        )
        # `supervisorctl status` returns 3 if any are stopped, which is fine here.
        if worker_status.returncode not in (0, 3):
            worker_status.check_returncode()
        workers.extend(status_line.split()[0] for status_line in worker_status.stdout.splitlines())
if os.path.exists("/etc/supervisor/conf.d/zulip/zulip_db.conf"):
    workers.append("process-fts-updates")
if action == "restart":
if action == "restart" and len(workers) > 0:
    if args.less_graceful:
        # The less graceful form stops every worker now; we start them
        # back up at the end.
@@ -105,41 +108,44 @@ if action == "restart":
            logging.info("Restarting %s", worker)
            subprocess.check_call(["supervisorctl", "restart", worker])
# Next, we restart the Tornado processes sequentially, in order to
# minimize downtime of the tornado service caused by too many Python
# processes restarting at the same time, resulting in each receiving
# insufficient priority. This is important, because Tornado is the
# main source of user-visible downtime when we restart a Zulip server.
# We do this before restarting Django, in case there are new event
# types which it will need to know how to deal with.
if len(tornado_ports) > 1:
    for p in tornado_ports:
        # Restart Tornado processes individually for a better rate of
        # restarts. This also avoids behavior with restarting a whole
        # supervisord group where if any individual process is slow to
        # stop, the whole bundle stays stopped for an extended time.
        logging.info("%s Tornado process on port %s", verbing, p)
        subprocess.check_call(["supervisorctl", action, f"zulip-tornado:zulip-tornado-port-{p}"])
else:
    logging.info("%s Tornado process", verbing)
    subprocess.check_call(["supervisorctl", action, "zulip-tornado", "zulip-tornado:*"])
if has_application_server():
    # Next, we restart the Tornado processes sequentially, in order to
    # minimize downtime of the tornado service caused by too many Python
    # processes restarting at the same time, resulting in each receiving
    # insufficient priority. This is important, because Tornado is the
    # main source of user-visible downtime when we restart a Zulip server.
    # We do this before restarting Django, in case there are new event
    # types which it will need to know how to deal with.
    if len(tornado_ports) > 1:
        for p in tornado_ports:
            # Restart Tornado processes individually for a better rate of
            # restarts. This also avoids behavior with restarting a whole
            # supervisord group where if any individual process is slow to
            # stop, the whole bundle stays stopped for an extended time.
            logging.info("%s Tornado process on port %s", verbing, p)
            subprocess.check_call(
                ["supervisorctl", action, f"zulip-tornado:zulip-tornado-port-{p}"]
            )
    else:
        logging.info("%s Tornado process", verbing)
        subprocess.check_call(["supervisorctl", action, "zulip-tornado", "zulip-tornado:*"])
# Finally, restart the Django uWSGI processes.
logging.info("%s django server", verbing)
subprocess.check_call(["supervisorctl", action, "zulip-django"])
    # Finally, restart the Django uWSGI processes.
    logging.info("%s django server", verbing)
    subprocess.check_call(["supervisorctl", action, "zulip-django"])
using_sso = subprocess.check_output(["./scripts/get-django-setting", "USING_APACHE_SSO"])
if using_sso.strip() == b"True":
    logging.info("Restarting Apache WSGI process...")
    subprocess.check_call(["pkill", "-x", "apache2", "-u", "zulip"])
# If we were doing this non-gracefully, or starting as opposed to
# restarting, we need to turn the workers (back) on. There's no
# advantage to doing this not-all-at-once.
if action == "start" or args.less_graceful:
if (action == "start" or args.less_graceful) and len(workers) > 0:
    logging.info("Starting workers")
    subprocess.check_call(["supervisorctl", "start", *workers])
using_sso = subprocess.check_output(["./scripts/get-django-setting", "USING_APACHE_SSO"])
if using_sso.strip() == b"True":
    logging.info("Restarting Apache WSGI process...")
    subprocess.check_call(["pkill", "-x", "apache2", "-u", "zulip"])
logging.info("Done!")
print(OKGREEN + f"Zulip {action}ed successfully!" + ENDC)
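
A note on the worker-listing logic above: in the graceful path, every
line of `supervisorctl status zulip-workers:*` output starts with the
process name, so taking the first whitespace-separated field of each
line yields the individual restart targets. A rough illustration (the
status lines below are made up for the example):

    # Hypothetical status lines; the real data comes from running
    # `supervisorctl status zulip-workers:*` via subprocess.run().
    sample_status_lines = [
        "zulip-workers:zulip_events_deferred_work      RUNNING  pid 1234, uptime 2:01:07",
        "zulip-workers:zulip_deliver_scheduled_emails  STOPPED  Apr 27 06:30 PM",
    ]
    workers = [line.split()[0] for line in sample_status_lines]
    # -> ["zulip-workers:zulip_events_deferred_work",
    #     "zulip-workers:zulip_deliver_scheduled_emails"]
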

scripts/stop-server

@@ -7,7 +7,7 @@ import sys
import time
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from scripts.lib.zulip_tools import ENDC, OKGREEN, WARNING
from scripts.lib.zulip_tools import ENDC, OKGREEN, WARNING, has_application_server
deploy_path = os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))
os.chdir(deploy_path)
@@ -25,14 +25,15 @@ services = []
if os.path.exists("/etc/supervisor/conf.d/zulip/zulip_db.conf"):
    services.append("process-fts-updates")
# Contrary to the order in (re)start-server, we stop django before the
# workers, to increase the chance that we finish processing any work
# that may have been enqueued by the Django, leaving the final state
# closer to "empty." We stop Django before Tornado so it doesn't try
# to make requests to make queues with a down'd Tornado.
services.append("zulip-django")
services.extend(["zulip-tornado", "zulip-tornado:*"])
services.append("zulip-workers:*")
if has_application_server():
    # Contrary to the order in (re)start-server, we stop django before the
    # workers, to increase the chance that we finish processing any work
    # that may have been enqueued by the Django, leaving the final state
    # closer to "empty." We stop Django before Tornado so it doesn't try
    # to make requests to make queues with a down'd Tornado.
    services.append("zulip-django")
    services.extend(["zulip-tornado", "zulip-tornado:*"])
    services.append("zulip-workers:*")
subprocess.check_call(["supervisorctl", "stop", *services])
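
For a concrete picture of what stop-server now drives (an
illustration derived from the checks above, not part of the diff):

    # Resulting `services` list, by host role:
    #   all-in-one server (zulip.conf and zulip_db.conf present):
    #     ["process-fts-updates", "zulip-django",
    #      "zulip-tornado", "zulip-tornado:*", "zulip-workers:*"]
    #   postgresql-only host (only zulip_db.conf present):
    #     ["process-fts-updates"]
    # so on a database-only host the final call is simply
    # `supervisorctl stop process-fts-updates`.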