From 27d53ecbe1398b644cf47d131b68ca600ebb78a8 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Thu, 8 Feb 2024 21:04:07 +0000 Subject: [PATCH] restart-server: Remove --skip-tornado flag. This flag was generally used not because we wanted to avoid restarting Tornado, but because we wanted to avoid increasing load the server when all of the clients were told to reload. Since we have laid the groundwork for separately telling Tornado to tell clients to restart, we remove the --skip-tornado flag; the next commit will add the ability to skip client restarts. --- scripts/lib/upgrade-zulip-stage-2 | 43 +++++++++---------------------- scripts/lib/zulip_tools.py | 5 ---- scripts/restart-server | 17 ++++++------ 3 files changed, 20 insertions(+), 45 deletions(-) diff --git a/scripts/lib/upgrade-zulip-stage-2 b/scripts/lib/upgrade-zulip-stage-2 index 7c09cc218f..b7105e452f 100755 --- a/scripts/lib/upgrade-zulip-stage-2 +++ b/scripts/lib/upgrade-zulip-stage-2 @@ -110,30 +110,18 @@ parser.add_argument( ) args = parser.parse_args() -# There are two reasons we might want to do a "minimal change" which -# asserts puppet/database are unchanged: if we're setting up for a -# post-upgrade manual restart (--skip-restart), or if we're only -# restarting part of the system (--skip-tornado). -minimal_change = args.skip_restart or args.skip_tornado -if args.skip_restart and args.skip_tornado: - logging.warning("Ignored --skip-tornado; all upgrades with --skip-restart skip all restarts.") - args.skip_tornado = False -if minimal_change: - if args.skip_restart: - flagname = "--skip-restart" - else: - flagname = "--skip-tornado" +if args.skip_restart: if args.less_graceful: - logging.warning("Ignored --less-graceful; %s is always graceful.", flagname) + logging.warning("Ignored --less-graceful; --skip-restart is always graceful.") args.less_graceful = False if args.skip_migrations: logging.warning( - "Ignored --skip-migrations; all upgrades with %s assert no migrations.", flagname + "Ignored --skip-migrations; all upgrades with --skip-restart asserts no migrations." ) args.skip_migrations = False if args.skip_puppet: logging.warning( - "Ignored --skip-puppet; all upgrades with %s assert no puppet changes.", flagname + "Ignored --skip-puppet; all upgrades with --skip-restart asserts no puppet changes." ) args.skip_puppet = False @@ -170,7 +158,7 @@ if os.path.exists("/var/lib/rabbitmq/.erlang.cookie"): cookie_size = len(cookie_fh.readline()) else: logging.info("No RabbitMQ erlang cookie found, not auditing RabbitMQ security.") -if (minimal_change or args.skip_puppet) and rabbitmq_dist_listen: +if (args.skip_restart or args.skip_puppet) and rabbitmq_dist_listen: logging.error( "RabbitMQ is publicly-accessible on %s; this is a security vulnerability!", ", ".join(rabbitmq_dist_listen), @@ -208,7 +196,7 @@ def fill_memcached_caches() -> None: def shutdown_server(fill_caches: bool = True) -> None: global IS_SERVER_UP - if minimal_change: + if args.skip_restart: logging.info("Upgrade would require shutting down Zulip -- aborting!") sys.exit(1) @@ -229,7 +217,7 @@ if glob.glob("/usr/share/postgresql/*/extension/tsearch_extras.control"): ) subprocess.check_call(["apt-get", "remove", "-y", "postgresql-*-tsearch-extras"]) -if not (minimal_change or args.skip_puppet): +if not (args.skip_restart or args.skip_puppet): # We need to temporarily hold pgroonga, if installed -- upgrading # it without running the appropriate upgrade SQL can cause # PostgreSQL to crash @@ -288,7 +276,7 @@ class_renames = { classes = re.split(r"\s*,\s*", get_config(config_file, "machine", "puppet_classes")) new_classes = [class_renames.get(c, c) for c in classes if c != "zulip::base"] if classes != new_classes: - if minimal_change: + if args.skip_restart: logging.error("Would need to adjust puppet classes -- aborting!") sys.exit(1) logging.info("Adjusting Puppet classes for renames...") @@ -312,7 +300,7 @@ if os.path.exists(emoji_path): emoji_data = f.read() emoji_sha = hashlib.sha1(emoji_data).hexdigest() if emoji_sha == "47033121dc20b376e0f86f4916969872ad22a293": - if minimal_change: + if args.skip_restart: logging.error("Would need to delete images-google-64 -- aborting!") sys.exit(1) shutil.rmtree("/home/zulip/prod-static/generated/emoji/images-google-64") @@ -384,15 +372,10 @@ elif not args.skip_migrations: if line_str.startswith("[ ]"): migrations_needed = True -if minimal_change and migrations_needed: +if args.skip_restart and migrations_needed: logging.error("Would need to apply migrations -- aborting!") sys.exit(1) -# If we're skipping tornado restarts, we need to ensure we keep the -# same cache prefix as we currently have. -if args.skip_tornado: - shutil.copyfile("../current/var/remote_cache_prefix", "./var/remote_cache_prefix") - # Install hooks before we check for puppet changes, so we skip an # unnecessary stop/start cycle if hook changes are the only ones. logging.info("Installing hooks") @@ -413,7 +396,7 @@ if not args.skip_puppet and IS_SERVER_UP: check=False, ) if try_puppet.returncode == 0: - if minimal_change: + if args.skip_restart: logging.info("Verified no Puppet changes are necessary.") else: logging.info("No puppet changes found, skipping!") @@ -422,7 +405,7 @@ if not args.skip_puppet and IS_SERVER_UP: elif try_puppet.returncode == 2: logging.error("Puppet error -- aborting!") sys.exit(1) - elif minimal_change: + elif args.skip_restart: logging.error("Would need to apply puppet changes -- aborting!") sys.exit(1) @@ -516,8 +499,6 @@ else: if not HAS_FILLED_CACHES: start_args.append("--fill-cache") if IS_SERVER_UP: - if args.skip_tornado: - start_args.append("--skip-tornado") if args.less_graceful: start_args.append("--less-graceful") subprocess.check_call(["./scripts/restart-server", *start_args], preexec_fn=su_to_zulip) diff --git a/scripts/lib/zulip_tools.py b/scripts/lib/zulip_tools.py index ce031d1dad..3f392beb49 100755 --- a/scripts/lib/zulip_tools.py +++ b/scripts/lib/zulip_tools.py @@ -688,11 +688,6 @@ def start_arg_parser(action: str, add_help: bool = False) -> argparse.ArgumentPa action="store_true", help="Restart with more concern for expediency than minimizing availability interruption", ) - parser.add_argument( - "--skip-tornado", - action="store_true", - help="Do not restart Tornado processes", - ) return parser diff --git a/scripts/restart-server b/scripts/restart-server index c7664aa027..2c1bfe7d46 100755 --- a/scripts/restart-server +++ b/scripts/restart-server @@ -162,15 +162,14 @@ if action == "restart" and len(workers) > 0: if has_application_server(): # Next, we restart the Tornado processes sequentially, in order to - # minimize downtime of the tornado service caused by too many Python - # processes restarting at the same time, resulting in each receiving - # insufficient priority. This is important, because Tornado is the - # main source of user-visible downtime when we restart a Zulip server. - # We do this before restarting Django, in case there are new event - # types which it will need to know how to deal with. - if action == "restart" and args.skip_tornado: - logging.info("Skipping restart of Tornado") - elif len(tornado_ports) > 1: + # minimize downtime of the tornado service caused by too many + # Python processes restarting at the same time, resulting in each + # receiving insufficient priority. This is important, because + # Tornado being unavailable for too long is the main source of + # user-visible downtime when we restart a Zulip server. We do + # this before restarting Django, in case there are new event types + # which it will need to know how to deal with. + if len(tornado_ports) > 1: for p in tornado_ports: # Restart Tornado processes individually for a better rate of # restarts. This also avoids behavior with restarting a whole