restart-server: Remove --skip-tornado flag.

This flag was generally used not because we wanted to avoid
restarting Tornado, but because we wanted to avoid increasing load on
the server when all of the clients were told to reload.

Since we have laid the groundwork for separately telling Tornado to
tell clients to restart, we remove the --skip-tornado flag; the next
commit will add the ability to skip client restarts.
Author: Alex Vandiver, 2024-02-08 21:04:07 +00:00 (committed by Tim Abbott)
parent 0079688c49
commit 27d53ecbe1
3 changed files with 20 additions and 45 deletions
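For scripts or operators still passing the old flag, the practical effect is that restart-server's argument parser now rejects --skip-tornado outright instead of honoring it. A minimal sketch of that stock argparse behavior (illustrative only; not the actual restart-server parser, which defines more options than shown here):

    import argparse

    # Illustrative parser only -- not the real start_arg_parser(). Once
    # --skip-tornado is no longer declared, passing it is a hard error
    # ("unrecognized arguments"), not a silently-honored option.
    parser = argparse.ArgumentParser(prog="restart-server")
    parser.add_argument("--less-graceful", action="store_true")

    parser.parse_args(["--less-graceful"])  # accepted
    parser.parse_args(["--skip-tornado"])   # SystemExit: error: unrecognized arguments: --skip-tornado

So callers that still pass --skip-tornado to restart-server will now fail fast rather than getting the flag honored.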

View File

@@ -110,30 +110,18 @@ parser.add_argument(
 )
 args = parser.parse_args()
 
-# There are two reasons we might want to do a "minimal change" which
-# asserts puppet/database are unchanged: if we're setting up for a
-# post-upgrade manual restart (--skip-restart), or if we're only
-# restarting part of the system (--skip-tornado).
-minimal_change = args.skip_restart or args.skip_tornado
-if args.skip_restart and args.skip_tornado:
-    logging.warning("Ignored --skip-tornado; all upgrades with --skip-restart skip all restarts.")
-    args.skip_tornado = False
-if minimal_change:
-    if args.skip_restart:
-        flagname = "--skip-restart"
-    else:
-        flagname = "--skip-tornado"
+if args.skip_restart:
     if args.less_graceful:
-        logging.warning("Ignored --less-graceful; %s is always graceful.", flagname)
+        logging.warning("Ignored --less-graceful; --skip-restart is always graceful.")
         args.less_graceful = False
     if args.skip_migrations:
         logging.warning(
-            "Ignored --skip-migrations; all upgrades with %s assert no migrations.", flagname
+            "Ignored --skip-migrations; all upgrades with --skip-restart asserts no migrations."
         )
         args.skip_migrations = False
     if args.skip_puppet:
         logging.warning(
-            "Ignored --skip-puppet; all upgrades with %s assert no puppet changes.", flagname
+            "Ignored --skip-puppet; all upgrades with --skip-restart asserts no puppet changes."
         )
         args.skip_puppet = False
 
@@ -170,7 +158,7 @@ if os.path.exists("/var/lib/rabbitmq/.erlang.cookie"):
         cookie_size = len(cookie_fh.readline())
 else:
     logging.info("No RabbitMQ erlang cookie found, not auditing RabbitMQ security.")
-if (minimal_change or args.skip_puppet) and rabbitmq_dist_listen:
+if (args.skip_restart or args.skip_puppet) and rabbitmq_dist_listen:
     logging.error(
         "RabbitMQ is publicly-accessible on %s; this is a security vulnerability!",
         ", ".join(rabbitmq_dist_listen),
@@ -208,7 +196,7 @@ def fill_memcached_caches() -> None:
 def shutdown_server(fill_caches: bool = True) -> None:
     global IS_SERVER_UP
 
-    if minimal_change:
+    if args.skip_restart:
         logging.info("Upgrade would require shutting down Zulip -- aborting!")
         sys.exit(1)
 
@@ -229,7 +217,7 @@ if glob.glob("/usr/share/postgresql/*/extension/tsearch_extras.control"):
     )
     subprocess.check_call(["apt-get", "remove", "-y", "postgresql-*-tsearch-extras"])
 
-if not (minimal_change or args.skip_puppet):
+if not (args.skip_restart or args.skip_puppet):
     # We need to temporarily hold pgroonga, if installed -- upgrading
     # it without running the appropriate upgrade SQL can cause
     # PostgreSQL to crash
@@ -288,7 +276,7 @@ class_renames = {
 classes = re.split(r"\s*,\s*", get_config(config_file, "machine", "puppet_classes"))
 new_classes = [class_renames.get(c, c) for c in classes if c != "zulip::base"]
 if classes != new_classes:
-    if minimal_change:
+    if args.skip_restart:
         logging.error("Would need to adjust puppet classes -- aborting!")
         sys.exit(1)
     logging.info("Adjusting Puppet classes for renames...")
@@ -312,7 +300,7 @@ if os.path.exists(emoji_path):
         emoji_data = f.read()
     emoji_sha = hashlib.sha1(emoji_data).hexdigest()
     if emoji_sha == "47033121dc20b376e0f86f4916969872ad22a293":
-        if minimal_change:
+        if args.skip_restart:
             logging.error("Would need to delete images-google-64 -- aborting!")
             sys.exit(1)
         shutil.rmtree("/home/zulip/prod-static/generated/emoji/images-google-64")
@@ -384,15 +372,10 @@ elif not args.skip_migrations:
         if line_str.startswith("[ ]"):
             migrations_needed = True
 
-    if minimal_change and migrations_needed:
+    if args.skip_restart and migrations_needed:
         logging.error("Would need to apply migrations -- aborting!")
         sys.exit(1)
 
-# If we're skipping tornado restarts, we need to ensure we keep the
-# same cache prefix as we currently have.
-if args.skip_tornado:
-    shutil.copyfile("../current/var/remote_cache_prefix", "./var/remote_cache_prefix")
-
 # Install hooks before we check for puppet changes, so we skip an
 # unnecessary stop/start cycle if hook changes are the only ones.
 logging.info("Installing hooks")
@@ -413,7 +396,7 @@ if not args.skip_puppet and IS_SERVER_UP:
         check=False,
     )
     if try_puppet.returncode == 0:
-        if minimal_change:
+        if args.skip_restart:
             logging.info("Verified no Puppet changes are necessary.")
         else:
             logging.info("No puppet changes found, skipping!")
@@ -422,7 +405,7 @@ if not args.skip_puppet and IS_SERVER_UP:
     elif try_puppet.returncode == 2:
         logging.error("Puppet error -- aborting!")
         sys.exit(1)
-    elif minimal_change:
+    elif args.skip_restart:
         logging.error("Would need to apply puppet changes -- aborting!")
         sys.exit(1)
 
@@ -516,8 +499,6 @@ else:
     if not HAS_FILLED_CACHES:
         start_args.append("--fill-cache")
     if IS_SERVER_UP:
-        if args.skip_tornado:
-            start_args.append("--skip-tornado")
         if args.less_graceful:
             start_args.append("--less-graceful")
         subprocess.check_call(["./scripts/restart-server", *start_args], preexec_fn=su_to_zulip)
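One removed piece above deserves a note: the copy of ../current/var/remote_cache_prefix only existed to support --skip-tornado, since per the removed comment the new deployment had to keep the same cache prefix that the still-running Tornado processes were already using. A rough sketch of that now-removed pattern, with a hypothetical preserve_cache_prefix() helper name:

    import shutil

    def preserve_cache_prefix() -> None:
        # Hypothetical helper mirroring the removed lines: carry the running
        # deployment's remote_cache_prefix over into the new deployment
        # instead of letting a fresh one be generated.
        shutil.copyfile("../current/var/remote_cache_prefix", "./var/remote_cache_prefix")

With --skip-tornado gone, Tornado is always restarted along with the rest of the server, so the new deployment no longer needs to inherit the old prefix.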

View File

@@ -688,11 +688,6 @@ def start_arg_parser(action: str, add_help: bool = False) -> argparse.ArgumentParser:
         action="store_true",
         help="Restart with more concern for expediency than minimizing availability interruption",
     )
-    parser.add_argument(
-        "--skip-tornado",
-        action="store_true",
-        help="Do not restart Tornado processes",
-    )
     return parser
 
 

View File

@@ -162,15 +162,14 @@ if action == "restart" and len(workers) > 0:
 
 if has_application_server():
     # Next, we restart the Tornado processes sequentially, in order to
-    # minimize downtime of the tornado service caused by too many Python
-    # processes restarting at the same time, resulting in each receiving
-    # insufficient priority. This is important, because Tornado is the
-    # main source of user-visible downtime when we restart a Zulip server.
-    # We do this before restarting Django, in case there are new event
-    # types which it will need to know how to deal with.
-    if action == "restart" and args.skip_tornado:
-        logging.info("Skipping restart of Tornado")
-    elif len(tornado_ports) > 1:
+    # minimize downtime of the tornado service caused by too many
+    # Python processes restarting at the same time, resulting in each
+    # receiving insufficient priority. This is important, because
+    # Tornado being unavailable for too long is the main source of
+    # user-visible downtime when we restart a Zulip server. We do
+    # this before restarting Django, in case there are new event types
+    # which it will need to know how to deal with.
+    if len(tornado_ports) > 1:
         for p in tornado_ports:
             # Restart Tornado processes individually for a better rate of
             # restarts. This also avoids behavior with restarting a whole