install: Check for a supported version of the OS in shell.

Testing for it in Python means that we have to worry about keeping
`upgrade-zulip-stage-2` backwards-compatible with every version of
Python we could ever be upgrading from -- which is all of them.

Factor out the "supported operating systems" check, and share it
between upgrade and install codepaths.
Alex Vandiver 2024-04-13 01:40:49 +00:00 committed by Tim Abbott
parent 693b959656
commit 7f4bc05f8e
4 changed files with 546 additions and 536 deletions
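Both codepaths now share the same check: run scripts/lib/supported-os and branch on its exit status. A minimal sketch of a caller, with generic error handling (the real install and upgrade scripts use their own failure paths, as the diffs below show):

#!/usr/bin/env bash
# Hypothetical caller sketch; mirrors how the install and upgrade codepaths
# in this commit invoke the shared check and act on its exit status.
ZULIP_PATH="$(readlink -f "$(dirname "$0")"/../..)"
if ! "$ZULIP_PATH/scripts/lib/supported-os"; then
    echo "Unsupported OS release" >&2
    exit 1
fi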


@@ -241,9 +241,7 @@ if [ -f /etc/os-release ]; then
     esac
 fi

-case "$os_id $os_version_id" in
-    'debian 12' | 'ubuntu 22.04' | 'ubuntu 24.04') ;;
-    *)
-        system_requirements_failure <<EOF
+if ! "$ZULIP_PATH/scripts/lib/supported-os"; then
+    system_requirements_failure <<EOF
 Unsupported OS release: $os_id $os_version_id

@@ -252,8 +250,7 @@ Zulip in production is supported only on:
  - Ubuntu 22.04 LTS
  - Ubuntu 24.04 LTS
 EOF
-        ;;
-esac
+fi

 machine="$(uname -m)"
 if [ "$machine" != x86_64 ] && [ "$machine" != aarch64 ]; then

scripts/lib/supported-os Executable file

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# This script serves only to verify that the OS is a supported
# version, before we attempt to rely on that version in
# upgrade-zulip-stage-3

if [ -f /etc/os-release ]; then
    os_info="$(
        . /etc/os-release
        printf '%s\n' "$ID" "$VERSION_ID"
    )"
    {
        read -r os_id
        read -r os_version_id
    } <<<"$os_info"
fi

case "$os_id $os_version_id" in
    'debian 12' | 'ubuntu 22.04' | 'ubuntu 24.04')
        exit 0
        ;;
    *)
        exit 1
        ;;
esac
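A detail worth noting in the helper above: /etc/os-release is sourced inside a command substitution, so the variables it defines (ID, VERSION_ID, PRETTY_NAME, and so on) never leak into the checker's own environment; only the two fields of interest are printed and read back through a here-string. A standalone sketch of the same pattern, assuming /etc/os-release exists:

#!/usr/bin/env bash
# Source os-release in a subshell, emit only the fields we care about,
# and read them back into named variables via a here-string.
os_info="$(
    . /etc/os-release
    printf '%s\n' "$ID" "$VERSION_ID"
)"
{
    read -r os_id
    read -r os_version_id
} <<<"$os_info"
echo "Detected OS: $os_id $os_version_id"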

scripts/lib/upgrade-zulip-stage-2

@@ -1,531 +1,19 @@
The 531-line Python implementation is replaced by a 19-line bash shim. New contents:

#!/usr/bin/env bash
#
# This script serves only to verify that the OS is a supported
# version, before we attempt to rely on that version in
# upgrade-zulip-stage-3

set -eu

ZULIP_PATH="$(readlink -f "$(dirname "$0")"/../..)"
if ! "$ZULIP_PATH/scripts/lib/supported-os"; then
    echo "Unsupported platform!"
    echo
    echo "Sorry! The support for your OS has been discontinued."
    echo "Please upgrade your OS to a supported release first."
    echo "See https://zulip.readthedocs.io/en/latest/production/upgrade.html#upgrading-the-operating-system"
    exit 1
fi

exec "$ZULIP_PATH/scripts/lib/upgrade-zulip-stage-3" "$@"

Old contents (removed; the bulk of this logic moves to the new scripts/lib/upgrade-zulip-stage-3 below, dropping only the Python-side OS-version check):

#!/usr/bin/env python3
#
# This script contains the actual logic for upgrading from an old
# version of Zulip to the new version. upgrade-zulip-stage-2 is
# always run from the new version of Zulip, so any bug fixes take
# effect on the very next upgrade.

import argparse
import glob
import hashlib
import logging
import os
import re
import shutil
import subprocess
import sys
import time
from typing import Literal, NoReturn, Optional

os.environ["PYTHONUNBUFFERED"] = "y"
# Force a known locale. Some packages on PyPI fail to install in some locales.
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"
os.environ["LANGUAGE"] = "C.UTF-8"
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from scripts.lib.zulip_tools import (
DEPLOYMENTS_DIR,
assert_running_as_root,
get_config,
get_config_file,
get_zulip_pwent,
listening_publicly,
parse_os_release,
parse_version_from,
run_psql_as_postgres,
start_arg_parser,
su_to_zulip,
)
assert_running_as_root()
# Set a known, reliable PATH
os.environ["PATH"] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s upgrade-zulip-stage-2: %(message)s", level=logging.INFO)
def error_desupported_os(vendor: str, os_version: str) -> NoReturn:
# Link to documentation for how to correctly upgrade the OS.
logging.critical("Unsupported platform: %s %s", vendor, os_version)
logging.info(
"Sorry! The support for your OS has been discontinued.\n"
"Please upgrade your OS to a supported release first.\n"
"See https://zulip.readthedocs.io/en/latest/production/"
"upgrade.html#upgrading-the-operating-system"
)
sys.exit(1)
# Do not upgrade on unsupported OS versions.
UNSUPPORTED_DISTROS = [
("ubuntu", "14.04"),
("ubuntu", "16.04"),
("ubuntu", "18.04"),
("ubuntu", "20.04"),
("debian", "9"),
("debian", "10"),
("debian", "11"),
]
distro_info = parse_os_release()
vendor = distro_info["ID"]
os_version = distro_info["VERSION_ID"]
if (vendor, os_version) in UNSUPPORTED_DISTROS:
error_desupported_os(vendor, os_version)
# make sure we have appropriate file permissions
os.umask(0o22)
restart_parser = start_arg_parser(action="restart", add_help=False)
parser = argparse.ArgumentParser(parents=[restart_parser])
parser.add_argument("deploy_path", metavar="deploy_path", help="Path to deployment directory")
parser.add_argument(
"--skip-restart",
action="store_true",
help="Configure, but do not restart into, the new version; aborts if any system-wide changes would happen.",
)
parser.add_argument("--skip-puppet", action="store_true", help="Skip doing puppet/apt upgrades.")
parser.add_argument("--skip-migrations", action="store_true", help="Skip doing migrations.")
parser.add_argument(
"--skip-downgrade-check",
action="store_true",
help="Skip the safety check to prevent database downgrades.",
)
parser.add_argument(
"--from-git", action="store_true", help="Upgrading from git, so run update-prod-static."
)
parser.add_argument(
"--ignore-static-assets",
action="store_true",
help="Do not attempt to copy/manage static assets.",
)
parser.add_argument(
"--skip-purge-old-deployments", action="store_true", help="Skip purging old deployments."
)
parser.add_argument(
"--audit-fts-indexes", action="store_true", help="Audit and fix full text search indexes."
)
args = parser.parse_args()
if args.skip_restart:
if args.less_graceful:
logging.warning("Ignored --less-graceful; --skip-restart is always graceful.")
args.less_graceful = False
if args.skip_migrations:
logging.warning(
"Ignored --skip-migrations; all upgrades with --skip-restart asserts no migrations."
)
args.skip_migrations = False
if args.skip_puppet:
logging.warning(
"Ignored --skip-puppet; all upgrades with --skip-restart asserts no puppet changes."
)
args.skip_puppet = False
if not args.skip_puppet and args.less_graceful:
logging.warning("Ignored --less-graceful; all upgrades without --skip-puppet are ungraceful.")
deploy_path = args.deploy_path
os.chdir(deploy_path)
config_file = get_config_file()
IS_SERVER_UP = True
HAS_FILLED_CACHES = False
if args.from_git:
logging.info("Caching Zulip Git version...")
subprocess.check_call(["./tools/cache-zulip-git-version"], preexec_fn=su_to_zulip)
from version import ZULIP_MERGE_BASE as NEW_ZULIP_MERGE_BASE
from version import ZULIP_VERSION as NEW_ZULIP_VERSION
old_version = parse_version_from(DEPLOYMENTS_DIR + "/current")
logging.info("Upgrading from %s to %s, in %s", old_version, NEW_ZULIP_VERSION, deploy_path)
old_merge_base = (
parse_version_from(DEPLOYMENTS_DIR + "/current", merge_base=True) if args.from_git else ""
)
# Check if rabbitmq port 25672 is listening on anything except 127.0.0.1
rabbitmq_dist_listen = listening_publicly(25672)
# Check the erlang magic cookie size
cookie_size: Optional[int] = None
if os.path.exists("/var/lib/rabbitmq/.erlang.cookie"):
with open("/var/lib/rabbitmq/.erlang.cookie") as cookie_fh:
cookie_size = len(cookie_fh.readline())
else:
logging.info("No RabbitMQ erlang cookie found, not auditing RabbitMQ security.")
if (args.skip_restart or args.skip_puppet) and rabbitmq_dist_listen:
logging.error(
"RabbitMQ is publicly-accessible on %s; this is a security vulnerability!",
", ".join(rabbitmq_dist_listen),
)
issue = "issue"
if cookie_size is not None and cookie_size == 20:
# See the below comment -- this is used as a lightweight
# signal for a cookie made with Erlang's bad randomizer.
logging.error(
"RabbitMQ erlang cookie is insecure; this is a critical security vulnerability!"
)
issue = "issues"
logging.error(
"To fix the above security %s, re-run the upgrade without --skip-puppet "
"(which may be set in /etc/zulip/zulip.conf), in order to restart the "
"necessary services. Running zulip-puppet-apply by itself is not sufficient.",
issue,
)
sys.exit(1)
migrations_needed = False
def fill_memcached_caches() -> None:
global HAS_FILLED_CACHES
if HAS_FILLED_CACHES or migrations_needed:
return
subprocess.check_call(
["./manage.py", "fill_memcached_caches", "--skip-checks"], preexec_fn=su_to_zulip
)
HAS_FILLED_CACHES = True
def shutdown_server(fill_caches: bool = True) -> None:
global IS_SERVER_UP
if args.skip_restart:
logging.info("Upgrade would require shutting down Zulip -- aborting!")
sys.exit(1)
if fill_caches:
fill_memcached_caches()
if IS_SERVER_UP:
logging.info("Stopping Zulip...")
subprocess.check_call(["./scripts/stop-server"], preexec_fn=su_to_zulip)
IS_SERVER_UP = False
if glob.glob("/usr/share/postgresql/*/extension/tsearch_extras.control"):
# Remove legacy tsearch_extras package references
run_psql_as_postgres(
config_file=config_file,
sql_query="DROP EXTENSION IF EXISTS tsearch_extras;",
)
subprocess.check_call(["apt-get", "remove", "-y", "postgresql-*-tsearch-extras"])
if not (args.skip_restart or args.skip_puppet):
# We need to temporarily hold pgroonga, if installed -- upgrading
# it without running the appropriate upgrade SQL can cause
# PostgreSQL to crash
if get_config(config_file, "machine", "pgroonga", False):
subprocess.check_call(["apt-mark", "hold", "postgresql-*-pgdg-pgroonga"])
logging.info("Upgrading system packages...")
subprocess.check_call(["apt-get", "update"])
subprocess.check_call(["apt-get", "-y", "--with-new-pkgs", "upgrade"])
if get_config(config_file, "machine", "pgroonga", False):
subprocess.check_call(["apt-mark", "unhold", "postgresql-*-pgdg-pgroonga"])
# To bootstrap zulip-puppet-apply, we need to install the system yaml
# package; new installs get this, but old installs may not have it.
if not os.path.exists("/usr/share/doc/python3-yaml"):
logging.info("Installing system YAML package, for puppet...")
subprocess.check_call(["apt-get", "install", "python3-yaml"])
if not os.path.exists(os.path.join(deploy_path, "zproject/prod_settings.py")):
# This is normally done in unpack-zulip, but for upgrading from
# zulip<1.4.0, we need to do it. See discussion in commit 586b23637.
os.symlink("/etc/zulip/settings.py", os.path.join(deploy_path, "zproject/prod_settings.py"))
# Now we should have an environment set up where we can run our tools;
# first, creating the production venv.
subprocess.check_call(
[os.path.join(deploy_path, "scripts", "lib", "create-production-venv"), deploy_path]
)
# Check to make sure that this upgrade is not actually a database
# downgrade.
if not args.skip_downgrade_check:
subprocess.check_call(
[os.path.join(deploy_path, "scripts", "lib", "check-database-compatibility")],
preexec_fn=su_to_zulip,
)
# Make sure the right version of node is installed
subprocess.check_call([os.path.join(deploy_path, "scripts", "lib", "install-node")])
# Generate any new secrets that were added in the new version required.
# TODO: Do caching to only run this when it has changed.
subprocess.check_call(
[os.path.join(deploy_path, "scripts", "setup", "generate_secrets.py"), "--production"]
)
# Adjust Puppet class names for the manifest renames in the 4.0 release
class_renames = {
"zulip::app_frontend": "zulip::profile::app_frontend",
"zulip::dockervoyager": "zulip::profile::docker",
"zulip::memcached": "zulip::profile::memcached",
"zulip::postgres_appdb_tuned": "zulip::profile::postgresql",
"zulip::postgres_backups": "zulip::postgresql_backups",
"zulip::rabbit": "zulip::profile::rabbitmq",
"zulip::voyager": "zulip::profile::standalone",
}
classes = re.split(r"\s*,\s*", get_config(config_file, "machine", "puppet_classes", ""))
new_classes = [class_renames.get(c, c) for c in classes if c != "zulip::base"]
if classes != new_classes:
if args.skip_restart:
logging.error("Would need to adjust puppet classes -- aborting!")
sys.exit(1)
logging.info("Adjusting Puppet classes for renames...")
subprocess.check_call(
[
"crudini",
"--set",
"/etc/zulip/zulip.conf",
"machine",
"puppet_classes",
", ".join(new_classes),
]
)
# Unpleasant migration: Remove any legacy deployed copies of
# images-google-64 from before we renamed that emojiset to
# "googleblob":
emoji_path = "/home/zulip/prod-static/generated/emoji/images-google-64/1f32d.png"
if os.path.exists(emoji_path):
with open(emoji_path, "rb") as f:
emoji_data = f.read()
emoji_sha = hashlib.sha1(emoji_data).hexdigest()
if emoji_sha == "47033121dc20b376e0f86f4916969872ad22a293":
if args.skip_restart:
logging.error("Would need to delete images-google-64 -- aborting!")
sys.exit(1)
shutil.rmtree("/home/zulip/prod-static/generated/emoji/images-google-64")
# And then, building/installing the static assets.
if args.ignore_static_assets:
# For the OS version upgrade use case, the static assets are
# already in place, and we don't need to do anything. Further,
# neither of the options below will work for all installations,
# because if we installed from Git, `prod-static/serve` may be
# empty so we can't do the non-Git thing, whereas if we installed
# from a tarball, we won't have a `tools/` directory and thus
# cannot run `tools/update-prod-static`.
pass
elif args.from_git:
# Because `upgrade-zulip-from-git` needs to build static assets, it
# is at risk of being OOM killed on systems with limited free RAM.
mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
mem_gib = mem_bytes / (1024.0**3) # e.g. 3.74
# Ideally, we'd have 2 thresholds here, depending on whether the
# system is running queue workers multithreaded or multiprocess.
# See puppet/zulip/manifests/app_frontend_base.pp for background.
if mem_gib < 4.2:
logging.info("Shutting down server to ensure sufficient free RAM for webpack.")
shutdown_server(fill_caches=False)
# Note: The fact that this is before we apply Puppet changes means
# that we don't support adding new Puppet dependencies of
# update-prod-static with the Git upgrade process. But it'll fail
# safely; this seems like a worthwhile tradeoff to minimize downtime.
logging.info("Building static assets...")
try:
subprocess.check_call(["./tools/update-prod-static"], preexec_fn=su_to_zulip)
except subprocess.CalledProcessError:
logging.error("Failed to build static assets.")
if IS_SERVER_UP:
logging.error("Usually the cause is insufficient free RAM to run webpack.")
logging.error("Try stopping the Zulip server (scripts/stop-server) and trying again.")
sys.exit(1)
else:
# Since this doesn't do any actual work, it's likely safe to have
# this run before we apply Puppet changes (saving a bit of downtime).
logging.info("Installing static assets...")
subprocess.check_call(
["cp", "-rT", os.path.join(deploy_path, "prod-static/serve"), "/home/zulip/prod-static"],
preexec_fn=su_to_zulip,
)
# Perform system checks -- including database checks, so we don't need
# to do them when we do migrations, below.
if not args.skip_checks:
subprocess.check_call(["./manage.py", "check", "--database", "default"], preexec_fn=su_to_zulip)
# Our next optimization is to check whether any migrations are needed
# before we start the critical section of the restart. This saves
# about 1s of downtime in a no-op upgrade. We omit this check if we
# already stopped the server above, due to low memory.
if not IS_SERVER_UP:
migrations_needed = True
elif not args.skip_migrations:
logging.info("Checking for needed migrations")
migrations_output = subprocess.check_output(
["./manage.py", "showmigrations", "--skip-checks"], preexec_fn=su_to_zulip, text=True
)
for ln in migrations_output.split("\n"):
line_str = ln.strip()
if line_str.startswith("[ ]"):
migrations_needed = True
if args.skip_restart and migrations_needed:
logging.error("Would need to apply migrations -- aborting!")
sys.exit(1)
# Install hooks before we check for puppet changes, so we skip an
# unnecessary stop/start cycle if hook changes are the only ones.
logging.info("Installing hooks")
subprocess.check_call(
["./scripts/zulip-puppet-apply", "--tags", "hooks", "--force"],
)
# If we are planning on running puppet, we can pre-run it in --noop
# mode and see if it will actually make any changes; if not, we can
# skip it during the upgrade. We omit this check if the server is
# already stopped, since it's not better than just pressing on.
has_puppet_changes = True
if not args.skip_puppet and IS_SERVER_UP:
logging.info("Pre-checking for puppet changes...")
try_puppet = subprocess.run(
["./scripts/zulip-puppet-apply", "--noop", "--force"],
stdout=subprocess.DEVNULL,
check=False,
)
if try_puppet.returncode == 0:
if args.skip_restart:
logging.info("Verified no Puppet changes are necessary.")
else:
logging.info("No puppet changes found, skipping!")
args.skip_puppet = True
has_puppet_changes = False
elif try_puppet.returncode == 2:
logging.error("Puppet error -- aborting!")
sys.exit(1)
elif args.skip_restart:
logging.error("Would need to apply puppet changes -- aborting!")
sys.exit(1)
def run_hooks(kind: Literal["pre-deploy", "post-deploy"]) -> None:
# Updates to the above literal to add a new hook type should
# adjust puppet's `app_frontend_base.pp` as well.
path = f"/etc/zulip/hooks/{kind}.d"
if not os.path.exists(path):
return
# Pass in, via environment variables, the old/new "version
# string" (which is a `git describe` output)
env = os.environ.copy()
env["ZULIP_OLD_VERSION"] = old_version
env["ZULIP_NEW_VERSION"] = NEW_ZULIP_VERSION
# preexec_fn=su_to_zulip normally handles this, but our explicit
# env overrides that
env["HOME"] = get_zulip_pwent().pw_dir
def resolve_version_string(version: str) -> str:
return subprocess.check_output(
["git", "rev-parse", version], cwd=deploy_path, preexec_fn=su_to_zulip, text=True
).strip()
if args.from_git:
# If we have a git repo, we also resolve those `git describe`
# values to full commit hashes, as well as provide the
# merge-base of the old/new commits with mainline.
env["ZULIP_OLD_COMMIT"] = resolve_version_string(old_version)
env["ZULIP_NEW_COMMIT"] = resolve_version_string(NEW_ZULIP_VERSION)
env["ZULIP_OLD_MERGE_BASE_COMMIT"] = resolve_version_string(old_merge_base)
env["ZULIP_NEW_MERGE_BASE_COMMIT"] = resolve_version_string(NEW_ZULIP_MERGE_BASE)
for script_name in sorted(f for f in os.listdir(path) if f.endswith(".hook")):
subprocess.check_call(
[os.path.join(path, script_name)],
cwd=deploy_path,
preexec_fn=su_to_zulip,
env=env,
)
if args.skip_restart:
logging.info("Successfully configured in %s!", deploy_path)
else:
# NOTE: Here begins the most likely critical period, where we may be
# shutting down the server; we should strive to minimize the number of
# steps that happen between here and the "Restarting Zulip" line
# below.
run_hooks("pre-deploy")
if rabbitmq_dist_listen:
shutdown_server()
logging.info("Shutting down rabbitmq to adjust its ports...")
subprocess.check_call(["/usr/sbin/service", "rabbitmq-server", "stop"])
if cookie_size is not None and cookie_size == 20:
# Checking for a 20-character cookie is used as a signal that it
# was generated by Erlang's insecure randomizer, which only
# provides between 20 and 36 bits of entropy; were it 20
# characters long by a good randomizer, it would be 96 bits and
# more than sufficient. We generate, using good randomness, a
# 255-character cookie, the max allowed length.
shutdown_server()
logging.info("Generating a secure erlang cookie...")
subprocess.check_call(["./scripts/setup/generate-rabbitmq-cookie"])
if not args.skip_puppet:
# Puppet may adjust random services; to minimize risk of issues
# due to inconsistent state, we shut down the server first.
shutdown_server()
logging.info("Applying Puppet changes...")
subprocess.check_call(["./scripts/zulip-puppet-apply", "--force"])
subprocess.check_call(["apt-get", "-y", "--with-new-pkgs", "upgrade"])
# Puppet may have reloaded supervisor, and in so doing started
# services; mark as potentially needing to stop the server.
IS_SERVER_UP = True
if migrations_needed:
# Database migrations assume that they run on a database in
# quiesced state.
shutdown_server()
logging.info("Applying database migrations...")
subprocess.check_call(
["./manage.py", "migrate", "--noinput", "--skip-checks"], preexec_fn=su_to_zulip
)
logging.info("Restarting Zulip...")
start_args = ["--skip-checks", "--skip-client-reloads"]
if not HAS_FILLED_CACHES:
start_args.append("--fill-cache")
if IS_SERVER_UP:
if args.less_graceful:
start_args.append("--less-graceful")
subprocess.check_call(["./scripts/restart-server", *start_args], preexec_fn=su_to_zulip)
else:
subprocess.check_call(["./scripts/start-server", *start_args], preexec_fn=su_to_zulip)
logging.info("Upgrade complete!")
run_hooks("post-deploy")
if not args.skip_client_reloads:
subprocess.check_call(["./scripts/reload-clients"], preexec_fn=su_to_zulip)
if args.audit_fts_indexes:
logging.info("Correcting full-text search indexes for updated dictionary files")
logging.info("This may take a while but the server should work while it runs.")
subprocess.check_call(
["./manage.py", "audit_fts_indexes", "--skip-checks"], preexec_fn=su_to_zulip
)
if not args.skip_purge_old_deployments:
logging.info("Purging old deployments...")
subprocess.check_call(["./scripts/purge-old-deployments"])
else:
logging.info("Skipping purging old deployments.")
if args.skip_puppet and has_puppet_changes:
logging.info("Showing un-applied Puppet changes:")
subprocess.check_call(["./scripts/zulip-puppet-apply", "--noop", "--show_diff"])

scripts/lib/upgrade-zulip-stage-3 Executable file

@@ -0,0 +1,500 @@
#!/usr/bin/env python3
#
# This script contains the actual logic for upgrading from an old
# version of Zulip to the new version. upgrade-zulip-stage-3 is
# always run from the new version of Zulip, so any bug fixes take
# effect on the very next upgrade.
import argparse
import glob
import hashlib
import logging
import os
import re
import shutil
import subprocess
import sys
import time
from typing import Literal, Optional
os.environ["PYTHONUNBUFFERED"] = "y"
# Force a known locale. Some packages on PyPI fail to install in some locales.
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"
os.environ["LANGUAGE"] = "C.UTF-8"
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from scripts.lib.zulip_tools import (
DEPLOYMENTS_DIR,
assert_running_as_root,
get_config,
get_config_file,
get_zulip_pwent,
listening_publicly,
parse_version_from,
run_psql_as_postgres,
start_arg_parser,
su_to_zulip,
)
assert_running_as_root()
# Set a known, reliable PATH
os.environ["PATH"] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s upgrade-zulip-stage-3: %(message)s", level=logging.INFO)
# make sure we have appropriate file permissions
os.umask(0o22)
restart_parser = start_arg_parser(action="restart", add_help=False)
parser = argparse.ArgumentParser(parents=[restart_parser])
parser.add_argument("deploy_path", metavar="deploy_path", help="Path to deployment directory")
parser.add_argument(
"--skip-restart",
action="store_true",
help="Configure, but do not restart into, the new version; aborts if any system-wide changes would happen.",
)
parser.add_argument("--skip-puppet", action="store_true", help="Skip doing puppet/apt upgrades.")
parser.add_argument("--skip-migrations", action="store_true", help="Skip doing migrations.")
parser.add_argument(
"--skip-downgrade-check",
action="store_true",
help="Skip the safety check to prevent database downgrades.",
)
parser.add_argument(
"--from-git", action="store_true", help="Upgrading from git, so run update-prod-static."
)
parser.add_argument(
"--ignore-static-assets",
action="store_true",
help="Do not attempt to copy/manage static assets.",
)
parser.add_argument(
"--skip-purge-old-deployments", action="store_true", help="Skip purging old deployments."
)
parser.add_argument(
"--audit-fts-indexes", action="store_true", help="Audit and fix full text search indexes."
)
args = parser.parse_args()
if args.skip_restart:
if args.less_graceful:
logging.warning("Ignored --less-graceful; --skip-restart is always graceful.")
args.less_graceful = False
if args.skip_migrations:
logging.warning(
"Ignored --skip-migrations; all upgrades with --skip-restart asserts no migrations."
)
args.skip_migrations = False
if args.skip_puppet:
logging.warning(
"Ignored --skip-puppet; all upgrades with --skip-restart asserts no puppet changes."
)
args.skip_puppet = False
if not args.skip_puppet and args.less_graceful:
logging.warning("Ignored --less-graceful; all upgrades without --skip-puppet are ungraceful.")
deploy_path = args.deploy_path
os.chdir(deploy_path)
config_file = get_config_file()
IS_SERVER_UP = True
HAS_FILLED_CACHES = False
if args.from_git:
logging.info("Caching Zulip Git version...")
subprocess.check_call(["./tools/cache-zulip-git-version"], preexec_fn=su_to_zulip)
from version import ZULIP_MERGE_BASE as NEW_ZULIP_MERGE_BASE
from version import ZULIP_VERSION as NEW_ZULIP_VERSION
old_version = parse_version_from(DEPLOYMENTS_DIR + "/current")
logging.info("Upgrading from %s to %s, in %s", old_version, NEW_ZULIP_VERSION, deploy_path)
old_merge_base = (
parse_version_from(DEPLOYMENTS_DIR + "/current", merge_base=True) if args.from_git else ""
)
# Check if rabbitmq port 25672 is listening on anything except 127.0.0.1
rabbitmq_dist_listen = listening_publicly(25672)
# Check the erlang magic cookie size
cookie_size: Optional[int] = None
if os.path.exists("/var/lib/rabbitmq/.erlang.cookie"):
with open("/var/lib/rabbitmq/.erlang.cookie") as cookie_fh:
cookie_size = len(cookie_fh.readline())
else:
logging.info("No RabbitMQ erlang cookie found, not auditing RabbitMQ security.")
if (args.skip_restart or args.skip_puppet) and rabbitmq_dist_listen:
logging.error(
"RabbitMQ is publicly-accessible on %s; this is a security vulnerability!",
", ".join(rabbitmq_dist_listen),
)
issue = "issue"
if cookie_size is not None and cookie_size == 20:
# See the below comment -- this is used as a lightweight
# signal for a cookie made with Erlang's bad randomizer.
logging.error(
"RabbitMQ erlang cookie is insecure; this is a critical security vulnerability!"
)
issue = "issues"
logging.error(
"To fix the above security %s, re-run the upgrade without --skip-puppet "
"(which may be set in /etc/zulip/zulip.conf), in order to restart the "
"necessary services. Running zulip-puppet-apply by itself is not sufficient.",
issue,
)
sys.exit(1)
migrations_needed = False
def fill_memcached_caches() -> None:
global HAS_FILLED_CACHES
if HAS_FILLED_CACHES or migrations_needed:
return
subprocess.check_call(
["./manage.py", "fill_memcached_caches", "--skip-checks"], preexec_fn=su_to_zulip
)
HAS_FILLED_CACHES = True
def shutdown_server(fill_caches: bool = True) -> None:
global IS_SERVER_UP
if args.skip_restart:
logging.info("Upgrade would require shutting down Zulip -- aborting!")
sys.exit(1)
if fill_caches:
fill_memcached_caches()
if IS_SERVER_UP:
logging.info("Stopping Zulip...")
subprocess.check_call(["./scripts/stop-server"], preexec_fn=su_to_zulip)
IS_SERVER_UP = False
if glob.glob("/usr/share/postgresql/*/extension/tsearch_extras.control"):
# Remove legacy tsearch_extras package references
run_psql_as_postgres(
config_file=config_file,
sql_query="DROP EXTENSION IF EXISTS tsearch_extras;",
)
subprocess.check_call(["apt-get", "remove", "-y", "postgresql-*-tsearch-extras"])
if not (args.skip_restart or args.skip_puppet):
# We need to temporarily hold pgroonga, if installed -- upgrading
# it without running the appropriate upgrade SQL can cause
# PostgreSQL to crash
if get_config(config_file, "machine", "pgroonga", False):
subprocess.check_call(["apt-mark", "hold", "postgresql-*-pgdg-pgroonga"])
logging.info("Upgrading system packages...")
subprocess.check_call(["apt-get", "update"])
subprocess.check_call(["apt-get", "-y", "--with-new-pkgs", "upgrade"])
if get_config(config_file, "machine", "pgroonga", False):
subprocess.check_call(["apt-mark", "unhold", "postgresql-*-pgdg-pgroonga"])
# To bootstrap zulip-puppet-apply, we need to install the system yaml
# package; new installs get this, but old installs may not have it.
if not os.path.exists("/usr/share/doc/python3-yaml"):
logging.info("Installing system YAML package, for puppet...")
subprocess.check_call(["apt-get", "install", "python3-yaml"])
if not os.path.exists(os.path.join(deploy_path, "zproject/prod_settings.py")):
# This is normally done in unpack-zulip, but for upgrading from
# zulip<1.4.0, we need to do it. See discussion in commit 586b23637.
os.symlink("/etc/zulip/settings.py", os.path.join(deploy_path, "zproject/prod_settings.py"))
# Now we should have an environment set up where we can run our tools;
# first, creating the production venv.
subprocess.check_call(
[os.path.join(deploy_path, "scripts", "lib", "create-production-venv"), deploy_path]
)
# Check to make sure that this upgrade is not actually a database
# downgrade.
if not args.skip_downgrade_check:
subprocess.check_call(
[os.path.join(deploy_path, "scripts", "lib", "check-database-compatibility")],
preexec_fn=su_to_zulip,
)
# Make sure the right version of node is installed
subprocess.check_call([os.path.join(deploy_path, "scripts", "lib", "install-node")])
# Generate any new secrets that were added in the new version required.
# TODO: Do caching to only run this when it has changed.
subprocess.check_call(
[os.path.join(deploy_path, "scripts", "setup", "generate_secrets.py"), "--production"]
)
# Adjust Puppet class names for the manifest renames in the 4.0 release
class_renames = {
"zulip::app_frontend": "zulip::profile::app_frontend",
"zulip::dockervoyager": "zulip::profile::docker",
"zulip::memcached": "zulip::profile::memcached",
"zulip::postgres_appdb_tuned": "zulip::profile::postgresql",
"zulip::postgres_backups": "zulip::postgresql_backups",
"zulip::rabbit": "zulip::profile::rabbitmq",
"zulip::voyager": "zulip::profile::standalone",
}
classes = re.split(r"\s*,\s*", get_config(config_file, "machine", "puppet_classes", ""))
new_classes = [class_renames.get(c, c) for c in classes if c != "zulip::base"]
if classes != new_classes:
if args.skip_restart:
logging.error("Would need to adjust puppet classes -- aborting!")
sys.exit(1)
logging.info("Adjusting Puppet classes for renames...")
subprocess.check_call(
[
"crudini",
"--set",
"/etc/zulip/zulip.conf",
"machine",
"puppet_classes",
", ".join(new_classes),
]
)
# Unpleasant migration: Remove any legacy deployed copies of
# images-google-64 from before we renamed that emojiset to
# "googleblob":
emoji_path = "/home/zulip/prod-static/generated/emoji/images-google-64/1f32d.png"
if os.path.exists(emoji_path):
with open(emoji_path, "rb") as f:
emoji_data = f.read()
emoji_sha = hashlib.sha1(emoji_data).hexdigest()
if emoji_sha == "47033121dc20b376e0f86f4916969872ad22a293":
if args.skip_restart:
logging.error("Would need to delete images-google-64 -- aborting!")
sys.exit(1)
shutil.rmtree("/home/zulip/prod-static/generated/emoji/images-google-64")
# And then, building/installing the static assets.
if args.ignore_static_assets:
# For the OS version upgrade use case, the static assets are
# already in place, and we don't need to do anything. Further,
# neither of the options below will work for all installations,
# because if we installed from Git, `prod-static/serve` may be
# empty so we can't do the non-Git thing, whereas if we installed
# from a tarball, we won't have a `tools/` directory and thus
# cannot run `tools/update-prod-static`.
pass
elif args.from_git:
# Because `upgrade-zulip-from-git` needs to build static assets, it
# is at risk of being OOM killed on systems with limited free RAM.
mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
mem_gib = mem_bytes / (1024.0**3) # e.g. 3.74
# Ideally, we'd have 2 thresholds here, depending on whether the
# system is running queue workers multithreaded or multiprocess.
# See puppet/zulip/manifests/app_frontend_base.pp for background.
if mem_gib < 4.2:
logging.info("Shutting down server to ensure sufficient free RAM for webpack.")
shutdown_server(fill_caches=False)
# Note: The fact that this is before we apply Puppet changes means
# that we don't support adding new Puppet dependencies of
# update-prod-static with the Git upgrade process. But it'll fail
# safely; this seems like a worthwhile tradeoff to minimize downtime.
logging.info("Building static assets...")
try:
subprocess.check_call(["./tools/update-prod-static"], preexec_fn=su_to_zulip)
except subprocess.CalledProcessError:
logging.error("Failed to build static assets.")
if IS_SERVER_UP:
logging.error("Usually the cause is insufficient free RAM to run webpack.")
logging.error("Try stopping the Zulip server (scripts/stop-server) and trying again.")
sys.exit(1)
else:
# Since this doesn't do any actual work, it's likely safe to have
# this run before we apply Puppet changes (saving a bit of downtime).
logging.info("Installing static assets...")
subprocess.check_call(
["cp", "-rT", os.path.join(deploy_path, "prod-static/serve"), "/home/zulip/prod-static"],
preexec_fn=su_to_zulip,
)
# Perform system checks -- including database checks, so we don't need
# to do them when we do migrations, below.
if not args.skip_checks:
subprocess.check_call(["./manage.py", "check", "--database", "default"], preexec_fn=su_to_zulip)
# Our next optimization is to check whether any migrations are needed
# before we start the critical section of the restart. This saves
# about 1s of downtime in a no-op upgrade. We omit this check if we
# already stopped the server above, due to low memory.
if not IS_SERVER_UP:
migrations_needed = True
elif not args.skip_migrations:
logging.info("Checking for needed migrations")
migrations_output = subprocess.check_output(
["./manage.py", "showmigrations", "--skip-checks"], preexec_fn=su_to_zulip, text=True
)
for ln in migrations_output.split("\n"):
line_str = ln.strip()
if line_str.startswith("[ ]"):
migrations_needed = True
if args.skip_restart and migrations_needed:
logging.error("Would need to apply migrations -- aborting!")
sys.exit(1)
# Install hooks before we check for puppet changes, so we skip an
# unnecessary stop/start cycle if hook changes are the only ones.
logging.info("Installing hooks")
subprocess.check_call(
["./scripts/zulip-puppet-apply", "--tags", "hooks", "--force"],
)
# If we are planning on running puppet, we can pre-run it in --noop
# mode and see if it will actually make any changes; if not, we can
# skip it during the upgrade. We omit this check if the server is
# already stopped, since it's not better than just pressing on.
has_puppet_changes = True
if not args.skip_puppet and IS_SERVER_UP:
logging.info("Pre-checking for puppet changes...")
try_puppet = subprocess.run(
["./scripts/zulip-puppet-apply", "--noop", "--force"],
stdout=subprocess.DEVNULL,
check=False,
)
if try_puppet.returncode == 0:
if args.skip_restart:
logging.info("Verified no Puppet changes are necessary.")
else:
logging.info("No puppet changes found, skipping!")
args.skip_puppet = True
has_puppet_changes = False
elif try_puppet.returncode == 2:
logging.error("Puppet error -- aborting!")
sys.exit(1)
elif args.skip_restart:
logging.error("Would need to apply puppet changes -- aborting!")
sys.exit(1)
def run_hooks(kind: Literal["pre-deploy", "post-deploy"]) -> None:
# Updates to the above literal to add a new hook type should
# adjust puppet's `app_frontend_base.pp` as well.
path = f"/etc/zulip/hooks/{kind}.d"
if not os.path.exists(path):
return
# Pass in, via environment variables, the old/new "version
# string" (which is a `git describe` output)
env = os.environ.copy()
env["ZULIP_OLD_VERSION"] = old_version
env["ZULIP_NEW_VERSION"] = NEW_ZULIP_VERSION
# preexec_fn=su_to_zulip normally handles this, but our explicit
# env overrides that
env["HOME"] = get_zulip_pwent().pw_dir
def resolve_version_string(version: str) -> str:
return subprocess.check_output(
["git", "rev-parse", version], cwd=deploy_path, preexec_fn=su_to_zulip, text=True
).strip()
if args.from_git:
# If we have a git repo, we also resolve those `git describe`
# values to full commit hashes, as well as provide the
# merge-base of the old/new commits with mainline.
env["ZULIP_OLD_COMMIT"] = resolve_version_string(old_version)
env["ZULIP_NEW_COMMIT"] = resolve_version_string(NEW_ZULIP_VERSION)
env["ZULIP_OLD_MERGE_BASE_COMMIT"] = resolve_version_string(old_merge_base)
env["ZULIP_NEW_MERGE_BASE_COMMIT"] = resolve_version_string(NEW_ZULIP_MERGE_BASE)
for script_name in sorted(f for f in os.listdir(path) if f.endswith(".hook")):
subprocess.check_call(
[os.path.join(path, script_name)],
cwd=deploy_path,
preexec_fn=su_to_zulip,
env=env,
)
if args.skip_restart:
logging.info("Successfully configured in %s!", deploy_path)
else:
# NOTE: Here begins the most likely critical period, where we may be
# shutting down the server; we should strive to minimize the number of
# steps that happen between here and the "Restarting Zulip" line
# below.
run_hooks("pre-deploy")
if rabbitmq_dist_listen:
shutdown_server()
logging.info("Shutting down rabbitmq to adjust its ports...")
subprocess.check_call(["/usr/sbin/service", "rabbitmq-server", "stop"])
if cookie_size is not None and cookie_size == 20:
# Checking for a 20-character cookie is used as a signal that it
# was generated by Erlang's insecure randomizer, which only
# provides between 20 and 36 bits of entropy; were it 20
# characters long by a good randomizer, it would be 96 bits and
# more than sufficient. We generate, using good randomness, a
# 255-character cookie, the max allowed length.
shutdown_server()
logging.info("Generating a secure erlang cookie...")
subprocess.check_call(["./scripts/setup/generate-rabbitmq-cookie"])
if not args.skip_puppet:
# Puppet may adjust random services; to minimize risk of issues
# due to inconsistent state, we shut down the server first.
shutdown_server()
logging.info("Applying Puppet changes...")
subprocess.check_call(["./scripts/zulip-puppet-apply", "--force"])
subprocess.check_call(["apt-get", "-y", "--with-new-pkgs", "upgrade"])
# Puppet may have reloaded supervisor, and in so doing started
# services; mark as potentially needing to stop the server.
IS_SERVER_UP = True
if migrations_needed:
# Database migrations assume that they run on a database in
# quiesced state.
shutdown_server()
logging.info("Applying database migrations...")
subprocess.check_call(
["./manage.py", "migrate", "--noinput", "--skip-checks"], preexec_fn=su_to_zulip
)
logging.info("Restarting Zulip...")
start_args = ["--skip-checks", "--skip-client-reloads"]
if not HAS_FILLED_CACHES:
start_args.append("--fill-cache")
if IS_SERVER_UP:
if args.less_graceful:
start_args.append("--less-graceful")
subprocess.check_call(["./scripts/restart-server", *start_args], preexec_fn=su_to_zulip)
else:
subprocess.check_call(["./scripts/start-server", *start_args], preexec_fn=su_to_zulip)
logging.info("Upgrade complete!")
run_hooks("post-deploy")
if not args.skip_client_reloads:
subprocess.check_call(["./scripts/reload-clients"], preexec_fn=su_to_zulip)
if args.audit_fts_indexes:
logging.info("Correcting full-text search indexes for updated dictionary files")
logging.info("This may take a while but the server should work while it runs.")
subprocess.check_call(
["./manage.py", "audit_fts_indexes", "--skip-checks"], preexec_fn=su_to_zulip
)
if not args.skip_purge_old_deployments:
logging.info("Purging old deployments...")
subprocess.check_call(["./scripts/purge-old-deployments"])
else:
logging.info("Skipping purging old deployments.")
if args.skip_puppet and has_puppet_changes:
logging.info("Showing un-applied Puppet changes:")
subprocess.check_call(["./scripts/zulip-puppet-apply", "--noop", "--show_diff"])