py3: Switch almost all shebang lines to use `python3`.
This causes `upgrade-zulip-from-git`, as well as a no-option run of
`tools/build-release-tarball`, to produce a Zulip install running
Python 3, rather than Python 2. In particular this means that the
virtualenv we create, in which all application code runs, is Python 3.
One shebang line, on `zulip-ec2-configure-interfaces`, explicitly
keeps Python 2, and at least one external ops script, `wal-e`, also
still runs on Python 2. See discussion on the respective previous
commits that made those explicit. There may also be some other
third-party scripts we use, outside of this source tree and running
outside our virtualenv, that still run on Python 2.
2017-08-02 23:15:16 +02:00
|
|
|
#!/usr/bin/env python3
|
2019-01-14 17:30:53 +01:00
|
|
|
import argparse
|
2018-11-28 02:09:00 +01:00
|
|
|
import configparser
|
2013-01-31 16:49:09 +01:00
|
|
|
import os
|
|
|
|
import sys
|
2013-06-19 21:16:39 +02:00
|
|
|
import pwd
|
2013-01-31 16:49:09 +01:00
|
|
|
import subprocess
|
2013-03-13 19:26:51 +01:00
|
|
|
import logging
|
2013-04-18 22:58:32 +02:00
|
|
|
import time
|
2019-09-20 02:23:23 +02:00
|
|
|
import shlex
|
2013-10-25 23:20:40 +02:00
|
|
|
|
2013-10-25 23:46:02 +02:00
|
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
|
2019-09-20 02:23:23 +02:00
|
|
|
from scripts.lib.zulip_tools import ENDC, OKGREEN, WARNING, DEPLOYMENTS_DIR, overwrite_symlink
|
2013-03-13 19:26:51 +01:00
|
|
|
|
2018-08-12 01:56:58 +02:00
|
|
|
# Render log timestamps with time.gmtime (UTC) instead of local time, and
# prefix every message with the script name.
logging.Formatter.converter = time.gmtime
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s restart-server: %(message)s",
)
|
2013-01-31 16:49:09 +01:00
|
|
|
|
2019-01-14 17:30:53 +01:00
|
|
|
# Command-line interface: the single --fill-cache flag (off by default)
# is stored as args.fill_cache.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--fill-cache',
    dest='fill_cache',
    action='store_true',
    default=False,
    help='Fill the memcached caches',
)
args = parser.parse_args()
|
|
|
|
|
2013-06-03 19:29:52 +02:00
|
|
|
# The deployment root is the parent of the directory holding this script,
# with symlinks resolved.  The relative "./manage.py" and "./scripts/..."
# commands run later rely on it being the working directory.
deploy_path = os.path.realpath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)
os.chdir(deploy_path)
|
2013-01-31 16:49:09 +01:00
|
|
|
|
2016-07-23 20:33:58 +02:00
|
|
|
# Refuse to run under any account other than 'zulip': log the problem
# and exit with status 1.
invoking_uid = os.getuid()
if pwd.getpwuid(invoking_uid).pw_name != "zulip":
    logging.error("Must be run as user 'zulip'.")
    sys.exit(1)
|
2013-06-19 17:25:42 +02:00
|
|
|
|
2013-04-18 22:58:32 +02:00
|
|
|
# Record the restart as a statsd counter increment, stamped with the
# current Unix time truncated to whole seconds.
restart_epoch = str(int(time.time()))
subprocess.check_call(
    ["./manage.py", "send_stats", "incr", "events.server_restart", restart_epoch]
)

# When --fill-cache was given, populate memcached before restarting anything.
if args.fill_cache:
    logging.info("Filling memcached caches")
    subprocess.check_call(["./manage.py", "fill_memcached_caches"])
|
2013-05-30 21:05:34 +02:00
|
|
|
|
2018-08-11 01:28:06 +02:00
|
|
|
# Supervisor programs that are stopped and started together as the
# "server core".  Thumbor is included only when its supervisor
# configuration file is present on this host.
core_server_services = ["zulip-django", "zulip-senders:*"]
thumbor_conf = "/etc/supervisor/conf.d/thumbor.conf"
if os.path.exists(thumbor_conf):
    core_server_services += ["zulip-thumbor"]
|
|
|
|
|
2018-08-11 01:28:06 +02:00
|
|
|
# "current" and "last" are symlinks inside DEPLOYMENTS_DIR.  If "current"
# does not already point at this deployment, point "last" at the old
# target of "current" and then point "current" at this deployment.
current_symlink = os.path.join(DEPLOYMENTS_DIR, "current")
last_symlink = os.path.join(DEPLOYMENTS_DIR, "last")
change_symlink = not (os.readlink(current_symlink) == deploy_path)
if change_symlink:
    previous_target = os.readlink(current_symlink)
    overwrite_symlink(previous_target, last_symlink)
    overwrite_symlink(deploy_path, current_symlink)
|
2018-08-11 01:28:06 +02:00
|
|
|
|
2018-11-28 02:09:00 +01:00
|
|
|
# Read the number of Tornado processes from [application_server] in
# /etc/zulip/zulip.conf.  A missing file, section, or option yields the
# default of 1; a non-integer value still raises ValueError.
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
tornado_processes = config_file.getint(
    'application_server', 'tornado_processes', fallback=1
)
|
|
|
|
|
2018-08-11 01:28:06 +02:00
|
|
|
# Tornado is restarted first and on its own.  It is the main source of
# user-visible downtime during a restart, and restarting it alongside
# many other Python processes can leave it with insufficient priority,
# which lengthens that downtime.
if tornado_processes > 1:
    tornado_targets = ["zulip-tornado:*"]
else:
    tornado_targets = ["zulip-tornado", "zulip-tornado:*"]
subprocess.check_call(["supervisorctl", "restart"] + tornado_targets)
|
2018-08-11 01:28:06 +02:00
|
|
|
|
2016-11-23 13:36:09 +01:00
|
|
|
# Restart the uWSGI and related processes via supervisorctl.
|
[manual] restart-server: Minimize downtime for message sender worker.
The manual step here is that we need to do the `puppet apply` before
pushing this commit, or `restart-server` will crash.
Previously we shut down everything in one group, which performed
poorly with supervisor's bad performance on restarting many daemons at
once. Now we shut down the unimportant stuff, then the important
stuff, bring back the important stuff, and then bring back the
unimportant stuff.
This new model has a little over 5s of downtime for the core
user-facing daemons -- which is still far more than would be ideal,
but a lot less than the 13s or so that we had before.
Here are some logs with the current setup for the tornado/django downtime:
2013-12-19 20:16:51,995 restart-server: Stopping daemons
2013-12-19 20:16:53,461 restart-server: Starting daemons
2013-12-19 20:16:57,146 restart-server: Starting workers
Compare with the behavior on master today:
2013-12-19 20:21:45,281 restart-server: Stopping daemons
2013-12-19 20:21:49,225 restart-server: Starting daemons
2013-12-19 20:21:58,463 restart-server: Done!
(imported from commit b2c1ba77f3dc989551d0939779208465a8410435)
2013-12-19 21:07:02 +01:00
|
|
|
# Stop the worker group first, then the server core services.
for stop_message, stop_targets in (
    ("Stopping workers", ["zulip-workers:*"]),
    ("Stopping server core", core_server_services),
):
    logging.info(stop_message)
    subprocess.check_call(["supervisorctl", "stop"] + stop_targets)
|
2016-08-05 01:58:57 +02:00
|
|
|
|
[manual] restart-server: Minimize downtime for message sender worker.
The manual step here is that we need to do the `puppet apply` before
pushing this commit, or `restart-server` will crash.
Previously we shut down everything in one group, which performed
poorly with supervisor's bad performance on restarting many daemons at
once. Now we shut down the unimportant stuff, then the important
stuff, bring back the important stuff, and then bring back the
unimportant stuff.
This new model has a little over 5s of downtime for the core
user-facing daemons -- which is still far more than would be ideal,
but a lot less than the 13s or so that we had before.
Here are some logs with the current setup for the tornado/django downtime:
2013-12-19 20:16:51,995 restart-server: Stopping daemons
2013-12-19 20:16:53,461 restart-server: Starting daemons
2013-12-19 20:16:57,146 restart-server: Starting workers
Compare with the behavior on master today:
2013-12-19 20:21:45,281 restart-server: Stopping daemons
2013-12-19 20:21:49,225 restart-server: Starting daemons
2013-12-19 20:21:58,463 restart-server: Done!
(imported from commit b2c1ba77f3dc989551d0939779208465a8410435)
2013-12-19 21:07:02 +01:00
|
|
|
# Bring the server core services back up.
logging.info("Starting server core")
subprocess.check_call(["supervisorctl", "start", *core_server_services])
|
[manual] restart-server: Minimize downtime for message sender worker.
The manual step here is that we need to do the `puppet apply` before
pushing this commit, or `restart-server` will crash.
Previously we shut down everything in one group, which performed
poorly with supervisor's bad performance on restarting many daemons at
once. Now we shut down the unimportant stuff, then the important
stuff, bring back the important stuff, and then bring back the
unimportant stuff.
This new model has a little over 5s of downtime for the core
user-facing daemons -- which is still far more than would be ideal,
but a lot less than the 13s or so that we had before.
Here are some logs with the current setup for the tornado/django downtime:
2013-12-19 20:16:51,995 restart-server: Stopping daemons
2013-12-19 20:16:53,461 restart-server: Starting daemons
2013-12-19 20:16:57,146 restart-server: Starting workers
Compare with the behavior on master today:
2013-12-19 20:21:45,281 restart-server: Stopping daemons
2013-12-19 20:21:49,225 restart-server: Starting daemons
2013-12-19 20:21:58,463 restart-server: Done!
(imported from commit b2c1ba77f3dc989551d0939779208465a8410435)
2013-12-19 21:07:02 +01:00
|
|
|
# Finally bring the worker group back up.
logging.info("Starting workers")
start_workers_cmd = ["supervisorctl", "start", "zulip-workers:*"]
subprocess.check_call(start_workers_cmd)
|
2013-01-31 16:49:09 +01:00
|
|
|
|
2016-05-08 04:02:32 +02:00
|
|
|
# If the USING_APACHE_SSO Django setting prints as "True", signal the
# zulip user's apache2 processes with pkill.
using_sso = subprocess.check_output(
    ['./scripts/get-django-setting', 'USING_APACHE_SSO']
)
if using_sso.strip() == b'True':
    logging.info("Restarting Apache WSGI process...")
    subprocess.check_call(["pkill", "-f", "apache2", "-u", "zulip"])

# Restart process-fts-updates only when its supervisor configuration
# file is present on this host.
zulip_db_conf = "/etc/supervisor/conf.d/zulip_db.conf"
if os.path.exists(zulip_db_conf):
    subprocess.check_call(["supervisorctl", "restart", "process-fts-updates"])
|
|
|
|
|
2013-03-13 19:26:51 +01:00
|
|
|
# Report completion in the log and, wrapped in the OKGREEN/ENDC markers,
# on stdout.
logging.info("Done!")
success_banner = "".join([OKGREEN, "Application restarted successfully!", ENDC])
print(success_banner)
|
2019-09-20 02:23:23 +02:00
|
|
|
|
|
|
|
# If the deployment symlinks were repointed, warn a user whose shell's
# $PWD is at or below one of them: the shell will not see the change
# until it changes into the directory again.
if change_symlink and "PWD" in os.environ:
    shell_pwd = os.environ["PWD"]
    for symlink in [last_symlink, current_symlink]:
        # Match on whole path components.  os.path.commonprefix() compares
        # character by character, so a sibling directory whose name merely
        # starts with the symlink's name (e.g. "<symlink>-old") would
        # wrongly be treated as being inside the symlink.
        if shell_pwd == symlink or shell_pwd.startswith(symlink + os.sep):
            print(
                """
%sYour shell entered its current directory through a symlink:
%s
which has now changed. Your shell will not see this change until you run:
cd %s
to traverse the symlink again.%s
"""
                % (WARNING, symlink, shlex.quote(shell_pwd), ENDC),
                file=sys.stderr,
            )
|