#!/usr/bin/env python3
import argparse
import logging
import os
import pwd
import shlex
import subprocess
import sys
import time

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from scripts.lib.zulip_tools import (
    DEPLOYMENTS_DIR,
    ENDC,
    OKGREEN,
    WARNING,
    get_config_file,
    get_tornado_ports,
    overwrite_symlink,
)

# The action is chosen from the name this script was invoked under:
# a name ending in "restart-server" restarts, anything else starts.
action = "restart" if sys.argv[0].endswith("restart-server") else "start"
# Progressive form for log messages: "Restarting" or "Starting".
verbing = f"{action.title()}ing"

# Log timestamps are rendered with time.gmtime (UTC), and every line is
# prefixed with the action being performed.
logging.Formatter.converter = time.gmtime
logging.basicConfig(level=logging.INFO, format=f"%(asctime)s {action}-server: %(message)s")

# Boolean command-line flags as (flag, help text) pairs.  --less-graceful
# is only registered when restarting, so `args.less_graceful` does not
# exist when the action is "start".
cli_flags = [("--fill-cache", "Fill the memcached caches")]
if action == "restart":
    cli_flags.append(
        (
            "--less-graceful",
            "Restart with more concern for expediency than minimizing availability interruption",
        )
    )

parser = argparse.ArgumentParser()
for flag_name, flag_help in cli_flags:
    parser.add_argument(flag_name, action="store_true", help=flag_help)
args = parser.parse_args()

# Work from the parent of the directory holding this script, with any
# symlinks in that path resolved.
script_dir = os.path.dirname(__file__)
deploy_path = os.path.realpath(os.path.join(script_dir, ".."))
os.chdir(deploy_path)

# Refuse to continue unless the invoking account is 'zulip'.
invoking_user = pwd.getpwuid(os.getuid()).pw_name
if invoking_user != "zulip":
    logging.error("Must be run as user 'zulip'.")
    sys.exit(1)

# Record a statsd event for this invocation, stamped with the current
# Unix time in whole seconds.  This runs for "start" as well as "restart".
event_timestamp = str(int(time.time()))
subprocess.check_call(
    ["./manage.py", "send_stats", "incr", "events.server_restart", event_timestamp]
)

# Optionally fill the memcached caches before any process is touched.
if args.fill_cache:
    logging.info("Filling memcached caches")
    fill_cache_cmd = ["./manage.py", "fill_memcached_caches"]
    subprocess.check_call(fill_cache_cmd)

# If "current" does not already point at this deployment, remember its
# old target under "last" and then repoint "current" here.
current_symlink = os.path.join(DEPLOYMENTS_DIR, "current")
last_symlink = os.path.join(DEPLOYMENTS_DIR, "last")
previous_target = os.readlink(current_symlink)
change_symlink = previous_target != deploy_path
if change_symlink:
    overwrite_symlink(previous_target, last_symlink)
    overwrite_symlink(deploy_path, current_symlink)

# Tornado ports come from the config file; they decide below whether
# Tornado is restarted per-port or as a single process.
config_file = get_config_file()
tornado_ports = get_tornado_ports(config_file)

# Start by restarting the workers and similar processes, one at a
# time.  Workers can always support processing events with old event
# contents, but cannot necessarily understand events enqueued by a
# newer Django process.  Restarting them one at a time, rather than
# all-at-once, minimizes the downtime of each, and reduces startup
# contention.
#
# For "start" or less-graceful circumstances, we don't need to
# iterate; we'll stop all of them at once, and start them all later.
# In those cases, using the glob form is faster -- but if we do need
# to iterate, we need to expand the glob.
if action == "start" or args.less_graceful:
    workers = ["zulip-workers:*"]
else:
    worker_status = subprocess.run(
        ["supervisorctl", "status", "zulip-workers:*"],
        universal_newlines=True,
        stdout=subprocess.PIPE,
    )
    # `supervisorctl status` returns 3 if any are stopped, which is fine here.
    if worker_status.returncode not in (0, 3):
        worker_status.check_returncode()
    # The first whitespace-separated field of each status line is used
    # as the process name.  Blank lines are skipped so that they cannot
    # raise an IndexError on the `[0]` lookup.
    workers = [
        status_line.split()[0]
        for status_line in worker_status.stdout.splitlines()
        if status_line.strip()
    ]

# When the zulip_db supervisor config is present on this host, the
# process-fts-updates process is handled together with the workers.
if os.path.exists("/etc/supervisor/conf.d/zulip/zulip_db.conf"):
    workers.append("process-fts-updates")

# Nothing happens here for "start"; workers are brought up at the end.
if action == "restart" and args.less_graceful:
    # The less graceful form stops every worker now; we start them
    # back up at the end.
    logging.info("Stopping workers")
    stop_workers_cmd = ["supervisorctl", "stop"] + workers
    subprocess.check_call(stop_workers_cmd)
elif action == "restart":
    # A single `supervisorctl restart` with every name would take them
    # all down at once, wait until they are all down, and only then
    # bring them back up; restarting sequentially therefore needs one
    # `supervisorctl restart` call per worker.
    for worker_name in workers:
        logging.info("Restarting %s", worker_name)
        subprocess.check_call(["supervisorctl", "restart", worker_name])

# Next come the Tornado processes, handled sequentially so that too
# many Python processes are not restarting at the same time and each
# receiving insufficient priority.  This matters because Tornado is the
# main source of user-visible downtime when a Zulip server restarts.
# Tornado goes before Django in case there are new event types which it
# will need to know how to deal with.
if len(tornado_ports) <= 1:
    logging.info("%s Tornado process", verbing)
    subprocess.check_call(["supervisorctl", action, "zulip-tornado", "zulip-tornado:*"])
else:
    # Act on each Tornado process individually for a better rate of
    # restarts.  Acting on the whole supervisord group would mean that
    # if any individual process is slow to stop, the whole bundle stays
    # stopped for an extended time.
    for port in tornado_ports:
        logging.info("%s Tornado process on port %s", verbing, port)
        tornado_target = f"zulip-tornado:zulip-tornado-port-{port}"
        subprocess.check_call(["supervisorctl", action, tornado_target])

# Last of the server processes: the Django uWSGI processes.
logging.info("%s django server", verbing)
django_cmd = ["supervisorctl", action, "zulip-django"]
subprocess.check_call(django_cmd)

# For a non-graceful restart, or a plain start, the workers are not
# running at this point and must be turned (back) on.  There is no
# advantage to doing this one at a time, so a single call is used.
workers_need_start = action == "start" or args.less_graceful
if workers_need_start:
    logging.info("Starting workers")
    subprocess.check_call(["supervisorctl", "start"] + workers)

# When the USING_APACHE_SSO Django setting prints as "True", signal the
# apache2 processes owned by user zulip via pkill.
using_sso = subprocess.check_output(["./scripts/get-django-setting", "USING_APACHE_SSO"])
if using_sso.strip() == b"True":
    logging.info("Restarting Apache WSGI process...")
    subprocess.check_call(["pkill", "-x", "apache2", "-u", "zulip"])

logging.info("Done!")
print(f"{OKGREEN}Zulip {action}ed successfully!{ENDC}")

# If the deployment symlinks were changed and the invoking shell's $PWD
# lies at or beneath one of them, warn on stderr that the shell must
# `cd` again to traverse the updated symlink.
if change_symlink and "PWD" in os.environ:
    shell_pwd = os.environ["PWD"]
    for symlink in [last_symlink, current_symlink]:
        # Match on whole path components.  os.path.commonprefix compares
        # character by character, so a directory such as
        # ".../current-old" would wrongly be treated as being inside
        # ".../current".
        if shell_pwd == symlink or shell_pwd.startswith(symlink + os.sep):
            print(
                """
{}Your shell entered its current directory through a symlink:
  {}
which has now changed. Your shell will not see this change until you run:
  cd {}
to traverse the symlink again.{}
""".format(
                    WARNING, symlink, shlex.quote(shell_pwd), ENDC
                ),
                file=sys.stderr,
            )