From 2a12fedcf1d8828447bc9a8e2e93a48e6a32b2ca Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Mon, 14 Sep 2020 17:01:33 -0700 Subject: [PATCH] tornado: Remove explicit tornado_processes setting; compute it. We can compute the intended number of processes from the sharding configuration. In doing so, also validate that all of the ports are contiguous. This removes a discrepancy between `scripts/lib/sharding.py` and other parts of the codebase about if merely having a `[tornado_sharding]` section is sufficient to enable sharding. Having behaviour which changes merely based on if an empty section exists is surprising. This does require that a (presumably empty) `9800` configuration line exist, but making that default explicit is useful. After this commit, configuring sharding can be done by adding to `zulip.conf`: ``` [tornado_sharding] 9800 = # default 9801 = other_realm ``` Followed by running `./scripts/refresh-sharding-and-restart`. --- .../lib/puppet/parser/functions/zulipconf.rb | 10 +++++++ puppet/zulip/manifests/app_frontend_base.pp | 1 + puppet/zulip/manifests/tornado_sharding.pp | 13 ++------- .../nginx/tornado-upstreams.conf.template.erb | 2 +- .../supervisor/zulip.conf.template.erb | 4 +-- scripts/lib/sharding.py | 29 ++++++++++++------- scripts/lib/upgrade-zulip-stage-2 | 3 +- scripts/lib/zulip_tools.py | 8 +++++ scripts/nagios/check-rabbitmq-consumers | 11 ++----- scripts/restart-server | 24 ++++++++------- zproject/computed_settings.py | 6 +++- 11 files changed, 66 insertions(+), 45 deletions(-) diff --git a/puppet/zulip/lib/puppet/parser/functions/zulipconf.rb b/puppet/zulip/lib/puppet/parser/functions/zulipconf.rb index 43e15c95d3..0778716ea9 100644 --- a/puppet/zulip/lib/puppet/parser/functions/zulipconf.rb +++ b/puppet/zulip/lib/puppet/parser/functions/zulipconf.rb @@ -11,6 +11,16 @@ module Puppet::Parser::Functions end end + newfunction(:zulipconf_keys, :type => :rvalue, :arity => 1) do |args| + zulip_conf_path = lookupvar('zulip_conf_path') + output = `/usr/bin/crudini --get #{zulip_conf_path} #{args[0]} 2>&1`; result=$?.success? + if result + return output.lines.map { |l| l.strip } + else + return [] + end + end + newfunction(:zulipconf_nagios_hosts, :type => :rvalue, :arity => 0) do |args| section = "nagios" prefix = "hosts_" diff --git a/puppet/zulip/manifests/app_frontend_base.pp b/puppet/zulip/manifests/app_frontend_base.pp index 942890b2f5..4750c7754d 100644 --- a/puppet/zulip/manifests/app_frontend_base.pp +++ b/puppet/zulip/manifests/app_frontend_base.pp @@ -72,6 +72,7 @@ class zulip::app_frontend_base { } else { $uwsgi_default_processes = 4 } + $tornado_ports = $zulip::tornado_sharding::tornado_ports file { "${zulip::common::supervisor_conf_dir}/zulip.conf": ensure => file, require => Package[supervisor], diff --git a/puppet/zulip/manifests/tornado_sharding.pp b/puppet/zulip/manifests/tornado_sharding.pp index fef7cd10af..a2da188ac5 100644 --- a/puppet/zulip/manifests/tornado_sharding.pp +++ b/puppet/zulip/manifests/tornado_sharding.pp @@ -38,16 +38,9 @@ class zulip::tornado_sharding { loglevel => 'warning', } - # The number of Tornado processes to run on the server; this - # defaults to 1, since Tornado sharding is currently only at the - # Realm level. - $tornado_processes = Integer(zulipconf('application_server', 'tornado_processes', 1)) - if $tornado_processes > 1 { - $tornado_ports = range(9800, 9800 + $tornado_processes - 1) - $tornado_multiprocess = true - } else { - $tornado_multiprocess = false - } + # The ports of Tornado processes to run on the server; defaults to + # 9800. + $tornado_ports = zulipconf_keys('tornado_sharding') file { '/etc/nginx/zulip-include/tornado-upstreams': require => Package[$zulip::common::nginx], diff --git a/puppet/zulip/templates/nginx/tornado-upstreams.conf.template.erb b/puppet/zulip/templates/nginx/tornado-upstreams.conf.template.erb index b60d581c3a..2591d5603f 100644 --- a/puppet/zulip/templates/nginx/tornado-upstreams.conf.template.erb +++ b/puppet/zulip/templates/nginx/tornado-upstreams.conf.template.erb @@ -1,4 +1,4 @@ -<% if @tornado_multiprocess -%> +<% if @tornado_ports.length > 1 -%> <% @tornado_ports.each do |port| -%> upstream tornado<%= port %> { server 127.0.0.1:<%= port %>; diff --git a/puppet/zulip/templates/supervisor/zulip.conf.template.erb b/puppet/zulip/templates/supervisor/zulip.conf.template.erb index c142bd59e6..30a22220dd 100644 --- a/puppet/zulip/templates/supervisor/zulip.conf.template.erb +++ b/puppet/zulip/templates/supervisor/zulip.conf.template.erb @@ -23,7 +23,7 @@ stopasgroup=true ; Without this, we leak processes every restart killasgroup=true ; Without this, we leak processes every restart directory=/home/zulip/deployments/current/ -<% if @tornado_multiprocess -%> +<% if @tornado_ports.length > 1 -%> [program:zulip-tornado] command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:98%(process_num)02d process_name=zulip-tornado-port-98%(process_num)02d @@ -38,7 +38,7 @@ stdout_logfile=/var/log/zulip/tornado-98%(process_num)02d.log ; stdout l stdout_logfile_maxbytes=100MB ; max # logfile bytes b4 rotation (default 50MB) stdout_logfile_backups=10 ; # of stdout logfile backups (default 10) directory=/home/zulip/deployments/current/ -numprocs=<%= @tornado_processes %> +numprocs=<%= @tornado_ports.length %> <% else -%> [program:zulip-tornado] command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:9800 diff --git a/scripts/lib/sharding.py b/scripts/lib/sharding.py index 8999669de0..44ab8dc7ec 100755 --- a/scripts/lib/sharding.py +++ b/scripts/lib/sharding.py @@ -13,7 +13,7 @@ from scripts.lib.setup_path import setup_path setup_path() -from scripts.lib.zulip_tools import get_config_file +from scripts.lib.zulip_tools import get_config_file, get_tornado_ports def write_realm_nginx_config_line(f: Any, host: str, port: str) -> None: @@ -61,7 +61,13 @@ with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \ nginx_sharding_conf_f.write(f"# Configuration hash: {new_hash}\n") config_file = get_config_file() - if not config_file.has_section("tornado_sharding"): + ports = get_tornado_ports(config_file) + + expected_ports = list(range(9800, max(ports)+1)) + assert sorted(ports) == expected_ports, \ + f"ports ({sorted(ports)}) must be contiguous, starting with 9800" + + if len(ports) == 1: nginx_sharding_conf_f.write("set $tornado_server http://tornado;\n") sharding_json_f.write('{}\n') sys.exit(0) @@ -72,16 +78,17 @@ with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \ 'EXTERNAL_HOST'], universal_newlines=True).strip() for port in config_file["tornado_sharding"]: - shards = config_file["tornado_sharding"][port].strip().split(' ') + shards = config_file["tornado_sharding"][port].strip() - for shard in shards: - if '.' in shard: - host = shard - else: - host = f"{shard}.{external_host}" - assert host not in shard_map, f"host {host} duplicated" - shard_map[host] = int(port) - write_realm_nginx_config_line(nginx_sharding_conf_f, host, port) + if shards: + for shard in shards.split(' '): + if '.' in shard: + host = shard + else: + host = f"{shard}.{external_host}" + assert host not in shard_map, f"host {host} duplicated" + shard_map[host] = int(port) + write_realm_nginx_config_line(nginx_sharding_conf_f, host, port) nginx_sharding_conf_f.write('\n') sharding_json_f.write(json.dumps(shard_map) + '\n') diff --git a/scripts/lib/upgrade-zulip-stage-2 b/scripts/lib/upgrade-zulip-stage-2 index 9fe9e9a141..01dd567c2b 100755 --- a/scripts/lib/upgrade-zulip-stage-2 +++ b/scripts/lib/upgrade-zulip-stage-2 @@ -30,6 +30,7 @@ from scripts.lib.zulip_tools import ( assert_running_as_root, get_config, get_config_file, + get_tornado_ports, parse_os_release, set_config, su_to_zulip, @@ -89,7 +90,7 @@ deploy_path = args.deploy_path os.chdir(deploy_path) config_file = get_config_file() -tornado_processes = int(get_config(config_file, 'application_server', 'tornado_processes', '1')) +tornado_processes = len(get_tornado_ports(config_file)) IS_SERVER_UP = True diff --git a/scripts/lib/zulip_tools.py b/scripts/lib/zulip_tools.py index f707cc59db..743cfe0d47 100755 --- a/scripts/lib/zulip_tools.py +++ b/scripts/lib/zulip_tools.py @@ -515,6 +515,14 @@ def get_config_file() -> configparser.RawConfigParser: def get_deploy_options(config_file: configparser.RawConfigParser) -> List[str]: return get_config(config_file, 'deployment', 'deploy_options', "").strip().split() +def get_tornado_ports(config_file: configparser.RawConfigParser) -> List[int]: + ports = [] + if config_file.has_section("tornado_sharding"): + ports = [int(port) for port in config_file.options("tornado_sharding")] + if not ports: + ports = [9800] + return ports + def get_or_create_dev_uuid_var_path(path: str) -> str: absolute_path = '{}/{}'.format(get_dev_uuid_var_path(), path) os.makedirs(absolute_path, exist_ok=True) diff --git a/scripts/nagios/check-rabbitmq-consumers b/scripts/nagios/check-rabbitmq-consumers index 80e444a9ff..0b7b99a6f4 100755 --- a/scripts/nagios/check-rabbitmq-consumers +++ b/scripts/nagios/check-rabbitmq-consumers @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import argparse -import configparser import os import subprocess import sys @@ -11,6 +10,7 @@ from typing import Dict ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(ZULIP_PATH) from scripts.lib.check_rabbitmq_queue import normal_queues +from scripts.lib.zulip_tools import get_config_file, get_tornado_ports states = { 0: "OK", @@ -33,13 +33,8 @@ parser.add_argument('--min-threshold', options = parser.parse_args() -config_file = configparser.RawConfigParser() -config_file.read("/etc/zulip/zulip.conf") -def get_config(section: str, key: str, default_value: str) -> str: - if config_file.has_option(section, key): - return config_file.get(section, key) - return default_value -TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1')) +config_file = get_config_file() +TORNADO_PROCESSES = len(get_tornado_ports(config_file)) output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'], universal_newlines=True) diff --git a/scripts/restart-server b/scripts/restart-server index 6b318fa1aa..eae0b3f695 100755 --- a/scripts/restart-server +++ b/scripts/restart-server @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import argparse -import configparser import logging import os import pwd @@ -10,7 +9,15 @@ import sys import time sys.path.append(os.path.join(os.path.dirname(__file__), '..')) -from scripts.lib.zulip_tools import DEPLOYMENTS_DIR, ENDC, OKGREEN, WARNING, overwrite_symlink +from scripts.lib.zulip_tools import ( + DEPLOYMENTS_DIR, + ENDC, + OKGREEN, + WARNING, + get_config_file, + get_tornado_ports, + overwrite_symlink, +) logging.Formatter.converter = time.gmtime logging.basicConfig(format="%(asctime)s restart-server: %(message)s", @@ -46,21 +53,16 @@ if change_symlink: overwrite_symlink(os.readlink(current_symlink), last_symlink) overwrite_symlink(deploy_path, current_symlink) -config_file = configparser.RawConfigParser() -config_file.read("/etc/zulip/zulip.conf") - -try: - tornado_processes = int(config_file.get('application_server', 'tornado_processes')) -except (configparser.NoSectionError, configparser.NoOptionError): - tornado_processes = 1 +config_file = get_config_file() +tornado_ports = get_tornado_ports(config_file) # We restart just the zulip-tornado service early, in order to # minimize downtime of the tornado service caused by too many Python # processes restarting at the same time resulting in it receiving # insufficient priority. This is important, because Tornado is the # main source of user-visible downtime when we restart a Zulip server. -if tornado_processes > 1: - for p in range(9800, 9800+tornado_processes): +if len(tornado_ports) > 1: + for p in tornado_ports: # Restart Tornado processes individually for a better rate of # restarts. This also avoids behavior with restarting a whole # supervisord group where if any individual process is slow to diff --git a/zproject/computed_settings.py b/zproject/computed_settings.py index 15d01690eb..f85a693f92 100644 --- a/zproject/computed_settings.py +++ b/zproject/computed_settings.py @@ -8,12 +8,14 @@ from urllib.parse import urljoin from django.template.loaders import app_directories import zerver.lib.logging_util +from scripts.lib.zulip_tools import get_tornado_ports from zerver.lib.db import TimeTrackingConnection from .config import ( DEPLOY_ROOT, DEVELOPMENT, PRODUCTION, + config_file, get_config, get_from_file_if_exists, get_secret, @@ -231,7 +233,9 @@ INSTALLED_APPS += EXTRA_INSTALLED_APPS ZILENCER_ENABLED = 'zilencer' in INSTALLED_APPS CORPORATE_ENABLED = 'corporate' in INSTALLED_APPS -TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1')) +TORNADO_PORTS = get_tornado_ports(config_file) +TORNADO_PROCESSES = len(TORNADO_PORTS) + RUNNING_INSIDE_TORNADO = False AUTORELOAD = DEBUG