tornado: Remove explicit tornado_processes setting; compute it.

We can compute the intended number of processes from the sharding
configuration.  In doing so, also validate that all of the ports are
contiguous.

This removes a discrepancy between `scripts/lib/sharding.py` and other
parts of the codebase about whether merely having a `[tornado_sharding]`
section is sufficient to enable sharding.  Having behaviour which
changes merely based on whether an empty section exists is surprising.

This does require that a (presumably empty) `9800` configuration line
exist, but making that default explicit is useful.

After this commit, configuring sharding can be done by adding to
`zulip.conf`:

```
[tornado_sharding]
9800 =              # default
9801 = other_realm
```

Then run `./scripts/refresh-sharding-and-restart` to apply the change.
This commit is contained in:
Alex Vandiver 2020-09-14 17:01:33 -07:00 committed by Tim Abbott
parent ece0aaa6cc
commit 2a12fedcf1
11 changed files with 66 additions and 45 deletions

View File

@ -11,6 +11,16 @@ module Puppet::Parser::Functions
end end
end end
newfunction(:zulipconf_keys, :type => :rvalue, :arity => 1) do |args|
  # Returns the lines printed by `crudini --get <zulip_conf_path> <section>`,
  # each stripped of surrounding whitespace, where <section> is args[0].
  # Returns an empty array when crudini exits with a non-zero status.
  conf_path = lookupvar('zulip_conf_path').to_s
  section = args[0].to_s

  # Run crudini with an argument vector rather than a shell command string, so
  # whitespace or shell metacharacters in the path or section name cannot
  # change the command that is executed.  stderr is still merged into stdout.
  output = IO.popen(['/usr/bin/crudini', '--get', conf_path, section],
                    :err => [:child, :out], &:read)

  # The block form of IO.popen sets $? once the child has been reaped.
  return [] unless $?.success?

  output.lines.map(&:strip)
end
newfunction(:zulipconf_nagios_hosts, :type => :rvalue, :arity => 0) do |args| newfunction(:zulipconf_nagios_hosts, :type => :rvalue, :arity => 0) do |args|
section = "nagios" section = "nagios"
prefix = "hosts_" prefix = "hosts_"

View File

@ -72,6 +72,7 @@ class zulip::app_frontend_base {
} else { } else {
$uwsgi_default_processes = 4 $uwsgi_default_processes = 4
} }
$tornado_ports = $zulip::tornado_sharding::tornado_ports
file { "${zulip::common::supervisor_conf_dir}/zulip.conf": file { "${zulip::common::supervisor_conf_dir}/zulip.conf":
ensure => file, ensure => file,
require => Package[supervisor], require => Package[supervisor],

View File

@ -38,16 +38,9 @@ class zulip::tornado_sharding {
loglevel => 'warning', loglevel => 'warning',
} }
# The number of Tornado processes to run on the server; this # The ports of Tornado processes to run on the server; defaults to
# defaults to 1, since Tornado sharding is currently only at the # 9800.
# Realm level. $tornado_ports = zulipconf_keys('tornado_sharding')
$tornado_processes = Integer(zulipconf('application_server', 'tornado_processes', 1))
if $tornado_processes > 1 {
$tornado_ports = range(9800, 9800 + $tornado_processes - 1)
$tornado_multiprocess = true
} else {
$tornado_multiprocess = false
}
file { '/etc/nginx/zulip-include/tornado-upstreams': file { '/etc/nginx/zulip-include/tornado-upstreams':
require => Package[$zulip::common::nginx], require => Package[$zulip::common::nginx],

View File

@ -1,4 +1,4 @@
<% if @tornado_multiprocess -%> <% if @tornado_ports.length > 1 -%>
<% @tornado_ports.each do |port| -%> <% @tornado_ports.each do |port| -%>
upstream tornado<%= port %> { upstream tornado<%= port %> {
server 127.0.0.1:<%= port %>; server 127.0.0.1:<%= port %>;

View File

@ -23,7 +23,7 @@ stopasgroup=true ; Without this, we leak processes every restart
killasgroup=true ; Without this, we leak processes every restart killasgroup=true ; Without this, we leak processes every restart
directory=/home/zulip/deployments/current/ directory=/home/zulip/deployments/current/
<% if @tornado_multiprocess -%> <% if @tornado_ports.length > 1 -%>
[program:zulip-tornado] [program:zulip-tornado]
command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:98%(process_num)02d command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:98%(process_num)02d
process_name=zulip-tornado-port-98%(process_num)02d process_name=zulip-tornado-port-98%(process_num)02d
@ -38,7 +38,7 @@ stdout_logfile=/var/log/zulip/tornado-98%(process_num)02d.log ; stdout l
stdout_logfile_maxbytes=100MB ; max # logfile bytes b4 rotation (default 50MB) stdout_logfile_maxbytes=100MB ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=10 ; # of stdout logfile backups (default 10) stdout_logfile_backups=10 ; # of stdout logfile backups (default 10)
directory=/home/zulip/deployments/current/ directory=/home/zulip/deployments/current/
numprocs=<%= @tornado_processes %> numprocs=<%= @tornado_ports.length %>
<% else -%> <% else -%>
[program:zulip-tornado] [program:zulip-tornado]
command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:9800 command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:9800

View File

@ -13,7 +13,7 @@ from scripts.lib.setup_path import setup_path
setup_path() setup_path()
from scripts.lib.zulip_tools import get_config_file from scripts.lib.zulip_tools import get_config_file, get_tornado_ports
def write_realm_nginx_config_line(f: Any, host: str, port: str) -> None: def write_realm_nginx_config_line(f: Any, host: str, port: str) -> None:
@ -61,7 +61,13 @@ with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \
nginx_sharding_conf_f.write(f"# Configuration hash: {new_hash}\n") nginx_sharding_conf_f.write(f"# Configuration hash: {new_hash}\n")
config_file = get_config_file() config_file = get_config_file()
if not config_file.has_section("tornado_sharding"): ports = get_tornado_ports(config_file)
expected_ports = list(range(9800, max(ports)+1))
assert sorted(ports) == expected_ports, \
f"ports ({sorted(ports)}) must be contiguous, starting with 9800"
if len(ports) == 1:
nginx_sharding_conf_f.write("set $tornado_server http://tornado;\n") nginx_sharding_conf_f.write("set $tornado_server http://tornado;\n")
sharding_json_f.write('{}\n') sharding_json_f.write('{}\n')
sys.exit(0) sys.exit(0)
@ -72,16 +78,17 @@ with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \
'EXTERNAL_HOST'], 'EXTERNAL_HOST'],
universal_newlines=True).strip() universal_newlines=True).strip()
for port in config_file["tornado_sharding"]: for port in config_file["tornado_sharding"]:
shards = config_file["tornado_sharding"][port].strip().split(' ') shards = config_file["tornado_sharding"][port].strip()
for shard in shards: if shards:
if '.' in shard: for shard in shards.split(' '):
host = shard if '.' in shard:
else: host = shard
host = f"{shard}.{external_host}" else:
assert host not in shard_map, f"host {host} duplicated" host = f"{shard}.{external_host}"
shard_map[host] = int(port) assert host not in shard_map, f"host {host} duplicated"
write_realm_nginx_config_line(nginx_sharding_conf_f, host, port) shard_map[host] = int(port)
write_realm_nginx_config_line(nginx_sharding_conf_f, host, port)
nginx_sharding_conf_f.write('\n') nginx_sharding_conf_f.write('\n')
sharding_json_f.write(json.dumps(shard_map) + '\n') sharding_json_f.write(json.dumps(shard_map) + '\n')

View File

@ -30,6 +30,7 @@ from scripts.lib.zulip_tools import (
assert_running_as_root, assert_running_as_root,
get_config, get_config,
get_config_file, get_config_file,
get_tornado_ports,
parse_os_release, parse_os_release,
set_config, set_config,
su_to_zulip, su_to_zulip,
@ -89,7 +90,7 @@ deploy_path = args.deploy_path
os.chdir(deploy_path) os.chdir(deploy_path)
config_file = get_config_file() config_file = get_config_file()
tornado_processes = int(get_config(config_file, 'application_server', 'tornado_processes', '1')) tornado_processes = len(get_tornado_ports(config_file))
IS_SERVER_UP = True IS_SERVER_UP = True

View File

@ -515,6 +515,14 @@ def get_config_file() -> configparser.RawConfigParser:
def get_deploy_options(config_file: configparser.RawConfigParser) -> List[str]: def get_deploy_options(config_file: configparser.RawConfigParser) -> List[str]:
return get_config(config_file, 'deployment', 'deploy_options', "").strip().split() return get_config(config_file, 'deployment', 'deploy_options', "").strip().split()
def get_tornado_ports(config_file: configparser.RawConfigParser) -> List[int]:
    """Return the Tornado ports listed in the [tornado_sharding] section.

    Every option name reported for the section is converted to an integer
    port, in the order configparser returns them.  If the section is
    missing, or yields no options, the single default port 9800 is returned.

    Raises ValueError if an option name cannot be parsed as an integer.
    """
    if not config_file.has_section("tornado_sharding"):
        return [9800]
    ports = [int(port) for port in config_file.options("tornado_sharding")]
    # An empty section behaves exactly like a missing one.
    return ports or [9800]
def get_or_create_dev_uuid_var_path(path: str) -> str: def get_or_create_dev_uuid_var_path(path: str) -> str:
absolute_path = '{}/{}'.format(get_dev_uuid_var_path(), path) absolute_path = '{}/{}'.format(get_dev_uuid_var_path(), path)
os.makedirs(absolute_path, exist_ok=True) os.makedirs(absolute_path, exist_ok=True)

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse import argparse
import configparser
import os import os
import subprocess import subprocess
import sys import sys
@ -11,6 +10,7 @@ from typing import Dict
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ZULIP_PATH) sys.path.append(ZULIP_PATH)
from scripts.lib.check_rabbitmq_queue import normal_queues from scripts.lib.check_rabbitmq_queue import normal_queues
from scripts.lib.zulip_tools import get_config_file, get_tornado_ports
states = { states = {
0: "OK", 0: "OK",
@ -33,13 +33,8 @@ parser.add_argument('--min-threshold',
options = parser.parse_args() options = parser.parse_args()
config_file = configparser.RawConfigParser() config_file = get_config_file()
config_file.read("/etc/zulip/zulip.conf") TORNADO_PROCESSES = len(get_tornado_ports(config_file))
def get_config(section: str, key: str, default_value: str) -> str:
if config_file.has_option(section, key):
return config_file.get(section, key)
return default_value
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'], output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True) universal_newlines=True)

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse import argparse
import configparser
import logging import logging
import os import os
import pwd import pwd
@ -10,7 +9,15 @@ import sys
import time import time
sys.path.append(os.path.join(os.path.dirname(__file__), '..')) sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.zulip_tools import DEPLOYMENTS_DIR, ENDC, OKGREEN, WARNING, overwrite_symlink from scripts.lib.zulip_tools import (
DEPLOYMENTS_DIR,
ENDC,
OKGREEN,
WARNING,
get_config_file,
get_tornado_ports,
overwrite_symlink,
)
logging.Formatter.converter = time.gmtime logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s restart-server: %(message)s", logging.basicConfig(format="%(asctime)s restart-server: %(message)s",
@ -46,21 +53,16 @@ if change_symlink:
overwrite_symlink(os.readlink(current_symlink), last_symlink) overwrite_symlink(os.readlink(current_symlink), last_symlink)
overwrite_symlink(deploy_path, current_symlink) overwrite_symlink(deploy_path, current_symlink)
config_file = configparser.RawConfigParser() config_file = get_config_file()
config_file.read("/etc/zulip/zulip.conf") tornado_ports = get_tornado_ports(config_file)
try:
tornado_processes = int(config_file.get('application_server', 'tornado_processes'))
except (configparser.NoSectionError, configparser.NoOptionError):
tornado_processes = 1
# We restart just the zulip-tornado service early, in order to # We restart just the zulip-tornado service early, in order to
# minimize downtime of the tornado service caused by too many Python # minimize downtime of the tornado service caused by too many Python
# processes restarting at the same time resulting in it receiving # processes restarting at the same time resulting in it receiving
# insufficient priority. This is important, because Tornado is the # insufficient priority. This is important, because Tornado is the
# main source of user-visible downtime when we restart a Zulip server. # main source of user-visible downtime when we restart a Zulip server.
if tornado_processes > 1: if len(tornado_ports) > 1:
for p in range(9800, 9800+tornado_processes): for p in tornado_ports:
# Restart Tornado processes individually for a better rate of # Restart Tornado processes individually for a better rate of
# restarts. This also avoids behavior with restarting a whole # restarts. This also avoids behavior with restarting a whole
# supervisord group where if any individual process is slow to # supervisord group where if any individual process is slow to

View File

@ -8,12 +8,14 @@ from urllib.parse import urljoin
from django.template.loaders import app_directories from django.template.loaders import app_directories
import zerver.lib.logging_util import zerver.lib.logging_util
from scripts.lib.zulip_tools import get_tornado_ports
from zerver.lib.db import TimeTrackingConnection from zerver.lib.db import TimeTrackingConnection
from .config import ( from .config import (
DEPLOY_ROOT, DEPLOY_ROOT,
DEVELOPMENT, DEVELOPMENT,
PRODUCTION, PRODUCTION,
config_file,
get_config, get_config,
get_from_file_if_exists, get_from_file_if_exists,
get_secret, get_secret,
@ -231,7 +233,9 @@ INSTALLED_APPS += EXTRA_INSTALLED_APPS
ZILENCER_ENABLED = 'zilencer' in INSTALLED_APPS ZILENCER_ENABLED = 'zilencer' in INSTALLED_APPS
CORPORATE_ENABLED = 'corporate' in INSTALLED_APPS CORPORATE_ENABLED = 'corporate' in INSTALLED_APPS
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1')) TORNADO_PORTS = get_tornado_ports(config_file)
TORNADO_PROCESSES = len(TORNADO_PORTS)
RUNNING_INSIDE_TORNADO = False RUNNING_INSIDE_TORNADO = False
AUTORELOAD = DEBUG AUTORELOAD = DEBUG