tornado: Remove explicit tornado_processes setting; compute it.

We can compute the intended number of processes from the sharding
configuration.  In doing so, also validate that all of the ports are
contiguous.

This removes a discrepancy between `scripts/lib/sharding.py` and other
parts of the codebase about if merely having a `[tornado_sharding]`
section is sufficient to enable sharding.  Having behaviour which
changes merely based on if an empty section exists is surprising.

This does require that a (presumably empty) `9800` configuration line
exist, but making that default explicit is useful.

After this commit, configuring sharding can be done by adding to
`zulip.conf`:

```
[tornado_sharding]
9800 =              # default
9801 = other_realm
```

Followed by running `./scripts/refresh-sharding-and-restart`.
This commit is contained in:
Alex Vandiver 2020-09-14 17:01:33 -07:00 committed by Tim Abbott
parent ece0aaa6cc
commit 2a12fedcf1
11 changed files with 66 additions and 45 deletions

View File

@ -11,6 +11,16 @@ module Puppet::Parser::Functions
end
end
newfunction(:zulipconf_keys, :type => :rvalue, :arity => 1) do |args|
zulip_conf_path = lookupvar('zulip_conf_path')
output = `/usr/bin/crudini --get #{zulip_conf_path} #{args[0]} 2>&1`; result=$?.success?
if result
return output.lines.map { |l| l.strip }
else
return []
end
end
newfunction(:zulipconf_nagios_hosts, :type => :rvalue, :arity => 0) do |args|
section = "nagios"
prefix = "hosts_"

View File

@ -72,6 +72,7 @@ class zulip::app_frontend_base {
} else {
$uwsgi_default_processes = 4
}
$tornado_ports = $zulip::tornado_sharding::tornado_ports
file { "${zulip::common::supervisor_conf_dir}/zulip.conf":
ensure => file,
require => Package[supervisor],

View File

@ -38,16 +38,9 @@ class zulip::tornado_sharding {
loglevel => 'warning',
}
# The number of Tornado processes to run on the server; this
# defaults to 1, since Tornado sharding is currently only at the
# Realm level.
$tornado_processes = Integer(zulipconf('application_server', 'tornado_processes', 1))
if $tornado_processes > 1 {
$tornado_ports = range(9800, 9800 + $tornado_processes - 1)
$tornado_multiprocess = true
} else {
$tornado_multiprocess = false
}
# The ports of Tornado processes to run on the server; defaults to
# 9800.
$tornado_ports = zulipconf_keys('tornado_sharding')
file { '/etc/nginx/zulip-include/tornado-upstreams':
require => Package[$zulip::common::nginx],

View File

@ -1,4 +1,4 @@
<% if @tornado_multiprocess -%>
<% if @tornado_ports.length > 1 -%>
<% @tornado_ports.each do |port| -%>
upstream tornado<%= port %> {
server 127.0.0.1:<%= port %>;

View File

@ -23,7 +23,7 @@ stopasgroup=true ; Without this, we leak processes every restart
killasgroup=true ; Without this, we leak processes every restart
directory=/home/zulip/deployments/current/
<% if @tornado_multiprocess -%>
<% if @tornado_ports.length > 1 -%>
[program:zulip-tornado]
command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:98%(process_num)02d
process_name=zulip-tornado-port-98%(process_num)02d
@ -38,7 +38,7 @@ stdout_logfile=/var/log/zulip/tornado-98%(process_num)02d.log ; stdout l
stdout_logfile_maxbytes=100MB ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=10 ; # of stdout logfile backups (default 10)
directory=/home/zulip/deployments/current/
numprocs=<%= @tornado_processes %>
numprocs=<%= @tornado_ports.length %>
<% else -%>
[program:zulip-tornado]
command=env PYTHONUNBUFFERED=1 /home/zulip/deployments/current/manage.py runtornado 127.0.0.1:9800

View File

@ -13,7 +13,7 @@ from scripts.lib.setup_path import setup_path
setup_path()
from scripts.lib.zulip_tools import get_config_file
from scripts.lib.zulip_tools import get_config_file, get_tornado_ports
def write_realm_nginx_config_line(f: Any, host: str, port: str) -> None:
@ -61,7 +61,13 @@ with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \
nginx_sharding_conf_f.write(f"# Configuration hash: {new_hash}\n")
config_file = get_config_file()
if not config_file.has_section("tornado_sharding"):
ports = get_tornado_ports(config_file)
expected_ports = list(range(9800, max(ports)+1))
assert sorted(ports) == expected_ports, \
f"ports ({sorted(ports)}) must be contiguous, starting with 9800"
if len(ports) == 1:
nginx_sharding_conf_f.write("set $tornado_server http://tornado;\n")
sharding_json_f.write('{}\n')
sys.exit(0)
@ -72,9 +78,10 @@ with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \
'EXTERNAL_HOST'],
universal_newlines=True).strip()
for port in config_file["tornado_sharding"]:
shards = config_file["tornado_sharding"][port].strip().split(' ')
shards = config_file["tornado_sharding"][port].strip()
for shard in shards:
if shards:
for shard in shards.split(' '):
if '.' in shard:
host = shard
else:

View File

@ -30,6 +30,7 @@ from scripts.lib.zulip_tools import (
assert_running_as_root,
get_config,
get_config_file,
get_tornado_ports,
parse_os_release,
set_config,
su_to_zulip,
@ -89,7 +90,7 @@ deploy_path = args.deploy_path
os.chdir(deploy_path)
config_file = get_config_file()
tornado_processes = int(get_config(config_file, 'application_server', 'tornado_processes', '1'))
tornado_processes = len(get_tornado_ports(config_file))
IS_SERVER_UP = True

View File

@ -515,6 +515,14 @@ def get_config_file() -> configparser.RawConfigParser:
def get_deploy_options(config_file: configparser.RawConfigParser) -> List[str]:
return get_config(config_file, 'deployment', 'deploy_options', "").strip().split()
def get_tornado_ports(config_file: configparser.RawConfigParser) -> List[int]:
ports = []
if config_file.has_section("tornado_sharding"):
ports = [int(port) for port in config_file.options("tornado_sharding")]
if not ports:
ports = [9800]
return ports
def get_or_create_dev_uuid_var_path(path: str) -> str:
absolute_path = '{}/{}'.format(get_dev_uuid_var_path(), path)
os.makedirs(absolute_path, exist_ok=True)

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3
import argparse
import configparser
import os
import subprocess
import sys
@ -11,6 +10,7 @@ from typing import Dict
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ZULIP_PATH)
from scripts.lib.check_rabbitmq_queue import normal_queues
from scripts.lib.zulip_tools import get_config_file, get_tornado_ports
states = {
0: "OK",
@ -33,13 +33,8 @@ parser.add_argument('--min-threshold',
options = parser.parse_args()
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
def get_config(section: str, key: str, default_value: str) -> str:
if config_file.has_option(section, key):
return config_file.get(section, key)
return default_value
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
config_file = get_config_file()
TORNADO_PROCESSES = len(get_tornado_ports(config_file))
output = subprocess.check_output(['/usr/sbin/rabbitmqctl', 'list_consumers'],
universal_newlines=True)

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3
import argparse
import configparser
import logging
import os
import pwd
@ -10,7 +9,15 @@ import sys
import time
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.zulip_tools import DEPLOYMENTS_DIR, ENDC, OKGREEN, WARNING, overwrite_symlink
from scripts.lib.zulip_tools import (
DEPLOYMENTS_DIR,
ENDC,
OKGREEN,
WARNING,
get_config_file,
get_tornado_ports,
overwrite_symlink,
)
logging.Formatter.converter = time.gmtime
logging.basicConfig(format="%(asctime)s restart-server: %(message)s",
@ -46,21 +53,16 @@ if change_symlink:
overwrite_symlink(os.readlink(current_symlink), last_symlink)
overwrite_symlink(deploy_path, current_symlink)
config_file = configparser.RawConfigParser()
config_file.read("/etc/zulip/zulip.conf")
try:
tornado_processes = int(config_file.get('application_server', 'tornado_processes'))
except (configparser.NoSectionError, configparser.NoOptionError):
tornado_processes = 1
config_file = get_config_file()
tornado_ports = get_tornado_ports(config_file)
# We restart just the zulip-tornado service early, in order to
# minimize downtime of the tornado service caused by too many Python
# processes restarting at the same time resulting in it receiving
# insufficient priority. This is important, because Tornado is the
# main source of user-visible downtime when we restart a Zulip server.
if tornado_processes > 1:
for p in range(9800, 9800+tornado_processes):
if len(tornado_ports) > 1:
for p in tornado_ports:
# Restart Tornado processes individually for a better rate of
# restarts. This also avoids behavior with restarting a whole
# supervisord group where if any individual process is slow to

View File

@ -8,12 +8,14 @@ from urllib.parse import urljoin
from django.template.loaders import app_directories
import zerver.lib.logging_util
from scripts.lib.zulip_tools import get_tornado_ports
from zerver.lib.db import TimeTrackingConnection
from .config import (
DEPLOY_ROOT,
DEVELOPMENT,
PRODUCTION,
config_file,
get_config,
get_from_file_if_exists,
get_secret,
@ -231,7 +233,9 @@ INSTALLED_APPS += EXTRA_INSTALLED_APPS
ZILENCER_ENABLED = 'zilencer' in INSTALLED_APPS
CORPORATE_ENABLED = 'corporate' in INSTALLED_APPS
TORNADO_PROCESSES = int(get_config('application_server', 'tornado_processes', '1'))
TORNADO_PORTS = get_tornado_ports(config_file)
TORNADO_PROCESSES = len(TORNADO_PORTS)
RUNNING_INSIDE_TORNADO = False
AUTORELOAD = DEBUG