sharding: Add basic sharding configuration for Tornado.

This allows straightforward configuration of realm-based Tornado
sharding: simply edit /etc/zulip/zulip.conf to configure the shards,
then run scripts/refresh-sharding-and-restart.

Co-Author-By: Mateusz Mandera <mateusz.mandera@zulip.com>
This commit is contained in:
Tim Abbott 2018-11-06 17:13:37 -08:00 committed by Tim Abbott
parent cdd3b7efbc
commit 220620e7cf
9 changed files with 126 additions and 4 deletions

View File

@ -18,7 +18,7 @@ location /static/ {
# Send longpoll requests to Tornado # Send longpoll requests to Tornado
location /json/events { location /json/events {
proxy_pass http://tornado; proxy_pass $tornado_server;
include /etc/nginx/zulip-include/proxy_longpolling; include /etc/nginx/zulip-include/proxy_longpolling;
proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Real-IP $remote_addr;
@ -32,13 +32,12 @@ location /api/v1/events {
return 204; return 204;
} }
proxy_pass http://tornado; proxy_pass $tornado_server;
include /etc/nginx/zulip-include/proxy_longpolling; include /etc/nginx/zulip-include/proxy_longpolling;
proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Real-IP $remote_addr;
} }
# Send everything else to Django via uWSGI # Send everything else to Django via uWSGI
location / { location / {
include uwsgi_params; include uwsgi_params;

View File

@ -5,6 +5,7 @@ class zulip::app_frontend_base {
include zulip::nginx include zulip::nginx
include zulip::sasl_modules include zulip::sasl_modules
include zulip::supervisor include zulip::supervisor
include zulip::tornado_sharding
if $::osfamily == 'debian' { if $::osfamily == 'debian' {
$web_packages = [ $web_packages = [

View File

@ -0,0 +1,28 @@
# Seeds the two Tornado sharding config files with their single-shard
# defaults on hosts that include this class.
class zulip::tornado_sharding {
  include zulip::base
  include zulip::common
  include zulip::nginx

  # Both resources below exist only to create the files with sane
  # "only one shard" content the first time puppet runs.  They set
  # "replace => false" so that puppet never overwrites them again:
  # after creation, the sharding script owns their content.

  # nginx-side routing: by default every host is sent to the single
  # "tornado" upstream.
  file { '/etc/zulip/nginx_sharding.conf':
    ensure  => file,
    replace => false,
    content => "set \$tornado_server http://tornado;\n",
    mode    => '0640',
    owner   => 'root',
    group   => 'root',
    notify  => Service['nginx'],
  }

  # Django-side host -> port map: empty by default.
  file { '/etc/zulip/sharding.json':
    ensure  => file,
    replace => false,
    content => "{}\n",
    mode    => '0640',
    owner   => 'zulip',
    group   => 'zulip',
    require => User['zulip'],
  }
}

View File

@ -40,6 +40,7 @@ server {
alias /home/zulip/local-static; alias /home/zulip/local-static;
} }
include /etc/zulip/nginx_sharding.conf;
include /etc/nginx/zulip-include/certbot; include /etc/nginx/zulip-include/certbot;
include /etc/nginx/zulip-include/app; include /etc/nginx/zulip-include/app;
} }

View File

@ -12,5 +12,6 @@ server {
server_name zulipchat.com *.zulipchat.com; server_name zulipchat.com *.zulipchat.com;
include /etc/zulip/nginx_sharding.conf;
include /etc/nginx/zulip-include/app; include /etc/nginx/zulip-include/app;
} }

View File

@ -15,5 +15,6 @@ server {
server_name staging.zulip.com; server_name staging.zulip.com;
include /etc/zulip/nginx_sharding.conf;
include /etc/nginx/zulip-include/app; include /etc/nginx/zulip-include/app;
} }

56
scripts/lib/sharding.py Executable file
View File

@ -0,0 +1,56 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
from typing import Any, Dict
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(BASE_DIR)
from scripts.lib.setup_path import setup_path
setup_path()
from scripts.lib.zulip_tools import get_config_file
def write_realm_nginx_config_line(f: Any, host: str, port: str) -> None:
    """Write one nginx ``if`` stanza routing *host* to its Tornado upstream.

    The stanza sets ``$tornado_server`` to ``http://tornado<port>`` whenever
    nginx's ``$host`` equals *host*.

    Args:
        f: Writable text file object that receives the stanza.
        host: Hostname to compare against ``$host``.
        port: Port suffix appended to the ``tornado`` upstream name.
    """
    stanza_template = (
        "if ($host = '%s') {\n"
        "set $tornado_server http://tornado%s;\n"
        "}\n"
    )
    stanza = stanza_template % (host, port)
    f.write(stanza)
# Basic system to do Tornado sharding.  Writes two output .tmp files that need
# to be renamed to the following files to finalize the changes:
# * /etc/zulip/nginx_sharding.conf; nginx needs to be reloaded after changing.
# * /etc/zulip/sharding.json; supervisor Django process needs to be reloaded
#   after changing.  TODO: We can probably make this live-reload by statting the file.
#
# Input is the [tornado_sharding] section of the Zulip config file: each key
# is a Tornado port and each value is a whitespace-separated list of shards.
# A shard containing a '.' is used as a full hostname; otherwise it is treated
# as a subdomain of EXTERNAL_HOST.
#
# TODO: Restructure this to automatically generate a sharding layout.
with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \
        open('/etc/zulip/sharding.json.tmp', 'w') as sharding_json_f:

    config_file = get_config_file()
    if not config_file.has_section("tornado_sharding"):
        # No sharding configured: emit the single-Tornado defaults and stop.
        nginx_sharding_conf_f.write("set $tornado_server http://tornado;\n")
        sharding_json_f.write('{}\n')
        sys.exit(0)

    # Default upstream; the per-host "if" stanzas written below override it.
    nginx_sharding_conf_f.write("set $tornado_server http://tornado9800;\n")
    shard_map: Dict[str, int] = {}
    external_host = subprocess.check_output([os.path.join(BASE_DIR, 'scripts/get-django-setting'),
                                             'EXTERNAL_HOST'],
                                            universal_newlines=True).strip()
    for port in config_file["tornado_sharding"]:
        # split() with no argument splits on any run of whitespace and never
        # yields empty strings, so repeated spaces or tabs between shard names
        # (or an empty value) cannot produce a bogus ".<EXTERNAL_HOST>" entry.
        shards = config_file["tornado_sharding"][port].split()

        for shard in shards:
            if '.' in shard:
                host = shard
            else:
                host = "{}.{}".format(shard, external_host)
            # A host may be assigned to at most one port.
            assert host not in shard_map, "host %s duplicated" % (host,)
            shard_map[host] = int(port)
            write_realm_nginx_config_line(nginx_sharding_conf_f, host, port)
        nginx_sharding_conf_f.write('\n')

    sharding_json_f.write(json.dumps(shard_map) + '\n')

View File

@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Applies a new Tornado sharding configuration: runs puppet, moves the staged
# .tmp config files into place, then restarts/reloads the affected services.

# Abort on the first failing command, so that a missing .tmp file or a failed
# restart never leads to a half-applied configuration being reloaded.
set -e

"$(dirname "$0")/zulip-puppet-apply" -f

# The step above should have generated the config files, now we need to move them into place:
chown root:root /etc/zulip/nginx_sharding.conf.tmp
chmod 640 /etc/zulip/nginx_sharding.conf.tmp
chown zulip:zulip /etc/zulip/sharding.json.tmp
chmod 640 /etc/zulip/sharding.json.tmp
mv /etc/zulip/nginx_sharding.conf.tmp /etc/zulip/nginx_sharding.conf
mv /etc/zulip/sharding.json.tmp /etc/zulip/sharding.json

# In the ordering of operations below, the crucial detail is that
# zulip-django and zulip-workers:* need to be restarted before
# reloading nginx. Django has an in-memory map of which realm belongs
# to which shard. Reloading nginx will cause users' tornado requests
# to be routed according to the new sharding scheme. If that happens
# before Django is restarted, updating its realm->shard map, users on
# realms, whose shard has changed, will have their tornado requests
# handled by the new tornado process, while Django will still use the
# old process for its internal communication with tornado when
# servicing the user's requests. That's a bad state that leads to
# clients getting into reload loops ending in crashing on 500 response
# while Django is restarting. For this reason it's important to
# reload nginx only after Django.
supervisorctl restart zulip-django
# Quoted so the shell never expands the "*" against files in the current
# directory; supervisorctl must receive the literal group pattern.
supervisorctl restart "zulip-workers:*"
service nginx reload

View File

@ -1,13 +1,19 @@
from django.conf import settings from django.conf import settings
from zerver.models import Realm from zerver.models import Realm
import json
import os
# Host -> Tornado port map, read once when this module is imported.  It stays
# empty when /etc/zulip/sharding.json does not exist.
shard_map = {}
if os.path.exists("/etc/zulip/sharding.json"):
    with open("/etc/zulip/sharding.json") as sharding_json_f:
        shard_map = json.load(sharding_json_f)
def get_tornado_port(realm: Realm) -> int: def get_tornado_port(realm: Realm) -> int:
if settings.TORNADO_SERVER is None: if settings.TORNADO_SERVER is None:
return 9993 return 9993
if settings.TORNADO_PROCESSES == 1: if settings.TORNADO_PROCESSES == 1:
return int(settings.TORNADO_SERVER.split(":")[-1]) return int(settings.TORNADO_SERVER.split(":")[-1])
return 9993 return shard_map.get(realm.host, 9800)
def get_tornado_uri(realm: Realm) -> str: def get_tornado_uri(realm: Realm) -> str:
if settings.TORNADO_PROCESSES == 1: if settings.TORNADO_PROCESSES == 1: