mirror of https://github.com/zulip/zulip.git
sharding: Add basic sharding configuration for Tornado.
This allows straightforward configuration of realm-based Tornado sharding by simply editing /etc/zulip/zulip.conf to configure shards and running scripts/refresh-sharding-and-restart. Co-Author-By: Mateusz Mandera <mateusz.mandera@zulip.com>
This commit is contained in:
parent
cdd3b7efbc
commit
220620e7cf
|
@ -18,7 +18,7 @@ location /static/ {
|
|||
|
||||
# Send longpoll requests to Tornado
|
||||
location /json/events {
|
||||
proxy_pass http://tornado;
|
||||
proxy_pass $tornado_server;
|
||||
include /etc/nginx/zulip-include/proxy_longpolling;
|
||||
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
|
@ -32,13 +32,12 @@ location /api/v1/events {
|
|||
return 204;
|
||||
}
|
||||
|
||||
proxy_pass http://tornado;
|
||||
proxy_pass $tornado_server;
|
||||
include /etc/nginx/zulip-include/proxy_longpolling;
|
||||
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
|
||||
|
||||
# Send everything else to Django via uWSGI
|
||||
location / {
|
||||
include uwsgi_params;
|
||||
|
|
|
@ -5,6 +5,7 @@ class zulip::app_frontend_base {
|
|||
include zulip::nginx
|
||||
include zulip::sasl_modules
|
||||
include zulip::supervisor
|
||||
include zulip::tornado_sharding
|
||||
|
||||
if $::osfamily == 'debian' {
|
||||
$web_packages = [
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
class zulip::tornado_sharding {
  include zulip::base
  include zulip::common
  include zulip::nginx

  # Puppet only seeds the two sharding config files with the default
  # "only one shard" content.  Both resources set "replace => false" so
  # that, once a file exists, puppet leaves it alone: from then on the
  # sharding script manages the contents and must not be overwritten.
  file { '/etc/zulip/nginx_sharding.conf':
    ensure  => file,
    replace => false,
    content => "set \$tornado_server http://tornado;\n",
    owner   => 'root',
    group   => 'root',
    mode    => '0640',
    notify  => Service['nginx'],
  }
  file { '/etc/zulip/sharding.json':
    ensure  => file,
    replace => false,
    content => "{}\n",
    owner   => 'zulip',
    group   => 'zulip',
    mode    => '0640',
    require => User['zulip'],
  }
}
|
|
@ -40,6 +40,7 @@ server {
|
|||
alias /home/zulip/local-static;
|
||||
}
|
||||
|
||||
include /etc/zulip/nginx_sharding.conf;
|
||||
include /etc/nginx/zulip-include/certbot;
|
||||
include /etc/nginx/zulip-include/app;
|
||||
}
|
||||
|
|
|
@ -12,5 +12,6 @@ server {
|
|||
|
||||
server_name zulipchat.com *.zulipchat.com;
|
||||
|
||||
include /etc/zulip/nginx_sharding.conf;
|
||||
include /etc/nginx/zulip-include/app;
|
||||
}
|
||||
|
|
|
@ -15,5 +15,6 @@ server {
|
|||
|
||||
server_name staging.zulip.com;
|
||||
|
||||
include /etc/zulip/nginx_sharding.conf;
|
||||
include /etc/nginx/zulip-include/app;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Any, Dict
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
sys.path.append(BASE_DIR)
|
||||
from scripts.lib.setup_path import setup_path
|
||||
|
||||
setup_path()
|
||||
|
||||
from scripts.lib.zulip_tools import get_config_file
|
||||
|
||||
def write_realm_nginx_config_line(f: Any, host: str, port: str) -> None:
    """Write an nginx ``if`` block that routes ``host`` to one Tornado shard.

    The emitted block sets ``$tornado_server`` to ``http://tornado<port>``
    whenever nginx's ``$host`` equals ``host``.

    Args:
        f: Writable text file object receiving the nginx configuration.
        host: Hostname to match; interpolated inside single quotes.
        port: Port suffix of the Tornado upstream name, as a digit string.

    Raises:
        ValueError: If ``host`` is empty or contains a character that could
            terminate or alter the quoted nginx string, or if ``port`` is
            not made up solely of ASCII digits.  Nothing is written then.
    """
    import re  # Local import keeps this block independent of the file's import list.

    # The values are pasted verbatim into nginx configuration, so reject
    # anything that would yield a broken or unintended directive instead of
    # letting nginx fail later at reload time.
    if not host or re.search(r"""['"\\${};\s]""", host):
        raise ValueError("invalid host for nginx sharding config: %r" % (host,))
    if not re.fullmatch(r"[0-9]+", port):
        raise ValueError("invalid Tornado port for nginx sharding config: %r" % (port,))

    f.write(
        "if ($host = '%s') {\n"
        "    set $tornado_server http://tornado%s;\n"
        "}\n" % (host, port)
    )
|
||||
|
||||
# Basic system to do Tornado sharding. Writes two output .tmp files that need
|
||||
# to be renamed to the following files to finalize the changes:
|
||||
# * /etc/zulip/nginx_sharding.conf; nginx needs to be reloaded after changing.
|
||||
# * /etc/zulip/sharding.json; supervisor Django process needs to be reloaded
|
||||
# after changing. TODO: We can probably make this live-reload by statting the file.
|
||||
#
|
||||
# TODO: Restructure this to automatically generate a sharding layout.
|
||||
with open('/etc/zulip/nginx_sharding.conf.tmp', 'w') as nginx_sharding_conf_f, \
        open('/etc/zulip/sharding.json.tmp', 'w') as sharding_json_f:

    config_file = get_config_file()
    if not config_file.has_section("tornado_sharding"):
        # No sharding configured: emit the single-Tornado defaults and stop.
        # The context manager still closes both files on SystemExit.
        nginx_sharding_conf_f.write("set $tornado_server http://tornado;\n")
        sharding_json_f.write('{}\n')
        sys.exit(0)

    # Default upstream; the per-host "if" blocks written below override it.
    nginx_sharding_conf_f.write("set $tornado_server http://tornado9800;\n")
    shard_map: Dict[str, int] = {}
    external_host = subprocess.check_output([os.path.join(BASE_DIR, 'scripts/get-django-setting'),
                                             'EXTERNAL_HOST'],
                                            universal_newlines=True).strip()
    for port in config_file["tornado_sharding"]:
        # split() with no argument collapses runs of whitespace, so repeated
        # spaces or tabs between names never produce an empty shard name
        # (which would otherwise turn into the bogus host ".<external_host>").
        shards = config_file["tornado_sharding"][port].split()

        for shard in shards:
            # A name containing a dot is taken as a full hostname; anything
            # else is treated as a subdomain of EXTERNAL_HOST.
            if '.' in shard:
                host = shard
            else:
                host = "{}.{}".format(shard, external_host)
            # Raise explicitly rather than assert, so the duplicate check
            # still runs when Python is started with -O.
            if host in shard_map:
                raise ValueError("host %s duplicated" % (host,))
            shard_map[host] = int(port)
            write_realm_nginx_config_line(nginx_sharding_conf_f, host, port)
        nginx_sharding_conf_f.write('\n')

    sharding_json_f.write(json.dumps(shard_map) + '\n')
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env bash

# Regenerate the Tornado sharding configuration and restart the affected
# services so the new realm->shard layout takes effect.

set -e

"$(dirname "$0")/zulip-puppet-apply" -f

# The step above should have generated the config files, now we need to move them into place:
chown root:root /etc/zulip/nginx_sharding.conf.tmp
chmod 640 /etc/zulip/nginx_sharding.conf.tmp
chown zulip:zulip /etc/zulip/sharding.json.tmp
chmod 640 /etc/zulip/sharding.json.tmp
mv /etc/zulip/nginx_sharding.conf.tmp /etc/zulip/nginx_sharding.conf
mv /etc/zulip/sharding.json.tmp /etc/zulip/sharding.json

# In the ordering of operations below, the crucial detail is that
# zulip-django and zulip-workers:* need to be restarted before
# reloading nginx. Django has an in-memory map of which realm belongs
# to which shard. Reloading nginx will cause users' tornado requests
# to be routed according to the new sharding scheme. If that happens
# before Django is restarted, updating its realm->shard map, users on
# realms, whose shard has changed, will have their tornado requests
# handled by the new tornado process, while Django will still use the
# old process for its internal communication with tornado when
# servicing the user's requests. That's a bad state that leads to
# clients getting into reload loops ending in crashing on 500 response
# while Django is restarting. For this reason it's important to
# reload nginx only after Django.
supervisorctl restart zulip-django
# Quote the group pattern so the shell never glob-expands it against files
# in the current directory; supervisorctl must receive the literal "*".
supervisorctl restart 'zulip-workers:*'
service nginx reload
|
|
@ -1,13 +1,19 @@
|
|||
from django.conf import settings
|
||||
|
||||
from zerver.models import Realm
|
||||
import json
|
||||
import os
|
||||
# Sharding map loaded once at import time from /etc/zulip/sharding.json.
# get_tornado_port() looks entries up by realm.host and returns the value
# as a port number.  Stays empty when the file does not exist.
shard_map = {}
try:
    # Open directly rather than testing os.path.exists() first: this avoids
    # the window in which the file could vanish between check and open.
    with open("/etc/zulip/sharding.json") as f:
        shard_map = json.load(f)
except FileNotFoundError:
    # No sharding file present: keep the empty map.
    pass
|
||||
|
||||
def get_tornado_port(realm: Realm) -> int:
    """Return the port of the Tornado process responsible for ``realm``.

    Resolution order:

    * ``settings.TORNADO_SERVER`` is ``None``: the fixed port 9993.
    * ``settings.TORNADO_PROCESSES == 1``: the port parsed from the text
      after the last ``:`` in ``settings.TORNADO_SERVER``.
    * otherwise: the entry for ``realm.host`` in the module-level
      ``shard_map``, falling back to 9800 for hosts not listed there.
    """
    if settings.TORNADO_SERVER is None:
        return 9993
    if settings.TORNADO_PROCESSES == 1:
        return int(settings.TORNADO_SERVER.split(":")[-1])
    # An unconditional ``return 9993`` used to sit here, which made the
    # sharded lookup below unreachable.
    return shard_map.get(realm.host, 9800)
|
||||
|
||||
def get_tornado_uri(realm: Realm) -> str:
|
||||
if settings.TORNADO_PROCESSES == 1:
|
||||
|
|
Loading…
Reference in New Issue