sharding: Support Tornado sharding by regexes.

A Tornado shard can now be selected by a regular expression matched
against the host: append _regex to the port number in the key, and
give the pattern as the value:

[tornado_sharding]
9802_regex = ^[l-p].*\.zulipchat\.com$

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg 2022-08-31 16:11:27 -07:00 committed by Tim Abbott
parent 7666ff603d
commit 987ab741f9
4 changed files with 41 additions and 15 deletions

View File

@ -43,7 +43,7 @@ class zulip::tornado_sharding {
# The ports of Tornado processes to run on the server; defaults to # The ports of Tornado processes to run on the server; defaults to
# 9800. # 9800.
$tornado_ports = zulipconf_keys('tornado_sharding') $tornado_ports = unique(zulipconf_keys('tornado_sharding').map |$key| { regsubst($key, /_regex$/, '') })
file { '/etc/nginx/zulip-include/tornado-upstreams': file { '/etc/nginx/zulip-include/tornado-upstreams':
require => [Package[$zulip::common::nginx], Exec['stage_updated_sharding']], require => [Package[$zulip::common::nginx], Exec['stage_updated_sharding']],

View File

@ -31,10 +31,8 @@ def write_updated_configs() -> None:
config_file = get_config_file() config_file = get_config_file()
ports = get_tornado_ports(config_file) ports = get_tornado_ports(config_file)
expected_ports = list(range(9800, max(ports) + 1)) expected_ports = list(range(9800, ports[-1] + 1))
assert ( assert ports == expected_ports, f"ports ({ports}) must be contiguous, starting with 9800"
sorted(ports) == expected_ports
), f"ports ({sorted(ports)}) must be contiguous, starting with 9800"
with open("/etc/zulip/nginx_sharding_map.conf.tmp", "w") as nginx_sharding_conf_f, open( with open("/etc/zulip/nginx_sharding_map.conf.tmp", "w") as nginx_sharding_conf_f, open(
"/etc/zulip/sharding.json.tmp", "w" "/etc/zulip/sharding.json.tmp", "w"
@ -50,26 +48,33 @@ def write_updated_configs() -> None:
nginx_sharding_conf_f.write("map $http_host $tornado_server {\n") nginx_sharding_conf_f.write("map $http_host $tornado_server {\n")
nginx_sharding_conf_f.write(" default http://tornado9800;\n") nginx_sharding_conf_f.write(" default http://tornado9800;\n")
shard_map: Dict[str, int] = {} shard_map: Dict[str, int] = {}
shard_regexes = []
external_host = subprocess.check_output( external_host = subprocess.check_output(
[os.path.join(BASE_DIR, "scripts/get-django-setting"), "EXTERNAL_HOST"], [os.path.join(BASE_DIR, "scripts/get-django-setting"), "EXTERNAL_HOST"],
text=True, text=True,
).strip() ).strip()
for port in config_file["tornado_sharding"]: for key, shards in config_file["tornado_sharding"].items():
shards = config_file["tornado_sharding"][port].strip() if key.endswith("_regex"):
port = int(key[: -len("_regex")])
if shards: shard_regexes.append((shards, port))
for shard in shards.split(" "): nginx_sharding_conf_f.write(
f" {nginx_quote('~*' + shards)} http://tornado{port};\n"
)
else:
port = int(key)
for shard in shards.split():
if "." in shard: if "." in shard:
host = shard host = shard
else: else:
host = f"{shard}.{external_host}" host = f"{shard}.{external_host}"
assert host not in shard_map, f"host {host} duplicated" assert host not in shard_map, f"host {host} duplicated"
shard_map[host] = int(port) shard_map[host] = port
nginx_sharding_conf_f.write(f" {nginx_quote(host)} http://tornado{port};\n") nginx_sharding_conf_f.write(f" {nginx_quote(host)} http://tornado{port};\n")
nginx_sharding_conf_f.write("\n") nginx_sharding_conf_f.write("\n")
nginx_sharding_conf_f.write("}\n") nginx_sharding_conf_f.write("}\n")
sharding_json_f.write(json.dumps(shard_map) + "\n") data = {"shard_map": shard_map, "shard_regexes": shard_regexes}
sharding_json_f.write(json.dumps(data) + "\n")
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(

View File

@ -593,7 +593,12 @@ def run_psql_as_postgres(
def get_tornado_ports(config_file: configparser.RawConfigParser) -> List[int]: def get_tornado_ports(config_file: configparser.RawConfigParser) -> List[int]:
ports = [] ports = []
if config_file.has_section("tornado_sharding"): if config_file.has_section("tornado_sharding"):
ports = [int(port) for port in config_file.options("tornado_sharding")] ports = sorted(
{
int(key[: -len("_regex")] if key.endswith("_regex") else key)
for key in config_file.options("tornado_sharding")
}
)
if not ports: if not ports:
ports = [9800] ports = [9800]
return ports return ports

View File

@ -1,18 +1,34 @@
import json import json
import os import os
import re
from django.conf import settings from django.conf import settings
from zerver.models import Realm from zerver.models import Realm
shard_map = {} shard_map = {}
shard_regexes = []
if os.path.exists("/etc/zulip/sharding.json"): if os.path.exists("/etc/zulip/sharding.json"):
with open("/etc/zulip/sharding.json") as f: with open("/etc/zulip/sharding.json") as f:
shard_map = json.loads(f.read()) data = json.loads(f.read())
shard_map = data.get(
"shard_map",
data, # backwards compatibility
)
shard_regexes = [
(re.compile(regex, re.I), port) for regex, port in data.get("shard_regexes", [])
]
def get_tornado_port(realm: Realm) -> int: def get_tornado_port(realm: Realm) -> int:
return shard_map.get(realm.host, settings.TORNADO_PORTS[0]) if realm.host in shard_map:
return shard_map[realm.host]
for regex, port in shard_regexes:
if regex.match(realm.host):
return port
return settings.TORNADO_PORTS[0]
def get_tornado_uri(realm: Realm) -> str: def get_tornado_uri(realm: Realm) -> str: