diff --git a/puppet/zulip/manifests/tornado_sharding.pp b/puppet/zulip/manifests/tornado_sharding.pp index 5da3c310af..ded1e6a298 100644 --- a/puppet/zulip/manifests/tornado_sharding.pp +++ b/puppet/zulip/manifests/tornado_sharding.pp @@ -43,7 +43,7 @@ class zulip::tornado_sharding { # The ports of Tornado processes to run on the server; defaults to # 9800. - $tornado_ports = zulipconf_keys('tornado_sharding') + $tornado_ports = unique(zulipconf_keys('tornado_sharding').map |$key| { regsubst($key, /_regex$/, '') }) file { '/etc/nginx/zulip-include/tornado-upstreams': require => [Package[$zulip::common::nginx], Exec['stage_updated_sharding']], diff --git a/scripts/lib/sharding.py b/scripts/lib/sharding.py index 48cb61c07b..cd5283c079 100755 --- a/scripts/lib/sharding.py +++ b/scripts/lib/sharding.py @@ -31,10 +31,8 @@ def write_updated_configs() -> None: config_file = get_config_file() ports = get_tornado_ports(config_file) - expected_ports = list(range(9800, max(ports) + 1)) - assert ( - sorted(ports) == expected_ports - ), f"ports ({sorted(ports)}) must be contiguous, starting with 9800" + expected_ports = list(range(9800, ports[-1] + 1)) + assert ports == expected_ports, f"ports ({ports}) must be contiguous, starting with 9800" with open("/etc/zulip/nginx_sharding_map.conf.tmp", "w") as nginx_sharding_conf_f, open( "/etc/zulip/sharding.json.tmp", "w" @@ -50,26 +48,33 @@ def write_updated_configs() -> None: nginx_sharding_conf_f.write("map $http_host $tornado_server {\n") nginx_sharding_conf_f.write(" default http://tornado9800;\n") shard_map: Dict[str, int] = {} + shard_regexes = [] external_host = subprocess.check_output( [os.path.join(BASE_DIR, "scripts/get-django-setting"), "EXTERNAL_HOST"], text=True, ).strip() - for port in config_file["tornado_sharding"]: - shards = config_file["tornado_sharding"][port].strip() - - if shards: - for shard in shards.split(" "): + for key, shards in config_file["tornado_sharding"].items(): + if key.endswith("_regex"): + port = int(key[: -len("_regex")]) + shard_regexes.append((shards, port)) + nginx_sharding_conf_f.write( + f" {nginx_quote('~*' + shards)} http://tornado{port};\n" + ) + else: + port = int(key) + for shard in shards.split(): if "." in shard: host = shard else: host = f"{shard}.{external_host}" assert host not in shard_map, f"host {host} duplicated" - shard_map[host] = int(port) + shard_map[host] = port nginx_sharding_conf_f.write(f" {nginx_quote(host)} http://tornado{port};\n") nginx_sharding_conf_f.write("\n") nginx_sharding_conf_f.write("}\n") - sharding_json_f.write(json.dumps(shard_map) + "\n") + data = {"shard_map": shard_map, "shard_regexes": shard_regexes} + sharding_json_f.write(json.dumps(data) + "\n") parser = argparse.ArgumentParser( diff --git a/scripts/lib/zulip_tools.py b/scripts/lib/zulip_tools.py index 02c3f500f5..0f6cf3cbff 100755 --- a/scripts/lib/zulip_tools.py +++ b/scripts/lib/zulip_tools.py @@ -593,7 +593,12 @@ def run_psql_as_postgres( def get_tornado_ports(config_file: configparser.RawConfigParser) -> List[int]: ports = [] if config_file.has_section("tornado_sharding"): - ports = [int(port) for port in config_file.options("tornado_sharding")] + ports = sorted( + { + int(key[: -len("_regex")] if key.endswith("_regex") else key) + for key in config_file.options("tornado_sharding") + } + ) if not ports: ports = [9800] return ports diff --git a/zerver/tornado/sharding.py b/zerver/tornado/sharding.py index 0cc38c823b..338a73aeaf 100644 --- a/zerver/tornado/sharding.py +++ b/zerver/tornado/sharding.py @@ -1,18 +1,34 @@ import json import os +import re from django.conf import settings from zerver.models import Realm shard_map = {} +shard_regexes = [] if os.path.exists("/etc/zulip/sharding.json"): with open("/etc/zulip/sharding.json") as f: - shard_map = json.loads(f.read()) + data = json.loads(f.read()) + shard_map = data.get( + "shard_map", + data, # backwards compatibility + ) + shard_regexes = [ + (re.compile(regex, re.I), port) for regex, port in data.get("shard_regexes", []) + ] def get_tornado_port(realm: Realm) -> int: - return shard_map.get(realm.host, settings.TORNADO_PORTS[0]) + if realm.host in shard_map: + return shard_map[realm.host] + + for regex, port in shard_regexes: + if regex.match(realm.host): + return port + + return settings.TORNADO_PORTS[0] def get_tornado_uri(realm: Realm) -> str: