From 49ad188449a559f931a74b5aedf85c2dbe8dde70 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Wed, 3 Nov 2021 13:43:02 -0700 Subject: [PATCH] rate_limit: Add a flag to lump all TOR exit node IPs together. TOR users are legitimate users of the system; however, that system can also be used for abuse -- specifically, by evading IP-based rate-limiting. For the purposes of IP-based rate-limiting, add a RATE_LIMIT_TOR_TOGETHER flag, defaulting to false, which lumps all requests from TOR exit nodes into the same bucket. This may allow a TOR user to deny other TOR users access to the find-my-account and new-realm endpoints, but this is a low cost for cutting off a significant potential abuse vector. If enabled, the list of TOR exit nodes is fetched from their public endpoint once per hour, via a cron job, and cached on disk. Django processes load this data from disk, and cache it in memcached. Requests are spared from the burden of checking disk on failure via a circuitbreaker, which trips of there are two failures in a row, and only begins trying again after 10 minutes. --- .../zulip/files/cron.d/fetch-tor-exit-nodes | 5 + puppet/zulip/manifests/app_frontend_base.pp | 9 ++ pyproject.toml | 1 + requirements/common.in | 3 + requirements/dev.txt | 3 + requirements/prod.txt | 3 + version.py | 2 +- zerver/decorator.py | 47 ++++++- .../commands/fetch_tor_exit_nodes.py | 74 +++++++++++ zerver/tests/test_external.py | 123 +++++++++++++++++- zproject/computed_settings.py | 6 + zproject/default_settings.py | 1 + zproject/prod_settings_template.py | 4 + 13 files changed, 278 insertions(+), 3 deletions(-) create mode 100644 puppet/zulip/files/cron.d/fetch-tor-exit-nodes create mode 100644 zerver/management/commands/fetch_tor_exit_nodes.py diff --git a/puppet/zulip/files/cron.d/fetch-tor-exit-nodes b/puppet/zulip/files/cron.d/fetch-tor-exit-nodes new file mode 100644 index 0000000000..6b78d98366 --- /dev/null +++ b/puppet/zulip/files/cron.d/fetch-tor-exit-nodes @@ -0,0 +1,5 @@ +SHELL=/bin/bash +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin +USER=zulip + +0 * * * * zulip /home/zulip/deployments/current/manage.py fetch_tor_exit_nodes diff --git a/puppet/zulip/manifests/app_frontend_base.pp b/puppet/zulip/manifests/app_frontend_base.pp index 86fc22d2ab..204015d167 100644 --- a/puppet/zulip/manifests/app_frontend_base.pp +++ b/puppet/zulip/manifests/app_frontend_base.pp @@ -199,4 +199,13 @@ class zulip::app_frontend_base { mode => '0755', source => 'puppet:///modules/zulip/nagios_plugins/zulip_app_frontend', } + + # This cron job does nothing unless RATE_LIMIT_TOR_TOGETHER is enabled. + file { '/etc/cron.d/fetch-for-exit-nodes': + ensure => file, + owner => 'root', + group => 'root', + mode => '0644', + source => 'puppet:///modules/zulip/cron.d/fetch-tor-exit-nodes', + } } diff --git a/pyproject.toml b/pyproject.toml index 7a5202657f..f6d5611bee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ module = [ "bs4.*", "bson.*", "cairosvg.*", + "circuitbreaker.*", "coverage.*", "cssutils.*", "defusedxml.*", diff --git a/requirements/common.in b/requirements/common.in index 0d28552d3c..7f560a4654 100644 --- a/requirements/common.in +++ b/requirements/common.in @@ -194,3 +194,6 @@ django-scim2 # CSS manipulation soupsieve + +# Circuit-breaking for outgoing services +circuitbreaker diff --git a/requirements/dev.txt b/requirements/dev.txt index c53e6276ad..725b122239 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -214,6 +214,9 @@ charset-normalizer==2.0.7 \ --hash=sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0 \ --hash=sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b # via requests +circuitbreaker==1.3.2 \ + --hash=sha256:747d4ced5c0797e2ab1d3e00a03b312db23e7ec65106148fc63beec25bbba50f + # via -r requirements/common.in click==8.0.3 \ --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b diff --git a/requirements/prod.txt b/requirements/prod.txt index 13a2867aab..061fd4e3ff 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -161,6 +161,9 @@ charset-normalizer==2.0.7 \ --hash=sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0 \ --hash=sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b # via requests +circuitbreaker==1.3.2 \ + --hash=sha256:747d4ced5c0797e2ab1d3e00a03b312db23e7ec65106148fc63beec25bbba50f + # via -r requirements/common.in click==8.0.3 \ --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b diff --git a/version.py b/version.py index 438559289e..57ec384a5c 100644 --- a/version.py +++ b/version.py @@ -48,4 +48,4 @@ API_FEATURE_LEVEL = 107 # historical commits sharing the same major version, in which case a # minor version bump suffices. -PROVISION_VERSION = "165.2" +PROVISION_VERSION = "165.3" diff --git a/zerver/decorator.py b/zerver/decorator.py index e2a49b8f6b..5c744bf7c5 100644 --- a/zerver/decorator.py +++ b/zerver/decorator.py @@ -4,9 +4,11 @@ import logging import urllib from functools import wraps from io import BytesIO -from typing import Callable, Dict, Optional, Sequence, Tuple, TypeVar, Union, cast, overload +from typing import Callable, Dict, Optional, Sequence, Set, Tuple, TypeVar, Union, cast, overload import django_otp +import orjson +from circuitbreaker import CircuitBreakerError, circuit from django.conf import settings from django.contrib.auth import REDIRECT_FIELD_NAME from django.contrib.auth import login as django_login @@ -23,6 +25,7 @@ from django.views.decorators.csrf import csrf_exempt from django_otp import user_has_device from two_factor.utils import default_device +from zerver.lib.cache import cache_with_key from zerver.lib.exceptions import ( AccessDeniedError, ErrorCode, @@ -884,6 +887,30 @@ def rate_limit_user(request: HttpRequest, user: UserProfile, domain: str) -> Non RateLimitedUser(user, domain=domain).rate_limit_request(request) +@cache_with_key(lambda: "tor_ip_addresses:", timeout=60 * 60) +@circuit(failure_threshold=2, recovery_timeout=60 * 10) +def get_tor_ips() -> Set[str]: + if not settings.RATE_LIMIT_TOR_TOGETHER: + return set() + + # Cron job in /etc/cron.d/fetch-for-exit-nodes fetches this + # hourly; we cache it in memcached to prevent going to disk on + # every unauth'd request. In case of failures to read, we + # circuit-break so 2 failures cause a 10-minute backoff. + + with open(settings.TOR_EXIT_NODE_FILE_PATH, "rb") as f: + exit_node_list = orjson.loads(f.read()) + + # This should always be non-empty; if it's empty, assume something + # went wrong with writing and treat it as a non-existent file. + # Circuit-breaking will ensure that we back off on re-reading the + # file. + if len(exit_node_list) == 0: + raise IOError("File is empty") + + return set(exit_node_list) + + def rate_limit_ip(request: HttpRequest, ip_addr: str, domain: str) -> None: RateLimitedIPAddr(ip_addr, domain=domain).rate_limit_request(request) @@ -894,6 +921,24 @@ def rate_limit_request_by_ip(request: HttpRequest, domain: str) -> None: # IP address to use - without worrying we'll grab the IP of a proxy. ip_addr = request.META["REMOTE_ADDR"] assert ip_addr + + try: + # We lump all TOR exit nodes into one bucket; this prevents + # abuse from TOR, while still allowing some access to these + # endpoints for legitimate users. Checking for local + # addresses is a shortcut somewhat for ease of testing without + # mocking the TOR endpoint in every test. + if is_local_addr(ip_addr): + pass + elif ip_addr in get_tor_ips(): + ip_addr = "tor-exit-node" + except (IOError, CircuitBreakerError) as err: + # In the event that we can't get an updated list of TOR exit + # nodes, assume the IP is _not_ one, and leave it unchanged. + # We log a warning so that this endpoint being taken out of + # service doesn't silently remove this functionality. + rate_limiter_logger.warning("Failed to fetch TOR exit node list: %s", err) + pass rate_limit_ip(request, ip_addr, domain=domain) diff --git a/zerver/management/commands/fetch_tor_exit_nodes.py b/zerver/management/commands/fetch_tor_exit_nodes.py new file mode 100644 index 0000000000..99c630b5fe --- /dev/null +++ b/zerver/management/commands/fetch_tor_exit_nodes.py @@ -0,0 +1,74 @@ +import os +from argparse import ArgumentParser +from typing import Any, Set + +import orjson +from django.conf import settings +from requests.packages.urllib3.util.retry import Retry + +from zerver.lib.management import ZulipBaseCommand +from zerver.lib.outgoing_http import OutgoingSession + + +class TorDataSession(OutgoingSession): + def __init__(self, max_retries: int) -> None: + Retry.BACKOFF_MAX = 64 + retry = Retry( + total=max_retries, + backoff_factor=2.0, + status_forcelist={ # Retry on these + 429, # The formal rate-limiting response code + 500, # Server error + 502, # Bad gateway + 503, # Service unavailable + }, + ) + super().__init__(role="tor_data", timeout=3, max_retries=retry) + + +class Command(ZulipBaseCommand): + help = """Fetch the list of TOR exit nodes, and write the list of IP addresses +to a file for access from Django for rate-limiting purposes. + +Does nothing unless RATE_LIMIT_TOR_TOGETHER is enabled. +""" + + def add_arguments(self, parser: ArgumentParser) -> None: + parser.add_argument( + "--max-retries", + type=int, + default=10, + help="Number of times to retry fetching data from TOR", + ) + + def handle(*args: Any, **options: Any) -> None: + if not settings.RATE_LIMIT_TOR_TOGETHER: + return + + certificates = os.environ.get("CUSTOM_CA_CERTIFICATES") + session = TorDataSession(max_retries=options["max_retries"]) + response = session.get( + "https://check.torproject.org/exit-addresses", + verify=certificates, + ) + response.raise_for_status() + + # Format: + # ExitNode 4273E6D162ED2717A1CF4207A254004CD3F5307B + # Published 2021-11-02 11:01:07 + # LastStatus 2021-11-02 23:00:00 + # ExitAddress 176.10.99.200 2021-11-02 23:17:02 + exit_nodes: Set[str] = set() + for line in response.text.splitlines(): + if line.startswith("ExitAddress "): + exit_nodes.add(line.split()[1]) + + # Write to a tmpfile to ensure we can't read a partially-written file + with open(settings.TOR_EXIT_NODE_FILE_PATH + ".tmp", "wb") as f: + f.write(orjson.dumps(list(exit_nodes))) + + # Do an atomic rename into place + os.rename( + settings.TOR_EXIT_NODE_FILE_PATH + ".tmp", + settings.TOR_EXIT_NODE_FILE_PATH, + ) diff --git a/zerver/tests/test_external.py b/zerver/tests/test_external.py index 286d097f2e..86a026ca38 100644 --- a/zerver/tests/test_external.py +++ b/zerver/tests/test_external.py @@ -1,15 +1,20 @@ import time from contextlib import contextmanager -from typing import Any, Callable, Iterator +from typing import IO, Any, Callable, Iterator, Optional, Sequence from unittest import mock, skipUnless import DNS +import orjson +from circuitbreaker import CircuitBreakerMonitor from django.conf import settings from django.core.exceptions import ValidationError from django.http import HttpResponse +from django.test import override_settings from django.utils.timezone import now as timezone_now +from zerver import decorator from zerver.forms import email_is_not_mit_mailing_list +from zerver.lib.cache import cache_delete from zerver.lib.rate_limiter import ( RateLimitedIPAddr, RateLimitedUser, @@ -287,6 +292,122 @@ class RateLimitTests(ZulipTestCase): self.do_test_hit_ratelimits(alternate_requests, is_json=False) + @contextmanager + def tor_mock( + self, + side_effect: Optional[Exception] = None, + read_data: Sequence[str] = ["1.2.3.4", "5.6.7.8"], + ) -> Iterator[mock.Mock]: + # We need to reset the circuitbreaker before starting. We + # patch the .opened property to be false, then call the + # function, so it resets to closed. + with mock.patch("builtins.open", mock.mock_open(read_data=orjson.dumps(["1.2.3.4"]))): + with mock.patch( + "circuitbreaker.CircuitBreaker.opened", new_callable=mock.PropertyMock + ) as mock_opened: + mock_opened.return_value = False + decorator.get_tor_ips() + + # Having closed it, it's now cached. Clear the cache. + assert CircuitBreakerMonitor.get("get_tor_ips").closed + cache_delete("tor_ip_addresses:") + + builtin_open = open + if side_effect: + tor_open = mock.MagicMock(side_effect=side_effect) + else: + tor_open = mock.mock_open(read_data=orjson.dumps(read_data)) + + def selective_mock_open(*args: Any, **kwargs: Any) -> IO[Any]: + if args[0] == settings.TOR_EXIT_NODE_FILE_PATH: + return tor_open(*args, **kwargs) + return builtin_open(*args, **kwargs) + + with mock.patch("builtins.open", selective_mock_open): + yield tor_open + + @rate_limit_rule(1, 5, domain="api_by_ip") + @override_settings(RATE_LIMIT_TOR_TOGETHER=True) + def test_tor_ip_limits(self) -> None: + request_count = 0 + for ip in ["1.2.3.4", "5.6.7.8", "tor-exit-node"]: + RateLimitedIPAddr(ip, domain="api_by_ip").clear_history() + + def alternate_requests() -> HttpResponse: + nonlocal request_count + request_count += 1 + if request_count % 2 == 1: + return self.send_unauthed_api_request(REMOTE_ADDR="1.2.3.4") + else: + return self.send_unauthed_api_request(REMOTE_ADDR="5.6.7.8") + + with self.tor_mock(read_data=["1.2.3.4", "5.6.7.8"]) as tor_open: + self.do_test_hit_ratelimits(alternate_requests) + + # This is only read once, despite being used on each request + tor_open.assert_called_once_with(settings.TOR_EXIT_NODE_FILE_PATH, "rb") + tor_open().read.assert_called_once() + + @rate_limit_rule(1, 5, domain="api_by_ip") + @override_settings(RATE_LIMIT_TOR_TOGETHER=True) + def test_tor_file_empty(self) -> None: + for ip in ["1.2.3.4", "5.6.7.8", "tor-exit-node"]: + RateLimitedIPAddr(ip, domain="api_by_ip").clear_history() + + # An empty list of IPs is treated as some error in parsing the + # input, and as such should not be cached; rate-limiting + # should work as normal, per-IP + with self.tor_mock(read_data=[]) as tor_open: + with self.assertLogs("zerver.lib.rate_limiter", level="WARNING"): + self.do_test_hit_ratelimits( + lambda: self.send_unauthed_api_request(REMOTE_ADDR="1.2.3.4") + ) + resp = self.send_unauthed_api_request(REMOTE_ADDR="5.6.7.8") + self.assertNotEqual(resp.status_code, 429) + + # Was not cached, so tried to read twice before hitting the + # circuit-breaker, and stopping trying + tor_open().read.assert_has_calls([mock.call(), mock.call()]) + + @rate_limit_rule(1, 5, domain="api_by_ip") + @override_settings(RATE_LIMIT_TOR_TOGETHER=True) + def test_tor_file_not_found(self) -> None: + for ip in ["1.2.3.4", "5.6.7.8", "tor-exit-node"]: + RateLimitedIPAddr(ip, domain="api_by_ip").clear_history() + + with self.tor_mock(side_effect=FileNotFoundError("File not found")) as tor_open: + # If we cannot get a list of TOR exit nodes, then + # rate-limiting works as normal, per-IP + with self.assertLogs("zerver.lib.rate_limiter", level="WARNING") as log_mock: + self.do_test_hit_ratelimits( + lambda: self.send_unauthed_api_request(REMOTE_ADDR="1.2.3.4") + ) + resp = self.send_unauthed_api_request(REMOTE_ADDR="5.6.7.8") + self.assertNotEqual(resp.status_code, 429) + + # Tries twice before hitting the circuit-breaker, and stopping trying + tor_open.assert_has_calls( + [ + mock.call(settings.TOR_EXIT_NODE_FILE_PATH, "rb"), + mock.call(settings.TOR_EXIT_NODE_FILE_PATH, "rb"), + ] + ) + + self.assert_length(log_mock.output, 8) + self.assertEqual( + log_mock.output[0:2], + [ + "WARNING:zerver.lib.rate_limiter:Failed to fetch TOR exit node list: {}".format( + "File not found" + ) + ] + * 2, + ) + self.assertIn( + 'Failed to fetch TOR exit node list: Circuit "get_tor_ips" OPEN', + log_mock.output[3], + ) + @skipUnless(settings.ZILENCER_ENABLED, "requires zilencer") @rate_limit_rule(1, 5, domain="api_by_remote_server") def test_hit_ratelimits_as_remote_server(self) -> None: diff --git a/zproject/computed_settings.py b/zproject/computed_settings.py index ae1ce0e191..0e7beb85d7 100644 --- a/zproject/computed_settings.py +++ b/zproject/computed_settings.py @@ -416,6 +416,12 @@ RATE_LIMITING_MIRROR_REALM_RULES = [ DEBUG_RATE_LIMITING = DEBUG REDIS_PASSWORD = get_secret("redis_password") +# See RATE_LIMIT_TOR_TOGETHER +if DEVELOPMENT: + TOR_EXIT_NODE_FILE_PATH = os.path.join(DEPLOY_ROOT, "var/tor-exit-nodes.json") +else: + TOR_EXIT_NODE_FILE_PATH = "/var/lib/zulip/tor-exit-nodes.json" + ######################################################################## # SECURITY SETTINGS ######################################################################## diff --git a/zproject/default_settings.py b/zproject/default_settings.py index e535c2ffcc..924d56a4fb 100644 --- a/zproject/default_settings.py +++ b/zproject/default_settings.py @@ -195,6 +195,7 @@ SUBMIT_USAGE_STATISTICS = True PROMOTE_SPONSORING_ZULIP = True RATE_LIMITING = True RATE_LIMITING_AUTHENTICATE = True +RATE_LIMIT_TOR_TOGETHER = False SEND_LOGIN_EMAILS = True EMBEDDED_BOTS_ENABLED = False diff --git a/zproject/prod_settings_template.py b/zproject/prod_settings_template.py index 462c45c8b3..0e422f10e2 100644 --- a/zproject/prod_settings_template.py +++ b/zproject/prod_settings_template.py @@ -756,6 +756,10 @@ CAMO_URI = "/external_content/" ## Controls whether Zulip will rate-limit user requests. # RATE_LIMITING = True +## Fetch TOR exit node list every hour, and group all TOR exit nodes +## together into one bucket when applying rate-limiting. +# RATE_LIMIT_TOR_TOGETHER = False + ## If you want to set a Terms of Service for your server, set the path ## to your Markdown file, and uncomment the following line. # TERMS_OF_SERVICE = '/etc/zulip/terms.md'