diff --git a/mypy.ini b/mypy.ini index 104a3f4410..b6d122e219 100644 --- a/mypy.ini +++ b/mypy.ini @@ -76,9 +76,6 @@ strict_optional = False [mypy-zerver/management/commands/purge_queue] #24: error: Item "None" of "Optional[Any]" has no attribute "queue_purge" strict_optional = False -[mypy-zerver.tornado.handlers] # Delayed setup of ASyncDjangoHandler._request_middleware (Optional), line 200 error -strict_optional = False - # Tests (may be many issues in file; comment is just one error noted) [mypy-zerver/tests/test_tornado] #202: error: Item "None" of "Optional[Morsel[Any]]" has no attribute "coded_value" diff --git a/zerver/decorator.py b/zerver/decorator.py index 693a417aef..01833ba226 100644 --- a/zerver/decorator.py +++ b/zerver/decorator.py @@ -60,30 +60,6 @@ webhook_unexpected_events_logger = logging.getLogger("zulip.zerver.lib.webhooks. log_to_file(webhook_unexpected_events_logger, settings.WEBHOOK_UNEXPECTED_EVENTS_LOG_PATH) -class _RespondAsynchronously: - pass - -# Return RespondAsynchronously from an @asynchronous view if the -# response will be provided later by calling handler.zulip_finish(), -# or has already been provided this way. We use this for longpolling -# mode. -RespondAsynchronously = _RespondAsynchronously() - -AsyncWrapperT = Callable[..., Union[HttpResponse, _RespondAsynchronously]] -def asynchronous(method: Callable[..., Union[HttpResponse, _RespondAsynchronously]]) -> AsyncWrapperT: - # TODO: this should be the correct annotation when mypy gets fixed: type: - # (Callable[[HttpRequest, base.BaseHandler, Sequence[Any], Dict[str, Any]], - # Union[HttpResponse, _RespondAsynchronously]]) -> - # Callable[[HttpRequest, Sequence[Any], Dict[str, Any]], Union[HttpResponse, _RespondAsynchronously]] - # TODO: see https://github.com/python/mypy/issues/1655 - @wraps(method) - def wrapper(request: HttpRequest, *args: Any, - **kwargs: Any) -> Union[HttpResponse, _RespondAsynchronously]: - return method(request, handler=request._tornado_handler, *args, **kwargs) - if getattr(method, 'csrf_exempt', False): # nocoverage # Our one @asynchronous route requires CSRF - wrapper.csrf_exempt = True # type: ignore # https://github.com/JukkaL/mypy/issues/1170 - return wrapper - def cachify(method: Callable[..., ReturnT]) -> Callable[..., ReturnT]: dct = {} # type: Dict[Tuple[Any, ...], ReturnT] diff --git a/zerver/lib/rest.py b/zerver/lib/rest.py index 0dd4085881..e34195d027 100644 --- a/zerver/lib/rest.py +++ b/zerver/lib/rest.py @@ -6,7 +6,7 @@ from django.utils.cache import add_never_cache_headers from django.views.decorators.csrf import csrf_exempt, csrf_protect from zerver.decorator import authenticated_json_view, authenticated_rest_api_view, \ - process_as_post, authenticated_uploads_api_view, RespondAsynchronously, \ + process_as_post, authenticated_uploads_api_view, \ ReturnT from zerver.lib.response import json_method_not_allowed, json_unauthorized from django.http import HttpRequest, HttpResponse, HttpResponseRedirect @@ -21,18 +21,11 @@ def default_never_cache_responses( decorator that adds headers to a response so that it will never be cached, unless the view code has already set a Cache-Control header. - - We also need to patch this because our Django+Tornado - RespondAsynchronously hack involves returning a value that isn't a - Django response object, on which add_never_cache_headers would - crash. This only occurs in a case where client-side caching - wouldn't be possible anyway (we aren't returning a response to the - client yet -- it's for longpolling). """ @wraps(view_func) def _wrapped_view_func(request: HttpRequest, *args: Any, **kwargs: Any) -> ReturnT: response = view_func(request, *args, **kwargs) - if response is RespondAsynchronously or response.has_header("Cache-Control"): + if response.has_header("Cache-Control"): return response add_never_cache_headers(response) @@ -66,6 +59,11 @@ def rest_dispatch(request: HttpRequest, **kwargs: Any) -> HttpResponse: """ supported_methods = {} # type: Dict[str, Any] + if hasattr(request, "saved_response"): + # For completing long-polled Tornado requests, we skip the + # view function logic and just return the response. + return request.saved_response + # duplicate kwargs so we can mutate the original as we go for arg in list(kwargs): if arg in METHODS: diff --git a/zerver/middleware.py b/zerver/middleware.py index f23b54b7e3..d3b8640d40 100644 --- a/zerver/middleware.py +++ b/zerver/middleware.py @@ -240,11 +240,27 @@ class LogRequests(MiddlewareMixin): # method here too def process_request(self, request: HttpRequest) -> None: maybe_tracemalloc_listen() + + if hasattr(request, "_log_data"): + # Sanity check to ensure this is being called from the + # Tornado code path that returns responses asynchronously. + assert getattr(request, "saved_response", False) + + # Avoid re-initializing request._log_data if it's already there. + return + request._log_data = dict() record_request_start_data(request._log_data) def process_view(self, request: HttpRequest, view_func: ViewFuncT, args: List[str], kwargs: Dict[str, Any]) -> None: + if hasattr(request, "saved_response"): + # The below logging adjustments are unnecessary (because + # we've already imported everything) and incorrect + # (because they'll overwrite data from pre-long-poll + # request processing) when returning a saved response. + return + # process_request was already run; we save the initialization # time (i.e. the time between receiving the request and # figuring out which view function to call, which is primarily @@ -256,6 +272,12 @@ class LogRequests(MiddlewareMixin): def process_response(self, request: HttpRequest, response: StreamingHttpResponse) -> StreamingHttpResponse: + if getattr(response, "asynchronous", False): + # This special Tornado "asynchronous" response is + # discarded after going through this code path as Tornado + # intends to block, so we stop here to avoid unnecessary work. + return response + # The reverse proxy might have sent us the real external IP remote_ip = request.META.get('HTTP_X_REAL_IP') if remote_ip is None: @@ -371,6 +393,12 @@ class FlushDisplayRecipientCache(MiddlewareMixin): class SessionHostDomainMiddleware(SessionMiddleware): def process_response(self, request: HttpRequest, response: HttpResponse) -> HttpResponse: + if getattr(response, "asynchronous", False): + # This special Tornado "asynchronous" response is + # discarded after going through this code path as Tornado + # intends to block, so we stop here to avoid unnecessary work. + return response + try: request.get_host() except DisallowedHost: diff --git a/zerver/tornado/handlers.py b/zerver/tornado/handlers.py index 12656fe7a8..694d4e83cb 100644 --- a/zerver/tornado/handlers.py +++ b/zerver/tornado/handlers.py @@ -1,32 +1,26 @@ import logging -import sys import urllib -from threading import Lock -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Dict, List import tornado.web from django import http -from django.conf import settings -from django.core import exceptions, signals -from django.urls import resolvers -from django.core.exceptions import MiddlewareNotUsed +from django.core import signals from django.core.handlers import base -from django.core.handlers.exception import convert_exception_to_response from django.core.handlers.wsgi import WSGIRequest, get_script_name -from django.urls import set_script_prefix, set_urlconf +from django.urls import set_script_prefix from django.http import HttpRequest, HttpResponse -from django.utils.module_loading import import_string from tornado.wsgi import WSGIContainer -from zerver.decorator import RespondAsynchronously from zerver.lib.response import json_response -from zerver.lib.types import ViewFuncT from zerver.middleware import async_request_timer_restart, async_request_timer_stop from zerver.tornado.descriptors import get_descriptor_by_handler_id current_handler_id = 0 handlers = {} # type: Dict[int, 'AsyncDjangoHandler'] +# Copied from django.core.handlers.base +logger = logging.getLogger('django.request') + def get_handler_by_id(handler_id: int) -> 'AsyncDjangoHandler': return handlers[handler_id] @@ -72,23 +66,16 @@ def finish_handler(handler_id: int, event_queue_id: str, logging.exception(err_msg) -# Modified version of the base Tornado handler for Django -# We mark this for nocoverage, since we only change 1 line of actual code. -class AsyncDjangoHandlerBase(tornado.web.RequestHandler, base.BaseHandler): # nocoverage - initLock = Lock() - +class AsyncDjangoHandler(tornado.web.RequestHandler, base.BaseHandler): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) - # Set up middleware if needed. We couldn't do this earlier, because - # settings weren't available. - self._request_middleware = None # type: Optional[List[Callable[[HttpRequest], HttpResponse]]] - self.initLock.acquire() - # Check that middleware is still uninitialised. - if self._request_middleware is None: - self.load_middleware() - self.initLock.release() + # Copied from the django.core.handlers.wsgi __init__() method. + self.load_middleware() + + # Prevent Tornado from automatically finishing the request self._auto_finish = False + # Handler IDs are allocated here, and the handler ID map must # be cleared when the handler finishes its response allocate_handler_id(self) @@ -97,52 +84,6 @@ class AsyncDjangoHandlerBase(tornado.web.RequestHandler, base.BaseHandler): # n descriptor = get_descriptor_by_handler_id(self.handler_id) return "AsyncDjangoHandler<%s, %s>" % (self.handler_id, descriptor) - def load_middleware(self) -> None: - """ - Populate middleware lists from settings.MIDDLEWARE. This is copied - from Django. This uses settings.MIDDLEWARE setting with the old - business logic. The middleware architecture is not compatible - with our asynchronous handlers. The problem occurs when we return - None from our handler. The Django middlewares throw exception - because they can't handler None, so we can either upgrade the Django - middlewares or just override this method to use the new setting with - the old logic. The added advantage is that due to this our event - system code doesn't change. - """ - self._request_middleware = [] - self._view_middleware = [] # type: List[Callable[[HttpRequest, ViewFuncT, List[str], Dict[str, Any]], Optional[HttpResponse]]] - self._template_response_middleware = [] # type: List[Callable[[HttpRequest, HttpResponse], HttpResponse]] - self._response_middleware = [] # type: List[Callable[[HttpRequest, HttpResponse], HttpResponse]] - self._exception_middleware = [] # type: List[Callable[[HttpRequest, Exception], Optional[HttpResponse]]] - - handler = convert_exception_to_response(self._legacy_get_response) - for middleware_path in settings.MIDDLEWARE: - mw_class = import_string(middleware_path) - try: - mw_instance = mw_class() - except MiddlewareNotUsed as exc: - if settings.DEBUG: - if str(exc): - base.logger.debug('MiddlewareNotUsed(%r): %s', middleware_path, exc) - else: - base.logger.debug('MiddlewareNotUsed: %r', middleware_path) - continue - - if hasattr(mw_instance, 'process_request'): - self._request_middleware.append(mw_instance.process_request) - if hasattr(mw_instance, 'process_view'): - self._view_middleware.append(mw_instance.process_view) - if hasattr(mw_instance, 'process_template_response'): - self._template_response_middleware.insert(0, mw_instance.process_template_response) - if hasattr(mw_instance, 'process_response'): - self._response_middleware.insert(0, mw_instance.process_response) - if hasattr(mw_instance, 'process_exception'): - self._exception_middleware.insert(0, mw_instance.process_exception) - - # We only assign to this when initialization is complete as it is used - # as a flag for initialization being complete. - self._middleware_chain = handler - def convert_tornado_request_to_django_request(self) -> HttpRequest: # This takes the WSGI environment that Tornado received (which # fully describes the HTTP request that was sent to Tornado) @@ -196,11 +137,30 @@ class AsyncDjangoHandlerBase(tornado.web.RequestHandler, base.BaseHandler): # n try: response = self.get_response(request) - if not response: + if hasattr(response, "asynchronous"): + # For asynchronous requests, this is where we exit + # without returning the HttpResponse that Django + # generated back to the user in order to long-poll the + # connection. We save some timers here in order to + # support accurate accounting of the total resources + # consumed by the request when it eventually returns a + # response and is logged. + async_request_timer_stop(request) return finally: + # Tell Django that we're done processing this request on + # the Django side; this triggers cleanup work like + # resetting the urlconf and any cache/database + # connections. signals.request_finished.send(sender=self.__class__) + # For normal/synchronous requests that don't end up + # long-polling, we fall through to here and just need to write + # the HTTP response that Django prepared for us via Tornado. + + # Mark this handler ID as finished for Zulip's own tracking. + clear_handler_by_id(self.handler_id) + self.write_django_response_as_tornado_response(response) def head(self, *args: Any, **kwargs: Any) -> None: @@ -213,157 +173,22 @@ class AsyncDjangoHandlerBase(tornado.web.RequestHandler, base.BaseHandler): # n self.get(*args, **kwargs) def on_connection_close(self) -> None: + # Register a Tornado handler that runs when client-side + # connections are closed to notify the events system. + # + # TODO: Theoretically, this code should run when you Ctrl-C + # curl to cause it to break a `GET /events` connection, but + # that seems to no longer run this code. Investigate what's up. client_descriptor = get_descriptor_by_handler_id(self.handler_id) if client_descriptor is not None: client_descriptor.disconnect_handler(client_closed=True) - # Based on django.core.handlers.base: get_response - def get_response(self, request: HttpRequest) -> HttpResponse: - "Returns an HttpResponse object for the given HttpRequest" - try: - try: - # Setup default url resolver for this thread. - urlconf = settings.ROOT_URLCONF - set_urlconf(urlconf) - resolver = resolvers.RegexURLResolver(r'^/', urlconf) - - response = None - - # Apply request middleware - for middleware_method in self._request_middleware: - response = middleware_method(request) - if response: - break - - if hasattr(request, "urlconf"): - # Reset url resolver with a custom urlconf. - urlconf = request.urlconf - set_urlconf(urlconf) - resolver = resolvers.RegexURLResolver(r'^/', urlconf) - - ### ADDED BY ZULIP - request._resolver = resolver - ### END ADDED BY ZULIP - - callback, callback_args, callback_kwargs = resolver.resolve( - request.path_info) - - # Apply view middleware - if response is None: - for view_middleware_method in self._view_middleware: - response = view_middleware_method(request, callback, - callback_args, callback_kwargs) - if response: - break - - ### THIS BLOCK MODIFIED BY ZULIP - if response is None: - try: - response = callback(request, *callback_args, **callback_kwargs) - if response is RespondAsynchronously: - async_request_timer_stop(request) - return None - clear_handler_by_id(self.handler_id) - except Exception as e: - clear_handler_by_id(self.handler_id) - # If the view raised an exception, run it through exception - # middleware, and if the exception middleware returns a - # response, use that. Otherwise, reraise the exception. - for exception_middleware_method in self._exception_middleware: - response = exception_middleware_method(request, e) - if response: - break - if response is None: - raise - - if response is None: - try: - view_name = callback.__name__ - except AttributeError: - view_name = callback.__class__.__name__ + '.__call__' - raise ValueError("The view %s.%s returned None." % - (callback.__module__, view_name)) - - # If the response supports deferred rendering, apply template - # response middleware and the render the response - if hasattr(response, 'render') and callable(response.render): - for template_middleware_method in self._template_response_middleware: - response = template_middleware_method(request, response) - response = response.render() - - except http.Http404 as e: - if settings.DEBUG: - from django.views import debug - response = debug.technical_404_response(request, e) - else: - try: - callback, param_dict = resolver.resolve404() - response = callback(request, **param_dict) - except Exception: - try: - response = self.handle_uncaught_exception(request, resolver, - sys.exc_info()) - finally: - signals.got_request_exception.send(sender=self.__class__, - request=request) - except exceptions.PermissionDenied: - logging.warning( - 'Forbidden (Permission denied): %s', request.path, - extra={ - 'status_code': 403, - 'request': request - }) - try: - callback, param_dict = resolver.resolve403() - response = callback(request, **param_dict) - except Exception: - try: - response = self.handle_uncaught_exception(request, - resolver, sys.exc_info()) - finally: - signals.got_request_exception.send( - sender=self.__class__, request=request) - except SystemExit: - # See https://code.djangoproject.com/ticket/4701 - raise - except Exception: - exc_info = sys.exc_info() - signals.got_request_exception.send(sender=self.__class__, request=request) - return self.handle_uncaught_exception(request, resolver, exc_info) - finally: - # Reset urlconf on the way out for isolation - set_urlconf(None) - - ### ZULIP CHANGE: The remainder of this function was moved - ### into its own function, just below, so we can call it from - ### finish(). - response = self.apply_response_middleware(request, response, resolver) - - return response - - ### Copied from get_response (above in this file) - def apply_response_middleware(self, request: HttpRequest, response: HttpResponse, - resolver: resolvers.RegexURLResolver) -> HttpResponse: - try: - # Apply response middleware, regardless of the response - for middleware_method in self._response_middleware: - response = middleware_method(request, response) - if hasattr(self, 'apply_response_fixes'): - response = self.apply_response_fixes(request, response) - except Exception: # Any exception should be gathered and handled - signals.got_request_exception.send(sender=self.__class__, request=request) - response = self.handle_uncaught_exception(request, resolver, sys.exc_info()) - - return response - -class AsyncDjangoHandler(AsyncDjangoHandlerBase): - def zulip_finish(self, result_dict: Dict[str, Any], request: HttpRequest, + def zulip_finish(self, result_dict: Dict[str, Any], old_request: HttpRequest, apply_markdown: bool) -> None: - # Make sure that Markdown rendering really happened, if requested. - # This is a security issue because it's where we escape HTML. - # c.f. ticket #64 - # - # apply_markdown=True is the fail-safe default. + # Function called when we want to break a long-polled + # get_events request and return a response to the client. + + # Marshall the response data from result_dict. if result_dict['result'] == 'success' and 'messages' in result_dict and apply_markdown: for msg in result_dict['messages']: if msg['content_type'] != 'text/html': @@ -372,14 +197,47 @@ class AsyncDjangoHandler(AsyncDjangoHandlerBase): if result_dict['result'] == 'error': self.set_status(400) - # Call the Django response middleware on our object so that - # e.g. our own logging code can run; but don't actually use - # the headers from that since sending those to Tornado seems - # tricky; instead just send the (already json-rendered) - # content on to Tornado - django_response = json_response(res_type=result_dict['result'], - data=result_dict, status=self.get_status()) - django_response = self.apply_response_middleware(request, django_response, - request._resolver) + # The `result` dictionary contains the data we want to return + # to the client. We want to do so in a proper Tornado HTTP + # response after running the Django response middleware (which + # does things like log the request, add rate-limit headers, + # etc.). The Django middleware API expects to receive a fresh + # HttpRequest object, and so to minimize hacks, our strategy + # is to create a duplicate Django HttpRequest object, tagged + # to automatically return our data in its response, and call + # Django's main self.get_response() handler to generate an + # HttpResponse with all Django middleware run. + request = self.convert_tornado_request_to_django_request() - self.write_django_response_as_tornado_response(django_response) + # Add to this new HttpRequest logging data from the processing of + # the original request; we will need these for logging. + # + # TODO: Design a cleaner way to manage these attributes, + # perhaps via creating a ZulipHttpRequest class that contains + # these attributes with a copy method. + request._log_data = old_request._log_data + if hasattr(request, "_rate_limit"): + request._rate_limit = old_request._rate_limit + request._email = old_request._email + request.user = old_request.user + request.client = old_request.client + + # The saved_response attribute, if present, causes + # rest_dispatch to return the response immediately before + # doing any work. This arrangement allows Django's full + # request/middleware system to run unmodified while avoiding + # running expensive things like Zulip's authentication code a + # second time. + request.saved_response = json_response(res_type=result_dict['result'], + data=result_dict, status=self.get_status()) + + try: + response = self.get_response(request) + finally: + # Tell Django we're done processing this request + # + # TODO: Investigate whether this (and other call points in + # this file) should be using response.close() instead. + signals.request_finished.send(sender=self.__class__) + + self.write_django_response_as_tornado_response(response) diff --git a/zerver/tornado/views.py b/zerver/tornado/views.py index 77656c8e97..bb4f8f55b6 100644 --- a/zerver/tornado/views.py +++ b/zerver/tornado/views.py @@ -1,19 +1,18 @@ import time -from typing import Iterable, Optional, Sequence, Union +from typing import Iterable, Optional, Sequence import ujson -from django.core.handlers.base import BaseHandler from django.http import HttpRequest, HttpResponse from django.utils.translation import ugettext as _ -from zerver.decorator import REQ, RespondAsynchronously, \ - _RespondAsynchronously, asynchronous, to_non_negative_int, \ +from zerver.decorator import REQ, to_non_negative_int, \ has_request_variables, internal_notify_view, process_client from zerver.lib.response import json_error, json_success from zerver.lib.validator import check_bool, check_int, check_list, check_string from zerver.models import Client, UserProfile, get_client, get_user_profile_by_id from zerver.tornado.event_queue import fetch_events, \ get_client_descriptor, process_notification +from zerver.tornado.handlers import AsyncDjangoHandler from zerver.tornado.exceptions import BadEventQueueIdError @internal_notify_view(True) @@ -33,26 +32,20 @@ def cleanup_event_queue(request: HttpRequest, user_profile: UserProfile, client.cleanup() return json_success() -@asynchronous @internal_notify_view(True) @has_request_variables -def get_events_internal( - request: HttpRequest, - handler: BaseHandler, - user_profile_id: int = REQ(validator=check_int), -) -> Union[HttpResponse, _RespondAsynchronously]: +def get_events_internal(request: HttpRequest, + user_profile_id: int = REQ(validator=check_int)) -> HttpResponse: user_profile = get_user_profile_by_id(user_profile_id) request._email = user_profile.delivery_email process_client(request, user_profile, client_name="internal") - return get_events_backend(request, user_profile, handler) + return get_events_backend(request, user_profile) -@asynchronous -def get_events(request: HttpRequest, user_profile: UserProfile, - handler: BaseHandler) -> Union[HttpResponse, _RespondAsynchronously]: - return get_events_backend(request, user_profile, handler) +def get_events(request: HttpRequest, user_profile: UserProfile) -> HttpResponse: + return get_events_backend(request, user_profile) @has_request_variables -def get_events_backend(request: HttpRequest, user_profile: UserProfile, handler: BaseHandler, +def get_events_backend(request: HttpRequest, user_profile: UserProfile, # user_client is intended only for internal Django=>Tornado requests # and thus shouldn't be documented for external use. user_client: Optional[Client]=REQ(converter=get_client, default=None, @@ -78,7 +71,10 @@ def get_events_backend(request: HttpRequest, user_profile: UserProfile, handler: intentionally_undocumented=True), lifespan_secs: int=REQ(default=0, converter=to_non_negative_int, intentionally_undocumented=True) - ) -> Union[HttpResponse, _RespondAsynchronously]: + ) -> HttpResponse: + # Extract the Tornado handler from the request + handler = request._tornado_handler # type: AsyncDjangoHandler + if user_client is None: valid_user_client = request.client else: @@ -115,8 +111,13 @@ def get_events_backend(request: HttpRequest, user_profile: UserProfile, handler: request._log_data['extra'] = result["extra_log_data"] if result["type"] == "async": + # Mark this response with .asynchronous; this will result in + # Tornado discarding the response and instead long-polling the + # request. See zulip_finish for more design details. handler._request = request - return RespondAsynchronously + response = json_success() + response.asynchronous = True + return response if result["type"] == "error": raise result["exception"] return json_success(result["response"]) diff --git a/zproject/urls.py b/zproject/urls.py index 35922e2ba0..ec3332d771 100644 --- a/zproject/urls.py +++ b/zproject/urls.py @@ -714,6 +714,9 @@ for app_name in settings.EXTRA_INSTALLED_APPS: # Tornado views urls += [ # Used internally for communication between Django and Tornado processes + # + # Since these views don't use rest_dispatch, they cannot have + # asynchronous Tornado behavior. url(r'^notify_tornado$', zerver.tornado.views.notify, name='zerver.tornado.views.notify'), url(r'^api/v1/events/internal$', zerver.tornado.views.get_events_internal), ]