import logging import urllib from typing import Any, Dict, List import tornado.web from django import http from django.core import signals from django.core.handlers import base from django.core.handlers.wsgi import WSGIRequest, get_script_name from django.urls import set_script_prefix from django.http import HttpRequest, HttpResponse from tornado.wsgi import WSGIContainer from zerver.lib.response import json_response from zerver.middleware import async_request_timer_restart, async_request_timer_stop from zerver.tornado.descriptors import get_descriptor_by_handler_id current_handler_id = 0 handlers = {} # type: Dict[int, 'AsyncDjangoHandler'] # Copied from django.core.handlers.base logger = logging.getLogger('django.request') def get_handler_by_id(handler_id: int) -> 'AsyncDjangoHandler': return handlers[handler_id] def allocate_handler_id(handler: 'AsyncDjangoHandler') -> int: global current_handler_id handlers[current_handler_id] = handler handler.handler_id = current_handler_id current_handler_id += 1 return handler.handler_id def clear_handler_by_id(handler_id: int) -> None: del handlers[handler_id] def handler_stats_string() -> str: return "%s handlers, latest ID %s" % (len(handlers), current_handler_id) def finish_handler(handler_id: int, event_queue_id: str, contents: List[Dict[str, Any]], apply_markdown: bool) -> None: err_msg = "Got error finishing handler for queue %s" % (event_queue_id,) try: # We call async_request_timer_restart here in case we are # being finished without any events (because another # get_events request has supplanted this request) handler = get_handler_by_id(handler_id) request = handler._request async_request_timer_restart(request) if len(contents) != 1: request._log_data['extra'] = "[%s/1]" % (event_queue_id,) else: request._log_data['extra'] = "[%s/1/%s]" % (event_queue_id, contents[0]["type"]) handler.zulip_finish(dict(result='success', msg='', events=contents, queue_id=event_queue_id), request, apply_markdown=apply_markdown) except OSError as e: if str(e) != 'Stream is closed': logging.exception(err_msg) except AssertionError as e: if str(e) != 'Request closed': logging.exception(err_msg) except Exception: logging.exception(err_msg) class AsyncDjangoHandler(tornado.web.RequestHandler, base.BaseHandler): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) # Copied from the django.core.handlers.wsgi __init__() method. self.load_middleware() # Prevent Tornado from automatically finishing the request self._auto_finish = False # Handler IDs are allocated here, and the handler ID map must # be cleared when the handler finishes its response allocate_handler_id(self) def __repr__(self) -> str: descriptor = get_descriptor_by_handler_id(self.handler_id) return "AsyncDjangoHandler<%s, %s>" % (self.handler_id, descriptor) def convert_tornado_request_to_django_request(self) -> HttpRequest: # This takes the WSGI environment that Tornado received (which # fully describes the HTTP request that was sent to Tornado) # and pass it to Django's WSGIRequest to generate a Django # HttpRequest object with the original Tornado request's HTTP # headers, parameters, etc. environ = WSGIContainer.environ(self.request) environ['PATH_INFO'] = urllib.parse.unquote(environ['PATH_INFO']) # Django WSGIRequest setup code that should match logic from # Django's WSGIHandler.__call__ before the call to # `get_response()`. set_script_prefix(get_script_name(environ)) signals.request_started.send(sender=self.__class__) request = WSGIRequest(environ) # Provide a way for application code to access this handler # given the HttpRequest object. request._tornado_handler = self return request def write_django_response_as_tornado_response(self, response: HttpResponse) -> None: # This takes a Django HttpResponse and copies its HTTP status # code, headers, cookies, and content onto this # tornado.web.RequestHandler (which is how Tornado prepares a # response to write). # Copy the HTTP status code. self.set_status(response.status_code) # Copy the HTTP headers (iterating through a Django # HttpResponse is the way to access its headers as key/value pairs) for h in response.items(): self.set_header(h[0], h[1]) # Copy any cookies if not hasattr(self, "_new_cookies"): self._new_cookies = [] # type: List[http.cookie.SimpleCookie[str]] self._new_cookies.append(response.cookies) # Copy the response content self.write(response.content) # Close the connection. self.finish() def get(self, *args: Any, **kwargs: Any) -> None: request = self.convert_tornado_request_to_django_request() try: response = self.get_response(request) if hasattr(response, "asynchronous"): # For asynchronous requests, this is where we exit # without returning the HttpResponse that Django # generated back to the user in order to long-poll the # connection. We save some timers here in order to # support accurate accounting of the total resources # consumed by the request when it eventually returns a # response and is logged. async_request_timer_stop(request) return finally: # Tell Django that we're done processing this request on # the Django side; this triggers cleanup work like # resetting the urlconf and any cache/database # connections. signals.request_finished.send(sender=self.__class__) # For normal/synchronous requests that don't end up # long-polling, we fall through to here and just need to write # the HTTP response that Django prepared for us via Tornado. # Mark this handler ID as finished for Zulip's own tracking. clear_handler_by_id(self.handler_id) self.write_django_response_as_tornado_response(response) def head(self, *args: Any, **kwargs: Any) -> None: self.get(*args, **kwargs) def post(self, *args: Any, **kwargs: Any) -> None: self.get(*args, **kwargs) def delete(self, *args: Any, **kwargs: Any) -> None: self.get(*args, **kwargs) def on_connection_close(self) -> None: # Register a Tornado handler that runs when client-side # connections are closed to notify the events system. # # TODO: Theoretically, this code should run when you Ctrl-C # curl to cause it to break a `GET /events` connection, but # that seems to no longer run this code. Investigate what's up. client_descriptor = get_descriptor_by_handler_id(self.handler_id) if client_descriptor is not None: client_descriptor.disconnect_handler(client_closed=True) def zulip_finish(self, result_dict: Dict[str, Any], old_request: HttpRequest, apply_markdown: bool) -> None: # Function called when we want to break a long-polled # get_events request and return a response to the client. # Marshall the response data from result_dict. if result_dict['result'] == 'success' and 'messages' in result_dict and apply_markdown: for msg in result_dict['messages']: if msg['content_type'] != 'text/html': self.set_status(500) self.finish('Internal error: bad message format') if result_dict['result'] == 'error': self.set_status(400) # The `result` dictionary contains the data we want to return # to the client. We want to do so in a proper Tornado HTTP # response after running the Django response middleware (which # does things like log the request, add rate-limit headers, # etc.). The Django middleware API expects to receive a fresh # HttpRequest object, and so to minimize hacks, our strategy # is to create a duplicate Django HttpRequest object, tagged # to automatically return our data in its response, and call # Django's main self.get_response() handler to generate an # HttpResponse with all Django middleware run. request = self.convert_tornado_request_to_django_request() # Add to this new HttpRequest logging data from the processing of # the original request; we will need these for logging. # # TODO: Design a cleaner way to manage these attributes, # perhaps via creating a ZulipHttpRequest class that contains # these attributes with a copy method. request._log_data = old_request._log_data if hasattr(request, "_rate_limit"): request._rate_limit = old_request._rate_limit if hasattr(request, "_requestor_for_logs"): request._requestor_for_logs = old_request._requestor_for_logs request.user = old_request.user request.client = old_request.client # The saved_response attribute, if present, causes # rest_dispatch to return the response immediately before # doing any work. This arrangement allows Django's full # request/middleware system to run unmodified while avoiding # running expensive things like Zulip's authentication code a # second time. request.saved_response = json_response(res_type=result_dict['result'], data=result_dict, status=self.get_status()) try: response = self.get_response(request) finally: # Tell Django we're done processing this request # # TODO: Investigate whether this (and other call points in # this file) should be using response.close() instead. signals.request_finished.send(sender=self.__class__) self.write_django_response_as_tornado_response(response)