mirror of https://github.com/zulip/zulip.git
debug: Add facility to dump tracemalloc snapshots.
Originally this used signals, namely SIGRTMIN. But in prod, the
signal handler never fired; I debugged fruitlessly for a while, and
suspect uwsgi was foiling it in a mysterious way (which is kind of
the only way uwsgi does anything.)
So, we listen on a socket. Bit more code, and a bit trickier to
invoke, but it works.
This was developed for the investigation of memory-bloating on
chat.zulip.org that led to a331b4f64
"Optimize query_all_subs_by_stream()".
For usage instructions, see docstring.
This commit is contained in:
parent
6dd639454e
commit
b6cc21b438
|
@ -1,12 +1,21 @@
|
|||
|
||||
import code
|
||||
import traceback
|
||||
import gc
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
|
||||
import socket
|
||||
import threading
|
||||
import traceback
|
||||
import tracemalloc
|
||||
from types import FrameType
|
||||
|
||||
from django.conf import settings
|
||||
from django.utils.timezone import now as timezone_now
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger('zulip.debug')
|
||||
|
||||
# Interactive debugging code from
|
||||
# http://stackoverflow.com/questions/132058/showing-the-stack-trace-from-a-running-python-application
|
||||
# (that link also points to code for an interactive remote debugger
|
||||
|
@ -29,3 +38,76 @@ def interactive_debug(sig: int, frame: FrameType) -> None:
|
|||
def interactive_debug_listen() -> None:
|
||||
signal.signal(signal.SIGUSR1, lambda sig, stack: traceback.print_stack(stack))
|
||||
signal.signal(signal.SIGUSR2, interactive_debug)
|
||||
|
||||
def tracemalloc_dump():
|
||||
# type: () -> None
|
||||
if not tracemalloc.is_tracing():
|
||||
logger.warning("pid {}: tracemalloc off, nothing to dump"
|
||||
.format(os.getpid()))
|
||||
return
|
||||
# Despite our name for it, `timezone_now` always deals in UTC.
|
||||
basename = "snap.{}.{}".format(os.getpid(),
|
||||
timezone_now().strftime("%F-%T"))
|
||||
path = os.path.join(settings.TRACEMALLOC_DUMP_DIR, basename)
|
||||
os.makedirs(settings.TRACEMALLOC_DUMP_DIR, exist_ok=True)
|
||||
|
||||
gc.collect()
|
||||
tracemalloc.take_snapshot().dump(path)
|
||||
|
||||
procstat = open('/proc/{}/stat'.format(os.getpid()), 'rb').read().split()
|
||||
rss_pages = int(procstat[23])
|
||||
logger.info("tracemalloc dump: tracing {} MiB ({} MiB peak), using {} MiB; rss {} MiB; dumped {}"
|
||||
.format(tracemalloc.get_traced_memory()[0] // 1048576,
|
||||
tracemalloc.get_traced_memory()[1] // 1048576,
|
||||
tracemalloc.get_tracemalloc_memory() // 1048576,
|
||||
rss_pages // 256,
|
||||
basename))
|
||||
|
||||
def tracemalloc_listen_sock(sock):
|
||||
# type: (socket.socket) -> None
|
||||
logger.debug('pid {}: tracemalloc_listen_sock started!'.format(os.getpid()))
|
||||
while True:
|
||||
sock.recv(1)
|
||||
tracemalloc_dump()
|
||||
|
||||
listener_pid = None # Optional[int]
|
||||
|
||||
def tracemalloc_listen():
|
||||
# type: () -> None
|
||||
global listener_pid
|
||||
if listener_pid == os.getpid():
|
||||
# Already set up -- and in this process, not just its parent.
|
||||
return
|
||||
logger.debug('pid {}: tracemalloc_listen working...'.format(os.getpid()))
|
||||
listener_pid = os.getpid()
|
||||
|
||||
sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
|
||||
path = "/tmp/tracemalloc.{}".format(os.getpid())
|
||||
sock.bind(path)
|
||||
thread = threading.Thread(target=lambda: tracemalloc_listen_sock(sock),
|
||||
daemon=True)
|
||||
thread.start()
|
||||
logger.debug('pid {}: tracemalloc_listen done: {}'.format(
|
||||
os.getpid(), path))
|
||||
|
||||
def maybe_tracemalloc_listen():
|
||||
# type: () -> None
|
||||
'''If tracemalloc tracing enabled, listen for requests to dump a snapshot.
|
||||
|
||||
To trigger once this is listening:
|
||||
echo | socat -u stdin unix-sendto:/tmp/tracemalloc.$pid
|
||||
|
||||
To enable in the Zulip web server: edit /etc/zulip/uwsgi.ini ,
|
||||
and add e.g. ` PYTHONTRACEMALLOC=5` to the `env=` line.
|
||||
This function is called in middleware, so the process will
|
||||
automatically start listening.
|
||||
|
||||
To enable in other contexts: see upstream docs
|
||||
https://docs.python.org/3/library/tracemalloc .
|
||||
You may also have to add a call to this function somewhere.
|
||||
|
||||
'''
|
||||
if os.environ.get('PYTHONTRACEMALLOC'):
|
||||
# If the server was started with `tracemalloc` tracing on, then
|
||||
# listen for a signal to dump `tracemalloc` snapshots.
|
||||
tracemalloc_listen()
|
||||
|
|
|
@ -20,6 +20,7 @@ from django.views.csrf import csrf_failure as html_csrf_failure
|
|||
|
||||
from zerver.lib.bugdown import get_bugdown_requests, get_bugdown_time
|
||||
from zerver.lib.cache import get_remote_cache_requests, get_remote_cache_time
|
||||
from zerver.lib.debug import maybe_tracemalloc_listen
|
||||
from zerver.lib.exceptions import ErrorCode, JsonableError, RateLimited
|
||||
from zerver.lib.queue import queue_json_publish
|
||||
from zerver.lib.response import json_error, json_response_from_error
|
||||
|
@ -233,6 +234,7 @@ class LogRequests(MiddlewareMixin):
|
|||
# method here too
|
||||
def process_request(self, request):
|
||||
# type: (HttpRequest) -> None
|
||||
maybe_tracemalloc_listen()
|
||||
request._log_data = dict()
|
||||
record_request_start_data(request._log_data)
|
||||
if connection.connection is not None:
|
||||
|
|
|
@ -1188,6 +1188,7 @@ ZULIP_PATHS = [
|
|||
("ANALYTICS_LOCK_DIR", "/home/zulip/deployments/analytics-lock-dir"),
|
||||
("API_KEY_ONLY_WEBHOOK_LOG_PATH", "/var/log/zulip/webhooks_errors.log"),
|
||||
("SOFT_DEACTIVATION_LOG_PATH", "/var/log/zulip/soft_deactivation.log"),
|
||||
("TRACEMALLOC_DUMP_DIR", "/var/log/zulip/tracemalloc"),
|
||||
]
|
||||
|
||||
# The Event log basically logs most significant database changes,
|
||||
|
|
Loading…
Reference in New Issue