2017-11-16 19:51:44 +01:00
|
|
|
# System documented in https://zulip.readthedocs.io/en/latest/subsystems/logging.html
|
2016-08-27 07:55:54 +02:00
|
|
|
import hashlib
|
2013-03-15 17:03:56 +01:00
|
|
|
import logging
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
import threading
|
2016-08-27 07:55:54 +02:00
|
|
|
import traceback
|
2020-06-05 06:55:20 +02:00
|
|
|
from datetime import datetime, timedelta, timezone
|
2020-06-11 00:54:34 +02:00
|
|
|
from logging import Logger
|
|
|
|
from typing import Optional, Tuple
|
|
|
|
|
2020-09-04 03:38:24 +02:00
|
|
|
import orjson
|
2017-03-22 22:09:38 +01:00
|
|
|
from django.conf import settings
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
from django.core.cache import cache
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
|
|
|
|
2012-12-05 18:01:43 +01:00
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class _RateLimitFilter:
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
"""This class is designed to rate-limit Django error reporting
|
|
|
|
notifications so that it won't send thousands of emails if the
|
|
|
|
database or cache is completely down. It uses a remote shared
|
|
|
|
cache (shared by all Django processes) for its default behavior
|
|
|
|
(so that the deduplication is global, not per-process), and a
|
|
|
|
local in-process cache for when it can't access the remote cache.
|
|
|
|
|
|
|
|
This is critical code because it is called every time
|
|
|
|
`logging.error` or `logging.exception` (or an exception) happens
|
|
|
|
in the codebase.
|
|
|
|
|
|
|
|
Adapted from https://djangosnippets.org/snippets/2242/.
|
|
|
|
|
|
|
|
"""
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-06-05 06:55:20 +02:00
|
|
|
last_error = datetime.min.replace(tzinfo=timezone.utc)
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
# This thread-local variable is used to detect recursive
|
|
|
|
# exceptions during exception handling (primarily intended for
|
|
|
|
# when accessing the shared cache throws an exception).
|
|
|
|
handling_exception = threading.local()
|
|
|
|
should_reset_handling_exception = False
|
|
|
|
|
|
|
|
def can_use_remote_cache(self) -> Tuple[bool, bool]:
|
2021-02-12 08:20:45 +01:00
|
|
|
if getattr(self.handling_exception, "value", False):
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
# If we're processing an exception that occurred
|
|
|
|
# while handling an exception, this almost
|
|
|
|
# certainly was because interacting with the
|
|
|
|
# remote cache is failing (e.g. because the cache
|
|
|
|
# is down). Fall back to tracking duplicate
|
|
|
|
# exceptions in memory without the remote shared cache.
|
|
|
|
return False, False
|
|
|
|
|
|
|
|
# Now we test if the remote cache is accessible.
|
|
|
|
#
|
|
|
|
# This code path can only be reached if we are not potentially
|
|
|
|
# handling a recursive exception, so here we set
|
|
|
|
# self.handling_exception (in case the cache access we're
|
|
|
|
# about to do triggers a `logging.error` or exception that
|
|
|
|
# might recurse into this filter class), and actually record
|
|
|
|
# that this is the main exception handler thread.
|
|
|
|
try:
|
|
|
|
self.handling_exception.value = True
|
2021-02-12 08:20:45 +01:00
|
|
|
cache.set("RLF_TEST_KEY", 1, 1)
|
|
|
|
return cache.get("RLF_TEST_KEY") == 1, True
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
except Exception:
|
|
|
|
return False, True
|
2012-12-05 18:01:43 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
# When the original filter() call finishes executing, it's
|
|
|
|
# going to change handling_exception.value to False. The
|
|
|
|
# local variable below tracks whether the *current*,
|
|
|
|
# potentially recursive, filter() call is allowed to touch
|
|
|
|
# that value (only the original will find this to be True
|
|
|
|
# at the end of its execution)
|
|
|
|
should_reset_handling_exception = False
|
|
|
|
try:
|
|
|
|
# Track duplicate errors
|
|
|
|
duplicate = False
|
2021-02-12 08:20:45 +01:00
|
|
|
rate = getattr(settings, f"{self.__class__.__name__.upper()}_LIMIT", 600) # seconds
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
|
|
|
|
if rate > 0:
|
|
|
|
(use_cache, should_reset_handling_exception) = self.can_use_remote_cache()
|
|
|
|
if use_cache:
|
|
|
|
if record.exc_info is not None:
|
2021-02-12 08:20:45 +01:00
|
|
|
tb = "\n".join(traceback.format_exception(*record.exc_info))
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
else:
|
|
|
|
tb = str(record)
|
|
|
|
key = self.__class__.__name__.upper() + hashlib.sha1(tb.encode()).hexdigest()
|
|
|
|
duplicate = cache.get(key) == 1
|
|
|
|
if not duplicate:
|
|
|
|
cache.set(key, 1, rate)
|
2016-12-20 20:42:57 +01:00
|
|
|
else:
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
min_date = timezone_now() - timedelta(seconds=rate)
|
2021-02-12 08:19:30 +01:00
|
|
|
duplicate = self.last_error >= min_date
|
errors: Fix infinite recursion during memcached outages.
We have a very useful piece of code, _RateLimitFilter, which is
designed to avoid sending us a billion error emails in the event that
a Zulip production server is down in a way that throws the same
exception a lot. The code uses memcached to ensure we send each
traceback roughly once per Zulip server per 10 minutes (or if
memcached is unavailable, at most 1/process/10 minutes, since we use
memcached to coordinate between processes)
However, if memcached is down, there is a logging.error call internal
to the Django/memcached setup that happens inside the cache.set() call,
and those aren't caught by the `except Exception` block around it.
This ends up resulting in infinite recursion, eventually leading to
Fatal Python error: Cannot recover from stack overflow., since this
handler is configured to run for logging.error in addition to
logging.exception.
We fix this using a thread-local variable to detect whether we are
being called recursively.
This change should prevent some nasty failure modes we've had in the
past where memcached being down resulted in infinite recursion
(resulting in extra resources being consumed by our error
notifications code, and most importantly, the error notifications not
being sent).
Fixes #12595.
2019-09-03 06:08:14 +02:00
|
|
|
if not duplicate:
|
|
|
|
self.last_error = timezone_now()
|
|
|
|
|
|
|
|
return not duplicate
|
|
|
|
finally:
|
|
|
|
if should_reset_handling_exception:
|
|
|
|
self.handling_exception.value = False
|
2012-12-06 22:00:34 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2013-08-06 21:37:34 +02:00
|
|
|
class ZulipLimiter(_RateLimitFilter):
|
2012-12-06 22:00:34 +01:00
|
|
|
pass
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2012-12-06 22:00:34 +01:00
|
|
|
class EmailLimiter(_RateLimitFilter):
|
|
|
|
pass
|
2013-03-15 17:03:56 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2013-03-15 17:03:56 +01:00
|
|
|
class ReturnTrue(logging.Filter):
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2013-03-15 17:03:56 +01:00
|
|
|
return True
|
2013-06-10 18:57:59 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-03-22 22:09:38 +01:00
|
|
|
class ReturnEnabled(logging.Filter):
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2018-03-21 00:53:39 +01:00
|
|
|
return settings.LOGGING_ENABLED
|
2017-03-22 22:09:38 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2013-06-10 18:57:59 +02:00
|
|
|
class RequireReallyDeployed(logging.Filter):
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2013-06-10 18:57:59 +02:00
|
|
|
from django.conf import settings
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2015-08-21 09:18:44 +02:00
|
|
|
return settings.PRODUCTION
|
2017-01-18 12:52:01 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def skip_200_and_304(record: logging.LogRecord) -> bool:
|
2017-01-18 12:52:01 +01:00
|
|
|
# Apparently, `status_code` is added by Django and is not an actual
|
|
|
|
# attribute of LogRecord; as a result, mypy throws an error if we
|
|
|
|
# access the `status_code` attribute directly.
|
2021-02-12 08:20:45 +01:00
|
|
|
if getattr(record, "status_code", None) in [200, 304]:
|
2017-01-18 12:52:01 +01:00
|
|
|
return False
|
|
|
|
|
|
|
|
return True
|
2017-03-15 10:08:27 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def skip_site_packages_logs(record: logging.LogRecord) -> bool:
|
2017-03-15 10:08:27 +01:00
|
|
|
# This skips the log records that are generated from libraries
|
|
|
|
# installed in site packages.
|
|
|
|
# Workaround for https://code.djangoproject.com/ticket/26886
|
2021-02-12 08:20:45 +01:00
|
|
|
if "site-packages" in record.pathname:
|
2017-03-15 10:08:27 +01:00
|
|
|
return False
|
|
|
|
return True
|
2017-08-25 15:25:00 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def find_log_caller_module(record: logging.LogRecord) -> Optional[str]:
|
2021-02-12 08:19:30 +01:00
|
|
|
"""Find the module name corresponding to where this record was logged.
|
2017-12-01 01:26:37 +01:00
|
|
|
|
|
|
|
Sadly `record.module` is just the innermost component of the full
|
|
|
|
module name, so we have to go reconstruct this ourselves.
|
2021-02-12 08:19:30 +01:00
|
|
|
"""
|
2017-09-27 03:08:43 +02:00
|
|
|
# Repeat a search similar to that in logging.Logger.findCaller.
|
|
|
|
# The logging call should still be on the stack somewhere; search until
|
|
|
|
# we find something in the same source file, and that should give the
|
|
|
|
# right module name.
|
2019-08-10 00:30:33 +02:00
|
|
|
f = logging.currentframe()
|
2020-04-18 03:55:04 +02:00
|
|
|
while True:
|
2017-09-27 03:08:43 +02:00
|
|
|
if f.f_code.co_filename == record.pathname:
|
2021-02-12 08:20:45 +01:00
|
|
|
return f.f_globals.get("__name__")
|
2020-04-18 03:55:04 +02:00
|
|
|
if f.f_back is None:
|
|
|
|
return None
|
2017-09-27 03:08:43 +02:00
|
|
|
f = f.f_back
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-09-27 23:46:44 +02:00
|
|
|
logger_nicknames = {
|
2021-02-12 08:20:45 +01:00
|
|
|
"root": "", # This one is more like undoing a nickname.
|
|
|
|
"zulip.requests": "zr", # Super common.
|
2017-09-27 23:46:44 +02:00
|
|
|
}
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def find_log_origin(record: logging.LogRecord) -> str:
|
2017-09-27 03:08:43 +02:00
|
|
|
logger_name = logger_nicknames.get(record.name, record.name)
|
|
|
|
|
|
|
|
if settings.LOGGING_SHOW_MODULE:
|
|
|
|
module_name = find_log_caller_module(record)
|
|
|
|
if module_name == logger_name or module_name == record.name:
|
|
|
|
# Abbreviate a bit.
|
2020-03-11 15:16:23 +01:00
|
|
|
pass
|
2017-09-27 03:08:43 +02:00
|
|
|
else:
|
2021-02-12 08:20:45 +01:00
|
|
|
logger_name = "{}/{}".format(logger_name, module_name or "?")
|
2020-03-11 15:16:23 +01:00
|
|
|
|
|
|
|
if settings.RUNNING_INSIDE_TORNADO:
|
|
|
|
# In multi-sharded Tornado, it's often valuable to have which shard is
|
|
|
|
# responsible for the request in the logs.
|
|
|
|
from zerver.tornado.ioloop_logging import logging_data
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
shard = logging_data.get("port", "unknown")
|
2020-06-09 00:25:09 +02:00
|
|
|
logger_name = f"{logger_name}:{shard}"
|
2020-03-11 15:16:23 +01:00
|
|
|
|
|
|
|
return logger_name
|
2017-09-27 01:42:03 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-09-27 23:53:48 +02:00
|
|
|
log_level_abbrevs = {
|
2021-02-12 08:20:45 +01:00
|
|
|
"DEBUG": "DEBG",
|
|
|
|
"INFO": "INFO",
|
|
|
|
"WARNING": "WARN",
|
|
|
|
"ERROR": "ERR",
|
|
|
|
"CRITICAL": "CRIT",
|
2017-09-27 23:53:48 +02:00
|
|
|
}
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def abbrev_log_levelname(levelname: str) -> str:
|
2017-09-27 23:53:48 +02:00
|
|
|
# It's unlikely someone will set a custom log level with a custom name,
|
|
|
|
# but it's an option, so we shouldn't crash if someone does.
|
|
|
|
return log_level_abbrevs.get(levelname, levelname[:4])
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-09-27 23:40:38 +02:00
|
|
|
class ZulipFormatter(logging.Formatter):
|
2017-09-28 00:38:54 +02:00
|
|
|
# Used in the base implementation. Default uses `,`.
|
2021-02-12 08:20:45 +01:00
|
|
|
default_msec_format = "%s.%03d"
|
2017-09-28 00:38:54 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def __init__(self) -> None:
|
2017-10-05 01:41:00 +02:00
|
|
|
super().__init__(fmt=self._compute_fmt())
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def _compute_fmt(self) -> str:
|
2021-02-12 08:20:45 +01:00
|
|
|
pieces = ["%(asctime)s", "%(zulip_level_abbrev)-4s"]
|
2017-10-05 01:41:00 +02:00
|
|
|
if settings.LOGGING_SHOW_PID:
|
2021-02-12 08:20:45 +01:00
|
|
|
pieces.append("pid:%(process)d")
|
|
|
|
pieces.extend(["[%(zulip_origin)s]", "%(message)s"])
|
|
|
|
return " ".join(pieces)
|
2017-09-27 23:40:38 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def format(self, record: logging.LogRecord) -> str:
|
2021-02-12 08:20:45 +01:00
|
|
|
if not getattr(record, "zulip_decorated", False):
|
2017-09-27 01:42:03 +02:00
|
|
|
# The `setattr` calls put this logic explicitly outside the bounds of the
|
|
|
|
# type system; otherwise mypy would complain LogRecord lacks these attributes.
|
2021-02-12 08:20:45 +01:00
|
|
|
setattr(record, "zulip_level_abbrev", abbrev_log_levelname(record.levelname))
|
|
|
|
setattr(record, "zulip_origin", find_log_origin(record))
|
|
|
|
setattr(record, "zulip_decorated", True)
|
2017-09-27 23:40:38 +02:00
|
|
|
return super().format(record)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-09-04 03:38:24 +02:00
|
|
|
class ZulipWebhookFormatter(ZulipFormatter):
|
|
|
|
def _compute_fmt(self) -> str:
|
|
|
|
basic = super()._compute_fmt()
|
|
|
|
multiline = [
|
|
|
|
basic,
|
|
|
|
"user: %(user)s",
|
|
|
|
"client: %(client)s",
|
|
|
|
"url: %(url)s",
|
|
|
|
"content_type: %(content_type)s",
|
|
|
|
"custom_headers:",
|
|
|
|
"%(custom_headers)s",
|
|
|
|
"payload:",
|
|
|
|
"%(payload)s",
|
|
|
|
]
|
|
|
|
return "\n".join(multiline)
|
|
|
|
|
|
|
|
def format(self, record: logging.LogRecord) -> str:
|
|
|
|
from zerver.lib.request import get_current_request
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-09-04 03:38:24 +02:00
|
|
|
request = get_current_request()
|
|
|
|
if not request:
|
2021-02-12 08:20:45 +01:00
|
|
|
setattr(record, "user", None)
|
|
|
|
setattr(record, "client", None)
|
|
|
|
setattr(record, "url", None)
|
|
|
|
setattr(record, "content_type", None)
|
|
|
|
setattr(record, "custom_headers", None)
|
|
|
|
setattr(record, "payload", None)
|
2020-09-04 03:38:24 +02:00
|
|
|
return super().format(record)
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
if request.content_type == "application/json":
|
2020-09-04 03:38:24 +02:00
|
|
|
payload = request.body
|
|
|
|
else:
|
2021-02-12 08:20:45 +01:00
|
|
|
payload = request.POST.get("payload")
|
2020-09-04 03:38:24 +02:00
|
|
|
|
|
|
|
try:
|
|
|
|
payload = orjson.dumps(orjson.loads(payload), option=orjson.OPT_INDENT_2).decode()
|
|
|
|
except orjson.JSONDecodeError:
|
|
|
|
pass
|
|
|
|
|
|
|
|
custom_header_template = "{header}: {value}\n"
|
|
|
|
|
|
|
|
header_text = ""
|
|
|
|
for header in request.META.keys():
|
2021-02-12 08:20:45 +01:00
|
|
|
if header.lower().startswith("http_x"):
|
2020-09-04 03:38:24 +02:00
|
|
|
header_text += custom_header_template.format(
|
2021-02-12 08:19:30 +01:00
|
|
|
header=header, value=request.META[header]
|
|
|
|
)
|
2020-09-04 03:38:24 +02:00
|
|
|
|
|
|
|
header_message = header_text if header_text else None
|
2021-07-09 18:10:51 +02:00
|
|
|
from zerver.lib.request import get_request_notes
|
|
|
|
|
|
|
|
client = get_request_notes(request).client
|
|
|
|
assert client is not None
|
2020-09-04 03:38:24 +02:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
setattr(record, "user", f"{request.user.delivery_email} ({request.user.realm.string_id})")
|
2021-07-09 18:10:51 +02:00
|
|
|
setattr(record, "client", client.name)
|
2021-02-12 08:20:45 +01:00
|
|
|
setattr(record, "url", request.META.get("PATH_INFO", None))
|
|
|
|
setattr(record, "content_type", request.content_type)
|
|
|
|
setattr(record, "custom_headers", header_message)
|
|
|
|
setattr(record, "payload", payload)
|
2020-09-04 03:38:24 +02:00
|
|
|
return super().format(record)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def log_to_file(
|
|
|
|
logger: Logger,
|
|
|
|
filename: str,
|
|
|
|
log_format: str = "%(asctime)s %(levelname)-8s %(message)s",
|
|
|
|
) -> None:
|
2020-08-19 21:55:28 +02:00
|
|
|
"""Note: `filename` should be declared in zproject/computed_settings.py with zulip_path."""
|
2017-12-13 01:45:57 +01:00
|
|
|
formatter = logging.Formatter(log_format)
|
|
|
|
handler = logging.FileHandler(filename)
|
|
|
|
handler.setFormatter(formatter)
|
|
|
|
logger.addHandler(handler)
|