2017-11-16 19:51:44 +01:00
|
|
|
# System documented in https://zulip.readthedocs.io/en/latest/subsystems/logging.html
|
2013-10-10 21:37:26 +02:00
|
|
|
|
2017-04-15 04:03:56 +02:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2017-04-15 03:29:56 +02:00
|
|
|
from django.utils.timezone import utc as timezone_utc
|
2017-02-26 04:58:25 +01:00
|
|
|
|
2016-08-27 07:55:54 +02:00
|
|
|
import hashlib
|
2013-03-15 17:03:56 +01:00
|
|
|
import logging
|
2017-09-16 17:07:06 +02:00
|
|
|
import re
|
2016-08-27 07:55:54 +02:00
|
|
|
import traceback
|
2017-09-27 03:08:43 +02:00
|
|
|
from typing import Optional
|
2012-12-05 18:01:43 +01:00
|
|
|
from datetime import datetime, timedelta
|
2017-03-22 22:09:38 +01:00
|
|
|
from django.conf import settings
|
2017-08-25 15:25:00 +02:00
|
|
|
from logging import Logger
|
2012-12-05 18:01:43 +01:00
|
|
|
|
|
|
|
# Adapted http://djangosnippets.org/snippets/2242/ by user s29 (October 25, 2010)
|
|
|
|
|
2017-11-05 11:37:41 +01:00
|
|
|
class _RateLimitFilter:
|
2017-04-15 03:29:56 +02:00
|
|
|
last_error = datetime.min.replace(tzinfo=timezone_utc)
|
2012-12-05 18:01:43 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2012-12-05 18:01:43 +01:00
|
|
|
from django.conf import settings
|
|
|
|
from django.core.cache import cache
|
|
|
|
|
|
|
|
# Track duplicate errors
|
|
|
|
duplicate = False
|
2016-11-30 22:49:02 +01:00
|
|
|
rate = getattr(settings, '%s_LIMIT' % self.__class__.__name__.upper(),
|
2016-12-03 00:04:17 +01:00
|
|
|
600) # seconds
|
2012-12-05 18:01:43 +01:00
|
|
|
if rate > 0:
|
|
|
|
# Test if the cache works
|
|
|
|
try:
|
|
|
|
cache.set('RLF_TEST_KEY', 1, 1)
|
|
|
|
use_cache = cache.get('RLF_TEST_KEY') == 1
|
2017-03-05 10:25:27 +01:00
|
|
|
except Exception:
|
2012-12-05 18:01:43 +01:00
|
|
|
use_cache = False
|
|
|
|
|
|
|
|
if use_cache:
|
2016-12-20 20:42:57 +01:00
|
|
|
if record.exc_info is not None:
|
2017-11-04 16:34:55 +01:00
|
|
|
tb = '\n'.join(traceback.format_exception(*record.exc_info))
|
2016-12-20 20:42:57 +01:00
|
|
|
else:
|
2017-11-04 16:34:55 +01:00
|
|
|
tb = str(record)
|
|
|
|
key = self.__class__.__name__.upper() + hashlib.sha1(tb.encode()).hexdigest()
|
2012-12-06 22:00:34 +01:00
|
|
|
duplicate = cache.get(key) == 1
|
2016-08-27 07:55:54 +02:00
|
|
|
if not duplicate:
|
|
|
|
cache.set(key, 1, rate)
|
2012-12-05 18:01:43 +01:00
|
|
|
else:
|
2017-04-15 04:03:56 +02:00
|
|
|
min_date = timezone_now() - timedelta(seconds=rate)
|
2012-12-05 18:01:43 +01:00
|
|
|
duplicate = (self.last_error >= min_date)
|
|
|
|
if not duplicate:
|
2017-04-15 04:03:56 +02:00
|
|
|
self.last_error = timezone_now()
|
2012-12-05 18:01:43 +01:00
|
|
|
|
|
|
|
return not duplicate
|
2012-12-06 22:00:34 +01:00
|
|
|
|
2013-08-06 21:37:34 +02:00
|
|
|
class ZulipLimiter(_RateLimitFilter):
|
2012-12-06 22:00:34 +01:00
|
|
|
pass
|
|
|
|
|
|
|
|
class EmailLimiter(_RateLimitFilter):
|
|
|
|
pass
|
2013-03-15 17:03:56 +01:00
|
|
|
|
|
|
|
class ReturnTrue(logging.Filter):
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2013-03-15 17:03:56 +01:00
|
|
|
return True
|
2013-06-10 18:57:59 +02:00
|
|
|
|
2017-03-22 22:09:38 +01:00
|
|
|
class ReturnEnabled(logging.Filter):
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2017-03-22 22:09:38 +01:00
|
|
|
return settings.LOGGING_NOT_DISABLED
|
|
|
|
|
2013-06-10 18:57:59 +02:00
|
|
|
class RequireReallyDeployed(logging.Filter):
|
2017-11-05 11:15:10 +01:00
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
2013-06-10 18:57:59 +02:00
|
|
|
from django.conf import settings
|
2015-08-21 09:18:44 +02:00
|
|
|
return settings.PRODUCTION
|
2017-01-18 12:52:01 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def skip_200_and_304(record: logging.LogRecord) -> bool:
|
2017-01-18 12:52:01 +01:00
|
|
|
# Apparently, `status_code` is added by Django and is not an actual
|
|
|
|
# attribute of LogRecord; as a result, mypy throws an error if we
|
|
|
|
# access the `status_code` attribute directly.
|
|
|
|
if getattr(record, 'status_code') in [200, 304]:
|
|
|
|
return False
|
|
|
|
|
|
|
|
return True
|
2017-03-15 10:08:27 +01:00
|
|
|
|
2017-09-16 17:07:06 +02:00
|
|
|
IGNORABLE_404_URLS = [
|
|
|
|
re.compile(r'^/apple-touch-icon.*\.png$'),
|
|
|
|
re.compile(r'^/favicon\.ico$'),
|
|
|
|
re.compile(r'^/robots\.txt$'),
|
|
|
|
re.compile(r'^/django_static_404.html$'),
|
|
|
|
re.compile(r'^/wp-login.php$'),
|
|
|
|
]
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def skip_boring_404s(record: logging.LogRecord) -> bool:
|
2017-09-16 17:07:06 +02:00
|
|
|
"""Prevents Django's 'Not Found' warnings from being logged for common
|
|
|
|
404 errors that don't reflect a problem in Zulip. The overall
|
|
|
|
result is to keep the Zulip error logs cleaner than they would
|
|
|
|
otherwise be.
|
|
|
|
|
|
|
|
Assumes that its input is a django.request log record.
|
|
|
|
"""
|
|
|
|
# Apparently, `status_code` is added by Django and is not an actual
|
|
|
|
# attribute of LogRecord; as a result, mypy throws an error if we
|
|
|
|
# access the `status_code` attribute directly.
|
|
|
|
if getattr(record, 'status_code') != 404:
|
|
|
|
return True
|
|
|
|
|
|
|
|
# We're only interested in filtering the "Not Found" errors.
|
|
|
|
if getattr(record, 'msg') != 'Not Found: %s':
|
|
|
|
return True
|
|
|
|
|
|
|
|
path = getattr(record, 'args', [''])[0]
|
|
|
|
for pattern in IGNORABLE_404_URLS:
|
|
|
|
if re.match(pattern, path):
|
|
|
|
return False
|
|
|
|
return True
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def skip_site_packages_logs(record: logging.LogRecord) -> bool:
|
2017-03-15 10:08:27 +01:00
|
|
|
# This skips the log records that are generated from libraries
|
|
|
|
# installed in site packages.
|
|
|
|
# Workaround for https://code.djangoproject.com/ticket/26886
|
|
|
|
if 'site-packages' in record.pathname:
|
|
|
|
return False
|
|
|
|
return True
|
2017-08-25 15:25:00 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def find_log_caller_module(record: logging.LogRecord) -> Optional[str]:
|
2017-12-01 01:26:37 +01:00
|
|
|
'''Find the module name corresponding to where this record was logged.
|
|
|
|
|
|
|
|
Sadly `record.module` is just the innermost component of the full
|
|
|
|
module name, so we have to go reconstruct this ourselves.
|
|
|
|
'''
|
2017-09-27 03:08:43 +02:00
|
|
|
# Repeat a search similar to that in logging.Logger.findCaller.
|
|
|
|
# The logging call should still be on the stack somewhere; search until
|
|
|
|
# we find something in the same source file, and that should give the
|
|
|
|
# right module name.
|
|
|
|
f = logging.currentframe() # type: ignore # Not in typeshed, and arguably shouldn't be
|
|
|
|
while f is not None:
|
|
|
|
if f.f_code.co_filename == record.pathname:
|
|
|
|
return f.f_globals.get('__name__')
|
|
|
|
f = f.f_back
|
|
|
|
return None
|
|
|
|
|
2017-09-27 23:46:44 +02:00
|
|
|
logger_nicknames = {
|
|
|
|
'root': '', # This one is more like undoing a nickname.
|
|
|
|
'zulip.requests': 'zr', # Super common.
|
|
|
|
}
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def find_log_origin(record: logging.LogRecord) -> str:
|
2017-09-27 03:08:43 +02:00
|
|
|
logger_name = logger_nicknames.get(record.name, record.name)
|
|
|
|
|
|
|
|
if settings.LOGGING_SHOW_MODULE:
|
|
|
|
module_name = find_log_caller_module(record)
|
|
|
|
if module_name == logger_name or module_name == record.name:
|
|
|
|
# Abbreviate a bit.
|
|
|
|
return logger_name
|
|
|
|
else:
|
|
|
|
return '{}/{}'.format(logger_name, module_name or '?')
|
|
|
|
else:
|
|
|
|
return logger_name
|
2017-09-27 01:42:03 +02:00
|
|
|
|
2017-09-27 23:53:48 +02:00
|
|
|
log_level_abbrevs = {
|
|
|
|
'DEBUG': 'DEBG',
|
|
|
|
'INFO': 'INFO',
|
|
|
|
'WARNING': 'WARN',
|
|
|
|
'ERROR': 'ERR',
|
|
|
|
'CRITICAL': 'CRIT',
|
|
|
|
}
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def abbrev_log_levelname(levelname: str) -> str:
|
2017-09-27 23:53:48 +02:00
|
|
|
# It's unlikely someone will set a custom log level with a custom name,
|
|
|
|
# but it's an option, so we shouldn't crash if someone does.
|
|
|
|
return log_level_abbrevs.get(levelname, levelname[:4])
|
|
|
|
|
2017-09-27 23:40:38 +02:00
|
|
|
class ZulipFormatter(logging.Formatter):
|
2017-09-28 00:38:54 +02:00
|
|
|
# Used in the base implementation. Default uses `,`.
|
|
|
|
default_msec_format = '%s.%03d'
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def __init__(self) -> None:
|
2017-10-05 01:41:00 +02:00
|
|
|
super().__init__(fmt=self._compute_fmt())
|
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def _compute_fmt(self) -> str:
|
2017-10-05 01:41:00 +02:00
|
|
|
pieces = ['%(asctime)s', '%(zulip_level_abbrev)-4s']
|
|
|
|
if settings.LOGGING_SHOW_PID:
|
|
|
|
pieces.append('pid:%(process)d')
|
|
|
|
pieces.extend(['[%(zulip_origin)s]', '%(message)s'])
|
|
|
|
return ' '.join(pieces)
|
2017-09-27 23:40:38 +02:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def format(self, record: logging.LogRecord) -> str:
|
2017-09-27 01:42:03 +02:00
|
|
|
if not getattr(record, 'zulip_decorated', False):
|
|
|
|
# The `setattr` calls put this logic explicitly outside the bounds of the
|
|
|
|
# type system; otherwise mypy would complain LogRecord lacks these attributes.
|
2017-09-27 23:53:48 +02:00
|
|
|
setattr(record, 'zulip_level_abbrev', abbrev_log_levelname(record.levelname))
|
2017-09-27 01:42:03 +02:00
|
|
|
setattr(record, 'zulip_origin', find_log_origin(record))
|
|
|
|
setattr(record, 'zulip_decorated', True)
|
2017-09-27 23:40:38 +02:00
|
|
|
return super().format(record)
|
|
|
|
|
2017-12-13 01:45:57 +01:00
|
|
|
def log_to_file(logger: Logger,
|
|
|
|
filename: str,
|
2018-01-18 02:11:21 +01:00
|
|
|
log_format: str="%(asctime)s %(levelname)-8s %(message)s",
|
2017-12-13 01:45:57 +01:00
|
|
|
) -> None:
|
|
|
|
"""Note: `filename` should be declared in zproject/settings.py in ZULIP_PATHS."""
|
|
|
|
formatter = logging.Formatter(log_format)
|
|
|
|
handler = logging.FileHandler(filename)
|
|
|
|
handler.setFormatter(formatter)
|
|
|
|
logger.addHandler(handler)
|