[puppet] Report enterprise and prod errors to staging.

Errors are sent to a queue processor that posts them to staging,
just like the feedback bot.

(imported from commit 4a8d099672a1b3e48a8bc94148d8b53db73d2c64)
This commit is contained in:
Kevin Mehall 2013-11-13 13:12:22 -05:00
parent 3ec7663cb2
commit f7f2ec0aca
8 changed files with 244 additions and 64 deletions

View File

@ -10,4 +10,5 @@ USER=root
* * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file signups
* * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file message_sender
* * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file feedback_messages
* * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file error_reports
* * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file digest_emails

View File

@ -164,6 +164,18 @@ redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/var/log/zulip/events-feedback_messages.log ; stdout log path, NONE for none; default AUTO
directory=/home/zulip/deployments/current/
[program:zulip-events-error_reports]
command=python /home/zulip/deployments/current/manage.py process_queue error_reports
priority=600 ; the relative start priority (default 999)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; whether/when to restart (default: unexpected)
stopsignal=TERM ; signal used to kill process (default TERM)
stopwaitsecs=30 ; max num secs to wait b4 SIGKILL (default 10)
user=zulip ; setuid to this UNIX account to run the program
redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/var/log/zulip/events-error_reports.log ; stdout log path, NONE for none; default AUTO
directory=/home/zulip/deployments/current/
[program:zulip-events-digest_emails]
command=python /home/zulip/deployments/current/manage.py process_queue digest_emails
priority=600 ; the relative start priority (default 999)
@ -181,7 +193,7 @@ directory=/home/zulip/deployments/current/
; process groups.
[group:zulip-workers]
programs=zulip-events-user-activity,zulip-events-user-activity-interval,zulip-events-user-presence,zulip-events-signups,zulip-events-confirmation-emails,zulip-events-missedmessage_reminders,zulip-events-slowqueries,zulip-events-message_sender,zulip-events-feedback_messages,zulip-events-digest_emails ; each refers to 'x' in [program:x] definitions
programs=zulip-events-user-activity,zulip-events-user-activity-interval,zulip-events-user-presence,zulip-events-signups,zulip-events-confirmation-emails,zulip-events-missedmessage_reminders,zulip-events-slowqueries,zulip-events-message_sender,zulip-events-feedback_messages,zulip-events-digest_emails,zulip-events-error_reports ; each refers to 'x' in [program:x] definitions
; The [include] section can just contain the "files" setting. This
; setting can list multiple files (separated by whitespace or

View File

@ -10,6 +10,8 @@ from django.core import mail
from django.utils.log import AdminEmailHandler
from django.views.debug import ExceptionReporter, get_exception_reporter_filter
from zerver.lib.queue import queue_json_publish
def format_record(record):
"""
Given a Django error LogRecord, format and return the interesting details,
@ -32,10 +34,8 @@ def format_record(record):
return (subject, stack_trace, user_info)
class AdminZulipHandler(logging.Handler):
"""An exception log handler that Zulips log entries to the Zulip realm.
If the request is passed as the first argument to the log record,
request data will be provided in the email report.
"""An exception log handler that sends the exception to the queue to be
sent to the Zulip feedback server.
"""
# adapted in part from django/utils/log.py
@ -44,49 +44,63 @@ class AdminZulipHandler(logging.Handler):
logging.Handler.__init__(self)
def emit(self, record):
if not settings.ERROR_BOT:
return
# We have to defer imports to avoid circular imports in settings.py.
from zerver.lib.actions import internal_send_message
try:
request = record.request
filter = get_exception_reporter_filter(request)
request_repr = "Request info:\n~~~~\n"
request_repr += "- path: %s\n" % (request.path,)
if request.method == "GET":
request_repr += "- GET: %s\n" % (request.GET,)
elif request.method == "POST":
request_repr += "- POST: %s\n" % (filter.get_post_parameters(request),)
for field in ["REMOTE_ADDR", "QUERY_STRING", "SERVER_NAME"]:
request_repr += "- %s: \"%s\"\n" % (field, request.META.get(field, "(None)"))
request_repr += "~~~~"
except Exception:
request_repr = "Log record message:\n%s" % (record.getMessage(),)
subject, stack_trace, user_info = format_record(record)
if record.exc_info:
stack_trace = ''.join(traceback.format_exception(*record.exc_info))
else:
stack_trace = None
try:
user_profile = request.user
user_full_name = user_profile.full_name
user_email = user_profile.email
except Exception:
traceback.print_exc()
# Error was triggered by an anonymous user.
user_full_name = None
user_email = None
report = dict(
node = platform.node(),
method = request.method,
path = request.path,
data = request.GET if request.method == 'GET'
else filter.get_post_parameters(request),
remote_addr = request.META.get('REMOTE_ADDR', None),
query_string = request.META.get('QUERY_STRING', None),
server_name = request.META.get('SERVER_NAME', None),
message = record.getMessage(),
stack_trace = stack_trace,
user_full_name = user_full_name,
user_email = user_email,
)
except:
traceback.print_exc()
report = dict(
node = platform.node(),
message = record.getMessage(),
)
try:
internal_send_message(settings.ERROR_BOT,
"stream", "errors", self.format_subject(subject),
"Error generated by %s\n\n~~~~ pytb\n%s\n\n~~~~\n%s" % (
user_info, stack_trace, request_repr))
if not settings.STAGING_DEPLOYED:
queue_json_publish('error_reports', dict(
type = "server",
report = report,
), lambda x: None)
else:
# On staging, process the report directly so it can happen inside this
# try/except to prevent looping
from zilencer.error_notify import notify_server_error
notify_server_error(report)
except:
# If this breaks, complain loudly but don't pass the traceback up the stream
# However, we *don't* want to use logging.exception since that could trigger a loop.
logging.warning("Reporting an exception triggered an exception!", exc_info=True)
def format_subject(self, subject):
"""
Escape CR and LF characters, and limit length to MAX_SUBJECT_LENGTH.
"""
from zerver.models import MAX_SUBJECT_LENGTH
formatted_subject = subject.replace('\n', '\\n').replace('\r', '\\r')
return formatted_subject[:MAX_SUBJECT_LENGTH]
class ZulipAdminEmailHandler(AdminEmailHandler):
"""An exception log handler that emails log entries to site admins.

View File

@ -1930,34 +1930,32 @@ def json_report_error(request, user_profile, message=REQ, stacktrace=REQ,
ui_message=REQ(converter=json_to_bool), user_agent=REQ,
href=REQ, log=REQ,
more_info=REQ(converter=json_to_dict, default=None)):
subject = "error for %s" % (user_profile.email,)
if ui_message:
subject = "User-visible browser " + subject
else:
subject = "Browser " + subject
if js_source_map:
stacktrace = js_source_map.annotate_stacktrace(stacktrace)
body = ("Message:\n%s\n\nStacktrace:\n%s\n\nUser agent: %s\nhref: %s\n"
"User saw error in UI: %s\n"
% (message, stacktrace, user_agent, href, ui_message))
body += "Server path: %s\n" % (settings.DEPLOY_ROOT,)
try:
body += "Deployed version: %s" % (
subprocess.check_output(["git", "log", "HEAD^..HEAD", "--oneline"]),)
version = subprocess.check_output(["git", "log", "HEAD^..HEAD", "--oneline"])
except Exception:
body += "Could not determine current git commit ID.\n"
version = None
if more_info is not None:
body += "\nAdditional information:"
for (key, value) in more_info.iteritems():
body += "\n %s: %s" % (key, value)
queue_json_publish('error_reports', dict(
type = "browser",
report = dict(
user_email = user_profile.email,
user_full_name = user_profile.full_name,
user_visible = ui_message,
server_path = settings.DEPLOY_ROOT,
version = version,
user_agent = user_agent,
href = href,
message = message,
stacktrace = stacktrace,
log = log,
more_info = more_info,
)
), lambda x: None)
body += "\n\nLog:\n%s" % (log,)
mail_admins(subject, body)
return json_success()
@authenticated_json_post_view

View File

@ -188,16 +188,19 @@ class MissedMessageWorker(QueueProcessingWorker):
# of messages
time.sleep(2 * 60)
def make_feedback_client():
sys.path.append(os.path.join(os.path.dirname(__file__), '../../api'))
import zulip
return zulip.Client(
email=settings.DEPLOYMENT_ROLE_NAME,
api_key=settings.DEPLOYMENT_ROLE_KEY,
verbose=True,
site=settings.FEEDBACK_TARGET)
@assign_queue('feedback_messages')
class FeedbackBot(QueueProcessingWorker):
def start(self):
sys.path.append(os.path.join(os.path.dirname(__file__), '../../api'))
import zulip
self.staging_client = zulip.Client(
email=settings.DEPLOYMENT_ROLE_NAME,
api_key=settings.DEPLOYMENT_ROLE_KEY,
verbose=True,
site=settings.FEEDBACK_TARGET)
self.staging_client = make_feedback_client()
self.staging_client._register(
'forward_feedback',
method='POST',
@ -209,6 +212,21 @@ class FeedbackBot(QueueProcessingWorker):
def consume(self, event):
self.staging_client.forward_feedback(event)
@assign_queue('error_reports')
class ErrorReporter(QueueProcessingWorker):
def start(self):
self.staging_client = make_feedback_client()
self.staging_client._register(
'forward_error',
method='POST',
url='deployments/report_error',
make_request=(lambda type, report: {'type': type, 'report': simplejson.dumps(report)}),
)
QueueProcessingWorker.start(self)
def consume(self, event):
self.staging_client.forward_error(event['type'], event['report'])
@assign_queue('slow_queries')
class SlowQueryWorker(QueueProcessingWorker):
def start(self):

125
zilencer/error_notify.py Normal file
View File

@ -0,0 +1,125 @@
from collections import defaultdict
import logging
from django.conf import settings
from django.core.mail import mail_admins
from zerver.lib.actions import internal_send_message
def format_subject(subject):
"""
Escape CR and LF characters, and limit length to MAX_SUBJECT_LENGTH.
"""
from zerver.models import MAX_SUBJECT_LENGTH
subject = subject.replace('\n', '\\n').replace('\r', '\\r')
if len(subject) > MAX_SUBJECT_LENGTH:
subject = subject[:MAX_SUBJECT_LENGTH-3].rstrip() + "..."
return subject
def user_info_str(report):
if report['user_full_name'] and report['user_email']:
user_info = "%(user_full_name)s (%(user_email)s)" % (report)
else:
user_info = "Anonymous user (not logged in)"
user_info += " on %s deployment" % (report['deployment'],)
return user_info
def notify_browser_error(report):
report = defaultdict(lambda: None, report)
if settings.ERROR_BOT:
zulip_browser_error(report)
email_browser_error(report)
def email_browser_error(report):
subject = "Browser error for %s" % (user_info_str(report))
body = ("User: %(user_full_name)s <%(user_email)s> on %(deployment)s\n\n"
"Message:\n%(message)s\n\nStacktrace:\n%(stacktrace)s\n\n"
"User agent: %(user_agent)s\n"
"href: %(href)s\n"
"Server path: %(server_path)s\n"
"Deployed version: %(version)s\n"
% report)
more_info = report['more_info']
if more_info is not None:
body += "\nAdditional information:"
for (key, value) in more_info.iteritems():
body += "\n %s: %s" % (key, value)
body += "\n\nLog:\n%s" % (report['log'],)
mail_admins(subject, body)
def zulip_browser_error(report):
subject = "JS error: %s" % (report['user_email'],)
user_info = user_info_str(report)
body = "User: %s\n" % (user_info,)
body += (
"Message: %(message)s\n"
"~~~\n%(stacktrace)s\n~~~\n"
"User agent: `%(user_agent)s`\n"
"href: `%(href)s`\n"
"Server path: `%(server_path)s`\n"
"Deployed version: `%(version)s`\n"
% report )
more_info = report['more_info']
if more_info is not None:
body += "\nAdditional information:\n~~~"
for (key, value) in more_info.iteritems():
body += "\n %s: %s" % (key, value)
body += "Log:\n~~~\n%s\n~~~" % (report['log'],)
internal_send_message(settings.ERROR_BOT,
"stream", "errors", format_subject(subject), body)
def notify_server_error(report):
report = defaultdict(lambda: None, report)
email_server_error(report)
if settings.ERROR_BOT:
zulip_server_error(report)
def zulip_server_error(report):
subject = '%(node)s: %(message)s' % report
stack_trace = report['stack_trace'] or "No stack trace available"
user_info = user_info_str(report)
request_repr = (
"Request info:\n~~~~\n"
"- path: %(path)s\n"
"- %(method)s: %(data)s\n") % (report)
for field in ["REMOTE_ADDR", "QUERY_STRING", "SERVER_NAME"]:
request_repr += "- %s: \"%s\"\n" % (field, report.get(field.lower()))
request_repr += "~~~~"
internal_send_message(settings.ERROR_BOT,
"stream", "errors", format_subject(subject),
"Error generated by %s\n\n~~~~ pytb\n%s\n\n~~~~\n%s" % (
user_info, stack_trace, request_repr))
def email_server_error(report):
subject = '%(node)s: %(message)s' % (report)
user_info = user_info_str(report)
request_repr = (
"Request info:\n"
"- path: %(path)s\n"
"- %(method)s: %(data)s\n") % (report)
for field in ["REMOTE_ADDR", "QUERY_STRING", "SERVER_NAME"]:
request_repr += "- %s: \"%s\"\n" % (field, report.get(field.lower()))
message = "Error generated by %s\n\n%s\n\n%s" % (user_info, report['stack_trace'],
request_repr)
mail_admins(format_subject(subject), message, fail_silently=True)

View File

@ -3,5 +3,7 @@ from django.conf.urls import patterns, url, include
urlpatterns = patterns('zilencer.views',
url('^feedback$', 'rest_dispatch',
{'POST': 'submit_feedback'}),
url('^report_error$', 'rest_dispatch',
{'POST': 'report_error'}),
url('^endpoints$', 'lookup_endpoints_for_user'),
)

View File

@ -9,7 +9,7 @@ from zerver.lib.response import json_success, json_error, json_response, json_me
from zerver.lib.rest import rest_dispatch as _rest_dispatch
from zerver.models import get_realm, get_user_profile_by_email, email_to_domain, \
UserProfile
from error_notify import notify_server_error, notify_browser_error
rest_dispatch = csrf_exempt((lambda request, *args, **kwargs: _rest_dispatch(request, globals(), *args, **kwargs)))
@ -51,6 +51,16 @@ def submit_feedback(request, deployment, message=REQ(converter=json_to_dict)):
return HttpResponse(message['sender_email'])
@has_request_variables
def report_error(request, deployment, type=REQ, report=REQ(converter=json_to_dict)):
report['deployment'] = deployment.name
if type == 'browser':
notify_browser_error(report)
elif type == 'server':
notify_server_error(report)
else:
return json_error("Invalid type parameter")
return json_response({})
def realm_for_email(email):
try: