2023-01-28 02:00:15 +01:00
|
|
|
import os
|
2024-07-12 02:30:23 +02:00
|
|
|
from typing import TYPE_CHECKING, Any, Optional
|
2020-07-02 03:40:54 +02:00
|
|
|
|
2020-07-02 02:20:55 +02:00
|
|
|
import sentry_sdk
|
models: Force the translated role into a translated string.
The return type of `ugettext_lazy('...')` (aliased as `_`) is a
promise, which is only forced into a string when it is dealt with in
string context. This `django.utils.functional.lazy.__proxy__` object
is not entirely transparent, however -- it cannot be serialized by
`orjson`, and `isinstance(x, str) == False`, which can lead to
surprising action-at-a-distance.
In the two places which will serialize the role value (either into
Zulip's own error reporting queue, or Sentry's), force the return
value. Failure to do this results in errors being dropped
mostly-silently, as they cannot be serialized and enqueued by the
error reporter logger, which has no recourse but to just log a
warning; see previous commit.
When we do this forcing, explicitly override the language to be the
realm default. Failure to provide this override would translate the
role into the role in the language of the _request_, yielding varying
results.
2020-08-14 02:59:07 +02:00
|
|
|
from django.utils.translation import override as override_language
|
2020-07-02 02:20:55 +02:00
|
|
|
from sentry_sdk.integrations.django import DjangoIntegration
|
sentry: Ignore all SuspiciousOperation loggers.
django.security.DisallowedHost is only one of a set of exceptions that
are "SuspiciousOperation" exceptions; all return a 400 to the user
when they bubble up[1]; all of them are uninteresting to Sentry.
While they may, in bulk, show a mis-configuration of some sort of the
application, such a failure should be detected via the increase in
400's, not via these, which are uninteresting individually.
While all of these are subclasses of SuspiciousOperation, we enumerate
them explicitly for a number of reasons:
- There is no one logger we can ignore that captures all of them.
Each of the errors uses its own logger, and django does not supply
a `django.security` logger that all of them feed into.
- Nor can we catch this by examining the exception object. The
SuspiciousOperation exception is raised too early in the stack for
us to catch the exception by way of middleware and check
`isinstance`. But at the Sentry level, in `add_context`, it is no
longer an exception but a log entry, and as such we have no
`isinstance` that can be applied; we only know the logger name.
- Finally, there is the semantic argument that while we have decided
to ignore this set of security warnings, we _may_ wish to log new
ones that may be added at some point in the future. It is better
to opt into those ignores than to blanket ignore all messages from
the security logger.
This moves the DisallowedHost `ignore_logger` to be adjacent to its
kin, and not on the middleware that may trigger it. Consistency is
more important than locality in this case.
Of these, the DisallowedHost logger is left as the only one that is
explicitly ignored in the LOGGING configuration in
`computed_settings.py`; it is by far the most frequent, and the least
likely to be malicious or impactful (unlike, say, RequestDataTooBig).
[1] https://docs.djangoproject.com/en/3.0/ref/exceptions/#suspiciousoperation
2020-08-12 04:06:04 +02:00
|
|
|
from sentry_sdk.integrations.logging import ignore_logger
|
2020-07-02 02:20:55 +02:00
|
|
|
from sentry_sdk.integrations.redis import RedisIntegration
|
|
|
|
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
|
2020-07-02 03:40:54 +02:00
|
|
|
from sentry_sdk.utils import capture_internal_exceptions
|
2020-07-02 02:20:55 +02:00
|
|
|
|
2020-07-28 02:09:57 +02:00
|
|
|
from version import ZULIP_VERSION
|
2023-01-28 02:00:15 +01:00
|
|
|
from zproject.config import DEPLOY_ROOT
|
2020-07-28 02:09:57 +02:00
|
|
|
|
2020-07-02 03:40:54 +02:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
from sentry_sdk._types import Event, Hint
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
def add_context(event: "Event", hint: "Hint") -> Optional["Event"]:
    """Sentry ``before_send`` hook: scrub PII and attach realm/role context.

    Args:
        event: The Sentry event about to be sent; modified in place.
        hint: Sentry's hint mapping; when it carries ``exc_info``, the
            exception value is inspected.

    Returns:
        ``None`` when the event comes from a non-``Exception`` exception
        (GeneratorExit, KeyboardInterrupt, SystemExit); otherwise the same
        ``event`` object, with ``tags`` ensured, ``username``/``email``
        removed from its user context, and ``realm``/``role`` added when
        the user context carries a string ``id``.
    """
    if "exc_info" in hint:
        _, exc_value, _ = hint["exc_info"]
        # Ignore GeneratorExit, KeyboardInterrupt, and SystemExit exceptions
        if not isinstance(exc_value, Exception):
            return None

    # Imported at call time rather than at module import time, as before.
    # NOTE(review): presumably because settings/models are not loadable
    # when this module is first imported -- confirm.
    from django.conf import settings

    from zerver.models.users import get_user_profile_by_id

    with capture_internal_exceptions():
        # event.user is the user context, from Sentry, which is
        # pre-populated with some keys via its Django integration:
        # https://docs.sentry.io/platforms/python/guides/django/enriching-error-data/additional-data/identify-user/
        event.setdefault("tags", {})
        user_info = event.get("user", {})

        # These are PII, and should be scrubbed.  Do this *before* the
        # user lookup below: any exception raised inside this `with`
        # block is suppressed by capture_internal_exceptions(), so
        # scrubbing afterwards would be silently skipped (and the PII
        # sent) whenever the lookup failed.
        user_info.pop("username", None)
        user_info.pop("email", None)

        user_id = user_info.get("id")
        if isinstance(user_id, str):
            user_profile = get_user_profile_by_id(int(user_id))
            event["tags"]["realm"] = user_info["realm"] = user_profile.realm.string_id or "root"
            with override_language(settings.LANGUAGE_CODE):
                # str() to force the lazy-translation to apply now,
                # since it won't serialize into json for Sentry otherwise
                user_info["role"] = str(user_profile.get_role_name())

    return event
|
2020-07-02 02:20:55 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2024-07-12 02:30:23 +02:00
|
|
|
def traces_sampler(sampling_context: dict[str, Any]) -> float | bool:
    """Pick the trace sampling rate for a Sentry transaction.

    When ``sampling_context["queue"]`` is a string, the rate comes from
    ``settings.SENTRY_TRACE_WORKER_RATE``: if that setting is a dict it is
    looked up by queue name (queues not listed get ``0.0``), otherwise the
    setting's value is used as-is.  In every other case
    ``settings.SENTRY_TRACE_RATE`` is returned.
    """
    from django.conf import settings

    queue_name = sampling_context.get("queue")
    if not isinstance(queue_name, str):
        # No string queue name in the context: use the general rate.
        return settings.SENTRY_TRACE_RATE

    worker_rate = settings.SENTRY_TRACE_WORKER_RATE
    if isinstance(worker_rate, dict):
        # Per-queue configuration; unlisted queues are not sampled.
        return worker_rate.get(queue_name, 0.0)
    return worker_rate
|
|
|
|
|
|
|
|
|
2024-07-12 02:30:23 +02:00
|
|
|
def setup_sentry(dsn: str | None, environment: str) -> None:
    """Initialize the Sentry SDK and silence Django's user-error loggers.

    Does nothing when ``dsn`` is empty or ``None``.  The reported release
    is the stripped first line of ``DEPLOY_ROOT/sentry-release`` when that
    file exists, and ``ZULIP_VERSION`` otherwise.

    Args:
        dsn: Sentry DSN to report to; a falsy value disables setup.
        environment: Environment name passed through to ``sentry_sdk.init``.
    """
    from django.conf import settings

    if not dsn:
        return

    release_path = os.path.join(DEPLOY_ROOT, "sentry-release")
    if os.path.exists(release_path):
        with open(release_path) as release_file:
            sentry_release = release_file.readline().strip()
    else:
        sentry_release = ZULIP_VERSION

    enabled_integrations = [
        DjangoIntegration(),
        RedisIntegration(),
        SqlalchemyIntegration(),
    ]

    sentry_sdk.init(
        dsn=dsn,
        environment=environment,
        release=sentry_release,
        integrations=enabled_integrations,
        before_send=add_context,
        # Increase possible max wait to send exceptions during
        # shutdown, from 2 to 10; potentially-large exceptions are of
        # value to catch during shutdown.
        shutdown_timeout=10,
        # Because we strip the email/username from the Sentry data
        # above, the effect of this flag is that the requests/users
        # involved in exceptions will be identified in Sentry only by
        # their IP address, user ID, realm, and role.  We consider
        # this an appropriate balance between avoiding Sentry getting
        # PII while having the identifiers needed to determine that an
        # exception only affects a small subset of users or realms.
        send_default_pii=True,
        traces_sampler=traces_sampler,
        profiles_sample_rate=settings.SENTRY_PROFILE_RATE,
    )

    # Ignore all of the loggers from django.security that are for user
    # errors; see https://docs.djangoproject.com/en/5.0/ref/exceptions/#suspiciousoperation
    user_error_loggers = (
        "django.security.SuspiciousOperation",
        "django.security.DisallowedHost",
        "django.security.DisallowedModelAdminLookup",
        "django.security.DisallowedModelAdminToField",
        "django.security.DisallowedRedirect",
        "django.security.InvalidSessionKey",
        "django.security.RequestDataTooBig",
        "django.security.SuspiciousFileOperation",
        "django.security.SuspiciousMultipartForm",
        "django.security.SuspiciousSession",
        "django.security.TooManyFieldsSent",
    )
    for logger_name in user_error_loggers:
        ignore_logger(logger_name)
|