zulip/zerver/lib/remote_server.py

294 lines
10 KiB
Python
Raw Normal View History

import logging
import urllib
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
import orjson
import requests
from django.conf import settings
from django.forms.models import model_to_dict
from django.utils.translation import gettext as _
2023-11-27 02:06:23 +01:00
from pydantic import UUID4, BaseModel, ConfigDict, field_validator
from analytics.models import InstallationCount, RealmCount
from version import ZULIP_VERSION
from zerver.lib.exceptions import JsonableError, MissingRemoteRealmError
from zerver.lib.export import floatify_datetime_fields
from zerver.lib.outgoing_http import OutgoingSession
2023-11-27 02:06:23 +01:00
from zerver.models import OrgTypeEnum, Realm, RealmAuditLog
class PushBouncerSession(OutgoingSession):
def __init__(self) -> None:
super().__init__(role="push_bouncer", timeout=30)
class PushNotificationBouncerError(Exception):
pass
class PushNotificationBouncerRetryLaterError(JsonableError):
http_status_code = 502
class RealmDataForAnalytics(BaseModel):
model_config = ConfigDict(extra="forbid")
id: int
host: str
url: str
2023-11-27 02:06:23 +01:00
org_type: int = 0
date_created: float
deactivated: bool
uuid: UUID4
uuid_owner_secret: str
2023-11-27 02:06:23 +01:00
@field_validator("org_type")
@classmethod
def check_is_allowed_value(cls, value: int) -> int:
if value not in [org_type.value for org_type in OrgTypeEnum]:
raise ValueError("Not a valid org_type value")
return value
class UserDataForRemoteBilling(BaseModel):
uuid: UUID4
email: str
full_name: str
def send_to_push_bouncer(
method: str,
endpoint: str,
post_data: Union[bytes, Mapping[str, Union[str, int, None, bytes]]],
extra_headers: Mapping[str, str] = {},
) -> Dict[str, object]:
"""While it does actually send the notice, this function has a lot of
code and comments around error handling for the push notifications
bouncer. There are several classes of failures, each with its own
potential solution:
* Network errors with requests.request. We raise an exception to signal
it to the callers.
* 500 errors from the push bouncer or other unexpected responses;
we don't try to parse the response, but do make clear the cause.
* 400 errors from the push bouncer. Here there are 2 categories:
Our server failed to connect to the push bouncer (should throw)
vs. client-side errors like an invalid token.
"""
assert settings.PUSH_NOTIFICATION_BOUNCER_URL is not None
assert settings.ZULIP_ORG_ID is not None
assert settings.ZULIP_ORG_KEY is not None
url = urllib.parse.urljoin(
settings.PUSH_NOTIFICATION_BOUNCER_URL, "/api/v1/remotes/" + endpoint
)
api_auth = requests.auth.HTTPBasicAuth(settings.ZULIP_ORG_ID, settings.ZULIP_ORG_KEY)
headers = {"User-agent": f"ZulipServer/{ZULIP_VERSION}"}
headers.update(extra_headers)
try:
res = PushBouncerSession().request(
method, url, data=post_data, auth=api_auth, verify=True, headers=headers
)
except (
requests.exceptions.Timeout,
requests.exceptions.SSLError,
requests.exceptions.ConnectionError,
) as e:
raise PushNotificationBouncerRetryLaterError(
f"{type(e).__name__} while trying to connect to push notification bouncer"
)
if res.status_code >= 500:
# 500s should be resolved by the people who run the push
# notification bouncer service, and they'll get an appropriate
# error notification from the server. We raise an exception to signal
# to the callers that the attempt failed and they can retry.
error_msg = "Received 500 from push notification bouncer"
logging.warning(error_msg)
raise PushNotificationBouncerRetryLaterError(error_msg)
elif res.status_code >= 400:
# If JSON parsing errors, just let that exception happen
result_dict = orjson.loads(res.content)
msg = result_dict["msg"]
if "code" in result_dict and result_dict["code"] == "INVALID_ZULIP_SERVER":
# Invalid Zulip server credentials should email this server's admins
raise PushNotificationBouncerError(
_("Push notifications bouncer error: {error}").format(error=msg)
)
elif (
endpoint == "push/test_notification"
and "code" in result_dict
and result_dict["code"] == "INVALID_REMOTE_PUSH_DEVICE_TOKEN"
):
# This error from the notification debugging endpoint should just be directly
# communicated to the device.
# TODO: Extend this to use a more general mechanism when we add more such error responses.
from zerver.lib.push_notifications import InvalidRemotePushDeviceTokenError
raise InvalidRemotePushDeviceTokenError
elif (
endpoint == "server/billing"
and "code" in result_dict
and result_dict["code"] == "MISSING_REMOTE_REALM"
): # nocoverage
# The callers requesting this endpoint want the exception to propagate
# so they can catch it.
raise MissingRemoteRealmError
else:
# But most other errors coming from the push bouncer
# server are client errors (e.g. never-registered token)
# and should be handled as such.
raise JsonableError(msg)
elif res.status_code != 200:
# Anything else is unexpected and likely suggests a bug in
# this version of Zulip, so we throw an exception that will
# email the server admins.
raise PushNotificationBouncerError(
f"Push notification bouncer returned unexpected status code {res.status_code}"
)
# If we don't throw an exception, it's a successful bounce!
return orjson.loads(res.content)
def send_json_to_push_bouncer(
method: str, endpoint: str, post_data: Mapping[str, object]
) -> Dict[str, object]:
return send_to_push_bouncer(
method,
endpoint,
orjson.dumps(post_data),
extra_headers={"Content-type": "application/json"},
)
2019-10-03 02:01:36 +02:00
REALMAUDITLOG_PUSHED_FIELDS = [
"id",
"realm",
"event_time",
"backfilled",
migration: Add `extra_data_json` for audit log models. Note that we use the DjangoJSONEncoder so that we have builtin support for parsing Decimal and datetime. During this intermediate state, the migration that creates extra_data_json field has been run. We prepare for running the backfilling migration that populates extra_data_json from extra_data. This change implements double-write, which is important to keep the state of extra data consistent. For most extra_data usage, this is handled by the overriden `save` method on `AbstractRealmAuditLog`, where we either generates extra_data_json using orjson.loads or ast.literal_eval. While backfilling ensures that old realm audit log entries have extra_data_json populated, double-write ensures that any new entries generated will also have extra_data_json set. So that we can then safely rename extra_data_json to extra_data while ensuring the non-nullable invariant. For completeness, we additionally set RealmAuditLog.NEW_VALUE for the USER_FULL_NAME_CHANGED event. This cannot be handled with the overridden `save`. This addresses: https://github.com/zulip/zulip/pull/23116#discussion_r1040277795 Note that extra_data_json at this point is not used yet. So the test cases do not need to switch to testing extra_data_json. This is later done after we rename extra_data_json to extra_data. Double-write for the remote server audit logs is special, because we only get the dumped bytes from an external source. Luckily, none of the payload carries extra_data that is not generated using orjson.dumps for audit logs of event types in SYNC_BILLING_EVENTS. This can be verified by looking at: `git grep -A 6 -E "event_type=.*(USER_CREATED|USER_ACTIVATED|USER_DEACTIVATED|USER_REACTIVATED|USER_ROLE_CHANGED|REALM_DEACTIVATED|REALM_REACTIVATED)"` Therefore, we just need to populate extra_data_json doing an orjson.loads call after a None-check. Co-authored-by: Zixuan James Li <p359101898@gmail.com>
2023-06-07 21:14:43 +02:00
# Note that we don't need to add extra_data_json here because
# the view remote_server_post_analytics populates extra_data_json
# from the provided extra_data.
"extra_data",
"event_type",
]
def build_analytics_data(
realm_count_query: Any, installation_count_query: Any, realmauditlog_query: Any
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
# We limit the batch size on the client side to avoid OOM kills timeouts, etc.
MAX_CLIENT_BATCH_SIZE = 10000
data = {}
data["analytics_realmcount"] = [
model_to_dict(row) for row in realm_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
]
data["analytics_installationcount"] = [
model_to_dict(row)
for row in installation_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
]
data["zerver_realmauditlog"] = [
model_to_dict(row, fields=REALMAUDITLOG_PUSHED_FIELDS)
for row in realmauditlog_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
2019-10-03 02:01:36 +02:00
]
floatify_datetime_fields(data, "analytics_realmcount")
floatify_datetime_fields(data, "analytics_installationcount")
floatify_datetime_fields(data, "zerver_realmauditlog")
return (
data["analytics_realmcount"],
data["analytics_installationcount"],
data["zerver_realmauditlog"],
)
def get_realms_info_for_push_bouncer(realm_id: Optional[int] = None) -> List[RealmDataForAnalytics]:
realms = Realm.objects.order_by("id")
if realm_id is not None: # nocoverage
realms = realms.filter(id=realm_id)
realm_info_list = [
RealmDataForAnalytics(
id=realm.id,
uuid=realm.uuid,
uuid_owner_secret=realm.uuid_owner_secret,
host=realm.host,
url=realm.uri,
deactivated=realm.deactivated,
date_created=realm.date_created.timestamp(),
2023-11-27 02:06:23 +01:00
org_type=realm.org_type,
)
for realm in realms
]
return realm_info_list
def send_analytics_to_push_bouncer() -> None:
logger = logging.getLogger("zulip.analytics")
# first, check what's latest
try:
result = send_to_push_bouncer("GET", "server/analytics/status", {})
except PushNotificationBouncerRetryLaterError as e:
logger.warning(e.msg, exc_info=True)
return
last_acked_realm_count_id = result["last_realm_count_id"]
last_acked_installation_count_id = result["last_installation_count_id"]
last_acked_realmauditlog_id = result["last_realmauditlog_id"]
# Gather only entries with IDs greater than the last ID received by the push bouncer.
# We don't re-send old data that's already been submitted.
2019-10-03 02:01:36 +02:00
(realm_count_data, installation_count_data, realmauditlog_data) = build_analytics_data(
realm_count_query=RealmCount.objects.filter(id__gt=last_acked_realm_count_id),
installation_count_query=InstallationCount.objects.filter(
id__gt=last_acked_installation_count_id
),
2019-10-03 02:01:36 +02:00
realmauditlog_query=RealmAuditLog.objects.filter(
event_type__in=RealmAuditLog.SYNCED_BILLING_EVENTS, id__gt=last_acked_realmauditlog_id
),
)
record_count = len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data)
if record_count == 0:
logger.info("No new records to report.")
return
request = {
"realm_counts": orjson.dumps(realm_count_data).decode(),
"installation_counts": orjson.dumps(installation_count_data).decode(),
"realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
"realms": orjson.dumps(
[dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()]
).decode(),
"version": orjson.dumps(ZULIP_VERSION).decode(),
}
try:
send_to_push_bouncer("POST", "server/analytics", request)
except JsonableError as e:
logger.warning(e.msg)
logger.info("Reported %d records", record_count)
def send_realms_only_to_push_bouncer() -> None:
request = {
"realm_counts": "[]",
"installation_counts": "[]",
"realms": orjson.dumps(
[dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()]
).decode(),
"version": orjson.dumps(ZULIP_VERSION).decode(),
}
# We don't catch JsonableError here, because we want it to propagate further
# to either explicitly, loudly fail or be error-handled by the caller.
send_to_push_bouncer("POST", "server/analytics", request)