2019-02-12 06:16:10 +01:00
|
|
|
import logging
|
2019-01-31 00:44:02 +01:00
|
|
|
import urllib
|
2023-11-15 22:44:24 +01:00
|
|
|
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
|
2019-01-31 00:44:02 +01:00
|
|
|
|
2020-08-07 01:09:47 +02:00
|
|
|
import orjson
|
2020-06-11 00:54:34 +02:00
|
|
|
import requests
|
2019-01-31 00:44:02 +01:00
|
|
|
from django.conf import settings
|
2019-01-31 00:39:02 +01:00
|
|
|
from django.forms.models import model_to_dict
|
2021-04-16 00:57:30 +02:00
|
|
|
from django.utils.translation import gettext as _
|
2023-11-27 02:06:23 +01:00
|
|
|
from pydantic import UUID4, BaseModel, ConfigDict, field_validator
|
2019-01-31 00:44:02 +01:00
|
|
|
|
2019-01-31 00:39:02 +01:00
|
|
|
from analytics.models import InstallationCount, RealmCount
|
2019-01-31 00:44:02 +01:00
|
|
|
from version import ZULIP_VERSION
|
2023-11-15 22:44:24 +01:00
|
|
|
from zerver.lib.exceptions import JsonableError, MissingRemoteRealmError
|
2019-01-31 00:39:02 +01:00
|
|
|
from zerver.lib.export import floatify_datetime_fields
|
2021-05-07 03:54:25 +02:00
|
|
|
from zerver.lib.outgoing_http import OutgoingSession
|
2023-11-27 02:06:23 +01:00
|
|
|
from zerver.models import OrgTypeEnum, Realm, RealmAuditLog
|
2019-01-31 00:44:02 +01:00
|
|
|
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2021-05-07 03:54:25 +02:00
|
|
|
class PushBouncerSession(OutgoingSession):
    """Outgoing HTTP session used for all requests to the push notification
    bouncer.

    Tags requests with the "push_bouncer" role and applies a 30-second
    timeout (both interpreted by OutgoingSession).
    """

    def __init__(self) -> None:
        super().__init__(role="push_bouncer", timeout=30)
|
|
|
|
|
|
|
|
|
2022-11-17 09:30:48 +01:00
|
|
|
class PushNotificationBouncerError(Exception):
    """Raised for push bouncer failures that indicate a bug or server
    misconfiguration (e.g. invalid credentials or an unexpected HTTP
    status code), rather than a transient problem worth retrying.
    """
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-12-02 19:46:11 +01:00
|
|
|
class PushNotificationBouncerRetryLaterError(JsonableError):
    # Transient failure talking to the push bouncer (network error or a
    # bouncer-side 5xx); callers are expected to retry later.
    # Reported to API clients as a 502 Bad Gateway.
    http_status_code = 502
|
2019-12-02 19:46:11 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2023-11-17 14:07:41 +01:00
|
|
|
class RealmDataForAnalytics(BaseModel):
    """Schema for the per-realm metadata uploaded to the push bouncer
    alongside analytics data. Unknown keys are rejected outright.
    """

    model_config = ConfigDict(extra="forbid")

    id: int
    host: str
    url: str
    org_type: int = 0
    date_created: float
    deactivated: bool

    uuid: UUID4
    uuid_owner_secret: str

    @field_validator("org_type")
    @classmethod
    def check_is_allowed_value(cls, value: int) -> int:
        # Only integers corresponding to a known OrgTypeEnum member are valid.
        allowed_values = {org_type.value for org_type in OrgTypeEnum}
        if value not in allowed_values:
            raise ValueError("Not a valid org_type value")
        return value
|
|
|
|
|
2023-11-17 14:07:41 +01:00
|
|
|
|
2023-11-15 22:44:24 +01:00
|
|
|
class UserDataForRemoteBilling(BaseModel):
    """Schema describing a user to the remote billing system.

    The uuid identifies the user; email and full_name are presumably for
    display/contact on the billing side — confirm against the consumer.
    """

    uuid: UUID4
    email: str
    full_name: str
|
|
|
|
|
|
|
|
|
2020-07-05 20:46:41 +02:00
|
|
|
def send_to_push_bouncer(
    method: str,
    endpoint: str,
    post_data: Union[bytes, Mapping[str, Union[str, int, None, bytes]]],
    extra_headers: Mapping[str, str] = {},
) -> Dict[str, object]:
    """Send one request to the push notification bouncer and parse the reply.

    While it does actually send the notice, most of this function is error
    handling. There are several classes of failures, each handled its own way:

    * Network errors with requests.request: re-raised as
      PushNotificationBouncerRetryLaterError so callers can retry.

    * 500 errors from the push bouncer or other unexpected responses:
      we don't try to parse the response, but do make clear the cause.

    * 400 errors from the push bouncer, which split into two categories:
      our server failed to connect to the push bouncer (should throw)
      vs. client-side errors like an invalid token.
    """
    assert settings.PUSH_NOTIFICATION_BOUNCER_URL is not None
    assert settings.ZULIP_ORG_ID is not None
    assert settings.ZULIP_ORG_KEY is not None

    url = urllib.parse.urljoin(
        settings.PUSH_NOTIFICATION_BOUNCER_URL, "/api/v1/remotes/" + endpoint
    )
    api_auth = requests.auth.HTTPBasicAuth(settings.ZULIP_ORG_ID, settings.ZULIP_ORG_KEY)

    # Callers may override/extend the default headers (e.g. Content-type).
    request_headers = {"User-agent": f"ZulipServer/{ZULIP_VERSION}", **extra_headers}

    try:
        response = PushBouncerSession().request(
            method, url, data=post_data, auth=api_auth, verify=True, headers=request_headers
        )
    except (
        requests.exceptions.Timeout,
        requests.exceptions.SSLError,
        requests.exceptions.ConnectionError,
    ) as e:
        raise PushNotificationBouncerRetryLaterError(
            f"{type(e).__name__} while trying to connect to push notification bouncer"
        )

    if response.status_code >= 500:
        # 500s should be resolved by the people who run the push
        # notification bouncer service, and they'll get an appropriate
        # error notification from the server. We raise an exception to signal
        # to the callers that the attempt failed and they can retry.
        error_msg = "Received 500 from push notification bouncer"
        logging.warning(error_msg)
        raise PushNotificationBouncerRetryLaterError(error_msg)

    if response.status_code >= 400:
        # If JSON parsing errors, just let that exception happen
        error_payload = orjson.loads(response.content)
        msg = error_payload["msg"]
        error_code = error_payload.get("code")
        if error_code == "INVALID_ZULIP_SERVER":
            # Invalid Zulip server credentials should email this server's admins
            raise PushNotificationBouncerError(
                _("Push notifications bouncer error: {error}").format(error=msg)
            )
        if endpoint == "push/test_notification" and error_code == "INVALID_REMOTE_PUSH_DEVICE_TOKEN":
            # This error from the notification debugging endpoint should just be directly
            # communicated to the device.
            # TODO: Extend this to use a more general mechanism when we add more such error responses.
            from zerver.lib.push_notifications import InvalidRemotePushDeviceTokenError

            raise InvalidRemotePushDeviceTokenError
        if endpoint == "server/billing" and error_code == "MISSING_REMOTE_REALM":  # nocoverage
            # The callers requesting this endpoint want the exception to propagate
            # so they can catch it.
            raise MissingRemoteRealmError
        # But most other errors coming from the push bouncer
        # server are client errors (e.g. never-registered token)
        # and should be handled as such.
        raise JsonableError(msg)

    if response.status_code != 200:
        # Anything else is unexpected and likely suggests a bug in
        # this version of Zulip, so we throw an exception that will
        # email the server admins.
        raise PushNotificationBouncerError(
            f"Push notification bouncer returned unexpected status code {response.status_code}"
        )

    # If we don't throw an exception, it's a successful bounce!
    return orjson.loads(response.content)
|
2019-01-31 00:44:02 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-09-28 14:17:16 +02:00
|
|
|
def send_json_to_push_bouncer(
    method: str, endpoint: str, post_data: Mapping[str, object]
) -> Dict[str, object]:
    """Like send_to_push_bouncer, but serializes post_data as a JSON body."""
    serialized_data = orjson.dumps(post_data)
    return send_to_push_bouncer(
        method,
        endpoint,
        serialized_data,
        extra_headers={"Content-type": "application/json"},
    )
|
2019-01-31 00:39:02 +01:00
|
|
|
|
2019-10-03 02:01:36 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
# RealmAuditLog columns serialized and synced to the push bouncer
# (see build_analytics_data below, which passes this to model_to_dict).
REALMAUDITLOG_PUSHED_FIELDS = [
    "id",
    "realm",
    "event_time",
    "backfilled",
    # Note that we don't need to add extra_data_json here because
    # the view remote_server_post_analytics populates extra_data_json
    # from the provided extra_data.
    "extra_data",
    "event_type",
]
|
|
|
|
|
|
|
|
|
|
|
|
def build_analytics_data(
    realm_count_query: Any, installation_count_query: Any, realmauditlog_query: Any
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Serialize the given querysets into JSON-friendly dicts for upload.

    Returns (realm_counts, installation_counts, realmauditlog_rows), each a
    list of dicts with datetime fields converted to float timestamps.
    """
    # We limit the batch size on the client side to avoid OOM kills timeouts, etc.
    MAX_CLIENT_BATCH_SIZE = 10000

    data = {
        "analytics_realmcount": [
            model_to_dict(row)
            for row in realm_count_query.order_by("id")[:MAX_CLIENT_BATCH_SIZE]
        ],
        "analytics_installationcount": [
            model_to_dict(row)
            for row in installation_count_query.order_by("id")[:MAX_CLIENT_BATCH_SIZE]
        ],
        "zerver_realmauditlog": [
            model_to_dict(row, fields=REALMAUDITLOG_PUSHED_FIELDS)
            for row in realmauditlog_query.order_by("id")[:MAX_CLIENT_BATCH_SIZE]
        ],
    }

    # floatify_datetime_fields mutates the table entries in place.
    for table_name in (
        "analytics_realmcount",
        "analytics_installationcount",
        "zerver_realmauditlog",
    ):
        floatify_datetime_fields(data, table_name)

    return (
        data["analytics_realmcount"],
        data["analytics_installationcount"],
        data["zerver_realmauditlog"],
    )
|
|
|
|
|
2019-01-31 00:39:02 +01:00
|
|
|
|
2023-11-15 22:44:24 +01:00
|
|
|
def get_realms_info_for_push_bouncer(realm_id: Optional[int] = None) -> List[RealmDataForAnalytics]:
    """Collect the metadata for all realms (or just realm_id, if given)
    in the form the push bouncer expects.
    """
    realm_queryset = Realm.objects.order_by("id")
    if realm_id is not None:  # nocoverage
        realm_queryset = realm_queryset.filter(id=realm_id)

    return [
        RealmDataForAnalytics(
            id=realm.id,
            uuid=realm.uuid,
            uuid_owner_secret=realm.uuid_owner_secret,
            host=realm.host,
            url=realm.uri,
            deactivated=realm.deactivated,
            date_created=realm.date_created.timestamp(),
            org_type=realm.org_type,
        )
        for realm in realm_queryset
    ]
|
2023-10-30 23:50:53 +01:00
|
|
|
|
|
|
|
|
2023-10-25 02:48:39 +02:00
|
|
|
def send_analytics_to_push_bouncer() -> None:
    """Upload new analytics counts, audit-log rows, and realm metadata to
    the push notification bouncer.

    Asks the bouncer which record IDs it has already acked, sends only
    newer rows, and logs the outcome. Transient failures are logged and
    abort this attempt; a later run will retry from the acked IDs.
    """
    logger = logging.getLogger("zulip.analytics")

    # first, check what's latest
    try:
        result = send_to_push_bouncer("GET", "server/analytics/status", {})
    except PushNotificationBouncerRetryLaterError as e:
        logger.warning(e.msg, exc_info=True)
        return

    last_acked_realm_count_id = result["last_realm_count_id"]
    last_acked_installation_count_id = result["last_installation_count_id"]
    last_acked_realmauditlog_id = result["last_realmauditlog_id"]

    # Gather only entries with IDs greater than the last ID received by the push bouncer.
    # We don't re-send old data that's already been submitted.
    (realm_count_data, installation_count_data, realmauditlog_data) = build_analytics_data(
        realm_count_query=RealmCount.objects.filter(id__gt=last_acked_realm_count_id),
        installation_count_query=InstallationCount.objects.filter(
            id__gt=last_acked_installation_count_id
        ),
        realmauditlog_query=RealmAuditLog.objects.filter(
            event_type__in=RealmAuditLog.SYNCED_BILLING_EVENTS, id__gt=last_acked_realmauditlog_id
        ),
    )

    record_count = len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data)
    if record_count == 0:
        logger.info("No new records to report.")
        return

    request = {
        "realm_counts": orjson.dumps(realm_count_data).decode(),
        "installation_counts": orjson.dumps(installation_count_data).decode(),
        "realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
        "realms": orjson.dumps(
            [dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()]
        ).decode(),
        "version": orjson.dumps(ZULIP_VERSION).decode(),
    }

    try:
        send_to_push_bouncer("POST", "server/analytics", request)
    except JsonableError as e:
        # The upload failed; log the error and bail out so we don't emit
        # the misleading "Reported N records" success line below.
        logger.warning(e.msg)
        return
    logger.info("Reported %d records", record_count)
|
2023-11-16 15:25:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
def send_realms_only_to_push_bouncer() -> None:
    """Sync just the realm metadata to the push bouncer, via the same
    server/analytics endpoint but with empty analytics payloads.
    """
    serialized_realms = orjson.dumps(
        [dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()]
    ).decode()
    request = {
        "realm_counts": "[]",
        "installation_counts": "[]",
        "realms": serialized_realms,
        "version": orjson.dumps(ZULIP_VERSION).decode(),
    }

    # We don't catch JsonableError here, because we want it to propagate further
    # to either explicitly, loudly fail or be error-handled by the caller.
    send_to_push_bouncer("POST", "server/analytics", request)
|