mirror of https://github.com/zulip/zulip.git
remote_server: Serialize analytics requests with Pydantic.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
parent
abdfdeffe4
commit
0400614a48
|
@ -5,14 +5,13 @@ from urllib.parse import urljoin
|
||||||
import orjson
|
import orjson
|
||||||
import requests
|
import requests
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.forms.models import model_to_dict
|
from django.db.models import QuerySet
|
||||||
from django.utils.translation import gettext as _
|
from django.utils.translation import gettext as _
|
||||||
from pydantic import UUID4, BaseModel, ConfigDict, Field, field_validator
|
from pydantic import UUID4, BaseModel, ConfigDict, Field, Json, field_validator
|
||||||
|
|
||||||
from analytics.models import InstallationCount, RealmCount
|
from analytics.models import InstallationCount, RealmCount
|
||||||
from version import ZULIP_VERSION
|
from version import ZULIP_VERSION
|
||||||
from zerver.lib.exceptions import JsonableError, MissingRemoteRealmError
|
from zerver.lib.exceptions import JsonableError, MissingRemoteRealmError
|
||||||
from zerver.lib.export import floatify_datetime_fields
|
|
||||||
from zerver.lib.outgoing_http import OutgoingSession
|
from zerver.lib.outgoing_http import OutgoingSession
|
||||||
from zerver.lib.queue import queue_json_publish
|
from zerver.lib.queue import queue_json_publish
|
||||||
from zerver.lib.types import RemoteRealmDictValue
|
from zerver.lib.types import RemoteRealmDictValue
|
||||||
|
@ -36,6 +35,32 @@ class PushNotificationBouncerServerError(PushNotificationBouncerRetryLaterError)
|
||||||
http_status_code = 502
|
http_status_code = 502
|
||||||
|
|
||||||
|
|
||||||
|
class RealmCountDataForAnalytics(BaseModel):
|
||||||
|
property: str
|
||||||
|
realm: int
|
||||||
|
id: int
|
||||||
|
end_time: float
|
||||||
|
subgroup: Optional[str]
|
||||||
|
value: int
|
||||||
|
|
||||||
|
|
||||||
|
class InstallationCountDataForAnalytics(BaseModel):
|
||||||
|
property: str
|
||||||
|
id: int
|
||||||
|
end_time: float
|
||||||
|
subgroup: Optional[str]
|
||||||
|
value: int
|
||||||
|
|
||||||
|
|
||||||
|
class RealmAuditLogDataForAnalytics(BaseModel):
|
||||||
|
id: int
|
||||||
|
realm: int
|
||||||
|
event_time: float
|
||||||
|
backfilled: bool
|
||||||
|
extra_data: Optional[Union[str, Dict[str, Any]]]
|
||||||
|
event_type: int
|
||||||
|
|
||||||
|
|
||||||
class RealmDataForAnalytics(BaseModel):
|
class RealmDataForAnalytics(BaseModel):
|
||||||
model_config = ConfigDict(extra="forbid")
|
model_config = ConfigDict(extra="forbid")
|
||||||
|
|
||||||
|
@ -61,6 +86,14 @@ class RealmDataForAnalytics(BaseModel):
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
class AnalyticsRequest(BaseModel):
|
||||||
|
realm_counts: Json[List[RealmCountDataForAnalytics]]
|
||||||
|
installation_counts: Json[List[InstallationCountDataForAnalytics]]
|
||||||
|
realmauditlog_rows: Optional[Json[List[RealmAuditLogDataForAnalytics]]] = None
|
||||||
|
realms: Json[List[RealmDataForAnalytics]]
|
||||||
|
version: Optional[Json[str]]
|
||||||
|
|
||||||
|
|
||||||
class UserDataForRemoteBilling(BaseModel):
|
class UserDataForRemoteBilling(BaseModel):
|
||||||
uuid: UUID4
|
uuid: UUID4
|
||||||
email: str
|
email: str
|
||||||
|
@ -189,45 +222,54 @@ def send_json_to_push_bouncer(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
REALMAUDITLOG_PUSHED_FIELDS = [
|
def build_analytics_data(
|
||||||
"id",
|
realm_count_query: QuerySet[RealmCount],
|
||||||
"realm",
|
installation_count_query: QuerySet[InstallationCount],
|
||||||
"event_time",
|
realmauditlog_query: QuerySet[RealmAuditLog],
|
||||||
"backfilled",
|
) -> Tuple[
|
||||||
|
List[RealmCountDataForAnalytics],
|
||||||
|
List[InstallationCountDataForAnalytics],
|
||||||
|
List[RealmAuditLogDataForAnalytics],
|
||||||
|
]:
|
||||||
|
# We limit the batch size on the client side to avoid OOM kills timeouts, etc.
|
||||||
|
MAX_CLIENT_BATCH_SIZE = 10000
|
||||||
|
realm_count_data = [
|
||||||
|
RealmCountDataForAnalytics(
|
||||||
|
property=row.property,
|
||||||
|
realm=row.realm.id,
|
||||||
|
id=row.id,
|
||||||
|
end_time=row.end_time.timestamp(),
|
||||||
|
subgroup=row.subgroup,
|
||||||
|
value=row.value,
|
||||||
|
)
|
||||||
|
for row in realm_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
|
||||||
|
]
|
||||||
|
installation_count_data = [
|
||||||
|
InstallationCountDataForAnalytics(
|
||||||
|
property=row.property,
|
||||||
|
id=row.id,
|
||||||
|
end_time=row.end_time.timestamp(),
|
||||||
|
subgroup=row.subgroup,
|
||||||
|
value=row.value,
|
||||||
|
)
|
||||||
|
for row in installation_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
|
||||||
|
]
|
||||||
|
zerver_realmauditlog = [
|
||||||
|
RealmAuditLogDataForAnalytics(
|
||||||
|
id=row.id,
|
||||||
|
realm=row.realm.id,
|
||||||
|
event_time=row.event_time.timestamp(),
|
||||||
|
backfilled=row.backfilled,
|
||||||
# Note that we don't need to add extra_data_json here because
|
# Note that we don't need to add extra_data_json here because
|
||||||
# the view remote_server_post_analytics populates extra_data_json
|
# the view remote_server_post_analytics populates extra_data_json
|
||||||
# from the provided extra_data.
|
# from the provided extra_data.
|
||||||
"extra_data",
|
extra_data=row.extra_data,
|
||||||
"event_type",
|
event_type=row.event_type,
|
||||||
]
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_analytics_data(
|
|
||||||
realm_count_query: Any, installation_count_query: Any, realmauditlog_query: Any
|
|
||||||
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
|
|
||||||
# We limit the batch size on the client side to avoid OOM kills timeouts, etc.
|
|
||||||
MAX_CLIENT_BATCH_SIZE = 10000
|
|
||||||
data = {}
|
|
||||||
data["analytics_realmcount"] = [
|
|
||||||
model_to_dict(row) for row in realm_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
|
|
||||||
]
|
|
||||||
data["analytics_installationcount"] = [
|
|
||||||
model_to_dict(row)
|
|
||||||
for row in installation_count_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
|
|
||||||
]
|
|
||||||
data["zerver_realmauditlog"] = [
|
|
||||||
model_to_dict(row, fields=REALMAUDITLOG_PUSHED_FIELDS)
|
|
||||||
for row in realmauditlog_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
|
for row in realmauditlog_query.order_by("id")[0:MAX_CLIENT_BATCH_SIZE]
|
||||||
]
|
]
|
||||||
|
|
||||||
floatify_datetime_fields(data, "analytics_realmcount")
|
return realm_count_data, installation_count_data, zerver_realmauditlog
|
||||||
floatify_datetime_fields(data, "analytics_installationcount")
|
|
||||||
floatify_datetime_fields(data, "zerver_realmauditlog")
|
|
||||||
return (
|
|
||||||
data["analytics_realmcount"],
|
|
||||||
data["analytics_installationcount"],
|
|
||||||
data["zerver_realmauditlog"],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_realms_info_for_push_bouncer(realm_id: Optional[int] = None) -> List[RealmDataForAnalytics]:
|
def get_realms_info_for_push_bouncer(realm_id: Optional[int] = None) -> List[RealmDataForAnalytics]:
|
||||||
|
@ -287,36 +329,36 @@ def send_analytics_to_push_bouncer() -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
record_count = len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data)
|
record_count = len(realm_count_data) + len(installation_count_data) + len(realmauditlog_data)
|
||||||
request = {
|
request = AnalyticsRequest.model_construct(
|
||||||
"realm_counts": orjson.dumps(realm_count_data).decode(),
|
realm_counts=realm_count_data,
|
||||||
"installation_counts": orjson.dumps(installation_count_data).decode(),
|
installation_counts=installation_count_data,
|
||||||
"realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
|
realmauditlog_rows=realmauditlog_data,
|
||||||
"realms": orjson.dumps(
|
realms=get_realms_info_for_push_bouncer(),
|
||||||
[dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()]
|
version=ZULIP_VERSION,
|
||||||
).decode(),
|
)
|
||||||
"version": orjson.dumps(ZULIP_VERSION).decode(),
|
|
||||||
}
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
send_to_push_bouncer("POST", "server/analytics", request)
|
send_to_push_bouncer("POST", "server/analytics", request.model_dump(round_trip=True))
|
||||||
except JsonableError as e:
|
except JsonableError as e:
|
||||||
logger.warning(e.msg)
|
logger.warning(e.msg)
|
||||||
logger.info("Reported %d records", record_count)
|
logger.info("Reported %d records", record_count)
|
||||||
|
|
||||||
|
|
||||||
def send_realms_only_to_push_bouncer() -> Dict[str, RemoteRealmDictValue]:
|
def send_realms_only_to_push_bouncer() -> Dict[str, RemoteRealmDictValue]:
|
||||||
request = {
|
request = AnalyticsRequest.model_construct(
|
||||||
"realm_counts": "[]",
|
realm_counts=[],
|
||||||
"installation_counts": "[]",
|
installation_counts=[],
|
||||||
"realms": orjson.dumps(
|
realms=get_realms_info_for_push_bouncer(),
|
||||||
[dict(realm_data) for realm_data in get_realms_info_for_push_bouncer()]
|
version=ZULIP_VERSION,
|
||||||
).decode(),
|
)
|
||||||
"version": orjson.dumps(ZULIP_VERSION).decode(),
|
|
||||||
}
|
|
||||||
|
|
||||||
# We don't catch JsonableError here, because we want it to propagate further
|
# We don't catch JsonableError here, because we want it to propagate further
|
||||||
# to either explicitly, loudly fail or be error-handled by the caller.
|
# to either explicitly, loudly fail or be error-handled by the caller.
|
||||||
response = send_to_push_bouncer("POST", "server/analytics", request)
|
response = send_to_push_bouncer(
|
||||||
|
"POST",
|
||||||
|
"server/analytics",
|
||||||
|
request.model_dump(round_trip=True, exclude={"realmauditlog_rows"}),
|
||||||
|
)
|
||||||
assert isinstance(response["realms"], dict) # for mypy
|
assert isinstance(response["realms"], dict) # for mypy
|
||||||
|
|
||||||
return response["realms"]
|
return response["realms"]
|
||||||
|
|
|
@ -60,6 +60,7 @@ from zerver.lib.push_notifications import (
|
||||||
send_notifications_to_bouncer,
|
send_notifications_to_bouncer,
|
||||||
)
|
)
|
||||||
from zerver.lib.remote_server import (
|
from zerver.lib.remote_server import (
|
||||||
|
AnalyticsRequest,
|
||||||
PushNotificationBouncerError,
|
PushNotificationBouncerError,
|
||||||
PushNotificationBouncerRetryLaterError,
|
PushNotificationBouncerRetryLaterError,
|
||||||
PushNotificationBouncerServerError,
|
PushNotificationBouncerServerError,
|
||||||
|
@ -1351,14 +1352,17 @@ class AnalyticsBouncerTest(BouncerTestCase):
|
||||||
(realm_count_data, installation_count_data, realmauditlog_data) = build_analytics_data(
|
(realm_count_data, installation_count_data, realmauditlog_data) = build_analytics_data(
|
||||||
RealmCount.objects.all(), InstallationCount.objects.all(), RealmAuditLog.objects.all()
|
RealmCount.objects.all(), InstallationCount.objects.all(), RealmAuditLog.objects.all()
|
||||||
)
|
)
|
||||||
|
request = AnalyticsRequest.model_construct(
|
||||||
|
realm_counts=realm_count_data,
|
||||||
|
installation_counts=installation_count_data,
|
||||||
|
realmauditlog_rows=realmauditlog_data,
|
||||||
|
realms=[],
|
||||||
|
version=None,
|
||||||
|
)
|
||||||
result = self.uuid_post(
|
result = self.uuid_post(
|
||||||
self.server_uuid,
|
self.server_uuid,
|
||||||
"/api/v1/remotes/server/analytics",
|
"/api/v1/remotes/server/analytics",
|
||||||
{
|
request.model_dump(round_trip=True, exclude={"realms", "version"}),
|
||||||
"realm_counts": orjson.dumps(realm_count_data).decode(),
|
|
||||||
"installation_counts": orjson.dumps(installation_count_data).decode(),
|
|
||||||
"realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
|
|
||||||
},
|
|
||||||
subdomain="",
|
subdomain="",
|
||||||
)
|
)
|
||||||
self.assert_json_error(result, "Data is out of order.")
|
self.assert_json_error(result, "Data is out of order.")
|
||||||
|
@ -1382,19 +1386,21 @@ class AnalyticsBouncerTest(BouncerTestCase):
|
||||||
check_counts(11, 11, 3, 2, 8)
|
check_counts(11, 11, 3, 2, 8)
|
||||||
|
|
||||||
# Test that only valid org_type values are accepted - integers defined in OrgTypeEnum.
|
# Test that only valid org_type values are accepted - integers defined in OrgTypeEnum.
|
||||||
realms_data = [dict(realm) for realm in get_realms_info_for_push_bouncer()]
|
realms_data = get_realms_info_for_push_bouncer()
|
||||||
# Not a valid org_type value:
|
# Not a valid org_type value:
|
||||||
realms_data[0]["org_type"] = 11
|
realms_data[0].org_type = 11
|
||||||
|
|
||||||
|
request = AnalyticsRequest.model_construct(
|
||||||
|
realm_counts=[],
|
||||||
|
installation_counts=[],
|
||||||
|
realmauditlog_rows=[],
|
||||||
|
realms=realms_data,
|
||||||
|
version=None,
|
||||||
|
)
|
||||||
result = self.uuid_post(
|
result = self.uuid_post(
|
||||||
self.server_uuid,
|
self.server_uuid,
|
||||||
"/api/v1/remotes/server/analytics",
|
"/api/v1/remotes/server/analytics",
|
||||||
{
|
request.model_dump(round_trip=True, exclude={"version", "api_feature_level"}),
|
||||||
"realm_counts": orjson.dumps([]).decode(),
|
|
||||||
"installation_counts": orjson.dumps([]).decode(),
|
|
||||||
"realmauditlog_rows": orjson.dumps([]).decode(),
|
|
||||||
"realms": orjson.dumps(realms_data).decode(),
|
|
||||||
},
|
|
||||||
subdomain="",
|
subdomain="",
|
||||||
)
|
)
|
||||||
self.assert_json_error(
|
self.assert_json_error(
|
||||||
|
@ -1435,15 +1441,17 @@ class AnalyticsBouncerTest(BouncerTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
# This first post should fail because of excessive audit log event types.
|
# This first post should fail because of excessive audit log event types.
|
||||||
|
request = AnalyticsRequest.model_construct(
|
||||||
|
realm_counts=realm_count_data,
|
||||||
|
installation_counts=installation_count_data,
|
||||||
|
realmauditlog_rows=realmauditlog_data,
|
||||||
|
realms=[],
|
||||||
|
version=None,
|
||||||
|
)
|
||||||
result = self.uuid_post(
|
result = self.uuid_post(
|
||||||
self.server_uuid,
|
self.server_uuid,
|
||||||
"/api/v1/remotes/server/analytics",
|
"/api/v1/remotes/server/analytics",
|
||||||
{
|
request.model_dump(round_trip=True, exclude={"version"}),
|
||||||
"realm_counts": orjson.dumps(realm_count_data).decode(),
|
|
||||||
"installation_counts": orjson.dumps(installation_count_data).decode(),
|
|
||||||
"realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
|
|
||||||
"realms": orjson.dumps([]).decode(),
|
|
||||||
},
|
|
||||||
subdomain="",
|
subdomain="",
|
||||||
)
|
)
|
||||||
self.assert_json_error(result, "Invalid event type.")
|
self.assert_json_error(result, "Invalid event type.")
|
||||||
|
@ -1458,15 +1466,17 @@ class AnalyticsBouncerTest(BouncerTestCase):
|
||||||
# Send the data to the bouncer without any realms data. This should lead
|
# Send the data to the bouncer without any realms data. This should lead
|
||||||
# to successful saving of the data, but with the remote_realm foreign key
|
# to successful saving of the data, but with the remote_realm foreign key
|
||||||
# set to NULL.
|
# set to NULL.
|
||||||
|
request = AnalyticsRequest.model_construct(
|
||||||
|
realm_counts=realm_count_data,
|
||||||
|
installation_counts=installation_count_data,
|
||||||
|
realmauditlog_rows=realmauditlog_data,
|
||||||
|
realms=[],
|
||||||
|
version=None,
|
||||||
|
)
|
||||||
result = self.uuid_post(
|
result = self.uuid_post(
|
||||||
self.server_uuid,
|
self.server_uuid,
|
||||||
"/api/v1/remotes/server/analytics",
|
"/api/v1/remotes/server/analytics",
|
||||||
{
|
request.model_dump(round_trip=True, exclude={"version"}),
|
||||||
"realm_counts": orjson.dumps(realm_count_data).decode(),
|
|
||||||
"installation_counts": orjson.dumps(installation_count_data).decode(),
|
|
||||||
"realmauditlog_rows": orjson.dumps(realmauditlog_data).decode(),
|
|
||||||
"realms": orjson.dumps([]).decode(),
|
|
||||||
},
|
|
||||||
subdomain="",
|
subdomain="",
|
||||||
)
|
)
|
||||||
self.assert_json_success(result)
|
self.assert_json_success(result)
|
||||||
|
|
|
@ -34,7 +34,12 @@ from zerver.lib.push_notifications import (
|
||||||
send_apple_push_notification,
|
send_apple_push_notification,
|
||||||
send_test_push_notification_directly_to_devices,
|
send_test_push_notification_directly_to_devices,
|
||||||
)
|
)
|
||||||
from zerver.lib.remote_server import RealmDataForAnalytics
|
from zerver.lib.remote_server import (
|
||||||
|
InstallationCountDataForAnalytics,
|
||||||
|
RealmAuditLogDataForAnalytics,
|
||||||
|
RealmCountDataForAnalytics,
|
||||||
|
RealmDataForAnalytics,
|
||||||
|
)
|
||||||
from zerver.lib.request import REQ, has_request_variables
|
from zerver.lib.request import REQ, has_request_variables
|
||||||
from zerver.lib.response import json_success
|
from zerver.lib.response import json_success
|
||||||
from zerver.lib.timestamp import datetime_to_timestamp, timestamp_to_datetime
|
from zerver.lib.timestamp import datetime_to_timestamp, timestamp_to_datetime
|
||||||
|
@ -659,32 +664,6 @@ def update_remote_realm_data_for_server(
|
||||||
RemoteRealmAuditLog.objects.bulk_create(remote_realm_audit_logs)
|
RemoteRealmAuditLog.objects.bulk_create(remote_realm_audit_logs)
|
||||||
|
|
||||||
|
|
||||||
class RealmAuditLogDataForAnalytics(BaseModel):
|
|
||||||
id: int
|
|
||||||
realm: int
|
|
||||||
event_time: float
|
|
||||||
backfilled: bool
|
|
||||||
extra_data: Optional[Union[str, Dict[str, Any]]]
|
|
||||||
event_type: int
|
|
||||||
|
|
||||||
|
|
||||||
class RealmCountDataForAnalytics(BaseModel):
|
|
||||||
property: str
|
|
||||||
realm: int
|
|
||||||
id: int
|
|
||||||
end_time: float
|
|
||||||
subgroup: Optional[str]
|
|
||||||
value: int
|
|
||||||
|
|
||||||
|
|
||||||
class InstallationCountDataForAnalytics(BaseModel):
|
|
||||||
property: str
|
|
||||||
id: int
|
|
||||||
end_time: float
|
|
||||||
subgroup: Optional[str]
|
|
||||||
value: int
|
|
||||||
|
|
||||||
|
|
||||||
@typed_endpoint
|
@typed_endpoint
|
||||||
@transaction.atomic
|
@transaction.atomic
|
||||||
def remote_server_post_analytics(
|
def remote_server_post_analytics(
|
||||||
|
|
Loading…
Reference in New Issue