realm_export: Add support to create full data export via /export/realm.

Earlier, only public data exports could be created via the
`POST /export/realm` endpoint. This commit adds support for creating
full data exports with member consent via that endpoint.

Also, this adds an `export_type` field to the dictionaries
in the `realm_export` event and the `GET /export/realm` response.

Fixes part of #31201.
This commit is contained in:
Prakhar Pratyush 2024-10-05 03:25:11 +05:30 committed by Tim Abbott
parent 2367c46455
commit 55f97cd06f
9 changed files with 114 additions and 30 deletions

View File

@ -20,6 +20,17 @@ format used by the Zulip server that they are interacting with.
## Changes in Zulip 10.0
**Feature level 304**
* [`GET /export/realm`](/api/get-realm-exports),
[`GET /events`](/api/get-events): Added `export_type` field
to the dictionaries in the `exports` array. It indicates whether
the export is of public data or full data with member consent.
* [`POST /export/realm`](/api/export-realm): Added `export_type`
parameter to add support for admins to decide whether to create a
public data export or a full data export with member consent.
**Feature level 303**
* [`POST /register`](/api/register-queue), [`GET /user_groups`](/api/get-user-groups),

View File

@ -119,8 +119,8 @@
* [Reorder custom profile fields](/api/reorder-custom-profile-fields)
* [Create a custom profile field](/api/create-custom-profile-field)
* [Update realm-level defaults of user settings](/api/update-realm-user-settings-defaults)
* [Get all public data exports](/api/get-realm-exports)
* [Create a public data export](/api/export-realm)
* [Get all data exports](/api/get-realm-exports)
* [Create a data export](/api/export-realm)
* [Get data export consent state](/api/get-realm-export-consents)
#### Real-time events

View File

@ -34,7 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3"
# new level means in api_docs/changelog.md, as well as "**Changes**"
# entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 303 # Last bumped for handling deactivated users in groups.
API_FEATURE_LEVEL = 304 # Last bumped for adding `export_type` field.
# Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump

View File

@ -23,6 +23,7 @@ const realm_export_schema = z.object({
deleted_timestamp: z.number().nullable(),
failed_timestamp: z.number().nullable(),
pending: z.boolean(),
export_type: z.number(),
});
type RealmExport = z.output<typeof realm_export_schema>;

View File

@ -480,6 +480,7 @@ exports.fixtures = {
deleted_timestamp: null,
failed_timestamp: null,
pending: true,
export_type: 1,
},
],
},

View File

@ -833,6 +833,7 @@ export_type = DictType(
("deleted_timestamp", OptionalType(NumberType())),
("failed_timestamp", OptionalType(NumberType())),
("pending", bool),
("export_type", int),
]
)

View File

@ -2588,5 +2588,6 @@ def get_realm_exports_serialized(realm: Realm) -> list[dict[str, Any]]:
deleted_timestamp=deleted_timestamp,
failed_timestamp=failed_timestamp,
pending=pending,
export_type=export.type,
)
return sorted(exports_dict.values(), key=lambda export_dict: export_dict["id"])

View File

@ -3816,7 +3816,7 @@ paths:
- type: object
additionalProperties: false
description: |
Event sent to the user who requested a public
Event sent to the user who requested a
[data export](/help/export-your-organization)
when the status of the data export changes.
properties:
@ -3831,7 +3831,10 @@ paths:
type: array
description: |
An array of dictionaries where each dictionary contains
details about a public data export of the organization.
details about a data export of the organization.
**Changes**: Prior to Zulip 10.0 (feature level 304), the `export_type`
field was not present, as only public data exports were supported via the API.
items:
$ref: "#/components/schemas/RealmExport"
example:
@ -3847,6 +3850,7 @@ paths:
"deleted_timestamp": null,
"failed_timestamp": 1594825444.4363360405,
"pending": false,
"export_type": 1,
},
],
"id": 1,
@ -13063,14 +13067,20 @@ paths:
/export/realm:
get:
operationId: get-realm-exports
summary: Get all public data exports
summary: Get all data exports
tags: ["server_and_organizations"]
x-requires-administrator: true
description: |
Fetch all public [data exports](/help/export-your-organization)
of the organization.
Fetch all [public data exports][public-data-export] and
[full data exports with member consent][full-data-export] of the organization.
**Changes**: New in Zulip 2.1.
**Changes**: Prior to Zulip 10.0 (feature level 304), only
public data exports could be fetched using this endpoint.
New in Zulip 2.1.
[public-data-export]: /help/export-your-organization#export-of-public-data
[full-data-export]: /help/export-your-organization#full-export-with-member-consent
responses:
"200":
description: Success.
@ -13088,7 +13098,7 @@ paths:
type: array
description: |
An array of dictionaries where each dictionary contains
details about a public data export of the organization.
details about a data export of the organization.
items:
$ref: "#/components/schemas/RealmExport"
example:
@ -13098,6 +13108,7 @@ paths:
{
"acting_user_id": 11,
"deleted_timestamp": null,
"export_type": 1,
"export_time": 1722243168.134179,
"export_url": "http://example.zulipchat.com/user_avatars/exports/2/FprbwiF0c_sCN0O-rf-ryFtc/zulip-export-p6yuxc45.tar.gz",
"id": 323,
@ -13110,12 +13121,12 @@ paths:
}
post:
operationId: export-realm
summary: Create a public data export
summary: Create a data export
tags: ["server_and_organizations"]
x-requires-administrator: true
description: |
Create a public [data export](/help/export-your-organization)
of the organization.
Create a [public data export][public-data-export] or a
[full data export with member consent][full-data-export] of the organization.
!!! warn ""
@ -13123,10 +13134,39 @@ paths:
you may be looking for the documentation on [server data export and
import][data-export] or [server backups][backups].
**Changes**: New in Zulip 2.1.
**Changes**: Prior to Zulip 10.0 (feature level 304), only
public data exports could be created using this endpoint.
New in Zulip 2.1.
[public-data-export]: /help/export-your-organization#export-of-public-data
[full-data-export]: /help/export-your-organization#full-export-with-member-consent
[data-export]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#data-export
[backups]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#backups
requestBody:
required: false
content:
application/x-www-form-urlencoded:
schema:
type: object
properties:
export_type:
description: |
Whether to create a public export or a full export with member consent.
- 1 = Public data export.
- 2 = Full data export with member consent.
If not specified, defaults to 1.
**Changes**: New in Zulip 10.0 (feature level 304). Previously,
all export requests were public data exports.
type: integer
enum:
- 1
- 2
default: 1
example: 2
responses:
"200":
description: Success
@ -13143,7 +13183,7 @@ paths:
id:
type: integer
description: |
The ID of the public data export created.
The ID of the data export created.
**Changes**: New in Zulip 7.0 (feature level 182).
example: {"id": 1, "result": "success", "msg": ""}
@ -13161,7 +13201,7 @@ paths:
"result": "error",
}
description: |
An example JSON error response for when the public data export
An example JSON error response for when the data export
exceeds the maximum allowed data export size.
/export/realm/consents:
get:
@ -21945,6 +21985,18 @@ components:
Depending on the size of the organization, it can take
anywhere from seconds to an hour to generate the data
export.
export_type:
type: integer
description: |
Whether the data export is a public data export or a
full data export with member consent.
- 1 = Public data export.
- 2 = Full data export with member consent.
**Changes**: New in Zulip 10.0 (feature level 304). Previously,
the export type was not included in these objects because only
public data exports could be created or listed via the API or UI.
UserGroup:
type: object
additionalProperties: false

View File

@ -1,10 +1,12 @@
from datetime import timedelta
from typing import Annotated
from django.conf import settings
from django.db import transaction
from django.http import HttpRequest, HttpResponse
from django.utils.timezone import now as timezone_now
from django.utils.translation import gettext as _
from pydantic import Json
from analytics.models import RealmCount
from zerver.actions.realm_export import do_delete_realm_export, notify_realm_export
@ -13,13 +15,27 @@ from zerver.lib.exceptions import JsonableError
from zerver.lib.export import get_realm_exports_serialized
from zerver.lib.queue import queue_json_publish
from zerver.lib.response import json_success
from zerver.lib.typed_endpoint import typed_endpoint
from zerver.lib.typed_endpoint_validators import check_int_in_validator
from zerver.models import RealmExport, UserProfile
@transaction.atomic(durable=True)
@require_realm_admin
def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
# Currently only supports public-data-only exports.
@typed_endpoint
def export_realm(
request: HttpRequest,
user: UserProfile,
*,
export_type: Json[
Annotated[
int,
check_int_in_validator(
[RealmExport.EXPORT_PUBLIC, RealmExport.EXPORT_FULL_WITH_CONSENT]
),
]
] = RealmExport.EXPORT_PUBLIC,
) -> HttpResponse:
realm = user.realm
EXPORT_LIMIT = 5
@ -41,19 +57,20 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
if limit_check >= EXPORT_LIMIT:
raise JsonableError(_("Exceeded rate limit."))
# The RealmCount analytics table lets us efficiently get an
# estimate for the number of public stream messages in an
# organization. It won't match the actual number of messages in
# the export, because this measures the number of messages that
# went to a public stream at the time they were sent. Thus,
# messages that were deleted or moved between streams will be
# The RealmCount analytics table lets us efficiently get an estimate
# for the number of messages in an organization. It won't match the
# actual number of messages in the export, because this measures the
# number of messages that went to DMs / group DMs / public or private
# channels at the time they were sent.
# Thus, messages that were deleted or moved between channels, and
# private messages for which the users didn't consent to export, will be
# treated differently for this check vs. in the export code.
exportable_messages_estimate = sum(
realm_count.value
for realm_count in RealmCount.objects.filter(
realm=realm, property="messages_sent:message_type:day", subgroup="public_stream"
)
realm_count_query = RealmCount.objects.filter(
realm=realm, property="messages_sent:message_type:day"
)
# QuerySet.filter() returns a new QuerySet instead of modifying the
# receiver, so the narrowed queryset must be assigned back. Without
# the assignment, a public export would be size-checked against
# messages of every type rather than only public-stream messages.
if export_type == RealmExport.EXPORT_PUBLIC:
    realm_count_query = realm_count_query.filter(subgroup="public_stream")
exportable_messages_estimate = sum(realm_count.value for realm_count in realm_count_query)
if (
exportable_messages_estimate > MAX_MESSAGE_HISTORY
@ -67,7 +84,7 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
row = RealmExport.objects.create(
realm=realm,
type=RealmExport.EXPORT_PUBLIC,
type=export_type,
acting_user=user,
status=RealmExport.REQUESTED,
date_requested=timezone_now(),