realm_export: Add support to create full data export via /export/realm.

Earlier, only public data export was possible via `POST /export/realm`
endpoint. This commit adds support to create full data export with
member consent via that endpoint.

Also, this adds an `export_type` field to the dictionaries
in the `realm_export` event type and the `GET /export/realm` response.

Fixes part of #31201.
This commit is contained in:
Prakhar Pratyush 2024-10-05 03:25:11 +05:30 committed by Tim Abbott
parent 2367c46455
commit 55f97cd06f
9 changed files with 114 additions and 30 deletions

View File

@ -20,6 +20,17 @@ format used by the Zulip server that they are interacting with.
## Changes in Zulip 10.0 ## Changes in Zulip 10.0
**Feature level 304**
* [`GET /export/realm`](/api/get-realm-exports),
[`GET /events`](/api/get-events): Added `export_type` field
to the dictionaries in `exports` array. It indicates whether
the export is of public data or full data with user consent.
* [`POST /export/realm`](/api/export-realm): Added `export_type`
parameter to add support for admins to decide whether to create a
public data export or a full data export with member consent.
**Feature level 303** **Feature level 303**
* [`POST /register`](/api/register-queue), [`GET /user_groups`](/api/get-user-groups), * [`POST /register`](/api/register-queue), [`GET /user_groups`](/api/get-user-groups),

View File

@ -119,8 +119,8 @@
* [Reorder custom profile fields](/api/reorder-custom-profile-fields) * [Reorder custom profile fields](/api/reorder-custom-profile-fields)
* [Create a custom profile field](/api/create-custom-profile-field) * [Create a custom profile field](/api/create-custom-profile-field)
* [Update realm-level defaults of user settings](/api/update-realm-user-settings-defaults) * [Update realm-level defaults of user settings](/api/update-realm-user-settings-defaults)
* [Get all public data exports](/api/get-realm-exports) * [Get all data exports](/api/get-realm-exports)
* [Create a public data export](/api/export-realm) * [Create a data export](/api/export-realm)
* [Get data export consent state](/api/get-realm-export-consents) * [Get data export consent state](/api/get-realm-export-consents)
#### Real-time events #### Real-time events

View File

@ -34,7 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3"
# new level means in api_docs/changelog.md, as well as "**Changes**" # new level means in api_docs/changelog.md, as well as "**Changes**"
# entries in the endpoint's documentation in `zulip.yaml`. # entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 303 # Last bumped for handling deactivated users in groups. API_FEATURE_LEVEL = 304 # Last bumped for adding `export_type` field.
# Bump the minor PROVISION_VERSION to indicate that folks should provision # Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump # only when going from an old version of the code to a newer version. Bump

View File

@ -23,6 +23,7 @@ const realm_export_schema = z.object({
deleted_timestamp: z.number().nullable(), deleted_timestamp: z.number().nullable(),
failed_timestamp: z.number().nullable(), failed_timestamp: z.number().nullable(),
pending: z.boolean(), pending: z.boolean(),
export_type: z.number(),
}); });
type RealmExport = z.output<typeof realm_export_schema>; type RealmExport = z.output<typeof realm_export_schema>;

View File

@ -480,6 +480,7 @@ exports.fixtures = {
deleted_timestamp: null, deleted_timestamp: null,
failed_timestamp: null, failed_timestamp: null,
pending: true, pending: true,
export_type: 1,
}, },
], ],
}, },

View File

@ -833,6 +833,7 @@ export_type = DictType(
("deleted_timestamp", OptionalType(NumberType())), ("deleted_timestamp", OptionalType(NumberType())),
("failed_timestamp", OptionalType(NumberType())), ("failed_timestamp", OptionalType(NumberType())),
("pending", bool), ("pending", bool),
("export_type", int),
] ]
) )

View File

@ -2588,5 +2588,6 @@ def get_realm_exports_serialized(realm: Realm) -> list[dict[str, Any]]:
deleted_timestamp=deleted_timestamp, deleted_timestamp=deleted_timestamp,
failed_timestamp=failed_timestamp, failed_timestamp=failed_timestamp,
pending=pending, pending=pending,
export_type=export.type,
) )
return sorted(exports_dict.values(), key=lambda export_dict: export_dict["id"]) return sorted(exports_dict.values(), key=lambda export_dict: export_dict["id"])

View File

@ -3816,7 +3816,7 @@ paths:
- type: object - type: object
additionalProperties: false additionalProperties: false
description: | description: |
Event sent to the user who requested a public Event sent to the user who requested a
[data export](/help/export-your-organization) [data export](/help/export-your-organization)
when the status of the data export changes. when the status of the data export changes.
properties: properties:
@ -3831,7 +3831,10 @@ paths:
type: array type: array
description: | description: |
An array of dictionaries where each dictionary contains An array of dictionaries where each dictionary contains
details about a public data export of the organization. details about a data export of the organization.
**Changes**: Prior to Zulip 10.0 (feature level 304), the `export_type`
field was not present, as only public data exports were supported via the API.
items: items:
$ref: "#/components/schemas/RealmExport" $ref: "#/components/schemas/RealmExport"
example: example:
@ -3847,6 +3850,7 @@ paths:
"deleted_timestamp": null, "deleted_timestamp": null,
"failed_timestamp": 1594825444.4363360405, "failed_timestamp": 1594825444.4363360405,
"pending": false, "pending": false,
"export_type": 1,
}, },
], ],
"id": 1, "id": 1,
@ -13063,14 +13067,20 @@ paths:
/export/realm: /export/realm:
get: get:
operationId: get-realm-exports operationId: get-realm-exports
summary: Get all public data exports summary: Get all data exports
tags: ["server_and_organizations"] tags: ["server_and_organizations"]
x-requires-administrator: true x-requires-administrator: true
description: | description: |
Fetch all public [data exports](/help/export-your-organization) Fetch all [public data exports][public-data-export] and
of the organization. [full data exports with member consent][full-data-export] of the organization.
**Changes**: New in Zulip 2.1. **Changes**: Prior to Zulip 10.0 (feature level 304), only
public data exports could be fetched using this endpoint.
New in Zulip 2.1.
[public-data-export]: /help/export-your-organization#export-of-public-data
[full-data-export]: /help/export-your-organization#full-export-with-member-consent
responses: responses:
"200": "200":
description: Success. description: Success.
@ -13088,7 +13098,7 @@ paths:
type: array type: array
description: | description: |
An array of dictionaries where each dictionary contains An array of dictionaries where each dictionary contains
details about a public data export of the organization. details about a data export of the organization.
items: items:
$ref: "#/components/schemas/RealmExport" $ref: "#/components/schemas/RealmExport"
example: example:
@ -13098,6 +13108,7 @@ paths:
{ {
"acting_user_id": 11, "acting_user_id": 11,
"deleted_timestamp": null, "deleted_timestamp": null,
"export_type": 1,
"export_time": 1722243168.134179, "export_time": 1722243168.134179,
"export_url": "http://example.zulipchat.com/user_avatars/exports/2/FprbwiF0c_sCN0O-rf-ryFtc/zulip-export-p6yuxc45.tar.gz", "export_url": "http://example.zulipchat.com/user_avatars/exports/2/FprbwiF0c_sCN0O-rf-ryFtc/zulip-export-p6yuxc45.tar.gz",
"id": 323, "id": 323,
@ -13110,12 +13121,12 @@ paths:
} }
post: post:
operationId: export-realm operationId: export-realm
summary: Create a public data export summary: Create a data export
tags: ["server_and_organizations"] tags: ["server_and_organizations"]
x-requires-administrator: true x-requires-administrator: true
description: | description: |
Create a public [data export](/help/export-your-organization) Create a [public data export][public-data-export] or a
of the organization. [full data export with member consent][full-data-export] of the organization.
!!! warn "" !!! warn ""
@ -13123,10 +13134,39 @@ paths:
you may be looking for the documentation on [server data export and you may be looking for the documentation on [server data export and
import][data-export] or [server backups][backups]. import][data-export] or [server backups][backups].
**Changes**: New in Zulip 2.1. **Changes**: Prior to Zulip 10.0 (feature level 304), only
public data exports could be created using this endpoint.
New in Zulip 2.1.
[public-data-export]: /help/export-your-organization#export-of-public-data
[full-data-export]: /help/export-your-organization#full-export-with-member-consent
[data-export]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#data-export [data-export]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#data-export
[backups]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#backups [backups]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#backups
requestBody:
required: false
content:
application/x-www-form-urlencoded:
schema:
type: object
properties:
export_type:
description: |
Whether to create a public export or a full export with member consent.
- 1 = Public data export.
- 2 = Full data export with member consent.
If not specified, defaults to 1.
**Changes**: New in Zulip 10.0 (feature level 304). Previously,
all export requests were public data exports.
type: integer
enum:
- 1
- 2
default: 1
example: 2
responses: responses:
"200": "200":
description: Success description: Success
@ -13143,7 +13183,7 @@ paths:
id: id:
type: integer type: integer
description: | description: |
The ID of the public data export created. The ID of the data export created.
**Changes**: New in Zulip 7.0 (feature level 182). **Changes**: New in Zulip 7.0 (feature level 182).
example: {"id": 1, "result": "success", "msg": ""} example: {"id": 1, "result": "success", "msg": ""}
@ -13161,7 +13201,7 @@ paths:
"result": "error", "result": "error",
} }
description: | description: |
An example JSON error response for when the public data export An example JSON error response for when the data export
exceeds the maximum allowed data export size. exceeds the maximum allowed data export size.
/export/realm/consents: /export/realm/consents:
get: get:
@ -21945,6 +21985,18 @@ components:
Depending on the size of the organization, it can take Depending on the size of the organization, it can take
anywhere from seconds to an hour to generate the data anywhere from seconds to an hour to generate the data
export. export.
export_type:
type: integer
description: |
Whether the data export is a public data export or a
full data export with member consent.
- 1 = Public data export.
- 2 = Full data export with member consent.
**Changes**: New in Zulip 10.0 (feature level 304). Previously,
the export type was not included in these objects because only
public data exports could be created or listed via the API or UI.
UserGroup: UserGroup:
type: object type: object
additionalProperties: false additionalProperties: false

View File

@ -1,10 +1,12 @@
from datetime import timedelta from datetime import timedelta
from typing import Annotated
from django.conf import settings from django.conf import settings
from django.db import transaction from django.db import transaction
from django.http import HttpRequest, HttpResponse from django.http import HttpRequest, HttpResponse
from django.utils.timezone import now as timezone_now from django.utils.timezone import now as timezone_now
from django.utils.translation import gettext as _ from django.utils.translation import gettext as _
from pydantic import Json
from analytics.models import RealmCount from analytics.models import RealmCount
from zerver.actions.realm_export import do_delete_realm_export, notify_realm_export from zerver.actions.realm_export import do_delete_realm_export, notify_realm_export
@ -13,13 +15,27 @@ from zerver.lib.exceptions import JsonableError
from zerver.lib.export import get_realm_exports_serialized from zerver.lib.export import get_realm_exports_serialized
from zerver.lib.queue import queue_json_publish from zerver.lib.queue import queue_json_publish
from zerver.lib.response import json_success from zerver.lib.response import json_success
from zerver.lib.typed_endpoint import typed_endpoint
from zerver.lib.typed_endpoint_validators import check_int_in_validator
from zerver.models import RealmExport, UserProfile from zerver.models import RealmExport, UserProfile
@transaction.atomic(durable=True) @transaction.atomic(durable=True)
@require_realm_admin @require_realm_admin
def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse: @typed_endpoint
# Currently only supports public-data-only exports. def export_realm(
request: HttpRequest,
user: UserProfile,
*,
export_type: Json[
Annotated[
int,
check_int_in_validator(
[RealmExport.EXPORT_PUBLIC, RealmExport.EXPORT_FULL_WITH_CONSENT]
),
]
] = RealmExport.EXPORT_PUBLIC,
) -> HttpResponse:
realm = user.realm realm = user.realm
EXPORT_LIMIT = 5 EXPORT_LIMIT = 5
@ -41,19 +57,20 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
if limit_check >= EXPORT_LIMIT: if limit_check >= EXPORT_LIMIT:
raise JsonableError(_("Exceeded rate limit.")) raise JsonableError(_("Exceeded rate limit."))
# The RealmCount analytics table lets us efficiently get an # The RealmCount analytics table lets us efficiently get an estimate
# estimate for the number of public stream messages in an # for the number of messages in an organization. It won't match the
# organization. It won't match the actual number of messages in # actual number of messages in the export, because this measures the
# the export, because this measures the number of messages that # number of messages that went to DMs / Group DMs / public or private
# went to a public stream at the time they were sent. Thus, # channels at the time they were sent.
# messages that were deleted or moved between streams will be # Thus, messages that were deleted or moved between channels and
# private messages for which the users didn't consent for export will be
# treated differently for this check vs. in the export code. # treated differently for this check vs. in the export code.
exportable_messages_estimate = sum( realm_count_query = RealmCount.objects.filter(
realm_count.value realm=realm, property="messages_sent:message_type:day"
for realm_count in RealmCount.objects.filter(
realm=realm, property="messages_sent:message_type:day", subgroup="public_stream"
)
) )
if export_type == RealmExport.EXPORT_PUBLIC:
realm_count_query = realm_count_query.filter(subgroup="public_stream")
exportable_messages_estimate = sum(realm_count.value for realm_count in realm_count_query)
if ( if (
exportable_messages_estimate > MAX_MESSAGE_HISTORY exportable_messages_estimate > MAX_MESSAGE_HISTORY
@ -67,7 +84,7 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse:
row = RealmExport.objects.create( row = RealmExport.objects.create(
realm=realm, realm=realm,
type=RealmExport.EXPORT_PUBLIC, type=export_type,
acting_user=user, acting_user=user,
status=RealmExport.REQUESTED, status=RealmExport.REQUESTED,
date_requested=timezone_now(), date_requested=timezone_now(),