From 55f97cd06f563846e15a215be0ace805430efe57 Mon Sep 17 00:00:00 2001 From: Prakhar Pratyush Date: Sat, 5 Oct 2024 03:25:11 +0530 Subject: [PATCH] realm_export: Add support to create full data export via /export/realm. Earlier, only public data export was possible via `POST /export/realm` endpoint. This commit adds support to create full data export with member consent via that endpoint. Also, this adds an `export_type` field to the dictionaries in `realm_export` event type and `GET /export/realm` response. Fixes part of #31201. --- api_docs/changelog.md | 11 +++++ api_docs/include/rest-endpoints.md | 4 +- version.py | 2 +- web/src/settings_exports.ts | 1 + web/tests/lib/events.js | 1 + zerver/lib/event_schema.py | 1 + zerver/lib/export.py | 1 + zerver/openapi/zulip.yaml | 78 +++++++++++++++++++++++++----- zerver/views/realm_export.py | 45 +++++++++++------ 9 files changed, 114 insertions(+), 30 deletions(-) diff --git a/api_docs/changelog.md b/api_docs/changelog.md index f8656cac66..eb4623f11f 100644 --- a/api_docs/changelog.md +++ b/api_docs/changelog.md @@ -20,6 +20,17 @@ format used by the Zulip server that they are interacting with. ## Changes in Zulip 10.0 +**Feature level 304** + +* [`GET /export/realm`](/api/get-realm-exports), + [`GET /events`](/api/get-events): Added `export_type` field + to the dictionaries in `exports` array. It indicates whether + the export is of public data or full data with user consent. + +* [`POST /export/realm`](/api/export-realm): Added `export_type` + parameter to add support for admins to decide whether to create a + public data export or a full data export with member consent. 
+ **Feature level 303** * [`POST /register`](/api/register-queue), [`GET /user_groups`](/api/get-user-groups), diff --git a/api_docs/include/rest-endpoints.md b/api_docs/include/rest-endpoints.md index 7d1c039bd9..69618ce4cd 100644 --- a/api_docs/include/rest-endpoints.md +++ b/api_docs/include/rest-endpoints.md @@ -119,8 +119,8 @@ * [Reorder custom profile fields](/api/reorder-custom-profile-fields) * [Create a custom profile field](/api/create-custom-profile-field) * [Update realm-level defaults of user settings](/api/update-realm-user-settings-defaults) -* [Get all public data exports](/api/get-realm-exports) -* [Create a public data export](/api/export-realm) +* [Get all data exports](/api/get-realm-exports) +* [Create a data export](/api/export-realm) * [Get data export consent state](/api/get-realm-export-consents) #### Real-time events diff --git a/version.py b/version.py index 3bc9fc6114..64f1f0712a 100644 --- a/version.py +++ b/version.py @@ -34,7 +34,7 @@ DESKTOP_WARNING_VERSION = "5.9.3" # new level means in api_docs/changelog.md, as well as "**Changes**" # entries in the endpoint's documentation in `zulip.yaml`. -API_FEATURE_LEVEL = 303 # Last bumped for handling deactivated users in groups. +API_FEATURE_LEVEL = 304 # Last bumped for adding `export_type` field. # Bump the minor PROVISION_VERSION to indicate that folks should provision # only when going from an old version of the code to a newer version. 
Bump diff --git a/web/src/settings_exports.ts b/web/src/settings_exports.ts index e9aeee820f..10677d12bd 100644 --- a/web/src/settings_exports.ts +++ b/web/src/settings_exports.ts @@ -23,6 +23,7 @@ const realm_export_schema = z.object({ deleted_timestamp: z.number().nullable(), failed_timestamp: z.number().nullable(), pending: z.boolean(), + export_type: z.number(), }); type RealmExport = z.output; diff --git a/web/tests/lib/events.js b/web/tests/lib/events.js index e342568ad4..557a007925 100644 --- a/web/tests/lib/events.js +++ b/web/tests/lib/events.js @@ -480,6 +480,7 @@ exports.fixtures = { deleted_timestamp: null, failed_timestamp: null, pending: true, + export_type: 1, }, ], }, diff --git a/zerver/lib/event_schema.py b/zerver/lib/event_schema.py index 18486e1ac6..e793651bbb 100644 --- a/zerver/lib/event_schema.py +++ b/zerver/lib/event_schema.py @@ -833,6 +833,7 @@ export_type = DictType( ("deleted_timestamp", OptionalType(NumberType())), ("failed_timestamp", OptionalType(NumberType())), ("pending", bool), + ("export_type", int), ] ) diff --git a/zerver/lib/export.py b/zerver/lib/export.py index 8a342a524c..e8be4e7771 100644 --- a/zerver/lib/export.py +++ b/zerver/lib/export.py @@ -2588,5 +2588,6 @@ def get_realm_exports_serialized(realm: Realm) -> list[dict[str, Any]]: deleted_timestamp=deleted_timestamp, failed_timestamp=failed_timestamp, pending=pending, + export_type=export.type, ) return sorted(exports_dict.values(), key=lambda export_dict: export_dict["id"]) diff --git a/zerver/openapi/zulip.yaml b/zerver/openapi/zulip.yaml index 95618e09ee..2ce2711406 100644 --- a/zerver/openapi/zulip.yaml +++ b/zerver/openapi/zulip.yaml @@ -3816,7 +3816,7 @@ paths: - type: object additionalProperties: false description: | - Event sent to the user who requested a public + Event sent to the user who requested a [data export](/help/export-your-organization) when the status of the data export changes. 
properties: @@ -3831,7 +3831,10 @@ paths: type: array description: | An array of dictionaries where each dictionary contains - details about a public data export of the organization. + details about a data export of the organization. + + **Changes**: Prior to Zulip 10.0 (feature level 304), the `export_type` + field was not present, as only public data exports were supported via the API. items: $ref: "#/components/schemas/RealmExport" example: @@ -3847,6 +3850,7 @@ paths: "deleted_timestamp": null, "failed_timestamp": 1594825444.4363360405, "pending": false, + "export_type": 1, }, ], "id": 1, @@ -13063,14 +13067,20 @@ paths: /export/realm: get: operationId: get-realm-exports - summary: Get all public data exports + summary: Get all data exports tags: ["server_and_organizations"] x-requires-administrator: true description: | - Fetch all public [data exports](/help/export-your-organization) - of the organization. + Fetch all [public data exports][public-data-export] and + [full data exports with member consent][full-data-export] of the organization. - **Changes**: New in Zulip 2.1. + **Changes**: Prior to Zulip 10.0 (feature level 304), only + public data exports could be fetched using this endpoint. + + New in Zulip 2.1. + + [public-data-export]: /help/export-your-organization#export-of-public-data + [full-data-export]: /help/export-your-organization#full-export-with-member-consent responses: "200": description: Success. @@ -13088,7 +13098,7 @@ paths: type: array description: | An array of dictionaries where each dictionary contains - details about a public data export of the organization. + details about a data export of the organization. 
items: $ref: "#/components/schemas/RealmExport" example: @@ -13098,6 +13108,7 @@ paths: { "acting_user_id": 11, "deleted_timestamp": null, + "export_type": 1, "export_time": 1722243168.134179, "export_url": "http://example.zulipchat.com/user_avatars/exports/2/FprbwiF0c_sCN0O-rf-ryFtc/zulip-export-p6yuxc45.tar.gz", "id": 323, @@ -13110,12 +13121,12 @@ paths: } post: operationId: export-realm - summary: Create a public data export + summary: Create a data export tags: ["server_and_organizations"] x-requires-administrator: true description: | - Create a public [data export](/help/export-your-organization) - of the organization. + Create a [public data export][public-data-export] or a + [full data export with member consent][full-data-export] of the organization. !!! warn "" @@ -13123,10 +13134,39 @@ paths: you may be looking for the documentation on [server data export and import][data-export] or [server backups][backups]. - **Changes**: New in Zulip 2.1. + **Changes**: Prior to Zulip 10.0 (feature level 304), only + public data exports could be created using this endpoint. + New in Zulip 2.1. + + [public-data-export]: /help/export-your-organization#export-of-public-data + [full-data-export]: /help/export-your-organization#full-export-with-member-consent [data-export]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#data-export [backups]: https://zulip.readthedocs.io/en/stable/production/export-and-import.html#backups + requestBody: + required: false + content: + application/x-www-form-urlencoded: + schema: + type: object + properties: + export_type: + description: | + Whether to create a public export or a full export with member consent. + + - 1 = Public data export. + - 2 = Full data export with member consent. + + If not specified, defaults to 1. + + **Changes**: New in Zulip 10.0 (feature level 304). Previously, + all export requests were public data exports. 
+ type: integer + enum: + - 1 + - 2 + default: 1 + example: 2 responses: "200": description: Success @@ -13143,7 +13183,7 @@ paths: id: type: integer description: | - The ID of the public data export created. + The ID of the data export created. **Changes**: New in Zulip 7.0 (feature level 182). example: {"id": 1, "result": "success", "msg": ""} @@ -13161,7 +13201,7 @@ paths: "result": "error", } description: | - An example JSON error response for when the public data export + An example JSON error response for when the data export exceeds the maximum allowed data export size. /export/realm/consents: get: @@ -21945,6 +21985,18 @@ components: Depending on the size of the organization, it can take anywhere from seconds to an hour to generate the data export. + export_type: + type: integer + description: | + Whether the data export is a public data export or a + full data export with member consent. + + - 1 = Public data export. + - 2 = Full data export with member consent. + + **Changes**: New in Zulip 10.0 (feature level 304). Previously, + the export type was not included in these objects because only + public data exports could be created or listed via the API or UI. 
UserGroup: type: object additionalProperties: false diff --git a/zerver/views/realm_export.py b/zerver/views/realm_export.py index ed5aff1851..5374ce73bb 100644 --- a/zerver/views/realm_export.py +++ b/zerver/views/realm_export.py @@ -1,10 +1,12 @@ from datetime import timedelta +from typing import Annotated from django.conf import settings from django.db import transaction from django.http import HttpRequest, HttpResponse from django.utils.timezone import now as timezone_now from django.utils.translation import gettext as _ +from pydantic import Json from analytics.models import RealmCount from zerver.actions.realm_export import do_delete_realm_export, notify_realm_export @@ -13,13 +15,27 @@ from zerver.lib.exceptions import JsonableError from zerver.lib.export import get_realm_exports_serialized from zerver.lib.queue import queue_json_publish from zerver.lib.response import json_success +from zerver.lib.typed_endpoint import typed_endpoint +from zerver.lib.typed_endpoint_validators import check_int_in_validator from zerver.models import RealmExport, UserProfile @transaction.atomic(durable=True) @require_realm_admin -def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse: - # Currently only supports public-data-only exports. +@typed_endpoint +def export_realm( + request: HttpRequest, + user: UserProfile, + *, + export_type: Json[ + Annotated[ + int, + check_int_in_validator( + [RealmExport.EXPORT_PUBLIC, RealmExport.EXPORT_FULL_WITH_CONSENT] + ), + ] + ] = RealmExport.EXPORT_PUBLIC, +) -> HttpResponse: realm = user.realm EXPORT_LIMIT = 5 @@ -41,19 +57,20 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse: if limit_check >= EXPORT_LIMIT: raise JsonableError(_("Exceeded rate limit.")) - # The RealmCount analytics table lets us efficiently get an - # estimate for the number of public stream messages in an - # organization. 
It won't match the actual number of messages in - # the export, because this measures the number of messages that - # went to a public stream at the time they were sent. Thus, - # messages that were deleted or moved between streams will be + # The RealmCount analytics table lets us efficiently get an estimate + # for the number of messages in an organization. It won't match the + # actual number of messages in the export, because this measures the + # number of messages that went to DMs / Group DMs / public or private + # channels at the time they were sent. + # Thus, messages that were deleted or moved between channels and + # private messages for which the users didn't consent for export will be # treated differently for this check vs. in the export code. - exportable_messages_estimate = sum( - realm_count.value - for realm_count in RealmCount.objects.filter( - realm=realm, property="messages_sent:message_type:day", subgroup="public_stream" - ) + realm_count_query = RealmCount.objects.filter( + realm=realm, property="messages_sent:message_type:day" ) + if export_type == RealmExport.EXPORT_PUBLIC: + realm_count_query = realm_count_query.filter(subgroup="public_stream") + exportable_messages_estimate = sum(realm_count.value for realm_count in realm_count_query) if ( exportable_messages_estimate > MAX_MESSAGE_HISTORY @@ -67,7 +84,7 @@ def export_realm(request: HttpRequest, user: UserProfile) -> HttpResponse: row = RealmExport.objects.create( realm=realm, - type=RealmExport.EXPORT_PUBLIC, + type=export_type, acting_user=user, status=RealmExport.REQUESTED, date_requested=timezone_now(),