From 940830055b7b59bedad1c5bc9bec6036918419d4 Mon Sep 17 00:00:00 2001 From: Mateusz Mandera Date: Tue, 20 Sep 2022 21:24:59 +0200 Subject: [PATCH] delete_in_topic: Split up the deletion into batches. Fixes #22821. As explained in the comment in the code: Topics can be large enough that this request will inevitably time out. In such a case, it's good for some progress to be accomplished, so that full deletion can be achieved by repeating the request. For that purpose, we delete messages in atomic batches, committing after each batch. The additional perk is that the ordering of messages should prevent some hypothetical deadlocks - ref #19054 --- templates/zerver/api/changelog.md | 7 +++++++ version.py | 2 +- zerver/openapi/zulip.yaml | 6 ++++++ zerver/views/streams.py | 19 +++++++++++++++---- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/templates/zerver/api/changelog.md b/templates/zerver/api/changelog.md index 6e26a135e6..ed50d848a8 100644 --- a/templates/zerver/api/changelog.md +++ b/templates/zerver/api/changelog.md @@ -20,6 +20,13 @@ format used by the Zulip server that they are interacting with. ## Changes in Zulip 6.0 +**Feature level 147** + +* [`POST /streams/{stream_id}/delete_topic`](/api/delete-topic): + Messages now are deleted in batches, starting from the newest, so + that progress will be made even if the request times out because of + an extremely large topic. + **Feature level 146** * [`POST /realm/profile_fields`](/api/create-custom-profile-field), diff --git a/version.py b/version.py index 97524b849f..c6e4ee55e7 100644 --- a/version.py +++ b/version.py @@ -33,7 +33,7 @@ DESKTOP_WARNING_VERSION = "5.4.3" # Changes should be accompanied by documentation explaining what the # new level means in templates/zerver/api/changelog.md, as well as # "**Changes**" entries in the endpoint's documentation in `zulip.yaml`. -API_FEATURE_LEVEL = 146 +API_FEATURE_LEVEL = 147 # Bump the minor PROVISION_VERSION to indicate that folks should provision # only when going from an old version of the code to a newer version. Bump diff --git a/zerver/openapi/zulip.yaml b/zerver/openapi/zulip.yaml index a2a651e1c6..873733f56e 100644 --- a/zerver/openapi/zulip.yaml +++ b/zerver/openapi/zulip.yaml @@ -14232,6 +14232,12 @@ paths: Topics are a field on messages (not an independent data structure), so deleting all the messages in the topic deletes the topic from Zulip. + + **Changes**: Before Zulip 6.0 (feature level 147), this + request did a single atomic operation, which could time out + for very large topics. It now deletes messages in batches, + starting with the newest messages, so that progress will be + made even if the request times out. parameters: - $ref: "#/components/parameters/StreamIdInPath" - name: topic_name diff --git a/zerver/views/streams.py b/zerver/views/streams.py index 8982f3d6de..818c36001f 100644 --- a/zerver/views/streams.py +++ b/zerver/views/streams.py @@ -55,6 +55,7 @@ from zerver.lib.exceptions import ( from zerver.lib.mention import MentionBackend, silent_mention_syntax_for_user from zerver.lib.request import REQ, has_request_variables from zerver.lib.response import json_success +from zerver.lib.retention import STREAM_MESSAGE_BATCH_SIZE as RETENTION_STREAM_MESSAGE_BATCH_SIZE from zerver.lib.retention import parse_message_retention_days from zerver.lib.streams import ( StreamDict, @@ -848,7 +849,6 @@ def get_topics_backend( return json_success(request, data=dict(topics=result)) -@transaction.atomic @require_realm_admin @has_request_variables def delete_in_topic( @@ -867,9 +867,20 @@ def delete_in_topic( ).values_list("message_id", flat=True) messages = messages.filter(id__in=deletable_message_ids) - messages = messages.select_for_update(of=("self",)) - - do_delete_messages(user_profile.realm, messages) + # Topics can be large enough that this request will inevitably time out. + # In such a case, it's good for some progress to be accomplished, so that + # full deletion can be achieved by repeating the request. For that purpose, + # we delete messages in atomic batches, committing after each batch. + # TODO: Ideally this should be moved to the deferred_work queue. + batch_size = RETENTION_STREAM_MESSAGE_BATCH_SIZE + while True: + with transaction.atomic(durable=True): + messages_to_delete = messages.order_by("-id")[0:batch_size].select_for_update( + of=("self",) + ) + if not messages_to_delete: + break + do_delete_messages(user_profile.realm, messages_to_delete) return json_success(request)