do_mark_all_as_read: Split up the work into batches.

Fixes #15403.
Mateusz Mandera 2022-10-02 21:32:36 +02:00 committed by Tim Abbott
parent ef468322f1
commit a410f6b241
10 changed files with 179 additions and 32 deletions


@ -22,6 +22,13 @@ export function mark_all_as_read() {
        success: () => {
            // After marking all messages as read, we reload the browser.
            // This is useful to avoid leaving ourselves deep in the past.
            // This is also the currently intended behavior in case of partial success
            // (response code 200 with result "partially_completed"),
            // where the request times out after marking some messages as read,
            // so we don't need to distinguish that scenario here.
            // TODO: The frontend handling of partial success can be improved
            // by re-running the request in a loop, while showing some status indicator
            // to the user.
            reload.initiate({
                immediate: true,
                save_pointer: false,


@ -20,6 +20,14 @@ format used by the Zulip server that they are interacting with.

## Changes in Zulip 6.0

**Feature level 153**

* [`POST /mark_all_as_read`](/api/mark-all-as-read): Messages are now
  marked as read in batches, so that progress will be made even if the
  request times out because of an extremely large number of unread
  messages to process. Upon timeout, a success response with a
  "partially_completed" result will be returned by the server.

**Feature level 152**

* [`PATCH /messages/{message_id}`](/api/update-message): The
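
As a minimal client-side sketch (not part of this commit), a caller could honor the new "partially_completed" result simply by repeating the request until it gets a plain success. The server URL, email, and API key below are placeholders, and the third-party `requests` library is assumed.

import requests

def mark_all_as_read(site: str, email: str, api_key: str) -> None:
    # Keep re-issuing the request as long as the server reports partial progress.
    while True:
        response = requests.post(f"{site}/api/v1/mark_all_as_read", auth=(email, api_key))
        response.raise_for_status()
        if response.json().get("result") != "partially_completed":
            # "success" means everything is now read; anything else is left to the caller.
            break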


@ -33,7 +33,7 @@ DESKTOP_WARNING_VERSION = "5.4.3"
# Changes should be accompanied by documentation explaining what the
# new level means in templates/zerver/api/changelog.md, as well as
# "**Changes**" entries in the endpoint's documentation in `zulip.yaml`.
API_FEATURE_LEVEL = 152
API_FEATURE_LEVEL = 153
# Bump the minor PROVISION_VERSION to indicate that folks should provision
# only when going from an old version of the code to a newer version. Bump


@ -44,15 +44,43 @@ def do_mark_all_as_read(user_profile: UserProfile) -> int:
    )
    do_clear_mobile_push_notifications_for_ids([user_profile.id], all_push_message_ids)

    with transaction.atomic(savepoint=False):
        query = (
            UserMessage.select_for_update_query()
            .filter(user_profile=user_profile)
            .extra(where=[UserMessage.where_unread()])
        )
        count = query.update(
            flags=F("flags").bitor(UserMessage.flags.read),
        )
    batch_size = 2000
    count = 0
    while True:
        with transaction.atomic(savepoint=False):
            query = (
                UserMessage.select_for_update_query()
                .filter(user_profile=user_profile)
                .extra(where=[UserMessage.where_unread()])[:batch_size]
            )
            # This updated_count is the same as the number of UserMessage
            # rows selected, because due to the FOR UPDATE lock, we're guaranteed
            # that all the selected rows will indeed be updated.
            # UPDATE queries don't support LIMIT, so we have to use a subquery
            # to do batching.
            updated_count = UserMessage.objects.filter(id__in=query).update(
                flags=F("flags").bitor(UserMessage.flags.read),
            )

            event_time = timezone_now()

            do_increment_logging_stat(
                user_profile,
                COUNT_STATS["messages_read::hour"],
                None,
                event_time,
                increment=updated_count,
            )
            do_increment_logging_stat(
                user_profile,
                COUNT_STATS["messages_read_interactions::hour"],
                None,
                event_time,
                increment=min(1, updated_count),
            )

        count += updated_count

        if updated_count < batch_size:
            break

    event = asdict(
        ReadMessagesEvent(
@ -60,21 +88,8 @@ def do_mark_all_as_read(user_profile: UserProfile) -> int:
            all=True,
        )
    )
    event_time = timezone_now()

    send_event(user_profile.realm, event, [user_profile.id])

    do_increment_logging_stat(
        user_profile, COUNT_STATS["messages_read::hour"], None, event_time, increment=count
    )
    do_increment_logging_stat(
        user_profile,
        COUNT_STATS["messages_read_interactions::hour"],
        None,
        event_time,
        increment=min(1, count),
    )
    return count
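
The pattern above generalizes: SQL UPDATE has no LIMIT and QuerySet.update() cannot be applied to a slice, so the sliced, locked SELECT is fed back in as an id__in subquery, one batch per short transaction. Below is a generic sketch of the same idea, using a hypothetical Notification model with a boolean read field instead of UserMessage's flag bits.

from django.db import transaction

from myapp.models import Notification  # hypothetical model, for illustration only

def mark_read_in_batches(user_id: int, batch_size: int = 2000) -> int:
    total = 0
    while True:
        with transaction.atomic():
            # Lock one batch of unread rows; the slice becomes a LIMIT in the subquery.
            batch = Notification.objects.select_for_update().filter(
                user_id=user_id, read=False
            )[:batch_size]
            updated = Notification.objects.filter(id__in=batch).update(read=True)
        total += updated
        if updated < batch_size:
            # The last batch came up short, so nothing is left to update.
            break
    return total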


@ -37,6 +37,7 @@ class ErrorCode(Enum):
    PASSWORD_RESET_REQUIRED = auto()
    AUTHENTICATION_FAILED = auto()
    UNAUTHORIZED = auto()
    REQUEST_TIMEOUT = auto()


class JsonableError(Exception):


@ -45,6 +45,10 @@ def json_success(request: HttpRequest, data: Mapping[str, Any] = {}) -> HttpResp
    return json_response(data=data)


def json_partial_success(request: HttpRequest, data: Mapping[str, Any] = {}) -> HttpResponse:
    return json_response(res_type="partially_completed", data=data, status=200)


def json_response_from_error(exception: JsonableError) -> HttpResponse:
    """
    This should only be needed in middleware; in app code, just raise.


@ -412,7 +412,9 @@ def validate_against_openapi_schema(
    if (endpoint, method) in EXCLUDE_DOCUMENTED_ENDPOINTS:
        return True
    # Check if the response matches its code
    if status_code.startswith("2") and (content.get("result", "success").lower() != "success"):
    if status_code.startswith("2") and (
        content.get("result", "success").lower() not in ["success", "partially_completed"]
    ):
        raise SchemaError("Response is not 200 but is validating against 200 schema")
    # Code is not declared but appears in various 400 responses. If
    # common, it can be added to 400 response schema
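
As a small standalone illustration (not from the commit) of the relaxed check, 2xx bodies may now carry either result value without tripping the schema error:

def is_allowed_2xx_result(content: dict) -> bool:
    # Mirrors the condition above: 2xx bodies may report either result value.
    return content.get("result", "success").lower() in ["success", "partially_completed"]

assert is_allowed_2xx_result({"result": "success", "msg": ""})
assert is_allowed_2xx_result({"result": "partially_completed", "msg": "", "code": "REQUEST_TIMEOUT"})
assert not is_allowed_2xx_result({"result": "error", "msg": "Something went wrong"})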


@ -4533,9 +4533,41 @@ paths:
tags: ["messages"]
description: |
Marks all of the current user's unread messages as read.
**Changes**: Before Zulip 6.0 (feature level 153), this
request did a single atomic operation, which could time out
with 10,000s of unread messages to mark as read.
It now marks messages as read in batches, starting with the
newest messages, so that progress will be made even if the
request times out.
If the server's processing is interrupted by a timeout, it
will return an HTTP 200 success response with result
"partially_completed". A correct client should repeat the
request when handling such a response.
responses:
"200":
$ref: "#/components/responses/SimpleSuccess"
description: Success or partial success.
content:
application/json:
schema:
oneOf:
- allOf:
- $ref: "#/components/schemas/JsonSuccess"
- $ref: "#/components/schemas/SuccessDescription"
- allOf:
- $ref: "#/components/schemas/PartiallyCompleted"
- example:
{
"code": "REQUEST_TIMEOUT",
"msg": "",
"result": "partially_completed",
}
description: |
If the request exceeds its processing time limit after having
successfully marked some messages as read, response code 200
with result "partially_completed" and code "REQUEST_TIMEOUT" will be returned like this:
/mark_stream_as_read:
post:
operationId: mark-stream-as-read
@ -16695,6 +16727,23 @@ components:
                - error
            msg:
              type: string
    PartiallyCompleted:
      allOf:
        - $ref: "#/components/schemas/JsonResponseBase"
        - required:
            - result
            - code
          additionalProperties: false
          properties:
            result:
              enum:
                - partially_completed
            code:
              type: string
              description: |
                A string that identifies the cause of the partial completion of the request.
            msg:
              type: string
    ApiKeyResponse:
      allOf:
        - $ref: "#/components/schemas/JsonSuccessBase"


@ -1,4 +1,5 @@
from typing import TYPE_CHECKING, Any, List, Mapping, Set
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Mapping, Set
from unittest import mock
import orjson
@ -22,6 +23,7 @@ from zerver.lib.message import (
)
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.test_helpers import get_subscription
from zerver.lib.timeout import TimeoutExpired
from zerver.lib.user_topics import add_topic_mute
from zerver.models import (
Message,
@ -49,6 +51,18 @@ def check_flags(flags: List[str], expected: Set[str]) -> None:
        raise AssertionError(f"expected flags (ignoring has_alert_word) to be {expected}")


@contextmanager
def timeout_mock() -> Iterator[None]:
    # timeout() doesn't work in the test environment with database operations,
    # as they don't get committed - so we need to replace it with a mock
    # that just calls the function directly.
    def mock_timeout(seconds: int, func: Callable[[], object]) -> object:
        return func()

    with mock.patch("zerver.views.message_flags.timeout", new=mock_timeout):
        yield


class FirstUnreadAnchorTests(ZulipTestCase):
    """
    HISTORICAL NOTE:
@ -62,7 +76,8 @@ class FirstUnreadAnchorTests(ZulipTestCase):
self.login("hamlet")
# Mark all existing messages as read
result = self.client_post("/json/mark_all_as_read")
with timeout_mock():
result = self.client_post("/json/mark_all_as_read")
self.assert_json_success(result)
# Send a new message (this will be unread)
@ -121,7 +136,8 @@ class FirstUnreadAnchorTests(ZulipTestCase):
    def test_visible_messages_use_first_unread_anchor(self) -> None:
        self.login("hamlet")
        result = self.client_post("/json/mark_all_as_read")
        with timeout_mock():
            result = self.client_post("/json/mark_all_as_read")
        self.assert_json_success(result)

        new_message_id = self.send_stream_message(self.example_user("othello"), "Verona", "test")
@ -563,11 +579,52 @@ class PushNotificationMarkReadFlowsTest(ZulipTestCase):
            [third_message_id, fourth_message_id],
        )

        result = self.client_post("/json/mark_all_as_read", {})
        with timeout_mock():
            result = self.client_post("/json/mark_all_as_read", {})
        self.assertEqual(self.get_mobile_push_notification_ids(user_profile), [])
        mock_push_notifications.assert_called()

class MarkAllAsReadEndpointTest(ZulipTestCase):
    def test_mark_all_as_read_endpoint(self) -> None:
        self.login("hamlet")
        hamlet = self.example_user("hamlet")
        othello = self.example_user("othello")
        self.subscribe(hamlet, "Denmark")

        for i in range(0, 4):
            self.send_stream_message(othello, "Verona", "test")
            self.send_personal_message(othello, hamlet, "test")

        unread_count = (
            UserMessage.objects.filter(user_profile=hamlet)
            .extra(where=[UserMessage.where_unread()])
            .count()
        )
        self.assertNotEqual(unread_count, 0)

        with timeout_mock():
            result = self.client_post("/json/mark_all_as_read", {})
        self.assert_json_success(result)

        new_unread_count = (
            UserMessage.objects.filter(user_profile=hamlet)
            .extra(where=[UserMessage.where_unread()])
            .count()
        )
        self.assertEqual(new_unread_count, 0)

    def test_mark_all_as_read_timeout_response(self) -> None:
        self.login("hamlet")
        with mock.patch("zerver.views.message_flags.timeout", side_effect=TimeoutExpired):
            result = self.client_post("/json/mark_all_as_read", {})
            self.assertEqual(result.status_code, 200)

            result_dict = orjson.loads(result.content)
            self.assertEqual(
                result_dict, {"result": "partially_completed", "msg": "", "code": "REQUEST_TIMEOUT"}
            )


class GetUnreadMsgsTest(ZulipTestCase):
    def mute_stream(self, user_profile: UserProfile, stream: Stream) -> None:
        recipient = Recipient.objects.get(type_id=stream.id, type=Recipient.STREAM)


@ -8,10 +8,11 @@ from zerver.actions.message_flags import (
do_mark_stream_messages_as_read,
do_update_message_flags,
)
from zerver.lib.exceptions import JsonableError
from zerver.lib.exceptions import ErrorCode, JsonableError
from zerver.lib.request import REQ, RequestNotes, has_request_variables
from zerver.lib.response import json_success
from zerver.lib.response import json_partial_success, json_success
from zerver.lib.streams import access_stream_by_id
from zerver.lib.timeout import TimeoutExpired, timeout
from zerver.lib.topic import user_message_exists_for_topic
from zerver.lib.validator import check_int, check_list
from zerver.models import UserActivity, UserProfile
@ -50,7 +51,10 @@ def update_message_flags(
@has_request_variables
def mark_all_as_read(request: HttpRequest, user_profile: UserProfile) -> HttpResponse:
    request_notes = RequestNotes.get_notes(request)

    count = do_mark_all_as_read(user_profile)
    try:
        count = timeout(50, lambda: do_mark_all_as_read(user_profile))
    except TimeoutExpired:
        return json_partial_success(request, data={"code": ErrorCode.REQUEST_TIMEOUT.name})

    log_data_str = f"[{count} updated]"
    assert request_notes.log_data is not None
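
Here timeout comes from zerver.lib.timeout. The sketch below is only a simplified stand-in with the same call shape (run the callable, stop waiting after the given number of seconds); it is not the actual implementation, which does additional work to interrupt the wrapped function.

import threading
from typing import Callable, List, TypeVar

ResultT = TypeVar("ResultT")

class TimeoutExpired(Exception):
    pass

def timeout(seconds: float, func: Callable[[], ResultT]) -> ResultT:
    results: List[ResultT] = []
    errors: List[BaseException] = []

    def run() -> None:
        try:
            results.append(func())
        except BaseException as e:
            errors.append(e)

    worker = threading.Thread(target=run, daemon=True)
    worker.start()
    worker.join(seconds)
    if worker.is_alive():
        # Stop waiting; in the view above this becomes a "partially_completed" response.
        raise TimeoutExpired
    if errors:
        raise errors[0]
    return results[0]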