From f531f3a27fc91f3022774941cb2315bf676fcd8c Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 8 Mar 2022 09:49:10 -0800 Subject: [PATCH] push_notifications: Drop FCM retries to 2, not 10. This reverts bc15085098709d746ba69c99d61601f8d4316e6e (which provided not justification for its change) and moves further, down to 2 retries from the default of 5. 10 retries, with exponential backoff, is equivalent to sleeping 2^11 seconds, or just about 34 minutes (though the code uses a jitter which may make this up to 51 minutes). This is an unreasonable amount of time to spend in this codepath -- as only one worker is used, and it is single-threaded, this could effectively block all missed message notifications for half an hour or longer. This is also necessary because messages sent through the push bouncer are sent synchronously; the sending server uses a 30-second timeout, set in PushBouncerSession. Having retries which linger longer than this can cause duplicate messages; the sending server will time out and re-queue the message in RabbitMQ, while the push bouncer's request will continue, and may succeed. Limit to 2 retries (APNS currently uses 3), and results expected max of 4 seconds of sleep, potentially up to 6. If this fails, there exists another retry loop above it, at the RabbitMQ layer (either locally, or via the remote server's queue), which will result in up to 3 additional retries -- all told, the request will me made to FCM up to 12 times. --- zerver/lib/push_notifications.py | 5 ++++- zilencer/views.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/zerver/lib/push_notifications.py b/zerver/lib/push_notifications.py index 9c0f8628a8..12c0cb44bd 100644 --- a/zerver/lib/push_notifications.py +++ b/zerver/lib/push_notifications.py @@ -343,7 +343,10 @@ def send_android_push_notification( # Two kwargs `retries` and `session` get eaten by `json_request`; # the rest pass through to the GCM server. res = gcm_client.json_request( - registration_ids=reg_ids, priority=priority, data=data, retries=10 + registration_ids=reg_ids, + priority=priority, + data=data, + retries=2, ) except OSError: logger.warning("Error while pushing to GCM", exc_info=True) diff --git a/zilencer/views.py b/zilencer/views.py index e0859e1d97..c48b538fcb 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -257,6 +257,12 @@ def remote_server_notify_push( payload["zulip_message_ids"] = ",".join(str(id) for id in truncated_ids) return payload + # The full request must complete within 30s, the timeout set by + # Zulip remote hosts for push notification requests (see + # PushBouncerSession). The timeouts in the FCM and APNS codepaths + # must be set accordingly; see send_android_push_notification and + # send_apple_push_notification. + gcm_payload = truncate_payload(gcm_payload) send_android_push_notification( user_id, android_devices, gcm_payload, gcm_options, remote=server