web: Use better exponential backoff algorithm.

This matches the algorithm that we designed for the Python API, except that we use a ratio of 2 rather than sqrt(2) in the message_fetch code path, because it's a heavier request. We increase the number of failures before showing a user-facing error to roughly preserve the same time period before a user-facing error is shown.
2024-02-02 14:12:40 -08:00 · 2024-02-02 14:12:40 -08:00 · b5b496c752
parent e3960c22be
commit b5b496c752
2 changed files with 11 additions and 7 deletions
--- a/web/src/message_fetch.js
+++ b/web/src/message_fetch.js
@@ -377,11 +377,10 @@ export function load_messages(opts, attempt = 1) {
            // the server is giving us 500s/502s.
            //
            // So we do the maximum of the retry-after header and an exponential
-            // backoff with full jitter: up to 2s, 4s, 8s, 16s, 32s
+            // backoff with ratio 2 and half jitter. Starts at 1-2s and ends at
-            let backoff_delay_secs = Math.random() * 2 ** attempt * 2;
+            // 16-32s after 5 failures.
-            if (attempt >= 5) {
+            const backoff_scale = Math.min(2 ** attempt, 32);
-                backoff_delay_secs = 30;
+            const backoff_delay_secs = ((1 + Math.random()) / 2) * backoff_scale;
-            }
            let rate_limit_delay_secs = 0;
            if (xhr.status === 429 && xhr.responseJSON?.code === "RATE_LIMIT_HIT") {
                // Add a bit of jitter to the required delay suggested by the
--- a/web/src/server_events.js
+++ b/web/src/server_events.js
@@ -231,7 +231,7 @@ function get_events({dont_block = false} = {}) {
                    get_events_failures += 1;
                }
-                if (get_events_failures >= 5) {
+                if (get_events_failures >= 8) {
                    show_ui_connection_error();
                } else {
                    hide_ui_connection_error();
@@ -243,7 +243,12 @@ function get_events({dont_block = false} = {}) {
            // We need to respect the server's rate-limiting headers, but beyond
            // that, we also want to avoid contributing to a thundering herd if
            // the server is giving us 500s/502s.
-            const backoff_delay_secs = Math.min(90, Math.exp(get_events_failures / 2));
+            //
+            // So we do the maximum of the retry-after header and an exponential
+            // backoff with ratio sqrt(2) and half jitter. Starts at 1-2s and ends at
+            // 45-90s after enough failures.
+            const backoff_scale = Math.min(2 ** ((get_events_failures + 1) / 2), 90);
+            const backoff_delay_secs = ((1 + Math.random()) / 2) * backoff_scale;
            let rate_limit_delay_secs = 0;
            if (xhr.status === 429 && xhr.responseJSON?.code === "RATE_LIMIT_HIT") {
                // Add a bit of jitter to the required delay suggested