From b5b496c752ce63a09bf99035a2e9f2e64058214a Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Fri, 2 Feb 2024 14:12:40 -0800 Subject: [PATCH] web: Use better exponential backoff algorithm. This matches the algorithm that we designed for the Python API, except that we use a ratio of 2 rather than sqrt(2) in the message_fetch code path, because it's a heavier request. We increase the number of failures before showing a user-facing error to roughly preserve the same time period before a user-facing error is shown. --- web/src/message_fetch.js | 9 ++++----- web/src/server_events.js | 9 +++++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/web/src/message_fetch.js b/web/src/message_fetch.js index d91205d73e..fff2adbba2 100644 --- a/web/src/message_fetch.js +++ b/web/src/message_fetch.js @@ -377,11 +377,10 @@ export function load_messages(opts, attempt = 1) { // the server is giving us 500s/502s. // // So we do the maximum of the retry-after header and an exponential - // backoff with full jitter: up to 2s, 4s, 8s, 16s, 32s - let backoff_delay_secs = Math.random() * 2 ** attempt * 2; - if (attempt >= 5) { - backoff_delay_secs = 30; - } + // backoff with ratio 2 and half jitter. Starts at 1-2s and ends at + // 16-32s after 5 failures. 
+ const backoff_scale = Math.min(2 ** attempt, 32); + const backoff_delay_secs = ((1 + Math.random()) / 2) * backoff_scale; let rate_limit_delay_secs = 0; if (xhr.status === 429 && xhr.responseJSON?.code === "RATE_LIMIT_HIT") { // Add a bit of jitter to the required delay suggested by the diff --git a/web/src/server_events.js b/web/src/server_events.js index cd5d7587e9..822c3cff06 100644 --- a/web/src/server_events.js +++ b/web/src/server_events.js @@ -231,7 +231,7 @@ function get_events({dont_block = false} = {}) { get_events_failures += 1; } - if (get_events_failures >= 5) { + if (get_events_failures >= 8) { show_ui_connection_error(); } else { hide_ui_connection_error(); @@ -243,7 +243,12 @@ function get_events({dont_block = false} = {}) { // We need to respect the server's rate-limiting headers, but beyond // that, we also want to avoid contributing to a thundering herd if // the server is giving us 500s/502s. - const backoff_delay_secs = Math.min(90, Math.exp(get_events_failures / 2)); + // + // So we do the maximum of the retry-after header and an exponential + // backoff with ratio sqrt(2) and half jitter. Starts at 1-2s and ends at + // 45-90s after 12 failures. + const backoff_scale = Math.min(2 ** ((get_events_failures + 1) / 2), 90); + const backoff_delay_secs = ((1 + Math.random()) / 2) * backoff_scale; let rate_limit_delay_secs = 0; if (xhr.status === 429 && xhr.responseJSON?.code === "RATE_LIMIT_HIT") { // Add a bit of jitter to the required delay suggested