fetch-contributor-data: Use builtin backoff.

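Delegating retries to urllib3's built-in Retry support means that
connection timeouts are now retried with backoff as well; the previous
hand-rolled loop only retried when a response came back with a non-200
status code.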
This commit is contained in:
Alex Vandiver 2021-06-29 12:26:01 -07:00 committed by Tim Abbott
parent bf9780267d
commit 66aa2a2505
2 changed files with 25 additions and 23 deletions

View File

@ -10,8 +10,6 @@ import os
import sys import sys
import unicodedata import unicodedata
from datetime import date from datetime import date
from random import randrange
from time import sleep
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
@ -21,8 +19,8 @@ setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings" os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
import django import django
import requests
from django.conf import settings from django.conf import settings
from requests.packages.urllib3.util.retry import Retry
from typing_extensions import TypedDict from typing_extensions import TypedDict
django.setup() django.setup()
@ -58,7 +56,6 @@ logger = logging.getLogger("zulip.fetch_contributors_json")
def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]: def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
contributors: List[Contributor] = [] contributors: List[Contributor] = []
retry_attempts = 0
page_index = 1 page_index = 1
api_link = f"https://api.github.com/repos/zulip/{repo_name}/contributors" api_link = f"https://api.github.com/repos/zulip/{repo_name}/contributors"
@ -70,29 +67,33 @@ def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
if personal_access_token is not None: if personal_access_token is not None:
headers = {"Authorization": f"token {personal_access_token}"} headers = {"Authorization": f"token {personal_access_token}"}
session = GithubSession() Retry.BACKOFF_MAX = 64
retry = Retry(
total=max_retries,
backoff_factor=2.0,
status_forcelist=set(
[
403, # Github does unauth rate-limiting via 403's
429, # The formal rate-limiting response code
502, # Bad gateway
503, # Service unavailable
]
),
)
session = GithubSession(max_retries=retry)
while True: while True:
response: requests.Response = session.get( response = session.get(
api_link, api_link,
params={**api_data, "page": f"{page_index}"}, params={**api_data, "page": f"{page_index}"},
verify=certificates, verify=certificates,
headers=headers, headers=headers,
) )
if response.status_code == 200: response.raise_for_status()
data = response.json() data = response.json()
if len(data) == 0: if len(data) == 0:
return contributors return contributors
contributors.extend(data) contributors.extend(data)
retry_attempts = 0
page_index += 1 page_index += 1
else:
retry_attempts += 1
if retry_attempts > args.max_retries:
logger.warning("Failed retries fetching contributors data from GitHub.")
sys.exit(1)
sleep_time = randrange(0, min(64, 2 ** retry_attempts))
sleep(sleep_time)
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None: def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:

View File

@ -1,5 +1,6 @@
import json import json
import logging import logging
from typing import Any
import requests import requests
@ -10,8 +11,8 @@ logger = logging.getLogger(__name__)
class GithubSession(OutgoingSession): class GithubSession(OutgoingSession):
def __init__(self) -> None: def __init__(self, **kwargs: Any) -> None:
super().__init__(role="github", timeout=5) super().__init__(role="github", timeout=5, **kwargs)
def get_latest_github_release_version_for_repo(repo: str) -> str: def get_latest_github_release_version_for_repo(repo: str) -> str: