fetch-contributor-data: Use builtin backoff.

This correctly handles connection timeouts, not just non-200 status
codes.
This commit is contained in:
Alex Vandiver 2021-06-29 12:26:01 -07:00 committed by Tim Abbott
parent bf9780267d
commit 66aa2a2505
2 changed files with 25 additions and 23 deletions

View File

@ -10,8 +10,6 @@ import os
import sys
import unicodedata
from datetime import date
from random import randrange
from time import sleep
from typing import Dict, List, Optional, Union
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
@ -21,8 +19,8 @@ setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"
import django
import requests
from django.conf import settings
from requests.packages.urllib3.util.retry import Retry
from typing_extensions import TypedDict
django.setup()
@ -58,7 +56,6 @@ logger = logging.getLogger("zulip.fetch_contributors_json")
def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
contributors: List[Contributor] = []
retry_attempts = 0
page_index = 1
api_link = f"https://api.github.com/repos/zulip/{repo_name}/contributors"
@ -70,29 +67,33 @@ def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
if personal_access_token is not None:
headers = {"Authorization": f"token {personal_access_token}"}
session = GithubSession()
Retry.BACKOFF_MAX = 64
retry = Retry(
total=max_retries,
backoff_factor=2.0,
status_forcelist=set(
[
403, # Github does unauth rate-limiting via 403's
429, # The formal rate-limiting response code
502, # Bad gateway
503, # Service unavailable
]
),
)
session = GithubSession(max_retries=retry)
while True:
response: requests.Response = session.get(
response = session.get(
api_link,
params={**api_data, "page": f"{page_index}"},
verify=certificates,
headers=headers,
)
if response.status_code == 200:
response.raise_for_status()
data = response.json()
if len(data) == 0:
return contributors
contributors.extend(data)
retry_attempts = 0
page_index += 1
else:
retry_attempts += 1
if retry_attempts > args.max_retries:
logger.warning("Failed retries fetching contributors data from GitHub.")
sys.exit(1)
sleep_time = randrange(0, min(64, 2 ** retry_attempts))
sleep(sleep_time)
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:

View File

@ -1,5 +1,6 @@
import json
import logging
from typing import Any
import requests
@ -10,8 +11,8 @@ logger = logging.getLogger(__name__)
class GithubSession(OutgoingSession):
    """Outgoing HTTP session preconfigured for requests to GitHub.

    Always passes ``role="github"`` and ``timeout=5`` to
    ``OutgoingSession.__init__`` and forwards any additional keyword
    arguments (for example ``max_retries``) to it unchanged.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Initialize the session.

        Args:
            **kwargs: Extra keyword arguments handed straight to
                ``OutgoingSession.__init__``. May be empty, so a plain
                ``GithubSession()`` call keeps working. ``role`` and
                ``timeout`` are fixed here and must not be passed again.
        """
        # NOTE(review): timeout=5 is presumably in seconds -- confirm against
        # OutgoingSession.
        super().__init__(role="github", timeout=5, **kwargs)
def get_latest_github_release_version_for_repo(repo: str) -> str: