mirror of https://github.com/zulip/zulip.git
tools: Back off after request failure in fetch-contributor-data.
If a request fails, the tool sleeps for some time before making further requests. The sleep time is a random number between 0 and 2^failures, capped at 64 seconds. More details about the algorithm can be found at https://chat.zulip.org/#narrow/stream/92-learning/topic/exponential.20backoff.20--.20with.20jitter
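For context, here is the delay computation the message describes, extracted as a small standalone sketch; the helper name `backoff_sleep` is illustrative and not part of the commit:

    from random import randrange
    from time import sleep

    def backoff_sleep(failures: int, cap_seconds: int = 64) -> int:
        """Sleep for a random duration in [0, min(cap_seconds, 2**failures))."""
        # "Full jitter": pick uniformly over the whole backoff window so that
        # concurrent clients do not retry in lockstep.
        sleep_time = randrange(0, min(cap_seconds, 2 ** failures))
        sleep(sleep_time)
        return sleep_time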
This commit is contained in:
parent 449f7e2d4b
commit 31a5119892
tools/fetch-contributor-data

@@ -16,12 +16,17 @@ import sys
 import argparse
+from time import sleep
 from datetime import date
+from random import randrange
+import logging

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 from scripts.lib.setup_path import setup_path
 setup_path()

 os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
 import django
 django.setup()

 from django.conf import settings

 import requests
@@ -30,10 +35,8 @@ import json
 duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')

 parser = argparse.ArgumentParser()
-parser.add_argument('--max-retries', type=int, default=3,
+parser.add_argument('--max-retries', type=int, default=10,
                     help='Number of times to retry fetching data from Github')
 parser.add_argument('--not-required', action='store_true', default=False,
                     help='Consider failures to reach GitHub nonfatal')
 args = parser.parse_args()

 ContributorsJSON = TypedDict('ContributorsJSON', {
@@ -41,6 +44,8 @@ ContributorsJSON = TypedDict('ContributorsJSON', {
     'contrib': List[Dict[str, Union[str, int]]],
 })

+logger = logging.getLogger('zulip.fetch_contributors_json')
+
 def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:
     r = requests.get(repo_link, verify=os.environ.get('CUSTOM_CA_CERTIFICATES'))  # type: requests.Response
     return r.json() if r.status_code == 200 else None
@@ -50,7 +55,7 @@ def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
         try:
             f.write("{}\n".format(json.dumps(json_data, indent=2, sort_keys=True)))
         except IOError as e:
-            print(e)
+            logger.warning(e)
             sys.exit(1)

 def update_contributor_data_file() -> None:
@@ -77,8 +82,9 @@ def update_contributor_data_file() -> None:

     data = dict(date=str(date.today()), contrib=[])  # type: ContributorsJSON
     contribs_list = {}  # type: Dict[str, Dict[str, Union[str, int]]]
+    retry_attempts = 0

-    for _ in range(args.max_retries):
+    while True:
         repos_done = []
         for name, link in repositories.items():
             contribs = fetch_contributors(link)
@@ -108,6 +114,15 @@ def update_contributor_data_file() -> None:
                     contribs_list[username].update(contrib_data)
                 else:
                     contribs_list[username] = contrib_data
+                retry_attempts = 0
+            else:
+                retry_attempts += 1
+                if retry_attempts > args.max_retries:
+                    logger.warning("Failed retries fetching contributors data from Github.")
+                    sys.exit(1)
+
+                sleep_time = randrange(0, min(64, 2**retry_attempts))
+                sleep(sleep_time)

         # remove duplicate contributions count
         # find commits at the time of split and subtract from zulip-server
@@ -127,13 +142,6 @@ def update_contributor_data_file() -> None:
         if not repositories:
             break

-        # Wait before retrying failed requests for Github to aggregate data.
-        sleep(2)
-    else:
-        print("ERROR: Failed fetching contributors data from Github.")
-        if not args.not_required:
-            sys.exit(1)
-
     for contributor_name, contributor_data in contribs_list.items():
         contributor_data['name'] = contributor_name
         data['contrib'].append(contributor_data)
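Read as a whole, the diff converts a fixed-delay, fixed-count retry loop into retry-until-success with jittered backoff. A standalone sketch of the same control flow, stripped of the script's multi-repository bookkeeping (function and parameter names here are illustrative, not from the commit):

    from random import randrange
    from time import sleep
    from typing import Any, Callable, Optional

    def fetch_with_backoff(fetch: Callable[[], Optional[Any]],
                           max_retries: int = 10) -> Any:
        """Call fetch() until it returns data, backing off with jitter on failure."""
        failures = 0
        while True:
            result = fetch()
            if result is not None:
                return result
            failures += 1
            if failures > max_retries:
                raise RuntimeError('Exceeded maximum retries')
            # Random delay in [0, min(64, 2**failures)) seconds, as in the diff above.
            sleep(randrange(0, min(64, 2 ** failures)))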