/team: Fetch contributors data from all major repos.

Also wait 2 seconds before trying again.
Tommy Ip, 2017-11-16 13:05:26 +00:00 (committed by Tim Abbott)
parent 84384196f2
commit 771abf4179
1 changed file with 59 additions and 49 deletions
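In outline, the change below fetches contributor stats for each Zulip repository, drops the repositories that respond successfully, and sleeps 2 seconds before retrying the rest. The following is a minimal sketch of that retry loop, not the script itself; fetch_all and REPOS are hypothetical names, and only two of the repository URLs from the diff are listed.

# Minimal sketch of the retry strategy (illustrative names, partial repo list).
from time import sleep
from typing import Any, Dict, List, Optional

import requests

REPOS = {
    'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
    'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
}

def fetch_contributors(url: str) -> Optional[List[Dict[str, Any]]]:
    r = requests.get(url)
    # Anything other than 200 (e.g. 202 while GitHub is still computing the
    # stats) means "try again later".
    return r.json() if r.status_code == 200 else None

def fetch_all(max_retries: int = 3) -> Dict[str, List[Dict[str, Any]]]:
    pending = dict(REPOS)
    results = {}  # type: Dict[str, List[Dict[str, Any]]]
    for _ in range(max_retries):
        for name, url in list(pending.items()):
            data = fetch_contributors(url)
            if data is not None:
                results[name] = data
                del pending[name]
        if not pending:
            break
        sleep(2)  # give GitHub time to aggregate before retrying
    return results

The 2-second pause matches the diff's comment about waiting for GitHub to aggregate data: the stats endpoints answer with 202 while the statistics are still being computed, so an immediate retry would usually fail again.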


@@ -9,10 +9,12 @@ from lib import sanity_check
sanity_check.check_venv(__file__)
from typing import Any, Dict, List, Optional, Union, Text
from mypy_extensions import TypedDict
import os
import sys
import argparse
from time import sleep
from datetime import date
import subprocess
@@ -25,7 +27,6 @@ from django.conf import settings
from zerver.lib.utils import split_by
FIXTURE_FILE = os.path.join(os.path.dirname(__file__), '../zerver/fixtures/authors.json')
GITHUB_LINK = 'https://api.github.com/repos/zulip/zulip/stats/contributors'
parser = argparse.ArgumentParser()
parser.add_argument('--max-retries', type=int, default=3,
@@ -38,22 +39,18 @@ parser.add_argument('--not-required', action='store_true', default=False,
                    help='Consider failures to reach GitHub nonfatal')
args = parser.parse_args()
def fetch_data(retries, link):
    # type: (int, str) -> Optional[List[Dict[str, Any]]]
    for _ in range(retries):
        try:
            r = requests.get(link)  # type: requests.Response
            if r.status_code == 200:
                return r.json()
            else:
                print('GitHub API returned a non-200 status.')
        except requests.exceptions.RequestException as e:
            print(e)
ContributorsJSON = TypedDict('ContributorsJSON', {
    'date': str,
    'contrib': Dict[str, Dict[str, Union[str, int]]],
})
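# Shape of the JSON this produces, with hypothetical values:
#   {"date": "2017-11-16",
#    "contrib": {"someuser": {"avatar": "https://...", "server": 150, "mobile": 12}}}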
    return None
def write_to_disk(json_data, out_file):
    # type: (Dict[str, Any], str) -> None
def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:
    r = requests.get(repo_link)  # type: requests.Response
    return r.json() if r.status_code == 200 else None
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    with open(out_file, 'w') as f:
        try:
            f.write("{}\n".format(json.dumps(json_data)))
@@ -61,53 +58,66 @@ def write_to_disk(json_data, out_file):
            print(e)
            sys.exit(1)
def run_production():
    # type: () -> None
def run_production() -> None:
    """
    Fetch data from GitHub and store it in
    `static/generated/github-contributors.json`.
    Get contributors data from GitHub and insert them into a temporary
    dictionary. Retry fetching each repository if it responds with a non-200
    HTTP status.
    """
    json_data = fetch_data(args.max_retries, GITHUB_LINK)  # type: Optional[List[Dict[str, Any]]]
    if json_data:
        # Successfully fetched data from GitHub
        contribs = []
        for user in json_data:
            author = user.get('author')
            if author is None:
                print("Unable to access fields for %s" % (user,))
                continue
            result_user = dict(
                avatar=author.get('avatar_url'),
                name=author.get('login'),
                commits=user.get('total')
            )
            contribs.append(result_user)
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-electron/stats/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',
    }
        out_contrib_data = sorted(
            contribs,
            key=lambda k: k.get('commits'),
            reverse=True
        )  # type: List[Dict[str, Union[Text, int]]]
    data = dict(date=str(date.today()), contrib={})  # type: ContributorsJSON
    contribs_data = {}  # type: Dict[str, Dict[str, Union[str, int]]]
        out_data = dict(
            data=out_contrib_data,
            date=str(date.today())
        )  # type: Dict[str, Any]
    for t in range(args.max_retries):
        repo_done = []
        for k, v in repositories.items():
            contribs = fetch_contributors(v)
            if contribs:
                repo_done.append(k)
                for contrib in contribs:
                    username = contrib.get('author').get('login')
                    contrib_data = {
                        'avatar': contrib.get('author').get('avatar_url'),
                        k: contrib.get('total'),
                    }
                    if username in contribs_data:
                        contribs_data[username].update(contrib_data)
                    else:
                        contribs_data[username] = contrib_data
        for k in repo_done:
            del repositories[k]
        write_to_disk(out_data, settings.CONTRIBUTORS_DATA)
        if not repositories:
            break
    elif not args.not_required:
        print('Failed to fetch data from GitHub.')
        sys.exit(1)
        # Wait before retrying failed requests for GitHub to aggregate data.
        sleep(2)
    else:
        print("ERROR: Failed fetching contributors data from GitHub.")
        if not args.not_required:
            sys.exit(1)
def copy_fixture():
    # type: () -> None
    data['contrib'] = contribs_data
    write_to_disk(data, settings.CONTRIBUTORS_DATA)
def copy_fixture() -> None:
"""
Copy test fixture file from zerver/fixtures. This is used to avoid
constantly fetching data from Github during testing.
"""
subprocess.check_call(['cp', FIXTURE_FILE, settings.CONTRIBUTORS_DATA])
if args.use_fixture:
    copy_fixture()
else: