zulip/tools/fetch-contributor-data

134 lines
4.9 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python3
"""
Fetch contributor data from the GitHub API and convert it to structured
JSON data for the contributors section of the /team page.
"""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.setup_path import setup_path
setup_path()
import argparse
import logging
from datetime import date
from random import randrange
from time import sleep
from typing import Dict, List, Union
from typing_extensions import TypedDict
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
import django
django.setup()
import json
import requests
from django.conf import settings
# JSON file, located next to this script, that maps a committer to the number
# of commits to subtract from that committer's 'server' count.
duplicate_commits_file = os.path.join(
    os.path.dirname(__file__),
    'duplicate_commits.json',
)

# Command-line interface: the only option is the retry budget used when
# fetching from the GitHub API.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--max-retries',
    type=int,
    default=10,
    help='Number of times to retry fetching data from Github',
)
args = parser.parse_args()
class ContributorsJSON(TypedDict):
    """Top-level structure of the contributors data file written by this script."""

    # Day the data was generated, as the string form of a datetime.date.
    date: str
    # One dictionary per contributor; values are strings or integer counts.
    contrib: List[Dict[str, Union[int, str]]]
class Contributor(TypedDict):
    """The fields this script reads from one entry of a contributors response."""

    # URL of the contributor's avatar image.
    avatar_url: str
    # Contribution count reported for the repository that was queried.
    contributions: int
    # Account login; used as the key when merging data across repositories.
    login: str
# Logger for this script; used to report that fetching from GitHub has
# exhausted its retries.
logger = logging.getLogger('zulip.fetch_contributors_json')
def fetch_contributors(repo_link: str, max_retries: int) -> List[Contributor]:
    """
    Fetch every page of a contributors API endpoint.

    Pages are requested in order, starting at page 1, until a page with an
    empty JSON body is returned.  Any response other than HTTP 200 counts as
    a failed attempt for the current page, which is retried after a random
    delay whose upper bound doubles with each consecutive failure (and is
    capped at 64 seconds).  The failure counter resets after every
    successfully fetched page.

    Args:
        repo_link: URL of the contributors endpoint, without a query string.
        max_retries: Number of consecutive failed attempts tolerated for a
            single page before giving up.

    Returns:
        The decoded JSON entries of all pages, concatenated in page order.

    Raises:
        SystemExit: With status 1, after logging a warning, once more than
            ``max_retries`` consecutive attempts have failed.
    """
    contributors: List[Contributor] = []
    retry_attempts = 0
    page_index = 1
    # Loop-invariant: value handed to requests' `verify` option (None when the
    # environment variable is unset).
    ca_certificates = os.environ.get('CUSTOM_CA_CERTIFICATES')

    while True:
        response: requests.Response = requests.get(
            f"{repo_link}?page={page_index}",
            verify=ca_certificates,
        )
        if response.status_code == 200:
            data = response.json()
            if not data:
                # An empty page marks the end of the paginated listing.
                return contributors
            contributors.extend(data)
            retry_attempts = 0
            page_index += 1
        else:
            retry_attempts += 1
            # Honor the caller-supplied budget rather than reaching for the
            # module-level command-line arguments.
            if retry_attempts > max_retries:
                logger.warning("Failed retries fetching contributors data from Github.")
                sys.exit(1)

            # Randomized exponential backoff before retrying the same page.
            sleep_time = randrange(0, min(64, 2**retry_attempts))
            sleep(sleep_time)
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """
    Write json_data to out_file as indented JSON.

    The output uses a two-space indent and sorted keys, and is terminated by
    a newline.  The file is opened in 'w' mode, so existing content is
    replaced.
    """
    with open(out_file, 'w') as out:
        out.write(json.dumps(json_data, indent=2, sort_keys=True) + "\n")
def update_contributor_data_file() -> None:
    """
    Regenerate the contributors data file from GitHub.

    Contributors of every repository listed below are fetched with
    fetch_contributors() and merged into one record per login, holding the
    avatar, the login and one commit count per repository.  Counts listed in
    the duplicate-commits file are then subtracted from the 'server' totals,
    and the result is written to settings.CONTRIBUTOR_DATA_FILE_PATH.
    """
    # Record the generation date up front, before any network access.
    generated_on = str(date.today())

    # Every repository that counts towards the totals belongs in this
    # dictionary, including ones that should *not* get a tab on the team
    # page (for instance because they are deprecated).
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-desktop/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/contributors',
        'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/contributors',
        'terminal': 'https://api.github.com/repos/zulip/zulip-terminal/contributors',
        'zulip-ios-legacy': 'https://api.github.com/repos/zulip/zulip-ios-legacy/contributors',
        'zulip-android': 'https://api.github.com/repos/zulip/zulip-android/contributors',
    }

    # Merged records, keyed by login.
    contribs_list: Dict[str, Dict[str, Union[str, int]]] = {}

    for repo_name, api_url in repositories.items():
        for entry in fetch_contributors(api_url, args.max_retries):
            login = entry['login']
            record = contribs_list.get(login)
            if record is None:
                # First sighting of this login: start a record for it.
                record = contribs_list[login] = {
                    'avatar': entry['avatar_url'],
                    'name': login,
                }
            record[repo_name] = entry['contributions']

    # Remove the duplicated contribution counts: the file lists, per
    # committer, the commits found at the time of the split, which are
    # subtracted from the zulip-server count.
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)

    for committer in duplicate_commits:
        record = contribs_list.get(committer)
        if record is None or not record.get('server'):
            # Unknown committer, or nothing counted for the server repository.
            continue
        total_commits = record['server']
        assert isinstance(total_commits, int)
        record['server'] = total_commits - duplicate_commits[committer]

    data: ContributorsJSON = dict(
        date=generated_on,
        contrib=list(contribs_list.values()),
    )
    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
# Regenerate the contributor data only when this file is executed as a script.
if __name__ == "__main__":
    update_contributor_data_file()