#!/usr/bin/env python3
"""
Fetch contributors data from GitHub using their API, convert it to structured
JSON data for the /team page contributors section.
"""
import os
import sys

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.setup_path import setup_path

setup_path()

import argparse
import logging
from datetime import date
from random import randrange
from time import sleep
from typing import Dict, List, Optional, Union

from typing_extensions import TypedDict

# Django must know which settings module to use before django.setup() runs.
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
import django

django.setup()

import json

import requests
from django.conf import settings

from zerver.lib.avatar_hash import gravatar_hash
from zproject.config import get_secret

# JSON file, next to this script, mapping a contributor key to the number of
# commits that must be subtracted from that contributor's 'zulip' count.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')

parser = argparse.ArgumentParser()
parser.add_argument('--max-retries', type=int, default=10,
                    help='Number of times to retry fetching data from GitHub')
args = parser.parse_args()


class ContributorsJSON(TypedDict):
    """Shape of the document written to the contributor data file."""

    # String form of the date on which the data was generated.
    date: str
    # One entry per contributor: per-repository commit counts plus the
    # optional 'avatar', 'email', 'github_username' and 'name' fields.
    contributors: List[Dict[str, Union[int, str]]]


class Contributor(TypedDict):
    """The fields this script reads from one entry of the GitHub response."""

    avatar_url: Optional[str]
    contributions: int
    login: Optional[str]
    email: Optional[str]
    name: Optional[str]


logger = logging.getLogger('zulip.fetch_contributors_json')


def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
    """Return every contributor entry GitHub reports for zulip/<repo_name>.

    Pages are requested one after another until an empty page is returned.
    A non-200 response is retried after a randomized, exponentially growing
    delay; the retry counter is reset after each successful page.

    Args:
        repo_name: Repository name under the ``zulip`` GitHub organization.
        max_retries: Number of consecutive failed requests tolerated before
            giving up.

    Returns:
        The concatenation of all pages, in the order they were received.

    Raises:
        SystemExit: With status 1 once more than ``max_retries`` consecutive
            requests have failed.
    """
    contributors: List[Contributor] = []
    retry_attempts = 0
    page_index = 1

    api_link = f"https://api.github.com/repos/zulip/{repo_name}/contributors"
    api_data = {"anon": "1"}
    certificates = os.environ.get('CUSTOM_CA_CERTIFICATES')

    # Only send an Authorization header when a token is configured.
    headers: Dict[str, str] = {}
    personal_access_token = get_secret('github_personal_access_token')
    if personal_access_token is not None:
        headers = {"Authorization": f"token {personal_access_token}"}

    while True:
        response: requests.Response = requests.get(
            api_link,
            {**api_data, "page": f"{page_index}"},
            verify=certificates,
            headers=headers,
        )
        if response.status_code == 200:
            data = response.json()
            if not data:
                # An empty page means there is nothing further to fetch.
                return contributors
            contributors.extend(data)
            retry_attempts = 0
            page_index += 1
        else:
            retry_attempts += 1
            # Honor the caller-supplied limit rather than the module-level
            # command-line arguments.
            if retry_attempts > max_retries:
                logger.warning("Failed retries fetching contributors data from GitHub.")
                sys.exit(1)

            # Random whole-second delay whose upper bound doubles with each
            # consecutive failure and is capped at 64.
            sleep_time = randrange(0, min(64, 2**retry_attempts))
            sleep(sleep_time)


def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """Write ``json_data`` to ``out_file`` as indented, key-sorted JSON.

    A trailing newline is appended after the JSON document.
    """
    with open(out_file, 'w') as f:
        json.dump(json_data, f, indent=2, sort_keys=True)
        f.write("\n")


def update_contributor_data_file() -> None:
    """Collect contributor data for all repositories and write it to disk.

    Contributors are merged across repositories, keyed by their GitHub login
    or, when there is no login, by their email. The counts listed in
    ``duplicate_commits_file`` are then subtracted from the matching 'zulip'
    counts, and the result is written to
    ``settings.CONTRIBUTOR_DATA_FILE_PATH``.
    """
    # This list should hold all repositories that should be included in
    # the total count, including those that should *not* have tabs on the team
    # page (e.g. if they are deprecated).
    repo_names = ['zulip', 'zulip-desktop', 'zulip-mobile', 'python-zulip-api', 'zulip-js', 'zulipbot',
                  'zulip-terminal', 'zulip-ios-legacy', 'zulip-android']

    data: ContributorsJSON = dict(date=str(date.today()), contributors=[])
    contributor_username_to_data: Dict[str, Dict[str, Union[str, int]]] = {}

    for repo_name in repo_names:
        contributors = fetch_contributors(repo_name, args.max_retries)
        for contributor in contributors:
            username = contributor.get('login') or contributor.get('email')
            assert(username is not None)
            if username in contributor_username_to_data:
                contributor_username_to_data[username][repo_name] = contributor['contributions']
            else:
                contributor_username_to_data[username] = {
                    repo_name: contributor['contributions']
                }

                # Profile fields are recorded the first time a contributor
                # is seen; later repositories only add their commit counts.
                avatar_url = contributor.get('avatar_url')
                if avatar_url is not None:
                    contributor_username_to_data[username]['avatar'] = avatar_url

                email = contributor.get('email')
                if email is not None:
                    contributor_username_to_data[username]["email"] = email
                    # When an email is present, the avatar is set to a
                    # Gravatar URL derived from it, replacing any avatar
                    # stored just above.
                    hash_key = gravatar_hash(email)
                    gravatar_url = f"https://secure.gravatar.com/avatar/{hash_key}?d=identicon"
                    contributor_username_to_data[username]['avatar'] = gravatar_url

                login = contributor.get('login')
                if login is not None:
                    contributor_username_to_data[username]["github_username"] = login

                name = contributor.get('name')
                if name is not None:
                    contributor_username_to_data[username]["name"] = name

    # remove duplicate contributions count
    # find commits at the time of split and subtract from zulip-server
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)

    for committer in duplicate_commits:
        if committer in contributor_username_to_data and contributor_username_to_data[committer].get('zulip'):
            total_commits = contributor_username_to_data[committer]['zulip']
            assert isinstance(total_commits, int)
            duplicate_commits_count = duplicate_commits[committer]
            original_commits = total_commits - duplicate_commits_count
            contributor_username_to_data[committer]['zulip'] = original_commits

    data['contributors'] = list(contributor_username_to_data.values())
    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)


if __name__ == "__main__":
    update_contributor_data_file()