#!/usr/bin/env python3
"""
Fetch contributors data from GitHub using their API, convert it to structured
JSON data for the /team/ page contributors section.
"""

import argparse
import json
import logging
import os
import sys
import unicodedata
from datetime import datetime, timezone

# The repository root must be importable before any project module is loaded,
# and Django must be configured before the zerver/zproject imports below.
# The order of the statements in this preamble is therefore significant.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from scripts.lib.setup_path import setup_path

setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"

from typing import TypedDict

import django
from django.conf import settings
from urllib3.util import Retry

django.setup()

from zerver.lib.avatar_hash import gravatar_hash
from zerver.lib.github import GithubSession
from zproject.config import get_secret

# JSON file, stored next to this script, mapping a committer key to the number
# of commits that must be subtracted from that committer's "zulip" count.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), "duplicate_commits.json")

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max-retries", type=int, default=10, help="Number of times to retry fetching data from GitHub"
)
args = parser.parse_args()


class ContributorsJSON(TypedDict):
    """Shape of the document written to the contributor data file."""

    # Date (UTC) on which the data was generated, as a string.
    date: str
    # One dict per contributor: per-repository commit counts plus profile fields.
    contributors: list[dict[str, int | str]]


class Contributor(TypedDict):
    """The fields of a GitHub contributor record that this script reads."""

    avatar_url: str | None
    contributions: int
    login: str | None
    email: str | None
    name: str | None


logger = logging.getLogger("zulip.fetch_contributors_json")


def fetch_contributors(repo_name: str, max_retries: int) -> list[Contributor]:
    """Return every contributor record GitHub reports for zulip/<repo_name>.

    Pages through the repository's ``contributors`` endpoint (with ``anon=1``)
    starting at page 1, and stops at the first page that carries no records.

    Args:
        repo_name: Name of the repository inside the ``zulip`` organization.
        max_retries: Total number of retries allowed for failing requests.

    Returns:
        The concatenation of all pages, in the order GitHub returned them.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status
        that is still failing once the retries are exhausted.
    """
    contributors: list[Contributor] = []
    page_index = 1

    api_link = f"https://api.github.com/repos/zulip/{repo_name}/contributors"
    api_data = {"anon": "1"}
    # Optional CA bundle path; None leaves certificate verification to the
    # session's own default.
    certificates = os.environ.get("CUSTOM_CA_CERTIFICATES")

    headers: dict[str, str] = {}
    personal_access_token = get_secret("github_personal_access_token")
    if personal_access_token is not None:
        headers = {"Authorization": f"token {personal_access_token}"}

    # NOTE: this assigns a class attribute, so it applies to every Retry object
    # in the process, not only to the one created below.
    Retry.DEFAULT_BACKOFF_MAX = 64
    retry = Retry(
        total=max_retries,
        backoff_factor=2.0,
        status_forcelist={
            403,  # Github does unauth rate-limiting via 403's
            429,  # The formal rate-limiting response code
            502,  # Bad gateway
            503,  # Service unavailable
        },
    )
    session = GithubSession(max_retries=retry)

    while True:
        response = session.get(
            api_link,
            params={**api_data, "page": f"{page_index}"},
            verify=certificates,
            headers=headers,
        )
        response.raise_for_status()

        # GitHub documents "204 No Content" for an empty repository. Such a
        # response has no JSON body, so decoding it would raise.
        if response.status_code == 204:
            return contributors

        data = response.json()
        if not data:
            # An empty page means we have walked past the last contributor.
            return contributors

        contributors.extend(data)
        page_index += 1


def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """Write ``json_data`` to ``out_file`` as indented, key-sorted JSON.

    The file is overwritten, always encoded as UTF-8 regardless of the
    process locale, and terminated with a newline.
    """
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=2, sort_keys=True)
        f.write("\n")


def update_contributor_data_file() -> None:
    """Rebuild the contributor data file from GitHub.

    Fetches the contributors of every repository in ``repo_names``, merges
    them into one entry per contributor (keyed by GitHub login, falling back
    to email), subtracts the commit counts listed in ``duplicate_commits_file``
    from the "zulip" repository totals, and writes the result to
    ``settings.CONTRIBUTOR_DATA_FILE_PATH``.

    Reads the retry budget from the module-level ``args.max_retries``.
    """
    # This list should hold all repositories that should be included in
    # the total count, including those that should *not* have tabs on the team
    # page (e.g. if they are deprecated).
    repo_names = [
        "docker-zulip",
        "errbot-backend-zulip",
        "github-actions-zulip",
        "hubot-zulip",
        "puppet-zulip",
        "python-zulip-api",
        "trello-to-zulip",
        "swift-zulip-api",
        "zulint",
        "zulip",
        "zulip-android-legacy",
        "zulip-architecture",
        "zulip-archive",
        "zulip-csharp",
        "zulip-desktop",
        "zulip-desktop-legacy",
        "zulip-flutter",
        "zulip-ios-legacy",
        "zulip-js",
        "zulip-mobile",
        "zulip-redmine-plugin",
        "zulip-terminal",
        "zulip-zapier",
        "zulipbot",
    ]

    data: ContributorsJSON = dict(date=str(datetime.now(tz=timezone.utc).date()), contributors=[])
    contributor_username_to_data: dict[str, dict[str, str | int]] = {}

    for repo_name in repo_names:
        contributors = fetch_contributors(repo_name, args.max_retries)
        for contributor in contributors:
            username = contributor.get("login") or contributor.get("email")
            assert username is not None

            entry = contributor_username_to_data.get(username)
            if entry is not None:
                # Already known from an earlier repository: only record the
                # commit count for this repository.
                entry[repo_name] = contributor["contributions"]
                continue

            # First sighting of this contributor: create the entry and fill
            # in the profile fields, which are not revisited afterwards.
            entry = {repo_name: contributor["contributions"]}
            contributor_username_to_data[username] = entry

            avatar_url = contributor.get("avatar_url")
            if avatar_url is not None:
                entry["avatar"] = avatar_url

            email = contributor.get("email")
            if email is not None:
                entry["email"] = email
                # When an email is present, the Gravatar URL replaces any
                # avatar set above.
                hash_key = gravatar_hash(email)
                gravatar_url = f"https://secure.gravatar.com/avatar/{hash_key}?d=identicon"
                entry["avatar"] = gravatar_url

            login = contributor.get("login")
            if login is not None:
                entry["github_username"] = login

            name = contributor.get("name")
            if name is not None:
                entry["name"] = unicodedata.normalize("NFC", name)

    # remove duplicate contributions count
    # find commits at the time of split and subtract from zulip-server
    with open(duplicate_commits_file, encoding="utf-8") as f:
        duplicate_commits = json.load(f)

    for committer in duplicate_commits:
        entry = contributor_username_to_data.get(committer)
        # Skip committers we did not see, or that have no (non-zero) count
        # for the "zulip" repository.
        if entry is None or not entry.get("zulip"):
            continue
        total_commits = entry["zulip"]
        assert isinstance(total_commits, int)
        duplicate_commits_count = duplicate_commits[committer]
        entry["zulip"] = total_commits - duplicate_commits_count

    data["contributors"] = list(contributor_username_to_data.values())
    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)


if __name__ == "__main__":
    update_contributor_data_file()