#!/usr/bin/env python3
"""Report per-contributor commit totals across the Zulip repositories.

The commit range is given as two zulip/zulip versions (tags or branches).
For every other repository, the range is derived from the commit dates of
those two versions; see the --help text for the attribution algorithm.
"""
import argparse
import os
import pathlib
import subprocess
import sys
from collections import defaultdict
from typing import Dict, List

# Running total of commits that add_log() skipped because the committer
# name ends in "[bot]".
bot_commits = 0

# (GitHub "org/repo" name, default branch) for every repository other
# than zulip/zulip whose commits are included in the totals.
# TODO: We should migrate the last couple repositories to use the
# `main` default branch name and then simplify this.
OTHER_REPOSITORIES = [
    ("zulip/zulip-mobile", "main"),
    ("zulip/zulip-desktop", "main"),
    ("zulip/docker-zulip", "main"),
    ("zulip/python-zulip-api", "main"),
    ("zulip/zulip-terminal", "main"),
    ("zulip/zulint", "main"),
    ("zulip/github-actions-zulip", "main"),
    ("zulip/zulip-js", "main"),
    ("zulip/zulip-archive", "master"),
    ("zulip/zulipbot", "main"),
    ("zulip/zulip-zapier", "master"),
]


def add_log(committer_dict: Dict[str, int], input: List[str]) -> None:
    """Add the counts from `git shortlog -s` output lines to committer_dict.

    Each entry of `input` has the form "<count><TAB><committer name>".
    Blank entries are ignored.  Committers whose name ends in "[bot]" are
    not added to committer_dict; their commits are added to the
    module-level `bot_commits` counter instead.

    committer_dict is updated in place and may be a plain dict or a
    defaultdict.
    """
    global bot_commits
    for dataset in input:
        if not dataset.strip():
            continue
        # Split only on the first tab so the name is kept intact.
        count_field, committer_name = dataset.split("\t", 1)
        commit_count = int(count_field)
        if committer_name.endswith("[bot]"):
            # Exclude dependabot[bot] and other GitHub bots.
            bot_commits += commit_count
            continue
        committer_dict[committer_name] = committer_dict.get(committer_name, 0) + commit_count


def _revision_range(lower_version: str, upper_version: str) -> str:
    """Return the git revision argument selecting lower_version..upper_version.

    An empty lower_version means there is no lower bound, so everything
    reachable from upper_version is selected.  Passing "..<upper>" to git
    would instead be read as "HEAD..<upper>".
    """
    if not lower_version:
        return upper_version
    return f"{lower_version}..{upper_version}"


def retrieve_log(repo: str, lower_version: str, upper_version: str) -> List[str]:
    """Return the `git shortlog -s` lines for lower_version..upper_version in repo.

    An empty upper_version (no commit exists before the requested time)
    yields an empty list without running git.  An empty lower_version
    selects every commit reachable from upper_version.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    if not upper_version:
        return []
    return subprocess.check_output(
        ["git", "shortlog", "-s", _revision_range(lower_version, upper_version)],
        cwd=find_path(repo),
        text=True,
    ).splitlines()


def _count_commits(repo: str, lower_version: str, upper_version: str) -> int:
    """Return the number of commits in lower_version..upper_version in repo.

    Empty bounds are treated the same way as in retrieve_log().
    """
    if not upper_version:
        return 0
    log_lines = subprocess.check_output(
        ["git", "log", "--pretty=oneline", _revision_range(lower_version, upper_version)],
        cwd=find_path(repo),
        text=True,
    ).splitlines()
    return len(log_lines)


def find_path(repository: str) -> str:
    """Return the expected checkout path of `repository`.

    The path is the directory named `repository` inside the grandparent
    of the (symlink-resolved) current working directory.
    """
    return os.path.join(os.path.dirname(pathlib.Path().resolve().parents[0]), repository)


def find_last_commit_before_time(repository: str, branch: str, time: str) -> str:
    """Return the hash of the last commit on `branch` made before `time`.

    The result is the empty string when `git rev-list` prints nothing,
    i.e. when no commit on the branch is older than `time`.
    """
    return subprocess.check_output(
        ["git", "rev-list", "-1", f"--before={time}", branch, "--"],
        cwd=find_path(repository),
        text=True,
    ).strip()


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this tool."""
    parser = argparse.ArgumentParser(
        prog="python3 total-contributions",
        formatter_class=argparse.RawTextHelpFormatter,
        description="""\
Aggregates the total commit contributions to Zulip that should be
attributed to the time window between the two provided
zulip/zulip versions (tags or branches).

The attribution algorithm used by this tool attributes all changes for
a Zulip project between:

* The last release of the target project before the first zulip/zulip version.
* The last release of the target project before the last zulip/zulip version.

This algorithm has the key property that the totals for a given contributor of
2.1.0..4.0 will equal the sum of 2.1.0..3.0 and 3.0..4.0.

Its main downside is that contributions to projects other than
zulip/zulip in the last few weeks before a zulip/zulip release will be
delayed (i.e. counted in the total for the next zulip/zulip release).

Expects that all Zulip repositories are in the current working
directory, which does not need to be the directory this is run from.

# Changes between two major releases.
total-contributions 4.0 5.0

# Changes between a release and the current main branch.
total-contributions 4.0 main
total-contributions 2.1.0
""",
    )
    parser.add_argument(
        "version",
        metavar="version",
        nargs="*",
        # TODO: Ideally, we'd replace "1.3.0" with "First commit", to
        # simplify including contributions before the 1.3.0 release.
        default=["1.3.0", "main"],
        help="Git tag or branch in zulip/zulip specifying one end of the commit range to use.",
    )
    parser.add_argument(
        "-a",
        "--ascending",
        action="store_true",
        help="Sort contributors based on number of commits(ascending order)",
    )
    return parser


def _commit_date(version: str) -> str:
    """Return the commit date (YYYY-MM-DD) of `version` in zulip/zulip.

    Raises subprocess.CalledProcessError if git cannot resolve `version`.
    """
    return subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", version],
        cwd=find_path("zulip"),
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]


def main() -> None:
    """Parse the command line, gather commit counts, and print the report."""
    parser = _build_parser()
    args = parser.parse_args()

    if len(args.version) > 2:
        parser.error("Expects 0 to 2 version number(s)")

    lower_zulip_version = args.version[0]
    if len(args.version) == 1:
        upper_zulip_version = "main"
    else:
        upper_zulip_version = args.version[1]

    subprocess.check_call(["git", "fetch"], cwd=find_path("zulip"))

    # Extract git version and time. It's important that we use the commit
    # date (%ci), not the author date (%ai), since while those are often
    # near identical for release commits, if we pass a branch like `main`,
    # it's possible the latest commit on the branch might have a months
    # old author date if the last pull request merged was started at that
    # time.
    try:
        lower_time = _commit_date(lower_zulip_version)
        upper_time = _commit_date(upper_zulip_version)
    except subprocess.CalledProcessError:
        # This is a failure, so report it on stderr with a non-zero status.
        print("Specified version(s) don't exist", file=sys.stderr)
        sys.exit(1)

    print(
        f"Commit range {lower_zulip_version}..{upper_zulip_version} corresponds to {lower_time} to {upper_time}"
    )

    out_dict: Dict[str, int] = defaultdict(int)

    commit_count = _count_commits("zulip", lower_zulip_version, upper_zulip_version)
    repo_log = retrieve_log("zulip", lower_zulip_version, upper_zulip_version)
    print(
        f"{commit_count} commits from zulip/zulip: {lower_zulip_version[0:12]}..{upper_zulip_version[0:12]}"
    )
    add_log(out_dict, repo_log)

    for full_repository, branch in OTHER_REPOSITORIES:
        repository = os.path.basename(full_repository)

        if not os.path.exists(find_path(repository)):
            subprocess.check_call(
                ["git", "clone", f"git@github.com:{full_repository}.git"],
                cwd=os.path.dirname(find_path(repository)),
            )

        subprocess.check_call(["git", "fetch"], cwd=find_path(repository))
        lower_repo_version = find_last_commit_before_time(repository, branch, lower_time)
        upper_repo_version = find_last_commit_before_time(repository, branch, upper_time)
        commit_count = _count_commits(repository, lower_repo_version, upper_repo_version)
        repo_log = retrieve_log(repository, lower_repo_version, upper_repo_version)
        print(
            f"{commit_count} commits from {full_repository}: {lower_repo_version[0:12]}..{upper_repo_version[0:12]}"
        )
        add_log(out_dict, repo_log)

    # Sorting based on number of commits
    grand_total = 0
    for committer_name, commit_count in sorted(
        out_dict.items(), key=lambda item: item[1], reverse=not args.ascending
    ):
        print(f"{commit_count}\t{committer_name}")
        grand_total += commit_count

    print(f"Excluded {bot_commits} commits authored by bots.")
    print(
        f"{grand_total} total commits by {len(out_dict)} contributors between "
        f"{lower_zulip_version} and {upper_zulip_version}."
    )


if __name__ == "__main__":
    main()