zulip/tools/total-contributions

184 lines
6.1 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import pathlib
import subprocess
import sys
from collections import defaultdict
from typing import Dict, List
def add_log(committer_dict: Dict[str, int], input: List[str]) -> None:
    """Add per-committer commit counts from shortlog lines to `committer_dict`.

    Each entry of `input` is a "<count><TAB><name>" line, the format
    produced by `git shortlog -s`.  Counts are added in place to
    `committer_dict`, which must supply a default for unseen names
    (e.g. a `defaultdict(int)`).  Committers whose name ends with
    "[bot]" are skipped.
    """
    for line in input:
        fields = line.split("\t")
        name = fields[1]
        # int() tolerates the leading padding that shortlog emits.
        count = int(fields[0])
        # Leave out dependabot[bot] and other GitHub bots.
        if not name.endswith("[bot]"):
            committer_dict[name] += count
def retrieve_log(repo: str, lower_version: str, upper_version: str) -> List[str]:
    """Return the `git shortlog -s` lines for a commit range of `repo`.

    The range is `lower_version..upper_version`, evaluated in the
    checkout located by `find_path(repo)`.  Each returned line has the
    form "<count><TAB><name>".

    Either bound may be an empty string, which is what
    `find_last_commit_before_time` yields when no commit precedes the
    requested time:

    * An empty `upper_version` means nothing exists at the upper end of
      the range, so an empty list is returned without running Git.
    * An empty `lower_version` means the whole history up to
      `upper_version` belongs to the range.

    Raises `subprocess.CalledProcessError` if Git fails.
    """
    if not upper_version:
        return []
    # Never build "..upper" from an empty lower bound: Git would read
    # that as "HEAD..upper" and silently report the wrong commits.
    if lower_version:
        revision_range = lower_version + ".." + upper_version
    else:
        revision_range = upper_version
    return subprocess.check_output(
        ["git", "shortlog", "-s", revision_range],
        cwd=find_path(repo),
        text=True,
    ).splitlines()
def find_path(repository: str) -> str:
    """Return the path of `repository` under the grandparent of the
    current working directory.

    The working directory is resolved to an absolute path, two levels
    are stripped from it, and `repository` is appended after a "/".
    The path is only computed; nothing checks that it exists.
    """
    working_directory = pathlib.Path().resolve()
    parent_directory = working_directory.parents[0]
    grandparent_directory = os.path.dirname(parent_directory)
    return f"{grandparent_directory}/{repository}"
def find_last_commit_before_time(repository: str, branch: str, time: str) -> str:
    """Return the newest commit on `branch` of `repository` made before `time`.

    Runs `git rev-list -1 --before=<time> <branch> --` in the checkout
    located by `find_path(repository)` and returns its output with
    surrounding whitespace removed.  If Git prints nothing, the result
    is an empty string.

    Raises `subprocess.CalledProcessError` if Git fails.
    """
    rev_list_command = ["git", "rev-list", "-1", f"--before={time}", branch, "--"]
    raw_output = subprocess.check_output(
        rev_list_command,
        cwd=find_path(repository),
        text=True,
    )
    return raw_output.strip()
# Command-line interface: zero, one or two positional zulip/zulip
# versions, plus a flag that flips the sort order of the final report.
# RawTextHelpFormatter keeps the description's line breaks as written.
parser = argparse.ArgumentParser(
    prog="python3 total-contributions",
    formatter_class=argparse.RawTextHelpFormatter,
    description="""\
Aggregates the total commit contributions to Zulip that should be
attributed to the time window between the two provided
zulip/zulip versions (tags or branches).
The attribution algorithm used by this tool attributes all changes for
a Zulip project between:
* The last release of the target project before the first zulip/zulip version.
* The last release of the target project before the last zulip/zulip version.
This algorithm has the key property that the totals for a given contributor of
2.1.0..4.0 will equal the sum of 2.1.0..3.0 and 3.0..4.0.
Its main downside is that contributions to projects other than
zulip/zulip in the last few weeks before a zulip/zulip release will be
delayed (i.e. counted in the total for the next zulip/zulip release).
Expects that all Zulip repositories are in the current working
directory, which does not need to be the directory this is run from.
# Changes between two major releases.
total-contributions 4.0 5.0
# Changes between a release and the current main branch.
total-contributions 4.0 main
total-contributions 2.1.0
""",
)
# Positional versions.  nargs="*" accepts any number of values, so the
# upper limit of two has to be enforced after parsing.
parser.add_argument(
    "version",
    metavar="version",
    nargs="*",
    # TODO: Ideally, we'd replace "1.3.0" with "First commit", to
    # simplify including contributions before the 1.3.0 release.
    default=["1.3.0", "main"],
    help="Git tag or branch in zulip/zulip specifying one end of the commit range to use.",
)
# The report is sorted by descending commit count unless this is given.
parser.add_argument(
    "-a",
    "--ascending",
    action="store_true",
    help="Sort contributors based on number of commits(ascending order)",
)
# Parse the command line and reject more than two positional versions.
args = parser.parse_args()
requested_versions = args.version
if len(requested_versions) > 2:
    parser.error("Expects 0 to 2 version number(s)")

# The first version is the lower bound; a lone version is compared
# against the "main" branch.
lower_zulip_version = requested_versions[0]
upper_zulip_version = "main" if len(requested_versions) == 1 else requested_versions[1]
# Refresh the zulip/zulip checkout before resolving the versions.
subprocess.check_call(["git", "fetch"], cwd=find_path("zulip"))


def _commit_date(version: str) -> str:
    """Return the commit date (YYYY-MM-DD) of `version` in zulip/zulip.

    Runs `git log -1 --format=%ci` in the same checkout that the
    surrounding code fetches and reads the shortlog from, and keeps
    only the first whitespace-separated field of the output (the date).

    Raises `subprocess.CalledProcessError` if Git cannot resolve `version`.
    """
    return subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", version],
        cwd=find_path("zulip"),
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]


# Extract git version and time. It's important that we use the commit
# date (%ci), not the author date (%ai), since while those are often
# near identical for release commits, if we pass a branch like `main`,
# it's possible the latest commit on the branch might have a months
# old author date if the last pull request merged was started at that
# time.
try:
    lower_time = _commit_date(lower_zulip_version)
    upper_time = _commit_date(upper_zulip_version)
except subprocess.CalledProcessError:
    # Report the failure on stderr with a non-zero exit status, so
    # callers and shell pipelines can tell that nothing was computed.
    sys.exit("Specified version(s) don't exist")

print(
    f"Commit range {lower_zulip_version}..{upper_zulip_version} corresponds to {lower_time} to {upper_time}"
)
# Running commit totals per contributor, summed over every repository.
out_dict: Dict[str, int] = defaultdict(int)

# zulip/zulip is counted using exactly the requested versions.  Its
# checkout was already fetched before the version dates were resolved,
# so no second `git fetch` is needed here.
zulip = retrieve_log("zulip", lower_zulip_version, upper_zulip_version)
print(f"Commit range for zulip/zulip: {lower_zulip_version[0:12]}..{upper_zulip_version[0:12]}")
add_log(out_dict, zulip)

# Every other repository is counted between the last commits on its
# default branch that precede the lower and upper zulip/zulip dates.
# TODO: We should migrate the last couple repositories to use the
# `main` default branch name and then simplify this.
for full_repository, branch in [
    ("zulip/zulip-mobile", "main"),
    ("zulip/zulip-desktop", "main"),
    ("zulip/docker-zulip", "main"),
    ("zulip/python-zulip-api", "main"),
    ("zulip/zulip-terminal", "main"),
    ("zulip/zulint", "main"),
    ("zulip/github-actions-zulip", "main"),
    ("zulip/zulip-js", "main"),
    ("zulip/zulip-archive", "master"),
    ("zulip/zulipbot", "main"),
    ("zulip/zulip-zapier", "master"),
]:
    repository = os.path.basename(full_repository)
    repository_path = find_path(repository)

    # Clone missing repositories next to the existing checkouts.
    if not os.path.exists(repository_path):
        subprocess.check_call(
            ["git", "clone", f"git@github.com:{full_repository}.git"],
            cwd=os.path.dirname(repository_path),
        )

    subprocess.check_call(["git", "fetch"], cwd=repository_path)
    lower_repo_version = find_last_commit_before_time(repository, branch, lower_time)
    upper_repo_version = find_last_commit_before_time(repository, branch, upper_time)
    repo_log = retrieve_log(repository, lower_repo_version, upper_repo_version)
    print(
        f"Commit range for {full_repository}: {lower_repo_version[0:12]}..{upper_repo_version[0:12]}"
    )
    add_log(out_dict, repo_log)
# Print one "<count><TAB><name>" line per contributor, ordered by commit
# count: descending by default, ascending when --ascending was given.
grand_total = 0
for committer_name, commit_count in sorted(
    out_dict.items(), key=lambda item: item[1], reverse=not args.ascending
):
    print(f"{commit_count}\t{committer_name}")
    grand_total += commit_count

# The summary names the zulip/zulip versions the user asked for, not
# the commit hash of whichever secondary repository was processed last.
print(
    f"{grand_total} total commits by {len(out_dict)} contributors between "
    f"{lower_zulip_version} and {upper_zulip_version}."
)