zulip/tools/total-contributions

232 lines
7.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import pathlib
import subprocess
import sys
from collections import defaultdict
from typing import Dict, List
# Running total of commits made by bot accounts.  add_log() increments it
# instead of crediting those commits to a contributor.
bot_commits = 0

# The repository root is the directory two levels above this script's own
# absolute path (the script's directory, then that directory's parent).
_script_dir = os.path.dirname(os.path.abspath(__file__))
ZULIP_PATH = os.path.dirname(_script_dir)

# Work from the repository root: find_path() resolves checkouts relative to
# the current working directory, and git commands run without an explicit
# cwd execute here.
os.chdir(ZULIP_PATH)
def add_log(committer_dict: Dict[str, int], input: List[str]) -> None:
    """Add the per-committer counts from `git shortlog -s` lines to a tally.

    Args:
        committer_dict: Mapping of committer name to commit count; updated
            in place.  Any mutable mapping works, not only a defaultdict.
        input: Lines of the form "<count><TAB><name>"; leading whitespace
            around the count is accepted by int().

    Commits whose committer name ends in "[bot]" are not credited to anyone;
    they are added to the module-level `bot_commits` counter instead.
    """
    global bot_commits
    for dataset in input:
        if not dataset.strip():
            # Tolerate blank lines instead of failing on the split below.
            continue
        # Split only on the first tab so the line is parsed once and a name
        # that itself contains a tab is kept whole.
        count_field, committer_name = dataset.split("\t", 1)
        commit_count = int(count_field)
        if committer_name.endswith("[bot]"):
            # Exclude dependabot[bot] and other GitHub bots.
            bot_commits += commit_count
            continue
        committer_dict[committer_name] = committer_dict.get(committer_name, 0) + commit_count
def retrieve_log(repo: str, revisions: str) -> List[str]:
    """Return the `git shortlog -s` output lines for `revisions` in `repo`.

    Args:
        repo: Short repository name; its checkout is located with find_path().
        revisions: A revision or revision range understood by git.

    Returns:
        One string per output line of the command.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    return subprocess.check_output(
        # The trailing "--" makes git parse `revisions` strictly as a
        # revision, never as a path, as find_last_commit_before_time does.
        ["git", "shortlog", "-s", revisions, "--"],
        cwd=find_path(repo),
        text=True,
    ).splitlines()
def find_path(repository: str) -> str:
    """Return the path where the checkout of `repository` is expected.

    Checkouts are looked up next to the current working directory, i.e. as
    entries of its (resolved) parent directory.  The path is only built;
    whether it exists is not checked.
    """
    # `.parent` is defined even for the filesystem root, where `.parents[0]`
    # would raise IndexError.
    return str(pathlib.Path().resolve().parent / repository)
def process_repo(
    *,
    out_dict: Dict[str, int],
    repo_short: str,
    repo_full: str,
    lower_version: str,
    upper_version: str,
) -> None:
    """Count one repository's commits in a revision range and credit them.

    Prints a one-line summary for the repository and adds the per-committer
    counts for the range to `out_dict` via add_log().

    Args:
        out_dict: Committer name -> commit count tally, updated in place.
        repo_short: Checkout directory name, resolved with find_path().
        repo_full: Display name used in the printed summary.
        lower_version: Exclusive lower bound of the range; an empty string
            means "from the start of history".
        upper_version: Upper bound of the range; an empty string means there
            is nothing to count.

    Raises:
        subprocess.CalledProcessError: If a git command fails.
    """
    if not upper_version:
        # Without an upper bound (e.g. the repository has no commit before
        # the requested date) the revision argument would be empty or
        # dangling and git would fail or count the wrong range.
        print(f"0 commits from {repo_full}: (no commits in range)")
        return

    if lower_version:
        revisions = f"{lower_version}..{upper_version}"
        revisions_display = f"{lower_version[0:12]}..{upper_version[0:12]}"
    else:
        revisions = upper_version
        revisions_display = f"(start)..{upper_version[0:12]}"

    commit_count = len(
        subprocess.check_output(
            # "--" keeps git from ever reading `revisions` as a path.
            ["git", "log", "--pretty=oneline", revisions, "--"],
            cwd=find_path(repo_short),
            text=True,
        ).splitlines()
    )
    repo_log = retrieve_log(repo_short, revisions)
    print(f"{commit_count} commits from {repo_full}: {revisions_display}")
    add_log(out_dict, repo_log)
def find_last_commit_before_time(repository: str, branch: str, time: str) -> str:
    """Return the commit of `branch` that git selects for `--before=<time>`.

    Runs `git rev-list -1 --before=<time> <branch> --` inside the checkout
    of `repository` and returns the command's output with surrounding
    whitespace removed.  The result may be an empty string if git prints
    nothing; process_repo() treats an empty lower bound as "start of
    history".
    """
    rev_list_command = ["git", "rev-list", "-1", f"--before={time}", branch, "--"]
    raw_output = subprocess.check_output(
        rev_list_command,
        cwd=find_path(repository),
        text=True,
    )
    return raw_output.strip()
# Command-line interface.  The help text lives in its own constant so the
# parser construction stays short; RawTextHelpFormatter prints it with the
# line breaks exactly as written here.
_DESCRIPTION = """\
Aggregates the total commit contributions to Zulip that should be
attributed to the time window between the two provided
zulip/zulip versions (tags or branches).
The attribution algorithm used by this tool attributes all changes for
a Zulip project between:
* The last release of the target project before the first zulip/zulip version.
* The last release of the target project before the last zulip/zulip version.
This algorithm has the key property that the totals for a given contributor of
2.1.0..4.0 will equal the sum of 2.1.0..3.0 and 3.0..4.0.
Its main downside is that contributions to projects other than
zulip/zulip in the last few weeks before a zulip/zulip release will be
delayed (i.e. counted in the total for the next zulip/zulip release).
Expects that all Zulip repositories are in the current working
directory, which does not need to be the directory this is run from.
# Changes between two major releases.
total-contributions 4.0 5.0
# Changes between a release and the current main branch.
total-contributions 4.0 main
total-contributions 2.1.0
"""

parser = argparse.ArgumentParser(
    description=_DESCRIPTION,
    formatter_class=argparse.RawTextHelpFormatter,
    prog="python3 total-contributions",
)
# Positional arguments: zero, one or two zulip/zulip versions.
parser.add_argument(
    "version",
    metavar="version",
    nargs="*",
    # TODO: Ideally, we'd replace "1.3.0" with "First commit", to
    # simplify including contributions before the 1.3.0 release.
    default=["1.3.0", "main"],
    help="Git tag or branch in zulip/zulip specifying one end of the commit range to use.",
)
# Optional flag that flips the order of the final contributor listing.
parser.add_argument(
    "-a",
    "--ascending",
    help="Sort contributors based on number of commits(ascending order)",
    action="store_true",
)
args = parser.parse_args()

# nargs="*" accepts any number of values, so the upper limit is enforced here.
if len(args.version) > 2:
    parser.error("Expects 0 to 2 version number(s)")

# The first value is the lower end of the range; when no second value was
# given, the range extends up to the main branch.
lower_zulip_version = args.version[0]
upper_zulip_version = args.version[1] if len(args.version) != 1 else "main"
# Refresh the zulip/zulip checkout before resolving the requested versions.
subprocess.check_call(["git", "fetch"], cwd=find_path("zulip"))


def _commit_date(version: str) -> str:
    """Return the first whitespace-separated field of git's %ci output for `version`.

    The command runs in the current working directory.  The trailing "--"
    forces git to treat `version` as a revision, so a name that only matches
    a path in the working tree is rejected instead of silently accepted.

    Raises:
        subprocess.CalledProcessError: If git cannot resolve `version`.
    """
    return subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", version, "--"],
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]


# Extract git version and time. It's important that we use the commit
# date (%ci), not the author date (%ai), since while those are often
# near identical for release commits, if we pass a branch like `main`,
# it's possible the latest commit on the branch might have a months
# old author date if the last pull request merged was started at that
# time.
try:
    lower_time = _commit_date(lower_zulip_version)
    upper_time = _commit_date(upper_zulip_version)
except subprocess.CalledProcessError:
    # Report the failure on stderr and exit non-zero so that callers and
    # shell pipelines can tell the run did not succeed.
    print("Specified version(s) don't exist", file=sys.stderr)
    sys.exit(1)

print(
    f"Commit range {lower_zulip_version}..{upper_zulip_version} corresponds to {lower_time} to {upper_time}"
)
# Committer name -> commit count, accumulated across every repository.
out_dict: Dict[str, int] = defaultdict(int)

# zulip/zulip itself is measured directly between the two requested
# versions.  Its checkout was already fetched before the version dates were
# looked up, so it is not fetched a second time here.
process_repo(
    out_dict=out_dict,
    repo_short="zulip",
    repo_full="zulip/zulip",
    lower_version=lower_zulip_version,
    upper_version=upper_zulip_version,
)
# TODO: We should migrate the last couple repositories to use the
# `main` default branch name and then simplify this.
for full_repository, branch in [
    ("zulip/zulip-mobile", "main"),
    ("zulip/zulip-flutter", "main"),
    ("zulip/zulip-desktop", "main"),
    ("zulip/docker-zulip", "main"),
    ("zulip/python-zulip-api", "main"),
    ("zulip/zulip-terminal", "main"),
    ("zulip/zulint", "main"),
    ("zulip/github-actions-zulip", "main"),
    ("zulip/zulip-js", "main"),
    ("zulip/zulip-archive", "master"),
    ("zulip/zulipbot", "main"),
    ("zulip/zulip-zapier", "master"),
]:
    repository = os.path.basename(full_repository)
    checkout_path = find_path(repository)

    if not os.path.exists(checkout_path):
        # No local checkout yet: clone it next to the other repositories.
        subprocess.check_call(
            ["git", "clone", f"git@github.com:{full_repository}.git"],
            cwd=os.path.dirname(checkout_path),
        )
    else:
        # Update the checkout for the project in question.
        subprocess.check_call(
            ["git", "pull", "--rebase", "-q"],
            cwd=checkout_path,
        )

    # NOTE(review): for `git fetch`, `-a` is short for `--append`, not
    # `--all` -- confirm which one was intended.
    subprocess.check_call(["git", "fetch", "-a"], cwd=checkout_path)

    # Translate the zulip/zulip date window into commits of this repository.
    lower_repo_version = find_last_commit_before_time(repository, branch, lower_time)
    upper_repo_version = find_last_commit_before_time(repository, branch, upper_time)
    process_repo(
        out_dict=out_dict,
        repo_short=repository,
        repo_full=full_repository,
        lower_version=lower_repo_version,
        upper_version=upper_repo_version,
    )
# Print one "<count><TAB><name>" line per contributor, ordered by commit
# count: largest first by default, smallest first with --ascending.
ranked_contributors = sorted(
    out_dict.items(), key=lambda entry: entry[1], reverse=not args.ascending
)
for committer_name, commit_count in ranked_contributors:
    print(f"{commit_count}\t{committer_name}")

# The grand total covers credited contributors only; bot commits were kept
# out of out_dict and are reported separately.
grand_total = sum(out_dict.values())
print(f"Excluded {bot_commits} commits authored by bots.")
print(
    f"{grand_total} total commits by {len(out_dict)} contributors between "
    f"{lower_zulip_version} and {upper_zulip_version}."
)