py3: Switch almost all shebang lines to use `python3`.
This causes `upgrade-zulip-from-git`, as well as a no-option run of
`tools/build-release-tarball`, to produce a Zulip install running
Python 3, rather than Python 2. In particular this means that the
virtualenv we create, in which all application code runs, is Python 3.
One shebang line, on `zulip-ec2-configure-interfaces`, explicitly
keeps Python 2, and at least one external ops script, `wal-e`, also
still runs on Python 2. See discussion on the respective previous
commits that made those explicit. There may also be some other
third-party scripts we use, outside of this source tree and running
outside our virtualenv, that still run on Python 2.
2017-08-02 23:15:16 +02:00
|
|
|
#!/usr/bin/env python3
|
2017-01-06 18:56:36 +01:00
|
|
|
"""
|
2020-10-23 02:43:28 +02:00
|
|
|
Fetch contributors data from GitHub using their API, convert it to structured
|
2017-10-31 20:08:32 +01:00
|
|
|
JSON data for the /team page contributors section.
|
2017-01-06 18:56:36 +01:00
|
|
|
"""
|
|
|
|
import argparse
|
2021-08-24 00:23:51 +02:00
|
|
|
import json
|
2020-06-11 00:54:34 +02:00
|
|
|
import logging
|
2021-08-24 00:23:51 +02:00
|
|
|
import os
|
|
|
|
import sys
|
2021-08-24 00:24:45 +02:00
|
|
|
import unicodedata
|
2017-01-06 18:56:36 +01:00
|
|
|
from datetime import date
|
2020-07-24 13:45:27 +02:00
|
|
|
from typing import Dict, List, Optional, Union
|
2020-06-11 00:54:34 +02:00
|
|
|
|
2021-08-24 00:23:51 +02:00
|
|
|
# Make the repository root importable so `scripts.lib` resolves when this
# tool is run directly from the tools/ directory.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from scripts.lib.setup_path import setup_path

# Activate the Zulip virtualenv's paths; must run before importing Django
# or any other third-party dependency below.
setup_path()
os.environ["DJANGO_SETTINGS_MODULE"] = "zproject.settings"

# These imports are deliberately placed after setup_path()/settings module
# configuration rather than at the top of the file — they would fail (or pick
# up the wrong settings) if done earlier.
import django
from django.conf import settings
from typing_extensions import TypedDict
from urllib3.util import Retry

# Initialize the Django app registry; required before importing zerver modules.
django.setup()

from zerver.lib.avatar_hash import gravatar_hash
from zerver.lib.github import GithubSession
from zproject.config import get_secret
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
# JSON map of committer -> commit count that is double-counted between
# repositories (commits present both in "zulip" and a split-off repo); used
# below to subtract duplicates from the "zulip" totals.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), "duplicate_commits.json")

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max-retries", type=int, default=10, help="Number of times to retry fetching data from GitHub"
)
args = parser.parse_args()
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-05-02 06:24:43 +02:00
|
|
|
class ContributorsJSON(TypedDict):
    """Top-level shape of the JSON document written for the /team page."""

    # ISO-format date (str(date.today())) on which the data was fetched.
    date: str
    # One entry per unique contributor, with per-repo commit counts merged in.
    contributors: List[Dict[str, Union[int, str]]]
|
2020-07-23 13:11:12 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-07-23 13:11:12 +02:00
|
|
|
class Contributor(TypedDict):
    """One element of the GitHub /contributors API response.

    NOTE(review): the Optional fields reflect how the consumer code uses
    .get() on them — presumably logged-in contributors carry login/avatar_url
    while anonymous ones carry email/name instead; confirm against the GitHub
    REST API documentation.
    """

    avatar_url: Optional[str]
    contributions: int
    login: Optional[str]
    email: Optional[str]
    name: Optional[str]
|
2017-11-16 14:05:26 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
# Script-level logger under the "zulip." logging namespace.
logger = logging.getLogger("zulip.fetch_contributors_json")
|
2020-04-08 19:20:46 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-07-23 13:37:28 +02:00
|
|
|
def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
    """Fetch the full (paginated) contributor list for one zulip/* repo.

    Includes anonymous contributors (anon=1) and authenticates with the
    github_personal_access_token secret when one is configured.  Retries
    transient/rate-limit HTTP errors with exponential backoff.
    """
    url = f"https://api.github.com/repos/zulip/{repo_name}/contributors"
    base_params = {"anon": "1"}
    verify = os.environ.get("CUSTOM_CA_CERTIFICATES")

    token = get_secret("github_personal_access_token")
    auth_headers: Dict[str, str] = (
        {} if token is None else {"Authorization": f"token {token}"}
    )

    # Cap the exponential backoff; with backoff_factor=2.0 the sleep doubles
    # each retry until it hits this ceiling (in seconds).
    Retry.DEFAULT_BACKOFF_MAX = 64
    retry_policy = Retry(
        total=max_retries,
        backoff_factor=2.0,
        status_forcelist={
            403,  # Github does unauth rate-limiting via 403's
            429,  # The formal rate-limiting response code
            502,  # Bad gateway
            503,  # Service unavailable
        },
    )
    session = GithubSession(max_retries=retry_policy)

    all_contributors: List[Contributor] = []
    page = 1
    # Walk the pages until GitHub returns an empty list.
    while True:
        response = session.get(
            url,
            params={**base_params, "page": f"{page}"},
            verify=verify,
            headers=auth_headers,
        )
        response.raise_for_status()
        page_data = response.json()
        if not page_data:
            break
        all_contributors.extend(page_data)
        page += 1
    return all_contributors
|
2017-01-06 18:56:36 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-16 14:05:26 +01:00
|
|
|
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """Write the contributors data to out_file as pretty-printed,
    key-sorted JSON with a trailing newline."""
    serialized = json.dumps(json_data, indent=2, sort_keys=True)
    with open(out_file, "w") as f:
        f.write(serialized)
        f.write("\n")
|
2017-01-06 18:56:36 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-07 19:27:07 +02:00
|
|
|
def update_contributor_data_file() -> None:
    """Fetch contributors for every Zulip repo, merge them by username,
    subtract double-counted commits, and write the /team page JSON file."""
    # This list should hold all repositories that should be included in
    # the total count, including those that should *not* have tabs on the team
    # page (e.g. if they are deprecated).
    repo_names = [
        "zulip",
        "zulip-desktop",
        "zulip-mobile",
        "python-zulip-api",
        "zulip-js",
        "zulipbot",
        "zulip-terminal",
        "zulip-ios-legacy",
        "zulip-android",
    ]

    data: ContributorsJSON = dict(date=str(date.today()), contributors=[])
    # Maps login (or email, for anonymous contributors) -> merged record of
    # per-repo commit counts plus avatar/email/github_username/name fields.
    contributor_username_to_data: Dict[str, Dict[str, Union[str, int]]] = {}

    for repo_name in repo_names:
        contributors = fetch_contributors(repo_name, args.max_retries)
        for contributor in contributors:
            # Anonymous contributors have no login; fall back to email.
            username = contributor.get("login") or contributor.get("email")
            assert username is not None
            if username in contributor_username_to_data:
                contributor_username_to_data[username][repo_name] = contributor["contributions"]
            else:
                contributor_username_to_data[username] = {repo_name: contributor["contributions"]}

            avatar_url = contributor.get("avatar_url")
            if avatar_url is not None:
                contributor_username_to_data[username]["avatar"] = avatar_url

            email = contributor.get("email")
            if email is not None:
                contributor_username_to_data[username]["email"] = email
                # For email-only (anonymous) contributors, derive a gravatar
                # URL; note this overwrites any avatar set just above.
                hash_key = gravatar_hash(email)
                gravatar_url = f"https://secure.gravatar.com/avatar/{hash_key}?d=identicon"
                contributor_username_to_data[username]["avatar"] = gravatar_url

            login = contributor.get("login")
            if login is not None:
                contributor_username_to_data[username]["github_username"] = login

            name = contributor.get("name")
            if name is not None:
                # Normalize to NFC so visually-identical names compare equal.
                contributor_username_to_data[username]["name"] = unicodedata.normalize(
                    "NFC", name
                )

    # remove duplicate contributions count
    # find commits at the time of split and subtract from zulip-server
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)
    for committer in duplicate_commits:
        if committer in contributor_username_to_data and contributor_username_to_data[
            committer
        ].get("zulip"):
            total_commits = contributor_username_to_data[committer]["zulip"]
            assert isinstance(total_commits, int)
            duplicate_commits_count = duplicate_commits[committer]
            original_commits = total_commits - duplicate_commits_count
            contributor_username_to_data[committer]["zulip"] = original_commits

    data["contributors"] = list(contributor_username_to_data.values())
    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
|
2017-11-16 14:05:26 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-07 19:27:07 +02:00
|
|
|
# Script entry point: regenerate the contributors data file.
if __name__ == "__main__":
    update_contributor_data_file()
|