py3: Switch almost all shebang lines to use `python3`.
This causes `upgrade-zulip-from-git`, as well as a no-option run of
`tools/build-release-tarball`, to produce a Zulip install running
Python 3, rather than Python 2. In particular this means that the
virtualenv we create, in which all application code runs, is Python 3.
One shebang line, on `zulip-ec2-configure-interfaces`, explicitly
keeps Python 2, and at least one external ops script, `wal-e`, also
still runs on Python 2. See discussion on the respective previous
commits that made those explicit. There may also be some other
third-party scripts we use, outside of this source tree and running
outside our virtualenv, that still run on Python 2.
2017-08-02 23:15:16 +02:00
|
|
|
#!/usr/bin/env python3
|
2017-01-06 18:56:36 +01:00
|
|
|
"""
|
|
|
|
Fetch contributors data from GitHub using its API, and convert it to structured
|
2017-10-31 20:08:32 +01:00
|
|
|
JSON data for the /team page contributors section.
|
2017-01-06 18:56:36 +01:00
|
|
|
"""
|
2020-04-12 21:13:15 +02:00
|
|
|
import os
import sys

# Put the parent of this file's directory on sys.path so that the
# `scripts.lib` package imported on the next line can be resolved when this
# file is run directly.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.setup_path import setup_path

# Called before the remaining imports; presumably this makes the project's
# virtualenv packages importable -- confirm in scripts/lib/setup_path.
setup_path()

import argparse
import logging
from datetime import date
from random import randrange
from time import sleep
from typing import Any, Dict, List, Optional, Union

from typing_extensions import TypedDict

# Django locates its settings through this environment variable, so it has
# to be set before django.setup() runs.
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
import django

# Initialise Django; `settings` (imported below) is read later for
# CONTRIBUTOR_DATA_FILE_PATH.
django.setup()
import json

import requests
from django.conf import settings
|
2017-01-06 18:56:36 +01:00
|
|
|
|
2020-04-06 16:20:41 +02:00
|
|
|
# JSON file stored next to this script; it is read later to subtract
# duplicated commit counts from contributors' 'server' totals.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')

# Command-line options. NOTE: the arguments are parsed at module level, i.e.
# as soon as this file is executed (or imported), not inside a main function.
parser = argparse.ArgumentParser()
parser.add_argument('--max-retries', type=int, default=10,
                    help='Number of times to retry fetching data from Github')
args = parser.parse_args()
|
|
|
|
|
2020-05-02 06:24:43 +02:00
|
|
|
class ContributorsJSON(TypedDict):
    """Shape of the JSON document written to the contributors data file."""

    # Generation date, stored as the string form of date.today().
    date: str
    # One dict per contributor: 'name', 'avatar', plus one integer commit
    # count keyed by the short name of each repository they contributed to.
    contrib: List[Dict[str, Union[str, int]]]
|
2017-11-16 14:05:26 +01:00
|
|
|
|
2020-04-08 19:20:46 +02:00
|
|
|
# Module-level logger for this script's warnings; it uses a fixed logger
# name rather than __name__.
logger = logging.getLogger('zulip.fetch_contributors_json')
|
|
|
|
|
2017-11-16 14:05:26 +01:00
|
|
|
def fetch_contributors(
    repo_link: str, timeout: float = 30,
) -> Optional[List[Dict[str, Dict[str, Any]]]]:
    """Fetch the contributor statistics found at ``repo_link``.

    Args:
        repo_link: URL of a GitHub ``stats/contributors`` API endpoint.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled
            connection, which would hang the script and bypass the caller's
            retry logic.

    Returns:
        The decoded JSON body when the response status is 200; ``None`` for
        any other status or when the request timed out. Callers treat
        ``None`` as "try again later".
    """
    try:
        r: requests.Response = requests.get(
            repo_link,
            # Optional custom CA bundle path taken from the environment;
            # when the variable is unset this passes verify=None.
            verify=os.environ.get('CUSTOM_CA_CERTIFICATES'),
            timeout=timeout,
        )
    except requests.exceptions.Timeout as e:
        # A timeout is a transient failure just like a non-200 response, so
        # report it the same way and let the caller retry with backoff.
        logger.warning("Timed out fetching %s: %s", repo_link, e)
        return None
    return r.json() if r.status_code == 200 else None
|
2017-01-06 18:56:36 +01:00
|
|
|
|
2017-11-16 14:05:26 +01:00
|
|
|
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """Write ``json_data`` to ``out_file`` as indented, key-sorted JSON.

    The output ends with a single trailing newline. Any ``OSError`` raised
    while opening, writing or closing the file is logged as a warning and
    the process exits with status 1.
    """
    # Serialise first: if this fails, the existing file has not been
    # truncated yet.
    serialized = f"{json.dumps(json_data, indent=2, sort_keys=True)}\n"
    try:
        # The whole open/write/close sequence is guarded: open() itself can
        # fail (missing directory, permissions), and buffered data may only
        # hit the disk -- and fail -- when the file is flushed on close.
        with open(out_file, 'w') as f:
            f.write(serialized)
    except OSError as e:
        logger.warning(e)
        sys.exit(1)
|
|
|
|
|
2020-04-07 19:27:07 +02:00
|
|
|
def _merge_repo_contributors(
    repo_name: str,
    contribs: List[Dict[str, Dict[str, Any]]],
    contribs_list: Dict[str, Dict[str, Union[str, int]]],
) -> None:
    """Fold one repository's contributor stats into ``contribs_list`` in place.

    Raises:
        ValueError: if an entry is empty or lacks ``login``, ``avatar_url``
            or ``total``. Explicit exceptions are used instead of ``assert``
            because this is external data and asserts are stripped under
            ``python -O``.
    """
    for contrib in contribs:
        if contrib is None:
            raise ValueError(f"Empty contributor entry in data for {repo_name!r}")

        author = contrib.get('author')
        if author is None:
            # This happens for users who've deleted their GitHub account.
            continue

        username = author.get('login')
        avatar = author.get('avatar_url')
        total = contrib.get('total')
        if username is None or avatar is None or total is None:
            raise ValueError(
                f"Contributor entry for {repo_name!r} lacks login, "
                f"avatar_url or total: {contrib!r}"
            )

        # Counts recorded for this user from other repositories are kept;
        # this repository's count is added and the avatar is overwritten.
        contribs_list.setdefault(username, {}).update({
            'avatar': avatar,
            repo_name: total,
        })


def _subtract_duplicate_commits(
    contribs_list: Dict[str, Dict[str, Union[str, int]]],
    duplicate_commits: Dict[str, int],
) -> None:
    """Subtract each committer's duplicate count from their 'server' total, in place."""
    for committer, duplicate_commits_count in duplicate_commits.items():
        total_commits = contribs_list.get(committer, {}).get('server')
        if not total_commits:
            # Committer not seen, or no non-zero 'server' count to adjust.
            continue
        if not isinstance(total_commits, int):
            raise TypeError(
                f"'server' commit count for {committer!r} is not an integer: {total_commits!r}"
            )
        contribs_list[committer]['server'] = total_commits - duplicate_commits_count


def update_contributor_data_file() -> None:
    """
    Get contributors data from GitHub for every repository listed below,
    merge it into one record per contributor, and write the result to
    settings.CONTRIBUTOR_DATA_FILE_PATH.

    A repository whose fetch returns no data is retried on the next pass,
    after a random backoff sleep. Once more than ``args.max_retries``
    fetches in a row have failed, a warning is logged and the process exits
    with status 1.
    """
    # This dictionary should hold all repositories that should be included in
    # the total count, including those that should *not* have tabs on the team
    # page (e.g. if they are deprecated).
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-desktop/stats/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',
        'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',
        'terminal': 'https://api.github.com/repos/zulip/zulip-terminal/stats/contributors',
        'zulip-ios-legacy': 'https://api.github.com/repos/zulip/zulip-ios-legacy/stats/contributors',
        'zulip-android': 'https://api.github.com/repos/zulip/zulip-android/stats/contributors',
    }

    data: ContributorsJSON = dict(date=str(date.today()), contrib=[])
    contribs_list: Dict[str, Dict[str, Union[str, int]]] = {}
    # Number of consecutive failed fetches; reset by every successful one.
    retry_attempts = 0

    while True:
        repos_done = []
        for name, link in repositories.items():
            contribs = fetch_contributors(link)
            if contribs:
                repos_done.append(name)
                _merge_repo_contributors(name, contribs, contribs_list)
                retry_attempts = 0
            else:
                retry_attempts += 1
                if retry_attempts > args.max_retries:
                    logger.warning("Failed retries fetching contributors data from Github.")
                    sys.exit(1)

                # Randomised exponential backoff, capped below 64 seconds.
                sleep_time = randrange(0, min(64, 2**retry_attempts))
                sleep(sleep_time)

        # Finished repositories are removed only after the pass, so the dict
        # is never modified while it is being iterated.
        for repo in repos_done:
            del repositories[repo]

        if not repositories:
            break

    # remove duplicate contributions count
    # find commits at the time of split and subtract from zulip-server
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)
    _subtract_duplicate_commits(contribs_list, duplicate_commits)

    for contributor_name, contributor_data in contribs_list.items():
        contributor_data['name'] = contributor_name
        data['contrib'].append(contributor_data)

    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
|
2017-11-16 14:05:26 +01:00
|
|
|
|
2020-04-07 19:27:07 +02:00
|
|
|
# Run the update only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    update_contributor_data_file()
|