zulip/tools/update-authors-json

#!/usr/bin/env python3
"""
Fetch contributors data from Github using their API, convert it to structured
JSON data for the /team page contributors section.
"""

# Check for the venv: check_venv() is handed this script's path.
# NOTE(review): kept ahead of the remaining imports, presumably so that a
# missing virtualenv is reported before the third-party imports below
# (requests, django) can fail -- confirm against lib/sanity_check.
from lib import sanity_check
sanity_check.check_venv(__file__)

from typing import Any, Dict, List, Optional, Union, cast
from mypy_extensions import TypedDict

import os
import shutil
import sys
import argparse
from time import sleep
from datetime import date

import requests
import json

# Put the parent of this script's directory first on sys.path (presumably the
# repository root, so that zproject/zerver resolve -- confirm) and select the
# Django settings module before django.conf.settings is imported.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
from django.conf import settings
# NOTE(review): split_by does not appear to be used anywhere in this script;
# confirm before removing the import.
from zerver.lib.utils import split_by

# Both fixture files are located relative to the directory holding this script.
_SCRIPT_DIR = os.path.dirname(__file__)

# Pre-generated contributors data, copied verbatim when --use-fixture is given.
FIXTURE_FILE = os.path.join(
    _SCRIPT_DIR,
    '../zerver/tests/fixtures/authors.json',
)

# Per-committer commit counts that are subtracted from the 'server' totals.
duplicate_commits_file = os.path.join(
    _SCRIPT_DIR,
    '../zerver/tests/fixtures/duplicate_commits.json',
)

# Command-line interface. The parsed result is stored in the module-level
# `args`, which the functions below read directly.
parser = argparse.ArgumentParser()

# Upper bound on the number of fetch passes over the repositories.
parser.add_argument(
    '--max-retries',
    type=int,
    default=3,
    help='Number of times to retry fetching data from Github',
)

# In Travis CI and development environment, we use test fixture to avoid
# fetching from Github constantly.
parser.add_argument(
    '--use-fixture',
    action='store_true',
    default=False,
    help='Use fixture data instead of fetching from Github',
)

# Lets callers treat an unreachable GitHub as a warning instead of an error.
parser.add_argument(
    '--not-required',
    action='store_true',
    default=False,
    help='Consider failures to reach GitHub nonfatal',
)

args = parser.parse_args()


# Shape of the JSON document this script writes to settings.CONTRIBUTORS_DATA:
#   'date'    -- str(date.today()) at the time the data was generated.
#   'contrib' -- one dict per contributor, holding 'name', 'avatar' and a
#                commit count stored under each repository label
#                ('server', 'desktop', 'mobile', ...).
ContributorsJSON = TypedDict('ContributorsJSON', {
    'date': str,
    'contrib': List[Dict[str, Union[str, int]]],
})


def fetch_contributors(repo_link: str,
                       timeout: float=30) -> Optional[List[Dict[str, Dict[str, Any]]]]:
    """
    Fetch contributor statistics from a GitHub stats/contributors API URL.

    repo_link: the URL to request.
    timeout: seconds to wait for GitHub before abandoning this attempt, so a
        stalled connection cannot hang the script forever.

    Returns the decoded JSON body when the response status is HTTP 200.
    Returns None for any other status, for network-level failures
    (connection errors, timeouts) and for a body that is not valid JSON, so
    that the caller can retry or treat the failure as nonfatal instead of
    crashing with a traceback.
    """
    try:
        r = requests.get(repo_link, timeout=timeout)  # type: requests.Response
    except requests.RequestException as e:
        # Report the failure but leave the retry decision to the caller.
        print("WARNING: Request to {} failed: {}".format(repo_link, e))
        return None

    if r.status_code != 200:
        return None

    try:
        return r.json()
    except ValueError as e:
        print("WARNING: Invalid JSON received from {}: {}".format(repo_link, e))
        return None

def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """
    Write json_data to out_file as pretty-printed JSON (2-space indent,
    sorted keys, trailing newline), replacing any existing content.

    If the file cannot be opened or written, the error is printed and the
    process exits with status 1.
    """
    # Serialize before opening the file, so a serialization error cannot
    # leave a truncated output file behind.
    serialized = "{}\n".format(json.dumps(json_data, indent=2, sort_keys=True))
    try:
        # open() is inside the try block: a missing directory or a permission
        # problem is reported the same way as a failed write.
        with open(out_file, 'w') as f:
            f.write(serialized)
    except IOError as e:
        print(e)
        sys.exit(1)


def run_production() -> None:
    """
    Get contributors data from Github for every repository listed below,
    merge it into one record per contributor and write the result to
    settings.CONTRIBUTORS_DATA.

    Repositories that do not return usable data are retried: at most
    args.max_retries passes are made, with a 2 second pause after each
    incomplete pass. If some repository still has no data after the last
    pass, an error is printed and the script exits with status 1, unless
    --not-required was given, in which case whatever was collected is still
    written out.
    """
    # Label used as the per-contributor key in the output -> API URL.
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-electron/stats/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',
        'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',
        'terminal': 'https://api.github.com/repos/zulip/zulip-terminal/stats/contributors',
    }

    data = dict(date=str(date.today()), contrib=[])  # type: ContributorsJSON
    # Keyed by GitHub login; each value accumulates 'avatar' plus one commit
    # count per repository label.
    contribs_list = {}  # type: Dict[str, Dict[str, Union[str, int]]]

    for _ in range(args.max_retries):
        repos_done = []
        for name, link in repositories.items():
            contribs = fetch_contributors(link)
            if not contribs:
                # No usable data yet; keep the repository for the next pass.
                continue

            repos_done.append(name)
            for contrib in contribs:
                assert contrib is not None  # TODO: To improve/clarify

                author = contrib.get('author')
                if author is None:
                    # This happens for users who've deleted their GitHub account.
                    continue

                username = author.get('login')
                assert username is not None  # TODO: To improve/clarify

                avatar = author.get('avatar_url')
                assert avatar is not None  # TODO: To improve/clarify
                total = contrib.get('total')
                assert total is not None  # TODO: To improve/clarify

                contrib_data = {
                    'avatar': avatar,
                    name: total,
                }
                contribs_list.setdefault(username, {}).update(contrib_data)

        # Repositories fetched successfully are not requested again. They are
        # removed only after the pass, because the dict cannot be modified
        # while it is being iterated.
        for repo in repos_done:
            del repositories[repo]

        if not repositories:
            break

        # Wait before retrying failed requests for Github to aggregate data.
        sleep(2)
    else:
        print("ERROR: Failed fetching contributors data from Github.")
        if not args.not_required:
            sys.exit(1)

    # Remove duplicate contributions count: find commits at the time of the
    # split and subtract them from zulip-server.
    # This must run exactly once, after all fetching is finished; doing it on
    # every retry pass would subtract the same commits repeatedly from
    # 'server' totals that were fetched on an earlier pass.
    with open(duplicate_commits_file, 'r') as f:
        duplicate_commits = json.load(f)
    for committer, duplicate_commits_count in duplicate_commits.items():
        contributor = contribs_list.get(committer)
        # A committer with duplicate commits may be absent from the fetched
        # data, or may have no 'server' count to correct.
        if contributor is None or not contributor.get('server'):
            continue
        total_commits = cast(int, contributor['server'])
        contributor['server'] = total_commits - duplicate_commits_count

    for contributor_name, contributor_data in contribs_list.items():
        contributor_data['name'] = contributor_name
        data['contrib'].append(contributor_data)

    write_to_disk(data, settings.CONTRIBUTORS_DATA)


def copy_fixture() -> None:
    """
    Install the checked-in test fixture (FIXTURE_FILE) as the contributors
    data file, so that testing does not have to fetch data from Github
    constantly.
    """
    source_path = FIXTURE_FILE
    destination_path = settings.CONTRIBUTORS_DATA
    shutil.copyfile(source_path, destination_path)


# Script entry point: fetch live data from Github unless --use-fixture was
# given, in which case the checked-in fixture is copied instead.
if not args.use_fixture:
    run_production()
else:
    copy_fixture()
py3: Switch almost all shebang lines to use `python3`. This causes `upgrade-zulip-from-git`, as well as a no-option run of `tools/build-release-tarball`, to produce a Zulip install running Python 3, rather than Python 2. In particular this means that the virtualenv we create, in which all application code runs, is Python 3. One shebang line, on `zulip-ec2-configure-interfaces`, explicitly keeps Python 2, and at least one external ops script, `wal-e`, also still runs on Python 2. See discussion on the respective previous commits that made those explicit. There may also be some other third-party scripts we use, outside of this source tree and running outside our virtualenv, that still run on Python 2. 2017-08-02 23:15:16 +02:00			`#!/usr/bin/env python3`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
			`Fetch contributors data from Github using their API, convert it to structured`
landing-page: Replace /about with /team and /history in links. 2017-10-31 20:08:32 +01:00			`JSON data for the /team page contributors section.`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`

tools: Create more consistent checks for venv. This helps make the Zulip development environment somewhat more robust to new contributors, since it will give them a nice warning if they try running any of our development tools outside the Zulip virtualenv. Fixes #3468. 2017-02-05 21:24:28 +01:00			`# check for the venv`
			`from lib import sanity_check`
			`sanity_check.check_venv(__file__)`

tools: Change use of typing.Text to str. 2018-05-14 00:53:45 +02:00			`from typing import Any, Dict, List, Optional, Union, cast`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`from mypy_extensions import TypedDict`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
			`import os`
tools/update-authors-json: Avoid shelling out for cp. Signed-off-by: Anders Kaseorg <andersk@mit.edu> 2018-07-18 23:50:16 +02:00			`import shutil`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`import sys`
			`import argparse`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`from time import sleep`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`from datetime import date`

			`import requests`
json: Replace most use of simplejson with json. This is progress towards removing simplejson as a dependency. 2017-10-12 07:54:25 +02:00			`import json`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
			`sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))`
			`os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'`
			`from django.conf import settings`
			`from zerver.lib.utils import split_by`

tests: Move zerver/fixtures to zerver/tests/fixtures for clarity. Fixes #9153. 2018-04-19 20:17:24 +02:00			`FIXTURE_FILE = os.path.join(os.path.dirname(__file__), '../zerver/tests/fixtures/authors.json')`
			`duplicate_commits_file = os.path.join(os.path.dirname(__file__),`
			`'../zerver/tests/fixtures/duplicate_commits.json')`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
			`parser = argparse.ArgumentParser()`
			`parser.add_argument('--max-retries', type=int, default=3,`
			`help='Number of times to retry fetching data from Github')`
			`# In Travis CI and development environment, we use test fixture to avoid`
			`# fetching from Github constantly.`
			`parser.add_argument('--use-fixture', action='store_true', default=False,`
			`help='Use fixture data instead of fetching from Github')`
upgrade: Don't require authors updates deploying from Git. Fixes #3392. 2017-01-24 07:19:25 +01:00			`parser.add_argument('--not-required', action='store_true', default=False,`
			`help='Consider failures to reach GitHub nonfatal')`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`args = parser.parse_args()`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`ContributorsJSON = TypedDict('ContributorsJSON', {`
			`'date': str,`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`'contrib': List[Dict[str, Union[str, int]]],`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`})`

Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:`
			`r = requests.get(repo_link) # type: requests.Response`
			`return r.json() if r.status_code == 200 else None`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`with open(out_file, 'w') as f:`
			`try:`
update-authors-json: Pretty-print output with sorted keys. This makes working with this file for debugging/etc. much nicer. 2018-06-02 01:36:58 +02:00			`f.write("{}\n".format(json.dumps(json_data, indent=2, sort_keys=True)))`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`except IOError as e:`
			`print(e)`
			`sys.exit(1)`

/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`def run_production() -> None:`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`Get contributors data from Github and insert them into a temporary`
			`dictionary. Retry fetching each repository if responded with non HTTP 200`
			`status.`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`repositories = {`
			`'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',`
			`'desktop': 'https://api.github.com/repos/zulip/zulip-electron/stats/contributors',`
			`'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',`
			`'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',`
/team: Fetch zulip-js repository data. 2018-01-15 15:54:22 +01:00			`'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',`
team page: Add contributors of zulip-terminal. It adds zulip terminal contributions to team page using github apis. 2018-03-23 00:23:38 +01:00			`'terminal': 'https://api.github.com/repos/zulip/zulip-terminal/stats/contributors',`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`}`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`data = dict(date=str(date.today()), contrib=[]) # type: ContributorsJSON`
			`contribs_list = {} # type: Dict[str, Dict[str, Union[str, int]]]`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`for _ in range(args.max_retries):`
			`repos_done = []`
			`for name, link in repositories.items():`
			`contribs = fetch_contributors(link)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`if contribs:`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`repos_done.append(name)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`for contrib in contribs:`
mypy: Add asserts of intermediate values in update-authors-json. 2018-03-23 17:31:23 +01:00			`assert contrib is not None # TODO: To improve/clarify`

			`author = contrib.get('author')`
			`if author is None:`
update-authors-json: Fix handling deleted GitHub accounts. Apparently, we've now had the first time one of our contributors had their account deleted (at least, the author page for the contributor who has 21 commits in python-zulip-api now 404s). 2018-02-08 21:16:14 +01:00			`# This happens for users who've deleted their GitHub account.`
			`continue`
mypy: Add asserts of intermediate values in update-authors-json. 2018-03-23 17:31:23 +01:00
			`username = author.get('login')`
			`assert username is not None # TODO: To improve/clarify`

			`avatar = author.get('avatar_url')`
			`assert avatar is not None # TODO: To improve/clarify`
			`total = contrib.get('total')`
			`assert total is not None # TODO: To improve/clarify`

/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`contrib_data = {`
mypy: Add asserts of intermediate values in update-authors-json. 2018-03-23 17:31:23 +01:00			`'avatar': avatar,`
			`name: total,`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`}`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`if username in contribs_list:`
			`contribs_list[username].update(contrib_data)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`else:`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`contribs_list[username] = contrib_data`
team page: Remove duplicate contribution counts. It removes duplicate contribution count from zulip-server. Fixes #7836. 2018-03-03 13:21:55 +01:00
			`# remove duplicate contributions count`
			`# find commits at the time of split and substract from zulip-server`
			`with open(duplicate_commits_file, 'r') as f:`
			`duplicate_commits = json.loads(f.read())`
			`for committer in duplicate_commits:`
update-authors-json: Improve error handling for duplicate commits. It's possible that a user with duplicate commits might not be in the top 100 users for a given repo. 2018-03-22 22:57:22 +01:00			`if committer in contribs_list and contribs_list[committer].get('server'):`
team page: Remove duplicate contribution counts. It removes duplicate contribution count from zulip-server. Fixes #7836. 2018-03-03 13:21:55 +01:00			`total_commits = cast(int, contribs_list[committer]['server'])`
			`duplicate_commits_count = duplicate_commits[committer]`
			`original_commits = total_commits - duplicate_commits_count`
			`contribs_list[committer]['server'] = original_commits`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`for repo in repos_done:`
			`del repositories[repo]`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`if not repositories:`
			`break`

			`# Wait before retrying failed requests for Github to aggregate data.`
			`sleep(2)`
			`else:`
			`print("ERROR: Failed fetching contributors data from Github.")`
			`if not args.not_required:`
			`sys.exit(1)`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`for contributor_name, contributor_data in contribs_list.items():`
			`contributor_data['name'] = contributor_name`
			`data['contrib'].append(contributor_data)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`write_to_disk(data, settings.CONTRIBUTORS_DATA)`


			`def copy_fixture() -> None:`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
tests: Move zerver/fixtures to zerver/tests/fixtures for clarity. Fixes #9153. 2018-04-19 20:17:24 +02:00			`Copy test fixture file from zerver/tests/fixtures. This is used to avoid`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`constantly fetching data from Github during testing.`
			`"""`
tools/update-authors-json: Avoid shelling out for cp. Signed-off-by: Anders Kaseorg <andersk@mit.edu> 2018-07-18 23:50:16 +02:00			`shutil.copyfile(FIXTURE_FILE, settings.CONTRIBUTORS_DATA)`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`if args.use_fixture:`
			`copy_fixture()`
			`else:`
			`run_production()`