zulip/tools/update-authors-json

#!/usr/bin/env python3
"""
Fetch contributors data from Github using their API, convert it to structured
JSON data for the /team page contributors section.
"""

# check for the venv
from lib import sanity_check
sanity_check.check_venv(__file__)

from typing import Any, Dict, List, Optional, Union, Text
from mypy_extensions import TypedDict

import os
import sys
import argparse
from time import sleep
from datetime import date
import subprocess

import requests
import json

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
from django.conf import settings
from zerver.lib.utils import split_by

FIXTURE_FILE = os.path.join(os.path.dirname(__file__), '../zerver/fixtures/authors.json')

# Command-line interface.  The arguments are parsed at module level because
# the functions below read the resulting `args` namespace directly.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--max-retries',
    type=int,
    default=3,
    help='Number of times to retry fetching data from Github',
)
# Travis CI and the development environment use a checked-in test fixture
# so that they do not fetch from Github on every run.
parser.add_argument(
    '--use-fixture',
    action='store_true',
    help='Use fixture data instead of fetching from Github',
)
parser.add_argument(
    '--not-required',
    action='store_true',
    help='Consider failures to reach GitHub nonfatal',
)
args = parser.parse_args()


# Shape of the JSON document that run_production() builds and writes out:
#   'date'    -- the generation date, produced with str(date.today())
#   'contrib' -- one dict per contributor; run_production() fills each with
#                'name', 'avatar', and a commit total keyed by the short
#                name of every repository the contributor appears in
ContributorsJSON = TypedDict('ContributorsJSON', {
    'date': str,
    'contrib': List[Dict[str, Union[str, int]]],
})


def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:
    """
    Fetch the contributor statistics served at `repo_link`.

    Returns the decoded JSON body when the server answers with HTTP 200.
    Returns None in every other case: a different status code, a
    network-level failure (connection error, timeout, ...), or a response
    body that is not valid JSON.  run_production() treats a falsy result as
    "not fetched yet" and retries, so transient problems are reported
    through the return value instead of escaping as an exception.
    """
    try:
        # Bound the request so an unresponsive server cannot hang the
        # script forever.
        r = requests.get(repo_link, timeout=30)  # type: requests.Response
    except requests.RequestException:
        return None

    if r.status_code != 200:
        return None

    try:
        return r.json()
    except ValueError:
        # The body could not be decoded as JSON.
        return None


def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """
    Write `json_data` to `out_file` as one line of JSON followed by a
    newline, replacing any previous content of the file.

    If the file cannot be opened or written, the error is printed and the
    script exits with status 1.
    """
    # Serialize before opening the file: opening with 'w' truncates it, and
    # we do not want to leave an empty file behind if serialization fails.
    serialized = "{}\n".format(json.dumps(json_data))
    try:
        # open() is inside the try block on purpose -- a missing directory
        # or a permission problem surfaces there, not in write().
        with open(out_file, 'w') as f:
            f.write(serialized)
    except IOError as e:
        print(e)
        sys.exit(1)


def run_production() -> None:
    """
    Fetch contributor statistics for every Zulip repository from Github,
    merge them per Github username, and write the result to
    settings.CONTRIBUTORS_DATA.

    Each repository is attempted up to --max-retries times, with a two
    second pause between rounds; a repository counts as fetched once
    fetch_contributors() returns a non-empty list.  If some repositories
    are still missing after the last round, an error is printed and the
    script exits with status 1 -- unless --not-required was passed, in
    which case whatever was collected so far is still written out.
    """
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-electron/stats/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',
        'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',
    }

    data = dict(date=str(date.today()), contrib=[])  # type: ContributorsJSON
    # Per-username accumulator: avatar URL plus one commit total per repo.
    contribs_list = {}  # type: Dict[str, Dict[str, Union[str, int]]]

    for attempt in range(args.max_retries):
        repos_done = []
        for name, link in repositories.items():
            contribs = fetch_contributors(link)
            if not contribs:
                # None or an empty list: keep the repository in the dict so
                # the next round tries it again.
                continue
            repos_done.append(name)
            for contrib in contribs:
                author = contrib.get('author')
                if not author or not author.get('login'):
                    # Without an author object (or a login inside it) there
                    # is no username to file this entry under; skip it
                    # rather than crash on None.
                    continue
                username = author['login']
                contrib_data = {
                    'avatar': author.get('avatar_url'),
                    name: contrib.get('total'),
                }
                contribs_list.setdefault(username, {}).update(contrib_data)
        # Removal is deferred until iteration over `repositories` is over.
        for repo in repos_done:
            del repositories[repo]

        if not repositories:
            break

        # Wait before retrying failed requests for Github to aggregate data.
        # There is nothing to wait for after the final attempt.
        if attempt + 1 < args.max_retries:
            sleep(2)
    else:
        # The loop ran out of attempts without fetching every repository.
        print("ERROR: Failed fetching contributors data from Github.")
        if not args.not_required:
            sys.exit(1)

    for contributor_name, contributor_data in contribs_list.items():
        contributor_data['name'] = contributor_name
        data['contrib'].append(contributor_data)

    write_to_disk(data, settings.CONTRIBUTORS_DATA)


def copy_fixture() -> None:
    """
    Install the checked-in fixture (FIXTURE_FILE, under zerver/fixtures) as
    settings.CONTRIBUTORS_DATA, so that test runs do not have to fetch the
    data from Github every time.
    """
    copy_command = ['cp', FIXTURE_FILE, settings.CONTRIBUTORS_DATA]
    subprocess.check_call(copy_command)


# Script entry point: install the fixture when --use-fixture was given,
# otherwise fetch live data from Github.
entry_point = copy_fixture if args.use_fixture else run_production
entry_point()
py3: Switch almost all shebang lines to use `python3`. This causes `upgrade-zulip-from-git`, as well as a no-option run of `tools/build-release-tarball`, to produce a Zulip install running Python 3, rather than Python 2. In particular this means that the virtualenv we create, in which all application code runs, is Python 3. One shebang line, on `zulip-ec2-configure-interfaces`, explicitly keeps Python 2, and at least one external ops script, `wal-e`, also still runs on Python 2. See discussion on the respective previous commits that made those explicit. There may also be some other third-party scripts we use, outside of this source tree and running outside our virtualenv, that still run on Python 2. 2017-08-02 23:15:16 +02:00			`#!/usr/bin/env python3`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
			`Fetch contributors data from Github using their API, convert it to structured`
landing-page: Replace /about with /team and /history in links. 2017-10-31 20:08:32 +01:00			`JSON data for the /team page contributors section.`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`

tools: Create more consistent checks for venv. This helps make the Zulip development environment somewhat more robust to new contributors, since it will give them a nice warning if they try running any of our development tools outside the Zulip virtualenv. Fixes #3468. 2017-02-05 21:24:28 +01:00			`# check for the venv`
			`from lib import sanity_check`
			`sanity_check.check_venv(__file__)`

Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`from typing import Any, Dict, List, Optional, Union, Text`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`from mypy_extensions import TypedDict`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
			`import os`
			`import sys`
			`import argparse`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`from time import sleep`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`from datetime import date`
			`import subprocess`

			`import requests`
json: Replace most use of simplejson with json. This is progress towards removing simplejson as a dependency. 2017-10-12 07:54:25 +02:00			`import json`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
			`sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))`
			`os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'`
			`from django.conf import settings`
			`from zerver.lib.utils import split_by`

			`FIXTURE_FILE = os.path.join(os.path.dirname(__file__), '../zerver/fixtures/authors.json')`

			`parser = argparse.ArgumentParser()`
			`parser.add_argument('--max-retries', type=int, default=3,`
			`help='Number of times to retry fetching data from Github')`
			`# In Travis CI and development environment, we use test fixture to avoid`
			`# fetching from Github constantly.`
			`parser.add_argument('--use-fixture', action='store_true', default=False,`
			`help='Use fixture data instead of fetching from Github')`
upgrade: Don't require authors updates deploying from Git. Fixes #3392. 2017-01-24 07:19:25 +01:00			`parser.add_argument('--not-required', action='store_true', default=False,`
			`help='Consider failures to reach GitHub nonfatal')`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`args = parser.parse_args()`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`ContributorsJSON = TypedDict('ContributorsJSON', {`
			`'date': str,`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`'contrib': List[Dict[str, Union[str, int]]],`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`})`

Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:`
			`r = requests.get(repo_link) # type: requests.Response`
			`return r.json() if r.status_code == 200 else None`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`with open(out_file, 'w') as f:`
			`try:`
			`f.write("{}\n".format(json.dumps(json_data)))`
			`except IOError as e:`
			`print(e)`
			`sys.exit(1)`

/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`def run_production() -> None:`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`Get contributors data from Github and insert them into a temporary`
			`dictionary. Retry fetching each repository if responded with non HTTP 200`
			`status.`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`repositories = {`
			`'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',`
			`'desktop': 'https://api.github.com/repos/zulip/zulip-electron/stats/contributors',`
			`'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',`
			`'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',`
/team: Fetch zulip-js repository data. 2018-01-15 15:54:22 +01:00			`'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',`
			`}`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`data = dict(date=str(date.today()), contrib=[]) # type: ContributorsJSON`
			`contribs_list = {} # type: Dict[str, Dict[str, Union[str, int]]]`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`for _ in range(args.max_retries):`
			`repos_done = []`
			`for name, link in repositories.items():`
			`contribs = fetch_contributors(link)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`if contribs:`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`repos_done.append(name)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`for contrib in contribs:`
			`username = contrib.get('author').get('login')`
			`contrib_data = {`
			`'avatar': contrib.get('author').get('avatar_url'),`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`name: contrib.get('total'),`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`}`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`if username in contribs_list:`
			`contribs_list[username].update(contrib_data)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00			`else:`
/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`contribs_list[username] = contrib_data`
			`for repo in repos_done:`
			`del repositories[repo]`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`if not repositories:`
			`break`

			`# Wait before retrying failed requests for Github to aggregate data.`
			`sleep(2)`
			`else:`
			`print("ERROR: Failed fetching contributors data from Github.")`
			`if not args.not_required:`
			`sys.exit(1)`

/team: Use list instead of dict for contributors data. 2017-11-20 21:49:03 +01:00			`for contributor_name, contributor_data in contribs_list.items():`
			`contributor_data['name'] = contributor_name`
			`data['contrib'].append(contributor_data)`
/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
			`write_to_disk(data, settings.CONTRIBUTORS_DATA)`


			`def copy_fixture() -> None:`
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`"""`
			`Copy test fixture file from zerver/fixtures. This is used to avoid`
			`constantly fetching data from Github during testing.`
			`"""`
			`subprocess.check_call(['cp', FIXTURE_FILE, settings.CONTRIBUTORS_DATA])`

/team: Fetch contributors data from all major repos. Also wait 2 seconds before trying again. 2017-11-16 14:05:26 +01:00
Add /authors page. Contributor visualization showing the avatar, user name and number of commits for each contributors. The JSON data would be updated upon deployment, triggered by the `update-prod-static` script. 2017-01-06 18:56:36 +01:00			`if args.use_fixture:`
			`copy_fixture()`
			`else:`
			`run_production()`