team: Generate team page data using cron job.

This eliminates the contributors data as a possible source of
flakiness when installing Zulip from Git.

Fixes #14351.
Vishnu KS 2020-04-07 22:57:07 +05:30 committed by Tim Abbott
parent 8415a1472a
commit 449f7e2d4b
10 changed files with 52 additions and 70 deletions
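
In outline, the flow this commit sets up (a condensed sketch based on the diffs below, not code copied verbatim from them; the helper name load_contributor_data is only for illustration — the actual view inlines this logic in team_view() in the portico views diff): a root-owned cron job runs tools/fetch-contributor-data daily as the zulip user, the script writes its JSON output to settings.CONTRIBUTOR_DATA_FILE_PATH, and the /team/ page reads that file at request time, falling back to a placeholder until the cron job has run.

# Illustrative sketch only, assuming the settings constant added in this commit.
import ujson
from django.conf import settings

def load_contributor_data() -> dict:
    try:
        with open(settings.CONTRIBUTOR_DATA_FILE_PATH) as f:
            return ujson.load(f)
    except FileNotFoundError:
        # Rendered on /team/ until the cron job has produced the file.
        return {'contrib': {}, 'date': "Never ran."}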


@@ -300,15 +300,6 @@ comes from the [pygments][] package. `tools/setup/build_pygments_data` is
 responsible for generating `static/generated/pygments_data.json` so that
 our JavaScript markdown processor has access to the supported list.
 
-### Authors data
-
-Zulip maintains data on the developers who have contributed the most to
-the current version of Zulip in the /about page. These data are
-fetched using the GitHub API with `tools/fetch-contributor-data`. In
-development, it just returns some basic test data to avoid adding load
-to GitHub's APIs unnecessarily; it's primarily run as part of building
-a release tarball.
 
 ## Modifying provisioning
 
 When making changes to Zulip's provisioning process or dependencies,


@@ -0,0 +1,5 @@
+SHELL=/bin/bash
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+USER=zulip
+
+0 8 * * * zulip /home/zulip/deployments/current/tools/fetch-contributor-data


@@ -59,4 +59,13 @@ class zulip_ops::app_frontend {
     content => zulipsecret('secrets', 'redis_password', ''),
   }
 
+  # Each server does its own fetching of contributor data, since
+  # we don't have a way to synchronize that among several servers.
+  file { '/etc/cron.d/fetch-contributor-data':
+    ensure => file,
+    owner  => 'root',
+    group  => 'root',
+    mode   => '0644',
+    source => 'puppet:///modules/zulip_ops/cron.d/fetch-contributor-data',
+  }
 }

static/.gitignore

@@ -10,8 +10,5 @@
 /generated/emoji-styles
 # From passing pygments data to the frontend
 /generated/pygments_data.json
-# From `tools/fetch-contributor-data`
-/generated/github-contributors.json
 # Legacy emoji data directory
 /third/emoji-data


@@ -12,36 +12,35 @@ from typing import Any, Dict, List, Optional, Union
 from typing_extensions import TypedDict
 
 import os
-import shutil
 import sys
 import argparse
 from time import sleep
 from datetime import date
 
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+from scripts.lib.setup_path import setup_path
+setup_path()
+
+os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
+from django.conf import settings
+
 import requests
 import json
 
-FIXTURE_FILE = os.path.join(os.path.dirname(__file__), '../zerver/tests/fixtures/authors.json')
 duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--max-retries', type=int, default=3,
                     help='Number of times to retry fetching data from Github')
-# In Travis CI and development environment, we use test fixture to avoid
-# fetching from Github constantly.
-parser.add_argument('--use-fixture', action='store_true', default=False,
-                    help='Use fixture data instead of fetching from Github')
 parser.add_argument('--not-required', action='store_true', default=False,
                     help='Consider failures to reach GitHub nonfatal')
 args = parser.parse_args()
 
 ContributorsJSON = TypedDict('ContributorsJSON', {
     'date': str,
     'contrib': List[Dict[str, Union[str, int]]],
 })
 
 def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:
     r = requests.get(repo_link, verify=os.environ.get('CUSTOM_CA_CERTIFICATES'))  # type: requests.Response
     return r.json() if r.status_code == 200 else None
@@ -54,8 +53,7 @@ def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
             print(e)
             sys.exit(1)
 
-def run_production() -> None:
+def update_contributor_data_file() -> None:
     """
     Get contributors data from Github and insert them into a temporary
     dictionary. Retry fetching each repository if responded with non HTTP 200
@@ -140,18 +138,7 @@ def run_production() -> None:
             contributor_data['name'] = contributor_name
             data['contrib'].append(contributor_data)
 
-    write_to_disk(data, 'static/generated/github-contributors.json')
+    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
 
-def copy_fixture() -> None:
-    """
-    Copy test fixture file from zerver/tests/fixtures. This is used to avoid
-    constantly fetching data from Github during testing.
-    """
-    shutil.copyfile(FIXTURE_FILE, 'static/generated/github-contributors.json')
-
-if args.use_fixture:
-    copy_fixture()
-else:
-    run_production()
+if __name__ == "__main__":
+    update_contributor_data_file()


@@ -134,12 +134,6 @@ def main(options: argparse.Namespace) -> int:
     else:
         print("No need to run `tools/setup/build_pygments_data`.")
 
-    update_authors_json_paths = ["tools/fetch-contributor-data", "zerver/tests/fixtures/authors.json"]
-    if file_or_package_hash_updated(update_authors_json_paths, "update_authors_json_hash", options.is_force):
-        run(["tools/fetch-contributor-data", "--use-fixture"])
-    else:
-        print("No need to run `tools/fetch-contributor-data`.")
-
     email_source_paths = ["scripts/setup/inline-email-css", "templates/zerver/emails/email.css"]
     email_source_paths += glob.glob('templates/zerver/emails/*.source.html')
     if file_or_package_hash_updated(email_source_paths, "last_email_source_files_hash", options.is_force):


@@ -54,14 +54,6 @@ run(['./tools/setup/build_pygments_data'], stdout=fp, stderr=fp)
 # Create webpack bundle
 run(['./tools/webpack'], stdout=fp, stderr=fp)
 
-# Generate /team page markdown for authors
-authors_cmd = ['./tools/fetch-contributor-data']
-if os.environ.get("TRAVIS"):
-    authors_cmd.append("--use-fixture")
-if args.authors_not_required:
-    authors_cmd.append("--not-required")
-run(authors_cmd, stdout=fp, stderr=fp)
-
 # Collect the files that we're going to serve; this creates prod-static/serve.
 run([
     './manage.py', 'collectstatic', '--no-default-ignore',


@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 import os
-import subprocess
 import ujson
 
+import mock
 from django.conf import settings
 from django.test import TestCase, override_settings
@@ -10,7 +10,6 @@ from django.http import HttpResponse
 from typing import Any, Dict, List
 
 from zerver.lib.integrations import INTEGRATIONS
-from zerver.lib.storage import static_path
 from zerver.lib.test_classes import ZulipTestCase
 from zerver.lib.test_helpers import HostRequestMock
 from zerver.lib.test_runner import slow
@@ -329,24 +328,22 @@ class IntegrationTest(TestCase):
                          '<a target="_blank" href="/#streams">streams page</a>')
 
 class AboutPageTest(ZulipTestCase):
-    def setUp(self) -> None:
-        """ Manual installation which did not execute `tools/provision`
-        would not have the `static/generated/github-contributors.json` fixture
-        file.
-        """
-        super().setUp()
-        # This block has unreliable test coverage due to the implicit
-        # caching here, so we exclude it from coverage.
-        if not os.path.exists(static_path('generated/github-contributors.json')):
-            # Copy the fixture file in `zerver/tests/fixtures` to `static/generated`
-            update_script = os.path.join(os.path.dirname(__file__),
-                                         '../../tools/fetch-contributor-data')  # nocoverage
-            subprocess.check_call([update_script, '--use-fixture'])  # nocoverage
-
     def test_endpoint(self) -> None:
-        """ We can't check the contributors list since it is rendered client-side """
         result = self.client_get('/team/')
         self.assert_in_success_response(['Our amazing community'], result)
+        self.assert_in_success_response(['2017-11-20'], result)
+        self.assert_in_success_response(['timabbott', 'showell', 'gnprice', 'rishig'], result)
+
+        with mock.patch("zerver.views.portico.open", side_effect=FileNotFoundError) as m:
+            result = self.client_get('/team/')
+            self.assertEqual(result.status_code, 200)
+            self.assert_in_success_response(['Never ran'], result)
+            m.called_once()
+
+        with self.settings(ZILENCER_ENABLED=False):
+            result = self.client_get('/team/')
+            self.assertEqual(result.status_code, 301)
+            self.assertEqual(result["Location"], "https://zulipchat.com/team/")
 
     def test_split_by(self) -> None:
         """Utility function primarily used in authors page"""


@@ -6,7 +6,6 @@ import ujson
 
 from zerver.context_processors import get_realm_from_request
 from zerver.decorator import redirect_to_login
-from zerver.lib.storage import static_path
 from zerver.models import Realm
 
 from version import LATEST_DESKTOP_VERSION
@@ -32,8 +31,14 @@ def plans_view(request: HttpRequest) -> HttpResponse:
     return render(request, "zerver/plans.html", context={"realm_plan_type": realm_plan_type})
 
 def team_view(request: HttpRequest) -> HttpResponse:
-    with open(static_path('generated/github-contributors.json')) as f:
-        data = ujson.load(f)
+    if not settings.ZILENCER_ENABLED:
+        return HttpResponseRedirect('https://zulipchat.com/team/', status=301)
+
+    try:
+        with open(settings.CONTRIBUTOR_DATA_FILE_PATH) as f:
+            data = ujson.load(f)
+    except FileNotFoundError:
+        data = {'contrib': {}, 'date': "Never ran."}
     return render(
         request,


@@ -904,6 +904,11 @@ LOGGING = {
     }
 }  # type: Dict[str, Any]
 
+if DEVELOPMENT:
+    CONTRIBUTOR_DATA_FILE_PATH = os.path.join(DEPLOY_ROOT, 'var/github-contributors.json')
+else:
+    CONTRIBUTOR_DATA_FILE_PATH = '/var/lib/zulip/github-contributors.json'
+
 LOGIN_REDIRECT_URL = '/'
 
 # Client-side polling timeout for get_events, in milliseconds.