mirror of https://github.com/zulip/zulip.git
ci: Split out installation testing.
This commit is contained in:
parent
36335ecd39
commit
f621e1b7e4
|
@ -138,6 +138,12 @@ aliases:
|
|||
sudo service rabbitmq-server restart
|
||||
sudo mispipe "/tmp/production-install 2>&1" ts
|
||||
|
||||
- &verify_production
|
||||
run:
|
||||
name: verify install
|
||||
command: |
|
||||
sudo mispipe "/tmp/production-verify 2>&1" ts
|
||||
|
||||
- &check_xenial_provision_error
|
||||
run:
|
||||
name: check tools/provision error message on xenial
|
||||
|
@ -290,6 +296,7 @@ jobs:
|
|||
- success-http-headers-bionic.txt
|
||||
- success-http-headers-focal.txt
|
||||
- production-install
|
||||
- production-verify
|
||||
- production
|
||||
- production-extract-tarball
|
||||
- *notify_failure_status
|
||||
|
@ -313,6 +320,7 @@ jobs:
|
|||
- *production_extract_tarball
|
||||
- *restore_cache_package_json
|
||||
- *install_production
|
||||
- *verify_production
|
||||
- *save_cache_package_json
|
||||
- *notify_failure_status
|
||||
|
||||
|
@ -342,6 +350,7 @@ jobs:
|
|||
- *production_extract_tarball
|
||||
- *restore_cache_package_json
|
||||
- *install_production
|
||||
- *verify_production
|
||||
- *save_cache_package_json
|
||||
- *notify_failure_status
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ cp -a \
|
|||
tools/ci/success-http-headers-bionic.txt \
|
||||
tools/ci/success-http-headers-focal.txt \
|
||||
tools/ci/production-install \
|
||||
tools/ci/production-verify \
|
||||
tools/ci/production-extract-tarball \
|
||||
\
|
||||
/tmp/
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
#!/usr/bin/env bash
|
||||
# This test installs a Zulip production environment (from the release
|
||||
# tarball from production-build), and then runs some Nagios checks and
|
||||
# other tools to verify that everything is working properly.
|
||||
# tarball from production-build).
|
||||
set -e
|
||||
set -x
|
||||
|
||||
|
@ -24,103 +23,5 @@ fi
|
|||
# Install Zulip
|
||||
"$ZULIP_PATH"/scripts/setup/install --self-signed-cert --hostname 127.0.0.1 --email circleci@example.com
|
||||
|
||||
cat >>/etc/zulip/settings.py <<EOF
|
||||
# CircleCI override settings above
|
||||
AUTHENTICATION_BACKENDS = ( 'zproject.backends.EmailAuthBackend', )
|
||||
NOREPLY_EMAIL_ADDRESS = 'noreply@circleci.example.com'
|
||||
ALLOWED_HOSTS = []
|
||||
EOF
|
||||
|
||||
echo; echo "Now testing that the supervisord jobs are running properly"; echo
|
||||
sleep 15 # Guaranteed to have a working supervisord process get an extra digit
|
||||
if supervisorctl status | grep -vq RUNNING || supervisorctl status | sed 's/^.*uptime //' | grep -q 0:00:0; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: Supervisor output shows daemons are crashing:"
|
||||
echo
|
||||
supervisorctl status
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's error log:"
|
||||
cat /var/log/zulip/errors.log
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's workers log:"
|
||||
cat /var/log/zulip/workers.log
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's tornado log:"
|
||||
cat /var/log/zulip/tornado.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# TODO: Ideally this would test actually logging in, but this is a start.
|
||||
echo; echo "Now testing that the newly installed server's homepage loads"; echo
|
||||
|
||||
wget https://localhost -O /tmp/index.html --no-check-certificate -S 2> /tmp/wget-output || true # || true so we see errors.log if this 500s
|
||||
grep -vi '\(Vary\|Content-Language\|expires\|issued by\|modified\|saved\|[.][.][.]\|Date\|[-][-]\)' /tmp/wget-output > /tmp/http-headers-processed
|
||||
|
||||
# Simplify the diff by getting replacing 4-5 digit length numbers with <Length>.
|
||||
sed -i 's|Length: [0-9]\+\( [(][0-9]\+[.][0-9]K[)]\)\?|Length: <Length>|' /tmp/http-headers-processed
|
||||
sed -i 's|Length: [0-9]\+\( [(][0-9]\+[.][0-9]K[)]\)\?|Length: <Length>|' /tmp/success-http-headers-"$os_version_codename".txt
|
||||
if ! diff -ur /tmp/http-headers-processed /tmp/success-http-headers-"$os_version_codename".txt; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: The HTTP Headers returned from loading the homepage on the server do not match the contents of tools/ci/success-http-headers.txt. Typically, this means that the server threw a 500 when trying to load the homepage."
|
||||
echo "Displaying the contents of the server's error log:"
|
||||
echo
|
||||
cat /var/log/zulip/errors.log
|
||||
echo
|
||||
echo "Displaying the contents of the main server log:"
|
||||
echo
|
||||
cat /var/log/zulip/server.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Start the RabbitMQ queue worker related section
|
||||
echo; echo "Now confirming all the RabbitMQ queue processors are correctly registered!"; echo
|
||||
# These hacky shell scripts just extract the sorted list of queue processors, running and expected
|
||||
supervisorctl status | cut -f1 -dR | cut -f2- -d: | grep events | cut -f1 -d" " | cut -f3- -d_ | cut -f1 -d- | sort -u > /tmp/running_queue_processors.txt
|
||||
su zulip -c /home/zulip/deployments/current/scripts/lib/queue_workers.py | grep -v ^test$ | sort -u > /tmp/expected_queue_processors.txt
|
||||
if ! diff /tmp/expected_queue_processors.txt /tmp/running_queue_processors.txt >/dev/null; then
|
||||
set +x
|
||||
echo "FAILURE: Runnable queue processors declared in zerver/worker/queue_processors.py "
|
||||
echo "do not match those in puppet/manifests/zulip/base.pp"
|
||||
echo "See https://zulip.readthedocs.io/en/latest/subsystems/queuing.html for details."
|
||||
echo
|
||||
diff -ur /tmp/expected_queue_processors.txt /tmp/running_queue_processors.txt
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo; echo "Now running RabbitMQ consumer Nagios tests"; echo
|
||||
# First run the check that usually runs in cron and populates the state files
|
||||
/home/zulip/deployments/current/scripts/nagios/check-rabbitmq-consumers
|
||||
|
||||
# Then, compute the list of all Django queue workers to run Nagios checks against
|
||||
consumer_list=$(/home/zulip/deployments/current/scripts/lib/queue_workers.py --queue-type=consumer)
|
||||
for consumer in $consumer_list; do
|
||||
if ! /usr/lib/nagios/plugins/zulip_app_frontend/check_rabbitmq_consumers "$consumer"; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: Missing Nagios consumer for $consumer; displaying full consumer output:"
|
||||
set -x
|
||||
rabbitmqctl list_consumers
|
||||
supervisorctl status
|
||||
tail -n +1 /var/log/zulip/events*.log
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Some of the Nagios tests have been temporarily disabled to work
|
||||
# around a Travis CI infrastructure issue.
|
||||
echo; echo "Now running additional Nagios tests"; echo
|
||||
if ! /usr/lib/nagios/plugins/zulip_app_frontend/check_queue_worker_errors || \
|
||||
! su zulip -c /usr/lib/nagios/plugins/zulip_postgres_appdb/check_fts_update_log; then # || \
|
||||
# ! su zulip -c "/usr/lib/nagios/plugins/zulip_app_frontend/check_send_receive_time --site=https://127.0.0.1/api --nagios --insecure"; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: Nagios checks don't pass:"
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's error log:"
|
||||
cat /var/log/zulip/errors.log
|
||||
exit 1
|
||||
fi
|
||||
echo "Production installation test successful!"
|
||||
echo "Production installation complete!"
|
||||
exit 0
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/env bash
|
||||
# This tests a Zulip production environment (installed via
|
||||
# production-install) with some Nagios checks and other tools to
|
||||
# verify that everything is working properly.
|
||||
set -e
|
||||
set -x
|
||||
|
||||
if [ -f /etc/os-release ]; then
|
||||
os_info="$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME")"
|
||||
{ read -r os_version_codename || true; } <<< "$os_info"
|
||||
fi
|
||||
|
||||
cat >>/etc/zulip/settings.py <<EOF
|
||||
# CircleCI override settings above
|
||||
AUTHENTICATION_BACKENDS = ( 'zproject.backends.EmailAuthBackend', )
|
||||
NOREPLY_EMAIL_ADDRESS = 'noreply@circleci.example.com'
|
||||
ALLOWED_HOSTS = []
|
||||
EOF
|
||||
|
||||
echo; echo "Now testing that the supervisord jobs are running properly"; echo
|
||||
sleep 15 # Guaranteed to have a working supervisord process get an extra digit
|
||||
if supervisorctl status | grep -vq RUNNING || supervisorctl status | sed 's/^.*uptime //' | grep -q 0:00:0; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: Supervisor output shows daemons are crashing:"
|
||||
echo
|
||||
supervisorctl status
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's error log:"
|
||||
cat /var/log/zulip/errors.log
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's workers log:"
|
||||
cat /var/log/zulip/workers.log
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's tornado log:"
|
||||
cat /var/log/zulip/tornado.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# TODO: Ideally this would test actually logging in, but this is a start.
|
||||
echo; echo "Now testing that the newly installed server's homepage loads"; echo
|
||||
|
||||
wget https://localhost -O /tmp/index.html --no-check-certificate -S 2> /tmp/wget-output || true # || true so we see errors.log if this 500s
|
||||
grep -vi '\(Vary\|Content-Language\|expires\|issued by\|modified\|saved\|[.][.][.]\|Date\|[-][-]\)' /tmp/wget-output > /tmp/http-headers-processed
|
||||
|
||||
# Simplify the diff by getting replacing 4-5 digit length numbers with <Length>.
|
||||
sed -i 's|Length: [0-9]\+\( [(][0-9]\+[.][0-9]K[)]\)\?|Length: <Length>|' /tmp/http-headers-processed
|
||||
sed -i 's|Length: [0-9]\+\( [(][0-9]\+[.][0-9]K[)]\)\?|Length: <Length>|' /tmp/success-http-headers-"$os_version_codename".txt
|
||||
if ! diff -ur /tmp/http-headers-processed /tmp/success-http-headers-"$os_version_codename".txt; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: The HTTP Headers returned from loading the homepage on the server do not match the contents of tools/ci/success-http-headers.txt. Typically, this means that the server threw a 500 when trying to load the homepage."
|
||||
echo "Displaying the contents of the server's error log:"
|
||||
echo
|
||||
cat /var/log/zulip/errors.log
|
||||
echo
|
||||
echo "Displaying the contents of the main server log:"
|
||||
echo
|
||||
cat /var/log/zulip/server.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Start the RabbitMQ queue worker related section
|
||||
echo; echo "Now confirming all the RabbitMQ queue processors are correctly registered!"; echo
|
||||
# These hacky shell scripts just extract the sorted list of queue processors, running and expected
|
||||
supervisorctl status | cut -f1 -dR | cut -f2- -d: | grep events | cut -f1 -d" " | cut -f3- -d_ | cut -f1 -d- | sort -u > /tmp/running_queue_processors.txt
|
||||
su zulip -c /home/zulip/deployments/current/scripts/lib/queue_workers.py | grep -v ^test$ | sort -u > /tmp/expected_queue_processors.txt
|
||||
if ! diff /tmp/expected_queue_processors.txt /tmp/running_queue_processors.txt >/dev/null; then
|
||||
set +x
|
||||
echo "FAILURE: Runnable queue processors declared in zerver/worker/queue_processors.py "
|
||||
echo "do not match those in puppet/manifests/zulip/base.pp"
|
||||
echo "See https://zulip.readthedocs.io/en/latest/subsystems/queuing.html for details."
|
||||
echo
|
||||
diff -ur /tmp/expected_queue_processors.txt /tmp/running_queue_processors.txt
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo; echo "Now running RabbitMQ consumer Nagios tests"; echo
|
||||
# First run the check that usually runs in cron and populates the state files
|
||||
/home/zulip/deployments/current/scripts/nagios/check-rabbitmq-consumers
|
||||
|
||||
# Then, compute the list of all Django queue workers to run Nagios checks against
|
||||
consumer_list=$(/home/zulip/deployments/current/scripts/lib/queue_workers.py --queue-type=consumer)
|
||||
for consumer in $consumer_list; do
|
||||
if ! /usr/lib/nagios/plugins/zulip_app_frontend/check_rabbitmq_consumers "$consumer"; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: Missing Nagios consumer for $consumer; displaying full consumer output:"
|
||||
set -x
|
||||
rabbitmqctl list_consumers
|
||||
supervisorctl status
|
||||
tail -n +1 /var/log/zulip/events*.log
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Some of the Nagios tests have been temporarily disabled to work
|
||||
# around a Travis CI infrastructure issue.
|
||||
echo; echo "Now running additional Nagios tests"; echo
|
||||
if ! /usr/lib/nagios/plugins/zulip_app_frontend/check_queue_worker_errors || \
|
||||
! su zulip -c /usr/lib/nagios/plugins/zulip_postgres_appdb/check_fts_update_log; then # || \
|
||||
# ! su zulip -c "/usr/lib/nagios/plugins/zulip_app_frontend/check_send_receive_time --site=https://127.0.0.1/api --nagios --insecure"; then
|
||||
set +x
|
||||
echo
|
||||
echo "FAILURE: Nagios checks don't pass:"
|
||||
echo
|
||||
echo "DEBUG: printing Zulip server's error log:"
|
||||
cat /var/log/zulip/errors.log
|
||||
exit 1
|
||||
fi
|
||||
echo "Production installation test successful!"
|
||||
exit 0
|
Loading…
Reference in New Issue