test-backend: Add steps to deal with potential database leaks.

A function was written in `test_fixtures.py` to drop a test database
template if the corresponding database id doesn't belong to a file.
In addition, every file that is written is removed after 60
minutes, meaning that any leaked database template can never exist
for longer than one hour.

This follow-up work was added to deal with the potential race
conditions when running `test-backend`, ensuring that all templates
are properly cleaned up.

Essentially rewritten by tabbott for cleanliness.

Fixes the remainder of #12426.
This commit is contained in:
Wyatt Hoodes 2019-06-07 15:57:19 -10:00 committed by Tim Abbott
parent 0b05d91e62
commit 6b033c7909
4 changed files with 71 additions and 5 deletions

View File

@ -526,7 +526,8 @@ def main(options):
import django import django
django.setup() django.setup()
from zerver.lib.test_fixtures import template_database_status, run_db_migrations from zerver.lib.test_fixtures import template_database_status, run_db_migrations, \
destroy_leaked_test_databases
try: try:
from zerver.lib.queue import SimpleQueueClient from zerver.lib.queue import SimpleQueueClient
@ -574,6 +575,10 @@ def main(options):
else: else:
print("No need to run `manage.py compilemessages`.") print("No need to run `manage.py compilemessages`.")
destroyed = destroy_leaked_test_databases()
if destroyed:
print("Dropped %s stale test databases!" % (destroyed,))
run(["scripts/lib/clean-unused-caches"]) run(["scripts/lib/clean-unused-caches"])
# Keeping this cache file around can cause eslint to throw # Keeping this cache file around can cause eslint to throw

View File

@ -461,6 +461,11 @@ def main() -> None:
# an important clue as to why tests fail. # an important clue as to why tests fail.
report_slow_tests() report_slow_tests()
# Ideally, we'd check for any leaked test databases here;
# but that needs some hackery with database names.
#
# destroy_leaked_test_databases()
# We'll have printed whether tests passed or failed above # We'll have printed whether tests passed or failed above
sys.exit(bool(failures)) sys.exit(bool(failures))

View File

@ -21,4 +21,4 @@ LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2019/03/01/zulip-2-0-relea
# Typically, adding a dependency only requires a minor version bump, and # Typically, adding a dependency only requires a minor version bump, and
# removing a dependency requires a major version bump. # removing a dependency requires a major version bump.
PROVISION_VERSION = '34.2' PROVISION_VERSION = '34.3'

View File

@ -5,12 +5,14 @@ import re
import hashlib import hashlib
import subprocess import subprocess
import sys import sys
from typing import Any, List, Optional from typing import Any, List, Optional, Set
from importlib import import_module from importlib import import_module
from io import StringIO from io import StringIO
import glob import glob
import time
from django.db import connections, DEFAULT_DB_ALIAS from django.db import connections, DEFAULT_DB_ALIAS, ProgrammingError, \
connection
from django.db.utils import OperationalError from django.db.utils import OperationalError
from django.apps import apps from django.apps import apps
from django.conf import settings from django.conf import settings
@ -18,7 +20,8 @@ from django.core.management import call_command
from django.utils.module_loading import module_has_submodule from django.utils.module_loading import module_has_submodule
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, file_or_package_hash_updated from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \
file_or_package_hash_updated, TEMPLATE_DATABASE_DIR
UUID_VAR_DIR = get_dev_uuid_var_path() UUID_VAR_DIR = get_dev_uuid_var_path()
FILENAME_SPLITTER = re.compile(r'[\W\-_]') FILENAME_SPLITTER = re.compile(r'[\W\-_]')
@ -240,3 +243,56 @@ def template_database_status(
return 'current' return 'current'
return 'needs_rebuild' return 'needs_rebuild'
def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
    """The logic in zerver/lib/test_runner.py tries to delete all the
    temporary test databases generated by test-backend threads, but it
    cannot guarantee it handles all race conditions correctly.  This
    is a catch-all function designed to delete any that might have
    been leaked due to crashes (etc.).  The high-level algorithm is to:

    * Delete every database with a name like zulip_test_template_*
    * Unless it is registered in a file under TEMPLATE_DATABASE_DIR as
      part of a currently running test-backend invocation
    * And that file is less than expiry_time seconds old.

    This should ensure we ~never break a running test-backend process,
    while also ensuring we will eventually delete all leaked databases.

    Returns the number of databases that were dropped (0 if there was
    nothing to clean up).  Raises RuntimeError if the psql invocation
    that drops the databases exits with a nonzero status.
    """
    prefix = 'zulip_test_template_'
    files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))

    test_databases = set()  # type: Set[str]
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT datname FROM pg_database;")
            rows = cursor.fetchall()
            for row in rows:
                # Match on the prefix only (not any substring), so that we
                # never drop a database that merely contains this text.
                if row[0].startswith(prefix):
                    test_databases.add(row[0])
    except ProgrammingError:
        # Deliberately best-effort: if the listing query fails, we
        # continue with whatever was collected so far (typically
        # nothing, in which case there is nothing to drop).
        pass

    databases_in_use = set()  # type: Set[str]
    now = round(time.time())
    for path in files:
        try:
            if now - os.path.getmtime(path) < expiry_time:
                with open(path, "r") as f:
                    for line in f:
                        database_id = line.strip()
                        # Skip blank lines rather than registering a
                        # bogus, suffix-less database name.
                        if database_id:
                            databases_in_use.add(prefix + database_id)
            else:
                # Any test-backend run older than expiry_time can be
                # cleaned up, both the database and the file listing its
                # databases.
                os.remove(path)
        except FileNotFoundError:
            # The file was removed between the glob above and now (e.g.
            # by a concurrent cleanup or a test-backend run finishing);
            # it no longer protects any databases.
            continue

    databases_to_drop = test_databases - databases_in_use
    if not databases_to_drop:
        return 0

    commands = "\n".join("DROP DATABASE IF EXISTS %s;" % (db,) for db in databases_to_drop)
    # ON_ERROR_STOP=1 makes psql exit nonzero on the first failing
    # statement, which we surface as a RuntimeError below.
    p = subprocess.Popen(["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost",
                          "postgres", "zulip_test"],
                         stdin=subprocess.PIPE)
    p.communicate(input=commands.encode())
    if p.returncode != 0:
        raise RuntimeError("Error cleaning up test databases!")
    return len(databases_to_drop)