test-backend: Add steps to deal with potential database leaks.

A function was written in `test_fixtures.py` to drop a test database
template if the corresponding database id doesn't belong to a file.
Alongside this fact, every file that is written is removed after 60
minutes.  Meaning any potential database template can never exist
longer than one hour.

This follow-up work was added to deal with the potential race
conditions when running `test-backend`.  Ensuring that all templates
are properly dealt with.

Essentially rewritten by tabbott for cleanliness.

Fixes the remainder of #12426.
This commit is contained in:
Wyatt Hoodes 2019-06-07 15:57:19 -10:00 committed by Tim Abbott
parent 0b05d91e62
commit 6b033c7909
4 changed files with 71 additions and 5 deletions

View File

@ -526,7 +526,8 @@ def main(options):
import django
django.setup()
from zerver.lib.test_fixtures import template_database_status, run_db_migrations
from zerver.lib.test_fixtures import template_database_status, run_db_migrations, \
destroy_leaked_test_databases
try:
from zerver.lib.queue import SimpleQueueClient
@ -574,6 +575,10 @@ def main(options):
else:
print("No need to run `manage.py compilemessages`.")
destroyed = destroy_leaked_test_databases()
if destroyed:
print("Dropped %s stale test databases!" % (destroyed,))
run(["scripts/lib/clean-unused-caches"])
# Keeping this cache file around can cause eslint to throw

View File

@ -461,6 +461,11 @@ def main() -> None:
# an important clue as to why tests fail.
report_slow_tests()
# Ideally, we'd check for any leaked test databases here;
# but that needs some hackery with database names.
#
# destroy_leaked_test_databases()
# We'll have printed whether tests passed or failed above
sys.exit(bool(failures))

View File

@ -21,4 +21,4 @@ LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2019/03/01/zulip-2-0-relea
# Typically, adding a dependency only requires a minor version bump, and
# removing a dependency requires a major version bump.
PROVISION_VERSION = '34.2'
PROVISION_VERSION = '34.3'

View File

@ -5,12 +5,14 @@ import re
import hashlib
import subprocess
import sys
from typing import Any, List, Optional
from typing import Any, List, Optional, Set
from importlib import import_module
from io import StringIO
import glob
import time
from django.db import connections, DEFAULT_DB_ALIAS
from django.db import connections, DEFAULT_DB_ALIAS, ProgrammingError, \
connection
from django.db.utils import OperationalError
from django.apps import apps
from django.conf import settings
@ -18,7 +20,8 @@ from django.core.management import call_command
from django.utils.module_loading import module_has_submodule
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, file_or_package_hash_updated
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \
file_or_package_hash_updated, TEMPLATE_DATABASE_DIR
UUID_VAR_DIR = get_dev_uuid_var_path()
FILENAME_SPLITTER = re.compile(r'[\W\-_]')
@ -240,3 +243,56 @@ def template_database_status(
return 'current'
return 'needs_rebuild'
def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
"""The logic in zerver/lib/test_runner.py tries to delete all the
temporary test databases generated by test-backend threads, but it
cannot guarantee it handles all race conditions correctly. This
is a catch-all function designed to delete any that might have
been leaked due to crashes (etc.). The high-level algorithm is to:
* Delete every database with a name like zulip_test_template_*
* Unless it is registered in a file under TEMPLATE_DATABASE_DIR as
part of a currently running test-backend invocation
* And that file is less expiry_time old.
This should ensure we ~never break a running test-backend process,
while also ensuring we will eventually delete all leaked databases.
"""
files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))
test_databases = set() # type: Set[str]
try:
with connection.cursor() as cursor:
cursor.execute("SELECT datname FROM pg_database;")
rows = cursor.fetchall()
for row in rows:
if 'zulip_test_template_' in row[0]:
test_databases.add(row[0])
except ProgrammingError:
pass
databases_in_use = set() # type: Set[str]
for file in files:
if round(time.time()) - os.path.getmtime(file) < expiry_time:
with open(file, "r") as f:
for line in f:
databases_in_use.add('zulip_test_template_{}'.format(line).rstrip())
else:
# Any test-backend run older than expiry_time can be
# cleaned up, both the database and the file listing its
# databases.
os.remove(file)
databases_to_drop = test_databases - databases_in_use
if not databases_to_drop:
return 0
commands = "\n".join("DROP DATABASE IF EXISTS %s;" % (db,) for db in databases_to_drop)
p = subprocess.Popen(["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost",
"postgres", "zulip_test"],
stdin=subprocess.PIPE)
p.communicate(input=commands.encode())
if p.returncode != 0:
raise RuntimeError("Error cleaning up test databases!")
return len(databases_to_drop)