mirror of https://github.com/zulip/zulip.git
test-backend: Add steps to deal with potential database leaks.
A function was written in `test_fixtures.py` to drop a test database template if the corresponding database id doesn't belong to a file. Alongside this fact, every file that is written is removed after 60 minutes. Meaning any potential database template can never exist longer than one hour. This follow-up work was added to deal with the potential race conditions when running `test-backend`. Ensuring that all templates are properly dealt with. Essentially rewritten by tabbott for cleanliness. Fixes the remainder of #12426.
This commit is contained in:
parent
0b05d91e62
commit
6b033c7909
|
@ -526,7 +526,8 @@ def main(options):
|
|||
import django
|
||||
django.setup()
|
||||
|
||||
from zerver.lib.test_fixtures import template_database_status, run_db_migrations
|
||||
from zerver.lib.test_fixtures import template_database_status, run_db_migrations, \
|
||||
destroy_leaked_test_databases
|
||||
|
||||
try:
|
||||
from zerver.lib.queue import SimpleQueueClient
|
||||
|
@ -574,6 +575,10 @@ def main(options):
|
|||
else:
|
||||
print("No need to run `manage.py compilemessages`.")
|
||||
|
||||
destroyed = destroy_leaked_test_databases()
|
||||
if destroyed:
|
||||
print("Dropped %s stale test databases!" % (destroyed,))
|
||||
|
||||
run(["scripts/lib/clean-unused-caches"])
|
||||
|
||||
# Keeping this cache file around can cause eslint to throw
|
||||
|
|
|
@ -461,6 +461,11 @@ def main() -> None:
|
|||
# an important clue as to why tests fail.
|
||||
report_slow_tests()
|
||||
|
||||
# Ideally, we'd check for any leaked test databases here;
|
||||
# but that needs some hackery with database names.
|
||||
#
|
||||
# destroy_leaked_test_databases()
|
||||
|
||||
# We'll have printed whether tests passed or failed above
|
||||
sys.exit(bool(failures))
|
||||
|
||||
|
|
|
@ -21,4 +21,4 @@ LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2019/03/01/zulip-2-0-relea
|
|||
# Typically, adding a dependency only requires a minor version bump, and
|
||||
# removing a dependency requires a major version bump.
|
||||
|
||||
PROVISION_VERSION = '34.2'
|
||||
PROVISION_VERSION = '34.3'
|
||||
|
|
|
@ -5,12 +5,14 @@ import re
|
|||
import hashlib
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Any, List, Optional
|
||||
from typing import Any, List, Optional, Set
|
||||
from importlib import import_module
|
||||
from io import StringIO
|
||||
import glob
|
||||
import time
|
||||
|
||||
from django.db import connections, DEFAULT_DB_ALIAS
|
||||
from django.db import connections, DEFAULT_DB_ALIAS, ProgrammingError, \
|
||||
connection
|
||||
from django.db.utils import OperationalError
|
||||
from django.apps import apps
|
||||
from django.conf import settings
|
||||
|
@ -18,7 +20,8 @@ from django.core.management import call_command
|
|||
from django.utils.module_loading import module_has_submodule
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||||
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, file_or_package_hash_updated
|
||||
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \
|
||||
file_or_package_hash_updated, TEMPLATE_DATABASE_DIR
|
||||
|
||||
UUID_VAR_DIR = get_dev_uuid_var_path()
|
||||
FILENAME_SPLITTER = re.compile(r'[\W\-_]')
|
||||
|
@ -240,3 +243,56 @@ def template_database_status(
|
|||
return 'current'
|
||||
|
||||
return 'needs_rebuild'
|
||||
|
||||
def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
|
||||
"""The logic in zerver/lib/test_runner.py tries to delete all the
|
||||
temporary test databases generated by test-backend threads, but it
|
||||
cannot guarantee it handles all race conditions correctly. This
|
||||
is a catch-all function designed to delete any that might have
|
||||
been leaked due to crashes (etc.). The high-level algorithm is to:
|
||||
|
||||
* Delete every database with a name like zulip_test_template_*
|
||||
* Unless it is registered in a file under TEMPLATE_DATABASE_DIR as
|
||||
part of a currently running test-backend invocation
|
||||
* And that file is less expiry_time old.
|
||||
|
||||
This should ensure we ~never break a running test-backend process,
|
||||
while also ensuring we will eventually delete all leaked databases.
|
||||
"""
|
||||
files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))
|
||||
test_databases = set() # type: Set[str]
|
||||
try:
|
||||
with connection.cursor() as cursor:
|
||||
cursor.execute("SELECT datname FROM pg_database;")
|
||||
rows = cursor.fetchall()
|
||||
for row in rows:
|
||||
if 'zulip_test_template_' in row[0]:
|
||||
test_databases.add(row[0])
|
||||
except ProgrammingError:
|
||||
pass
|
||||
|
||||
databases_in_use = set() # type: Set[str]
|
||||
for file in files:
|
||||
if round(time.time()) - os.path.getmtime(file) < expiry_time:
|
||||
with open(file, "r") as f:
|
||||
for line in f:
|
||||
databases_in_use.add('zulip_test_template_{}'.format(line).rstrip())
|
||||
else:
|
||||
# Any test-backend run older than expiry_time can be
|
||||
# cleaned up, both the database and the file listing its
|
||||
# databases.
|
||||
os.remove(file)
|
||||
|
||||
databases_to_drop = test_databases - databases_in_use
|
||||
|
||||
if not databases_to_drop:
|
||||
return 0
|
||||
|
||||
commands = "\n".join("DROP DATABASE IF EXISTS %s;" % (db,) for db in databases_to_drop)
|
||||
p = subprocess.Popen(["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost",
|
||||
"postgres", "zulip_test"],
|
||||
stdin=subprocess.PIPE)
|
||||
p.communicate(input=commands.encode())
|
||||
if p.returncode != 0:
|
||||
raise RuntimeError("Error cleaning up test databases!")
|
||||
return len(databases_to_drop)
|
||||
|
|
Loading…
Reference in New Issue