From 6b033c7909c21fb3e0c4581a54a554ffa658858f Mon Sep 17 00:00:00 2001 From: Wyatt Hoodes Date: Fri, 7 Jun 2019 15:57:19 -1000 Subject: [PATCH] test-backend: Add steps to deal with potential database leaks. A function was written in `test_fixtures.py` to drop a test database template if the corresponding database id doesn't belong to a file. Alongside this fact, every file that is written is removed after 60 minutes. Meaning any potential database template can never exist longer than one hour. This follow-up work was added to deal with the potential race conditions when running `test-backend`. Ensuring that all templates are properly dealt with. Essentially rewritten by tabbott for cleanliness. Fixes the remainder of #12426. --- tools/lib/provision.py | 7 ++++- tools/test-backend | 5 +++ version.py | 2 +- zerver/lib/test_fixtures.py | 62 +++++++++++++++++++++++++++++++++++-- 4 files changed, 71 insertions(+), 5 deletions(-) diff --git a/tools/lib/provision.py b/tools/lib/provision.py index 237ef0f954..d9cff23741 100755 --- a/tools/lib/provision.py +++ b/tools/lib/provision.py @@ -526,7 +526,8 @@ def main(options): import django django.setup() - from zerver.lib.test_fixtures import template_database_status, run_db_migrations + from zerver.lib.test_fixtures import template_database_status, run_db_migrations, \ + destroy_leaked_test_databases try: from zerver.lib.queue import SimpleQueueClient @@ -574,6 +575,10 @@ def main(options): else: print("No need to run `manage.py compilemessages`.") + destroyed = destroy_leaked_test_databases() + if destroyed: + print("Dropped %s stale test databases!" % (destroyed,)) + run(["scripts/lib/clean-unused-caches"]) # Keeping this cache file around can cause eslint to throw diff --git a/tools/test-backend b/tools/test-backend index 30fe74ed81..301aea8898 100755 --- a/tools/test-backend +++ b/tools/test-backend @@ -461,6 +461,11 @@ def main() -> None: # an important clue as to why tests fail. report_slow_tests() + # Ideally, we'd check for any leaked test databases here; + # but that needs some hackery with database names. + # + # destroy_leaked_test_databases() + # We'll have printed whether tests passed or failed above sys.exit(bool(failures)) diff --git a/version.py b/version.py index 51dd59fc7f..ac1331adb4 100644 --- a/version.py +++ b/version.py @@ -21,4 +21,4 @@ LATEST_RELEASE_ANNOUNCEMENT = "https://blog.zulip.org/2019/03/01/zulip-2-0-relea # Typically, adding a dependency only requires a minor version bump, and # removing a dependency requires a major version bump. -PROVISION_VERSION = '34.2' +PROVISION_VERSION = '34.3' diff --git a/zerver/lib/test_fixtures.py b/zerver/lib/test_fixtures.py index 400d7a2fd6..95c3e6f72f 100644 --- a/zerver/lib/test_fixtures.py +++ b/zerver/lib/test_fixtures.py @@ -5,12 +5,14 @@ import re import hashlib import subprocess import sys -from typing import Any, List, Optional +from typing import Any, List, Optional, Set from importlib import import_module from io import StringIO import glob +import time -from django.db import connections, DEFAULT_DB_ALIAS +from django.db import connections, DEFAULT_DB_ALIAS, ProgrammingError, \ + connection from django.db.utils import OperationalError from django.apps import apps from django.conf import settings @@ -18,7 +20,8 @@ from django.core.management import call_command from django.utils.module_loading import module_has_submodule sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) -from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, file_or_package_hash_updated +from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \ + file_or_package_hash_updated, TEMPLATE_DATABASE_DIR UUID_VAR_DIR = get_dev_uuid_var_path() FILENAME_SPLITTER = re.compile(r'[\W\-_]') @@ -240,3 +243,56 @@ def template_database_status( return 'current' return 'needs_rebuild' + +def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int: + """The logic in zerver/lib/test_runner.py tries to delete all the + temporary test databases generated by test-backend threads, but it + cannot guarantee it handles all race conditions correctly. This + is a catch-all function designed to delete any that might have + been leaked due to crashes (etc.). The high-level algorithm is to: + + * Delete every database with a name like zulip_test_template_* + * Unless it is registered in a file under TEMPLATE_DATABASE_DIR as + part of a currently running test-backend invocation + * And that file is less expiry_time old. + + This should ensure we ~never break a running test-backend process, + while also ensuring we will eventually delete all leaked databases. + """ + files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*")) + test_databases = set() # type: Set[str] + try: + with connection.cursor() as cursor: + cursor.execute("SELECT datname FROM pg_database;") + rows = cursor.fetchall() + for row in rows: + if 'zulip_test_template_' in row[0]: + test_databases.add(row[0]) + except ProgrammingError: + pass + + databases_in_use = set() # type: Set[str] + for file in files: + if round(time.time()) - os.path.getmtime(file) < expiry_time: + with open(file, "r") as f: + for line in f: + databases_in_use.add('zulip_test_template_{}'.format(line).rstrip()) + else: + # Any test-backend run older than expiry_time can be + # cleaned up, both the database and the file listing its + # databases. + os.remove(file) + + databases_to_drop = test_databases - databases_in_use + + if not databases_to_drop: + return 0 + + commands = "\n".join("DROP DATABASE IF EXISTS %s;" % (db,) for db in databases_to_drop) + p = subprocess.Popen(["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost", + "postgres", "zulip_test"], + stdin=subprocess.PIPE) + p.communicate(input=commands.encode()) + if p.returncode != 0: + raise RuntimeError("Error cleaning up test databases!") + return len(databases_to_drop)