zulip/zerver/lib/test_fixtures.py

334 lines
13 KiB
Python
Raw Normal View History

import json
import os
import re
import hashlib
import subprocess
import sys
from typing import Any, List, Set
from importlib import import_module
from io import StringIO
import glob
import time
import shutil
from django.db import connections, DEFAULT_DB_ALIAS, ProgrammingError, \
connection
from django.db.utils import OperationalError
from django.apps import apps
from django.conf import settings
from django.core.management import call_command
from django.utils.module_loading import module_has_submodule
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \
file_or_package_hash_updated, TEMPLATE_DATABASE_DIR
class DatabaseType:
def __init__(self, database_name: str, settings: str, migration_status: str):
self.database_name = database_name
self.settings = settings
self.migration_status = migration_status
UUID_VAR_DIR = get_dev_uuid_var_path()
FILENAME_SPLITTER = re.compile(r'[\W\-_]')
DEV_DATABASE_TYPE = DatabaseType(database_name='zulip',
settings='zproject.settings',
migration_status=os.path.join(UUID_VAR_DIR, "migration_status_dev"))
TEST_DATABASE_TYPE = DatabaseType(database_name='zulip_test_template',
settings='zproject.test_settings',
migration_status=os.path.join(UUID_VAR_DIR, 'migration_status_test'))
def run_db_migrations(platform: str) -> None:
if platform == 'dev':
migration_status_file = 'migration_status_dev'
settings = 'zproject.settings'
test_fixtures: Fix bug with run_db_migrations for test platform. In this commit we are fixing a kinda serious un-noticed bug with the way run_db_migrations worked for test db. Basically run_db_migrations runs new migrations on db (dev or test). When we talk about the dev platform this process is straight forward. We have a single DB zulip which was once created and now has some data. Introduction of new migration causes a schema change or does something else but bottom line being we just migrate the zulip DB and stuff works fine. Now coming to zulip test db (zulip_test) situation is a bit complex in comparision to dev db. Basically this is because we make use of what we call zulip_test_template to make test fixture restoration after tests run fast. Now before we introduced the performance optimisation of just doing migrations when possible, introduction of a migration would ideally result in provisioning do a full rebuild of the test database. When that used to happen sequence of events used to be something like this: * Create a zulip_test db from zulip_test_base template (An absolute basic schema holding) * Migrate and populate the zulip_test db. * Create/Re-create zulip_test_template from the latest zulip_test. Now after we introduced just do migrations instead of full db rebuild when possible, what used to happen was that zulip_test db got successfully migrated but when test suites would run they would try to create zulip_test from zulip_test_template (so that individual tests don't affect each other on db level). This is where the problem resides; zulip_test_template wasn't migrated and we just scrapped zulip_test and re-created it using zulip_test_template as a template and hence zulip_test will not hold the latest schema. This is what we fix in this commit.
2018-07-09 08:56:55 +02:00
db_name = 'ZULIP_DB_NAME=zulip'
elif platform == 'test':
migration_status_file = 'migration_status_test'
settings = 'zproject.test_settings'
test_fixtures: Fix bug with run_db_migrations for test platform. In this commit we are fixing a kinda serious un-noticed bug with the way run_db_migrations worked for test db. Basically run_db_migrations runs new migrations on db (dev or test). When we talk about the dev platform this process is straight forward. We have a single DB zulip which was once created and now has some data. Introduction of new migration causes a schema change or does something else but bottom line being we just migrate the zulip DB and stuff works fine. Now coming to zulip test db (zulip_test) situation is a bit complex in comparision to dev db. Basically this is because we make use of what we call zulip_test_template to make test fixture restoration after tests run fast. Now before we introduced the performance optimisation of just doing migrations when possible, introduction of a migration would ideally result in provisioning do a full rebuild of the test database. When that used to happen sequence of events used to be something like this: * Create a zulip_test db from zulip_test_base template (An absolute basic schema holding) * Migrate and populate the zulip_test db. * Create/Re-create zulip_test_template from the latest zulip_test. Now after we introduced just do migrations instead of full db rebuild when possible, what used to happen was that zulip_test db got successfully migrated but when test suites would run they would try to create zulip_test from zulip_test_template (so that individual tests don't affect each other on db level). This is where the problem resides; zulip_test_template wasn't migrated and we just scrapped zulip_test and re-created it using zulip_test_template as a template and hence zulip_test will not hold the latest schema. This is what we fix in this commit.
2018-07-09 08:56:55 +02:00
db_name = 'ZULIP_DB_NAME=zulip_test_template'
# We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
# the command line rather than just calling the migration
# functions, because Django doesn't support changing settings like
# what the database is as runtime.
test_fixtures: Fix bug with run_db_migrations for test platform. In this commit we are fixing a kinda serious un-noticed bug with the way run_db_migrations worked for test db. Basically run_db_migrations runs new migrations on db (dev or test). When we talk about the dev platform this process is straight forward. We have a single DB zulip which was once created and now has some data. Introduction of new migration causes a schema change or does something else but bottom line being we just migrate the zulip DB and stuff works fine. Now coming to zulip test db (zulip_test) situation is a bit complex in comparision to dev db. Basically this is because we make use of what we call zulip_test_template to make test fixture restoration after tests run fast. Now before we introduced the performance optimisation of just doing migrations when possible, introduction of a migration would ideally result in provisioning do a full rebuild of the test database. When that used to happen sequence of events used to be something like this: * Create a zulip_test db from zulip_test_base template (An absolute basic schema holding) * Migrate and populate the zulip_test db. * Create/Re-create zulip_test_template from the latest zulip_test. Now after we introduced just do migrations instead of full db rebuild when possible, what used to happen was that zulip_test db got successfully migrated but when test suites would run they would try to create zulip_test from zulip_test_template (so that individual tests don't affect each other on db level). This is where the problem resides; zulip_test_template wasn't migrated and we just scrapped zulip_test and re-created it using zulip_test_template as a template and hence zulip_test will not hold the latest schema. This is what we fix in this commit.
2018-07-09 08:56:55 +02:00
# Also we export DB_NAME which is ignored by dev platform but
# recognised by test platform and used to migrate correct db.
run(['env', ('DJANGO_SETTINGS_MODULE=%s' % (settings,)), db_name,
test_fixtures: Fix bug with run_db_migrations for test platform. In this commit we are fixing a kinda serious un-noticed bug with the way run_db_migrations worked for test db. Basically run_db_migrations runs new migrations on db (dev or test). When we talk about the dev platform this process is straight forward. We have a single DB zulip which was once created and now has some data. Introduction of new migration causes a schema change or does something else but bottom line being we just migrate the zulip DB and stuff works fine. Now coming to zulip test db (zulip_test) situation is a bit complex in comparision to dev db. Basically this is because we make use of what we call zulip_test_template to make test fixture restoration after tests run fast. Now before we introduced the performance optimisation of just doing migrations when possible, introduction of a migration would ideally result in provisioning do a full rebuild of the test database. When that used to happen sequence of events used to be something like this: * Create a zulip_test db from zulip_test_base template (An absolute basic schema holding) * Migrate and populate the zulip_test db. * Create/Re-create zulip_test_template from the latest zulip_test. Now after we introduced just do migrations instead of full db rebuild when possible, what used to happen was that zulip_test db got successfully migrated but when test suites would run they would try to create zulip_test from zulip_test_template (so that individual tests don't affect each other on db level). This is where the problem resides; zulip_test_template wasn't migrated and we just scrapped zulip_test and re-created it using zulip_test_template as a template and hence zulip_test will not hold the latest schema. This is what we fix in this commit.
2018-07-09 08:56:55 +02:00
'./manage.py', 'migrate', '--no-input'])
run(['env', ('DJANGO_SETTINGS_MODULE=%s' % (settings,)), db_name,
test_fixtures: Fix bug with run_db_migrations for test platform. In this commit we are fixing a kinda serious un-noticed bug with the way run_db_migrations worked for test db. Basically run_db_migrations runs new migrations on db (dev or test). When we talk about the dev platform this process is straight forward. We have a single DB zulip which was once created and now has some data. Introduction of new migration causes a schema change or does something else but bottom line being we just migrate the zulip DB and stuff works fine. Now coming to zulip test db (zulip_test) situation is a bit complex in comparision to dev db. Basically this is because we make use of what we call zulip_test_template to make test fixture restoration after tests run fast. Now before we introduced the performance optimisation of just doing migrations when possible, introduction of a migration would ideally result in provisioning do a full rebuild of the test database. When that used to happen sequence of events used to be something like this: * Create a zulip_test db from zulip_test_base template (An absolute basic schema holding) * Migrate and populate the zulip_test db. * Create/Re-create zulip_test_template from the latest zulip_test. Now after we introduced just do migrations instead of full db rebuild when possible, what used to happen was that zulip_test db got successfully migrated but when test suites would run they would try to create zulip_test from zulip_test_template (so that individual tests don't affect each other on db level). This is where the problem resides; zulip_test_template wasn't migrated and we just scrapped zulip_test and re-created it using zulip_test_template as a template and hence zulip_test will not hold the latest schema. This is what we fix in this commit.
2018-07-09 08:56:55 +02:00
'./manage.py', 'get_migration_status',
'--output=%s' % (migration_status_file,)])
def update_test_databases_if_required(use_force: bool=False,
rebuild_test_database: bool=False) -> None:
"""Checks whether the zulip_test_template database template, is
consistent with our database migrations; if not, it updates it
in the fastest way possible:
* If all we need to do is add some migrations, just runs those
migrations on the template database.
* Otherwise, we rebuild the test template database from scratch.
The default behavior is sufficient for the `test-backend` use
case, where the test runner code will clone directly from the
template database.
The `rebuild_test_database` option (used by our Casper tests) asks
us to drop and re-cloning the zulip_test database from the
template so those test suites can run with a fresh copy.
If use_force is specified, it will always do a full rebuild.
"""
generate_fixtures_command = ['tools/setup/generate-fixtures']
test_template_db_status = template_database_status('test')
if use_force or test_template_db_status == 'needs_rebuild':
generate_fixtures_command.append('--force')
elif test_template_db_status == 'run_migrations':
run_db_migrations('test')
elif not rebuild_test_database:
return
subprocess.check_call(generate_fixtures_command)
def database_exists(database_name: str) -> bool:
try:
connection = connections[DEFAULT_DB_ALIAS]
with connection.cursor() as cursor:
cursor.execute("SELECT 1 from pg_database WHERE datname='{}';".format(database_name))
return_value = bool(cursor.fetchone())
connections.close_all()
return return_value
except OperationalError:
return False
def get_migration_status(**options: Any) -> str:
verbosity = options.get('verbosity', 1)
for app_config in apps.get_app_configs():
if module_has_submodule(app_config.module, "management"):
import_module('.management', app_config.name)
app_label = options['app_label'] if options.get('app_label') else None
db = options.get('database', DEFAULT_DB_ALIAS)
out = StringIO()
command_args = ['--list', ]
if app_label:
command_args.append(app_label)
call_command(
'showmigrations',
*command_args,
database=db,
no_color=options.get('no_color', False),
settings=options.get('settings', os.environ['DJANGO_SETTINGS_MODULE']),
stdout=out,
traceback=options.get('traceback', True),
verbosity=verbosity,
)
connections.close_all()
out.seek(0)
output = out.read()
return re.sub(r'\x1b\[(1|0)m', '', output)
def extract_migrations_as_list(migration_status: str) -> List[str]:
MIGRATIONS_RE = re.compile(r'\[[X| ]\] (\d+_.+)\n')
return MIGRATIONS_RE.findall(migration_status)
def what_to_do_with_migrations(migration_file: str, **options: Any) -> str:
if not os.path.exists(migration_file):
return 'scrap'
with open(migration_file) as f:
previous_migration_status = f.read()
current_migration_status = get_migration_status(**options)
all_curr_migrations = extract_migrations_as_list(current_migration_status)
all_prev_migrations = extract_migrations_as_list(previous_migration_status)
if len(all_curr_migrations) < len(all_prev_migrations):
return 'scrap'
for migration in all_prev_migrations:
if migration not in all_curr_migrations:
return 'scrap'
if len(all_curr_migrations) == len(all_prev_migrations):
return 'migrations_are_latest'
return 'migrate'
def _get_hash_file_path(source_file_path: str, status_dir: str) -> str:
basename = os.path.basename(source_file_path)
filename = '_'.join(FILENAME_SPLITTER.split(basename)).lower()
return os.path.join(status_dir, filename)
def _check_hash(source_hash_file: str, target_content: str) -> bool:
"""
This function has a side effect of creating a new hash file or
updating the old hash file.
"""
target_hash_content = hashlib.sha1(target_content.encode('utf8')).hexdigest()
if not os.path.exists(source_hash_file):
source_hash_content = None
else:
with open(source_hash_file) as f:
source_hash_content = f.read().strip()
with open(source_hash_file, 'w') as f:
f.write(target_hash_content)
return source_hash_content == target_hash_content
def check_file_hash(target_file_path: str, status_dir: str) -> bool:
source_hash_file = _get_hash_file_path(target_file_path, status_dir)
with open(target_file_path) as f:
target_content = f.read()
return _check_hash(source_hash_file, target_content)
def check_setting_hash(setting_name: str, status_dir: str) -> bool:
hash_filename = '_'.join(['settings', setting_name])
source_hash_file = os.path.join(status_dir, hash_filename)
target_content = json.dumps(getattr(settings, setting_name), sort_keys=True)
return _check_hash(source_hash_file, target_content)
def template_database_status(database_type: str) -> str:
# This function returns a status string specifying the type of
# state the template db is in and thus the kind of action required.
if database_type == 'dev':
database = DEV_DATABASE_TYPE
elif database_type == 'test':
database = TEST_DATABASE_TYPE
check_files = [
'zilencer/management/commands/populate_db.py',
'zerver/lib/bulk_create.py',
'zerver/lib/generate_test_data.py',
'zerver/lib/server_initialization.py',
'tools/setup/postgres-init-test-db',
'tools/setup/postgres-init-dev-db',
'zerver/migrations/0258_enable_online_push_notifications_default.py',
]
check_settings = [
'REALM_INTERNAL_BOTS',
]
# Construct a directory to store hashes named after the target database.
status_dir = os.path.join(UUID_VAR_DIR, database.database_name + '_db_status')
if not os.path.exists(status_dir):
os.mkdir(status_dir)
if not database_exists(database.database_name):
# TODO: It's possible that `database_exists` will
# return `False` even though the database
# exists, but we just have the wrong password,
# probably due to changing the secrets file.
#
# The only problem this causes is that we waste
# some time rebuilding the whole database, but
# it's better to err on that side, generally.
return 'needs_rebuild'
# To ensure Python evaluates all the hash tests (and thus creates the
# hash files about the current state), we evaluate them in a
# list and then process the result
files_hash_status = all([check_file_hash(fn, status_dir) for fn in check_files])
settings_hash_status = all([check_setting_hash(setting_name, status_dir)
for setting_name in check_settings])
hash_status = files_hash_status and settings_hash_status
if not hash_status:
return 'needs_rebuild'
# Here we hash and compare our migration files before doing
# the work of seeing what to do with them; if there are no
# changes, we can safely assume we don't need to run
# migrations without spending a few 100ms parsing all the
# Python migration code.
paths = [
*glob.glob('*/migrations/*.py'),
'requirements/dev.txt',
]
check_migrations = file_or_package_hash_updated(paths, "migrations_hash_" + database.database_name)
if not check_migrations:
return 'current'
migration_op = what_to_do_with_migrations(database.migration_status, settings=database.settings)
if migration_op == 'scrap':
return 'needs_rebuild'
if migration_op == 'migrate':
return 'run_migrations'
return 'current'
def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
"""The logic in zerver/lib/test_runner.py tries to delete all the
temporary test databases generated by test-backend threads, but it
cannot guarantee it handles all race conditions correctly. This
is a catch-all function designed to delete any that might have
been leaked due to crashes (etc.). The high-level algorithm is to:
* Delete every database with a name like zulip_test_template_*
* Unless it is registered in a file under TEMPLATE_DATABASE_DIR as
part of a currently running test-backend invocation
* And that file is less expiry_time old.
This should ensure we ~never break a running test-backend process,
while also ensuring we will eventually delete all leaked databases.
"""
files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))
test_databases = set() # type: Set[str]
try:
with connection.cursor() as cursor:
cursor.execute("SELECT datname FROM pg_database;")
rows = cursor.fetchall()
for row in rows:
if 'zulip_test_template_' in row[0]:
test_databases.add(row[0])
except ProgrammingError:
pass
databases_in_use = set() # type: Set[str]
for file in files:
if round(time.time()) - os.path.getmtime(file) < expiry_time:
with open(file) as f:
for line in f:
databases_in_use.add('zulip_test_template_{}'.format(line).rstrip())
else:
# Any test-backend run older than expiry_time can be
# cleaned up, both the database and the file listing its
# databases.
os.remove(file)
databases_to_drop = test_databases - databases_in_use
if not databases_to_drop:
return 0
commands = "\n".join("DROP DATABASE IF EXISTS %s;" % (db,) for db in databases_to_drop)
p = subprocess.Popen(["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost",
"postgres", "zulip_test"],
stdin=subprocess.PIPE)
p.communicate(input=commands.encode())
if p.returncode != 0:
raise RuntimeError("Error cleaning up test databases!")
return len(databases_to_drop)
def remove_test_run_directories(expiry_time: int = 60 * 60) -> int:
removed = 0
directories = glob.glob(os.path.join(UUID_VAR_DIR, "test-backend", "run_*"))
for test_run in directories:
if round(time.time()) - os.path.getmtime(test_run) > expiry_time:
try:
shutil.rmtree(test_run)
removed += 1
except FileNotFoundError:
pass
return removed