zulip/zerver/lib/test_fixtures.py

import json
import os
import re
import hashlib
import subprocess
import sys
from typing import Any, List, Set
from importlib import import_module
from io import StringIO
import glob
import time
import shutil

from django.db import connections, DEFAULT_DB_ALIAS, ProgrammingError, \
    connection
from django.db.utils import OperationalError
from django.apps import apps
from django.conf import settings
from django.core.management import call_command
from django.utils.module_loading import module_has_submodule

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from scripts.lib.zulip_tools import get_dev_uuid_var_path, run, \
    file_or_package_hash_updated, TEMPLATE_DATABASE_DIR

class DatabaseType:
    def __init__(self, database_name: str, settings: str, migration_status: str):
        self.database_name = database_name
        self.settings = settings
        self.migration_status = migration_status

UUID_VAR_DIR = get_dev_uuid_var_path()
FILENAME_SPLITTER = re.compile(r'[\W\-_]')

DEV_DATABASE_TYPE = DatabaseType(database_name='zulip',
                                 settings='zproject.settings',
                                 migration_status=os.path.join(UUID_VAR_DIR, "migration_status_dev"))

TEST_DATABASE_TYPE = DatabaseType(database_name='zulip_test_template',
                                  settings='zproject.test_settings',
                                  migration_status=os.path.join(UUID_VAR_DIR, 'migration_status_test'))

def run_db_migrations(platform: str) -> None:
    if platform == 'dev':
        migration_status_file = 'migration_status_dev'
        settings = 'zproject.settings'
        db_name = 'ZULIP_DB_NAME=zulip'
    elif platform == 'test':
        migration_status_file = 'migration_status_test'
        settings = 'zproject.test_settings'
        db_name = 'ZULIP_DB_NAME=zulip_test_template'

    # We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
    # the command line rather than just calling the migration
    # functions, because Django doesn't support changing settings like
    # what the database is as runtime.
    # Also we export DB_NAME which is ignored by dev platform but
    # recognised by test platform and used to migrate correct db.
    run(['env', ('DJANGO_SETTINGS_MODULE=%s' % (settings,)), db_name,
         './manage.py', 'migrate', '--no-input'])
    run(['env', ('DJANGO_SETTINGS_MODULE=%s' % (settings,)), db_name,
         './manage.py', 'get_migration_status',
         '--output=%s' % (migration_status_file,)])

def update_test_databases_if_required(use_force: bool=False,
                                      rebuild_test_database: bool=False) -> None:
    """Checks whether the zulip_test_template database template, is
    consistent with our database migrations; if not, it updates it
    in the fastest way possible:

    * If all we need to do is add some migrations, just runs those
      migrations on the template database.
    * Otherwise, we rebuild the test template database from scratch.

    The default behavior is sufficient for the `test-backend` use
    case, where the test runner code will clone directly from the
    template database.

    The `rebuild_test_database` option (used by our Casper tests) asks
    us to drop and re-cloning the zulip_test database from the
    template so those test suites can run with a fresh copy.

    If use_force is specified, it will always do a full rebuild.
    """
    generate_fixtures_command = ['tools/setup/generate-fixtures']
    test_template_db_status = template_database_status('test')
    if use_force or test_template_db_status == 'needs_rebuild':
        generate_fixtures_command.append('--force')
    elif test_template_db_status == 'run_migrations':
        run_db_migrations('test')
    elif not rebuild_test_database:
        return
    subprocess.check_call(generate_fixtures_command)

def database_exists(database_name: str) -> bool:
    try:
        connection = connections[DEFAULT_DB_ALIAS]

        with connection.cursor() as cursor:
            cursor.execute("SELECT 1 from pg_database WHERE datname='{}';".format(database_name))
            return_value = bool(cursor.fetchone())
        connections.close_all()
        return return_value
    except OperationalError:
        return False

def get_migration_status(**options: Any) -> str:
    verbosity = options.get('verbosity', 1)

    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module('.management', app_config.name)

    app_label = options['app_label'] if options.get('app_label') else None
    db = options.get('database', DEFAULT_DB_ALIAS)
    out = StringIO()
    command_args = ['--list', ]
    if app_label:
        command_args.append(app_label)

    call_command(
        'showmigrations',
        *command_args,
        database=db,
        no_color=options.get('no_color', False),
        settings=options.get('settings', os.environ['DJANGO_SETTINGS_MODULE']),
        stdout=out,
        traceback=options.get('traceback', True),
        verbosity=verbosity,
    )
    connections.close_all()
    out.seek(0)
    output = out.read()
    return re.sub(r'\x1b\[(1|0)m', '', output)

def extract_migrations_as_list(migration_status: str) -> List[str]:
    MIGRATIONS_RE = re.compile(r'\[[X| ]\] (\d+_.+)\n')
    return MIGRATIONS_RE.findall(migration_status)

def what_to_do_with_migrations(migration_file: str, **options: Any) -> str:
    if not os.path.exists(migration_file):
        return 'scrap'

    with open(migration_file) as f:
        previous_migration_status = f.read()
    current_migration_status = get_migration_status(**options)
    all_curr_migrations = extract_migrations_as_list(current_migration_status)
    all_prev_migrations = extract_migrations_as_list(previous_migration_status)

    if len(all_curr_migrations) < len(all_prev_migrations):
        return 'scrap'

    for migration in all_prev_migrations:
        if migration not in all_curr_migrations:
            return 'scrap'

    if len(all_curr_migrations) == len(all_prev_migrations):
        return 'migrations_are_latest'

    return 'migrate'

def _get_hash_file_path(source_file_path: str, status_dir: str) -> str:
    basename = os.path.basename(source_file_path)
    filename = '_'.join(FILENAME_SPLITTER.split(basename)).lower()
    return os.path.join(status_dir, filename)

def _check_hash(source_hash_file: str, target_content: str) -> bool:
    """
    This function has a side effect of creating a new hash file or
    updating the old hash file.
    """
    target_hash_content = hashlib.sha1(target_content.encode('utf8')).hexdigest()

    if not os.path.exists(source_hash_file):
        source_hash_content = None
    else:
        with open(source_hash_file) as f:
            source_hash_content = f.read().strip()

    with open(source_hash_file, 'w') as f:
        f.write(target_hash_content)

    return source_hash_content == target_hash_content

def check_file_hash(target_file_path: str, status_dir: str) -> bool:
    source_hash_file = _get_hash_file_path(target_file_path, status_dir)

    with open(target_file_path) as f:
        target_content = f.read()

    return _check_hash(source_hash_file, target_content)

def check_setting_hash(setting_name: str, status_dir: str) -> bool:
    hash_filename = '_'.join(['settings', setting_name])
    source_hash_file = os.path.join(status_dir, hash_filename)

    target_content = json.dumps(getattr(settings, setting_name), sort_keys=True)

    return _check_hash(source_hash_file, target_content)

def template_database_status(database_type: str) -> str:
    # This function returns a status string specifying the type of
    # state the template db is in and thus the kind of action required.
    if database_type == 'dev':
        database = DEV_DATABASE_TYPE
    elif database_type == 'test':
        database = TEST_DATABASE_TYPE

    check_files = [
        'zilencer/management/commands/populate_db.py',
        'zerver/lib/bulk_create.py',
        'zerver/lib/generate_test_data.py',
        'zerver/lib/server_initialization.py',
        'tools/setup/postgres-init-test-db',
        'tools/setup/postgres-init-dev-db',
        'zerver/migrations/0258_enable_online_push_notifications_default.py',
    ]
    check_settings = [
        'REALM_INTERNAL_BOTS',
    ]

    # Construct a directory to store hashes named after the target database.
    status_dir = os.path.join(UUID_VAR_DIR, database.database_name + '_db_status')
    if not os.path.exists(status_dir):
        os.mkdir(status_dir)

    if not database_exists(database.database_name):
        # TODO: It's possible that `database_exists` will
        #       return `False` even though the database
        #       exists, but we just have the wrong password,
        #       probably due to changing the secrets file.
        #
        #       The only problem this causes is that we waste
        #       some time rebuilding the whole database, but
        #       it's better to err on that side, generally.
        return 'needs_rebuild'

    # To ensure Python evaluates all the hash tests (and thus creates the
    # hash files about the current state), we evaluate them in a
    # list and then process the result
    files_hash_status = all([check_file_hash(fn, status_dir) for fn in check_files])
    settings_hash_status = all([check_setting_hash(setting_name, status_dir)
                                for setting_name in check_settings])
    hash_status = files_hash_status and settings_hash_status
    if not hash_status:
        return 'needs_rebuild'

    # Here we hash and compare our migration files before doing
    # the work of seeing what to do with them; if there are no
    # changes, we can safely assume we don't need to run
    # migrations without spending a few 100ms parsing all the
    # Python migration code.
    paths = [
        *glob.glob('*/migrations/*.py'),
        'requirements/dev.txt',
    ]
    check_migrations = file_or_package_hash_updated(paths, "migrations_hash_" + database.database_name)
    if not check_migrations:
        return 'current'

    migration_op = what_to_do_with_migrations(database.migration_status, settings=database.settings)
    if migration_op == 'scrap':
        return 'needs_rebuild'

    if migration_op == 'migrate':
        return 'run_migrations'

    return 'current'

def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
    """The logic in zerver/lib/test_runner.py tries to delete all the
    temporary test databases generated by test-backend threads, but it
    cannot guarantee it handles all race conditions correctly.  This
    is a catch-all function designed to delete any that might have
    been leaked due to crashes (etc.).  The high-level algorithm is to:

    * Delete every database with a name like zulip_test_template_*
    * Unless it is registered in a file under TEMPLATE_DATABASE_DIR as
      part of a currently running test-backend invocation
    * And that file is less expiry_time old.

    This should ensure we ~never break a running test-backend process,
    while also ensuring we will eventually delete all leaked databases.
    """
    files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))
    test_databases = set()  # type: Set[str]
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT datname FROM pg_database;")
            rows = cursor.fetchall()
            for row in rows:
                if 'zulip_test_template_' in row[0]:
                    test_databases.add(row[0])
    except ProgrammingError:
        pass

    databases_in_use = set()  # type: Set[str]
    for file in files:
        if round(time.time()) - os.path.getmtime(file) < expiry_time:
            with open(file) as f:
                for line in f:
                    databases_in_use.add('zulip_test_template_{}'.format(line).rstrip())
        else:
            # Any test-backend run older than expiry_time can be
            # cleaned up, both the database and the file listing its
            # databases.
            os.remove(file)

    databases_to_drop = test_databases - databases_in_use

    if not databases_to_drop:
        return 0

    commands = "\n".join("DROP DATABASE IF EXISTS %s;" % (db,) for db in databases_to_drop)
    p = subprocess.Popen(["psql", "-q", "-v", "ON_ERROR_STOP=1", "-h", "localhost",
                          "postgres", "zulip_test"],
                         stdin=subprocess.PIPE)
    p.communicate(input=commands.encode())
    if p.returncode != 0:
        raise RuntimeError("Error cleaning up test databases!")
    return len(databases_to_drop)

def remove_test_run_directories(expiry_time: int = 60 * 60) -> int:
    removed = 0
    directories = glob.glob(os.path.join(UUID_VAR_DIR, "test-backend", "run_*"))
    for test_run in directories:
        if round(time.time()) - os.path.getmtime(test_run) > expiry_time:
            try:
                shutil.rmtree(test_run)
                removed += 1
            except FileNotFoundError:
                pass
    return removed