diff --git a/version.py b/version.py
index 9e71c76ca3..992e1bf972 100644
--- a/version.py
+++ b/version.py
@@ -44,4 +44,4 @@ API_FEATURE_LEVEL = 1
 # historical commits sharing the same major version, in which case a
 # minor version bump suffices.
 
-PROVISION_VERSION = '79.3'
+PROVISION_VERSION = '80.0'
diff --git a/zerver/lib/test_fixtures.py b/zerver/lib/test_fixtures.py
index 59961af0e0..20c5e46c8e 100644
--- a/zerver/lib/test_fixtures.py
+++ b/zerver/lib/test_fixtures.py
@@ -1,7 +1,7 @@
 import json
 import os
 import re
-import hashlib
+import shutil
 import subprocess
 import sys
 from typing import Any, List, Set
@@ -26,7 +26,35 @@ from scripts.lib.zulip_tools import (
 )
 
 UUID_VAR_DIR = get_dev_uuid_var_path()
-FILENAME_SPLITTER = re.compile(r'[\W\-_]')
+
+IMPORTANT_FILES = [
+    'zilencer/management/commands/populate_db.py',
+    'zerver/lib/bulk_create.py',
+    'zerver/lib/generate_test_data.py',
+    'zerver/lib/server_initialization.py',
+    'tools/setup/postgres-init-test-db',
+    'tools/setup/postgres-init-dev-db',
+    'zerver/migrations/0258_enable_online_push_notifications_default.py',
+]
+
+VERBOSE_MESSAGE_ABOUT_HASH_TRANSITION = '''
+    NOTE!!!!
+
+    We are rebuilding your database for a one-time transition.
+
+    We have a hashing scheme that we use to detect whether any
+    important files used in the construction of the database
+    have changed.
+
+    We are changing that scheme so it only uses one file
+    instead of a directory of files.
+
+    In order to prevent errors due to this transition, we are
+    doing a one-time rebuild of your database. This should
+    be the last time this happens (for this particular reason,
+    at least), unless you go back to older branches.
+
+'''
 
 def migration_paths() -> List[str]:
     return [
@@ -38,6 +66,7 @@ class Database:
     def __init__(self, platform: str, database_name: str, settings: str):
         self.database_name = database_name
         self.settings = settings
+        self.digest_name = 'db_files_hash_for_' + platform
         self.migration_status_file = 'migration_status_' + platform
         self.migration_status_path = os.path.join(
             UUID_VAR_DIR,
@@ -45,6 +74,20 @@ class Database:
         )
         self.migration_digest_file = "migrations_hash_" + database_name
 
+    def important_settings(self) -> List[str]:
+        # Returns the JSON-serialized values of the bot-related settings
+        # that are fed into the digest alongside IMPORTANT_FILES; a
+        # setting that is not defined is serialized as `{}`.
+        def get(setting_name: str) -> str:
+            value = getattr(settings, setting_name, {})
+            return json.dumps(value, sort_keys=True)
+
+        return [
+            get('INTERNAL_BOTS'),
+            get('REALM_INTERNAL_BOTS'),
+            get('DISABLED_REALM_INTERNAL_BOTS'),
+        ]
+
     def run_db_migrations(self) -> None:
         # We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
         # the command line rather than just calling the migration
@@ -106,29 +149,38 @@ class Database:
         except OperationalError:
             return False
 
+    def files_or_settings_have_changed(self) -> bool:
+        # Returns True when the database should be rebuilt, either
+        # because a legacy per-database hash directory was found (and
+        # removed) or because the digest is reported as obsolete.
+        database_name = self.database_name
+
+        # Deal with legacy hash files. We can kill off this code when
+        # enough time has passed since April 2020 that we're not
+        # worried about anomalies doing `git bisect`--probably a few
+        # months is sufficient.
+        legacy_status_dir = os.path.join(UUID_VAR_DIR, database_name + '_db_status')
+        if os.path.exists(legacy_status_dir):
+            print(VERBOSE_MESSAGE_ABOUT_HASH_TRANSITION)
+
+            # Remove the old digest for several reasons:
+            # - tidiness
+            # - preventing false positives if you bisect
+            # - make this only a one-time headache (generally)
+            shutil.rmtree(legacy_status_dir)
+
+            # Return True to force a one-time rebuild.
+            return True
+
+        return is_digest_obsolete(
+            self.digest_name,
+            IMPORTANT_FILES,
+            self.important_settings(),
+        )
+
     def template_status(self) -> str:
         # This function returns a status string specifying the type of
         # state the template db is in and thus the kind of action required.
-        database_name = self.database_name
-
-        check_files = [
-            'zilencer/management/commands/populate_db.py',
-            'zerver/lib/bulk_create.py',
-            'zerver/lib/generate_test_data.py',
-            'zerver/lib/server_initialization.py',
-            'tools/setup/postgres-init-test-db',
-            'tools/setup/postgres-init-dev-db',
-            'zerver/migrations/0258_enable_online_push_notifications_default.py',
-        ]
-        check_settings = [
-            'REALM_INTERNAL_BOTS',
-        ]
-
-        # Construct a directory to store hashes named after the target database.
-        status_dir = os.path.join(UUID_VAR_DIR, database_name + '_db_status')
-        if not os.path.exists(status_dir):
-            os.mkdir(status_dir)
-
         if not self.database_exists():
             # TODO: It's possible that `database_exists` will
             # return `False` even though the database
@@ -140,14 +192,16 @@ class Database:
             # it's better to err on that side, generally.
             return 'needs_rebuild'
 
-        # To ensure Python evaluates all the hash tests (and thus creates the
-        # hash files about the current state), we evaluate them in a
-        # list and then process the result
-        files_hash_status = all([check_file_hash(fn, status_dir) for fn in check_files])
-        settings_hash_status = all([check_setting_hash(setting_name, status_dir)
-                                    for setting_name in check_settings])
-        hash_status = files_hash_status and settings_hash_status
-        if not hash_status:
+        if self.files_or_settings_have_changed():
+            # Write the new hash, relying on our callers to
+            # actually rebuild the db successfully.
+            # TODO: Move this code to the callers, and write
+            # the digest only AFTER the rebuild succeeds.
+            write_new_digest(
+                self.digest_name,
+                IMPORTANT_FILES,
+                self.important_settings(),
+            )
             return 'needs_rebuild'
 
         # Here we hash and compare our migration files before doing
@@ -266,45 +320,6 @@ def extract_migrations_as_list(migration_status: str) -> List[str]:
     MIGRATIONS_RE = re.compile(r'\[[X| ]\] (\d+_.+)\n')
     return MIGRATIONS_RE.findall(migration_status)
 
-def _get_hash_file_path(source_file_path: str, status_dir: str) -> str:
-    basename = os.path.basename(source_file_path)
-    filename = '_'.join(FILENAME_SPLITTER.split(basename)).lower()
-    return os.path.join(status_dir, filename)
-
-def _check_hash(source_hash_file: str, target_content: str) -> bool:
-    """
-    This function has a side effect of creating a new hash file or
-    updating the old hash file.
-    """
-    target_hash_content = hashlib.sha1(target_content.encode('utf8')).hexdigest()
-
-    if not os.path.exists(source_hash_file):
-        source_hash_content = None
-    else:
-        with open(source_hash_file) as f:
-            source_hash_content = f.read().strip()
-
-    with open(source_hash_file, 'w') as f:
-        f.write(target_hash_content)
-
-    return source_hash_content == target_hash_content
-
-def check_file_hash(target_file_path: str, status_dir: str) -> bool:
-    source_hash_file = _get_hash_file_path(target_file_path, status_dir)
-
-    with open(target_file_path) as f:
-        target_content = f.read()
-
-    return _check_hash(source_hash_file, target_content)
-
-def check_setting_hash(setting_name: str, status_dir: str) -> bool:
-    hash_filename = '_'.join(['settings', setting_name])
-    source_hash_file = os.path.join(status_dir, hash_filename)
-
-    target_content = json.dumps(getattr(settings, setting_name), sort_keys=True)
-
-    return _check_hash(source_hash_file, target_content)
-
 def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
     """The logic in zerver/lib/test_runner.py tries to delete all the
     temporary test databases generated by test-backend threads, but it