2020-06-11 00:54:34 +02:00
|
|
|
import glob
|
2018-05-21 18:09:55 +02:00
|
|
|
import json
|
2016-09-13 22:40:13 +02:00
|
|
|
import os
|
|
|
|
import re
|
2020-06-11 00:54:34 +02:00
|
|
|
import shutil
|
2018-06-06 00:49:48 +02:00
|
|
|
import subprocess
|
2017-10-18 04:23:06 +02:00
|
|
|
import sys
|
2020-06-11 00:54:34 +02:00
|
|
|
import time
|
2016-09-13 22:40:13 +02:00
|
|
|
from importlib import import_module
|
2017-11-06 02:56:09 +01:00
|
|
|
from io import StringIO
|
2020-06-11 00:54:34 +02:00
|
|
|
from typing import Any, List, Set
|
2016-09-13 22:40:13 +02:00
|
|
|
|
|
|
|
from django.apps import apps
|
2017-10-18 04:23:06 +02:00
|
|
|
from django.conf import settings
|
2016-09-13 22:40:13 +02:00
|
|
|
from django.core.management import call_command
|
2020-06-11 00:54:34 +02:00
|
|
|
from django.db import DEFAULT_DB_ALIAS, ProgrammingError, connection, connections
|
|
|
|
from django.db.utils import OperationalError
|
2016-09-13 22:40:13 +02:00
|
|
|
from django.utils.module_loading import module_has_submodule
|
|
|
|
|
2017-10-18 04:23:06 +02:00
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
2020-04-20 15:16:16 +02:00
|
|
|
from scripts.lib.zulip_tools import (
|
2020-06-11 00:54:34 +02:00
|
|
|
TEMPLATE_DATABASE_DIR,
|
|
|
|
get_dev_uuid_var_path,
|
|
|
|
is_digest_obsolete,
|
|
|
|
run,
|
|
|
|
write_new_digest,
|
2020-04-20 15:16:16 +02:00
|
|
|
)
|
2017-10-18 04:23:06 +02:00
|
|
|
|
2022-08-10 05:50:02 +02:00
|
|
|
# Name of the test template database; TEST_DATABASE (defined later in this
# file) targets a database with this same name.
BACKEND_DATABASE_TEMPLATE = "zulip_test_template"
# Directory returned by get_dev_uuid_var_path(). The migration status files
# and the legacy "<database>_db_status" directories referenced in this file
# are all located under it.
UUID_VAR_DIR = get_dev_uuid_var_path()
|
db tools: Use common scheme for digests.
We have two different digest schemes to make
sure we keep the database up to date. There
is the migration digest, which is NOT in the
scope of this commit, and which already
used the mechanism we use for other tools.
Here we are talking about the digest for
important files like `populate_db.py`.
Now our scheme is more consistent with how we
check file changes for other tools (as
well as the aforementioned migration files).
And we only write one hash file, instead of
seven.
And we only write the file when things have
actually changed.
And we are explicit about side effects.
Finally, we include a couple new bot settings
in the digest:
INTERNAL_BOTS
DISABLED_REALM_INTERNAL_BOTS
NOTE: This will require a one-time transition,
where we rebuild both databases (dev/test).
It takes a little over two minutes for me,
so it's not super painful.
I bump the provision version here, even
though you don't technically need it (since
the relevant tools are actually using the
digest files to determine if they need to
rebuild the database). I figure it's just
good to explicitly make this commit trigger
a provision, and the user will then see
the one-time migration of the hash files
with a little bit less of a surprise.
And I do a major bump, not a minor bump,
because when we go in the reverse direction,
the old code will have to rebuild the
database due to the legacy hash files not
being around, so, again, I just prefer it
to be explicit.
2020-04-22 11:47:03 +02:00
|
|
|
|
|
|
|
# Paths handed to `is_digest_obsolete` / `write_new_digest` (together with
# `Database.important_settings()`) for the per-database digest.  When that
# digest is reported obsolete, `Database.template_status` answers
# "needs_rebuild".
IMPORTANT_FILES = [
    "zilencer/management/commands/populate_db.py",
    "zerver/lib/bulk_create.py",
    "zerver/lib/generate_test_data.py",
    "zerver/lib/server_initialization.py",
    "tools/setup/postgresql-init-test-db",
    "tools/setup/postgresql-init-dev-db",
    "zerver/migrations/0258_enable_online_push_notifications_default.py",
]
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
VERBOSE_MESSAGE_ABOUT_HASH_TRANSITION = """
|
db tools: Use common scheme for digests.
We have two different digest schemes to make
sure we keep the database up to date. There
is the migration digest, which is NOT in the
scope of this commit, and which already
used the mechanism we use for other tools.
Here we are talking about the digest for
important files like `populate_db.py`.
Now our scheme is more consistent with how we
check file changes for other tools (as
well as the aforementioned migration files).
And we only write one hash file, instead of
seven.
And we only write the file when things have
actually changed.
And we are explicit about side effects.
Finally, we include a couple new bot settings
in the digest:
INTERNAL_BOTS
DISABLED_REALM_INTERNAL_BOTS
NOTE: This will require a one-time transition,
where we rebuild both databases (dev/test).
It takes a little over two minutes for me,
so it's not super painful.
I bump the provision version here, even
though you don't technically need it (since
the relevant tools are actually using the
digest files to determine if they need to
rebuild the database). I figure it's just
good to explicitly make this commit trigger
a provision, and the user will then see
the one-time migration of the hash files
with a little bit less of a surprise.
And I do a major bump, not a minor bump,
because when we go in the reverse direction,
the old code will have to rebuild the
database due to the legacy hash files not
being around, so, again, I just prefer it
to be explicit.
2020-04-22 11:47:03 +02:00
|
|
|
NOTE!!!!
|
|
|
|
|
|
|
|
We are rebuilding your database for a one-time transition.
|
|
|
|
|
|
|
|
We have a hashing scheme that we use to detect whether any
|
|
|
|
important files used in the construction of the database
|
|
|
|
have changed.
|
|
|
|
|
|
|
|
We are changing that scheme so it only uses one file
|
|
|
|
instead of a directory of files.
|
|
|
|
|
|
|
|
In order to prevent errors due to this transition, we are
|
|
|
|
doing a one-time rebuild of your database. This should
|
|
|
|
be the last time this happens (for this particular reason,
|
|
|
|
at least), unless you go back to older branches.
|
|
|
|
|
2021-02-12 08:20:45 +01:00
|
|
|
"""
|
2016-10-21 12:48:15 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-20 13:18:06 +02:00
|
|
|
def migration_paths() -> List[str]:
    """Return the paths that feed the migration digest.

    The result is every ``*/migrations/*.py`` file matched relative to the
    current working directory, followed by ``requirements/dev.txt``.
    """
    paths = glob.glob("*/migrations/*.py")
    paths.append("requirements/dev.txt")
    return paths
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-20 16:21:41 +02:00
|
|
|
class Database:
    """Bookkeeping for one provisioned database (see DEV_DATABASE / TEST_DATABASE).

    An instance knows the names of the digest and migration-status files that
    belong to its database and offers helpers that decide whether the database
    is current, needs migrations, or must be rebuilt.
    """

    def __init__(self, platform: str, database_name: str, settings: str) -> None:
        """Record the names derived from ``platform`` and ``database_name``.

        ``settings`` is the dotted path of a Django settings module; it is
        stored as a string and passed on to ``manage.py`` and
        ``get_migration_status``.
        """
        self.database_name = database_name
        self.settings = settings
        self.digest_name = "db_files_hash_for_" + platform
        self.migration_status_file = "migration_status_" + platform
        self.migration_status_path = os.path.join(
            UUID_VAR_DIR,
            self.migration_status_file,
        )
        self.migration_digest_file = "migrations_hash_" + database_name

    def important_settings(self) -> List[str]:
        """Return JSON dumps of the settings that are part of the database digest.

        Note that ``settings`` here is the module-level Django settings
        object, not ``self.settings``.  A setting that is not defined is
        serialized as an empty dict.
        """

        def get(setting_name: str) -> str:
            value = getattr(settings, setting_name, {})
            # sort_keys keeps the dump stable regardless of dict ordering.
            return json.dumps(value, sort_keys=True)

        return [
            get("LOCAL_DATABASE_PASSWORD"),
            get("INTERNAL_BOTS"),
            get("REALM_INTERNAL_BOTS"),
            get("DISABLED_REALM_INTERNAL_BOTS"),
        ]

    def run_db_migrations(self) -> None:
        """Run ``migrate`` and then ``get_migration_status`` through ``manage.py``."""
        # We shell out to `manage.py` and pass `DJANGO_SETTINGS_MODULE` on
        # the command line rather than just calling the migration
        # functions, because Django doesn't support changing settings like
        # what the database is at runtime.
        # Also we export ZULIP_DB_NAME which is ignored by dev platform but
        # recognised by test platform and used to migrate correct db.
        manage_py = [
            "env",
            "DJANGO_SETTINGS_MODULE=" + self.settings,
            "ZULIP_DB_NAME=" + self.database_name,
            "./manage.py",
        ]

        run([*manage_py, "migrate", "--no-input"])

        # Only the file *name* is passed here, not `migration_status_path`.
        # NOTE(review): presumably the command resolves it under the same
        # var directory -- confirm against the management command.
        run([*manage_py, "get_migration_status", "--output=" + self.migration_status_file])

    def what_to_do_with_migrations(self) -> str:
        """Compare the saved migration status with the current one.

        Returns:
            ``"scrap"`` when there is no saved status file, when fewer
            migrations exist now than were recorded, or when a recorded
            migration no longer exists; ``"migrations_are_latest"`` when the
            counts match; ``"migrate"`` otherwise.
        """
        status_path = self.migration_status_path

        if not os.path.exists(status_path):
            return "scrap"

        with open(status_path) as f:
            previous_migration_status = f.read()

        current_migration_status = get_migration_status(settings=self.settings)
        all_curr_migrations = extract_migrations_as_list(current_migration_status)
        all_prev_migrations = extract_migrations_as_list(previous_migration_status)

        if len(all_curr_migrations) < len(all_prev_migrations):
            return "scrap"

        # A set gives the same membership answers as the list, without
        # rescanning the list for every recorded migration.
        known_migrations = set(all_curr_migrations)
        if any(migration not in known_migrations for migration in all_prev_migrations):
            return "scrap"

        if len(all_curr_migrations) == len(all_prev_migrations):
            return "migrations_are_latest"

        return "migrate"

    def database_exists(self) -> bool:
        """Return whether ``pg_database`` lists a database named ``self.database_name``.

        An ``OperationalError`` raised while connecting or querying is
        reported as ``False``.
        """
        try:
            db_connection = connections[DEFAULT_DB_ALIAS]
            try:
                with db_connection.cursor() as cursor:
                    cursor.execute(
                        "SELECT 1 from pg_database WHERE datname=%s;",
                        [self.database_name],
                    )
                    return bool(cursor.fetchone())
            finally:
                # Close connections on the failure path too, not only after
                # a successful query.
                connections.close_all()
        except OperationalError:
            return False

    def files_or_settings_have_changed(self) -> bool:
        """Return whether the database digest is obsolete.

        Side effect: if a legacy ``<database_name>_db_status`` directory
        exists under ``UUID_VAR_DIR``, a notice is printed, the directory is
        deleted, and ``True`` is returned to force a rebuild.
        """
        database_name = self.database_name

        # Deal with legacy hash files.  We can kill off this code when
        # enough time has passed since April 2020 that we're not
        # worried about anomalies doing `git bisect`--probably a few
        # months is sufficient.
        legacy_status_dir = os.path.join(UUID_VAR_DIR, database_name + "_db_status")
        if os.path.exists(legacy_status_dir):
            print(VERBOSE_MESSAGE_ABOUT_HASH_TRANSITION)

            # Remove the old digest for several reasons:
            #   - tidiness
            #   - preventing false positives if you bisect
            #   - make this only a one-time headache (generally)
            shutil.rmtree(legacy_status_dir)

            # Return True to force a one-time rebuild.
            return True

        return is_digest_obsolete(
            self.digest_name,
            IMPORTANT_FILES,
            self.important_settings(),
        )

    def template_status(self) -> str:
        """Return the action the database requires.

        Returns:
            ``"needs_rebuild"``, ``"run_migrations"`` or ``"current"``.

        Side effect: when the migration digest is obsolete, a new migration
        digest is written before the migrations are inspected.
        """
        # This function returns a status string specifying the type of
        # state the template db is in and thus the kind of action required.
        if not self.database_exists():
            # TODO: It's possible that `database_exists` will
            #       return `False` even though the database
            #       exists, but we just have the wrong password,
            #       probably due to changing the secrets file.
            #
            #       The only problem this causes is that we waste
            #       some time rebuilding the whole database, but
            #       it's better to err on that side, generally.
            return "needs_rebuild"

        if self.files_or_settings_have_changed():
            return "needs_rebuild"

        # Here we hash and compare our migration files before doing
        # the work of seeing what to do with them; if there are no
        # changes, we can safely assume we don't need to run
        # migrations without spending a few 100ms parsing all the
        # Python migration code.
        if not self.is_migration_digest_obsolete():
            return "current"

        # NOTE:
        #   We immediately update the digest, assuming our
        #   callers will do what it takes to run the migrations.
        #
        #   Ideally our callers would just do it themselves
        #   AFTER the migrations actually succeeded, but the
        #   caller codepaths are kind of complicated here.
        self.write_new_migration_digest()

        migration_op = self.what_to_do_with_migrations()
        if migration_op == "scrap":
            return "needs_rebuild"

        if migration_op == "migrate":
            return "run_migrations"

        return "current"

    def is_migration_digest_obsolete(self) -> bool:
        """Ask ``is_digest_obsolete`` about the digest of ``migration_paths()``."""
        return is_digest_obsolete(
            self.migration_digest_file,
            migration_paths(),
        )

    def write_new_migration_digest(self) -> None:
        """Write a new digest of ``migration_paths()`` via ``write_new_digest``."""
        write_new_digest(
            self.migration_digest_file,
            migration_paths(),
        )

    def write_new_db_digest(self) -> None:
        """Write a new digest of ``IMPORTANT_FILES`` plus ``important_settings()``."""
        write_new_digest(
            self.digest_name,
            IMPORTANT_FILES,
            self.important_settings(),
        )
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2020-04-20 16:21:41 +02:00
|
|
|
# The development database, configured by zproject.settings.
DEV_DATABASE = Database(
    platform="dev",
    database_name="zulip",
    settings="zproject.settings",
)

# The test template database, configured by zproject.test_settings.
TEST_DATABASE = Database(
    platform="test",
    database_name=BACKEND_DATABASE_TEMPLATE,
    settings="zproject.test_settings",
)
|
2020-02-10 14:22:58 +01:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
|
|
|
def update_test_databases_if_required(rebuild_test_database: bool = False) -> None:
    """Bring the zulip_test_template database template up to date.

    Checks whether the template is consistent with our database
    migrations; if not, it is updated in the fastest way possible:

    * If all we need to do is add some migrations, just run those
      migrations on the template database.
    * Otherwise, rebuild the test template database from scratch.

    The default behavior is sufficient for the `test-backend` use
    case, where the test runner code will clone directly from the
    template database.

    The `rebuild_test_database` option (used by our frontend and API
    tests) asks us to drop and re-clone the zulip_test database from
    the template so those test suites can run with a fresh copy.
    """
    status = TEST_DATABASE.template_status()

    if status == "needs_rebuild":
        run(["tools/rebuild-test-database"])
        TEST_DATABASE.write_new_db_digest()
    elif status == "run_migrations":
        TEST_DATABASE.run_db_migrations()
        run(["tools/setup/generate-fixtures"])
    elif rebuild_test_database:
        # The template is current; fixtures are regenerated only on request.
        run(["tools/setup/generate-fixtures"])
|
2018-06-06 00:49:48 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2017-11-05 11:15:10 +01:00
|
|
|
def get_migration_status(**options: Any) -> str:
    """Return the output of Django's ``showmigrations --list`` as a string.

    Recognized keyword options, all optional: ``verbosity`` (default 1),
    ``app_label`` (restrict the listing to one app), ``database`` (default
    ``DEFAULT_DB_ALIAS``), ``no_color`` (default False), ``settings``
    (default: the ``DJANGO_SETTINGS_MODULE`` environment variable) and
    ``traceback`` (default True).

    The ``ESC[1m`` / ``ESC[0m`` escape sequences are stripped from the output.

    Raises:
        KeyError: if ``settings`` is not passed and ``DJANGO_SETTINGS_MODULE``
            is not set in the environment.
    """
    verbosity = options.get("verbosity", 1)

    # Import each installed app's `management` package, if it has one.
    # NOTE(review): presumably needed so the apps' management hooks are
    # loaded before the command runs -- confirm.
    for app_config in apps.get_app_configs():
        if module_has_submodule(app_config.module, "management"):
            import_module(".management", app_config.name)

    app_label = options["app_label"] if options.get("app_label") else None
    db = options.get("database", DEFAULT_DB_ALIAS)

    # Only consult the environment when the caller did not pass `settings`;
    # evaluating os.environ[...] eagerly as a `.get` default would raise
    # KeyError even though the value is never used.
    if "settings" in options:
        settings_module = options["settings"]
    else:
        settings_module = os.environ["DJANGO_SETTINGS_MODULE"]

    out = StringIO()
    command_args = ["--list"]
    if app_label:
        command_args.append(app_label)

    call_command(
        "showmigrations",
        *command_args,
        database=db,
        no_color=options.get("no_color", False),
        settings=settings_module,
        stdout=out,
        traceback=options.get("traceback", True),
        verbosity=verbosity,
    )
    connections.close_all()
    output = out.getvalue()
    return re.sub(r"\x1b\[(1|0)m", "", output)
|
2016-09-13 22:40:13 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2018-06-06 01:08:27 +02:00
|
|
|
def extract_migrations_as_list(migration_status: str) -> List[str]:
    """Extract migration names from ``showmigrations --list`` output.

    A migration line looks like ``[X] 0001_initial`` (applied) or
    ``[ ] 0002_something`` (unapplied).  Everything after the checkbox,
    up to the end of that line, is returned, in order of appearance.

    Lines are matched with ``$`` in multiline mode rather than a
    literal newline, so a final entry is still picked up when the
    input does not end with a trailing newline.
    """
    # The checkbox is either "X" or a space; "|" is not a valid marker.
    migrations_re = re.compile(r"\[[X ]\] (\d+_.+)$", re.MULTILINE)
    return migrations_re.findall(migration_status)
|
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-06-08 03:57:19 +02:00
|
|
|
def destroy_leaked_test_databases(expiry_time: int = 60 * 60) -> int:
    """Drop temporary test databases that were leaked by earlier runs.

    The logic in zerver/lib/test_runner.py tries to delete all the
    temporary test databases generated by test-backend threads, but it
    cannot guarantee it handles all race conditions correctly.  This
    is a catch-all function designed to delete any that might have
    been leaked due to crashes (etc.).  The high-level algorithm is to:

    * Delete every database whose name contains zulip_test_template_
    * Unless it is registered in a file under TEMPLATE_DATABASE_DIR as
      part of a currently running test-backend invocation
    * And that file is less than expiry_time seconds old.

    Registration files that are not newer than expiry_time are removed
    along the way.

    This should ensure we ~never break a running test-backend process,
    while also ensuring we will eventually delete all leaked databases.

    Returns the number of databases that were dropped.  Because psql is
    run with check=True, a failing psql invocation raises
    subprocess.CalledProcessError.
    """
    registration_files = glob.glob(os.path.join(UUID_VAR_DIR, TEMPLATE_DATABASE_DIR, "*"))

    existing_databases: Set[str] = set()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT datname FROM pg_database;")
            existing_databases.update(
                row[0] for row in cursor.fetchall() if "zulip_test_template_" in row[0]
            )
    except ProgrammingError:
        # Deliberately ignored: we proceed with whatever was collected
        # (normally nothing), so no database gets dropped.
        pass

    protected_databases: Set[str] = set()
    for path in registration_files:
        age = round(time.time()) - os.path.getmtime(path)
        if age >= expiry_time:
            # Any test-backend run older than expiry_time can be
            # cleaned up, both the database and the file listing its
            # databases.
            os.remove(path)
            continue
        # Each line of a registration file is the suffix of one
        # database name that is still in use.
        with open(path) as registration:
            protected_databases.update(
                f"zulip_test_template_{line}".rstrip() for line in registration
            )

    leaked = existing_databases - protected_databases
    if len(leaked) == 0:
        return 0

    drop_statements = [f"DROP DATABASE IF EXISTS {db};" for db in leaked]
    psql_command = [
        "psql",
        "-q",
        "-v",
        "ON_ERROR_STOP=1",
        "-h",
        "localhost",
        "postgres",
        "zulip_test",
    ]
    subprocess.run(
        psql_command,
        input="\n".join(drop_statements),
        check=True,
        text=True,
    )
    return len(leaked)
|
2019-07-06 00:29:17 +02:00
|
|
|
|
2021-02-12 08:19:30 +01:00
|
|
|
|
2019-07-06 00:29:17 +02:00
|
|
|
def remove_test_run_directories(expiry_time: int = 60 * 60) -> int:
    """Delete stale ``run_*`` directories under UUID_VAR_DIR/test-backend.

    A directory is removed when its modification time is more than
    ``expiry_time`` seconds in the past.  A directory that disappears
    while this function is running (for example because another
    process removed it first) is skipped silently and not counted.

    Returns the number of directories this call actually removed.
    """
    removed = 0
    directories = glob.glob(os.path.join(UUID_VAR_DIR, "test-backend", "run_*"))
    for test_run in directories:
        try:
            # The mtime lookup is inside the try block as well: the
            # directory can vanish between the glob above and this
            # check, not only between this check and the rmtree call.
            if round(time.time()) - os.path.getmtime(test_run) <= expiry_time:
                continue
            shutil.rmtree(test_run)
        except FileNotFoundError:
            continue
        removed += 1
    return removed
|
2021-03-24 12:25:30 +01:00
|
|
|
|
|
|
|
|
|
|
|
def reset_zulip_test_database() -> None:
    """Reset the zulip_test database as quickly as possible.

    The existing database is deleted and then re-created by cloning
    zulip_test_template.  This is used with the puppeteer tests, so
    the test database can be reset quickly after each run.

    The steps below are order-sensitive: sessions are terminated first,
    then the template is cloned, the clone is renamed to zulip_test,
    and finally the default connection's settings are updated in place
    and the connection is closed.
    """
    from zerver.lib.test_runner import destroy_test_databases

    # Refuse to run unless the default database is 'zulip_test'.
    assert connections["default"].settings_dict["NAME"] == "zulip_test"

    # Terminate every active PSQL session connected to 'zulip_test'.
    terminate_sessions_command = [
        "env",
        "PGHOST=localhost",
        "PGUSER=zulip_test",
        "scripts/setup/terminate-psql-sessions",
        "zulip_test",
    ]
    run(terminate_sessions_command)

    destroy_test_databases()

    # Point the default database at the test database template, so we
    # can instantly clone it.
    settings.DATABASES["default"]["NAME"] = BACKEND_DATABASE_TEMPLATE
    default_connection = connections["default"]
    creation = default_connection.creation
    clone_suffix = "clone"
    creation.clone_test_db(suffix=clone_suffix)
    clone_settings = creation.get_test_db_clone_settings(clone_suffix)

    # The clone is created as "<original database name>_<suffix>", so
    # it has to be renamed by hand.  It must be called 'zulip_test' so
    # that our running server can recognize it.
    with default_connection.cursor() as cursor:
        cursor.execute("ALTER DATABASE zulip_test_template_clone RENAME TO zulip_test;")
    clone_settings["NAME"] = "zulip_test"

    # default_connection.settings_dict must be updated in place for the
    # change to be reflected in django.db.connections.  If we instead
    # rebound the attribute to clone_settings, new threads would
    # connect to the default database instead of the appropriate clone.
    default_connection.settings_dict.update(clone_settings)
    default_connection.close()
|