2019-01-16 03:17:33 +01:00
|
|
|
import os
|
2019-04-13 01:48:34 +02:00
|
|
|
import re
|
2019-01-16 03:17:33 +01:00
|
|
|
import tempfile
|
|
|
|
from argparse import ArgumentParser, RawTextHelpFormatter
|
2023-10-10 22:51:22 +02:00
|
|
|
from typing import Any
|
2019-01-16 03:17:33 +01:00
|
|
|
|
|
|
|
from django.conf import settings
|
2021-08-14 16:51:57 +02:00
|
|
|
from django.core.management.base import CommandParser
|
2019-02-14 02:03:26 +01:00
|
|
|
from django.db import connection
|
2019-01-16 03:17:33 +01:00
|
|
|
from django.utils.timezone import now as timezone_now
|
2023-10-12 19:43:45 +02:00
|
|
|
from typing_extensions import override
|
2019-01-16 03:17:33 +01:00
|
|
|
|
2020-01-14 21:59:46 +01:00
|
|
|
from scripts.lib.zulip_tools import TIMESTAMP_FORMAT, parse_os_release, run
|
2019-02-14 02:03:26 +01:00
|
|
|
from version import ZULIP_VERSION
|
2019-01-16 03:17:33 +01:00
|
|
|
from zerver.lib.management import ZulipBaseCommand
|
2019-02-14 02:03:26 +01:00
|
|
|
from zerver.logging_handlers import try_git_describe
|
2019-01-16 03:17:33 +01:00
|
|
|
|
|
|
|
|
|
|
|
class Command(ZulipBaseCommand):
    """Management command that writes a backup of this server to a gzipped tarball.

    The archive contains version marker files plus, unless skipped via
    ``--skip-db`` / ``--skip-uploads``, a ``pg_dump`` of the database and the
    local uploads directory.
    """

    # Fix support for multi-line usage strings
    @override
    def create_parser(self, prog_name: str, subcommand: str, **kwargs: Any) -> CommandParser:
        """Build the standard parser, but keep help text line breaks verbatim."""
        parser = super().create_parser(prog_name, subcommand, **kwargs)
        parser.formatter_class = RawTextHelpFormatter
        return parser

    @override
    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register the ``--output``, ``--skip-db`` and ``--skip-uploads`` options."""
        parser.add_argument("--output", help="Filename of output tarball")
        parser.add_argument("--skip-db", action="store_true", help="Skip database backup")
        parser.add_argument("--skip-uploads", action="store_true", help="Skip uploads backup")

    @override
    def handle(self, *args: Any, **options: Any) -> None:
        """Assemble the backup in a temporary directory and tar it up.

        Reads ``options["output"]`` (tarball path, or ``None`` to create a
        uniquely named temporary ``.tar.gz``), ``options["skip_db"]`` and
        ``options["skip_uploads"]``.  On any failure while producing the
        tarball, a tarball file created by this command is removed again and
        the original exception is re-raised; a user-supplied output path is
        never deleted.
        """
        timestamp = timezone_now().strftime(TIMESTAMP_FORMAT)
        with tempfile.TemporaryDirectory(
            prefix=f"zulip-backup-{timestamp}-",
        ) as tmp:
            os.mkdir(os.path.join(tmp, "zulip-backup"))
            # `members` are the paths handed to tar; `paths` are (archive
            # name, on-disk path) pairs that get rewritten via --transform.
            members = []
            paths = []

            with open(os.path.join(tmp, "zulip-backup", "zulip-version"), "w") as f:
                print(ZULIP_VERSION, file=f)
                git = try_git_describe()
                if git:
                    print(git, file=f)
            members.append("zulip-backup/zulip-version")

            with open(os.path.join(tmp, "zulip-backup", "os-version"), "w") as f:
                print(
                    "{ID} {VERSION_ID}".format(**parse_os_release()),
                    file=f,
                )
            members.append("zulip-backup/os-version")

            with open(os.path.join(tmp, "zulip-backup", "postgres-version"), "w") as f:
                pg_server_version = connection.cursor().connection.server_version
                # Integer division by 10000 is used as the server's major
                # version, which selects the matching pg_dump binary below.
                major_pg_version = pg_server_version // 10000
                print(pg_server_version, file=f)
            members.append("zulip-backup/postgres-version")

            if settings.DEVELOPMENT:
                members.append(
                    os.path.join(settings.DEPLOY_ROOT, "zproject", "dev-secrets.conf"),
                )
                paths.append(
                    ("zproject", os.path.join(settings.DEPLOY_ROOT, "zproject")),
                )
            else:
                members.append("/etc/zulip")
                paths.append(("settings", "/etc/zulip"))

            if not options["skip_db"]:
                database = settings.DATABASES["default"]
                pg_dump_command = [
                    # Call the pg_dump whose version matches the server we are
                    # connected to.  The generic /usr/bin/pg_dump wrapper on
                    # Debian/Ubuntu may pick the newest installed client, and
                    # a newer archive format cannot be read by the older
                    # pg_restore used when restoring.
                    f"/usr/lib/postgresql/{major_pg_version}/bin/pg_dump",
                    "--format=directory",
                    "--file=" + os.path.join(tmp, "zulip-backup", "database"),
                    "--username=" + database["USER"],
                    "--dbname=" + database["NAME"],
                    "--no-password",
                ]
                # Only pass --host/--port when configured; formatting via
                # f-string also accepts a port configured as an integer.
                if database["HOST"] != "":
                    pg_dump_command += [f"--host={database['HOST']}"]
                if database["PORT"] != "":
                    pg_dump_command += [f"--port={database['PORT']}"]

                # --no-password forbids prompting, so the password is supplied
                # through the environment instead.
                os.environ["PGPASSWORD"] = database["PASSWORD"]

                run(
                    pg_dump_command,
                    cwd=tmp,
                )
                members.append("zulip-backup/database")

            if (
                not options["skip_uploads"]
                and settings.LOCAL_UPLOADS_DIR is not None
                and os.path.exists(
                    os.path.join(settings.DEPLOY_ROOT, settings.LOCAL_UPLOADS_DIR),
                )
            ):
                members.append(
                    os.path.join(settings.DEPLOY_ROOT, settings.LOCAL_UPLOADS_DIR),
                )
                paths.append(
                    (
                        "uploads",
                        os.path.join(settings.DEPLOY_ROOT, settings.LOCAL_UPLOADS_DIR),
                    ),
                )

            # "|" is the delimiter of the sed-style expression below, so it
            # must not occur in any name or path substituted into it.
            assert not any("|" in name or "|" in path for name, path in paths)
            # Rewrite each on-disk path prefix to zulip-backup/<name> inside
            # the archive.
            transform_args = [
                r"--transform=s|^{}(/.*)?$|zulip-backup/{}\1|x".format(
                    re.escape(path),
                    name.replace("\\", r"\\"),
                )
                for name, path in paths
            ]

            # Path of a tarball created by this command (as opposed to one
            # named by the user).  It stays None until the file exists, so the
            # cleanup below never touches an unbound name or a user's file.
            temporary_tarball = None
            try:
                if options["output"] is None:
                    # delete=False keeps the file after the handle is closed;
                    # the `with` only releases the descriptor promptly.
                    with tempfile.NamedTemporaryFile(
                        prefix=f"zulip-backup-{timestamp}-",
                        suffix=".tar.gz",
                        delete=False,
                    ) as placeholder:
                        temporary_tarball = placeholder.name
                    tarball_path = temporary_tarball
                else:
                    tarball_path = options["output"]

                run(
                    [
                        "tar",
                        f"--directory={tmp}",
                        "-cPhzf",
                        tarball_path,
                        *transform_args,
                        "--",
                        *members,
                    ]
                )
                print(f"Backup tarball written to {tarball_path}")
            except BaseException:
                # Do not leave a partial temporary tarball behind.
                if temporary_tarball is not None:
                    os.unlink(temporary_tarball)
                raise
|