2020-06-26 23:32:36 +02:00
|
|
|
#!/usr/bin/env bash
|
2020-10-14 23:34:34 +02:00
|
|
|
set -eo pipefail
|
2020-06-26 23:32:36 +02:00
|
|
|
|
|
|
|
if [ "$EUID" -ne 0 ]; then
|
|
|
|
echo "Error: This script must be run as root" >&2
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
|
2022-12-06 03:06:14 +01:00
|
|
|
# Force a known locale; some OS X terminals set an invalid LC_CTYPE
|
|
|
|
# which crashes pg_upgradecluster
|
|
|
|
export LC_ALL=C.UTF-8
|
|
|
|
export LANG=C.UTF-8
|
|
|
|
export LANGUAGE=C.UTF-8
|
|
|
|
|
2021-11-06 03:10:55 +01:00
|
|
|
UPGRADE_TO=${1:-14}
|
2020-06-26 23:32:36 +02:00
|
|
|
UPGRADE_FROM=$(crudini --get /etc/zulip/zulip.conf postgresql version)
|
|
|
|
ZULIP_PATH="$(dirname "$0")/../.."
|
|
|
|
|
|
|
|
if [ "$UPGRADE_TO" = "$UPGRADE_FROM" ]; then
|
|
|
|
echo "Already running PostgreSQL $UPGRADE_TO!"
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
|
|
|
|
set -x
|
|
|
|
|
|
|
|
"$ZULIP_PATH"/scripts/lib/setup-apt-repo
|
|
|
|
apt-get install -y "postgresql-$UPGRADE_TO"
|
|
|
|
if pg_lsclusters -h | grep -qE "^$UPGRADE_TO\s+main\b"; then
|
|
|
|
pg_dropcluster "$UPGRADE_TO" main --stop
|
|
|
|
fi
|
|
|
|
|
2020-07-07 01:39:37 +02:00
|
|
|
(
|
2020-10-23 02:43:28 +02:00
|
|
|
# Two-stage application of Puppet; we apply the bare-bones
|
|
|
|
# PostgreSQL configuration first, so that FTS will be configured
|
2020-07-07 01:39:37 +02:00
|
|
|
# prior to the pg_upgradecluster.
|
|
|
|
TEMP_CONF_DIR=$(mktemp -d)
|
|
|
|
cp /etc/zulip/zulip.conf "$TEMP_CONF_DIR"
|
|
|
|
ZULIP_CONF="${TEMP_CONF_DIR}/zulip.conf"
|
|
|
|
crudini --set "$ZULIP_CONF" postgresql version "$UPGRADE_TO"
|
2020-10-20 04:10:17 +02:00
|
|
|
crudini --set "$ZULIP_CONF" machine puppet_classes zulip::profile::base,zulip::postgresql_base
|
2020-07-07 01:39:37 +02:00
|
|
|
touch "/usr/share/postgresql/$UPGRADE_TO/pgroonga_setup.sql.applied"
|
|
|
|
|
2021-08-18 22:35:44 +02:00
|
|
|
FACTER_LEAVE_SUPERVISOR=true "$ZULIP_PATH"/scripts/zulip-puppet-apply -f --config "$ZULIP_CONF"
|
2020-07-07 01:39:37 +02:00
|
|
|
rm -rf "$TEMP_CONF_DIR"
|
|
|
|
)
|
|
|
|
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
# Capture the output so we know where the path to the post-upgrade scripts is
|
2020-10-26 22:50:18 +01:00
|
|
|
UPGRADE_LOG=$(mktemp "/var/log/zulip/upgrade-postgresql-$UPGRADE_FROM-$UPGRADE_TO.XXXXXXXXX.log")
|
2020-10-01 21:56:59 +02:00
|
|
|
pg_upgradecluster -v "$UPGRADE_TO" "$UPGRADE_FROM" main --method=upgrade --link | tee "$UPGRADE_LOG"
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
SCRIPTS_PATH=$(grep -o "/var/log/postgresql/pg_upgradecluster-$UPGRADE_FROM-$UPGRADE_TO-main.*" "$UPGRADE_LOG" || true)
|
2020-06-26 23:32:36 +02:00
|
|
|
|
2020-07-10 01:41:18 +02:00
|
|
|
# If the upgrade completed successfully, lock in the new version in
|
|
|
|
# our configuration immediately
|
2020-06-26 23:32:36 +02:00
|
|
|
crudini --set /etc/zulip/zulip.conf postgresql version "$UPGRADE_TO"
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
|
|
|
|
# Update the statistics
|
upgrade-postgresql: Switch to vacuumdb --all --analzyze-only --jobs 10.
The `analyze_new_cluster.sh` script output by `pg_upgrade` just runs
`vacuumdb --all --analyze-in-stages`, which runs three passes over the
database, getting better stats each time. Each of these passes is
independent; the third pass does not require the first two.
`--analyze-in-stages` is only provided to get "something" into the
database, on the theory that it could then be started and used. Since
we wait for all three passes to complete before starting the database,
the first two passes add no value.
Additionally, PosttgreSQL 14 and up stop writing the
`analyze_new_cluster.sh` script as part of `pg_upgrade`, suggesting
the equivalent `vacuumdb --all --analyze-in-stages` call instead.
Switch to explicitly call `vacuumdb --all --analyze-only`, since we do
not gain any benefit from `--analyze-in-stages`. We also enable
parallelism, with `--jobs 10`, in order to analyze up to 10 tables in
parallel. This may increase load, but will accelerate the upgrade
process.
2021-11-08 19:27:24 +01:00
|
|
|
su postgres -c "/usr/lib/postgresql/$UPGRADE_TO/bin/vacuumdb --all --analyze-only --jobs 10"
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
|
2020-07-10 01:41:18 +02:00
|
|
|
# Start the database up cleanly
|
2020-06-26 23:32:36 +02:00
|
|
|
"$ZULIP_PATH"/scripts/zulip-puppet-apply -f
|
|
|
|
|
2020-07-10 01:41:18 +02:00
|
|
|
# Drop the old data, binaries, and scripts
|
2020-06-26 23:32:36 +02:00
|
|
|
pg_dropcluster "$UPGRADE_FROM" main
|
|
|
|
apt remove -y "postgresql-$UPGRADE_FROM"
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
if [ -n "$SCRIPTS_PATH" ]; then
|
2021-11-19 02:04:51 +01:00
|
|
|
if [ -f "$SCRIPTS_PATH/update_extensions.sql" ]; then
|
|
|
|
su postgres -c "psql $SCRIPTS_PATH/update_extensions.sql"
|
|
|
|
fi
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
su postgres -c "$SCRIPTS_PATH/delete_old_cluster.sh"
|
|
|
|
rm -rf "$SCRIPTS_PATH"
|
|
|
|
else
|
|
|
|
set +x
|
|
|
|
echo
|
|
|
|
echo
|
|
|
|
echo ">>>>> pg_upgradecluster succeeded, but post-upgrade scripts path could not"
|
|
|
|
echo " be parsed out! Please read the pg_upgradecluster output to understand"
|
|
|
|
echo " the current status of your cluster:"
|
|
|
|
echo " $UPGRADE_LOG"
|
2020-10-26 22:27:53 +01:00
|
|
|
echo " and report this bug with the PostgreSQL $UPGRADE_FROM -> $UPGRADE_TO upgrade to:"
|
upgrade: Use the in-place pg_upgrade, not a full dump/restore.
pg_upgradecluster has two possibilities for `--method`: `dump`, and
`upgrade`. The former is the default, and does a `pg_dump` of all of
the databases in the old cluster and feeds them into the new cluster.
This is a sure-fire way of getting the same information in both
databases, but may be extremely slow on large databases, and is
guaranteed to fail on servers whose databases take up >50% of their
disk.
The `--method=upgrade` method, by contrast, uses pg_upgrade to copy
the raw database data file over to the new cluster, and then fiddles
with their internal structure as needed by the upgrade to let them be
correct for the new version[1]. This is slightly faster than the
dump/load method, since it skips the serialization step, but still
requires that there be enough space on disk for both old and new
versions at once. `pg_upgrade` is currently supported for all
versions of PostgreSQL from 8.4 to 12.
Using `pg_upgrade` incurs slightly more risk, but since the it is
widely used by now, using it in the relatively-controlled Zulip server
environment is reasonable. The expected worst failure is failure to
upgrade, not corruption or data loss.
Additionally passing `--link` uses hardlinks to link the data files
into both the old and new directories simultaneously. This resolve
both the runtime of the operation, as well as the disk space usage.
The only potential downside to this is that as soon as writes have
occurred on the upgraded cluster, the old cluster can no longer be
started. Since this tooling intends to remove the old cluster
immediately after the upgrade completes successfully, this is not a
significant drawback.
Switch to using `--method=upgrade --link`. This technique spits out
two shell scripts which are expected to be run after completion of the
upgrade; one re-analyzes the statistics, the other does an `rm -rf` of
the data where it is still hardlinked in the old cluster. Extract the
location of these scripts from parsing the `pg_upgradecluster` output;
since the path is not static, we must rely on it being relatively easy
to parse. The risk of the path changing is lower, and has more
obvious failure modes, than inserting the current contents of these
upgrade steps into the overall `upgrade-postgres`.
[1] https://www.postgresql.org/docs/12/pgupgrade.html
2020-07-10 01:24:46 +02:00
|
|
|
echo " https://github.com/zulip/zulip/issues"
|
|
|
|
echo
|
|
|
|
echo
|
|
|
|
fi
|