Unverified commit 7ac1688b, authored by Alexander Kukushkin, committed by GitHub
Browse files

Fix bug with major upgrade after clone (#557)

While working on the code that removes bg_mon from shared_preload_libraries before executing pg_upgrade, a small bug was introduced: the shared_preload_libraries value for the old version was overwritten by the value taken from the new version. As a result, the old cluster failed to start because of missing (non-existent) libraries, and the upgrade failed.

This commit fixes the incorrect behavior and improves the tests so that similar issues are caught in the future.
Showing with 18 additions and 14 deletions
+18 -14
...@@ -210,6 +210,9 @@ class _PostgresqlUpgrade(Postgresql): ...@@ -210,6 +210,9 @@ class _PostgresqlUpgrade(Postgresql):
self.set_bin_dir(version) self.set_bin_dir(version)
# shared_preload_libraries for the old cluster, cleaned from incompatible/missing libs
old_shared_preload_libraries = self.config.get('parameters').get('shared_preload_libraries')
# restore original values of archive_mode and shared_preload_libraries # restore original values of archive_mode and shared_preload_libraries
if getattr(self, '_old_config_values', None): if getattr(self, '_old_config_values', None):
for name, value in self._old_config_values.items(): for name, value in self._old_config_values.items():
...@@ -218,6 +221,7 @@ class _PostgresqlUpgrade(Postgresql): ...@@ -218,6 +221,7 @@ class _PostgresqlUpgrade(Postgresql):
else: else:
self.config.get('parameters')[name] = value self.config.get('parameters')[name] = value
# for the new version we maybe need to add some libs to the shared_preload_libraries
shared_preload_libraries = self.config.get('parameters').get('shared_preload_libraries') shared_preload_libraries = self.config.get('parameters').get('shared_preload_libraries')
if shared_preload_libraries: if shared_preload_libraries:
self._old_shared_preload_libraries = self.config.get('parameters')['shared_preload_libraries'] =\ self._old_shared_preload_libraries = self.config.get('parameters')['shared_preload_libraries'] =\
...@@ -238,8 +242,8 @@ class _PostgresqlUpgrade(Postgresql): ...@@ -238,8 +242,8 @@ class _PostgresqlUpgrade(Postgresql):
self.config._postgresql_conf = old_postgresql_conf self.config._postgresql_conf = old_postgresql_conf
self._version_file = old_version_file self._version_file = old_version_file
if shared_preload_libraries: if old_shared_preload_libraries:
self.config.get('parameters')['shared_preload_libraries'] = shared_preload_libraries self.config.get('parameters')['shared_preload_libraries'] = old_shared_preload_libraries
self.no_bg_mon() self.no_bg_mon()
self.configure_server_parameters() self.configure_server_parameters()
return True return True
......
...@@ -38,6 +38,7 @@ services: ...@@ -38,6 +38,7 @@ services:
ETCDCTL_ENDPOINTS: http://etcd:2379 ETCDCTL_ENDPOINTS: http://etcd:2379
ETCD_HOST: "etcd:2379" ETCD_HOST: "etcd:2379"
SCOPE: demo SCOPE: demo
ENABLE_PG_MON: 'true'
SPILO_CONFIGURATION: | SPILO_CONFIGURATION: |
bootstrap: bootstrap:
dcs: dcs:
......
...@@ -50,17 +50,19 @@ function docker_exec() { ...@@ -50,17 +50,19 @@ function docker_exec() {
function find_leader() { function find_leader() {
local container=$1 local container=$1
local silent=$2
declare -r timeout=$TIMEOUT declare -r timeout=$TIMEOUT
local attempts=0 local attempts=0
while true; do while true; do
leader=$(docker_exec "$container" 'patronictl list -f tsv' 2> /dev/null | awk '($4 == "Leader"){print $2}') leader=$(docker_exec "$container" 'patronictl list -f tsv' 2> /dev/null | awk '($4 == "Leader"){print $2}')
if [[ -n "$leader" ]]; then if [[ -n "$leader" ]]; then
echo "$leader" [ -z "$silent" ] && echo "$leader"
return return
fi fi
((attempts++)) ((attempts++))
if [[ $attempts -ge $timeout ]]; then if [[ $attempts -ge $timeout ]]; then
docker logs "$container"
log_error "Leader is not running after $timeout seconds" log_error "Leader is not running after $timeout seconds"
fi fi
sleep 1 sleep 1
...@@ -230,10 +232,14 @@ function verify_clone_with_wale_upgrade() { ...@@ -230,10 +232,14 @@ function verify_clone_with_wale_upgrade() {
} }
function verify_clone_with_basebackup_upgrade() { function verify_clone_with_basebackup_upgrade() {
log_info "Waiting for clone with basebackup and upgrade 10->11 to complete..."
find_leader "$1" 1
wait_query "$1" "SELECT current_setting('server_version_num')::int/10000" 11 2> /dev/null wait_query "$1" "SELECT current_setting('server_version_num')::int/10000" 11 2> /dev/null
} }
function verify_clone_with_wale_upgrade_to_13() { function verify_clone_with_wale_upgrade_to_13() {
log_info "Waiting for clone with wal-e and upgrade 9.5->13 to complete..."
find_leader "$1" 1
wait_query "$1" "SELECT current_setting('server_version_num')::int/10000" 13 2> /dev/null wait_query "$1" "SELECT current_setting('server_version_num')::int/10000" 13 2> /dev/null
} }
...@@ -274,16 +280,13 @@ function test_spilo() { ...@@ -274,16 +280,13 @@ function test_spilo() {
run_test test_pg_upgrade_to_12_check_failed "$container" # pg_upgrade --check complains about OID run_test test_pg_upgrade_to_12_check_failed "$container" # pg_upgrade --check complains about OID
wait_backup "$container"
wait_zero_lag "$container"
log_info "Waiting for clone with wal-e and upgrade 9.5->13 to complete..."
find_leader "$upgrade_container" > /dev/null
docker logs "$upgrade_container"
run_test verify_clone_with_wale_upgrade_to_13 "$upgrade_container" run_test verify_clone_with_wale_upgrade_to_13 "$upgrade_container"
docker rm -f "$upgrade_container" docker rm -f "$upgrade_container"
wait_backup "$container"
wait_zero_lag "$container"
upgrade_container=$(start_clone_with_wale_upgrade_container) upgrade_container=$(start_clone_with_wale_upgrade_container)
log_info "Started $upgrade_container for testing major upgrade 9.6->10 after clone with wal-e" log_info "Started $upgrade_container for testing major upgrade 9.6->10 after clone with wal-e"
...@@ -310,8 +313,7 @@ function test_spilo() { ...@@ -310,8 +313,7 @@ function test_spilo() {
wait_backup "$container" wait_backup "$container"
log_info "Waiting for clone with wal-e and upgrade 9.6->10 to complete..." log_info "Waiting for clone with wal-e and upgrade 9.6->10 to complete..."
find_leader "$upgrade_container" > /dev/null find_leader "$upgrade_container" 1
docker logs "$upgrade_container"
run_test verify_clone_with_wale_upgrade "$upgrade_container" run_test verify_clone_with_wale_upgrade "$upgrade_container"
wait_backup "$upgrade_container" wait_backup "$upgrade_container"
...@@ -328,9 +330,6 @@ function test_spilo() { ...@@ -328,9 +330,6 @@ function test_spilo() {
log_info "Waiting for postgres to start in the $upgrade_replica_container..." log_info "Waiting for postgres to start in the $upgrade_replica_container..."
run_test verify_clone_with_wale_upgrade "$upgrade_replica_container" run_test verify_clone_with_wale_upgrade "$upgrade_replica_container"
log_info "Waiting for clone with basebackup and upgrade 10->11 to complete..."
find_leader "$basebackup_container" > /dev/null
docker logs "$basebackup_container"
run_test verify_clone_with_basebackup_upgrade "$basebackup_container" run_test verify_clone_with_basebackup_upgrade "$basebackup_container"
} }
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment