#!/bin/bash
#
# This script reproduces the 2nd problem in the "pg_basebackup from cascading
# standby after timeline switch":
#
#    However, thinking about this some more, there's another bug in the way
#    WAL files are included in the backup, when a timeline switch happens.
#    basebackup.c includes all the WAL files on ThisTimeLineID, but when the
#    backup is taken from a standby, the standby might've followed a timeline
#    switch. So it's possible that some of the WAL files should come from
#    timeline 1, while others should come from timeline 2. This leads to an
#    error like "requested WAL segment 00000001000000000000000C has already
#    been removed" in pg_basebackup.

# Stop at the first command that fails.
set -e

# Directory under which the WAL archive and all test clusters are created.
TESTDIR=/home/heikki/pgsql.master
# Search this bin directory before anything else already on PATH.
PATH="/home/heikki/pgsql.master/bin:${PATH}"

# Directory that the archive_command / restore_command settings written
# later in this script copy WAL segments into and out of.
mkdir "${TESTDIR}/walarchive"

# Set up master
#
# Creates a new cluster in $TESTDIR/data-master, appends the settings needed
# for WAL archiving into $TESTDIR/walarchive and for replication connections,
# then starts the server and waits (-w) until startup has completed.
initdb -D "$TESTDIR/data-master"

# Unquoted heredoc delimiter: $TESTDIR is expanded by the shell, while the
# %p / %f / %m placeholders are written to the file literally.
cat >> "$TESTDIR/data-master/postgresql.conf" <<EOF
wal_keep_segments=50
wal_level=hot_standby
max_wal_senders=5
checkpoint_segments=50
shared_buffers=1MB
log_line_prefix='M  %m %p '
archive_mode=on
archive_command='cp %p $TESTDIR/walarchive/%f'
EOF

# Accept replication connections (Unix socket, IPv4 loopback, IPv6 loopback).
# The IPv4 loopback address is spelled out in full dotted-quad form so that
# the entry does not depend on the resolver accepting abbreviated addresses.
cat >> "$TESTDIR/data-master/pg_hba.conf" <<'EOF'
local   replication     heikki                                trust
host   replication     heikki             127.0.0.1/32                   trust
host   replication     heikki             ::1/128                   trust
EOF

pg_ctl -w -D "$TESTDIR/data-master" start


# 1. Generate some data on the master, just to make it larger, so that
# the later pg_basebackup call will take some time. The last statement
# creates tbl1, the small table that receives marker rows further down.
# Each statement is sent in its own psql invocation; with "set -e" active
# a failing statement aborts the script.
for sql in \
  "create table dummy(d text) with (fillfactor=10)" \
  "insert into dummy select 'foooooooooooo' || a from generate_series(1, 1000000) a; " \
  "create table tbl1(d text)"
do
  psql -c "$sql" postgres
done

# Take an (offline) base backup, to initialize the standby: stop the master,
# copy its data directory, and bring the master back up.
master_data="${TESTDIR}/data-master"
pg_ctl -w -D "$master_data" stop
cp -a "$master_data" "${TESTDIR}/data-backup"
pg_ctl -w -D "$master_data" start

# 2. Set up standby B, following the master through the archive
standby_b="${TESTDIR}/data-standbyB"

# Standby B starts out as a copy of the offline base backup.
cp -a "${TESTDIR}/data-backup" "$standby_b"

# Tag B's log lines with "B" instead of the master's prefix, and move it to
# its own port so it can run next to the master.
sed -i "s/log_line_prefix=.*/log_line_prefix='B %m %p '/g" "$standby_b/postgresql.conf"
cat >> "$standby_b/postgresql.conf" <<'EOF'
port=5433
hot_standby=on
EOF

# primary_conninfo is left empty; WAL is fetched with restore_command from
# the shared archive directory. The unquoted heredoc delimiter lets $TESTDIR
# expand while %f / %p are written literally.
cat >> "$standby_b/recovery.conf" <<EOF
primary_conninfo=''
restore_command='cp $TESTDIR/walarchive/%f %p'
standby_mode=on
recovery_target_timeline='latest'
EOF

pg_ctl -w -D "$standby_b" start

#######
#
# Ok, we have the master and standby running. Now let's start to take a
# base backup from the standby, and while it's being taken, generate more
# WAL and a timeline switch in the master

# Launch the backup against standby B (port 5433) in the background. Let it
# run for one second, then pause it.
pg_basebackup -D "${TESTDIR}/data-standbyC" -p 5433 -x -P &
BACKUPPID=$!
sleep 1
kill -s STOP "$BACKUPPID"

# Generate some WAL on the master
psql -c "insert into tbl1 values ('before timeline switch')" postgres

# Bump timeline. This simulates a failover from master to another standby.
# The master is restarted with a recovery.conf whose restore_command is
# /bin/false, i.e. a command that always fails.
master_data="${TESTDIR}/data-master"
pg_ctl -w -D "$master_data" stop
printf '%s\n' "restore_command='/bin/false'" > "$master_data/recovery.conf"
pg_ctl -w -D "$master_data" start

# Generate some WAL on the master, after the timeline bump
psql -c "insert into tbl1 values ('timeline was bumped in master'); select pg_switch_xlog()" postgres

# Wait for the standby to follow the new timeline, and replay all WAL
sleep 10

# Issue a checkpoint on standby B (restartpoint)
PGPORT=5433 psql -c "checkpoint" postgres

# Let the backup finish. Because of "set -e", a non-zero exit status from
# pg_basebackup (reported by wait) stops the script here.
kill -s CONT "$BACKUPPID"
wait "$BACKUPPID"

# XXX Due to the bug, pg_basebackup fails when it tries to back up the WAL files

# Start up new standby from the backup
standby_c="${TESTDIR}/data-standbyC"

# Tag C's log lines with "C" and give it its own port.
sed -i "s/log_line_prefix=.*/log_line_prefix='C %m %p '/g" "$standby_c/postgresql.conf"
printf '%s\n' "port=5434" >> "$standby_c/postgresql.conf"
#echo "hot_standby=on" >> $TESTDIR/data-standbyC/postgresql.conf

# Remove the recovery.conf found in the backup (presumably the one taken
# over from standby B's data directory) before starting the server.
rm "$standby_c/recovery.conf"

pg_ctl -w -D "$standby_c" start