#!/bin/bash
#
# PostgreSQL Streaming Replication Bug - Cascade Restart Reproducer
# Shows that restarting the CASCADE ITSELF triggers the bug
#
# Prerequisites:
#   - PostgreSQL 17+ binaries in PATH
#   - Ports 15432-15434 available
#   - Write access to /tmp
#

# Verify PostgreSQL is available
# Every tool named in the help text is probed, not just initdb, so a partial
# installation is reported here instead of failing midway through the run.
missing_tools=""
for tool in initdb pg_ctl psql pg_basebackup; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        missing_tools="$missing_tools $tool"
    fi
done
if [ -n "$missing_tools" ]; then
    echo "Error: PostgreSQL binaries not found in PATH:$missing_tools" >&2
    echo "Please ensure initdb, pg_ctl, psql, pg_basebackup are available" >&2
    exit 1
fi

# Root working directory for the run, and the WAL archive directory below it
BASE="/tmp/pgbug_cascade"
ARCHIVE="${BASE}/archive"

# Banner; the empty last line of the here-doc prints the trailing blank line
cat <<'EOF'
PostgreSQL Bug - CASCADE RESTART Reproducer
============================================
This shows restarting CASCADE ITSELF triggers the bug

EOF

# Cleanup
#
# Tears down what a run creates: background jobs started by this shell
# (the continuous-write loop), postgres processes whose command line
# mentions $BASE, and the $BASE directory tree.
# Globals:   BASE (read)
# Arguments: none
# Returns:   exit status of the final rm
cleanup() {
    local job_pids

    # Abort rather than let an empty BASE widen the pkill pattern to every
    # postgres process or make rm operate on an empty path.
    : "${BASE:?BASE must be set before cleanup runs}"

    # Stop background jobs first; otherwise the write loop outlives the
    # script whenever it exits before reaching its explicit kill.
    job_pids=$(jobs -p)
    if [ -n "$job_pids" ]; then
        # shellcheck disable=SC2086 # one PID per word is intended
        kill $job_pids 2>/dev/null || true
    fi

    pkill -f "postgres.*$BASE" 2>/dev/null || true
    # Pause before removing the data directories of the signalled servers
    sleep 1
    rm -rf -- "$BASE"
}
# Tear everything down on any exit path, and clear leftovers from a
# previous run before starting.
trap cleanup EXIT
cleanup 2>/dev/null || true

# Setup directories
mkdir -p "$BASE"/{primary,standby,cascade} "$ARCHIVE" || {
    echo "Error: cannot create working directories under $BASE" >&2
    exit 1
}

# 1. Primary with archiving
echo "1. Setting up primary..."
# initdb output stays hidden on success but is shown when it fails, since
# nothing after this point can work without an initialized data directory.
if ! initdb_output=$(initdb -D "$BASE/primary" 2>&1); then
    echo "Error: initdb failed for $BASE/primary" >&2
    printf '%s\n' "$initdb_output" >&2
    exit 1
fi

# Unquoted here-doc delimiter: $ARCHIVE is expanded by the shell now, so the
# config file receives the absolute archive path.
cat >> "$BASE/primary/postgresql.conf" <<EOF
port = 15432
unix_socket_directories = '/tmp'
wal_level = replica
max_wal_senders = 1
archive_mode = on
archive_command = 'cp %p $ARCHIVE/%f'
archive_timeout = 5s
log_line_prefix = '%m PRIMARY: '
EOF

if ! pg_ctl -D "$BASE/primary" start -w >/dev/null; then
    echo "Error: primary failed to start" >&2
    exit 1
fi
echo "   Primary started"

# Generate initial data
if ! psql -h /tmp -p 15432 postgres -c "
CREATE TABLE test (id serial, data text);
INSERT INTO test SELECT generate_series(1,10000), repeat('X', 100);
" >/dev/null; then
    echo "Error: could not create initial test data on primary" >&2
    exit 1
fi

# Continuous writes
# Background writer: inserts one row about every 0.1s until it is killed.
# Insert failures are discarded so the loop keeps running regardless.
{
    while :; do
        psql -h /tmp -p 15432 postgres -c "INSERT INTO test VALUES (DEFAULT, 'data');" >/dev/null 2>&1
        sleep 0.1
    done
} &
# PID of the background writer, used later to stop it
LOAD_PID=$!

# 2. Archive-only standby
echo "2. Creating standby (archive-only)..."
# The backup is taken with -X none; the standby is configured below with a
# restore_command and no primary_conninfo. Backup output is shown only on
# failure, because a failed backup leaves nothing usable to start.
if ! backup_output=$(pg_basebackup -h /tmp -p 15432 -D "$BASE/standby" -X none 2>&1); then
    echo "Error: pg_basebackup from primary failed" >&2
    printf '%s\n' "$backup_output" >&2
    exit 1
fi
chmod 700 "$BASE/standby"

# Disable streaming after backup
# NOTE(review): max_wal_senders appears to be a server-start-only setting in
# PostgreSQL, so the reload below may leave the new value pending until the
# primary is restarted - confirm whether that is intended.
psql -h /tmp -p 15432 postgres -c "ALTER SYSTEM SET max_wal_senders = 0;" >/dev/null
psql -h /tmp -p 15432 postgres -c "SELECT pg_reload_conf();" >/dev/null

# $ARCHIVE is expanded by the shell when the here-doc is written
cat >> "$BASE/standby/postgresql.conf" <<EOF
port = 15433
unix_socket_directories = '/tmp'
restore_command = 'cp $ARCHIVE/%f %p 2>/dev/null'
max_wal_senders = 10
hot_standby = on
log_line_prefix = '%m STANDBY: '
EOF

touch "$BASE/standby/standby.signal"
if ! pg_ctl -D "$BASE/standby" start -w >/dev/null; then
    echo "Error: standby failed to start" >&2
    exit 1
fi
echo "   Standby started (archive-only)"

sleep 5

# 3. Cascading standby
echo "3. Creating cascading standby..."
# The base backup is taken from the standby (port 15433), not the primary.
# Backup output is shown only on failure.
if ! backup_output=$(pg_basebackup -h /tmp -p 15433 -D "$BASE/cascade" -X none 2>&1); then
    echo "Error: pg_basebackup from standby failed" >&2
    printf '%s\n' "$backup_output" >&2
    exit 1
fi
chmod 700 "$BASE/cascade"

# The cascade gets both a streaming source (the standby on 15433) and the
# shared archive; $ARCHIVE is expanded by the shell when this is written.
cat >> "$BASE/cascade/postgresql.conf" <<EOF
port = 15434
unix_socket_directories = '/tmp'
primary_conninfo = 'host=127.0.0.1 port=15433'
restore_command = 'cp $ARCHIVE/%f %p 2>/dev/null'
log_line_prefix = '%m CASCADE: '
log_min_messages = DEBUG1
EOF

touch "$BASE/cascade/standby.signal"

echo "4. Starting initial cascade..."
if ! pg_ctl -D "$BASE/cascade" start -l "$BASE/cascade.log" >/dev/null; then
    # Show the log tail now: the EXIT trap removes $BASE, log included.
    echo "Error: cascade failed to start; last log lines:" >&2
    tail -n 20 "$BASE/cascade.log" >&2
    exit 1
fi

sleep 5

# Report the pre-restart state: the number of rows in pg_stat_replication
# on the standby (15433) and the replay LSN on the cascade (15434).
# Query errors are silenced, so a failed query simply prints nothing.
senders_sql="
SELECT 'Active streaming connections: ' || count(*) FROM pg_stat_replication;
"
replay_sql="
SELECT 'Replay LSN: ' || pg_last_wal_replay_lsn();
"

printf '\n%s\n' "Initial state - streaming connection established:"
psql -h /tmp -p 15433 postgres -tAc "$senders_sql" 2>/dev/null

# Get initial position
printf '%s\n' "Cascade replay position before restart:"
psql -h /tmp -p 15434 postgres -tAc "$replay_sql" 2>/dev/null

# Step 5: restart the cascade, appending to the same log file as the
# initial start. The here-doc opens with an empty line for spacing.
cat <<'EOF'

============================================================
5. RESTARTING CASCADE (simulating maintenance/pod restart)...
============================================================
EOF
pg_ctl -D "$BASE/cascade" restart -l "$BASE/cascade.log" >/dev/null

# Wait before the log is inspected in the next step
sleep 10

# Check if bug triggered
echo ""
echo "6. Checking for bug after cascade restart..."
echo "============================================================"

# Print how many lines of a log file contain the
# "requested starting point ... is ahead" error.
# Arguments: $1 - path to the log file
# Outputs:   a single integer on stdout; 0 when nothing matches or the
#            file cannot be read
count_start_point_errors() {
    local log_file=$1
    local matches
    # grep -c prints "0" itself (and exits 1) when nothing matches, so an
    # `|| echo 0` inside the substitution would produce "0<newline>0" and
    # break the numeric comparison below. Ignore the status and default
    # only when grep printed nothing (e.g. the file is missing).
    matches=$(grep -c "requested starting point.*is ahead" "$log_file" 2>/dev/null) || true
    printf '%s\n' "${matches:-0}"
}

# Count errors in log
ERROR_COUNT=$(count_start_point_errors "$BASE/cascade.log")

if [ "$ERROR_COUNT" -gt 0 ]; then
    echo ""
    echo "============================================================"
    echo "              BUG TRIGGERED BY CASCADE RESTART!"
    echo "============================================================"
    echo ""

    # Show errors (first three matches only)
    echo "Errors after cascade restart:"
    grep "requested starting point.*is ahead" "$BASE/cascade.log" | head -3

    echo ""
    echo "Implications:"
    echo "- Routine maintenance triggers permanent failure"
    echo "- Pod restarts in Kubernetes break replication"
    echo "- Configuration changes become impossible"
    echo "- No recovery without manual intervention"
else
    echo "Bug did not trigger (unexpected)"
fi

# Show final state
echo ""
echo "Final streaming status:"
psql -h /tmp -p 15433 postgres -tAc "
SELECT 'Active streaming connections: ' || count(*) FROM pg_stat_replication;
" 2>/dev/null

# Stop the continuous-write loop
kill "$LOAD_PID" 2>/dev/null

# The EXIT trap deletes $BASE, and cascade.log with it, so copy the log to a
# fresh file outside $BASE before pointing the user at it.
saved_log=$(mktemp "${TMPDIR:-/tmp}/pgbug_cascade_log.XXXXXX") || saved_log=""

echo ""
if [ -n "$saved_log" ] && cp -- "$BASE/cascade.log" "$saved_log" 2>/dev/null; then
    echo "Test complete - check $saved_log for full details"
else
    # Do not leave an empty placeholder behind when the copy failed
    if [ -n "$saved_log" ]; then
        rm -f -- "$saved_log"
    fi
    echo "Test complete - cascade log could not be preserved" >&2
fi