#!/bin/bash
#
# PostgreSQL Streaming Replication Bug - Restart Upstream Reproducer
# Triggers bug by restarting upstream standby to force reconnection
#
# Prerequisites:
#   - PostgreSQL 17+ binaries in PATH
#   - Ports 15432-15434 available
#   - Write access to /tmp
#

# Verify every PostgreSQL binary this script invokes is on PATH before
# anything is created, so a partial installation fails here with the list
# of missing tools instead of half-way through the setup.
missing_tools=()
for tool in initdb pg_ctl postgres psql pg_basebackup; do
    command -v "$tool" >/dev/null 2>&1 || missing_tools+=("$tool")
done
if (( ${#missing_tools[@]} > 0 )); then
    echo "Error: PostgreSQL binaries not found in PATH: ${missing_tools[*]}" >&2
    echo "Please ensure initdb, pg_ctl, postgres, psql, pg_basebackup are available" >&2
    exit 1
fi
unset tool missing_tools

# Scratch locations: the WAL archive directory sits directly under BASE.
BASE=/tmp/pgbug_restart
ARCHIVE="${BASE}/archive"

# Banner
printf '%s\n' \
    "PostgreSQL Bug - Restart Upstream Reproducer" \
    "============================================="

# Cleanup
#######################################
# Stop everything the reproducer started and delete its scratch tree.
# Globals:   BASE (read) - scratch directory; must be set and non-empty
# Arguments: none
# Returns:   exit status of the final rm
#######################################
cleanup() {
    local pattern bg_pids i
    # Abort rather than build a pattern/path from an empty BASE: an empty
    # value would match every postgres process and hand rm an empty path.
    pattern="postgres.*${BASE:?BASE must be set before cleanup}"

    # Terminate this shell's own background jobs so an early exit or
    # Ctrl-C does not leave them running after the script is gone.
    bg_pids=$(jobs -p)
    if [[ -n "$bg_pids" ]]; then
        # shellcheck disable=SC2086 # splitting into one PID per word is intended
        kill $bg_pids 2>/dev/null || true
    fi

    # Signal any server whose command line mentions BASE, then wait (at most
    # ~5s) until none is left before deleting the data directories under it.
    # When nothing matched there is nothing to wait for.
    if pkill -f "$pattern" 2>/dev/null; then
        for ((i = 0; i < 50; i++)); do
            pgrep -f "$pattern" >/dev/null 2>&1 || break
            sleep 0.1
        done
    fi

    rm -rf -- "${BASE:?}"
}
# Run cleanup whenever the script exits, and once up front (best effort,
# output discarded) before any setup happens.
trap 'cleanup' EXIT
cleanup 2>/dev/null || :

# Setup
# One data directory per server plus the WAL archive directory.
if ! mkdir -p "$BASE"/{primary,standby,cascade} "$ARCHIVE"; then
    echo "Error: cannot create scratch directories under $BASE" >&2
    exit 1
fi

# 1. Primary
echo ""
echo "1. Setting up primary..."
# initdb output is suppressed, so its exit status is the only failure signal.
if ! initdb -D "$BASE/primary" >/dev/null 2>&1; then
    echo "Error: initdb failed for $BASE/primary" >&2
    exit 1
fi

# Unquoted heredoc delimiter: $ARCHIVE is expanded by the shell now, while
# %p / %f are written literally into the configuration file.
cat >> "$BASE/primary/postgresql.conf" <<EOF
port = 15432
unix_socket_directories = '/tmp'
wal_level = replica
max_wal_senders = 1
archive_mode = on
archive_command = 'cp %p $ARCHIVE/%f'
archive_timeout = 5s
log_line_prefix = '%m PRIMARY: '
EOF

# Every later step talks to this server, so stop here if it did not start.
if ! pg_ctl -D "$BASE/primary" start -w >/dev/null; then
    echo "Error: primary failed to start" >&2
    exit 1
fi
echo "   Primary started"

# Generate data
# Seed the primary with a table of 10,000 rows, each carrying 100 'X' characters.
seed_sql="
CREATE TABLE test (id serial, data text);
INSERT INTO test SELECT generate_series(1,10000), repeat('X', 100);
"
psql -h /tmp -p 15432 postgres -c "$seed_sql" >/dev/null

# Continuous load
# Background loop: one single-row insert, then a 0.1s pause, forever.
# psql output and errors are discarded; the loop's PID is kept in LOAD_PID.
{
    while :; do
        psql -h /tmp -p 15432 postgres -c "INSERT INTO test VALUES (DEFAULT, 'data');" >/dev/null 2>&1
        sleep 0.1
    done
} &
LOAD_PID=$!

# 2. Standby (archive-only)
echo "2. Creating standby (archive-only)..."
# -X none: the backup itself carries no WAL. The configuration below gives
# this standby a restore_command and no primary_conninfo.
if ! pg_basebackup -h /tmp -p 15432 -D "$BASE/standby" -X none >/dev/null 2>&1; then
    echo "Error: pg_basebackup from primary failed" >&2
    exit 1
fi
chmod 700 "$BASE/standby"

# Disable streaming on primary after backup
# NOTE(review): the PostgreSQL documentation states max_wal_senders can only
# be set at server start, so the reload below presumably leaves the running
# value unchanged until the primary is restarted - confirm whether a restart
# was intended. Behaviour is kept as-is here.
psql -h /tmp -p 15432 postgres -c "ALTER SYSTEM SET max_wal_senders = 0;" >/dev/null
psql -h /tmp -p 15432 postgres -c "SELECT pg_reload_conf();" >/dev/null

# Appended after the settings copied from the primary in the backup.
cat >> "$BASE/standby/postgresql.conf" <<EOF
port = 15433
unix_socket_directories = '/tmp'
restore_command = 'cp $ARCHIVE/%f %p 2>/dev/null'
max_wal_senders = 10
hot_standby = on
log_line_prefix = '%m STANDBY: '
EOF

touch "$BASE/standby/standby.signal"
# The cascade is cloned from this server next, so stop if it did not start.
if ! pg_ctl -D "$BASE/standby" start -w >/dev/null; then
    echo "Error: standby failed to start" >&2
    exit 1
fi
echo "   Standby started (archive-only)"

# Fixed pause before the cascade is cloned from this standby
# (presumably to let it replay some archived WAL first).
sleep 5

# 3. Cascading standby
echo "3. Creating cascading standby..."
# Cloned from the standby on port 15433, again without WAL (-X none).
if ! pg_basebackup -h /tmp -p 15433 -D "$BASE/cascade" -X none >/dev/null 2>&1; then
    echo "Error: pg_basebackup from standby failed" >&2
    exit 1
fi
chmod 700 "$BASE/cascade"

# The cascade gets both a streaming source (the standby, over TCP) and the
# same archive restore_command; DEBUG1 logging feeds the monitor step.
cat >> "$BASE/cascade/postgresql.conf" <<EOF
port = 15434
unix_socket_directories = '/tmp'
primary_conninfo = 'host=127.0.0.1 port=15433'
restore_command = 'cp $ARCHIVE/%f %p 2>/dev/null'
log_line_prefix = '%m CASCADE: '
log_min_messages = DEBUG1
EOF

touch "$BASE/cascade/standby.signal"

echo "4. Starting cascade (will connect successfully first)..."
# Run the server directly (not via pg_ctl) so all of its output lands in
# cascade.log, which the monitor step tails.
postgres -D "$BASE/cascade" >>"$BASE/cascade.log" 2>&1 &
CASCADE_PID=$!

sleep 5

# A server that already exited would leave the monitor tailing a dead log
# for its full timeout; show why it died and stop instead.
if ! kill -0 "$CASCADE_PID" 2>/dev/null; then
    echo "Error: cascade server exited during startup; last log lines:" >&2
    tail -n 20 "$BASE/cascade.log" >&2
    exit 1
fi

# Check initial connection
# Ask the upstream standby (port 15433) how many rows pg_stat_replication
# holds; psql errors are discarded.
printf '\n%s\n' "Initial streaming status:"
status_sql="
SELECT 'Streaming connections: ' || count(*) FROM pg_stat_replication;
"
psql -h /tmp -p 15433 postgres -t -A -c "$status_sql" 2>/dev/null

# Restart the upstream standby and wait (-w) for the restart to finish.
printf '\n%s\n' "5. Restarting upstream standby to break connection..."
pg_ctl -D "$BASE/standby" restart -w >/dev/null

echo "6. Monitoring for bug (cascade trying to reconnect)..."
echo "======================================================"

# Monitor log
#######################################
# Scan log lines on stdin for the "requested starting point ... is ahead"
# message. On the first match, print the banner, the matching line and the
# explanation, stop the tail feeding the pipeline, and return. Until then,
# echo any line containing FATAL or ERROR.
# Arguments: none (reads stdin)
# Outputs:   report text on stdout
# Returns:   0 if the message was seen, 1 if input ended without it
#######################################
watch_cascade_log() {
    local line
    local -r bug_re='requested starting point.*is ahead'

    # IFS= keeps leading/trailing whitespace of each log line intact.
    while IFS= read -r line; do
        if [[ $line =~ $bug_re ]]; then
            printf '%s\n' \
                "" \
                "========================================" \
                "      BUG TRIGGERED!" \
                "========================================" \
                "" \
                "$line" \
                "" \
                "Explanation:" \
                "1. Cascade initially connected successfully" \
                "2. Upstream restart broke the connection" \
                "3. Cascade fell back to archive" \
                "4. Tried to reconnect - triggered bug!" \
                "5. Requested next segment boundary" \
                "6. Archive-only standby doesn't have it" \
                ""

            # Kill the tail so the pipeline ends now instead of at the timeout.
            pkill -f "tail.*cascade.log" 2>/dev/null
            return 0
        elif [[ $line == *FATAL* || $line == *ERROR* ]]; then
            printf '%s\n' "$line"
        fi
    done
    return 1
}

# Without pipefail the pipeline's status is the watcher's, so a non-zero
# status means the log ended (timeout after 30s) before the message appeared.
if ! timeout 30 tail -f "$BASE/cascade.log" 2>/dev/null | watch_cascade_log; then
    echo ""
    echo "Bug NOT triggered within 30 seconds"
fi

# Signal the load loop and the cascade server; kill errors are discarded.
for pid in "$LOAD_PID" "$CASCADE_PID"; do
    kill "$pid" 2>/dev/null
done

printf '\n%s\n' "Test complete"