From 14e88f283c6513fff07a58b301c350a3cdcc1388 Mon Sep 17 00:00:00 2001
From: Rui Zhao
Date: Tue, 12 May 2026 16:35:39 +0800
Subject: [PATCH v1 2/2] Benchmark scripts for lazy snapshot distribution

Provides single-run and matrix benchmark scripts to measure the impact of
the lazy snapshot distribution patch on logical decoding spill bytes and
decoding time.

Scenario: one long-running write transaction with K INSERTs (K=1 by
default), coexisting with N concurrent CREATE/DROP TABLE pairs (each its
own catalog-modifying commit), then drained via
pg_logical_slot_get_changes() with the test_decoding output plugin.
Captures spill_bytes, spill_count, total_bytes, and end-to-end decoding
wall time.

Files:

  bench/setup_cluster.sh
    Spins up a throwaway PG cluster from a specified install directory,
    configured for logical decoding (wal_level=logical, max_wal_senders,
    max_replication_slots, configurable logical_decoding_work_mem).

  bench/lazy_snapshot_bench.sh
    Runs a single (N, K) scenario against a running cluster. The long
    transaction runs in a background psql session that does K INSERTs
    and then pg_sleep()s long enough for the concurrent DDL loop to
    complete. DDLs are batched in a single psql session for ~100x
    throughput over per-statement connections. Outputs one CSV row.

  bench/run_matrix.sh
    Driver that iterates over a list of N values with configurable
    repeat count (default 3). Emits a CSV with all replicates.

  bench/aggregate.sh
    Aggregates CSVs (from one or more run_matrix.sh invocations) into
    median-per-cell summary tables, plus a side-by-side master vs patch
    comparison with computed speedup and bytes-saved ratios.

Usage:

  # Build master and patch versions of PostgreSQL
  git checkout master
  ./configure --prefix=/tmp/pg_master_install ... && make install
  git checkout lazy-snapshot-distribution
  make clean && ./configure --prefix=/tmp/pg_patch_install ... \
&& make install

  # Bring up master cluster, run matrix
  eval "$(./bench/setup_cluster.sh /tmp/pg_master_install /tmp/pg_master_data 55432)"
  ./bench/run_matrix.sh master 500 1000 2000 5000 > master.csv
  pg_ctl -D /tmp/pg_master_data stop

  # Bring up patch cluster on a different port, run the same matrix
  eval "$(./bench/setup_cluster.sh /tmp/pg_patch_install /tmp/pg_patch_data 55433)"
  ./bench/run_matrix.sh patch 500 1000 2000 5000 > patch.csv

  # Aggregate
  cat master.csv patch.csv | ./bench/aggregate.sh -

These scripts produced the empirical data cited in the "Performance
impact" section of the cover letter for v1-0001.
---
 bench/aggregate.sh           | 125 +++++++++++++++++++++++++++
 bench/lazy_snapshot_bench.sh | 161 +++++++++++++++++++++++++++++++++++
 bench/run_matrix.sh          |  57 +++++++++++++
 bench/setup_cluster.sh       |  75 ++++++++++++++++
 4 files changed, 418 insertions(+)
 create mode 100755 bench/aggregate.sh
 create mode 100755 bench/lazy_snapshot_bench.sh
 create mode 100755 bench/run_matrix.sh
 create mode 100755 bench/setup_cluster.sh

diff --git a/bench/aggregate.sh b/bench/aggregate.sh
new file mode 100755
index 0000000000..481e00c549
--- /dev/null
+++ b/bench/aggregate.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+#
+# Aggregate the CSV from run_matrix.sh into a comparison table suitable for
+# pasting into a pgsql-hackers email.
+#
+# Usage:
+#   $0 CSV_FILE
+#   or
+#   cat master.csv patch.csv | $0 -
+#
+# Outputs, per (N, label) combo: median of decoding_ms / spill_bytes /
+# total_bytes across iterations. Then a side-by-side comparison of master
+# vs patch.
+#
+# The awk program below uses only POSIX awk features (no gawk-only asort()),
+# so it behaves the same under gawk, mawk and BSD awk.
+
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+    echo "Usage: $0 CSV_FILE (or '-' for stdin)" >&2
+    exit 1
+fi
+
+INPUT="$1"
+if [[ "$INPUT" == "-" ]]; then
+    INPUT=/dev/stdin
+fi
+
+awk -F, '
+# Sort a[1..n] in place, ascending by numeric value. Insertion sort is
+# fine here: n is a repeat count or the number of distinct N values.
+function sort_num(a, n,    i, j, v) {
+    for (i = 2; i <= n; i++) {
+        v = a[i] + 0
+        for (j = i - 1; j >= 1 && a[j] + 0 > v; j--)
+            a[j + 1] = a[j]
+        a[j + 1] = v
+    }
+}
+
+# Median of the n samples stored in vals[key, 1..n].
+function median(vals, key, n,    i, tmp) {
+    for (i = 1; i <= n; i++)
+        tmp[i] = vals[key, i]
+    sort_num(tmp, n)
+    if (n % 2)
+        return tmp[(n + 1) / 2] + 0
+    return (tmp[n / 2] + tmp[n / 2 + 1]) / 2
+}
+
+$1 == "label" { next }      # skip any header row (works for concatenated CSVs)
+NF < 10       { next }      # skip blank or truncated rows
+{
+    n_val = $2 + 0
+    key = $1 "," n_val      # label,N
+    decoding[key, ++cnt_dec[key]] = $5
+    spill[key, ++cnt_sp[key]] = $9
+    total[key, ++cnt_tot[key]] = $10
+    labels[$1] = 1
+    ns[n_val] = 1
+    # remember K and logical_decoding_work_mem (assumed constant) and the
+    # max repeat seen
+    k = $3
+    ldwm = $4
+    if (cnt_dec[key] > rep_max) rep_max = cnt_dec[key]
+}
+END {
+    if (rep_max == 0) {
+        print "aggregate.sh: no data rows in input" | "cat 1>&2"
+        exit 1
+    }
+
+    for (key in cnt_dec) {
+        m_dec[key] = median(decoding, key, cnt_dec[key])
+        m_sp[key]  = median(spill, key, cnt_dec[key])
+        m_tot[key] = median(total, key, cnt_dec[key])
+    }
+
+    # N values in ascending numeric order
+    n_count = 0
+    for (n in ns) sorted_ns[++n_count] = n
+    sort_num(sorted_ns, n_count)
+
+    # Labels in ascending string order; "for (x in arr)" order is
+    # unspecified, so sort to keep the output stable across awk versions.
+    l_count = 0
+    for (lbl in labels) sorted_lbl[++l_count] = lbl
+    for (i = 2; i <= l_count; i++) {
+        lbl = sorted_lbl[i]
+        for (j = i - 1; j >= 1 && (sorted_lbl[j] "") > (lbl ""); j--)
+            sorted_lbl[j + 1] = sorted_lbl[j]
+        sorted_lbl[j + 1] = lbl
+    }
+
+    printf "Config: K=%s, logical_decoding_work_mem=%s, REPEAT=%d (median)\n\n", k, ldwm, rep_max
+    printf "%-8s %-7s %-14s %-14s %-14s\n", "label", "N", "decode_ms", "spill_bytes", "total_bytes"
+    for (i = 1; i <= n_count; i++) {
+        N = sorted_ns[i]
+        for (j = 1; j <= l_count; j++) {
+            lbl = sorted_lbl[j]
+            key = lbl "," N
+            if (key in m_dec)
+                printf "%-8s %-7d %-14d %-14d %-14d\n", lbl, N, m_dec[key], m_sp[key], m_tot[key]
+        }
+    }
+
+    # Side-by-side, if both master and patch are present
+    if ("master" in labels && "patch" in labels) {
+        printf "\n%-7s %-12s %-12s %-12s %-12s %-12s %-12s\n", \
+            "N", "master_dec", "patch_dec", "speedup", "master_spill", "patch_spill", "saved_x"
+        for (i = 1; i <= n_count; i++) {
+            N = sorted_ns[i]
+            mk = "master," N; pk = "patch," N
+            if ((mk in m_dec) && (pk in m_dec)) {
+                speedup = (m_dec[pk] > 0) ? m_dec[mk] / m_dec[pk] : 0
+                # 999 is a sentinel for "patch spilled nothing, master did"
+                saved = (m_sp[pk] > 0) ? m_sp[mk] / m_sp[pk] : (m_sp[mk] > 0 ? 999 : 1)
+                printf "%-7d %-12d %-12d %-12.2f %-12d %-12d %-12.2f\n", \
+                    N, m_dec[mk], m_dec[pk], speedup, m_sp[mk], m_sp[pk], saved
+            }
+        }
+    }
+}
+' "$INPUT"
diff --git a/bench/lazy_snapshot_bench.sh b/bench/lazy_snapshot_bench.sh
new file mode 100755
index 0000000000..f16a1a0c1f
--- /dev/null
+++ b/bench/lazy_snapshot_bench.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+#
+# Single benchmark run for "lazy snapshot distribution" patch.
+#
+# Simulates: one long-running transaction (with K inserts) coexists with N
+# concurrent catalog-modifying commits, then drains via pg_logical_slot_get_changes.
+#
+# Output (CSV row to stdout):
+#   label,N,K,ldwm,decoding_ms,decoded_changes,spill_txns,spill_count,spill_bytes,total_bytes,ddl_loop_sec
+#
+# Assumes a running PG cluster with wal_level=logical. To compare master vs
+# patch, point this script at two different clusters built from each binary.
+
+set -euo pipefail
+
+# -------- args --------
+N=""
+K="1"
+LABEL=""
+LDWM="64kB"
+SLOT="lazy_bench"
+VERBOSE=0
+
+usage() {
+    cat < -l