From f127592cd1b095d03a0e38f539948ee68283d5b1 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Fri, 27 Feb 2026 22:21:36 +1300
Subject: [PATCH v2 05/19] Provide stack buffer API.

Several places use a fixed-size array on the stack for non-escaping
memory of dynamic size, with fallback to palloc()/pfree().  Create a
standard API for that, and implement it with alloca() instead if
possible, while applying our traditional cap on stack growth.

Four implementations are provided:

* STACK_BUFFER_USE_ALLOCA:     use alloca() or builtins
* STACK_BUFFER_USE_ARRAY:      the traditional approach
* STACK_BUFFER_USE_PALLOC:     development/testing
* STACK_BUFFER_USE_PALLOC_LOG: development/testing

Later patches will adopt the API in various parts of the tree to speed
up common operations.

Reviewed-by:
Discussion:
---
 src/include/c.h                  |  10 +
 src/include/utils/stack_buffer.h | 461 +++++++++++++++++++++++++++++++
 2 files changed, 471 insertions(+)
 create mode 100644 src/include/utils/stack_buffer.h

diff --git a/src/include/c.h b/src/include/c.h
index 5a524365ccc..c1074371c40 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -923,6 +923,16 @@ typedef NameData *Name;
 /* we don't currently need wider versions of the other ALIGN macros */
 #define MAXALIGN64(LEN)			TYPEALIGN64(MAXIMUM_ALIGNOF, (LEN))
 
+/*
+ * MAXIMUM_ALIGNOF is the alignment respected by palloc() and similar.  The C
+ * standard provides max_align_t for similar purposes, but Visual Studio's
+ * <stddef.h> doesn't define it.  Supply the definition Clang uses on that
+ * platform, for code that needs to reason about the standard library's or
+ * compiler's behavior rather than PostgreSQL's.
+ */
+#ifdef _MSC_VER
+typedef double max_align_t;
+#endif
 
 /* ----------------------------------------------------------------
  *				Section 6:	assertions
diff --git a/src/include/utils/stack_buffer.h b/src/include/utils/stack_buffer.h
new file mode 100644
index 00000000000..e4057a452e6
--- /dev/null
+++ b/src/include/utils/stack_buffer.h
@@ -0,0 +1,461 @@
+/*-------------------------------------------------------------------------
+ *
+ * stack_buffer.h
+ *		Allocator for objects that don't escape the current lexical scope.
+ *
+ * A palloc()-like interface to alloca(), for allocating memory efficiently on
+ * the stack.  Raw alloca() is usually considered dangerous because of its
+ * inherent stack overflow risk, but this interface imposes limits on stack
+ * size and falls back to regular palloc() when they would be exceeded.
+ *
+ * Memory should still be freed explicitly with stack_buffer_free().  It is a
+ * no-op in the common case that pfree() doesn't need to be called.
+ *
+ * XXX It might be possible to use something like "defer" or equivalent
+ * compiler extensions to clean up palloc()'d memory automatically, in future
+ * work, and then stack_buffer_free() would not be necessary.
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/stack_buffer.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STACK_BUFFER_H
+#define STACK_BUFFER_H
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "miscadmin.h"
+
+#include <limits.h>
+#include <unistd.h>
+
+
+/* #define STACK_BUFFER_USE_PALLOC_LOG "/tmp/stack_buffer.csv" */
+
+/* Choose an implementation automatically, unless one was defined manually. */
+#if !defined(STACK_BUFFER_USE_ARRAY) && \
+	!defined(STACK_BUFFER_USE_ALLOCA) && \
+	!defined(STACK_BUFFER_USE_PALLOC) && \
+	!defined(STACK_BUFFER_USE_PALLOC_LOG)
+#if pg_has_builtin(__builtin_alloca_with_align)
+/* Use GCC/Clang builtin with explicit alignment argument. */
+#define STACK_BUFFER_USE_ALLOCA
+#elif defined(_MSC_VER)
+/*
+ * MSVC's alloca() maps to a builtin that works in function arguments.  We can
+ * adjust for stricter-than-default alignment ourselves.
+ */
+#include <malloc.h>
+#define STACK_BUFFER_USE_ALLOCA
+#else
+/* Traditional approach. */
+#define STACK_BUFFER_USE_ARRAY
+#endif
+#endif
+
+/*
+ * Default cap: 128 bytes should be enough for many arrays of datums and null
+ * flags, without posing a danger to recursive code.
+ *
+ * When the implementation limits total stack usage (the alloca() one does),
+ * STACK_BUFFER_LARGE is used instead: unused capacity doesn't consume stack
+ * space there, and a secondary limit is computed from the remaining stack.
+ */
+#define STACK_BUFFER_DEFAULT \
+	(stack_buffer_total_size_limited ? STACK_BUFFER_LARGE : 128)
+
+/*
+ * Tiny cap for code that risks deep recursion but can still often avoid a
+ * palloc() call.  Also becomes STACK_BUFFER_LARGE if total usage is limited.
+ */
+#define STACK_BUFFER_TINY \
+	(stack_buffer_total_size_limited ? STACK_BUFFER_LARGE : 16)
+
+/*
+ * Large cap, intended only for non-recursive uses such as conversions to C
+ * string format before calling standard library routines.  Avoid allowing
+ * more than one buffer of this size to be active on the stack at a time.
+ * STACK_BUFFER_DEFAULT and STACK_BUFFER_TINY may also expand to this.
+ */
+#define STACK_BUFFER_LARGE 1024
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Public stack buffer API.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* Declare the local state for a stack buffer of default size. */
+#define DECLARE_STACK_BUFFER() \
+	DECLARE_STACK_BUFFER_SIZE(STACK_BUFFER_DEFAULT)
+
+/* As above, using the standard "tiny" size (see notes above). */
+#define DECLARE_STACK_BUFFER_TINY() \
+	DECLARE_STACK_BUFFER_SIZE(STACK_BUFFER_TINY)
+
+/* As above, using the standard "large" size (see notes above). */
+#define DECLARE_STACK_BUFFER_LARGE() \
+	DECLARE_STACK_BUFFER_SIZE(STACK_BUFFER_LARGE)
+
+/* As above, with a caller-specified limit on stack usage; declares locals. */
+#define DECLARE_STACK_BUFFER_SIZE(size) \
+	size_t stack_buffer_let_size;		/* temp, avoids double eval */	\
+	bool stack_buffer_maybe_pfree = false;	/* set if palloc() is used */	\
+	DECLARE_STACK_BUFFER_IMPL(size)
+
+/* Allocate at least one byte, optionally with explicit alignment. */
+#define stack_buffer_alloc(size)						\
+	stack_buffer_alloc_aligned((size), MAXIMUM_ALIGNOF)
+#define stack_buffer_alloc_aligned(size, align)							\
+	(stack_buffer_sanity_checks(),										\
+	 stack_buffer_let_size = (size),									\
+	 stack_buffer_let_size = Max(stack_buffer_let_size, 1),				\
+	 stack_buffer_alloc_aligned_impl(stack_buffer_let_size, (align)))
+
+/* As above, but also zero the (at least one byte of) memory. */
+#define stack_buffer_alloc0(size) \
+	stack_buffer_alloc0_aligned((size), MAXIMUM_ALIGNOF)
+#define stack_buffer_alloc0_aligned(size, align) \
+	(stack_buffer_sanity_checks(),										\
+	 stack_buffer_let_size = (size),									\
+	 stack_buffer_let_size = Max(stack_buffer_let_size, 1),				\
+	 memset(stack_buffer_alloc_aligned_impl(stack_buffer_let_size,		\
+											(align)),					\
+			0,															\
+			stack_buffer_let_size))
+
+/* As above, but sized, aligned and cast for n objects of type T. */
+#define stack_buffer_alloc_object(T)			\
+	stack_buffer_alloc_array(T, 1)
+#define stack_buffer_alloc_array(T, n)								\
+	((T *) stack_buffer_alloc_aligned((n) * sizeof(T), alignof(T)))
+#define stack_buffer_alloc0_object(T)			\
+	stack_buffer_alloc0_array(T, 1)
+#define stack_buffer_alloc0_array(T, n)									\
+	((T *) stack_buffer_alloc0_aligned((n) * sizeof(T), alignof(T)))
+
+/* Copy a string, adding a terminator; cstr and text are evaluated twice. */
+#define stack_buffer_strdup(cstr)						\
+	stack_buffer_strdup_with_len((cstr), strlen(cstr))
+#define stack_buffer_strndup(cstr, n)							\
+	stack_buffer_strdup_with_len((cstr), strnlen((cstr), (n)))
+#define stack_buffer_strdup_with_len(data, size)						\
+	(stack_buffer_sanity_checks(),										\
+	 stack_buffer_let_size = (size),									\
+	 stack_buffer_strdup_with_len_impl(stack_buffer_alloc_aligned_impl(stack_buffer_let_size + 1, \
+																	   alignof(char)), \
+									   (data),							\
+									   stack_buffer_let_size))
+#define stack_buffer_text_to_cstring(text) \
+	stack_buffer_strdup_with_len(VARDATA_ANY(text), VARSIZE_ANY_EXHDR(text))
+#define stack_buffer_text_datum_to_cstring(datum) \
+	stack_buffer_text_to_cstring((text *) DatumGetPointer(datum))
+
+/*
+ * Free memory allocated with the above interfaces (a no-op unless it came
+ * from the palloc() fallback).  Pointers palloc()'d directly are not
+ * expected: they'd break the stack_buffer_maybe_pfree optimization and might
+ * limit future implementation techniques.  ptr is evaluated more than once.
+ */
+#define stack_buffer_free(ptr)											\
+	do																	\
+	{																	\
+		Assert(stack_buffer_stack_p(ptr) || stack_buffer_maybe_pfree);	\
+		if (unlikely(stack_buffer_maybe_pfree) &&						\
+			!stack_buffer_stack_p(ptr))									\
+			pfree(ptr);													\
+	}																	\
+	while (0)
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Private helper code common to all implementations.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* Cannot work across setjmp()/longjmp() due to stack clobbering; reject it. */
+#define stack_buffer_sanity_checks()									\
+	(StaticAssertExpr(!pg_in_lexical_scope_p(PG_TRY),					\
+					  "stack buffer API not allowed in PG_TRY"),		\
+	 StaticAssertExpr(!pg_in_lexical_scope_p(PG_CATCH),					\
+					  "stack buffer API not allowed in PG_CATCH"),		\
+	 StaticAssertExpr(!pg_in_lexical_scope_p(PG_FINALLY),				\
+					  "stack buffer API not allowed in PG_FINALLY"))
+
+/* Fill in the memory allocated by stack_buffer_strdup_with_len(). */
+static inline char *
+stack_buffer_strdup_with_len_impl(char *dst, const char *data, size_t size)
+{
+	/* memcpy() returns dst, so terminate the copy through its result. */
+	((char *) memcpy(dst, data, size))[size] = '\0';
+	return dst;
+}
+
+/*
+ * Fallback: allocate with palloc(), or palloc_aligned() if align exceeds
+ * MAXIMUM_ALIGNOF, and note that in the caller's stack_buffer_maybe_pfree.
+ * That wastes a register, but lets stack_buffer_free() skip pfree() cheaply.
+ */
+#define stack_buffer_palloc_aligned(size, align) \
+	((stack_buffer_maybe_pfree = true),			 \
+	 ((align) > MAXIMUM_ALIGNOF ?										\
+	  palloc_aligned((size), (align), 0) :								\
+	  palloc(size)))			/* can't ask for smaller alignment */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Low-level implementations below this point supply the following macros,
+ * which should not be used directly:
+ *
+ * 1. stack_buffer_total_size_limited (true/false)
+ * 2. DECLARE_STACK_BUFFER_IMPL(size)
+ * 3. stack_buffer_alloc_aligned_impl(size, align)
+ * 4. stack_buffer_stack_p(ptr)
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*-------------------------------------------------------------------------
+ *
+ * Toy implementations for debugging.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* Always use the palloc() fallback; no pointer is ever treated as stack. */
+#ifdef STACK_BUFFER_USE_PALLOC
+#define stack_buffer_total_size_limited false
+#define DECLARE_STACK_BUFFER_IMPL(size)
+#define stack_buffer_alloc_aligned_impl(size, align)	\
+	stack_buffer_palloc_aligned((size), (align))
+#define stack_buffer_stack_p(ptr) false
+#endif
+
+/*
+ * Same, but also append "location,function,size,depth" entries to the file
+ * named by STACK_BUFFER_USE_PALLOC_LOG, if it can be opened.
+ */
+#ifdef STACK_BUFFER_USE_PALLOC_LOG
+#define stack_buffer_total_size_limited false
+#define DECLARE_STACK_BUFFER_IMPL(size)									\
+	FILE *stack_buffer_log												\
+	__attribute__((cleanup(stack_buffer_close_log))) =					\
+	fopen(STACK_BUFFER_USE_PALLOC_LOG, "a+")
+#define stack_buffer_alloc_aligned_impl(size, align)					\
+	((stack_buffer_log == NULL ? 0 :									\
+	  fprintf(stack_buffer_log,											\
+			  "%s:%d,%s,%zu,%zu\n",										\
+			  __FILE__, __LINE__, __func__,								\
+			  (size_t) (size),											\
+			  (size_t) ((const char *) stack_base_ptr -					\
+						(const char *) __builtin_stack_address()))),	\
+	 stack_buffer_palloc_aligned((size), (align)))
+#define stack_buffer_stack_p(ptr) false
+static inline void
+stack_buffer_close_log(FILE **f)
+{
+	if (*f != NULL)				/* fopen() may have failed */
+		fclose(*f);
+}
+#endif
+
+/*-------------------------------------------------------------------------
+ *
+ * Simple array-based implementation.
+ *
+ * This is entirely standard C requiring no compiler extensions, but it leaves
+ * a big hole in the stack when you call another function and has no ability
+ * to respect the total stack size limit so we have to be much more cautious
+ * about sizing when we use it.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef STACK_BUFFER_USE_ARRAY
+
+#define stack_buffer_total_size_limited false
+
+#define DECLARE_STACK_BUFFER_IMPL(size)									\
+	char stack_buffer_array[(size)];									\
+	char *stack_buffer_sp = stack_buffer_array + (size)
+
+#define stack_buffer_alloc_aligned_impl(size, align)					\
+	stack_buffer_alloc_aligned_from_array(&stack_buffer_array[0],		\
+										  &stack_buffer_sp,				\
+										  &stack_buffer_maybe_pfree,	\
+										  (size), (align))
+
+#define stack_buffer_stack_p(ptr)										\
+	((char *) (ptr) >= &stack_buffer_array[0] &&						\
+	 (char *) (ptr) < &stack_buffer_array[sizeof(stack_buffer_array)])
+
+/*
+ * Carve size bytes at the given alignment from the top of the free space
+ * between array and *sp; if it won't fit, palloc() and set *maybe_pfree.
+ */
+static inline void *
+stack_buffer_alloc_aligned_from_array(const char *array,
+									  char **sp, bool *maybe_pfree,
+									  size_t size, size_t align)
+{
+	if (likely(size <= (size_t) (*sp - array)))	/* keeps *sp - size in bounds */
+	{
+		char	   *result = (char *) TYPEALIGN_DOWN(align, *sp - size);
+
+		if (likely(result >= array))
+		{
+			*sp = result;
+			return result;
+		}
+	}
+	*maybe_pfree = true;		/* as stack_buffer_palloc_aligned() would do */
+	return align > MAXIMUM_ALIGNOF ? palloc_aligned(size, align, 0) : palloc(size);
+}
+
+#endif
+
+/*-------------------------------------------------------------------------
+ *
+ * alloca()-based implementation.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef STACK_BUFFER_USE_ALLOCA
+
+#define stack_buffer_total_size_limited true
+/* Limit is the less deep of stack_soft_limit_ptr and sp moved by size bytes. */
+#define DECLARE_STACK_BUFFER_IMPL(size)									\
+	stack_buffer_declare_impl											\
+	const char *stack_buffer_limit =									\
+		stack_buffer_least_deep(stack_soft_limit_ptr,					\
+								stack_buffer_sp +	/* signed offset */	\
+								(ptrdiff_t) (size) * PG_STACK_DIRECTION)
+/* Use alloca() while the prediction stays within the limit, else palloc(). */
+#define stack_buffer_alloc_aligned_impl(size, align)					\
+	(likely(stack_buffer_alloca_would_fit_p((size), (align))) ?			\
+	 stack_buffer_alloca_aligned((size), (align)) :						\
+	 stack_buffer_palloc_aligned((size), (align)))
+/* True if ptr lies in the range from stack_buffer_base to stack_buffer_sp. */
+#define stack_buffer_stack_p(ptr)					\
+	(!stack_ptr_deeper_p((ptr), stack_buffer_sp) &&	\
+	 !stack_ptr_deeper_p(stack_buffer_base, (ptr)))
+
+
+/* Portability details. */
+
+/*
+ * Builtins for interacting with the stack registers, if available.  For all
+ * but __builtin_frame_address, which is older and unlikely to differ across
+ * the selected C, C++ and bitcode compilers, use pg_has_builtin().
+ */
+#if HAVE__BUILTIN_FRAME_ADDRESS
+#define stack_buffer_base ((const char *) __builtin_frame_address(0))
+#endif
+#if pg_has_builtin(__builtin_stack_address)
+#define stack_buffer_sp ((const char *) __builtin_stack_address())
+#endif
+#if pg_has_builtin(__builtin_alloca_with_align)
+#define stack_buffer_alloca_aligned_impl(size, align)		\
+	__builtin_alloca_with_align((size), (align) * CHAR_BIT)	/* wants bits */
+#endif
+
+#if !defined(stack_buffer_alloca_aligned_impl)
+/* Traditional alloca(): over-allocate and round up for stricter alignment. */
+#define stack_buffer_alloca_aligned_impl(size, align)					\
+	((align) <= stack_buffer_alignof_alloca ?							\
+	 alloca(size) :														\
+	 ((void *) TYPEALIGN((align), alloca((size) + (align) - 1))))
+#endif
+
+/*
+ * We assume alloca() and similar return memory aligned at least as strictly
+ * as C requires (max_align_t), for estimation purposes.
+ *
+ * XXX In real systems it's usually higher (eg 16 bytes rather than 8).  We
+ * could estimate more accurately if we had that information.
+ */
+#define stack_buffer_alignof_alloca alignof(max_align_t)
+
+/* Do we need variables to track the bounds of the stack buffer? */
+#if defined(stack_buffer_sp) && defined(stack_buffer_base)
+/* Case 1: nothing to do, we have "base" and "sp" builtins. */
+#define stack_buffer_declare_impl
+#define stack_buffer_alloca_aligned stack_buffer_alloca_aligned_impl
+#else
+/* Cases 2 and 3: we need to track one or both of them ourselves... */
+#if defined(stack_buffer_base)
+/*
+ * Case 2: we have a "base" builtin, but not "sp", so we need a variable.  A
+ * pointer to itself is a good initial value.  The real stack pointer is
+ * probably less deep, as the compiler probably didn't allocate space for some
+ * variables.  That's OK: we err on the cautious side of the truth, and after
+ * the first allocation we'll have an accurate value.
+ */
+#define stack_buffer_declare_impl				\
+	char *stack_buffer_sp = (char *) &stack_buffer_sp;
+#else
+/*
+ * Case 3: we have neither "base" nor "sp".  As case 2, but we'll also use
+ * stack_depth.c's base pointer which is certainly less deep than anything in
+ * this stack frame.
+ *
+ * XXX If we used the address of one of our own stack variables as base, we'd
+ * need to add some kind of slop factor since the compiler might not actually
+ * allocate space to it if never spilled, and then alloca() might return an
+ * address on the wrong side of it, which would break stack_buffer_stack_p().
+ *
+ * XXX Another way to get an initial value for both variables would be to make
+ * a dummy alloca(1) call, wasting (probably) 16 bytes of stack.  It is, after
+ * all, a builtin that gives you the stack pointer, it just happens to consume
+ * space while doing so...
+ */
+#define stack_buffer_declare_impl										\
+	char *stack_buffer_sp = (char *) &stack_buffer_sp;					\
+	char *stack_buffer_base = (char *) stack_base_ptr;
+#endif
+/* Allocation wrapper for cases 2 and 3 that updates stack_buffer_sp. */
+#define stack_buffer_alloca_aligned(size, align)						\
+	((stack_buffer_sp = stack_buffer_alloca_aligned_impl((size),		\
+														 (align))),		\
+	 (PG_STACK_DIRECTION < 0 ? stack_buffer_sp : ((stack_buffer_sp +=	\
+												   (size))) - (size)))
+#endif
+
+/* Like Min() in the direction of stack growth: pick the less deep pointer. */
+#define stack_buffer_least_deep(p1, p2)				\
+	(stack_ptr_deeper_p((p1), (p2)) ? (p2) : (p1))
+
+/*
+ * Predict new end of stack after hypothetical allocation.  This might
+ * undershoot by a few bytes due to stack_buffer_alignof_alloca being too low,
+ * and because we don't bother to do TYPEALIGN(stack_buffer_alignof_alloca,
+ * size), but that doesn't matter much for our purposes.
+ */
+static inline const char *
+stack_buffer_guess_new_sp(const char *sp, size_t size, size_t align)
+{
+	/* Don't multiply by PG_STACK_DIRECTION: unsigned size would wrap around. */
+	if (align <= stack_buffer_alignof_alloca)
+		return PG_STACK_DIRECTION < 0 ? sp - size : sp + size;
+
+	if (PG_STACK_DIRECTION < 0)
+		return (const char *) TYPEALIGN_DOWN(align, sp - size);
+	return (const char *) TYPEALIGN(align, sp) + size;
+}
+
+/* Would the predicted stack pointer stay less deep than stack_buffer_limit? */
+#define stack_buffer_alloca_would_fit_p(size, align)					\
+	(stack_ptr_deeper_p(stack_buffer_limit,								\
+						stack_buffer_guess_new_sp(stack_buffer_sp,		\
+												  (size), (align))))
+
+#endif
+
+#endif
-- 
2.53.0

