From bcd35959d96d237208643faa3e9d6ed196a34391 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 30 Jan 2026 23:18:45 +1300 Subject: [PATCH v7 4/4] Various experimental changes --- src/backend/access/common/tupdesc.c | 6 ++ src/backend/executor/execTuples.c | 48 ++++----- src/include/access/tupmacs.h | 155 ++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 29 deletions(-) diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 25364db630a..ca393af67c9 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -105,6 +105,12 @@ populate_compact_attribute_internal(Form_pg_attribute src, elog(ERROR, "invalid attalign value: %c", src->attalign); break; } + + /* Check for unsupported byval attlens */ + if (src->attbyval && src->attlen != sizeof(char) && + src->attlen != sizeof(int16) && src->attlen != sizeof(int32) && + src->attlen != sizeof(int64)) + elog(ERROR, "unsupported byval length: %d", src->attlen); } /* diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 36d0aaed2fb..c3bc010d824 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -1029,24 +1029,26 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp, /* We can only fetch as many attributes as the tuple has. 
*/ natts = Min(HeapTupleHeaderGetNatts(tup), natts); attnum = slot->tts_nvalid; + values = slot->tts_values; + isnull = slot->tts_isnull; firstNonCacheOffsetAttr = Min(tupleDesc->firstNonCachedOffAttr, natts); if (hasnulls) { + tp = (char *) tup + tup->t_hoff; bp = tup->t_bits; firstNullAttr = first_null_attr(bp, natts); firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr); + populate_isnull_array(bp, natts, isnull); } else { + tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits)); bp = NULL; firstNullAttr = natts; + memset(isnull, 0, sizeof(bool) * natts); } - values = slot->tts_values; - isnull = slot->tts_isnull; - tp = (char *) tup + tup->t_hoff; - /* * Handle the portion of the tuple that we have cached the offset for up * to the first NULL attribute. The offset is effectively fixed for these @@ -1065,7 +1067,6 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp, #endif do { - isnull[attnum] = false; cattr = TupleDescCompactAttr(tupleDesc, attnum); #ifdef USE_ASSERT_CHECKING @@ -1101,19 +1102,14 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp, */ for (; attnum < firstNullAttr; attnum++) { - isnull[attnum] = false; cattr = TupleDescCompactAttr(tupleDesc, attnum); - /* align the offset for this attribute */ - off = att_pointer_alignby(off, - cattr->attalignby, - cattr->attlen, - tp + off); - - values[attnum] = fetchatt(cattr, tp + off); - - /* move the offset beyond this attribute */ - off = att_addlength_pointer(off, cattr->attlen, tp + off); + /* align 'off', fetch the datum, and increment off beyond the datum */ + values[attnum] = align_fetch_then_add(tp, + &off, + cattr->attbyval, + cattr->attlen, + cattr->attalignby); } /* @@ -1122,26 +1118,20 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp, */ for (; attnum < natts; attnum++) { - if (att_isnull(attnum, bp)) + if (isnull[attnum]) { values[attnum] = (Datum) 0; - isnull[attnum] = true; 
continue; } - isnull[attnum] = false; cattr = TupleDescCompactAttr(tupleDesc, attnum); - /* align the offset for this attribute */ - off = att_pointer_alignby(off, - cattr->attalignby, - cattr->attlen, - tp + off); - - values[attnum] = fetchatt(cattr, tp + off); - - /* move the offset beyond this attribute */ - off = att_addlength_pointer(off, cattr->attlen, tp + off); + /* align 'off', fetch the datum, and increment off beyond the datum */ + values[attnum] = align_fetch_then_add(tp, + &off, + cattr->attbyval, + cattr->attlen, + cattr->attalignby); } /* diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h index 150a7a24785..21ee3cc3594 100644 --- a/src/include/access/tupmacs.h +++ b/src/include/access/tupmacs.h @@ -16,7 +16,11 @@ #include "catalog/pg_type_d.h" /* for TYPALIGN macros */ #include "port/pg_bitutils.h" +#include "varatt.h" +#ifdef DO_AVX512_VERSION +#include <immintrin.h> +#endif /* * Check a tuple's null bitmap to determine whether the attribute is null. @@ -29,6 +33,90 @@ att_isnull(int ATT, const bits8 *BITS) return !(BITS[ATT >> 3] & (1 << (ATT & 0x07))); } +/* + * populate_isnull_array + * Transform a tuple's null array into a boolean array. + * + * XXX there does not seem to be an efficient way to do this without AVX512. + * Here we use a 256 element array with all possible patterns for 8 isnull + * array elements for each possible byte value for a bitmask element. + */ +static inline void +populate_isnull_array(const bits8 *bits, int natts, bool *isnull) +{ + int n_full_bytes = natts >> 3; + int attnum = n_full_bytes << 3; + bool *isnull_ptr = isnull; + +#ifndef DO_AVX512_VERSION + /* This is 2 kilobytes! 
*/ + static const uint64 isnull_to_array[256] = { + 0x0101010101010101, 0x0101010101010100, 0x0101010101010001, 0x0101010101010000, 0x0101010101000101, 0x0101010101000100, 0x0101010101000001, 0x0101010101000000, + 0x0101010100010101, 0x0101010100010100, 0x0101010100010001, 0x0101010100010000, 0x0101010100000101, 0x0101010100000100, 0x0101010100000001, 0x0101010100000000, + 0x0101010001010101, 0x0101010001010100, 0x0101010001010001, 0x0101010001010000, 0x0101010001000101, 0x0101010001000100, 0x0101010001000001, 0x0101010001000000, + 0x0101010000010101, 0x0101010000010100, 0x0101010000010001, 0x0101010000010000, 0x0101010000000101, 0x0101010000000100, 0x0101010000000001, 0x0101010000000000, + 0x0101000101010101, 0x0101000101010100, 0x0101000101010001, 0x0101000101010000, 0x0101000101000101, 0x0101000101000100, 0x0101000101000001, 0x0101000101000000, + 0x0101000100010101, 0x0101000100010100, 0x0101000100010001, 0x0101000100010000, 0x0101000100000101, 0x0101000100000100, 0x0101000100000001, 0x0101000100000000, + 0x0101000001010101, 0x0101000001010100, 0x0101000001010001, 0x0101000001010000, 0x0101000001000101, 0x0101000001000100, 0x0101000001000001, 0x0101000001000000, + 0x0101000000010101, 0x0101000000010100, 0x0101000000010001, 0x0101000000010000, 0x0101000000000101, 0x0101000000000100, 0x0101000000000001, 0x0101000000000000, + 0x0100010101010101, 0x0100010101010100, 0x0100010101010001, 0x0100010101010000, 0x0100010101000101, 0x0100010101000100, 0x0100010101000001, 0x0100010101000000, + 0x0100010100010101, 0x0100010100010100, 0x0100010100010001, 0x0100010100010000, 0x0100010100000101, 0x0100010100000100, 0x0100010100000001, 0x0100010100000000, + 0x0100010001010101, 0x0100010001010100, 0x0100010001010001, 0x0100010001010000, 0x0100010001000101, 0x0100010001000100, 0x0100010001000001, 0x0100010001000000, + 0x0100010000010101, 0x0100010000010100, 0x0100010000010001, 0x0100010000010000, 0x0100010000000101, 0x0100010000000100, 0x0100010000000001, 0x0100010000000000, + 
0x0100000101010101, 0x0100000101010100, 0x0100000101010001, 0x0100000101010000, 0x0100000101000101, 0x0100000101000100, 0x0100000101000001, 0x0100000101000000, + 0x0100000100010101, 0x0100000100010100, 0x0100000100010001, 0x0100000100010000, 0x0100000100000101, 0x0100000100000100, 0x0100000100000001, 0x0100000100000000, + 0x0100000001010101, 0x0100000001010100, 0x0100000001010001, 0x0100000001010000, 0x0100000001000101, 0x0100000001000100, 0x0100000001000001, 0x0100000001000000, + 0x0100000000010101, 0x0100000000010100, 0x0100000000010001, 0x0100000000010000, 0x0100000000000101, 0x0100000000000100, 0x0100000000000001, 0x0100000000000000, + 0x0001010101010101, 0x0001010101010100, 0x0001010101010001, 0x0001010101010000, 0x0001010101000101, 0x0001010101000100, 0x0001010101000001, 0x0001010101000000, + 0x0001010100010101, 0x0001010100010100, 0x0001010100010001, 0x0001010100010000, 0x0001010100000101, 0x0001010100000100, 0x0001010100000001, 0x0001010100000000, + 0x0001010001010101, 0x0001010001010100, 0x0001010001010001, 0x0001010001010000, 0x0001010001000101, 0x0001010001000100, 0x0001010001000001, 0x0001010001000000, + 0x0001010000010101, 0x0001010000010100, 0x0001010000010001, 0x0001010000010000, 0x0001010000000101, 0x0001010000000100, 0x0001010000000001, 0x0001010000000000, + 0x0001000101010101, 0x0001000101010100, 0x0001000101010001, 0x0001000101010000, 0x0001000101000101, 0x0001000101000100, 0x0001000101000001, 0x0001000101000000, + 0x0001000100010101, 0x0001000100010100, 0x0001000100010001, 0x0001000100010000, 0x0001000100000101, 0x0001000100000100, 0x0001000100000001, 0x0001000100000000, + 0x0001000001010101, 0x0001000001010100, 0x0001000001010001, 0x0001000001010000, 0x0001000001000101, 0x0001000001000100, 0x0001000001000001, 0x0001000001000000, + 0x0001000000010101, 0x0001000000010100, 0x0001000000010001, 0x0001000000010000, 0x0001000000000101, 0x0001000000000100, 0x0001000000000001, 0x0001000000000000, + 0x0000010101010101, 0x0000010101010100, 
0x0000010101010001, 0x0000010101010000, 0x0000010101000101, 0x0000010101000100, 0x0000010101000001, 0x0000010101000000, + 0x0000010100010101, 0x0000010100010100, 0x0000010100010001, 0x0000010100010000, 0x0000010100000101, 0x0000010100000100, 0x0000010100000001, 0x0000010100000000, + 0x0000010001010101, 0x0000010001010100, 0x0000010001010001, 0x0000010001010000, 0x0000010001000101, 0x0000010001000100, 0x0000010001000001, 0x0000010001000000, + 0x0000010000010101, 0x0000010000010100, 0x0000010000010001, 0x0000010000010000, 0x0000010000000101, 0x0000010000000100, 0x0000010000000001, 0x0000010000000000, + 0x0000000101010101, 0x0000000101010100, 0x0000000101010001, 0x0000000101010000, 0x0000000101000101, 0x0000000101000100, 0x0000000101000001, 0x0000000101000000, + 0x0000000100010101, 0x0000000100010100, 0x0000000100010001, 0x0000000100010000, 0x0000000100000101, 0x0000000100000100, 0x0000000100000001, 0x0000000100000000, + 0x0000000001010101, 0x0000000001010100, 0x0000000001010001, 0x0000000001010000, 0x0000000001000101, 0x0000000001000100, 0x0000000001000001, 0x0000000001000000, + 0x0000000000010101, 0x0000000000010100, 0x0000000000010001, 0x0000000000010000, 0x0000000000000101, 0x0000000000000100, 0x0000000000000001, 0x0000000000000000 + }; +#endif + + for (int i = 0; i < n_full_bytes; i++) + { +#ifdef DO_AVX512_VERSION + /* The array isn't required when AVX512 is available. Testing only */ + /* + * XXX requires CFLAGS="-D DO_AVX512_VERSION -march=x86-64-v4" and an + * avx512 machine + */ + + /* + * The bits array has 1s for values and 0s for NULLs. Bit-flip that to + * get 1s for NULLs and use that mask to populate the register with + * true values and zeros (falses) when the mask bit isn't set. 
+ */ + __m128i res = _mm_maskz_set1_epi8(~bits[i], true); + + /* Grab lower 64-bits of the 128-bit register */ + uint64 src = _mm_cvtsi128_si64(res); + + memcpy(isnull_ptr, &src, sizeof(uint64)); +#else + memcpy(isnull_ptr, &isnull_to_array[bits[i]], sizeof(uint64)); +#endif + isnull_ptr += 8; + } + + /* handle remaining attributes */ + for (; attnum < natts; attnum++) + isnull[attnum] = att_isnull(attnum, bits); +} + #ifndef FRONTEND /* * Given an attbyval and an attlen from either a Form_pg_attribute or @@ -71,6 +159,73 @@ fetch_att(const void *T, bool attbyval, int attlen) return PointerGetDatum(T); } +/* + * align_fetch_then_add + * Applies all the functionality of att_pointer_alignby(), fetch_att() + * and att_addlength_pointer(), resulting in *off pointing to the perhaps + * unaligned number of bytes into 'tupptr', ready to deform the next + * attribute. + * + * tupptr: pointer to the beginning of the tuple, after the header and any + * NULL bitmask. + * off: offset in bytes for reading tuple data, possibly unaligned. + * attbyval, attlen, attalignby are values from CompactAttribute. 
+ */ +static inline Datum +align_fetch_then_add(const char *tupptr, uint32 *off, bool attbyval, int attlen, + uint8 attalignby) +{ + Datum res; + + if (attlen > 0) + { + const char *offset_ptr; + + *off = TYPEALIGN(attalignby, *off); + offset_ptr = tupptr + *off; + *off += attlen; + if (attbyval) + { + switch (attlen) + { + case sizeof(char): + return CharGetDatum(*((const char *) offset_ptr)); + case sizeof(int16): + return Int16GetDatum(*((const int16 *) offset_ptr)); + case sizeof(int32): + return Int32GetDatum(*((const int32 *) offset_ptr)); + default: + + /* + * populate_compact_attribute_internal() should have + * checked + */ + Assert(attlen == sizeof(int64)); + return Int64GetDatum(*((const int64 *) offset_ptr)); + } + } + return PointerGetDatum(offset_ptr); + } + else if (attlen == -1) + { + + if (!VARATT_IS_SHORT(tupptr + *off)) + *off = TYPEALIGN(attalignby, *off); + + res = PointerGetDatum(tupptr + *off); + *off += VARSIZE_ANY(DatumGetPointer(res)); + return res; + } + else + { + Assert(attlen == -2); + *off = TYPEALIGN(attalignby, *off); + res = PointerGetDatum(tupptr + *off); + *off += strlen(tupptr + *off) + 1; + return res; + } +} + #ifndef HAVE__BUILTIN_CTZ /* * For returning the 0-based position of the right-most 0 bit of a uint8, or 8 -- 2.51.0