From c6dc1ef00d597d13e79df60eab320dc35fee7336 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 20 Mar 2026 11:01:13 +1300 Subject: [PATCH v2] Introduce bms_offset_members() function Effectively a function to bitshift members by the specified number of bits. We have various fragments of code doing this manually with a bms_next_member() loop. We can do this more efficiently with bitshifting. --- src/backend/nodes/bitmapset.c | 160 ++++++++++++++++++ src/backend/optimizer/plan/setrefs.c | 10 +- src/backend/optimizer/prep/prepjointree.c | 28 +-- src/backend/rewrite/rewriteManip.c | 8 +- src/backend/statistics/extended_stats.c | 12 +- src/include/nodes/bitmapset.h | 1 + .../expected/test_bitmapset.out | 38 +++++ .../test_bitmapset/sql/test_bitmapset.sql | 13 ++ .../test_bitmapset/test_bitmapset--1.0.sql | 8 + .../modules/test_bitmapset/test_bitmapset.c | 87 +++++++++- 10 files changed, 326 insertions(+), 39 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index f053d8c4d64..458f8f69c51 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -991,6 +991,166 @@ bms_replace_members(Bitmapset *a, const Bitmapset *b) return a; } +/* + * bms_offset_members + * Adjust all existing members of the given set by adding 'offset' to + * each member. The offset value may be negative or positive. Members + * which would become negative as a result of a negative offset will be + * removed from the set, whereas too large an offset which would result + * in a member going > INT_MAX will result in an ERROR. + * + * The set is modified in-place when possible, but callers must assume the set + * can be reallocated and use the returned set. + */ +Bitmapset * +bms_offset_members(Bitmapset *a, int offset) +{ + int offset_words; + int offset_bits; + int new_nwords; + int old_nwords; + int32 high_bit; + + Assert(bms_is_valid_set(a)); + + /* Don't go to any effort if there's nothing to do */ + if (a == NULL) + return NULL; + + old_nwords = a->nwords; + offset_words = WORDNUM(offset); + offset_bits = BITNUM(offset); + new_nwords = a->nwords + offset_words; + high_bit = bmw_leftmost_one_pos(a->words[a->nwords - 1]); + + /* Adjust the number of words in the new set to how many we'll need */ + new_nwords += (offset_bits + high_bit) >= BITS_PER_BITMAPWORD; + new_nwords -= (offset_bits + high_bit) < 0; + + /* If we don't need any words then it must be an empty set */ + if (new_nwords <= 0) + { + pfree(a); + return NULL; + } + + /* Handle a positive offset (bitshift left) */ + if (offset > 0) + { + int old_highest = (old_nwords - 1) * BITS_PER_BITMAPWORD + high_bit; + + /* + * An unlikely scenario, but check we're not going to create a member + * greater than PG_INT32_MAX. + */ + if ((uint64) old_highest + offset > PG_INT32_MAX) + elog(ERROR, "bitmapset overflow"); + + /* + * Reallocate the memory for the set if we need more words, otherwise + * we reuse the existing set. + */ + if (new_nwords > a->nwords) + a = (Bitmapset *) repalloc0(a, BITMAPSET_SIZE(old_nwords), + BITMAPSET_SIZE(new_nwords)); + + a->nwords = new_nwords; + + /* Shift words up to higher array elements, if needed */ + if (offset_words > 0) + { + for (int i = old_nwords - 1; i >= 0; i--) + a->words[i + offset_words] = a->words[i]; + + /* zero out the old members from the lower, now unused words */ + for (int i = 0; i < offset_words; i++) + a->words[i] = 0; + } + + /* Now handle shifting up the bits, if needed */ + if (offset_bits > 0) + { + int carry_bits = BITS_PER_BITMAPWORD - offset_bits; + bitmapword prev_carry = 0; + + for (int i = offset_words; i < new_nwords; i++) + { + bitmapword carry = (a->words[i] >> carry_bits); + + /* shift bits up and carry the bits from the previous word */ + a->words[i] = (a->words[i] << offset_bits) | prev_carry; + prev_carry = carry; + } + } + } + + /* Handle negative offset (bitshift right) */ + else + { + /* Make the negative shift_words and shift_bits positive numbers */ + offset_words = 0 - offset_words; + offset_bits = 0 - offset_bits; + + /* + * Shrink the set to store the new number of words. We can't repalloc + * here as we need to still access the memory for the upper words to + * move them. In any case, repalloc with a smaller allocation likely + * is just going to return the same chunk, so doing so would just be + * needless overhead. + */ + a->nwords = new_nwords; + + /* Shift words down to lower elements, if needed */ + if (offset_words > 0) + { + for (int i = 0; i <= new_nwords; i++) + a->words[i] = (i + offset_words) >= old_nwords ? 0 : a->words[i + offset_words]; + + /* + * Don't bother with zeroing the upper words. They're no longer + * visible due to reducing nwords down to the new size. + */ + } + + /* Now handle shifting down the bits, if needed */ + if (offset_bits > 0) + { + int carry_bits = BITS_PER_BITMAPWORD - offset_bits; + bitmapword prev_carry = 0; + + if (new_nwords < old_nwords) + prev_carry = (a->words[new_nwords] << carry_bits); + + for (int i = new_nwords - 1; i >= 0; i--) + { + bitmapword carry = (a->words[i] << carry_bits); + + /* shift bits down and carry the bits from the previous word */ + a->words[i] = (a->words[i] >> offset_bits) | prev_carry; + prev_carry = carry; + } + } + } + +#ifdef REALLOCATE_BITMAPSETS + + /* + * There's no guarantee that the repalloc returned a new pointer, so copy + * and free unconditionally here. + */ + a = bms_copy_and_free(a); +#else + + /* + * That was a fair bit of bit twiddling, so make sure we've still got a + * valid set with no leading zero words + */ + Assert(bms_is_valid_set(a)); +#endif + + return a; +} + /* * bms_add_range * Add members in the range of 'lower' to 'upper' to the set. diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index ff0e875f2a2..86a8d993997 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -2057,16 +2057,10 @@ set_hash_references(PlannerInfo *root, Plan *plan, int rtoffset) static Relids offset_relid_set(Relids relids, int rtoffset) { - Relids result = NULL; - int rtindex; - - /* If there's no offset to apply, we needn't recompute the value */ + /* If there's no offset to apply, we needn't make a copy */ if (rtoffset == 0) return relids; - rtindex = -1; - while ((rtindex = bms_next_member(relids, rtindex)) >= 0) - result = bms_add_member(result, rtindex + rtoffset); - return result; + return bms_offset_members(bms_copy(relids), rtoffset); } /* diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 95bf51606cc..072fd47db9e 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -3751,22 +3751,22 @@ has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars, if (bms_is_member(varno, right_state->nullable_rels)) continue; - /* - * Iterate over attributes and adjust the bitmap indexes by - * FirstLowInvalidHeapAttributeNumber to get the actual attribute - * numbers. - */ - attno = -1; - while ((attno = bms_next_member(attrs, attno)) >= 0) - { - AttrNumber real_attno = attno + FirstLowInvalidHeapAttributeNumber; + /* Find the lowest member to check if system columns are present */ + attno = bms_next_member(attrs, -1); - /* system columns cannot be NULL */ - if (real_attno < 0) - return true; + /* We checked for an empty set above */ + Assert(attno >= 0); - forcednullattnums = bms_add_member(forcednullattnums, real_attno); - } + /* system columns cannot be NULL */ + if (attno + FirstLowInvalidHeapAttributeNumber < 0) + return true; + + /* + * Adjust the bitmap indexes by FirstLowInvalidHeapAttributeNumber to + * get the actual attribute numbers. + */ + forcednullattnums = bms_offset_members(bms_copy(attrs), + FirstLowInvalidHeapAttributeNumber); rte = rt_fetch(varno, root->parse->rtable); diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 4bf4aa0d6d1..e1017ddf42c 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -527,13 +527,7 @@ OffsetVarNodes(Node *node, int offset, int sublevels_up) static Relids offset_relid_set(Relids relids, int offset) { - Relids result = NULL; - int rtindex; - - rtindex = -1; - while ((rtindex = bms_next_member(relids, rtindex)) >= 0) - result = bms_add_member(result, rtindex + offset); - return result; + return bms_offset_members(bms_copy(relids), offset); } /* diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 2b83355d26e..f2c0e2d35d9 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1673,8 +1673,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, */ if (!leakproof) { - Bitmapset *clause_attnums = NULL; - int attnum = -1; + Bitmapset *clause_attnums; /* * We have to check per-column privileges. *attnums has the attnums @@ -1685,13 +1684,8 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, * while attnums within *attnums aren't. Convert *attnums to the * offset style so we can combine the results. */ - while ((attnum = bms_next_member(*attnums, attnum)) >= 0) - { - clause_attnums = - bms_add_member(clause_attnums, - attnum - FirstLowInvalidHeapAttributeNumber); - } - + clause_attnums = bms_offset_members(bms_copy(*attnums), + 0 - FirstLowInvalidHeapAttributeNumber); /* Now merge attnums from *exprs into clause_attnums */ if (*exprs != NIL) pull_varattnos((Node *) *exprs, relid, &clause_attnums); diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 067ec72e99b..748fa2ad58b 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -123,6 +123,7 @@ extern Bitmapset *bms_add_member(Bitmapset *a, int x); extern Bitmapset *bms_del_member(Bitmapset *a, int x); extern Bitmapset *bms_add_members(Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_replace_members(Bitmapset *a, const Bitmapset *b); +extern Bitmapset *bms_offset_members(Bitmapset *a, int offset); extern Bitmapset *bms_add_range(Bitmapset *a, int lower, int upper); extern Bitmapset *bms_int_members(Bitmapset *a, const Bitmapset *b); extern Bitmapset *bms_del_members(Bitmapset *a, const Bitmapset *b); diff --git a/src/test/modules/test_bitmapset/expected/test_bitmapset.out b/src/test/modules/test_bitmapset/expected/test_bitmapset.out index f75cb46b869..3a6f08427cd 100644 --- a/src/test/modules/test_bitmapset/expected/test_bitmapset.out +++ b/src/test/modules/test_bitmapset/expected/test_bitmapset.out @@ -760,6 +760,37 @@ SELECT test_bms_compare(NULL, NULL) AS result; 0 (1 row) +-- bms_offset_members() +-- Ensure overflow detection works +SELECT test_bms_offset_members('(b 1)', 2147483647); +ERROR: bitmapset overflow +SELECT test_bms_offset_members('(b 2)', 2147483646); +ERROR: bitmapset overflow +-- Ensure members are all offset as expected +SELECT test_bms_offset_members('(b 1 3 5)', 1); + test_bms_offset_members +------------------------- + (b 2 4 6) +(1 row) + +SELECT test_bms_offset_members('(b 1 3 5)', -1); + test_bms_offset_members +------------------------- + (b 0 2 4) +(1 row) + +SELECT test_bms_offset_members('(b 31 32 63 64)', 1); + test_bms_offset_members +------------------------- + (b 32 33 64 65) +(1 row) + +SELECT test_bms_offset_members('(b 31 32 63 64)', -1); + test_bms_offset_members +------------------------- + (b 30 31 62 63) +(1 row) + -- bms_add_range() SELECT test_bms_add_range('(b)', -5, 10); -- error ERROR: negative bitmapset member not allowed @@ -1575,4 +1606,11 @@ SELECT test_random_operations(-1, 10000, 81920, 0) > 0 AS result; t (1 row) +-- perform some random test on bms_offset_members() +SELECT test_random_offset_operations(-1, 10000, 1024, 0) AS result; + result +-------- + 10000 +(1 row) + DROP EXTENSION test_bitmapset; diff --git a/src/test/modules/test_bitmapset/sql/test_bitmapset.sql b/src/test/modules/test_bitmapset/sql/test_bitmapset.sql index e75ab8f620a..5c5cfe3fc15 100644 --- a/src/test/modules/test_bitmapset/sql/test_bitmapset.sql +++ b/src/test/modules/test_bitmapset/sql/test_bitmapset.sql @@ -201,6 +201,16 @@ SELECT test_bms_compare('(b 5)', NULL) AS result; SELECT test_bms_compare(NULL, '(b 5)') AS result; SELECT test_bms_compare(NULL, NULL) AS result; +-- bms_offset_members() +-- Ensure overflow detection works +SELECT test_bms_offset_members('(b 1)', 2147483647); +SELECT test_bms_offset_members('(b 2)', 2147483646); +-- Ensure members are all offset as expected +SELECT test_bms_offset_members('(b 1 3 5)', 1); +SELECT test_bms_offset_members('(b 1 3 5)', -1); +SELECT test_bms_offset_members('(b 31 32 63 64)', 1); +SELECT test_bms_offset_members('(b 31 32 63 64)', -1); + -- bms_add_range() SELECT test_bms_add_range('(b)', -5, 10); -- error SELECT test_bms_add_range('(b)', 5, 7) AS result; @@ -403,4 +413,7 @@ SELECT test_bms_nonempty_difference('(b 1 2)', '(b 50 100)') AS result; -- random operations SELECT test_random_operations(-1, 10000, 81920, 0) > 0 AS result; +-- perform some random test on bms_offset_members() +SELECT test_random_offset_operations(-1, 10000, 1024, 0) AS result; + DROP EXTENSION test_bitmapset; diff --git a/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql b/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql index 227ecb5aa3b..ea12ec40057 100644 --- a/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql +++ b/src/test/modules/test_bitmapset/test_bitmapset--1.0.sql @@ -120,6 +120,10 @@ CREATE FUNCTION test_bms_replace_members(text, text) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C; +CREATE FUNCTION test_bms_offset_members(text, integer) +RETURNS text +AS 'MODULE_PATHNAME' LANGUAGE C; + CREATE FUNCTION test_bms_join(text, text) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C; @@ -137,4 +141,8 @@ CREATE FUNCTION test_random_operations(integer, integer, integer, integer) RETURNS integer STRICT AS 'MODULE_PATHNAME' LANGUAGE C; +CREATE FUNCTION test_random_offset_operations(integer, integer, integer, integer) +RETURNS integer STRICT +AS 'MODULE_PATHNAME' LANGUAGE C; + COMMENT ON EXTENSION test_bitmapset IS 'Test code for Bitmapset'; diff --git a/src/test/modules/test_bitmapset/test_bitmapset.c b/src/test/modules/test_bitmapset/test_bitmapset.c index 272bed390a6..1fbf9ffc200 100644 --- a/src/test/modules/test_bitmapset/test_bitmapset.c +++ b/src/test/modules/test_bitmapset/test_bitmapset.c @@ -59,12 +59,14 @@ PG_FUNCTION_INFO_V1(test_bms_add_members); PG_FUNCTION_INFO_V1(test_bms_int_members); PG_FUNCTION_INFO_V1(test_bms_del_members); PG_FUNCTION_INFO_V1(test_bms_replace_members); +PG_FUNCTION_INFO_V1(test_bms_offset_members); PG_FUNCTION_INFO_V1(test_bms_join); PG_FUNCTION_INFO_V1(test_bitmap_hash); PG_FUNCTION_INFO_V1(test_bitmap_match); /* Test utility functions */ PG_FUNCTION_INFO_V1(test_random_operations); +PG_FUNCTION_INFO_V1(test_random_offset_operations); /* Convenient macros to test results */ #define EXPECT_TRUE(expr) \ @@ -530,6 +532,18 @@ test_bms_replace_members(PG_FUNCTION_ARGS) PG_RETURN_BITMAPSET_AS_TEXT(bms1); } +Datum +test_bms_offset_members(PG_FUNCTION_ARGS) +{ + Bitmapset *bms = PG_ARG_GETBITMAPSET(0); + int offset = PG_GETARG_INT32(1); + + /* left input gets recycled */ + bms = bms_offset_members(bms, offset); + + PG_RETURN_BITMAPSET_AS_TEXT(bms); +} + Datum test_bms_join(PG_FUNCTION_ARGS) { @@ -580,7 +594,7 @@ test_bitmap_match(PG_FUNCTION_ARGS) } /* - * Contrary to all the other functions which are one-one mappings with the + * Contrary to most of the other functions which are one-one mappings with the * equivalent C functions, this stresses Bitmapsets in a random fashion for * various operations. * @@ -742,3 +756,74 @@ test_random_operations(PG_FUNCTION_ARGS) PG_RETURN_INT32(total_ops); } + +/* + * Random testing for bms_offset_members(). Generates a random set and then + * picks a number to offset the members by. We then create another set, which + * is built by looping over the members of the random set and performing + * bms_add_member and adding on the offset to create a known good set to + * compare the result of bms_offset_members() to. + * + * Arguments: + * arg1: optional random seed, or < 0 to use a random seed. + * arg2: the number of operations to perform. + * arg3: the maximum bitmapset member number to use in the random set. + * arg4: the minimum bitmapset member number to use in the random set. + */ +Datum +test_random_offset_operations(PG_FUNCTION_ARGS) +{ + Bitmapset *bms1; + Bitmapset *bms2; + pg_prng_state state; + uint64 seed = GetCurrentTimestamp(); + int num_ops; + int max_range; + int min_value; + int member; + + if (PG_GETARG_INT32(0) > 0) + seed = PG_GETARG_INT32(0); + + num_ops = PG_GETARG_INT32(1); + max_range = PG_GETARG_INT32(2); + min_value = PG_GETARG_INT32(3); + + pg_prng_seed(&state, seed); + + for (int op = 0; op < num_ops; op++) + { + int offset; + + bms1 = NULL; + for (int i = 0; i < 10; i++) + { + member = pg_prng_uint32(&state) % max_range + min_value; + + bms1 = bms_add_member(bms1, member); + } + + offset = (pg_prng_uint32(&state) % max_range) - (pg_prng_uint32(&state) % max_range); + + /* create a known-good set the old fashioned way */ + bms2 = NULL; + member = -1; + while ((member = bms_next_member(bms1, member)) >= 0) + { + if (member + offset >= 0) + bms2 = bms_add_member(bms2, member + offset); + } + + /* do the offsetting */ + bms1 = bms_offset_members(bms1, offset); + + /* check against the known-good set */ + if (!bms_equal(bms1, bms2)) + elog(ERROR, "bms_offset_members failed with offset %d seed " INT64_FORMAT, offset, seed); + + bms_free(bms1); + bms_free(bms2); + } + + PG_RETURN_INT32(num_ops); +} -- 2.51.0