diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 7ba3cf635b..6cdc1663d2 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -5,8 +5,10 @@ * * A bitmap set can represent any set of nonnegative integers, although * it is mainly intended for sets where the maximum value is not large, - * say at most a few hundred. By convention, we always represent the - * empty set by a NULL pointer. + * say at most a few hundred. By convention, we always represent a set with + * the minimum possible number of words, i.e, there are never any trailing + * zero words. Enforcing this requires that an empty set is represented as + * NULL. * * * Copyright (c) 2003-2023, PostgreSQL Global Development Group @@ -66,6 +68,22 @@ static bool bms_is_empty_internal(const Bitmapset *a); +#ifdef USE_ASSERT_CHECKING +static void +check_bitmapset_invariants(const Bitmapset *set) +{ + /* Verify that the set does not have any trailing zero words */ + if (set != NULL) + { + int wordnum = set->nwords - 1; + + while (set->words[wordnum--] == 0) + Assert(false); + } +} +#else +#define check_bitmapset_invariants(set) ((void) 0) +#endif /* USE_ASSERT_CHECKING */ /* * bms_copy - make a palloc'd copy of a bitmapset @@ -76,6 +94,8 @@ bms_copy(const Bitmapset *a) Bitmapset *result; size_t size; + check_bitmapset_invariants(a); + if (a == NULL) return NULL; size = BITMAPSET_SIZE(a->nwords); @@ -86,18 +106,12 @@ bms_copy(const Bitmapset *a) /* * bms_equal - are two bitmapsets equal? - * - * This is logical not physical equality; in particular, a NULL pointer will - * be reported as equal to a palloc'd value containing no members. */ bool bms_equal(const Bitmapset *a, const Bitmapset *b) { - const Bitmapset *shorter; - const Bitmapset *longer; - int shortlen; - int longlen; - int i; + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) @@ -108,30 +122,18 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) } else if (b == NULL) return false; - /* Identify shorter and longer input */ - if (a->nwords <= b->nwords) - { - shorter = a; - longer = b; - } - else - { - shorter = b; - longer = a; - } - /* And process */ - shortlen = shorter->nwords; - for (i = 0; i < shortlen; i++) - { - if (shorter->words[i] != longer->words[i]) - return false; - } - longlen = longer->nwords; - for (; i < longlen; i++) + + /* can't be equal if the word counts don't match */ + if (a->nwords != b->nwords) + return false; + + /* check each word matches */ + for (int i = 0; i < a->nwords; i++) { - if (longer->words[i] != 0) + if (a->words[i] != b->words[i]) return false; } + return true; } @@ -146,29 +148,24 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) int bms_compare(const Bitmapset *a, const Bitmapset *b) { - int shortlen; - int i; + int nwords; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) return (b == NULL) ? 0 : -1; else if (b == NULL) return +1; - /* Handle cases where one input is longer than the other */ - shortlen = Min(a->nwords, b->nwords); - for (i = shortlen; i < a->nwords; i++) - { - if (a->words[i] != 0) - return +1; - } - for (i = shortlen; i < b->nwords; i++) - { - if (b->words[i] != 0) - return -1; - } - /* Process words in common */ - i = shortlen; - while (--i >= 0) + + /* the set with the most words must be greater */ + if (a->nwords != b->nwords) + return (a->nwords > b->nwords) ? +1 : -1; + + /* do the actual comparison */ + nwords = a->nwords; + for (int i = 0; i < nwords; i++) { bitmapword aw = a->words[i]; bitmapword bw = b->words[i]; @@ -208,6 +205,8 @@ bms_make_singleton(int x) void bms_free(Bitmapset *a) { + check_bitmapset_invariants(a); + if (a) pfree(a); } @@ -228,7 +227,9 @@ bms_union(const Bitmapset *a, const Bitmapset *b) Bitmapset *result; const Bitmapset *other; int otherlen; - int i; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) @@ -248,7 +249,7 @@ bms_union(const Bitmapset *a, const Bitmapset *b) } /* And union the shorter input into the result */ otherlen = other->nwords; - for (i = 0; i < otherlen; i++) + for (int i = 0; i < otherlen; i++) result->words[i] |= other->words[i]; return result; } @@ -262,7 +263,10 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) Bitmapset *result; const Bitmapset *other; int resultlen; - int i; + int nonzeroidx; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL || b == NULL) @@ -280,14 +284,23 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) } /* And intersect the longer input with the result */ resultlen = result->nwords; - for (i = 0; i < resultlen; i++) + nonzeroidx = -1; + for (int i = 0; i < resultlen; i++) + { result->words[i] &= other->words[i]; + + if (result->words[i] != 0) + nonzeroidx = i; + } /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(result)) + if (nonzeroidx == -1) { pfree(result); return NULL; } + + /* get rid of trailing zero words */ + result->nwords = nonzeroidx + 1; return result; } @@ -299,7 +312,10 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) { Bitmapset *result; int shortlen; - int i; + int nonzeroidx; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) @@ -319,8 +335,19 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) result = bms_copy(a); /* And remove b's bits from result */ shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + nonzeroidx = -1; + for (int i = 0; i < shortlen; i++) + { result->words[i] &= ~b->words[i]; + + if (result->words[i] != 0) + nonzeroidx = i; + } + + /* get rid of trailing zero words */ + result->nwords = nonzeroidx + 1; + Assert(result->nwords != 0); + /* Need not check for empty result, since we handled that case above */ return result; } @@ -331,32 +358,25 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) bool bms_is_subset(const Bitmapset *a, const Bitmapset *b) { - int shortlen; - int longlen; - int i; + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) return true; /* empty set is a subset of anything */ if (b == NULL) return false; - /* Check common words */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + + /* 'a' can't be a subset of 'b' if it contains more words */ + if (a->nwords > b->nwords) + return false; + + /* Check all 'a' members are set in 'b' */ + for (int i = 0; i < a->nwords; i++) { if ((a->words[i] & ~b->words[i]) != 0) return false; } - /* Check extra words */ - if (a->nwords > b->nwords) - { - longlen = a->nwords; - for (; i < longlen; i++) - { - if (a->words[i] != 0) - return false; - } - } return true; } @@ -370,8 +390,9 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) { BMS_Comparison result; int shortlen; - int longlen; - int i; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) @@ -385,7 +406,7 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) /* Check common words */ result = BMS_EQUAL; /* status so far */ shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + for (int i = 0; i < shortlen; i++) { bitmapword aword = a->words[i]; bitmapword bword = b->words[i]; @@ -408,31 +429,17 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) /* Check extra words */ if (a->nwords > b->nwords) { - longlen = a->nwords; - for (; i < longlen; i++) - { - if (a->words[i] != 0) - { - /* a is not a subset of b */ - if (result == BMS_SUBSET1) - return BMS_DIFFERENT; - result = BMS_SUBSET2; - } - } + /* if a has more words then a is not a subset of b */ + if (result == BMS_SUBSET1) + return BMS_DIFFERENT; + return BMS_SUBSET2; } else if (a->nwords < b->nwords) { - longlen = b->nwords; - for (; i < longlen; i++) - { - if (b->words[i] != 0) - { - /* b is not a subset of a */ - if (result == BMS_SUBSET2) - return BMS_DIFFERENT; - result = BMS_SUBSET1; - } - } + /* if b has more words then b is not a subset of a */ + if (result == BMS_SUBSET2) + return BMS_DIFFERENT; + return BMS_SUBSET1; } return result; } @@ -446,6 +453,8 @@ bms_is_member(int x, const Bitmapset *a) int wordnum, bitnum; + check_bitmapset_invariants(a); + /* XXX better to just return false for x<0 ? */ if (x < 0) elog(ERROR, "negative bitmapset member not allowed"); @@ -469,12 +478,13 @@ bms_is_member(int x, const Bitmapset *a) int bms_member_index(Bitmapset *a, int x) { - int i; int bitnum; int wordnum; int result = 0; bitmapword mask; + check_bitmapset_invariants(a); + /* return -1 if not a member of the bitmap */ if (!bms_is_member(x, a)) return -1; @@ -483,7 +493,7 @@ bms_member_index(Bitmapset *a, int x) bitnum = BITNUM(x); /* count bits in preceding words */ - for (i = 0; i < wordnum; i++) + for (int i = 0; i < wordnum; i++) { bitmapword w = a->words[i]; @@ -511,14 +521,16 @@ bool bms_overlap(const Bitmapset *a, const Bitmapset *b) { int shortlen; - int i; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL || b == NULL) return false; /* Check words in common */ shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + for (int i = 0; i < shortlen; i++) { if ((a->words[i] & b->words[i]) != 0) return true; @@ -536,6 +548,8 @@ bms_overlap_list(const Bitmapset *a, const List *b) int wordnum, bitnum; + check_bitmapset_invariants(a); + if (a == NULL || b == NIL) return false; @@ -563,27 +577,23 @@ bms_overlap_list(const Bitmapset *a, const List *b) bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b) { - int shortlen; - int i; + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) return false; if (b == NULL) return true; - /* Check words in common */ - shortlen = Min(a->nwords, b->nwords); - for (i = 0; i < shortlen; i++) + /* if a has more words then it must contain additional members */ + if (a->nwords > b->nwords) + return true; + /* Check all 'a' members are set in 'b' */ + for (int i = 0; i < a->nwords; i++) { if ((a->words[i] & ~b->words[i]) != 0) return true; } - /* Check extra words in a */ - for (; i < a->nwords; i++) - { - if (a->words[i] != 0) - return true; - } return false; } @@ -599,6 +609,8 @@ bms_singleton_member(const Bitmapset *a) int nwords; int wordnum; + check_bitmapset_invariants(a); + if (a == NULL) elog(ERROR, "bitmapset is empty"); nwords = a->nwords; @@ -637,6 +649,8 @@ bms_get_singleton_member(const Bitmapset *a, int *member) int nwords; int wordnum; + check_bitmapset_invariants(a); + if (a == NULL) return false; nwords = a->nwords; @@ -668,6 +682,8 @@ bms_num_members(const Bitmapset *a) int nwords; int wordnum; + check_bitmapset_invariants(a); + if (a == NULL) return 0; nwords = a->nwords; @@ -690,17 +706,20 @@ bms_num_members(const Bitmapset *a) BMS_Membership bms_membership(const Bitmapset *a) { - BMS_Membership result = BMS_EMPTY_SET; - int nwords; - int wordnum; + BMS_Membership result; + + check_bitmapset_invariants(a); if (a == NULL) return BMS_EMPTY_SET; - nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) - { - bitmapword w = a->words[wordnum]; + /* fast path for common case */ + if (a->words[0] != 0 && HAS_MULTIPLE_ONES(a->words[0])) + return BMS_MULTIPLE; + result = BMS_EMPTY_SET; + for (int i = 0; i < a->nwords; i++) + { + bitmapword w = a->words[i]; if (w != 0) { if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w)) @@ -757,6 +776,8 @@ bms_add_member(Bitmapset *a, int x) int wordnum, bitnum; + check_bitmapset_invariants(a); + if (x < 0) elog(ERROR, "negative bitmapset member not allowed"); if (a == NULL) @@ -794,6 +815,8 @@ bms_del_member(Bitmapset *a, int x) int wordnum, bitnum; + check_bitmapset_invariants(a); + if (x < 0) elog(ERROR, "negative bitmapset member not allowed"); if (a == NULL) @@ -822,6 +845,9 @@ bms_add_members(Bitmapset *a, const Bitmapset *b) int otherlen; int i; + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); + /* Handle cases where either input is NULL */ if (a == NULL) return bms_copy(b); @@ -864,6 +890,8 @@ bms_add_range(Bitmapset *a, int lower, int upper) ushiftbits, wordnum; + check_bitmapset_invariants(a); + /* do nothing if nothing is called for, without further checking */ if (upper < lower) return a; @@ -928,8 +956,12 @@ Bitmapset * bms_int_members(Bitmapset *a, const Bitmapset *b) { int shortlen; + int nonzeroidx; int i; + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); + /* Handle cases where either input is NULL */ if (a == NULL) return NULL; @@ -940,16 +972,24 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) } /* Intersect b into a; we need never copy */ shortlen = Min(a->nwords, b->nwords); + nonzeroidx = -1; for (i = 0; i < shortlen; i++) + { a->words[i] &= b->words[i]; - for (; i < a->nwords; i++) - a->words[i] = 0; + + if (a->words[i] != 0) + nonzeroidx = i; + } + /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + if (nonzeroidx == -1) { pfree(a); return NULL; } + + /* get rid of trailing zero words */ + a->nwords = nonzeroidx + 1; return a; } @@ -961,6 +1001,10 @@ bms_del_members(Bitmapset *a, const Bitmapset *b) { int shortlen; int i; + int nonzeroidx; + + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); /* Handle cases where either input is NULL */ if (a == NULL) @@ -969,14 +1013,25 @@ bms_del_members(Bitmapset *a, const Bitmapset *b) return a; /* Remove b's bits from a; we need never copy */ shortlen = Min(a->nwords, b->nwords); + nonzeroidx = -1; for (i = 0; i < shortlen; i++) + { a->words[i] &= ~b->words[i]; + + if (a->words[i] != 0) + nonzeroidx = i; + } + /* If we computed an empty result, we must return NULL */ - if (bms_is_empty_internal(a)) + if (nonzeroidx == -1) { pfree(a); return NULL; } + + /* get rid of trailing zero words */ + a->nwords = nonzeroidx + 1; + return a; } @@ -991,6 +1046,9 @@ bms_join(Bitmapset *a, Bitmapset *b) int otherlen; int i; + check_bitmapset_invariants(a); + check_bitmapset_invariants(b); + /* Handle cases where either input is NULL */ if (a == NULL) return b; @@ -1042,6 +1100,8 @@ bms_next_member(const Bitmapset *a, int prevbit) int wordnum; bitmapword mask; + check_bitmapset_invariants(a); + if (a == NULL) return -2; nwords = a->nwords; @@ -1101,6 +1161,8 @@ bms_prev_member(const Bitmapset *a, int prevbit) int ushiftbits; bitmapword mask; + check_bitmapset_invariants(a); + /* * If set is NULL or if there are no more bits to the right then we've * nothing to do. @@ -1151,6 +1213,8 @@ bms_hash_value(const Bitmapset *a) { int lastword; + check_bitmapset_invariants(a); + if (a == NULL) return 0; /* All empty sets hash to 0 */ for (lastword = a->nwords; --lastword >= 0;)