From 778bfa79f06f8b6fd6923b6e149cd64648771eb4 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Tue, 14 Feb 2023 20:28:08 +0100
Subject: [PATCH 3/9] Introduce bloom_filter_size

Wrap calculation of Bloom filter parameters (from ndistinct and false
positive rate) into a function. We'll need to do this calculation in
other places, and this makes it more consistent.
---
 src/backend/access/brin/brin_bloom.c | 63 +++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 16 deletions(-)

diff --git a/src/backend/access/brin/brin_bloom.c b/src/backend/access/brin/brin_bloom.c
index 6c716924fff..4ff80aeb0cc 100644
--- a/src/backend/access/brin/brin_bloom.c
+++ b/src/backend/access/brin/brin_bloom.c
@@ -259,6 +259,48 @@ typedef struct BloomFilter
 	char		data[FLEXIBLE_ARRAY_MEMBER];
 } BloomFilter;
 
+/*
+ * bloom_filter_size
+ *		Calculate Bloom filter parameters (nbits, nbytes, nhashes).
+ *
+ * Given expected number of distinct values and desired false positive rate,
+ * calculates the optimal parameters of the Bloom filter.
+ *
+ * The resulting parameters are returned through nbytesp (number of bytes),
+ * nbitsp (number of bits) and nhashesp (number of hash functions). If a
+ * pointer is NULL, the parameter is not returned.
+ */
+static void
+bloom_filter_size(int ndistinct, double false_positive_rate,
+				  int *nbytesp, int *nbitsp, int *nhashesp)
+{
+	double	k;
+	int		nbits,
+			nbytes;
+
+	/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
+	nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
+
+	/* round m to whole bytes */
+	nbytes = ((nbits + 7) / 8);
+	nbits = nbytes * 8;
+
+	/*
+	 * round(log(2.0) * m / ndistinct), but assume round() may not be
+	 * available on Windows
+	 */
+	k = log(2.0) * nbits / ndistinct;
+	k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
+
+	if (nbytesp)
+		*nbytesp = nbytes;
+
+	if (nbitsp)
+		*nbitsp = nbits;
+
+	if (nhashesp)
+		*nhashesp = (int) k;
+}
 
 /*
  * bloom_init
@@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
 
 	int			nbits;			/* size of filter / number of bits */
 	int			nbytes;			/* size of filter / number of bytes */
-
-	double		k;				/* number of hash functions */
+	int			nhashes;		/* number of hash functions */
 
 	Assert(ndistinct > 0);
 	Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) &&
 		   (false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE));
 
-	/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
-	nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
-
-	/* round m to whole bytes */
-	nbytes = ((nbits + 7) / 8);
-	nbits = nbytes * 8;
+	/* calculate bloom filter size / parameters */
+	bloom_filter_size(ndistinct, false_positive_rate,
+					  &nbytes, &nbits, &nhashes);
 
 	/*
 	 * Reject filters that are obviously too large to store on a page.
@@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
 		elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
 			 BloomMaxFilterSize);
 
-	/*
-	 * round(log(2.0) * m / ndistinct), but assume round() may not be
-	 * available on Windows
-	 */
-	k = log(2.0) * nbits / ndistinct;
-	k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
-
 	/*
 	 * We allocate the whole filter. Most of it is going to be 0 bits, so the
 	 * varlena is easy to compress.
@@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
 	filter = (BloomFilter *) palloc0(len);
 
 	filter->flags = 0;
-	filter->nhashes = (int) k;
+	filter->nhashes = nhashes;
 	filter->nbits = nbits;
 
 	SET_VARSIZE(filter, len);
-- 
2.40.1

