From 39ff4c701b619aee0feba6767d71ffec6ae256ca Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.vondra@postgresql.org>
Date: Sun, 27 Nov 2022 22:44:56 +0100
Subject: [PATCH 3/3] Store BRIN summaries for empty ranges

Instead of ignoring summaries representing ranges with no tuples (e.g.
after a large batch DELETE), store the tuple with the "impossible"
combination of all_nulls/has_nulls flags.

When querying the index, we then consider these ranges as not matching
any condition.
---
 src/backend/access/brin/brin.c                | 58 ++++++-------------
 src/backend/access/brin/brin_tuple.c          | 14 +----
 ...summarization-and-inprogress-insertion.out | 18 +++++-
 ...ummarization-and-inprogress-insertion.spec |  4 +-
 src/test/modules/brin/t/01_workitems.pl       | 11 ++--
 src/test/regress/expected/brin.out            |  2 +-
 src/test/regress/expected/brin_bloom.out      |  2 +-
 src/test/regress/expected/brin_multi.out      |  2 +-
 8 files changed, 46 insertions(+), 65 deletions(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 3ed8eefab86..8cf17156f50 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -591,6 +591,17 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 
 					bval = &dtup->bt_columns[attno - 1];
 
+					/*
+					 * If the range has both allnulls and hasnulls set, it means
+					 * there are no rows in the range, so we can skip it (we have
+					 * scan keys, and we know there's nothing to match).
+					 */
+					if (bval->bv_allnulls && bval->bv_hasnulls)
+					{
+						addrange = false;
+						break;
+					}
+
 					/*
 					 * First check if there are any IS [NOT] NULL scan keys,
 					 * and if we're violating them. In that case we can
@@ -1568,48 +1579,15 @@ form_and_insert_tuple(BrinBuildState *state)
 {
 	BrinTuple  *tup;
 	Size		size;
-	bool		modified = false;
-	BrinMemTuple   *dtuple = state->bs_dtuple;
-	int				i;
 
-	/*
-	 * Check if any rows were processed for the page range represented by this
-	 * memtuple. We initially set both allnulls/hasnulls to true to identify
-	 * if the range is in this initial/empty state.
-	 *
-	 * XXX It should be enough to check only the first summary - either all
-	 * summaries are empty or none of them.
-	 */
-	for (i = 0; i < state->bs_bdesc->bd_tupdesc->natts; i++)
-	{
-		if (!(dtuple->bt_columns[i].bv_allnulls &&
-			  dtuple->bt_columns[i].bv_hasnulls))
-		{
-			modified = true;
-			break;
-		}
-	}
+	tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
+						  state->bs_dtuple, &size);
+	brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
+				  &state->bs_currentInsertBuf, state->bs_currRangeStart,
+				  tup, size);
+	state->bs_numtuples++;
 
-	/*
-	 * If the memtuple was modified (i.e. we added any rows to it), insert it
-	 * into the index. That is, we don't store index tuples not representing
-	 * any rows from table.
-	 *
-	 * XXX This has the undesirable consequence, that if the table has a gap
-	 * (a long sequence of pages with no remaining tuples), we won't have any
-	 * BRIN summaries for this part of the table. Which means that we'll have
-	 * to scan this gap for each bitmap index scan.
-	 */
-	if (modified)
-	{
-		tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
-							  state->bs_dtuple, &size);
-		brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
-					  &state->bs_currentInsertBuf, state->bs_currRangeStart,
-					  tup, size);
-		state->bs_numtuples++;
-		pfree(tup);
-	}
+	pfree(tup);
 }
 
 /*
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 1b5e72cde24..308c12a255b 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -136,13 +136,6 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
 	{
 		int			datumno;
 
-		/*
-		 * We should never get here for memtuples in the initial state, i.e.
-		 * before any rows were added to it.
-		 */
-		Assert(!(tuple->bt_columns[keyno].bv_hasnulls &&
-				 tuple->bt_columns[keyno].bv_allnulls));
-
 		/*
 		 * "allnulls" is set when there's no nonnull value in any row in the
 		 * column; when this happens, there is no data to store.  Thus set the
@@ -594,11 +587,10 @@ brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
 	{
 		int			i;
 
-		Assert(!(allnulls[keyno] && hasnulls[keyno]));
-
 		/*
-		 * Make sure to overwrite the hasnulls flag, because it might have
-		 * been initialized as true by brin_memtuple_initialize.
+		 * Make sure to overwrite the hasnulls flag, because it was initialized
+		 * to true by brin_memtuple_initialize and we don't want to skip it if
+		 * allnulls.
 		 */
 		dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno];
 
diff --git a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out
index 2266012eac7..584ac2602f7 100644
--- a/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out
+++ b/src/test/modules/brin/expected/summarization-and-inprogress-insertion.out
@@ -1,6 +1,12 @@
 Parsed test spec with 2 sessions
 
-starting permutation: s1b s2b s1i s2summ s1c s2c s2check
+starting permutation: s2check s1b s2b s1i s2summ s1c s2c s2check
+step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass);
+itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value   
+----------+------+------+--------+--------+-----------+--------
+         1|     0|     1|f       |t       |f          |{1 .. 1}
+(1 row)
+
 step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
 step s2b: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT 1;
 ?column?
@@ -12,7 +18,7 @@ step s1i: INSERT INTO brin_iso VALUES (1000);
 step s2summ: SELECT brin_summarize_new_values('brinidx'::regclass);
 brin_summarize_new_values
 -------------------------
-                        2
+                        1
 (1 row)
 
 step s1c: COMMIT;
@@ -25,7 +31,13 @@ itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value
 (2 rows)
 
 
-starting permutation: s1b s1i s2vacuum s1c s2check
+starting permutation: s2check s1b s1i s2vacuum s1c s2check
+step s2check: SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass);
+itemoffset|blknum|attnum|allnulls|hasnulls|placeholder|value   
+----------+------+------+--------+--------+-----------+--------
+         1|     0|     1|f       |t       |f          |{1 .. 1}
+(1 row)
+
 step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
 step s1i: INSERT INTO brin_iso VALUES (1000);
 step s2vacuum: VACUUM brin_iso;
diff --git a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec
index 6319ae4c38d..18ba92b7ba1 100644
--- a/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec
+++ b/src/test/modules/brin/specs/summarization-and-inprogress-insertion.spec
@@ -41,5 +41,5 @@ step "s2vacuum"	{ VACUUM brin_iso; }
 
 step "s2check"	{ SELECT * FROM brin_page_items(get_raw_page('brinidx', 2), 'brinidx'::regclass); }
 
-permutation "s1b" "s2b" "s1i" "s2summ" "s1c" "s2c" "s2check"
-permutation "s1b" "s1i" "s2vacuum" "s1c" "s2check"
+permutation "s2check" "s1b" "s2b" "s1i" "s2summ" "s1c" "s2c" "s2check"
+permutation "s2check" "s1b" "s1i" "s2vacuum" "s1c" "s2check"
diff --git a/src/test/modules/brin/t/01_workitems.pl b/src/test/modules/brin/t/01_workitems.pl
index eeec44b0060..3108c02cf4d 100644
--- a/src/test/modules/brin/t/01_workitems.pl
+++ b/src/test/modules/brin/t/01_workitems.pl
@@ -24,21 +24,20 @@ $node->safe_psql(
 	 create index brin_wi_idx on brin_wi using brin (a) with (pages_per_range=1, autosummarize=on);
 	 '
 );
+my $count = $node->safe_psql('postgres',
+	"select count(*) from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)"
+);
+is($count, '1', "initial index state is correct");
 
 $node->safe_psql('postgres',
 	'insert into brin_wi select * from generate_series(1, 100)');
 
-$node->poll_query_until(
-	'postgres',
-	"select pg_relation_size('brin_wi_idx'::regclass) / current_setting('block_size')::int = 3",
-	't');
-
 $node->poll_query_until(
 	'postgres',
 	"select count(*) > 1 from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)",
 	't');
 
-my $count = $node->safe_psql('postgres',
+$count = $node->safe_psql('postgres',
 	"select count(*) > 1 from brin_page_items(get_raw_page('brin_wi_idx', 2), 'brin_wi_idx'::regclass)"
 );
 is($count, 't', "index got summarized");
diff --git a/src/test/regress/expected/brin.out b/src/test/regress/expected/brin.out
index ebc31222354..73fa38396e4 100644
--- a/src/test/regress/expected/brin.out
+++ b/src/test/regress/expected/brin.out
@@ -454,7 +454,7 @@ $$;
 SELECT brin_summarize_range('brin_summarize_idx', 0);
  brin_summarize_range 
 ----------------------
-                    1
+                    0
 (1 row)
 
 -- nothing: already summarized
diff --git a/src/test/regress/expected/brin_bloom.out b/src/test/regress/expected/brin_bloom.out
index 6e847f9113d..32c56a996a2 100644
--- a/src/test/regress/expected/brin_bloom.out
+++ b/src/test/regress/expected/brin_bloom.out
@@ -373,7 +373,7 @@ $$;
 SELECT brin_summarize_range('brin_summarize_bloom_idx', 0);
  brin_summarize_range 
 ----------------------
-                    1
+                    0
 (1 row)
 
 -- nothing: already summarized
diff --git a/src/test/regress/expected/brin_multi.out b/src/test/regress/expected/brin_multi.out
index e65f1c20d4f..f3309f433f8 100644
--- a/src/test/regress/expected/brin_multi.out
+++ b/src/test/regress/expected/brin_multi.out
@@ -407,7 +407,7 @@ $$;
 SELECT brin_summarize_range('brin_summarize_multi_idx', 0);
  brin_summarize_range 
 ----------------------
-                    1
+                    0
 (1 row)
 
 -- nothing: already summarized
-- 
2.38.1

