From 12794631ee790d637a664f81679c9cdd7d00e2c4 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksander@timescale.com>
Date: Mon, 13 Feb 2023 16:15:45 +0300
Subject: [PATCH v3 1/3] Support SK_SEARCHNULL / SK_SEARCHNOTNULL for heap-only
 scans

Previously it was not supported which could be of inconvenience for the
extension authors.

Author: Aleksander Alekseev
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/CAJ7c6TPKeh7UwEN9ORqP_dmR8uxiFhxT8Pecq01zw1HKpTBMBg@mail.gmail.com
---
 src/include/access/skey.h                |  7 +--
 src/include/access/valid.h               | 41 ++++++++++-----
 src/test/regress/expected/heapscan.out   | 33 ++++++++++++
 src/test/regress/expected/test_setup.out | 13 +++++
 src/test/regress/parallel_schedule       |  2 +-
 src/test/regress/regress.c               | 67 ++++++++++++++++++++++++
 src/test/regress/sql/heapscan.sql        | 12 +++++
 src/test/regress/sql/test_setup.sql      | 16 ++++++
 8 files changed, 175 insertions(+), 16 deletions(-)
 create mode 100644 src/test/regress/expected/heapscan.out
 create mode 100644 src/test/regress/sql/heapscan.sql

diff --git a/src/include/access/skey.h b/src/include/access/skey.h
index fbdb23c5c7..81b0530277 100644
--- a/src/include/access/skey.h
+++ b/src/include/access/skey.h
@@ -46,9 +46,10 @@
  * and the sk_strategy, sk_subtype, sk_collation, and sk_func fields are
  * not used (unless set by the index AM).
  *
- * SK_SEARCHARRAY, SK_SEARCHNULL and SK_SEARCHNOTNULL are supported only
- * for index scans, not heap scans; and not all index AMs support them,
- * only those that set amsearcharray or amsearchnulls respectively.
+ * SK_SEARCHARRAY is supported only for index scans, not heap scans; and not all
+ * index AMs support it, only those that set amsearcharray. SK_SEARCHNULL and
+ * SK_SEARCHNOTNULL are supported for heap and index scans but similarly not all
+ * index AMs support them, only those that set amsearchnulls.
  *
  * A ScanKey can also represent an ordering operator invocation, that is
  * an ordering requirement "ORDER BY indexedcol op constant".  This looks
diff --git a/src/include/access/valid.h b/src/include/access/valid.h
index 85d476aab5..477ac75242 100644
--- a/src/include/access/valid.h
+++ b/src/include/access/valid.h
@@ -32,24 +32,41 @@ HeapKeyTest(HeapTuple tuple, TupleDesc tupdesc, int nkeys, ScanKey keys)
 
 	for (; cur_nkeys--; cur_key++)
 	{
-		Datum		atp;
 		bool		isnull;
-		Datum		test;
 
-		if (cur_key->sk_flags & SK_ISNULL)
-			return false;
+		if (cur_key->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL))
+		{
+			/* special case: looking for NULL / NOT NULL values */
+			Assert(cur_key->sk_flags & SK_ISNULL);
 
-		atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull);
+			isnull = heap_attisnull(tuple, cur_key->sk_attno, tupdesc);
 
-		if (isnull)
-			return false;
+			if (isnull && (cur_key->sk_flags & SK_SEARCHNOTNULL))
+				return false;
 
-		test = FunctionCall2Coll(&cur_key->sk_func,
-								 cur_key->sk_collation,
-								 atp, cur_key->sk_argument);
+			if (!isnull && (cur_key->sk_flags & SK_SEARCHNULL))
+				return false;
+		}
+		else
+		{
+			Datum		atp;
+			Datum		test;
 
-		if (!DatumGetBool(test))
-			return false;
+			if (cur_key->sk_flags & SK_ISNULL)
+				return false;
+
+			atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull);
+
+			if (isnull)
+				return false;
+
+			test = FunctionCall2Coll(&cur_key->sk_func,
+									 cur_key->sk_collation,
+									 atp, cur_key->sk_argument);
+
+			if (!DatumGetBool(test))
+				return false;
+		}
 	}
 
 	return true;
diff --git a/src/test/regress/expected/heapscan.out b/src/test/regress/expected/heapscan.out
new file mode 100644
index 0000000000..055bf9bc62
--- /dev/null
+++ b/src/test/regress/expected/heapscan.out
@@ -0,0 +1,33 @@
+-- make sure that initially the table is empty
+SELECT * FROM phonebook;
+ id | name | phone 
+----+------+-------
+(0 rows)
+
+SELECT phonebook_find_first_phone(isnull => false);
+ phonebook_find_first_phone 
+----------------------------
+                         -1
+(1 row)
+
+SELECT phonebook_find_first_phone(isnull => true);
+ phonebook_find_first_phone 
+----------------------------
+                         -1
+(1 row)
+
+INSERT INTO phonebook (id, name, phone) VALUES
+(1, 'Alice', 123456),
+(2, 'Bob', NULL);
+SELECT phonebook_find_first_phone(isnull => false);
+ phonebook_find_first_phone 
+----------------------------
+                          1
+(1 row)
+
+SELECT phonebook_find_first_phone(isnull => true);
+ phonebook_find_first_phone 
+----------------------------
+                          2
+(1 row)
+
diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out
index 5d9e6bf12b..70db759697 100644
--- a/src/test/regress/expected/test_setup.out
+++ b/src/test/regress/expected/test_setup.out
@@ -213,6 +213,19 @@ CREATE FUNCTION get_columns_length(oid[])
     RETURNS int
     AS :'regresslib'
     LANGUAGE C STRICT STABLE PARALLEL SAFE;
+--
+-- These table and function are used for testing the support of SK_SEARCHNULL
+-- and SK_SEARCHNOTNULL flags for heap-only scans.
+--
+CREATE TABLE phonebook(
+    id INT PRIMARY KEY NOT NULL,
+    name NAME NOT NULL,
+    phone INT /* nullable */
+);
+CREATE FUNCTION phonebook_find_first_phone(isnull bool)
+    RETURNS int
+    AS :'regresslib', 'phonebook_find_first_phone'
+    LANGUAGE C;
 -- Use hand-rolled hash functions and operator classes to get predictable
 -- result on different machines.  The hash function for int4 simply returns
 -- the sum of the values passed to it and the one for text returns the length
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df9d8503b..d0eb5433eb 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -119,7 +119,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
 # The stats test resets stats, so nothing else needing stats access can be in
 # this group.
 # ----------
-test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats
+test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats heapscan
 
 # event_trigger depends on create_am and cannot run concurrently with
 # any test that runs DDL
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index bcbc6d910f..c7f2eed699 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -20,6 +20,7 @@
 #include <signal.h>
 
 #include "access/detoast.h"
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/transam.h"
 #include "access/xact.h"
@@ -45,6 +46,7 @@
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/snapmgr.h"
 #include "utils/typcache.h"
 
 #define EXPECT_TRUE(expr)	\
@@ -1262,3 +1264,68 @@ get_columns_length(PG_FUNCTION_ARGS)
 
 	PG_RETURN_INT32(column_offset);
 }
+
+/*
+ * Used for testing SK_SEARCHNULL/SK_SEARCHNOTNULL support for heap-only scans.
+ *
+ * Returns the primary key of the first row found in the "phonebook" table for
+ * which "phone" is NULL or NOT NULL, depending on the "isnull" argument.
+ *
+ * Returns -1 if nothing was found.
+ */
+PG_FUNCTION_INFO_V1(phonebook_find_first_phone);
+
+typedef enum Anum_phonebook
+{
+	Anum_phonebook_id = 1,
+	Anum_phonebook_name,
+	Anum_phonebook_phone,
+	_Anum_phonebook_max,
+}			Anum_phonebook;
+
+#define Natts_phonebook (_Anum_phonebook_max - 1)
+
+Datum
+phonebook_find_first_phone(PG_FUNCTION_ARGS)
+{
+	bool		isnull = PG_GETARG_BOOL(0);
+	int			flags = SK_ISNULL;
+	int32		found_id = -1;
+	Relation	rel;
+	HeapTuple	tup;
+	TableScanDesc scan;
+	Datum		tbl_oid_datum;
+	ScanKeyData scanKey;
+
+	flags |= isnull ? SK_SEARCHNULL : SK_SEARCHNOTNULL;
+	ScanKeyEntryInitialize(
+						   &scanKey,
+						   flags,
+						   Anum_phonebook_phone,
+						   InvalidStrategy, /* no strategy */
+						   InvalidOid,	/* no strategy subtype */
+						   InvalidOid,	/* no collation */
+						   InvalidOid,	/* no reg proc for this */
+						   (Datum) 0);	/* constant */
+
+	tbl_oid_datum = DirectFunctionCall1(to_regclass,
+										CStringGetTextDatum("phonebook"));
+
+	rel = table_open(DatumGetObjectId(tbl_oid_datum), AccessShareLock);
+	scan = table_beginscan(rel, GetTransactionSnapshot(), 1, &scanKey);
+
+	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Datum		values[Natts_phonebook];
+		bool		isnull[Natts_phonebook];
+
+		heap_deform_tuple(tup, RelationGetDescr(rel), values, isnull);
+		found_id = DatumGetInt32(values[Anum_phonebook_id - 1]);
+		break;
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	PG_RETURN_INT32(found_id);
+}
diff --git a/src/test/regress/sql/heapscan.sql b/src/test/regress/sql/heapscan.sql
new file mode 100644
index 0000000000..de6528f3dd
--- /dev/null
+++ b/src/test/regress/sql/heapscan.sql
@@ -0,0 +1,12 @@
+-- make sure that initially the table is empty
+SELECT * FROM phonebook;
+
+SELECT phonebook_find_first_phone(isnull => false);
+SELECT phonebook_find_first_phone(isnull => true);
+
+INSERT INTO phonebook (id, name, phone) VALUES
+(1, 'Alice', 123456),
+(2, 'Bob', NULL);
+
+SELECT phonebook_find_first_phone(isnull => false);
+SELECT phonebook_find_first_phone(isnull => true);
\ No newline at end of file
diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql
index 1b2d434683..25b16e75d6 100644
--- a/src/test/regress/sql/test_setup.sql
+++ b/src/test/regress/sql/test_setup.sql
@@ -262,6 +262,22 @@ CREATE FUNCTION get_columns_length(oid[])
     AS :'regresslib'
     LANGUAGE C STRICT STABLE PARALLEL SAFE;
 
+--
+-- These table and function are used for testing the support of SK_SEARCHNULL
+-- and SK_SEARCHNOTNULL flags for heap-only scans.
+--
+
+CREATE TABLE phonebook(
+    id INT PRIMARY KEY NOT NULL,
+    name NAME NOT NULL,
+    phone INT /* nullable */
+);
+
+CREATE FUNCTION phonebook_find_first_phone(isnull bool)
+    RETURNS int
+    AS :'regresslib', 'phonebook_find_first_phone'
+    LANGUAGE C;
+
 -- Use hand-rolled hash functions and operator classes to get predictable
 -- result on different machines.  The hash function for int4 simply returns
 -- the sum of the values passed to it and the one for text returns the length
-- 
2.34.1

