From 38ef30684b0248bae0d0e82e3919118ed629613f Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 15 Jul 2021 08:51:35 +0300
Subject: [PATCH v2 1/1] Amcheck for GIN

Author: Grigory Kryachko
Discussion: https://www.postgresql.org/message-id/CAF3eApa07-BajjG8%2BRYx-Dr_cq28ZA0GsZmUQrGu5b2ayRhB5A%40mail.gmail.com
---
 contrib/amcheck/Makefile               |   8 +-
 contrib/amcheck/amcheck--1.3--1.4.sql  |  15 +
 contrib/amcheck/amcheck.c              | 161 ++++++
 contrib/amcheck/amcheck.control        |   2 +-
 contrib/amcheck/amcheck.h              |  23 +
 contrib/amcheck/expected/check_gin.out |  60 +++
 contrib/amcheck/sql/check_gin.sql      |  40 ++
 contrib/amcheck/verify_gin.c           | 695 +++++++++++++++++++++++++
 doc/src/sgml/amcheck.sgml              |  19 +
 9 files changed, 1020 insertions(+), 3 deletions(-)
 create mode 100644 contrib/amcheck/amcheck--1.3--1.4.sql
 create mode 100644 contrib/amcheck/amcheck.c
 create mode 100644 contrib/amcheck/amcheck.h
 create mode 100644 contrib/amcheck/expected/check_gin.out
 create mode 100644 contrib/amcheck/sql/check_gin.sql
 create mode 100644 contrib/amcheck/verify_gin.c

diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index b82f221e50b..3418c30c50b 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -3,14 +3,18 @@
 MODULE_big	= amcheck
 OBJS = \
 	$(WIN32RES) \
+	amcheck.o \
+	verify_gin.o \
 	verify_heapam.o \
 	verify_nbtree.o
 
 EXTENSION = amcheck
-DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
+DATA =  amcheck--1.3--1.4.sql amcheck--1.2--1.3.sql \
+	amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql \
+	amcheck--1.0.sql
 PGFILEDESC = "amcheck - function for verifying relation integrity"
 
-REGRESS = check check_btree check_heap
+REGRESS = check check_btree check_heap check_gin
 
 TAP_TESTS = 1
 
diff --git a/contrib/amcheck/amcheck--1.3--1.4.sql b/contrib/amcheck/amcheck--1.3--1.4.sql
new file mode 100644
index 00000000000..60d9dd1ab94
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.3--1.4.sql
@@ -0,0 +1,15 @@
+/* contrib/amcheck/amcheck--1.3--1.4.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.4'" to load this file. \quit
+
+
+--
+-- gin_index_parent_check()
+--
+CREATE FUNCTION gin_index_parent_check(index regclass)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gin_index_parent_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gin_index_parent_check(regclass) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.c b/contrib/amcheck/amcheck.c
new file mode 100644
index 00000000000..e068b31b431
--- /dev/null
+++ b/contrib/amcheck/amcheck.c
@@ -0,0 +1,161 @@
+/*-------------------------------------------------------------------------
+ *
+ * amcheck.c
+ *		Utility functions common to all access methods.
+ *
+ * Copyright (c) 2017-2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/amcheck/amcheck.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "amcheck.h"
+#include "catalog/index.h"
+#include "commands/tablecmds.h"
+
+
+/*
+ * Lock acquisition reused across different am types
+ */
+void
+amcheck_lock_relation(Oid indrelid, Relation *indrel,
+					  Relation *heaprel, LOCKMODE lockmode)
+{
+	Oid			heapid;
+
+	/*
+	 * We must lock table before index to avoid deadlocks.  However, if the
+	 * passed indrelid isn't an index then IndexGetRelation() will fail.
+	 * Rather than emitting a not-very-helpful error message, postpone
+	 * complaining, expecting that the is-it-an-index test below will fail.
+	 *
+	 * In hot standby mode this will raise an error when lockmode is
+	 * AccessExclusiveLock.
+	 */
+	heapid = IndexGetRelation(indrelid, true);
+	if (OidIsValid(heapid))
+		*heaprel = table_open(heapid, lockmode);
+	else
+		*heaprel = NULL;
+
+	/*
+	 * Open the target index relations separately (like relation_openrv(), but
+	 * with heap relation locked first to prevent deadlocking).  In hot
+	 * standby mode this will raise an error when lockmode is
+	 * AccessExclusiveLock.
+	 *
+	 * There is no need for the usual indcheckxmin usability horizon test
+	 * here, even in the nbtree heapallindexed case, because index undergoing
+	 * verification only needs to have entries for a new transaction snapshot.
+	 * (If caller is about to do nbtree parentcheck verification, there is no
+	 * question about committed or recently dead heap tuples lacking index
+	 * entries due to concurrent activity.)
+	 */
+	*indrel = index_open(indrelid, lockmode);
+
+	/*
+	 * Since we did the IndexGetRelation call above without any lock, it's
+	 * barely possible that a race against an index drop/recreation could have
+	 * netted us the wrong table.
+	 */
+	if (*heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 errmsg("could not open parent table of index %s",
+						RelationGetRelationName(*indrel))));
+}
+
+/*
+ * Unlock index and heap relations early for amcheck_lock_relation() caller.
+ *
+ * This is ok because nothing in the called routines will trigger shared cache
+ * invalidations to be sent, so we can relax the usual pattern of only
+ * releasing locks after commit.
+ */
+void
+amcheck_unlock_relation(Oid indrelid, Relation indrel, Relation heaprel,
+						LOCKMODE lockmode)
+{
+	index_close(indrel, lockmode);
+	if (heaprel)
+		table_close(heaprel, lockmode);
+}
+
+/*
+ * Check if index relation should have a file for its main relation
+ * fork.  Verification uses this to skip unlogged indexes when in hot standby
+ * mode, where there is simply nothing to verify.
+ *
+ * NB: Caller should call btree_index_checkable() or gist_index_checkable()
+ * before calling here.
+ */
+bool
+amcheck_index_mainfork_expected(Relation rel)
+{
+	if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
+		!RecoveryInProgress())
+		return true;
+
+	ereport(NOTICE,
+			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
+			 errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
+					RelationGetRelationName(rel))));
+
+	return false;
+}
+
+/*
+ * PageGetItemId() wrapper that validates returned line pointer.
+ *
+ * Buffer page/page item access macros generally trust that line pointers are
+ * not corrupt, which might cause problems for verification itself.  For
+ * example, there is no bounds checking in PageGetItem().  Passing it a
+ * corrupt line pointer can cause it to return a tuple/pointer that is unsafe
+ * to dereference.
+ *
+ * Validating line pointers before tuples avoids undefined behavior and
+ * assertion failures with corrupt indexes, making the verification process
+ * more robust and predictable.
+ */
+ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+					 OffsetNumber offset, size_t opaquesize)
+{
+	ItemId		itemid = PageGetItemId(page, offset);
+
+	if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+		BLCKSZ - opaquesize)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("line pointer points past end of tuple space in index \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+									block, offset, ItemIdGetOffset(itemid),
+									ItemIdGetLength(itemid),
+									ItemIdGetFlags(itemid))));
+
+	/*
+	 * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since nbtree and gist
+	 * never uses either.  Verify that line pointer has storage, too, since
+	 * even LP_DEAD items should.
+	 */
+// TODO adapt for gin and uncomment
+//	if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+//		ItemIdGetLength(itemid) == 0)
+//		ereport(ERROR,
+//				(errcode(ERRCODE_INDEX_CORRUPTED),
+//				 errmsg("invalid line pointer storage in index \"%s\"",
+//						RelationGetRelationName(rel)),
+//				 errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+//									block, offset, ItemIdGetOffset(itemid),
+//									ItemIdGetLength(itemid),
+//									ItemIdGetFlags(itemid))));
+
+	return itemid;
+}
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index ab50931f754..e67ace01c99 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
 # amcheck extension
 comment = 'functions for verifying relation integrity'
-default_version = '1.3'
+default_version = '1.4'
 module_pathname = '$libdir/amcheck'
 relocatable = true
diff --git a/contrib/amcheck/amcheck.h b/contrib/amcheck/amcheck.h
new file mode 100644
index 00000000000..b19e6171778
--- /dev/null
+++ b/contrib/amcheck/amcheck.h
@@ -0,0 +1,23 @@
+/*-------------------------------------------------------------------------
+ *
+ * amcheck.h
+ *		Shared routines for amcheck verifications.
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/amcheck/amcheck.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "storage/lockdefs.h"
+#include "utils/relcache.h"
+
+extern void amcheck_lock_relation(Oid indrelid, Relation *indrel,
+								  Relation *heaprel, LOCKMODE lockmode);
+extern void amcheck_unlock_relation(Oid indrelid, Relation indrel,
+									Relation heaprel, LOCKMODE lockmode);
+extern bool amcheck_index_mainfork_expected(Relation rel);
+
+extern ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+					 Page page, OffsetNumber offset, size_t opaquesize);
\ No newline at end of file
diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out
new file mode 100644
index 00000000000..35e3f0dc330
--- /dev/null
+++ b/contrib/amcheck/expected/check_gin.out
@@ -0,0 +1,60 @@
+-- minimal test, basically just verifying that amcheck works with GIN
+SELECT setseed(1);
+ setseed 
+---------
+ 
+(1 row)
+
+CREATE TABLE "gin_check"("Column1" int[]);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 1000000) as i group by i % 100000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_idx');
+ gin_index_parent_check 
+------------------------
+ 
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check;
+-- minimal test, basically just verifying that amcheck works with GIN
+SELECT setseed(1);
+ setseed 
+---------
+ 
+(1 row)
+
+CREATE TABLE "gin_check"("Column1" int[]);
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+ALTER INDEX gin_check_idx SET (fastupdate = false);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 1000000) as i group by i % 100000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
+SELECT gin_index_parent_check('gin_check_idx');
+ gin_index_parent_check 
+------------------------
+ 
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check;
+-- minimal test, basically just verifying that amcheck works with GIN
+SELECT setseed(1);
+ setseed 
+---------
+ 
+(1 row)
+
+CREATE TABLE "gin_check_text_array"("Column1" text[]);
+-- posting trees
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 1000000) as i group by i % 100000;
+-- posting leaves
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
+CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_text_array');
+ERROR:  "gin_check_text_array" is not an index
+-- cleanup
+DROP TABLE gin_check_text_array;
diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql
new file mode 100644
index 00000000000..c81db477f11
--- /dev/null
+++ b/contrib/amcheck/sql/check_gin.sql
@@ -0,0 +1,40 @@
+-- minimal test, basically just verifying that amcheck works with GIN
+SELECT setseed(1);
+CREATE TABLE "gin_check"("Column1" int[]);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 1000000) as i group by i % 100000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_idx');
+
+-- cleanup
+DROP TABLE gin_check;
+
+-- minimal test, basically just verifying that amcheck works with GIN
+SELECT setseed(1);
+CREATE TABLE "gin_check"("Column1" int[]);
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+ALTER INDEX gin_check_idx SET (fastupdate = false);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 1000000) as i group by i % 100000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
+
+SELECT gin_index_parent_check('gin_check_idx');
+
+-- cleanup
+DROP TABLE gin_check;
+
+-- minimal test, basically just verifying that amcheck works with GIN
+SELECT setseed(1);
+CREATE TABLE "gin_check_text_array"("Column1" text[]);
+-- posting trees
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 1000000) as i group by i % 100000;
+-- posting leaves
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
+CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_text_array');
+
+-- cleanup
+DROP TABLE gin_check_text_array;
diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c
new file mode 100644
index 00000000000..65104f09ce6
--- /dev/null
+++ b/contrib/amcheck/verify_gin.c
@@ -0,0 +1,695 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gin.c
+ *		Verifies the integrity of GIN indexes based on invariants.
+ *
+ * Verification checks that all paths in GIN graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlinks, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/amcheck/verify_gin.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gin_private.h"
+#include "access/nbtree.h"
+#include "amcheck.h"
+#include "catalog/pg_am.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "string.h"
+
+/*
+ * GinScanItem represents one item of depth-first scan of GIN index.
+ */
+typedef struct GinScanItem
+{
+	int			depth;
+	IndexTuple	parenttup;
+	BlockNumber parentblk;
+	XLogRecPtr	parentlsn;
+	BlockNumber blkno;
+	struct GinScanItem *next;
+}			GinScanItem;
+
+/*
+ * GinPostingTreeScanItem represents one item of depth-first scan of GIN  posting tree.
+ */
+typedef struct GinPostingTreeScanItem
+{
+	int			depth;
+	ItemPointer parentkey;
+	BlockNumber blkno;
+	struct GinPostingTreeScanItem *next;
+}			GinPostingTreeScanItem;
+
+
+PG_FUNCTION_INFO_V1(gin_index_parent_check);
+
+static void gin_index_checkable(Relation rel);
+static void gin_check_parent_keys_consistency(Relation rel);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gin_refind_parent(Relation rel, BlockNumber parentblkno,
+									BlockNumber childblkno,
+									BufferAccessStrategy strategy);
+
+/*
+ * gin_index_parent_check(index regclass)
+ *
+ * Verify integrity of GIN index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gin_index_parent_check(PG_FUNCTION_ARGS)
+{
+	Oid			indrelid = PG_GETARG_OID(0);
+	Relation	indrel;
+	Relation	heaprel;
+	LOCKMODE	lockmode = AccessShareLock;
+
+	/* lock table and index with neccesary level */
+	amcheck_lock_relation(indrelid, &indrel, &heaprel, lockmode);
+
+	/* verify that this is GIN eligible for check */
+	gin_index_checkable(indrel);
+
+	if (amcheck_index_mainfork_expected(indrel))
+		gin_check_parent_keys_consistency(indrel);
+
+	/* Unlock index and table */
+	amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Read item pointers from leaf entry tuple.
+ *
+ * Returns a palloc'd array of ItemPointers. The number of items is returned
+ * in *nitems.
+ */
+static ItemPointer
+ginReadTupleWithoutState(IndexTuple itup, int *nitems)
+{
+	Pointer		ptr = GinGetPosting(itup);
+	int			nipd = GinGetNPosting(itup);
+	ItemPointer ipd;
+	int			ndecoded;
+
+	if (GinItupIsCompressed(itup))
+	{
+		if (nipd > 0)
+		{
+			ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded);
+			if (nipd != ndecoded)
+				elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
+					 nipd, ndecoded);
+		}
+		else
+		{
+			ipd = palloc(0);
+		}
+	}
+	else
+	{
+		ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd);
+		memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd);
+	}
+	*nitems = nipd;
+	return ipd;
+}
+
+
+/*
+ * Check that relation is eligible for GIN verification
+ */
+static void
+gin_index_checkable(Relation rel)
+{
+	if (rel->rd_rel->relkind != RELKIND_INDEX ||
+		rel->rd_rel->relam != GIN_AM_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("only GIN indexes are supported as targets for this verification"),
+				 errdetail("Relation \"%s\" is not a GIN index.",
+						   RelationGetRelationName(rel))));
+
+	if (RELATION_IS_OTHER_TEMP(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot access temporary tables of other sessions"),
+				 errdetail("Index \"%s\" is associated with temporary relation.",
+						   RelationGetRelationName(rel))));
+
+	if (!rel->rd_index->indisvalid)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot check index \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Index is not valid")));
+}
+
+/*
+ * Allocates memory context and scans through postigTree graph
+ *
+ */
+static void
+gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root)
+{
+	BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+	GinPostingTreeScanItem *stack;
+	MemoryContext mctx;
+	MemoryContext oldcontext;
+
+	int			leafdepth;
+
+	mctx = AllocSetContextCreate(CurrentMemoryContext,
+								 "amcheck context",
+								 ALLOCSET_DEFAULT_SIZES);
+	oldcontext = MemoryContextSwitchTo(mctx);
+
+	/*
+	 * We don't know the height of the tree yet, but as soon as we encounter a
+	 * leaf page, we will set 'leafdepth' to its depth.
+	 */
+	leafdepth = -1;
+
+	/* Start the scan at the root page */
+	stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem));
+	stack->depth = 0;
+	stack->parentkey = NULL;
+	stack->blkno = posting_tree_root;
+
+	while (stack)
+	{
+		GinPostingTreeScanItem *stack_next;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber i,
+					maxoff;
+
+		//elog(INFO, "processing block %u", stack->blkno);
+
+		CHECK_FOR_INTERRUPTS();
+
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+									RBM_NORMAL, strategy);
+		LockBuffer(buffer, GIN_SHARE);
+		page = (Page) BufferGetPage(buffer);
+		Assert(GinPageIsData(page));
+
+		/* Check that the tree has the same height in all branches */
+		if (GinPageIsLeaf(page))
+		{
+			ItemPointerData minItem;
+			int			nlist;
+			ItemPointerData *list;
+
+			ItemPointerSetMin(&minItem);
+
+			if (leafdepth == -1)
+				leafdepth = stack->depth;
+			else if (stack->depth != leafdepth)
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+								RelationGetRelationName(rel), stack->blkno)));
+			list = GinDataLeafPageGetItems(page, &nlist, minItem);
+			if (stack->parentkey && ItemPointerCompare(stack->parentkey, &list[0]) < 0)
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u",
+								RelationGetRelationName(rel), stack->blkno)));
+			}
+		}
+		else
+		{
+
+
+			/*
+			 * Check that tuples in each page are properly ordered and
+			 * consistent with parent high key
+			 */
+			maxoff = PageGetMaxOffsetNumber(page);
+			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+			{
+				PostingItem *posting_item = GinDataPageGetPostingItem(page, i);
+
+				if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) == 0 ||
+					ItemPointerGetOffsetNumberNoCheck(&posting_item->key) == 0)
+				{
+					continue;
+				}
+				elog(INFO, "key block %u offset %u", ItemPointerGetBlockNumber(&posting_item->key),
+					 ItemPointerGetOffsetNumber(&posting_item->key));
+
+				if (i != FirstOffsetNumber)
+				{
+					PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1);
+
+					if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0)
+					{
+
+						ereport(ERROR,
+								(errcode(ERRCODE_INDEX_CORRUPTED),
+								 errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
+										RelationGetRelationName(rel), stack->blkno, i)));
+					}
+
+				}
+
+				/*
+				 * Check if this tuple is consistent with the downlink in the
+				 * parent.
+				 */
+				if (stack->parentkey && i == maxoff)
+				{
+					if (ItemPointerCompare(stack->parentkey, &posting_item->key) < 0)
+					{
+						ereport(ERROR,
+								(errcode(ERRCODE_INDEX_CORRUPTED),
+								 errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+										RelationGetRelationName(rel), stack->blkno, i)));
+
+					}
+				}
+
+				/* If this is an internal page, recurse into the child */
+				if (!GinPageIsLeaf(page))
+				{
+					GinPostingTreeScanItem *ptr;
+
+					ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem));
+					ptr->depth = stack->depth + 1;
+					ptr->parentkey = &posting_item->key;
+					ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno);
+					ptr->next = stack->next;
+					stack->next = ptr;
+				}
+
+			}
+		}
+		LockBuffer(buffer, GIN_UNLOCK);
+		ReleaseBuffer(buffer);
+
+		/* Step to next item in the queue */
+		stack_next = stack->next;
+		pfree(stack);
+		stack = stack_next;
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+	MemoryContextDelete(mctx);
+}
+
+static void
+validate_leaf(Page page, Relation rel, BlockNumber blkno)
+{
+	OffsetNumber i,
+				maxoff;
+
+	maxoff = PageGetMaxOffsetNumber(page);
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		ItemId		iid = PageGetItemIdCareful(rel, blkno, page, i, sizeof(GinPageOpaqueData));
+		IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+		if (GinIsPostingTree(idxtuple))
+		{
+/*             elog(INFO, "validating posting tree on page %u, block %u, offset %u", page, blkno, i); */
+			BlockNumber rootPostingTree = GinGetPostingTree(idxtuple);
+
+			gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree);
+		}
+		else
+		{
+/*             elog(INFO, "validating posting list on page %u, block %u, offset %u", page, blkno, i); */
+
+			ItemPointer ipd;
+			int			nipd;
+
+			ipd = ginReadTupleWithoutState(idxtuple, &nipd);
+			if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[nipd - 1])))
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\": posting list contains invalid heap pointer on block %u",
+								RelationGetRelationName(rel), blkno)));
+			}
+			pfree(ipd);
+		}
+	}
+}
+
+/*
+ * Main entry point for GIN check. Allocates memory context and scans through
+ * GIN graph.
+ */
+static void
+gin_check_parent_keys_consistency(Relation rel)
+{
+	BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+	GinScanItem *stack;
+	MemoryContext mctx;
+	MemoryContext oldcontext;
+	GinState	state;
+
+	int			leafdepth;
+
+	mctx = AllocSetContextCreate(CurrentMemoryContext,
+								 "amcheck context",
+								 ALLOCSET_DEFAULT_SIZES);
+	oldcontext = MemoryContextSwitchTo(mctx);
+	initGinState(&state, rel);
+
+	/*
+	 * We don't know the height of the tree yet, but as soon as we encounter a
+	 * leaf page, we will set 'leafdepth' to its depth.
+	 */
+	leafdepth = -1;
+
+	/* Start the scan at the root page */
+	stack = (GinScanItem *) palloc0(sizeof(GinScanItem));
+	stack->depth = 0;
+	stack->parenttup = NULL;
+	stack->parentblk = InvalidBlockNumber;
+	stack->parentlsn = InvalidXLogRecPtr;
+	stack->blkno = GIN_ROOT_BLKNO;
+
+	while (stack)
+	{
+		GinScanItem *stack_next;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber i,
+					maxoff;
+		XLogRecPtr	lsn;
+		IndexTuple	prev_tuple;
+
+		//elog(INFO, "processing block %u", stack->blkno);
+
+		CHECK_FOR_INTERRUPTS();
+
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+									RBM_NORMAL, strategy);
+		LockBuffer(buffer, GIN_SHARE);
+		page = (Page) BufferGetPage(buffer);
+		lsn = BufferGetLSNAtomic(buffer);
+
+		/* Do basic sanity checks on the page headers */
+		check_index_page(rel, buffer, stack->blkno);
+
+		/*
+		 * It's possible that the page was split since we looked at the
+		 * parent, so that we didn't missed the downlink of the right sibling
+		 * when we scanned the parent.  If so, add the right sibling to the
+		 * stack now.
+		 */
+		if (stack->parenttup != NULL)
+		{
+			GinNullCategory parent_key_category;
+			Datum		parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category);
+			OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+			ItemId		iid = PageGetItemIdCareful(rel, stack->blkno, page, maxoff, sizeof(GinPageOpaqueData));
+			IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);
+			OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
+			GinNullCategory page_max_key_category;
+			Datum		page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category);
+
+			if (GinPageGetOpaque(page)->rightlink != InvalidBlockNumber &&
+				ginCompareEntries(&state, attnum, page_max_key, page_max_key_category, parent_key, parent_key_category) > 0)
+			{
+				/* split page detected, install right link to the stack */
+				GinScanItem *ptr;
+
+				elog(INFO, "split detected");
+
+				ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+				ptr->depth = stack->depth;
+				ptr->parenttup = CopyIndexTuple(stack->parenttup);
+				ptr->parentblk = stack->parentblk;
+				ptr->parentlsn = stack->parentlsn;
+				ptr->blkno = GinPageGetOpaque(page)->rightlink;
+				ptr->next = stack->next;
+				stack->next = ptr;
+			}
+		}
+
+		/* Check that the tree has the same height in all branches */
+		if (GinPageIsLeaf(page))
+		{
+			if (leafdepth == -1)
+				leafdepth = stack->depth;
+			else if (stack->depth != leafdepth)
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+								RelationGetRelationName(rel), stack->blkno)));
+		}
+
+		/*
+		 * Check that tuples in each page are properly ordered and consistent
+		 * with parent high key
+		 */
+		maxoff = PageGetMaxOffsetNumber(page);
+		prev_tuple = NULL;
+		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+		{
+			ItemId		iid = PageGetItemIdCareful(rel, stack->blkno, page, i, sizeof(GinPageOpaqueData));
+			IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);
+			OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
+			GinNullCategory prev_key_category;
+			Datum		prev_key;
+			GinNullCategory current_key_category;
+			Datum		current_key;
+
+			if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+								RelationGetRelationName(rel), stack->blkno, i)));
+
+			current_key = gintuple_get_key(&state, idxtuple, &current_key_category);
+/*             elog(INFO, "current_key %s, depth %u", DatumGetCString(current_key), stack->depth); */
+
+			/* (apparently) first block is metadata, skip order check */
+			if (i != FirstOffsetNumber && stack->blkno != (BlockNumber) 1)
+			{
+				prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
+				if (ginCompareEntries(&state, attnum, prev_key, prev_key_category, current_key, current_key_category) >= 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INDEX_CORRUPTED),
+							 errmsg("index \"%s\" has wrong tuple order, block %u, offset %u",
+									RelationGetRelationName(rel), stack->blkno, i)));
+			}
+
+			/*
+			 * Check if this tuple is consistent with the downlink in the
+			 * parent.
+			 */
+			if (stack->parenttup &&
+				i == maxoff)
+			{
+				GinNullCategory parent_key_category;
+				Datum		parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category);
+
+				if (ginCompareEntries(&state, attnum, current_key, current_key_category, parent_key, parent_key_category) > 0)
+				{
+					/*
+					 * There was a discrepancy between parent and child
+					 * tuples. We need to verify it is not a result of
+					 * concurrent call of gistplacetopage(). So, lock parent
+					 * and try to find downlink for current page. It may be
+					 * missing due to concurrent page split, this is OK.
+					 */
+					pfree(stack->parenttup);
+					stack->parenttup = gin_refind_parent(rel, stack->parentblk,
+														 stack->blkno, strategy);
+
+					/* We found it - make a final check before failing */
+					if (!stack->parenttup)
+						elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+							 stack->blkno, stack->parentblk);
+					else
+					{
+						parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category);
+						if (ginCompareEntries(&state, attnum, current_key, current_key_category, parent_key, parent_key_category) > 0)
+							ereport(ERROR,
+									(errcode(ERRCODE_INDEX_CORRUPTED),
+									 errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+											RelationGetRelationName(rel), stack->blkno, i)));
+						else
+						{
+							/*
+							 * But now it is properly adjusted - nothing to do
+							 * here.
+							 */
+						}
+					}
+				}
+			}
+
+			/* If this is an internal page, recurse into the child */
+			if (!GinPageIsLeaf(page))
+			{
+				GinScanItem *ptr;
+
+				ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+				ptr->depth = stack->depth + 1;
+				/* last tuple in layer has no high key */
+				if (i != maxoff && !GinPageGetOpaque(page)->rightlink)
+				{
+					ptr->parenttup = CopyIndexTuple(idxtuple);
+				}
+				else
+				{
+					ptr->parenttup = NULL;
+				}
+				ptr->parentblk = stack->blkno;
+				ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+				ptr->parentlsn = lsn;
+				ptr->next = stack->next;
+				stack->next = ptr;
+			}
+			else
+			{
+				validate_leaf(page, rel, stack->blkno);
+			}
+
+
+			prev_tuple = CopyIndexTuple(idxtuple);
+		}
+
+		LockBuffer(buffer, GIN_UNLOCK);
+		ReleaseBuffer(buffer);
+
+		/* Step to next item in the queue */
+		stack_next = stack->next;
+		if (stack->parenttup)
+			pfree(stack->parenttup);
+		pfree(stack);
+		stack = stack_next;
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+	MemoryContextDelete(mctx);
+}
+
+/*
+ * Verify that a freshly-read page looks sane.
+ */
+static void
+gincheckpage(Relation rel, Buffer buf)
+{
+	Page		page = BufferGetPage(buf);
+
+	/*
+	 * ReadBuffer verifies that every newly-read page passes
+	 * PageHeaderIsValid, which means it either contains a reasonably sane
+	 * page header or is all-zero.  We have to defend against the all-zero
+	 * case, however.
+	 */
+	if (PageIsNew(page))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" contains unexpected zero page at block %u",
+						RelationGetRelationName(rel),
+						BufferGetBlockNumber(buf)),
+				 errhint("Please REINDEX it.")));
+
+	/*
+	 * Additionally check that the special area looks sane.
+	 */
+	if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" contains corrupted page at block %u",
+						RelationGetRelationName(rel),
+						BufferGetBlockNumber(buf)),
+				 errhint("Please REINDEX it.")));
+}
+
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+	Page		page = BufferGetPage(buffer);
+
+	gincheckpage(rel, buffer);
+
+	if (GinPageIsDeleted(page))
+	{
+		if (!GinPageIsLeaf(page))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" has deleted internal page %d",
+							RelationGetRelationName(rel), blockNo)));
+		if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" has deleted page %d with tuples",
+							RelationGetRelationName(rel), blockNo)));
+	}
+	else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has page %d with exceeding count of tuples",
+						RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gin_refind_parent(Relation rel, BlockNumber parentblkno,
+				  BlockNumber childblkno, BufferAccessStrategy strategy)
+{
+	Buffer		parentbuf;
+	Page		parentpage;
+	OffsetNumber o,
+				parent_maxoff;
+	IndexTuple	result = NULL;
+
+	parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+								   strategy);
+
+	LockBuffer(parentbuf, GIN_SHARE);
+	parentpage = BufferGetPage(parentbuf);
+
+	if (GinPageIsLeaf(parentpage))
+	{
+		UnlockReleaseBuffer(parentbuf);
+		return result;
+	}
+
+	parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+	for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+	{
+		ItemId		p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o, sizeof(GinPageOpaqueData));
+		IndexTuple	itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+		if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+		{
+			/* Found it! Make copy and return it */
+			result = CopyIndexTuple(itup);
+			break;
+		}
+	}
+
+	UnlockReleaseBuffer(parentbuf);
+
+	return result;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index a2571d33ae6..ff3ce555f12 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -343,6 +343,25 @@ SET client_min_messages = DEBUG1;
      </variablelist>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term>
+     <function>gin_index_parent_check(index regclass) returns void</function>
+     <indexterm>
+      <primary>gin_index_parent_check</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <function>gin_index_parent_check</function> tests that its target GIN index
+      has consistent parent-child tuples relations (no parent tuples
+      require tuple adjustement) and page graph respects balanced-tree
+      invariants (internal pages reference only leaf page or only internal
+      pages).
+     </para>
+    </listitem>
+   </varlistentry>
   </variablelist>
  </sect2>
 
-- 
2.30.2

