From e3076287d170a21c9a7816be48b7a44d67192409 Mon Sep 17 00:00:00 2001
From: Claudio Freire <klaussfreire@gmail.com>
Date: Wed, 17 Aug 2016 21:42:42 -0300
Subject: [PATCH 2/2] [B-Tree] Keep indexes sorted by physical location

This attached patch tries to maintain the initial status of B-Tree indexes, which are created with equal-key runs in physical order, during the whole life of the B-Tree, and make key-tid pairs efficiently searchable in the process.

The patch doesn't add an interface to perform key-tid pair searches, nor does it address all potential issues that could arise from the implementation, but passes all tests, and seems to work well with the workloads I have tried until now, which include:

 - dump/restore/compare (with lots of indexes, both unique and non)
 - sanity check of index lookups
 - running a complex application on top of it, no crashes or corruption (detected)

So this is considered a WIP at this point. Some further work remains regarding unique indexes and perhaps crash recovery.
---
 src/backend/access/brin/brin_pageops.c          |   2 +-
 src/backend/access/brin/brin_xlog.c             |   2 +-
 src/backend/access/common/indextuple.c          |  16 ++
 src/backend/access/nbtree/README                |  67 ++++++--
 src/backend/access/nbtree/nbtinsert.c           | 210 +++++++++++++++++++-----
 src/backend/access/nbtree/nbtpage.c             |  13 +-
 src/backend/access/nbtree/nbtsearch.c           | 136 +++++++++++++--
 src/backend/access/nbtree/nbtsort.c             |  59 +++++--
 src/backend/access/nbtree/nbtutils.c            |  16 +-
 src/backend/access/nbtree/nbtxlog.c             |   4 +
 src/backend/storage/page/bufpage.c              |  43 ++++-
 src/backend/utils/sort/tuplesort.c              |   6 +-
 src/include/access/itup.h                       |   1 +
 src/include/access/nbtree.h                     |  21 ++-
 src/include/storage/bufpage.h                   |   4 +-
 src/test/regress/expected/aggregates.out        |   4 +-
 src/test/regress/expected/alter_generic.out     |  66 ++++----
 src/test/regress/expected/alter_table.out       |  20 +--
 src/test/regress/expected/collate.out           |  18 +-
 src/test/regress/expected/create_function_3.out |  36 ++--
 src/test/regress/expected/dependency.out        |   4 +-
 src/test/regress/expected/domain.out            |   4 +-
 src/test/regress/expected/event_trigger.out     |  75 +++++----
 src/test/regress/expected/foreign_data.out      |  24 +--
 src/test/regress/expected/foreign_key.out       |   2 +-
 src/test/regress/expected/inherit.out           |  19 +--
 src/test/regress/expected/matview.out           |  18 +-
 src/test/regress/expected/rowsecurity.out       |   4 +-
 src/test/regress/expected/select_into.out       |   4 +-
 src/test/regress/expected/triggers.out          |   4 +-
 src/test/regress/expected/truncate.out          |   4 +-
 src/test/regress/expected/typed_table.out       |  16 +-
 src/test/regress/expected/updatable_views.out   |  28 +---
 src/test/regress/input/tablespace.source        |   2 +
 src/test/regress/output/tablespace.source       |   6 +-
 src/test/regress/sql/collate.sql                |   2 +
 src/test/regress/sql/foreign_data.sql           |   4 +
 src/test/regress/sql/inherit.sql                |   4 +
 src/test/regress/sql/matview.sql                |   2 +
 src/test/regress/sql/updatable_views.sql        |   4 +
 40 files changed, 680 insertions(+), 294 deletions(-)

diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 6ebfedd..65d4c7a 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -179,7 +179,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
 
 		START_CRIT_SECTION();
 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
-		if (PageAddItemExtended(oldpage, (Item) newtup, newsz, oldoff,
+		if (PageAddItemExtended(oldpage, (Item) newtup, newsz, newtup, 0, oldoff,
 				PAI_OVERWRITE | PAI_ALLOW_FAR_OFFSET) == InvalidOffsetNumber)
 			elog(ERROR, "failed to add BRIN tuple");
 		MarkBufferDirty(oldbuf);
diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c
index 27ba0a9..f417735 100644
--- a/src/backend/access/brin/brin_xlog.c
+++ b/src/backend/access/brin/brin_xlog.c
@@ -193,7 +193,7 @@ brin_xlog_samepage_update(XLogReaderState *record)
 			elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
 
 		PageIndexDeleteNoCompact(page, &offnum, 1);
-		offnum = PageAddItemExtended(page, (Item) brintuple, tuplen, offnum,
+		offnum = PageAddItemExtended(page, (Item) brintuple, tuplen, (Item) brintuple, 0, offnum,
 									 PAI_OVERWRITE | PAI_ALLOW_FAR_OFFSET);
 		if (offnum == InvalidOffsetNumber)
 			elog(PANIC, "brin_xlog_samepage_update: failed to add tuple");
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index 274a6c2..e8d619b 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -441,3 +441,19 @@ CopyIndexTuple(IndexTuple source)
 	memcpy(result, source, size);
 	return result;
 }
+
+/*
+ * Create a palloc'd copy of an index tuple, adding room for
+ * extra header data. headerSize must be MAXALIGN already.
+ */
+IndexTuple
+CopyIndexTupleAddHeader(IndexTuple source, Size headerSize)
+{
+	IndexTuple	result;
+	Size		size;
+
+	size = IndexTupleSize(source);
+	result = (IndexTuple) palloc(size + headerSize);
+	memcpy(((char*)result) + headerSize, source, size);
+	return result;
+}
diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README
index 067d15c..71010d5 100644
--- a/src/backend/access/nbtree/README
+++ b/src/backend/access/nbtree/README
@@ -56,8 +56,9 @@ bounding key in an upper tree level must descend to the left of that
 key to ensure it finds any equal keys in the preceding page.  An
 insertion that sees the high key of its target page is equal to the key
 to be inserted has a choice whether or not to move right, since the new
-key could go on either page.  (Currently, we try to find a page where
-there is room for the new key without a split.)
+key could go on either page.  (Currently, we seek an insertion point
+that keeps the links to heap tuples sorted in physical order,
+equivalent to appending the link as a final sorting key.)
 
 Lehman and Yao don't require read locks, but assume that in-memory
 copies of tree pages are unshared.  Postgres shares in-memory buffers
@@ -146,6 +147,45 @@ each of the resulting pages.  Note we must include the incoming item in
 this calculation, otherwise it is possible to find that the incoming
 item doesn't fit on the split page where it needs to go!
 
+To support physically sorted inserts in non-unique indexes, non-leaf pages
+use a modified index tuple format with two links, one for the descending
+page inside the index itself, and another with the link to the heap
+tuple. This allows searches to use that heap link as a comparison key.
+While having two index tuple formats makes the code complex, the resulting
+space savings justify the inconvenience, since leaf pages, which are
+the most common, don't need any descending link and already contain the
+link to the heap tuple, so they have no overhead over L&Y.
+
+Unique indexes need to scan same-key rows for conflicts. While unique
+indexes can have at most one live tuple for a key, there could be
+arbitrarily many index tuples pointing to various dead (or otherwise
+invisible) tuples, and so the uniqueness check needs to scan the whole
+run of same-key references. Since inserts will seek an insertion point
+that maintains the physical ordering of index tuples, two possible
+situations arise: the insertion point can be on the same page as the
+first index tuple on the same-key run, or it can be on previous pages.
+
+For the first case, a secondary binary search can be performed while
+holding the read lock, and should be the most common case for unique
+indexes without heavy bloat. The second case can be detected with that
+same binary search, when the search points to the first item in the page,
+where it could be the case that there were equal keys on a previous page.
+In this case another descent is made from the last node that descended
+into a different key, remembered in the stack during the initial descent.
+That page could have split, in which case we move further up, at worst
+until the root page. 
+
+For the second descent, the write lock on the insertion page has to be
+traded back for a read lock, otherwise deadlocks could occur, and then
+traded again for a write lock after the descent is done. This results
+in two pinned pages, that will remain pinned for the duration of the
+uniqueness check, which are required for correctness (otherwise concurrent
+inserts could invalidate the uniqueness check). While it entails more
+work, the expectancy that unique indexes won't have large runs of dead
+same-key tuples spanning multiple pages should make it an uncommon 
+occurrence.
+
+
 The Deletion Algorithm
 ----------------------
 
@@ -613,15 +653,20 @@ On a leaf page, the data items are simply links to (TIDs of) tuples
 in the relation being indexed, with the associated key values.
 
 On a non-leaf page, the data items are down-links to child pages with
-bounding keys.  The key in each data item is the *lower* bound for
-keys on that child page, so logically the key is to the left of that
-downlink.  The high key (if present) is the upper bound for the last
-downlink.  The first data item on each such page has no lower bound
---- or lower bound of minus infinity, if you prefer.  The comparison
-routines must treat it accordingly.  The actual key stored in the
-item is irrelevant, and need not be stored at all.  This arrangement
-corresponds to the fact that an L&Y non-leaf page has one more pointer
-than key.
+bounding key/leaf-link pairs. The leaf link is the link to the heap
+tuple associated to the bounding key, which could be stale by now
+(they're not vacuumed) but is a reference for insertions. It is 
+contained on the first IndexTupleData header, of which non-leaf items 
+have two. The second header contains the down-link to the child pages 
+associated with the bounding key. The key in each data item is the 
+*lower* bound for keys on that child page, so logically the key is to 
+the left of that downlink.  The high key (if present) is the upper 
+bound for the last downlink.  The first data item on each such page has 
+no lower bound --- or lower bound of minus infinity, if you prefer.
+The comparison routines must treat it accordingly. The actual key 
+stored in the item is irrelevant, and need not be stored at all. 
+This arrangement corresponds to the fact that an L&Y non-leaf page has 
+one more pointer than key.
 
 Notes to Operator Class Implementors
 ------------------------------------
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index ef69290..9a39df2 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -49,7 +49,7 @@ typedef struct
 static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
 
 static TransactionId _bt_check_unique(Relation rel, IndexTuple itup,
-				 Relation heapRel, Buffer buf, OffsetNumber offset,
+				 Relation heapRel, Buffer buf, Buffer midBuf, OffsetNumber offset,
 				 ScanKey itup_scankey,
 				 IndexUniqueCheck checkUnique, bool *is_unique,
 				 uint32 *speculativeToken);
@@ -79,7 +79,7 @@ static void _bt_checksplitloc(FindSplitData *state,
 				  OffsetNumber firstoldonright, bool newitemonleft,
 				  int dataitemstoleft, Size firstoldonrightsz);
 static bool _bt_pgaddtup(Page page, Size itemsize, IndexTuple itup,
-			 OffsetNumber itup_off);
+			 OffsetNumber itup_off, ItemPointer leaf_tid);
 static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
 			int keysz, ScanKey scankey);
 static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
@@ -111,15 +111,15 @@ _bt_doinsert(Relation rel, IndexTuple itup,
 	int			natts = rel->rd_rel->relnatts;
 	ScanKey		itup_scankey;
 	BTStack		stack;
-	Buffer		buf;
-	OffsetNumber offset;
+	Buffer		buf, nbuf = InvalidBuffer;
+	OffsetNumber offset, first_offset;
 
 	/* we need an insertion scan key to do our search, so build one */
 	itup_scankey = _bt_mkscankey(rel, itup);
 
 top:
 	/* find the first page containing this key */
-	stack = _bt_search(rel, natts, itup_scankey, false, &buf, BT_WRITE, NULL);
+	stack = _bt_search(rel, natts, itup_scankey, &(itup->t_tid), false, &buf, BT_WRITE, NULL, NULL);
 
 	offset = InvalidOffsetNumber;
 
@@ -134,7 +134,7 @@ top:
 	 * move right in the tree.  See Lehman and Yao for an excruciatingly
 	 * precise description.
 	 */
-	buf = _bt_moveright(rel, buf, natts, itup_scankey, false,
+	buf = _bt_moveright(rel, buf, natts, itup_scankey, &(itup->t_tid), false,
 						true, stack, BT_WRITE, NULL);
 
 	/*
@@ -163,9 +163,76 @@ top:
 		TransactionId xwait;
 		uint32		speculativeToken;
 
-		offset = _bt_binsrch(rel, buf, natts, itup_scankey, false);
-		xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey,
-								 checkUnique, &is_unique, &speculativeToken);
+		first_offset = _bt_binsrch(rel, buf, natts, itup_scankey, NULL, false, NULL);
+		if (first_offset <= P_FIRSTDATAKEY((BTPageOpaque) PageGetSpecialPointer(BufferGetPage(buf)))) {
+			/* 
+			 * The actual first item may be on a previous page. Walking back may be slow,
+			 * so we'll re-execute the search from the first inner page that didn't follow
+			 * an equal key. We need to relinquish the lock on buf, however, while we do this,
+			 * or we'll deadlock since we'll most likely hit buf again.
+			 * After that, we'll have to re-lock it before checking for uniqueness (otherwise
+			 * concurrent inserts may invalidate the check), and re-check for page splits.
+			 */
+			BTStack base = stack;
+			BTStack substack;
+
+			while (base != NULL && BTStackIsEqualKey(base))
+				base = base->bts_parent;
+
+			/* TODO: check for base splits */
+
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+			LockBuffer(buf, BT_READ);
+
+			substack = _bt_search(rel, natts, itup_scankey, NULL, false, &nbuf, BT_WRITE, NULL, base);
+			first_offset = _bt_binsrch(rel, nbuf, natts, itup_scankey, NULL, false, NULL);
+			_bt_freestack_to_base(substack, base);
+
+			if (BufferGetBlockNumber(nbuf) == BufferGetBlockNumber(buf)) {
+				/* 
+				 * Same block, so it was the first item by pure chance. Do the uniqueness check with the
+				 * write-locked buffer as a starting point. We have to release nbuf first or
+				 * locking it will deadlock.
+				 */
+				_bt_relbuf(rel, nbuf);
+
+				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+				LockBuffer(buf, BT_WRITE);
+
+				buf = _bt_moveright(rel, buf, natts, itup_scankey, &(itup->t_tid), false,
+									true, stack, BT_WRITE, NULL);
+
+				first_offset = _bt_binsrch(rel, buf, natts, itup_scankey, NULL, false, NULL);
+
+				xwait = _bt_check_unique(rel, itup, heapRel, buf, InvalidBuffer, first_offset, itup_scankey,
+										 checkUnique, &is_unique, &speculativeToken);
+			} else {
+				/* 
+				 * We found the actual first item on another block, so we have to perform
+				 * a two-step search - first half, until our write-locked buffer, then another
+				 * starting from our write-locked buffer.
+				 */
+				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+				LockBuffer(buf, BT_WRITE);
+
+				buf = _bt_moveright(rel, buf, natts, itup_scankey, &(itup->t_tid), false,
+									true, stack, BT_WRITE, NULL);
+
+				first_offset = _bt_binsrch(rel, buf, natts, itup_scankey, NULL, false, NULL);
+
+				xwait = _bt_check_unique(rel, itup, heapRel, nbuf, buf, first_offset, itup_scankey,
+										 checkUnique, &is_unique, &speculativeToken);
+
+				_bt_relbuf(rel, nbuf);
+			}
+		} else {
+			/* 
+			 * A midpoint first_offset guarantees earlier pages don't contain equal keys,
+			 * so we can make the check right away. This should be a common path for unique indexes.
+			 */
+			xwait = _bt_check_unique(rel, itup, heapRel, buf, InvalidBuffer, first_offset, itup_scankey,
+									 checkUnique, &is_unique, &speculativeToken);
+		}
 
 		if (TransactionIdIsValid(xwait))
 		{
@@ -237,7 +304,7 @@ top:
  */
 static TransactionId
 _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
-				 Buffer buf, OffsetNumber offset, ScanKey itup_scankey,
+				 Buffer buf, Buffer midBuf, OffsetNumber offset, ScanKey itup_scankey,
 				 IndexUniqueCheck checkUnique, bool *is_unique,
 				 uint32 *speculativeToken)
 {
@@ -249,10 +316,16 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 	BTPageOpaque opaque;
 	Buffer		nbuf = InvalidBuffer;
 	bool		found = false;
+	BlockNumber	midBlkno;
 
 	/* Assume unique until we find a duplicate */
 	*is_unique = true;
 
+	if (!BufferIsInvalid(midBuf))
+		midBlkno = BufferGetBlockNumber(midBuf);
+	else
+		midBlkno = 0;
+
 	InitDirtySnapshot(SnapshotDirty);
 
 	page = BufferGetPage(buf);
@@ -472,9 +545,16 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
 			for (;;)
 			{
 				nblkno = opaque->btpo_next;
-				nbuf = _bt_relandgetbuf(rel, nbuf, nblkno, BT_READ);
-				page = BufferGetPage(nbuf);
-				opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+				if (BufferIsInvalid(midBuf) || midBlkno != nblkno) {
+					nbuf = _bt_relandgetbuf(rel, nbuf, nblkno, BT_READ);
+					page = BufferGetPage(nbuf);
+					opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+				} else {
+					_bt_relbuf(rel, nbuf);
+					nbuf = InvalidBuffer;
+					page = BufferGetPage(midBuf);
+					opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+				}
 				if (!P_IGNORE(opaque))
 					break;
 				if (P_RIGHTMOST(opaque))
@@ -548,7 +628,7 @@ _bt_findinsertloc(Relation rel,
 {
 	Buffer		buf = *bufptr;
 	Page		page = BufferGetPage(buf);
-	Size		itemsz;
+	Size		itemsz, reservesz;
 	BTPageOpaque lpageop;
 	bool		movedright,
 				vacuumed;
@@ -561,6 +641,8 @@ _bt_findinsertloc(Relation rel,
 	itemsz = MAXALIGN(itemsz);	/* be safe, PageAddItem will do this but we
 								 * need to be consistent */
 
+	reservesz = P_ISLEAF(lpageop) ? sizeof(IndexTupleData) : 0;
+
 	/*
 	 * Check whether the item can fit on a btree page at all. (Eventually, we
 	 * ought to try to apply TOAST methods if not.) We actually need to be
@@ -570,7 +652,7 @@ _bt_findinsertloc(Relation rel,
 	 *
 	 * NOTE: if you change this, see also the similar code in _bt_buildadd().
 	 */
-	if (itemsz > BTMaxItemSize(page))
+	if ((itemsz+reservesz) > BTMaxItemSize(page))
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 			errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
@@ -629,7 +711,7 @@ _bt_findinsertloc(Relation rel,
 		 * nope, so check conditions (b) and (c) enumerated above
 		 */
 		if (P_RIGHTMOST(lpageop) ||
-			_bt_compare(rel, keysz, scankey, page, P_HIKEY) != 0 ||
+			_bt_compare_tid(rel, keysz, scankey, &(newtup->t_tid), page, P_HIKEY) != 0 ||
 			random() <= (MAX_RANDOM_VALUE / 100))
 			break;
 
@@ -690,7 +772,7 @@ _bt_findinsertloc(Relation rel,
 	else if (firstlegaloff != InvalidOffsetNumber && !vacuumed)
 		newitemoff = firstlegaloff;
 	else
-		newitemoff = _bt_binsrch(rel, buf, keysz, scankey, false);
+		newitemoff = _bt_binsrch(rel, buf, keysz, scankey, &(newtup->t_tid), false, NULL);
 
 	*bufptr = buf;
 	*offsetptr = newitemoff;
@@ -757,7 +839,6 @@ _bt_insertonpg(Relation rel,
 	itemsz = IndexTupleDSize(*itup);
 	itemsz = MAXALIGN(itemsz);	/* be safe, PageAddItem will do this but we
 								 * need to be consistent */
-
 	/*
 	 * Do we need to split the page to fit the item on it?
 	 *
@@ -809,6 +890,8 @@ _bt_insertonpg(Relation rel,
 		BTMetaPageData *metad = NULL;
 		OffsetNumber itup_off;
 		BlockNumber itup_blkno;
+		ItemPointer leaf_tid;
+		IndexTuple	oitup = itup;
 
 		itup_off = newitemoff;
 		itup_blkno = BufferGetBlockNumber(buf);
@@ -839,7 +922,11 @@ _bt_insertonpg(Relation rel,
 		/* Do the update.  No ereport(ERROR) until changes are logged */
 		START_CRIT_SECTION();
 
-		if (!_bt_pgaddtup(page, itemsz, itup, newitemoff))
+		leaf_tid = &(itup->t_tid);
+		if (!P_ISLEAF(lpageop) && IndexTupleSize(itup) > sizeof(IndexTupleData))
+			itup++;
+
+		if (!_bt_pgaddtup(page, itemsz, itup, newitemoff, leaf_tid))
 			elog(PANIC, "failed to add new item to block %u in index \"%s\"",
 				 itup_blkno, RelationGetRelationName(rel));
 
@@ -913,7 +1000,7 @@ _bt_insertonpg(Relation rel,
 									sizeof(IndexTupleData));
 			}
 			else
-				XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
+				XLogRegisterBufData(0, (char *) oitup, IndexTupleDSize(*oitup));
 
 			recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
@@ -957,7 +1044,7 @@ _bt_insertonpg(Relation rel,
  */
 static Buffer
 _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
-		  OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem,
+		  OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem, 
 		  bool newitemonleft)
 {
 	Buffer		rbuf;
@@ -975,6 +1062,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 	Size		itemsz;
 	ItemId		itemid;
 	IndexTuple	item;
+	ItemPointer	leaf_tid;
 	OffsetNumber leftoff,
 				rightoff;
 	OffsetNumber maxoff;
@@ -1105,12 +1193,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 		itemsz = ItemIdGetLength(itemid);
 		item = (IndexTuple) PageGetItem(origpage, itemid);
 
+		leaf_tid = &(item->t_tid);
+		if (!isleaf && IndexTupleSize(item) > sizeof(IndexTupleData)) 
+			item++;
+
 		/* does new item belong before this one? */
 		if (i == newitemoff)
 		{
 			if (newitemonleft)
 			{
-				if (!_bt_pgaddtup(leftpage, newitemsz, newitem, leftoff))
+				if (!_bt_pgaddtup(leftpage, newitemsz, newitem, leftoff, leaf_tid))
 				{
 					memset(rightpage, 0, BufferGetPageSize(rbuf));
 					elog(ERROR, "failed to add new item to the left sibling"
@@ -1121,7 +1213,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 			}
 			else
 			{
-				if (!_bt_pgaddtup(rightpage, newitemsz, newitem, rightoff))
+				if (!_bt_pgaddtup(rightpage, newitemsz, newitem, rightoff, leaf_tid))
 				{
 					memset(rightpage, 0, BufferGetPageSize(rbuf));
 					elog(ERROR, "failed to add new item to the right sibling"
@@ -1135,7 +1227,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 		/* decide which page to put it on */
 		if (i < firstright)
 		{
-			if (!_bt_pgaddtup(leftpage, itemsz, item, leftoff))
+			if (!_bt_pgaddtup(leftpage, itemsz, item, leftoff, leaf_tid))
 			{
 				memset(rightpage, 0, BufferGetPageSize(rbuf));
 				elog(ERROR, "failed to add old item to the left sibling"
@@ -1146,7 +1238,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 		}
 		else
 		{
-			if (!_bt_pgaddtup(rightpage, itemsz, item, rightoff))
+			if (!_bt_pgaddtup(rightpage, itemsz, item, rightoff, leaf_tid))
 			{
 				memset(rightpage, 0, BufferGetPageSize(rbuf));
 				elog(ERROR, "failed to add old item to the right sibling"
@@ -1166,7 +1258,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
 		 * not be splitting the page).
 		 */
 		Assert(!newitemonleft);
-		if (!_bt_pgaddtup(rightpage, newitemsz, newitem, rightoff))
+		leaf_tid = &(item->t_tid);
+		if (!isleaf) {
+			Assert(newitemsz >= 2*sizeof(IndexTupleData));
+			item = newitem+1;
+			itemsz = newitemsz - sizeof(IndexTupleData);
+		} else {
+			item = newitem;
+			itemsz = newitemsz;
+		}
+		if (!_bt_pgaddtup(rightpage, itemsz, item, rightoff, leaf_tid))
 		{
 			memset(rightpage, 0, BufferGetPageSize(rbuf));
 			elog(ERROR, "failed to add new item to the right sibling"
@@ -1668,17 +1769,18 @@ _bt_insert_parent(Relation rel,
 		BlockNumber bknum = BufferGetBlockNumber(buf);
 		BlockNumber rbknum = BufferGetBlockNumber(rbuf);
 		Page		page = BufferGetPage(buf);
-		IndexTuple	new_item;
+		IndexTuple	new_item, onew_item;
 		BTStackData fakestack;
 		IndexTuple	ritem;
 		Buffer		pbuf;
+		BTPageOpaque lpageop;
+
+		lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
 		if (stack == NULL)
 		{
-			BTPageOpaque lpageop;
 
 			elog(DEBUG2, "concurrent ROOT page split");
-			lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
 			/* Find the leftmost page at the next level up */
 			pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false,
 									NULL);
@@ -1696,7 +1798,16 @@ _bt_insert_parent(Relation rel,
 										 PageGetItemId(page, P_HIKEY));
 
 		/* form an index tuple that points at the new right page */
-		new_item = CopyIndexTuple(ritem);
+                if (P_ISLEAF(lpageop)) {
+			/* If we're on a leaf page, we need to add the leaf tid */
+			new_item = onew_item = CopyIndexTupleAddHeader(ritem, sizeof(IndexTupleData));
+			new_item->t_info = IndexTupleSize(ritem) + sizeof(IndexTupleData);
+			ItemPointerCopy(&(ritem->t_tid), &(new_item->t_tid));
+		} else {
+			/* Otherwise, it's already there */
+			new_item = onew_item = CopyIndexTuple(ritem);
+		}
+		new_item++;
 		ItemPointerSet(&(new_item->t_tid), rbknum, P_HIKEY);
 
 		/*
@@ -1722,11 +1833,11 @@ _bt_insert_parent(Relation rel,
 
 		/* Recursively update the parent */
 		_bt_insertonpg(rel, pbuf, buf, stack->bts_parent,
-					   new_item, stack->bts_offset + 1,
+					   onew_item, stack->bts_offset + 1,
 					   is_only);
 
 		/* be tidy */
-		pfree(new_item);
+		pfree(onew_item);
 	}
 }
 
@@ -1861,6 +1972,8 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
 			{
 				itemid = PageGetItemId(page, offnum);
 				item = (IndexTuple) PageGetItem(page, itemid);
+				if (IndexTupleSize(item) > sizeof(IndexTupleData))
+					item++;
 				if (BTEntrySame(item, &stack->bts_btentry))
 				{
 					/* Return accurate pointer to where link is now */
@@ -1876,6 +1989,8 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
 			{
 				itemid = PageGetItemId(page, offnum);
 				item = (IndexTuple) PageGetItem(page, itemid);
+				if (IndexTupleSize(item) > sizeof(IndexTupleData))
+					item++;
 				if (BTEntrySame(item, &stack->bts_btentry))
 				{
 					/* Return accurate pointer to where link is now */
@@ -1971,8 +2086,16 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
 	itemid = PageGetItemId(lpage, P_HIKEY);
 	right_item_sz = ItemIdGetLength(itemid);
 	item = (IndexTuple) PageGetItem(lpage, itemid);
-	right_item = CopyIndexTuple(item);
-	ItemPointerSet(&(right_item->t_tid), rbkno, P_HIKEY);
+	if (P_ISLEAF(lopaque)) {
+		/* We need to add the leaf tid */
+		right_item = CopyIndexTupleAddHeader(item, sizeof(IndexTupleData));
+		right_item->t_info = IndexTupleSize(item) + sizeof(IndexTupleData);
+		ItemPointerCopy(&(item->t_tid), &(right_item->t_tid));
+		right_item_sz += sizeof(IndexTupleData);
+	} else {
+		right_item = CopyIndexTuple(item);
+	}
+	ItemPointerSet(&((right_item+1)->t_tid), rbkno, P_HIKEY);
 
 	/* NO EREPORT(ERROR) from here till newroot op is logged */
 	START_CRIT_SECTION();
@@ -2092,10 +2215,11 @@ static bool
 _bt_pgaddtup(Page page,
 			 Size itemsize,
 			 IndexTuple itup,
-			 OffsetNumber itup_off)
+			 OffsetNumber itup_off,
+                         ItemPointer leaf_tid)
 {
 	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-	IndexTupleData trunctuple;
+	IndexTupleData trunctuple, inner_header;
 
 	if (!P_ISLEAF(opaque) && itup_off == P_FIRSTDATAKEY(opaque))
 	{
@@ -2105,9 +2229,17 @@ _bt_pgaddtup(Page page,
 		itemsize = sizeof(IndexTupleData);
 	}
 
-	if (PageAddItem(page, (Item) itup, itemsize, itup_off,
-					false, false) == InvalidOffsetNumber)
-		return false;
+        if (!P_ISLEAF(opaque)) {
+                ItemPointerCopy(leaf_tid, &(inner_header.t_tid));
+                inner_header.t_info = IndexTupleDSize(*itup) + sizeof(IndexTuple);
+		if (PageAddItemWithHeader(page, (Item) itup, itemsize, (Item) &inner_header, sizeof(inner_header), itup_off,
+						false, false) == InvalidOffsetNumber)
+			return false;
+	} else {
+		if (PageAddItem(page, (Item) itup, itemsize, itup_off,
+						false, false) == InvalidOffsetNumber)
+			return false;
+	}
 
 	return true;
 }
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 2001dc1..9a6a3ed 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1207,9 +1207,10 @@ _bt_pagedel(Relation rel, Buffer buf)
 				IndexTuple	targetkey;
 				Buffer		lbuf;
 				BlockNumber leftsib;
+				int	isinner = (P_ISLEAF(opaque) ? 0 : 1);
 
 				itemid = PageGetItemId(page, P_HIKEY);
-				targetkey = CopyIndexTuple((IndexTuple) PageGetItem(page, itemid));
+				targetkey = CopyIndexTuple(((IndexTuple) PageGetItem(page, itemid)) + isinner);
 
 				leftsib = opaque->btpo_prev;
 
@@ -1254,8 +1255,8 @@ _bt_pagedel(Relation rel, Buffer buf)
 				/* we need an insertion scan key for the search, so build one */
 				itup_scankey = _bt_mkscankey(rel, targetkey);
 				/* find the leftmost leaf page containing this key */
-				stack = _bt_search(rel, rel->rd_rel->relnatts, itup_scankey,
-								   false, &lbuf, BT_READ, NULL);
+				stack = _bt_search(rel, rel->rd_rel->relnatts, itup_scankey, NULL,
+								   false, &lbuf, BT_READ, NULL, NULL);
 				/* don't need a pin on the page */
 				_bt_relbuf(rel, lbuf);
 
@@ -1391,12 +1392,16 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
 #ifdef USE_ASSERT_CHECKING
 	itemid = PageGetItemId(page, topoff);
 	itup = (IndexTuple) PageGetItem(page, itemid);
+        if (IndexTupleSize(itup) > sizeof(IndexTupleData))
+		itup++;
 	Assert(ItemPointerGetBlockNumber(&(itup->t_tid)) == target);
 #endif
 
 	nextoffset = OffsetNumberNext(topoff);
 	itemid = PageGetItemId(page, nextoffset);
 	itup = (IndexTuple) PageGetItem(page, itemid);
+        if (IndexTupleSize(itup) > sizeof(IndexTupleData))
+		itup++;
 	if (ItemPointerGetBlockNumber(&(itup->t_tid)) != rightsib)
 		elog(ERROR, "right sibling %u of block %u is not next child %u of block %u in index \"%s\"",
 			 rightsib, target, ItemPointerGetBlockNumber(&(itup->t_tid)),
@@ -1422,6 +1427,8 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
 
 	itemid = PageGetItemId(page, topoff);
 	itup = (IndexTuple) PageGetItem(page, itemid);
+        if (IndexTupleSize(itup) > sizeof(IndexTupleData))
+		itup++;
 	ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
 
 	nextoffset = OffsetNumberNext(topoff);
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index ee46023..e476104 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -88,15 +88,22 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
  * to be created and returned.  When access = BT_READ, an empty index
  * will result in *bufP being set to InvalidBuffer.  Also, in BT_WRITE mode,
  * any incomplete splits encountered during the search will be finished.
+ *
+ * If the stack_top parameter is not NULL, it will continue a partial
+ * search from the specified stack position. The returned stack will inherit
+ * the given stack.
  */
 BTStack
-_bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
-		   Buffer *bufP, int access, Snapshot snapshot)
+_bt_search(Relation rel, int keysz, ScanKey scankey, ItemPointer scantid, bool nextkey,
+		   Buffer *bufP, int access, Snapshot snapshot, BTStack stack_top)
 {
-	BTStack		stack_in = NULL;
+	BTStack		stack_in = stack_top;
 
 	/* Get the root page to start with */
-	*bufP = _bt_getroot(rel, access);
+	if (stack_top == NULL)
+		*bufP = _bt_getroot(rel, access);
+	else
+		*bufP = _bt_getbuf(rel, stack_top->bts_blkno, access);
 
 	/* If index is empty and access = BT_READ, no root page is created. */
 	if (!BufferIsValid(*bufP))
@@ -113,6 +120,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
 		BlockNumber blkno;
 		BlockNumber par_blkno;
 		BTStack		new_stack;
+		bool	isequal;
 
 		/*
 		 * Race -- the page we just grabbed may have split since we read its
@@ -126,7 +134,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
 		 * if the leaf page is split and we insert to the parent page).  But
 		 * this is a good opportunity to finish splits of internal pages too.
 		 */
-		*bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey,
+		*bufP = _bt_moveright(rel, *bufP, keysz, scankey, scantid, nextkey,
 							  (access == BT_WRITE), stack_in,
 							  BT_READ, snapshot);
 
@@ -140,9 +148,11 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
 		 * Find the appropriate item on the internal page, and get the child
 		 * page that it points to.
 		 */
-		offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey);
+		offnum = _bt_binsrch(rel, *bufP, keysz, scankey, scantid, nextkey, &isequal);
 		itemid = PageGetItemId(page, offnum);
 		itup = (IndexTuple) PageGetItem(page, itemid);
+                if (IndexTupleSize(itup) > sizeof(IndexTupleData))
+			itup++;
 		blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
 		par_blkno = BufferGetBlockNumber(*bufP);
 
@@ -161,6 +171,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
 		new_stack->bts_offset = offnum;
 		memcpy(&new_stack->bts_btentry, itup, sizeof(IndexTupleData));
 		new_stack->bts_parent = stack_in;
+		new_stack->bts_flags = (isequal) ? BTS_EQUAL_KEY : 0;
 
 		/* drop the read lock on the parent page, acquire one on the child */
 		*bufP = _bt_relandgetbuf(rel, *bufP, blkno, BT_READ);
@@ -211,6 +222,7 @@ _bt_moveright(Relation rel,
 			  Buffer buf,
 			  int keysz,
 			  ScanKey scankey,
+                          ItemPointer scantid,
 			  bool nextkey,
 			  bool forupdate,
 			  BTStack stack,
@@ -219,7 +231,7 @@ _bt_moveright(Relation rel,
 {
 	Page		page;
 	BTPageOpaque opaque;
-	int32		cmpval;
+	int32		cmpval, cmpres;
 
 	/*
 	 * When nextkey = false (normal case): if the scan key that brought us to
@@ -271,7 +283,13 @@ _bt_moveright(Relation rel,
 			continue;
 		}
 
-		if (P_IGNORE(opaque) || _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval)
+                if (P_IGNORE(opaque)) {
+                    cmpres = cmpval;
+                } else {
+                    cmpres = _bt_compare_tid(rel, keysz, scankey, scantid, page, P_HIKEY);
+                }
+
+		if (cmpres >= cmpval)
 		{
 			/* step right one page */
 			buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access);
@@ -311,6 +329,9 @@ _bt_moveright(Relation rel,
  * right place to descend to be sure we find all leaf keys >= given scankey
  * (or leaf keys > given scankey when nextkey is true).
  *
+ * When scantid != NULL and is valid, leaf tids will also be compared against
+ * scantid to return the location of the given scankey-scantid pair.
+ *
  * This procedure is not responsible for walking right, it just examines
  * the given page.  _bt_binsrch() has no lock or refcount side effects
  * on the buffer.
@@ -320,7 +341,9 @@ _bt_binsrch(Relation rel,
 			Buffer buf,
 			int keysz,
 			ScanKey scankey,
-			bool nextkey)
+                        ItemPointer scantid,
+			bool nextkey,
+			bool *isequal)
 {
 	Page		page;
 	BTPageOpaque opaque;
@@ -328,6 +351,12 @@ _bt_binsrch(Relation rel,
 				high;
 	int32		result,
 				cmpval;
+	bool lisequal;
+
+        /* Local copies to allow the compiler to optimize better */
+        BlockNumber scantidBlock;
+        OffsetNumber scantidOffset;
+	IndexTuple itup;
 
 	page = BufferGetPage(buf);
 	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -335,6 +364,14 @@ _bt_binsrch(Relation rel,
 	low = P_FIRSTDATAKEY(opaque);
 	high = PageGetMaxOffsetNumber(page);
 
+        if (scantid != NULL && ItemPointerIsValid(scantid)) {
+            scantidBlock = ItemPointerGetBlockNumber(scantid);
+            scantidOffset = ItemPointerGetOffsetNumber(scantid);
+        } else if (scantid != NULL) {
+            /* Simplify future checks, we don't care about an invalid scantid */
+            scantid = NULL;
+        }
+
 	/*
 	 * If there are no keys on the page, return the first available slot. Note
 	 * this covers two cases: the page is really empty (no keys), or it
@@ -342,8 +379,11 @@ _bt_binsrch(Relation rel,
 	 * This can never happen on an internal page, however, since they are
 	 * never empty (an internal page must have children).
 	 */
-	if (high < low)
+	if (high < low) {
+		if (isequal != NULL) 
+			*isequal = false;
 		return low;
+	}
 
 	/*
 	 * Binary search to find the first key on the page >= scan key, or first
@@ -368,6 +408,17 @@ _bt_binsrch(Relation rel,
 		/* We have low <= mid < high, so mid points at a real slot */
 
 		result = _bt_compare(rel, keysz, scankey, page, mid);
+                if (scantid != NULL && result == 0) {
+		    lisequal = true;
+                    itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
+                    if (ItemPointerGetBlockNumber(&(itup->t_tid)) != scantidBlock) {
+                        result = (scantidBlock < ItemPointerGetBlockNumber(&(itup->t_tid))) ? -1 : 1;
+                    } else {
+                        result = scantidOffset - ItemPointerGetOffsetNumber(&(itup->t_tid));
+                    }
+                } else {
+		    lisequal = false;
+		}
 
 		if (result >= cmpval)
 			low = mid + 1;
@@ -375,6 +426,9 @@ _bt_binsrch(Relation rel,
 			high = mid;
 	}
 
+	if (isequal != NULL)
+		*isequal = lisequal;
+
 	/*
 	 * At this point we have high == low, but be careful: they could point
 	 * past the last slot on the page.
@@ -440,6 +494,10 @@ _bt_compare(Relation rel,
 		return 1;
 
 	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+        if (!P_ISLEAF(opaque) && IndexTupleSize(itup) > sizeof(IndexTupleData)) {
+            /* Skip leaf header copy */
+            itup++;
+        }
 
 	/*
 	 * The scan key is set up with the attribute number associated with each
@@ -508,6 +566,54 @@ _bt_compare(Relation rel,
 	return 0;
 }
 
+/*----------
+ *	_bt_compare_tid() -- Compare scankey-scantid pairs to a particular tuple 
+ * on the page.
+ *
+ * The passed scankey-scantid pair must be an insertion-type scankey 
+ * (see nbtree/README), but it can omit the rightmost column(s) of the index.
+ *
+ *	keysz: number of key conditions to be checked (might be less than the
+ *		number of index columns!)
+ *	page/offnum: location of btree item to be compared to.
+ *
+ *		This routine returns:
+ *			<0 if scankey-scantid < tuple at offnum;
+ *			 0 if scankey-scantid == tuple at offnum;
+ *			>0 if scankey-scantid > tuple at offnum.
+ *		NULLs in the keys are treated as sortable values.  Therefore
+ *		"equality" does not necessarily mean that the item should be
+ *		returned to the caller as a matching key!
+ *      scantid: item pointer to look for, can be NULL, in which case
+ *              the function will behave just like _bt_compare(). It must not
+ *              be given unless the scankey includes all index columns, or the
+ *              result won't be valid (it's up to the caller to check).
+ *
+ * See _bt_compare()
+ *----------
+ */
+int32
+_bt_compare_tid(Relation rel,
+			int keysz,
+			ScanKey scankey,
+			ItemPointer scantid,
+			Page page,
+			OffsetNumber offnum)
+{
+        IndexTuple itup;
+	int32 result;
+	result = _bt_compare(rel, keysz, scankey, page, offnum);
+        if (scantid != NULL && result == 0) {
+            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+            if (ItemPointerGetBlockNumber(&(itup->t_tid)) != ItemPointerGetBlockNumber(scantid)) {
+                result = (ItemPointerGetBlockNumber(scantid) < ItemPointerGetBlockNumber(&(itup->t_tid))) ? -1 : 1;
+            } else {
+                result = ItemPointerGetOffsetNumber(scantid) - ItemPointerGetOffsetNumber(&(itup->t_tid));
+            }
+        }
+        return result;
+}
+
 /*
  *	_bt_first() -- Find the first item in a scan.
  *
@@ -980,8 +1086,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	 * Use the manufactured insertion scan key to descend the tree and
 	 * position ourselves on the target leaf page.
 	 */
-	stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ,
-					   scan->xs_snapshot);
+	stack = _bt_search(rel, keysCount, scankeys, NULL, nextkey, &buf, BT_READ,
+					   scan->xs_snapshot, NULL);
 
 	/* don't need to keep the stack around... */
 	_bt_freestack(stack);
@@ -1014,7 +1120,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	Assert(so->markItemIndex == -1);
 
 	/* position to the precise item on the page */
-	offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);
+	offnum = _bt_binsrch(rel, buf, keysCount, scankeys, NULL, nextkey, NULL);
 
 	/*
 	 * If nextkey = false, we are positioned at the first item >= scan key, or
@@ -1637,6 +1743,10 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
 			offnum = P_FIRSTDATAKEY(opaque);
 
 		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+	        if (!P_ISLEAF(opaque) && IndexTupleSize(itup) > sizeof(IndexTupleData)) {
+        	    /* Skip leaf header copy */
+	            itup++;
+        	}
 		blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
 
 		buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 99a014e..2cebf87 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -108,6 +108,7 @@ typedef struct BTPageState
 	Page		btps_page;		/* workspace for page building */
 	BlockNumber btps_blkno;		/* block # to write this page at */
 	IndexTuple	btps_minkey;	/* copy of minimum key (first item) on page */
+        ItemPointerData btps_minkey_tid;/* copy of minimum key's original tid, as in the leaf page */
 	OffsetNumber btps_lastoff;	/* last item offset loaded */
 	uint32		btps_level;		/* tree level (0 = leaf) */
 	Size		btps_full;		/* "full" if less than this much free space */
@@ -132,9 +133,10 @@ static Page _bt_blnewpage(uint32 level);
 static BTPageState *_bt_pagestate(BTWriteState *wstate, uint32 level);
 static void _bt_slideleft(Page page);
 static void _bt_sortaddtup(Page page, Size itemsize,
-			   IndexTuple itup, OffsetNumber itup_off);
+			   IndexTuple itup, OffsetNumber itup_off, 
+                           ItemPointer leaf_tid);
 static void _bt_buildadd(BTWriteState *wstate, BTPageState *state,
-			 IndexTuple itup);
+			 IndexTuple itup, ItemPointer leaf_tid);
 static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state);
 static void _bt_load(BTWriteState *wstate,
 		 BTSpool *btspool, BTSpool *btspool2);
@@ -397,10 +399,11 @@ static void
 _bt_sortaddtup(Page page,
 			   Size itemsize,
 			   IndexTuple itup,
-			   OffsetNumber itup_off)
+			   OffsetNumber itup_off,
+                           ItemPointer leaf_tid)
 {
 	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-	IndexTupleData trunctuple;
+	IndexTupleData trunctuple, inner_header;
 
 	if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
 	{
@@ -410,9 +413,17 @@ _bt_sortaddtup(Page page,
 		itemsize = sizeof(IndexTupleData);
 	}
 
-	if (PageAddItem(page, (Item) itup, itemsize, itup_off,
-					false, false) == InvalidOffsetNumber)
-		elog(ERROR, "failed to add item to the index page");
+        if (!P_ISLEAF(opaque)) {
+                ItemPointerCopy(leaf_tid, &(inner_header.t_tid));
+                inner_header.t_info = IndexTupleDSize(*itup) + sizeof(inner_header);
+		if (PageAddItemWithHeader(page, (Item) itup, itemsize, (Item) &inner_header, sizeof(inner_header), itup_off,
+						false, false) == InvalidOffsetNumber)
+			elog(ERROR, "failed to add item to the index page");
+	} else {
+		if (PageAddItem(page, (Item) itup, itemsize, itup_off,
+						false, false) == InvalidOffsetNumber)
+			elog(ERROR, "failed to add item to the index page");
+	}
 }
 
 /*----------
@@ -449,13 +460,13 @@ _bt_sortaddtup(Page page,
  *----------
  */
 static void
-_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
+_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup, ItemPointer leaf_tid)
 {
 	Page		npage;
 	BlockNumber nblkno;
 	OffsetNumber last_off;
 	Size		pgspc;
-	Size		itupsz;
+	Size		itupsz, reservesz;
 
 	/*
 	 * This is a handy place to check for cancel interrupts during the btree
@@ -470,6 +481,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 	pgspc = PageGetFreeSpace(npage);
 	itupsz = IndexTupleDSize(*itup);
 	itupsz = MAXALIGN(itupsz);
+	reservesz = (P_ISLEAF((BTPageOpaque) PageGetSpecialPointer(npage))) ? sizeof(IndexTupleData) : 0;
 
 	/*
 	 * Check whether the item can fit on a btree page at all. (Eventually, we
@@ -482,7 +494,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 	 * oversize items being inserted into an already-existing index. But
 	 * during creation of an index, we don't go through there.
 	 */
-	if (itupsz > BTMaxItemSize(npage))
+	if ((itupsz+reservesz) > BTMaxItemSize(npage))
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 			errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
@@ -510,6 +522,8 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 		ItemId		ii;
 		ItemId		hii;
 		IndexTuple	oitup;
+                Size            oituplen;
+                ItemPointerData oituptid;
 
 		/* Create new page of same level */
 		npage = _bt_blnewpage(state->btps_level);
@@ -527,7 +541,15 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 		Assert(last_off > P_FIRSTKEY);
 		ii = PageGetItemId(opage, last_off);
 		oitup = (IndexTuple) PageGetItem(opage, ii);
-		_bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);
+                oituplen = ItemIdGetLength(ii);
+                ItemPointerCopy(&(oitup->t_tid), &oituptid);
+                if (state->btps_level > 0) {
+                    /* Skip the leaf header */
+                    Assert(oituplen >= (2 * sizeof(IndexTupleData)));
+                    oituplen -= sizeof(IndexTupleData);
+                    oitup++;
+                }
+		_bt_sortaddtup(npage, oituplen, oitup, P_FIRSTKEY, &oituptid);
 
 		/*
 		 * Move 'last' into the high key position on opage
@@ -546,8 +568,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 			state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);
 
 		Assert(state->btps_minkey != NULL);
+		/* Don't clobber oituptid: it must still hold oitup's leaf tid below */
 		ItemPointerSet(&(state->btps_minkey->t_tid), oblkno, P_HIKEY);
-		_bt_buildadd(wstate, state->btps_next, state->btps_minkey);
+		_bt_buildadd(wstate, state->btps_next, state->btps_minkey, &(state->btps_minkey_tid));
 		pfree(state->btps_minkey);
 
 		/*
@@ -556,6 +579,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 		 * level.
 		 */
 		state->btps_minkey = CopyIndexTuple(oitup);
+		ItemPointerCopy(&oituptid, &(state->btps_minkey_tid));
 
 		/*
 		 * Set the sibling links for both pages.
@@ -591,13 +615,14 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
 	{
 		Assert(state->btps_minkey == NULL);
 		state->btps_minkey = CopyIndexTuple(itup);
+                ItemPointerCopy(leaf_tid, &(state->btps_minkey_tid));
 	}
 
 	/*
 	 * Add the new item into the current page.
 	 */
 	last_off = OffsetNumberNext(last_off);
-	_bt_sortaddtup(npage, itupsz, itup, last_off);
+	_bt_sortaddtup(npage, itupsz, itup, last_off, leaf_tid);
 
 	state->btps_page = npage;
 	state->btps_blkno = nblkno;
@@ -644,7 +669,7 @@ _bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
 		{
 			Assert(s->btps_minkey != NULL);
 			ItemPointerSet(&(s->btps_minkey->t_tid), blkno, P_HIKEY);
-			_bt_buildadd(wstate, s->btps_next, s->btps_minkey);
+			_bt_buildadd(wstate, s->btps_next, s->btps_minkey, &(s->btps_minkey_tid));
 			pfree(s->btps_minkey);
 			s->btps_minkey = NULL;
 		}
@@ -774,7 +799,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 
 			if (load1)
 			{
-				_bt_buildadd(wstate, state, itup);
+				_bt_buildadd(wstate, state, itup, &(itup->t_tid));
 				if (should_free)
 					pfree(itup);
 				itup = tuplesort_getindextuple(btspool->sortstate,
@@ -782,7 +807,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 			}
 			else
 			{
-				_bt_buildadd(wstate, state, itup2);
+				_bt_buildadd(wstate, state, itup2, &(itup2->t_tid));
 				if (should_free2)
 					pfree(itup2);
 				itup2 = tuplesort_getindextuple(btspool2->sortstate,
@@ -801,7 +826,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 			if (state == NULL)
 				state = _bt_pagestate(wstate, 0);
 
-			_bt_buildadd(wstate, state, itup);
+			_bt_buildadd(wstate, state, itup, &(itup->t_tid));
 			if (should_free)
 				pfree(itup);
 		}
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 5d335c7..3e4030a 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -161,11 +161,11 @@ _bt_freeskey(ScanKey skey)
  * free a retracement stack made by _bt_search.
  */
 void
-_bt_freestack(BTStack stack)
+_bt_freestack_to_base(BTStack stack, BTStack base)
 {
 	BTStack		ostack;
 
-	while (stack != NULL)
+	while (stack != base)
 	{
 		ostack = stack;
 		stack = stack->bts_parent;
@@ -173,6 +173,14 @@ _bt_freestack(BTStack stack)
 	}
 }
 
+/*
+ * free a retracement stack made by _bt_search.
+ */
+void
+_bt_freestack(BTStack stack)
+{
+	_bt_freestack_to_base(stack, NULL);
+}
 
 /*
  *	_bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
@@ -1363,6 +1371,7 @@ _bt_checkkeys(IndexScanDesc scan,
 	IndexTuple	tuple;
 	TupleDesc	tupdesc;
 	BTScanOpaque so;
+        BTPageOpaque opaque;
 	int			keysz;
 	int			ikey;
 	ScanKey		key;
@@ -1402,7 +1411,9 @@ _bt_checkkeys(IndexScanDesc scan,
 	else
 		tuple_alive = true;
 
+	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 	tuple = (IndexTuple) PageGetItem(page, iid);
+        if (!P_ISLEAF(opaque) && IndexTupleSize(tuple) > sizeof(IndexTupleData)) tuple++;
 
 	tupdesc = RelationGetDescr(scan->indexRelation);
 	so = (BTScanOpaque) scan->opaque;
@@ -1798,6 +1809,7 @@ _bt_killitems(IndexScanDesc scan)
 		{
 			ItemId		iid = PageGetItemId(page, offnum);
 			IndexTuple	ituple = (IndexTuple) PageGetItem(page, iid);
+                        if (!P_ISLEAF(opaque) && IndexTupleSize(ituple) > sizeof(IndexTupleData)) ituple++;
 
 			if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid))
 			{
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index c536e22..e9a53eb 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -753,10 +753,14 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
 		nextoffset = OffsetNumberNext(poffset);
 		itemid = PageGetItemId(page, nextoffset);
 		itup = (IndexTuple) PageGetItem(page, itemid);
+                if (IndexTupleSize(itup) > sizeof(IndexTupleData))
+			itup++;
 		rightsib = ItemPointerGetBlockNumber(&itup->t_tid);
 
 		itemid = PageGetItemId(page, poffset);
 		itup = (IndexTuple) PageGetItem(page, itemid);
+                if (IndexTupleSize(itup) > sizeof(IndexTupleData))
+			itup++;
 		ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
 		nextoffset = OffsetNumberNext(poffset);
 		PageIndexTupleDelete(page, nextoffset);
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index f2a07f2..8fc2439 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -174,12 +174,17 @@ PageIsVerified(Page page, BlockNumber blkno)
  *	If flag PAI_ALLOW_FAR_OFFSET is not set, we disallow placing items
  *	beyond one past the last existing item.
  *
+ *      If headerSize != 0, header is prepended to the added item, in which
+ *      case headerSize must be MAXALIGN or the operation will fail.
+ *
  *	!!! EREPORT(ERROR) IS DISALLOWED HERE !!!
  */
 OffsetNumber
 PageAddItemExtended(Page page,
 					Item item,
 					Size size,
+                                        Item header,
+                                        Size headerSize,
 					OffsetNumber offsetNumber,
 					int flags)
 {
@@ -276,6 +281,12 @@ PageAddItemExtended(Page page,
 		elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
 		return InvalidOffsetNumber;
 	}
+        
+        /* Reject nonzero non-maxaligned headers */
+        if (headerSize > 0 && MAXALIGN(headerSize) != headerSize) {
+               	elog(WARNING, "misaligned header data (%zu)", headerSize);
+		return InvalidOffsetNumber;
+	}
 
 	/*
 	 * Compute new lower and upper pointers for page, see if it'll fit.
@@ -291,7 +302,7 @@ PageAddItemExtended(Page page,
 	else
 		lower = phdr->pd_lower;
 
-	alignedSize = MAXALIGN(size);
+	alignedSize = MAXALIGN(size) + headerSize;
 
 	upper = (int) phdr->pd_upper - (int) alignedSize;
 
@@ -323,9 +334,14 @@ PageAddItemExtended(Page page,
 	 * this as an error, but it is safe to ignore.
 	 */
 	VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
+        if (headerSize > 0) {
+	        VALGRIND_CHECK_MEM_IS_DEFINED(header, headerSize);
+        }
 
 	/* copy the item's data onto the page */
-	memcpy((char *) page + upper, item, size);
+        if (headerSize > 0)
+	        memcpy((char *) page + upper, header, headerSize);
+	memcpy((char *) page + upper + headerSize, item, size);
 
 	/* adjust page header */
 	phdr->pd_lower = (LocationIndex) lower;
@@ -350,7 +366,28 @@ OffsetNumber
 PageAddItem(Page page, Item item, Size size, OffsetNumber offsetNumber,
 			bool overwrite, bool is_heap)
 {
-	return PageAddItemExtended(page, item, size, offsetNumber,
+	return PageAddItemExtended(page, item, size, item, 0, offsetNumber,
+							   overwrite ? PAI_OVERWRITE : 0 |
+							   is_heap ? PAI_IS_HEAP : 0);
+}
+
+/*
+ *	PageAddItemWithHeader
+ *
+ *	Add an item to a page and prepend a header to it.  Return value is offset 
+ *      at which it was inserted, or InvalidOffsetNumber if the item is not inserted for
+ *	any reason.
+ *
+ *	Passing the 'overwrite' and 'is_heap' parameters as true causes the
+ *	PAI_OVERWRITE and PAI_IS_HEAP flags to be set, respectively.
+ *
+ *	!!! EREPORT(ERROR) IS DISALLOWED HERE !!!
+ */
+OffsetNumber
+PageAddItemWithHeader(Page page, Item item, Size size, Item header, Size headerSize, 
+                      OffsetNumber offsetNumber, bool overwrite, bool is_heap)
+{
+	return PageAddItemExtended(page, item, size, header, headerSize, offsetNumber,
 							   overwrite ? PAI_OVERWRITE : 0 |
 							   is_heap ? PAI_IS_HEAP : 0);
 }
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 510565c..a15af00 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -4456,9 +4456,9 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
 	}
 
 	/*
-	 * If key values are equal, we sort on ItemPointer.  This does not affect
-	 * validity of the finished index, but it may be useful to have index
-	 * scans in physical order.
+	 * If key values are equal, we sort on ItemPointer.  This is required
+	 * to ensure validity of the finished index, since ItemPointers are
+         * used as an implicit last sort column in a btree index.
 	 */
 	{
 		BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
diff --git a/src/include/access/itup.h b/src/include/access/itup.h
index 8350fa0..ccd3fbb 100644
--- a/src/include/access/itup.h
+++ b/src/include/access/itup.h
@@ -147,5 +147,6 @@ extern Datum nocache_index_getattr(IndexTuple tup, int attnum,
 extern void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor,
 				   Datum *values, bool *isnull);
 extern IndexTuple CopyIndexTuple(IndexTuple source);
+extern IndexTuple CopyIndexTupleAddHeader(IndexTuple source, Size headerSize);
 
 #endif   /* ITUP_H */
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index c580f51..1668476 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -472,6 +472,11 @@ typedef struct xl_btree_newroot
  *	Yao's update algorithm guarantees that under no circumstances can
  *	our private stack give us an irredeemably bad picture up the tree.
  *	Again, see the paper for details.
+ *
+ *      bts_flags can contain hints useful for re-executing searches during
+ *      unique checks. BTS_EQUAL_KEY marks a descent into a page whose minkey
+ *      compares equal to the scankey, which is a useful reference for finding
+ *      the first equal key.
  */
 
 typedef struct BTStackData
@@ -480,10 +485,15 @@ typedef struct BTStackData
 	OffsetNumber bts_offset;
 	IndexTupleData bts_btentry;
 	struct BTStackData *bts_parent;
+        uint16 bts_flags;
 } BTStackData;
 
+#define BTS_EQUAL_KEY 1
+
 typedef BTStackData *BTStack;
 
+#define BTStackIsEqualKey(stack) ((stack->bts_flags & BTS_EQUAL_KEY) != 0)
+
 /*
  * BTScanOpaqueData is the btree-private state needed for an indexscan.
  * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
@@ -710,15 +720,17 @@ extern int	_bt_pagedel(Relation rel, Buffer buf);
  * prototypes for functions in nbtsearch.c
  */
 extern BTStack _bt_search(Relation rel,
-		   int keysz, ScanKey scankey, bool nextkey,
-		   Buffer *bufP, int access, Snapshot snapshot);
+		   int keysz, ScanKey scankey, ItemPointer scantid, bool nextkey,
+		   Buffer *bufP, int access, Snapshot snapshot, BTStack stack_top);
 extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
-			  ScanKey scankey, bool nextkey, bool forupdate, BTStack stack,
+			  ScanKey scankey, ItemPointer scantid, bool nextkey, bool forupdate, BTStack stack,
 			  int access, Snapshot snapshot);
 extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
-			ScanKey scankey, bool nextkey);
+			ScanKey scankey, ItemPointer scantid, bool nextkey, bool *isequal);
 extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
 			Page page, OffsetNumber offnum);
+extern int32 _bt_compare_tid(Relation rel, int keysz, ScanKey scankey, ItemPointer scantid,
+			Page page, OffsetNumber offnum);
 extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
 extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
 extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
@@ -731,6 +743,7 @@ extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
 extern ScanKey _bt_mkscankey_nodata(Relation rel);
 extern void _bt_freeskey(ScanKey skey);
 extern void _bt_freestack(BTStack stack);
+extern void _bt_freestack_to_base(BTStack stack, BTStack base);
 extern void _bt_preprocess_array_keys(IndexScanDesc scan);
 extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
 extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 15cebfc..3922614 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -415,7 +415,9 @@ extern void PageInit(Page page, Size pageSize, Size specialSize);
 extern bool PageIsVerified(Page page, BlockNumber blkno);
 extern OffsetNumber PageAddItem(Page page, Item item, Size size,
 			OffsetNumber offsetNumber, bool overwrite, bool is_heap);
-extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
+extern OffsetNumber PageAddItemWithHeader(Page page, Item item, Size size, Item header, Size headerSize,
+			OffsetNumber offsetNumber, bool overwrite, bool is_heap);
+extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, Item header, Size headerSize,
 					OffsetNumber offsetNumber, int flags);
 extern Page PageGetTempPage(Page page);
 extern Page PageGetTempPageCopy(Page page);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 14646c6..ff8c63a 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -866,9 +866,9 @@ select distinct min(f1), max(f1) from minmaxtest;
 
 drop table minmaxtest cascade;
 NOTICE:  drop cascades to 3 other objects
-DETAIL:  drop cascades to table minmaxtest1
+DETAIL:  drop cascades to table minmaxtest3
 drop cascades to table minmaxtest2
-drop cascades to table minmaxtest3
+drop cascades to table minmaxtest1
 -- check for correct detection of nested-aggregate errors
 select max(min(unique1)) from tenk1;
 ERROR:  aggregate function calls cannot be nested
diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out
index b01be59..f7bff60 100644
--- a/src/test/regress/expected/alter_generic.out
+++ b/src/test/regress/expected/alter_generic.out
@@ -640,43 +640,43 @@ DROP LANGUAGE alt_lang4 CASCADE;
 ERROR:  language "alt_lang4" does not exist
 DROP SCHEMA alt_nsp1 CASCADE;
 NOTICE:  drop cascades to 26 other objects
-DETAIL:  drop cascades to function alt_func3(integer)
-drop cascades to function alt_agg3(integer)
-drop cascades to function alt_func4(integer)
-drop cascades to function alt_func2(integer)
-drop cascades to function alt_agg4(integer)
-drop cascades to function alt_agg2(integer)
-drop cascades to conversion alt_conv3
-drop cascades to conversion alt_conv4
-drop cascades to conversion alt_conv2
-drop cascades to operator @+@(integer,integer)
-drop cascades to operator @-@(integer,integer)
-drop cascades to operator family alt_opf3 for access method hash
-drop cascades to operator family alt_opc1 for access method hash
-drop cascades to operator family alt_opc2 for access method hash
-drop cascades to operator family alt_opf4 for access method hash
-drop cascades to operator family alt_opf2 for access method hash
-drop cascades to text search dictionary alt_ts_dict3
-drop cascades to text search dictionary alt_ts_dict4
-drop cascades to text search dictionary alt_ts_dict2
-drop cascades to text search configuration alt_ts_conf3
-drop cascades to text search configuration alt_ts_conf4
-drop cascades to text search configuration alt_ts_conf2
-drop cascades to text search template alt_ts_temp3
-drop cascades to text search template alt_ts_temp2
+DETAIL:  drop cascades to text search parser alt_ts_prs2
 drop cascades to text search parser alt_ts_prs3
-drop cascades to text search parser alt_ts_prs2
+drop cascades to text search template alt_ts_temp2
+drop cascades to text search template alt_ts_temp3
+drop cascades to text search configuration alt_ts_conf2
+drop cascades to text search configuration alt_ts_conf4
+drop cascades to text search configuration alt_ts_conf3
+drop cascades to text search dictionary alt_ts_dict2
+drop cascades to text search dictionary alt_ts_dict4
+drop cascades to text search dictionary alt_ts_dict3
+drop cascades to operator family alt_opf2 for access method hash
+drop cascades to operator family alt_opf4 for access method hash
+drop cascades to operator family alt_opc2 for access method hash
+drop cascades to operator family alt_opc1 for access method hash
+drop cascades to operator family alt_opf3 for access method hash
+drop cascades to operator @-@(integer,integer)
+drop cascades to operator @+@(integer,integer)
+drop cascades to conversion alt_conv2
+drop cascades to conversion alt_conv4
+drop cascades to conversion alt_conv3
+drop cascades to function alt_agg2(integer)
+drop cascades to function alt_agg4(integer)
+drop cascades to function alt_func2(integer)
+drop cascades to function alt_func4(integer)
+drop cascades to function alt_agg3(integer)
+drop cascades to function alt_func3(integer)
 DROP SCHEMA alt_nsp2 CASCADE;
 NOTICE:  drop cascades to 9 other objects
-DETAIL:  drop cascades to function alt_nsp2.alt_func2(integer)
-drop cascades to function alt_nsp2.alt_agg2(integer)
-drop cascades to conversion alt_conv2
-drop cascades to operator alt_nsp2.@-@(integer,integer)
-drop cascades to operator family alt_nsp2.alt_opf2 for access method hash
-drop cascades to text search dictionary alt_ts_dict2
-drop cascades to text search configuration alt_ts_conf2
+DETAIL:  drop cascades to text search parser alt_ts_prs2
 drop cascades to text search template alt_ts_temp2
-drop cascades to text search parser alt_ts_prs2
+drop cascades to text search configuration alt_ts_conf2
+drop cascades to text search dictionary alt_ts_dict2
+drop cascades to operator family alt_nsp2.alt_opf2 for access method hash
+drop cascades to operator alt_nsp2.@-@(integer,integer)
+drop cascades to conversion alt_conv2
+drop cascades to function alt_nsp2.alt_agg2(integer)
+drop cascades to function alt_nsp2.alt_func2(integer)
 DROP USER regress_alter_user1;
 DROP USER regress_alter_user2;
 DROP USER regress_alter_user3;
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index 3232cda..d6ac614 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -2336,19 +2336,19 @@ select alter2.plus1(41);
 -- clean up
 drop schema alter2 cascade;
 NOTICE:  drop cascades to 13 other objects
-DETAIL:  drop cascades to table alter2.t1
-drop cascades to view alter2.v1
-drop cascades to function alter2.plus1(integer)
-drop cascades to type alter2.posint
-drop cascades to operator family alter2.ctype_hash_ops for access method hash
+DETAIL:  drop cascades to text search template tmpl
+drop cascades to text search dictionary dict
+drop cascades to text search parser prs
+drop cascades to text search configuration cfg
+drop cascades to conversion ascii_to_utf8
 drop cascades to type alter2.ctype
 drop cascades to function alter2.same(alter2.ctype,alter2.ctype)
 drop cascades to operator alter2.=(alter2.ctype,alter2.ctype)
-drop cascades to conversion ascii_to_utf8
-drop cascades to text search parser prs
-drop cascades to text search configuration cfg
-drop cascades to text search template tmpl
-drop cascades to text search dictionary dict
+drop cascades to operator family alter2.ctype_hash_ops for access method hash
+drop cascades to type alter2.posint
+drop cascades to function alter2.plus1(integer)
+drop cascades to table alter2.t1
+drop cascades to view alter2.v1
 --
 -- composite types
 --
diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out
index f076a4d..2a3c70c 100644
--- a/src/test/regress/expected/collate.out
+++ b/src/test/regress/expected/collate.out
@@ -650,20 +650,6 @@ SELECT collation for ((SELECT b FROM collate_test1 LIMIT 1));
 -- trying to run any platform-specific collation tests later, so we
 -- must get rid of them.
 --
+--begin-word-ignore--drop cascades to
 DROP SCHEMA collate_tests CASCADE;
-NOTICE:  drop cascades to 15 other objects
-DETAIL:  drop cascades to table collate_test1
-drop cascades to table collate_test_like
-drop cascades to table collate_test2
-drop cascades to type testdomain_p
-drop cascades to table collate_test4
-drop cascades to table collate_test5
-drop cascades to table collate_test10
-drop cascades to view collview1
-drop cascades to view collview2
-drop cascades to view collview3
-drop cascades to type testdomain
-drop cascades to function dup(anyelement)
-drop cascades to table collate_test20
-drop cascades to table collate_test21
-drop cascades to table collate_test22
+--end-ignore--
diff --git a/src/test/regress/expected/create_function_3.out b/src/test/regress/expected/create_function_3.out
index 7bb957b..b2327e1 100644
--- a/src/test/regress/expected/create_function_3.out
+++ b/src/test/regress/expected/create_function_3.out
@@ -220,24 +220,24 @@ SELECT routine_name, ordinal_position, parameter_name, parameter_default
 -- Cleanups
 DROP SCHEMA temp_func_test CASCADE;
 NOTICE:  drop cascades to 19 other objects
-DETAIL:  drop cascades to function functest_a_1(text,date)
-drop cascades to function functest_a_2(text[])
-drop cascades to function functest_a_3()
-drop cascades to function functest_b_1(integer)
-drop cascades to function functest_b_2(integer)
-drop cascades to function functest_b_3(integer)
-drop cascades to function functest_b_4(integer)
-drop cascades to function functext_c_1(integer)
-drop cascades to function functext_c_2(integer)
-drop cascades to function functext_c_3(integer)
-drop cascades to function functext_e_1(integer)
-drop cascades to function functext_e_2(integer)
-drop cascades to function functext_f_1(integer)
-drop cascades to function functext_f_2(integer)
-drop cascades to function functext_f_3(integer)
-drop cascades to function functext_f_4(integer)
-drop cascades to function functest_is_1(integer,integer,text)
+DETAIL:  drop cascades to function functest_is_3(integer)
 drop cascades to function functest_is_2(integer)
-drop cascades to function functest_is_3(integer)
+drop cascades to function functest_is_1(integer,integer,text)
+drop cascades to function functext_f_4(integer)
+drop cascades to function functext_f_3(integer)
+drop cascades to function functext_f_2(integer)
+drop cascades to function functext_f_1(integer)
+drop cascades to function functext_e_2(integer)
+drop cascades to function functext_e_1(integer)
+drop cascades to function functext_c_3(integer)
+drop cascades to function functext_c_2(integer)
+drop cascades to function functext_c_1(integer)
+drop cascades to function functest_b_4(integer)
+drop cascades to function functest_b_3(integer)
+drop cascades to function functest_b_2(integer)
+drop cascades to function functest_b_1(integer)
+drop cascades to function functest_a_3()
+drop cascades to function functest_a_2(text[])
+drop cascades to function functest_a_1(text,date)
 DROP USER regress_unpriv_user;
 RESET search_path;
diff --git a/src/test/regress/expected/dependency.out b/src/test/regress/expected/dependency.out
index 8e50f8f..8d31110 100644
--- a/src/test/regress/expected/dependency.out
+++ b/src/test/regress/expected/dependency.out
@@ -128,9 +128,9 @@ FROM pg_type JOIN pg_class c ON typrelid = c.oid WHERE typname = 'deptest_t';
 -- doesn't work: grant still exists
 DROP USER regress_dep_user1;
 ERROR:  role "regress_dep_user1" cannot be dropped because some objects depend on it
-DETAIL:  owner of default privileges on new relations belonging to role regress_dep_user1 in schema deptest
+DETAIL:  privileges for table deptest1
 privileges for database regression
-privileges for table deptest1
+owner of default privileges on new relations belonging to role regress_dep_user1 in schema deptest
 DROP OWNED BY regress_dep_user1;
 DROP USER regress_dep_user1;
 \set VERBOSITY terse
diff --git a/src/test/regress/expected/domain.out b/src/test/regress/expected/domain.out
index 41b70e2..8dcf10f 100644
--- a/src/test/regress/expected/domain.out
+++ b/src/test/regress/expected/domain.out
@@ -308,8 +308,8 @@ alter domain dnotnulltest drop not null;
 update domnotnull set col1 = null;
 drop domain dnotnulltest cascade;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to table domnotnull column col1
-drop cascades to table domnotnull column col2
+DETAIL:  drop cascades to table domnotnull column col2
+drop cascades to table domnotnull column col1
 -- Test ALTER DOMAIN .. DEFAULT ..
 create table domdeftest (col1 ddef1);
 insert into domdeftest default values;
diff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out
index e124552..0229462 100644
--- a/src/test/regress/expected/event_trigger.out
+++ b/src/test/regress/expected/event_trigger.out
@@ -137,9 +137,9 @@ ERROR:  event trigger "regress_event_trigger" does not exist
 -- should fail, regress_evt_user owns some objects
 drop role regress_evt_user;
 ERROR:  role "regress_evt_user" cannot be dropped because some objects depend on it
-DETAIL:  owner of event trigger regress_event_trigger3
+DETAIL:  owner of user mapping for regress_evt_user on server useless_server
 owner of default privileges on new relations belonging to role regress_evt_user
-owner of user mapping for regress_evt_user on server useless_server
+owner of event trigger regress_event_trigger3
 -- cleanup before next test
 -- these are all OK; the second one should emit a NOTICE
 drop event trigger if exists regress_event_trigger2;
@@ -226,14 +226,13 @@ CREATE EVENT TRIGGER regress_event_trigger_drop_objects ON sql_drop
 ALTER TABLE schema_one.table_one DROP COLUMN a;
 DROP SCHEMA schema_one, schema_two CASCADE;
 NOTICE:  drop cascades to 7 other objects
-DETAIL:  drop cascades to table schema_two.table_two
-drop cascades to table schema_two.table_three
-drop cascades to function schema_two.add(integer,integer)
+DETAIL:  drop cascades to function schema_two.add(integer,integer)
 drop cascades to function schema_two.newton(integer)
-drop cascades to table schema_one.table_one
-drop cascades to table schema_one."table two"
+drop cascades to table schema_two.table_three
+drop cascades to table schema_two.table_two
 drop cascades to table schema_one.table_three
-NOTICE:  table "schema_two_table_two" does not exist, skipping
+drop cascades to table schema_one."table two"
+drop cascades to table schema_one.table_one
 NOTICE:  table "audit_tbls_schema_two_table_three" does not exist, skipping
 ERROR:  object audit_tbls.schema_two_table_three of type table cannot be dropped
 CONTEXT:  PL/pgSQL function undroppable() line 14 at RAISE
@@ -242,61 +241,61 @@ PL/pgSQL function test_evtrig_dropped_objects() line 8 at EXECUTE
 DELETE FROM undroppable_objs WHERE object_identity = 'audit_tbls.schema_two_table_three';
 DROP SCHEMA schema_one, schema_two CASCADE;
 NOTICE:  drop cascades to 7 other objects
-DETAIL:  drop cascades to table schema_two.table_two
-drop cascades to table schema_two.table_three
-drop cascades to function schema_two.add(integer,integer)
+DETAIL:  drop cascades to function schema_two.add(integer,integer)
 drop cascades to function schema_two.newton(integer)
-drop cascades to table schema_one.table_one
-drop cascades to table schema_one."table two"
+drop cascades to table schema_two.table_three
+drop cascades to table schema_two.table_two
 drop cascades to table schema_one.table_three
-NOTICE:  table "schema_two_table_two" does not exist, skipping
+drop cascades to table schema_one."table two"
+drop cascades to table schema_one.table_one
 NOTICE:  table "audit_tbls_schema_two_table_three" does not exist, skipping
-NOTICE:  table "schema_one_table_one" does not exist, skipping
-NOTICE:  table "schema_one_table two" does not exist, skipping
+NOTICE:  table "schema_two_table_two" does not exist, skipping
 NOTICE:  table "schema_one_table_three" does not exist, skipping
+NOTICE:  table "schema_one_table two" does not exist, skipping
+NOTICE:  table "schema_one_table_one" does not exist, skipping
 ERROR:  object schema_one.table_three of type table cannot be dropped
 CONTEXT:  PL/pgSQL function undroppable() line 14 at RAISE
 DELETE FROM undroppable_objs WHERE object_identity = 'schema_one.table_three';
 DROP SCHEMA schema_one, schema_two CASCADE;
 NOTICE:  drop cascades to 7 other objects
-DETAIL:  drop cascades to table schema_two.table_two
-drop cascades to table schema_two.table_three
-drop cascades to function schema_two.add(integer,integer)
+DETAIL:  drop cascades to function schema_two.add(integer,integer)
 drop cascades to function schema_two.newton(integer)
-drop cascades to table schema_one.table_one
-drop cascades to table schema_one."table two"
+drop cascades to table schema_two.table_three
+drop cascades to table schema_two.table_two
 drop cascades to table schema_one.table_three
-NOTICE:  table "schema_two_table_two" does not exist, skipping
+drop cascades to table schema_one."table two"
+drop cascades to table schema_one.table_one
 NOTICE:  table "audit_tbls_schema_two_table_three" does not exist, skipping
-NOTICE:  table "schema_one_table_one" does not exist, skipping
-NOTICE:  table "schema_one_table two" does not exist, skipping
+NOTICE:  table "schema_two_table_two" does not exist, skipping
 NOTICE:  table "schema_one_table_three" does not exist, skipping
+NOTICE:  table "schema_one_table two" does not exist, skipping
+NOTICE:  table "schema_one_table_one" does not exist, skipping
 SELECT * FROM dropped_objects WHERE schema IS NULL OR schema <> 'pg_toast';
      type     |   schema   |               object                
 --------------+------------+-------------------------------------
  table column | schema_one | schema_one.table_one.a
  schema       |            | schema_two
- table        | schema_two | schema_two.table_two
- type         | schema_two | schema_two.table_two
- type         | schema_two | schema_two.table_two[]
+ function     | schema_two | schema_two.add(integer,integer)
+ aggregate    | schema_two | schema_two.newton(integer)
  table        | audit_tbls | audit_tbls.schema_two_table_three
  type         | audit_tbls | audit_tbls.schema_two_table_three
  type         | audit_tbls | audit_tbls.schema_two_table_three[]
  table        | schema_two | schema_two.table_three
  type         | schema_two | schema_two.table_three
  type         | schema_two | schema_two.table_three[]
- function     | schema_two | schema_two.add(integer,integer)
- aggregate    | schema_two | schema_two.newton(integer)
+ table        | schema_two | schema_two.table_two
+ type         | schema_two | schema_two.table_two
+ type         | schema_two | schema_two.table_two[]
  schema       |            | schema_one
- table        | schema_one | schema_one.table_one
- type         | schema_one | schema_one.table_one
- type         | schema_one | schema_one.table_one[]
- table        | schema_one | schema_one."table two"
- type         | schema_one | schema_one."table two"
- type         | schema_one | schema_one."table two"[]
  table        | schema_one | schema_one.table_three
  type         | schema_one | schema_one.table_three
  type         | schema_one | schema_one.table_three[]
+ table        | schema_one | schema_one."table two"
+ type         | schema_one | schema_one."table two"
+ type         | schema_one | schema_one."table two"[]
+ table        | schema_one | schema_one.table_one
+ type         | schema_one | schema_one.table_one
+ type         | schema_one | schema_one.table_one[]
 (23 rows)
 
 DROP OWNED BY regress_evt_user;
@@ -345,13 +344,13 @@ DROP INDEX evttrig.one_idx;
 NOTICE:  NORMAL: orig=t normal=f istemp=f type=index identity=evttrig.one_idx name={evttrig,one_idx} args={}
 DROP SCHEMA evttrig CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to table evttrig.one
-drop cascades to table evttrig.two
+DETAIL:  drop cascades to table evttrig.two
+drop cascades to table evttrig.one
 NOTICE:  NORMAL: orig=t normal=f istemp=f type=schema identity=evttrig name={evttrig} args={}
+NOTICE:  NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.two name={evttrig,two} args={}
 NOTICE:  NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.one name={evttrig,one} args={}
 NOTICE:  NORMAL: orig=f normal=t istemp=f type=sequence identity=evttrig.one_col_a_seq name={evttrig,one_col_a_seq} args={}
 NOTICE:  NORMAL: orig=f normal=t istemp=f type=default value identity=for evttrig.one.col_a name={evttrig,one,col_a} args={}
-NOTICE:  NORMAL: orig=f normal=t istemp=f type=table identity=evttrig.two name={evttrig,two} args={}
 DROP TABLE a_temp_tbl;
 NOTICE:  NORMAL: orig=t normal=f istemp=t type=table identity=pg_temp.a_temp_tbl name={pg_temp,a_temp_tbl} args={}
 DROP EVENT TRIGGER regress_event_trigger_report_dropped;
diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out
index d6c1900..507cce4 100644
--- a/src/test/regress/expected/foreign_data.out
+++ b/src/test/regress/expected/foreign_data.out
@@ -419,8 +419,8 @@ ALTER SERVER s1 OWNER TO regress_test_indirect;
 RESET ROLE;
 DROP ROLE regress_test_indirect;                            -- ERROR
 ERROR:  role "regress_test_indirect" cannot be dropped because some objects depend on it
-DETAIL:  owner of server s1
-privileges for foreign-data wrapper foo
+DETAIL:  privileges for foreign-data wrapper foo
+owner of server s1
 \des+
                                                                                  List of foreign servers
  Name |           Owner           | Foreign-data wrapper |                   Access privileges                   |  Type  | Version |             FDW Options              | Description 
@@ -1181,8 +1181,8 @@ GRANT USAGE ON FOREIGN SERVER s9 TO regress_test_role;
 CREATE USER MAPPING FOR current_user SERVER s9;
 DROP SERVER s9 CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to user mapping for public on server s9
-drop cascades to user mapping for regress_unprivileged_role on server s9
+DETAIL:  drop cascades to user mapping for regress_unprivileged_role on server s9
+drop cascades to user mapping for public on server s9
 RESET ROLE;
 CREATE SERVER s9 FOREIGN DATA WRAPPER foo;
 GRANT USAGE ON FOREIGN SERVER s9 TO regress_unprivileged_role;
@@ -1529,15 +1529,15 @@ Inherits: pt1
 Child tables: ct3,
               ft3
 
+--begin-word-ignore--depends on foreign table
 DROP FOREIGN TABLE ft2; -- ERROR
 ERROR:  cannot drop foreign table ft2 because other objects depend on it
-DETAIL:  table ct3 depends on foreign table ft2
-foreign table ft3 depends on foreign table ft2
 HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+--end-ignore--
+--begin-word-ignore-- table
 DROP FOREIGN TABLE ft2 CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to table ct3
-drop cascades to foreign table ft3
+--end-ignore--
 CREATE FOREIGN TABLE ft2 (
 	c1 integer NOT NULL,
 	c2 text,
@@ -1755,9 +1755,9 @@ NOTICE:  drop cascades to user mapping for regress_test_role on server s5
 DROP SCHEMA foreign_schema CASCADE;
 DROP ROLE regress_test_role;                                -- ERROR
 ERROR:  role "regress_test_role" cannot be dropped because some objects depend on it
-DETAIL:  privileges for server s4
+DETAIL:  owner of user mapping for regress_test_role on server s6
 privileges for foreign-data wrapper foo
-owner of user mapping for regress_test_role on server s6
+privileges for server s4
 DROP SERVER t1 CASCADE;
 NOTICE:  drop cascades to user mapping for public on server t1
 DROP USER MAPPING FOR regress_test_role SERVER s6;
@@ -1769,8 +1769,8 @@ NOTICE:  drop cascades to 5 other objects
 \set VERBOSITY default
 DROP SERVER s8 CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to user mapping for regress_foreign_data_user on server s8
-drop cascades to user mapping for public on server s8
+DETAIL:  drop cascades to user mapping for public on server s8
+drop cascades to user mapping for regress_foreign_data_user on server s8
 DROP ROLE regress_test_indirect;
 DROP ROLE regress_test_role;
 DROP ROLE regress_unprivileged_role;                        -- ERROR
diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out
index 044881a..be57acb 100644
--- a/src/test/regress/expected/foreign_key.out
+++ b/src/test/regress/expected/foreign_key.out
@@ -255,7 +255,7 @@ SELECT * FROM FKTABLE;
 -- this should fail for lack of CASCADE
 DROP TABLE PKTABLE;
 ERROR:  cannot drop table pktable because other objects depend on it
-DETAIL:  constraint constrname2 on table fktable depends on table pktable
+DETAIL:  constraint constrname2 on table fktable depends on index pktable_pkey
 HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 DROP TABLE PKTABLE CASCADE;
 NOTICE:  drop cascades to constraint constrname2 on table fktable
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index d8b5b1d..7ebf9e0 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -831,11 +831,10 @@ Check constraints:
 Inherits: c1,
           c2
 
+--begin-word-ignore--rop cascades to table c
 drop table p1 cascade;
 NOTICE:  drop cascades to 3 other objects
-DETAIL:  drop cascades to table c1
-drop cascades to table c2
-drop cascades to table c3
+--end-ignore--
 drop table p2 cascade;
 create table pp1 (f1 int);
 create table cc1 (f2 text, f3 int) inherits (pp1);
@@ -977,12 +976,10 @@ SELECT a.attrelid::regclass, a.attname, a.attinhcount, e.expected
  inhts    | c       |           1 |        1
 (12 rows)
 
+--begin-word-ignore--drop cascades to table 
 DROP TABLE inht1, inhs1 CASCADE;
 NOTICE:  drop cascades to 4 other objects
-DETAIL:  drop cascades to table inht2
-drop cascades to table inhts
-drop cascades to table inht3
-drop cascades to table inht4
+--end-ignore--
 -- Test non-inheritable indices [UNIQUE, EXCLUDE] constraints
 CREATE TABLE test_constraints (id int, val1 varchar, val2 int, UNIQUE(val1, val2));
 CREATE TABLE test_constraints_inh () INHERITS (test_constraints);
@@ -1158,8 +1155,8 @@ select * from patest0 join (select f1 from int4_tbl limit 1) ss on id = f1;
 
 drop table patest0 cascade;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to table patest1
-drop cascades to table patest2
+DETAIL:  drop cascades to table patest2
+drop cascades to table patest1
 --
 -- Test merge-append plans for inheritance trees
 --
@@ -1303,9 +1300,9 @@ select min(1-id) from matest0;
 reset enable_seqscan;
 drop table matest0 cascade;
 NOTICE:  drop cascades to 3 other objects
-DETAIL:  drop cascades to table matest1
+DETAIL:  drop cascades to table matest3
 drop cascades to table matest2
-drop cascades to table matest3
+drop cascades to table matest1
 --
 -- Check that use of an index with an extraneous column doesn't produce
 -- a plan with extraneous sorting
diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out
index e7d0ad1..07fbdf1 100644
--- a/src/test/regress/expected/matview.out
+++ b/src/test/regress/expected/matview.out
@@ -310,15 +310,15 @@ SELECT type, m.totamt AS mtot, v.totamt AS vtot FROM mvtest_tm m LEFT JOIN mvtes
 -- make sure that dependencies are reported properly when they block the drop
 DROP TABLE mvtest_t;
 ERROR:  cannot drop table mvtest_t because other objects depend on it
-DETAIL:  view mvtest_tv depends on table mvtest_t
+DETAIL:  materialized view mvtest_tm depends on table mvtest_t
+materialized view mvtest_tmm depends on materialized view mvtest_tm
+view mvtest_tv depends on table mvtest_t
 view mvtest_tvv depends on view mvtest_tv
 materialized view mvtest_tvvm depends on view mvtest_tvv
 view mvtest_tvvmv depends on materialized view mvtest_tvvm
 materialized view mvtest_bb depends on view mvtest_tvvmv
 materialized view mvtest_mvschema.mvtest_tvm depends on view mvtest_tv
 materialized view mvtest_tvmm depends on materialized view mvtest_mvschema.mvtest_tvm
-materialized view mvtest_tm depends on table mvtest_t
-materialized view mvtest_tmm depends on materialized view mvtest_tm
 HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 -- make sure dependencies are dropped and reported
 -- and make sure that transactional behavior is correct on rollback
@@ -326,15 +326,15 @@ HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 BEGIN;
 DROP TABLE mvtest_t CASCADE;
 NOTICE:  drop cascades to 9 other objects
-DETAIL:  drop cascades to view mvtest_tv
+DETAIL:  drop cascades to materialized view mvtest_tm
+drop cascades to materialized view mvtest_tmm
+drop cascades to view mvtest_tv
 drop cascades to view mvtest_tvv
 drop cascades to materialized view mvtest_tvvm
 drop cascades to view mvtest_tvvmv
 drop cascades to materialized view mvtest_bb
 drop cascades to materialized view mvtest_mvschema.mvtest_tvm
 drop cascades to materialized view mvtest_tvmm
-drop cascades to materialized view mvtest_tm
-drop cascades to materialized view mvtest_tmm
 ROLLBACK;
 -- some additional tests not using base tables
 CREATE VIEW mvtest_vt1 AS SELECT 1 moo;
@@ -502,12 +502,10 @@ SELECT * FROM mvtest_mv_v_4;
   1 | 3
 (1 row)
 
+--begin-word-ignore--drop cascades to materialized view mvtest_mv_v
 DROP TABLE mvtest_v CASCADE;
 NOTICE:  drop cascades to 4 other objects
-DETAIL:  drop cascades to materialized view mvtest_mv_v
-drop cascades to materialized view mvtest_mv_v_2
-drop cascades to materialized view mvtest_mv_v_3
-drop cascades to materialized view mvtest_mv_v_4
+--end-ignore--
 -- make sure that create WITH NO DATA does not plan the query (bug #13907)
 create materialized view mvtest_error as select 1/0 as x;  -- fail
 ERROR:  division by zero
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index c15bf95..c937bc3 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -3058,8 +3058,8 @@ SELECT refclassid::regclass, deptype
 SAVEPOINT q;
 DROP ROLE regress_rls_eve; --fails due to dependency on POLICY p
 ERROR:  role "regress_rls_eve" cannot be dropped because some objects depend on it
-DETAIL:  target of policy p on table tbl1
-privileges for table tbl1
+DETAIL:  privileges for table tbl1
+target of policy p on table tbl1
 ROLLBACK TO q;
 ALTER POLICY p ON tbl1 TO regress_rls_frank USING (true);
 SAVEPOINT q;
diff --git a/src/test/regress/expected/select_into.out b/src/test/regress/expected/select_into.out
index 5d54bbf..ebdbcfc 100644
--- a/src/test/regress/expected/select_into.out
+++ b/src/test/regress/expected/select_into.out
@@ -46,9 +46,9 @@ CREATE TABLE selinto_schema.tmp3 (a,b,c)
 RESET SESSION AUTHORIZATION;
 DROP SCHEMA selinto_schema CASCADE;
 NOTICE:  drop cascades to 3 other objects
-DETAIL:  drop cascades to table selinto_schema.tmp1
+DETAIL:  drop cascades to table selinto_schema.tmp3
 drop cascades to table selinto_schema.tmp2
-drop cascades to table selinto_schema.tmp3
+drop cascades to table selinto_schema.tmp1
 DROP USER regress_selinto_user;
 -- Tests for WITH NO DATA and column name consistency
 CREATE TABLE ctas_base (i int, j int);
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index a7bf5dc..68acbb9 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -526,9 +526,9 @@ LINE 2: FOR EACH STATEMENT WHEN (OLD.* IS DISTINCT FROM NEW.*)
 -- check dependency restrictions
 ALTER TABLE main_table DROP COLUMN b;
 ERROR:  cannot drop table main_table column b because other objects depend on it
-DETAIL:  trigger after_upd_b_row_trig on table main_table depends on table main_table column b
+DETAIL:  trigger after_upd_b_stmt_trig on table main_table depends on table main_table column b
 trigger after_upd_a_b_row_trig on table main_table depends on table main_table column b
-trigger after_upd_b_stmt_trig on table main_table depends on table main_table column b
+trigger after_upd_b_row_trig on table main_table depends on table main_table column b
 HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 -- this should succeed, but we'll roll it back to keep the triggers around
 begin;
diff --git a/src/test/regress/expected/truncate.out b/src/test/regress/expected/truncate.out
index 5c5277e..324d7ac 100644
--- a/src/test/regress/expected/truncate.out
+++ b/src/test/regress/expected/truncate.out
@@ -278,9 +278,9 @@ SELECT * FROM trunc_faa;
 ROLLBACK;
 DROP TABLE trunc_f CASCADE;
 NOTICE:  drop cascades to 3 other objects
-DETAIL:  drop cascades to table trunc_fa
+DETAIL:  drop cascades to table trunc_fb
+drop cascades to table trunc_fa
 drop cascades to table trunc_faa
-drop cascades to table trunc_fb
 -- Test ON TRUNCATE triggers
 CREATE TABLE trunc_trigger_test (f1 int, f2 text, f3 text);
 CREATE TABLE trunc_trigger_log (tgop text, tglevel text, tgwhen text,
diff --git a/src/test/regress/expected/typed_table.out b/src/test/regress/expected/typed_table.out
index dce6098..b42381e 100644
--- a/src/test/regress/expected/typed_table.out
+++ b/src/test/regress/expected/typed_table.out
@@ -77,17 +77,17 @@ CREATE TABLE persons4 OF person_type (
 ERROR:  column "name" specified more than once
 DROP TYPE person_type RESTRICT;
 ERROR:  cannot drop type person_type because other objects depend on it
-DETAIL:  table persons depends on type person_type
-function get_all_persons() depends on type person_type
+DETAIL:  table persons3 depends on type person_type
 table persons2 depends on type person_type
-table persons3 depends on type person_type
+function get_all_persons() depends on type person_type
+table persons depends on type person_type
 HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 DROP TYPE person_type CASCADE;
 NOTICE:  drop cascades to 4 other objects
-DETAIL:  drop cascades to table persons
-drop cascades to function get_all_persons()
+DETAIL:  drop cascades to table persons3
 drop cascades to table persons2
-drop cascades to table persons3
+drop cascades to function get_all_persons()
+drop cascades to table persons
 CREATE TABLE persons5 OF stuff; -- only CREATE TYPE AS types may be used
 ERROR:  type stuff is not a composite type
 DROP TABLE stuff;
@@ -104,5 +104,5 @@ SELECT id, namelen(persons) FROM persons;
 
 DROP TYPE person_type CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to table persons
-drop cascades to function namelen(person_type)
+DETAIL:  drop cascades to function namelen(person_type)
+drop cascades to table persons
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index f60991e..68a3249 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -335,24 +335,10 @@ UPDATE ro_view20 SET b=upper(b);
 ERROR:  cannot update view "ro_view20"
 DETAIL:  Views that return set-returning functions are not automatically updatable.
 HINT:  To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.
+--begin-word-ignore--drop cascades to view r
 DROP TABLE base_tbl CASCADE;
 NOTICE:  drop cascades to 16 other objects
-DETAIL:  drop cascades to view ro_view1
-drop cascades to view ro_view17
-drop cascades to view ro_view2
-drop cascades to view ro_view3
-drop cascades to view ro_view5
-drop cascades to view ro_view6
-drop cascades to view ro_view7
-drop cascades to view ro_view8
-drop cascades to view ro_view9
-drop cascades to view ro_view11
-drop cascades to view ro_view13
-drop cascades to view rw_view15
-drop cascades to view rw_view16
-drop cascades to view ro_view20
-drop cascades to view ro_view4
-drop cascades to view rw_view14
+--end-ignore--
 DROP VIEW ro_view10, ro_view12, ro_view18;
 DROP SEQUENCE seq CASCADE;
 NOTICE:  drop cascades to view ro_view19
@@ -1063,8 +1049,8 @@ SELECT * FROM base_tbl;
 RESET SESSION AUTHORIZATION;
 DROP TABLE base_tbl CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to view rw_view1
-drop cascades to view rw_view2
+DETAIL:  drop cascades to view rw_view2
+drop cascades to view rw_view1
 DROP USER regress_view_user1;
 DROP USER regress_view_user2;
 -- column defaults
@@ -1325,8 +1311,8 @@ SELECT events & 4 != 0 AS upd,
 DROP TABLE base_tbl CASCADE;
 NOTICE:  drop cascades to 3 other objects
 DETAIL:  drop cascades to view rw_view1
-drop cascades to view rw_view2
 drop cascades to view rw_view3
+drop cascades to view rw_view2
 -- inheritance tests
 CREATE TABLE base_tbl_parent (a int);
 CREATE TABLE base_tbl_child (CHECK (a > 0)) INHERITS (base_tbl_parent);
@@ -1423,10 +1409,10 @@ SELECT * FROM base_tbl_child ORDER BY a;
  20
 (6 rows)
 
+--begin-word-ignore--drop cascades to view rw_view
 DROP TABLE base_tbl_parent, base_tbl_child CASCADE;
 NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to view rw_view1
-drop cascades to view rw_view2
+--end-ignore--
 -- simple WITH CHECK OPTION
 CREATE TABLE base_tbl (a int, b int DEFAULT 10);
 INSERT INTO base_tbl VALUES (1,2), (2,3), (1,-1);
diff --git a/src/test/regress/input/tablespace.source b/src/test/regress/input/tablespace.source
index 041ec97..167e8f0 100644
--- a/src/test/regress/input/tablespace.source
+++ b/src/test/regress/input/tablespace.source
@@ -84,7 +84,9 @@ ALTER TABLE ALL IN TABLESPACE regress_tblspace_renamed SET TABLESPACE pg_default
 -- Should succeed
 DROP TABLESPACE regress_tblspace_renamed;
 
+--begin-word-ignore--drop cascades to table testschema.
 DROP SCHEMA testschema CASCADE;
+--end-ignore--
 
 DROP ROLE regress_tablespace_user1;
 DROP ROLE regress_tablespace_user2;
diff --git a/src/test/regress/output/tablespace.source b/src/test/regress/output/tablespace.source
index 384f689..ed94d71 100644
--- a/src/test/regress/output/tablespace.source
+++ b/src/test/regress/output/tablespace.source
@@ -100,11 +100,9 @@ ALTER TABLE ALL IN TABLESPACE regress_tblspace_renamed SET TABLESPACE pg_default
 NOTICE:  no matching relations in tablespace "regress_tblspace_renamed" found
 -- Should succeed
 DROP TABLESPACE regress_tblspace_renamed;
+--begin-word-ignore--drop cascades to table testschema.
 DROP SCHEMA testschema CASCADE;
 NOTICE:  drop cascades to 4 other objects
-DETAIL:  drop cascades to table testschema.foo
-drop cascades to table testschema.asselect
-drop cascades to table testschema.asexecute
-drop cascades to table testschema.atable
+--end-ignore--
 DROP ROLE regress_tablespace_user1;
 DROP ROLE regress_tablespace_user2;
diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql
index e8ca085..99138b9 100644
--- a/src/test/regress/sql/collate.sql
+++ b/src/test/regress/sql/collate.sql
@@ -246,4 +246,6 @@ SELECT collation for ((SELECT b FROM collate_test1 LIMIT 1));
 -- trying to run any platform-specific collation tests later, so we
 -- must get rid of them.
 --
+--begin-word-ignore--drop cascades to
 DROP SCHEMA collate_tests CASCADE;
+--end-ignore--
diff --git a/src/test/regress/sql/foreign_data.sql b/src/test/regress/sql/foreign_data.sql
index 38e1d41..3e42362 100644
--- a/src/test/regress/sql/foreign_data.sql
+++ b/src/test/regress/sql/foreign_data.sql
@@ -614,8 +614,12 @@ SELECT relname, conname, contype, conislocal, coninhcount, connoinherit
 -- child does not inherit NO INHERIT constraints
 \d+ pt1
 \d+ ft2
+--begin-word-ignore--depends on foreign table
 DROP FOREIGN TABLE ft2; -- ERROR
+--end-ignore--
+--begin-word-ignore-- table
 DROP FOREIGN TABLE ft2 CASCADE;
+--end-ignore--
 CREATE FOREIGN TABLE ft2 (
 	c1 integer NOT NULL,
 	c2 text,
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index b307a50..a522a26 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -246,7 +246,9 @@ create table c2(f3 int) inherits(p1,p2);
 \d c2
 create table c3 (f4 int) inherits(c1,c2);
 \d c3
+--begin-word-ignore--rop cascades to table c
 drop table p1 cascade;
+--end-ignore--
 drop table p2 cascade;
 
 create table pp1 (f1 int);
@@ -296,7 +298,9 @@ SELECT a.attrelid::regclass, a.attname, a.attinhcount, e.expected
   JOIN pg_attribute a ON e.inhrelid = a.attrelid WHERE NOT attislocal
   ORDER BY a.attrelid::regclass::name, a.attnum;
 
+--begin-word-ignore--drop cascades to table 
 DROP TABLE inht1, inhs1 CASCADE;
+--end-ignore--
 
 
 -- Test non-inheritable indices [UNIQUE, EXCLUDE] constraints
diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql
index 5f3269d..07eabb2 100644
--- a/src/test/regress/sql/matview.sql
+++ b/src/test/regress/sql/matview.sql
@@ -196,7 +196,9 @@ SELECT * FROM mvtest_mv_v;
 SELECT * FROM mvtest_mv_v_2;
 SELECT * FROM mvtest_mv_v_3;
 SELECT * FROM mvtest_mv_v_4;
+--begin-word-ignore--drop cascades to materialized view mvtest_mv_v
 DROP TABLE mvtest_v CASCADE;
+--end-ignore--
 
 -- make sure that create WITH NO DATA does not plan the query (bug #13907)
 create materialized view mvtest_error as select 1/0 as x;  -- fail
diff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql
index 03c3f9d..f4694ee 100644
--- a/src/test/regress/sql/updatable_views.sql
+++ b/src/test/regress/sql/updatable_views.sql
@@ -98,7 +98,9 @@ DELETE FROM ro_view18;
 UPDATE ro_view19 SET max_value=1000;
 UPDATE ro_view20 SET b=upper(b);
 
+--begin-word-ignore--drop cascades to view r
 DROP TABLE base_tbl CASCADE;
+--end-ignore--
 DROP VIEW ro_view10, ro_view12, ro_view18;
 DROP SEQUENCE seq CASCADE;
 
@@ -634,7 +636,9 @@ DELETE FROM ONLY rw_view2 WHERE a IN (-8, 8); -- Should delete -8 only
 SELECT * FROM ONLY base_tbl_parent ORDER BY a;
 SELECT * FROM base_tbl_child ORDER BY a;
 
+--begin-word-ignore--drop cascades to view rw_view
 DROP TABLE base_tbl_parent, base_tbl_child CASCADE;
+--end-ignore--
 
 -- simple WITH CHECK OPTION
 
-- 
2.6.6

