From 21894a693904a6ec270906fee403880768ef3db5 Mon Sep 17 00:00:00 2001
From: ashu <ashu@localhost.localdomain>
Date: Sun, 2 Apr 2017 03:43:20 +0530
Subject: [PATCH] Improve locking strategy during VACUUM in Hash Index v3

---
 src/backend/access/hash/README     |  2 +-
 src/backend/access/hash/hash.c     | 21 ++++++++++-----------
 src/backend/access/hash/hashovfl.c |  4 +---
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/backend/access/hash/README b/src/backend/access/hash/README
index 063656d..a3f2445 100644
--- a/src/backend/access/hash/README
+++ b/src/backend/access/hash/README
@@ -380,8 +380,8 @@ The fourth operation is garbage collection (bulk deletion):
 			mark the target page dirty
 			write WAL for deleting tuples from target page
 			if this is the last bucket page, break out of loop
-			pin and x-lock next page
 			release prior lock and pin (except keep pin on primary bucket page)
+			pin and x-lock next page
 		if the page we have locked is not the primary bucket page:
 			release lock and take exclusive lock on primary bucket page
 		if there are no other pins on the primary bucket page:
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 582163a..4c82868 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -670,11 +670,9 @@ hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
  * that the next valid TID will be greater than or equal to the current
  * valid TID.  There can't be any concurrent scans in progress when we first
  * enter this function because of the cleanup lock we hold on the primary
- * bucket page, but as soon as we release that lock, there might be.  We
- * handle that by conspiring to prevent those scans from passing our cleanup
- * scan.  To do that, we lock the next page in the bucket chain before
- * releasing the lock on the previous page.  (This type of lock chaining is
- * not ideal, so we might want to look for a better solution at some point.)
+ * bucket page, but as soon as we release that lock, there might be.  We
+ * need not worry about that, however, because hash index scans work in
+ * page-at-a-time mode.
  *
  * We need to retain a pin on the primary bucket to ensure that no concurrent
  * split can start.
@@ -843,19 +841,20 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
 		if (!BlockNumberIsValid(blkno))
 			break;
 
-		next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
-											  LH_OVERFLOW_PAGE,
-											  bstrategy);
-
 		/*
-		 * release the lock on previous page after acquiring the lock on next
-		 * page
+		 * Because hash index scans work in page-at-a-time mode, vacuum can
+		 * release the lock on the previous page before acquiring the lock on
+		 * the next page.
 		 */
 		if (retain_pin)
 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 		else
 			_hash_relbuf(rel, buf);
 
+		next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+											  LH_OVERFLOW_PAGE,
+											  bstrategy);
+
 		buf = next_buf;
 	}
 
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index a3cae21..dc119a3 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -778,9 +778,7 @@ _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
  *	Caller must acquire cleanup lock on the primary page of the target
  *	bucket to exclude any scans that are in progress, which could easily
  *	be confused into returning the same tuple more than once or some tuples
- *	not at all by the rearrangement we are performing here.  To prevent
- *	any concurrent scan to cross the squeeze scan we use lock chaining
- *	similar to hasbucketcleanup.  Refer comments atop hashbucketcleanup.
+ *	not at all by the rearrangement we are performing here.
  *
  *	We need to retain a pin on the primary bucket to ensure that no concurrent
  *	split can start.
-- 
1.8.3.1

