*** a/src/backend/access/transam/clog.c
--- b/src/backend/access/transam/clog.c
***************
*** 54,63 ****
  #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
  #define CLOG_XACT_BITMASK	((1 << CLOG_BITS_PER_XACT) - 1)
  
! #define TransactionIdToPage(xid)	((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
! #define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
! #define TransactionIdToByte(xid)	(TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
! #define TransactionIdToBIndex(xid)	((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
  
  /* We store the latest async LSN for each group of transactions */
  #define CLOG_XACTS_PER_LSN_GROUP	32	/* keep this a power of 2 */
--- 54,70 ----
  #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
  #define CLOG_XACT_BITMASK	((1 << CLOG_BITS_PER_XACT) - 1)
  
! /*
!  * XidStripe rounds xid down to a multiple of NUM_CLOG_PARTITIONS, and the
!  * remainder picks the partition.  TransactionIdToPgIndex stripes its own
!  * argument, so TransactionIdToByte passes the raw xid through.
!  */
! #define XidStripe(xid) (NUM_CLOG_PARTITIONS * ((xid) / (TransactionId) NUM_CLOG_PARTITIONS))
! #define TransactionIdToPartition(xid) ((xid) % (TransactionId) NUM_CLOG_PARTITIONS)
! #define TransactionIdToPage(xid)	(XidStripe(xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
! #define TransactionIdToPgIndex(xid) (XidStripe(xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
! #define TransactionIdToByte(xid)	(TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
! #define TransactionIdToBIndex(xid)	(XidStripe(xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
  
  /* We store the latest async LSN for each group of transactions */
  #define CLOG_XACTS_PER_LSN_GROUP	32	/* keep this a power of 2 */
***************
*** 66,88 ****
  #define GetLSNIndex(slotno, xid)	((slotno) * CLOG_LSNS_PER_PAGE + \
  	((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
  
  
  /*
   * Link to shared-memory data structures for CLOG control
   */
! static SlruCtlData ClogCtlData;
! 
! #define ClogCtl (&ClogCtlData)
! 
  
! static int	ZeroCLOGPage(int pageno, bool writeXlog);
  static bool CLOGPagePrecedes(int page1, int page2);
! static void WriteZeroPageXlogRec(int pageno);
! static void WriteTruncateXlogRec(int pageno);
! static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  						   TransactionId *subxids, XidStatus status,
  						   XLogRecPtr lsn, int pageno);
! static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
  						  XLogRecPtr lsn, int slotno);
  static void set_status_by_pages(int nsubxids, TransactionId *subxids,
  					XidStatus status, XLogRecPtr lsn);
--- 68,92 ----
  #define GetLSNIndex(slotno, xid)	((slotno) * CLOG_LSNS_PER_PAGE + \
  	((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
  
+ typedef struct xl_clog_page
+ {
+ 	int			partition;
+ 	int			pageno;
+ } xl_clog_page;
  
  /*
   * Link to shared-memory data structures for CLOG control
   */
! static SlruCtlData ClogCtl[NUM_CLOG_PARTITIONS];
  
! static int	ZeroCLOGPage(int partition, int pageno, bool writeXlog);
  static bool CLOGPagePrecedes(int page1, int page2);
! static void WriteZeroPageXlogRec(int partition, int pageno);
! static void WriteTruncateXlogRec(int partition, int pageno);
! static void TransactionIdSetPageStatus(int partition, TransactionId xid, int nsubxids,
  						   TransactionId *subxids, XidStatus status,
  						   XLogRecPtr lsn, int pageno);
! static void TransactionIdSetStatusBit(int partition, TransactionId xid, XidStatus status,
  						  XLogRecPtr lsn, int slotno);
  static void set_status_by_pages(int nsubxids, TransactionId *subxids,
  					XidStatus status, XLogRecPtr lsn);
***************
*** 144,149 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
--- 148,154 ----
  					TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
  {
  	int			pageno = TransactionIdToPage(xid);		/* get page of parent */
+ 	int			partition = TransactionIdToPartition(xid); /* of parent */
  	int			i;
  
  	Assert(status == TRANSACTION_STATUS_COMMITTED ||
***************
*** 151,161 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
  
  	/*
  	 * See how many subxids, if any, are on the same page as the parent, if
! 	 * any.
  	 */
  	for (i = 0; i < nsubxids; i++)
  	{
! 		if (TransactionIdToPage(subxids[i]) != pageno)
  			break;
  	}
  
--- 156,167 ----
  
  	/*
  	 * See how many subxids, if any, are on the same page as the parent, if
! 	 * any. Notice that having more partitions most likely reduces this number.
  	 */
  	for (i = 0; i < nsubxids; i++)
  	{
! 		if (TransactionIdToPage(subxids[i]) != pageno ||
! 			TransactionIdToPartition(subxids[i]) != partition)
  			break;
  	}
  
***************
*** 167,173 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
  		/*
  		 * Set the parent and all subtransactions in a single call
  		 */
! 		TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
  								   pageno);
  	}
  	else
--- 173,179 ----
  		/*
  		 * Set the parent and all subtransactions in a single call
  		 */
! 		TransactionIdSetPageStatus(partition, xid, nsubxids, subxids, status, lsn,
  								   pageno);
  	}
  	else
***************
*** 183,188 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
--- 189,198 ----
  		 *
  		 * To avoid touching the first page twice, skip marking subcommitted
  		 * for the subxids on that first page.
+ 		 *
+ 		 * Notice that all the complexity of clog partitions is hidden within
+ 		 * set_status_by_pages. The parent transaction still exists on one
+ 		 * page in one partition, so that part is unchanged by partitioning.
  		 */
  		if (status == TRANSACTION_STATUS_COMMITTED)
  			set_status_by_pages(nsubxids - nsubxids_on_first_page,
***************
*** 194,200 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
  		 * if any
  		 */
  		pageno = TransactionIdToPage(xid);
! 		TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
  								   lsn, pageno);
  
  		/*
--- 204,210 ----
  		 * if any
  		 */
  		pageno = TransactionIdToPage(xid);
! 		TransactionIdSetPageStatus(partition, xid, nsubxids_on_first_page, subxids, status,
  								   lsn, pageno);
  
  		/*
***************
*** 212,217 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
--- 222,228 ----
   * transactions, chunking in the separate CLOG pages involved. We never
   * pass the whole transaction tree to this function, only subtransactions
   * that are on different pages to the top level transaction id.
+  * We sift the array once for each partition.
   */
  static void
  set_status_by_pages(int nsubxids, TransactionId *subxids,
***************
*** 220,241 **** set_status_by_pages(int nsubxids, TransactionId *subxids,
  	int			pageno = TransactionIdToPage(subxids[0]);
  	int			offset = 0;
  	int			i = 0;
  
! 	while (i < nsubxids)
  	{
! 		int			num_on_page = 0;
! 
! 		while (TransactionIdToPage(subxids[i]) == pageno && i < nsubxids)
  		{
! 			num_on_page++;
! 			i++;
  		}
  
! 		TransactionIdSetPageStatus(InvalidTransactionId,
! 								   num_on_page, subxids + offset,
! 								   status, lsn, pageno);
! 		offset = i;
! 		pageno = TransactionIdToPage(subxids[offset]);
  	}
  }
  
--- 231,280 ----
  	int			pageno = TransactionIdToPage(subxids[0]);
  	int			offset = 0;
  	int			i = 0;
+ 	int			partition;
+ 	int			part_nsubxids;
+ 	int			max_part_nsubxids = 32;
+ 	TransactionId *part_subxids = palloc(max_part_nsubxids * sizeof(TransactionId));
  
! 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
  	{
! 		part_nsubxids = 0;
! 		for (i = 0; i < nsubxids; i++)
  		{
! 			/* Collect up all the xids for this partition */
! 			if (TransactionIdToPartition(subxids[i]) == partition)
! 			{
! 				part_subxids[part_nsubxids++] = subxids[i];
! 				if (part_nsubxids >= max_part_nsubxids)
! 				{
! 					max_part_nsubxids *= 2;
! 					part_subxids = repalloc(part_subxids, max_part_nsubxids * sizeof(TransactionId));
! 				}
! 			}
  		}
  
! 		/* Now apply the changes by page, just for this partition */
! 		i = offset = 0;		/* part_subxids was refilled, so rescan from its start */
! 		while (i < part_nsubxids)
! 		{
! 			int			num_on_page = 0;
! 
! 			/* Each chunk begins at the page of the first xid not yet applied */
! 			pageno = TransactionIdToPage(part_subxids[i]);
! 			while (i < part_nsubxids && TransactionIdToPage(part_subxids[i]) == pageno)
! 			{
! 				num_on_page++;
! 				i++;
! 			}
! 
! 			TransactionIdSetPageStatus(partition, InvalidTransactionId,
! 									   num_on_page, part_subxids + offset,
! 									   status, lsn, pageno);
! 			offset = i;
! 		}
  	}
+ 
+ 	/* Release the per-partition scratch array */
+ 	pfree(part_subxids);
  }
  
***************
*** 243,263 **** set_status_by_pages(int nsubxids, TransactionId *subxids,
   * Record the final state of transaction entries in the commit log for
   * all entries on a single page.  Atomic only on this page.
   *
!  * Otherwise API is same as TransactionIdSetTreeStatus()
   */
  static void
! TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  						   TransactionId *subxids, XidStatus status,
  						   XLogRecPtr lsn, int pageno)
  {
  	int			slotno;
  	int			i;
  
  	Assert(status == TRANSACTION_STATUS_COMMITTED ||
  		   status == TRANSACTION_STATUS_ABORTED ||
  		   (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
  
! 	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
  
  	/*
  	 * If we're doing an async commit (ie, lsn is valid), then we must wait
--- 282,304 ----
   * Record the final state of transaction entries in the commit log for
   * all entries on a single page.  Atomic only on this page.
   *
!  * Otherwise API is same as TransactionIdSetTreeStatus(), apart from
!  * the partition the page number is in.
   */
  static void
! TransactionIdSetPageStatus(int partition, TransactionId xid, int nsubxids,
  						   TransactionId *subxids, XidStatus status,
  						   XLogRecPtr lsn, int pageno)
  {
  	int			slotno;
  	int			i;
+ 	SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
  	Assert(status == TRANSACTION_STATUS_COMMITTED ||
  		   status == TRANSACTION_STATUS_ABORTED ||
  		   (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
  
! 	LWLockAcquire(ClogCtlP->shared->ControlLock, LW_EXCLUSIVE);
  
  	/*
  	 * If we're doing an async commit (ie, lsn is valid), then we must wait
***************
*** 268,274 **** TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  	 * write-busy, since we don't care if the update reaches disk sooner than
  	 * we think.
  	 */
! 	slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
  
  	/*
  	 * Set the main transaction id, if any.
--- 309,315 ----
  	 * write-busy, since we don't care if the update reaches disk sooner than
  	 * we think.
  	 */
! 	slotno = SimpleLruReadPage(ClogCtlP, pageno, XLogRecPtrIsInvalid(lsn), xid);
  
  	/*
  	 * Set the main transaction id, if any.
***************
*** 286,312 **** TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
  		{
  			for (i = 0; i < nsubxids; i++)
  			{
! 				Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! 				TransactionIdSetStatusBit(subxids[i],
  										  TRANSACTION_STATUS_SUB_COMMITTED,
  										  lsn, slotno);
  			}
  		}
  
  		/* ... then the main transaction */
! 		TransactionIdSetStatusBit(xid, status, lsn, slotno);
  	}
  
  	/* Set the subtransactions */
  	for (i = 0; i < nsubxids; i++)
  	{
! 		Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! 		TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
  	}
  
! 	ClogCtl->shared->page_dirty[slotno] = true;
  
! 	LWLockRelease(CLogControlLock);
  }
  
  /*
--- 327,353 ----
  		{
  			for (i = 0; i < nsubxids; i++)
  			{
! 				Assert(ClogCtlP->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! 				TransactionIdSetStatusBit(partition, subxids[i],
  										  TRANSACTION_STATUS_SUB_COMMITTED,
  										  lsn, slotno);
  			}
  		}
  
  		/* ... then the main transaction */
! 		TransactionIdSetStatusBit(partition, xid, status, lsn, slotno);
  	}
  
  	/* Set the subtransactions */
  	for (i = 0; i < nsubxids; i++)
  	{
! 		Assert(ClogCtlP->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
! 		TransactionIdSetStatusBit(partition, subxids[i], status, lsn, slotno);
  	}
  
! 	ClogCtlP->shared->page_dirty[slotno] = true;
  
! 	LWLockRelease(ClogCtlP->shared->ControlLock);
  }
  
  /*
***************
*** 315,329 **** TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
   * Must be called with CLogControlLock held
   */
  static void
! TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
  {
  	int			byteno = TransactionIdToByte(xid);
  	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
  	char	   *byteptr;
  	char		byteval;
  	char		curval;
  
! 	byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
  	curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
  
  	/*
--- 356,373 ----
   * Must be called with CLogControlLock held
   */
  static void
! TransactionIdSetStatusBit(int partition, TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
  {
  	int			byteno = TransactionIdToByte(xid);
  	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
  	char	   *byteptr;
  	char		byteval;
  	char		curval;
+ 	SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
! 	Assert(TransactionIdToPartition(xid) == partition);
! 
! 	byteptr = ClogCtlP->shared->page_buffer[slotno] + byteno;
  	curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
  
  	/*
***************
*** 363,370 **** TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
  	{
  		int			lsnindex = GetLSNIndex(slotno, xid);
  
! 		if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn))
! 			ClogCtl->shared->group_lsn[lsnindex] = lsn;
  	}
  }
  
--- 407,414 ----
  	{
  		int			lsnindex = GetLSNIndex(slotno, xid);
  
! 		if (XLByteLT(ClogCtlP->shared->group_lsn[lsnindex], lsn))
! 			ClogCtlP->shared->group_lsn[lsnindex] = lsn;
  	}
  }
  
***************
*** 386,391 **** TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
--- 430,436 ----
  XidStatus
  TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
  {
+ 	int			partition = TransactionIdToPartition(xid);
  	int			pageno = TransactionIdToPage(xid);
  	int			byteno = TransactionIdToByte(xid);
  	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
***************
*** 393,410 **** TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
  	int			lsnindex;
  	char	   *byteptr;
  	XidStatus	status;
  
  	/* lock is acquired by SimpleLruReadPage_ReadOnly */
  
! 	slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid);
! 	byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
  
  	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
  
  	lsnindex = GetLSNIndex(slotno, xid);
! 	*lsn = ClogCtl->shared->group_lsn[lsnindex];
  
! 	LWLockRelease(CLogControlLock);
  
  	return status;
  }
--- 438,456 ----
  	int			lsnindex;
  	char	   *byteptr;
  	XidStatus	status;
+ 	SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
  	/* lock is acquired by SimpleLruReadPage_ReadOnly */
  
! 	slotno = SimpleLruReadPage_ReadOnly(ClogCtlP, pageno, xid);
! 	byteptr = ClogCtlP->shared->page_buffer[slotno] + byteno;
  
  	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
  
  	lsnindex = GetLSNIndex(slotno, xid);
! 	*lsn = ClogCtlP->shared->group_lsn[lsnindex];
  
! 	LWLockRelease(ClogCtlP->shared->ControlLock);
  
  	return status;
  }
***************
*** 416,430 **** TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
  Size
  CLOGShmemSize(void)
  {
! 	return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
  }
  
  void
  CLOGShmemInit(void)
  {
  	ClogCtl->PagePrecedes = CLOGPagePrecedes;
! 	SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
! 				  CLogControlLock, "pg_clog");
  }
  
  /*
--- 462,482 ----
  Size
  CLOGShmemSize(void)
  {
! 	return SimpleLruShmemSize(NUM_CLOG_PARTITIONS, NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
  }
  
  void
  CLOGShmemInit(void)
  {
+ 	int			partition;
+ 
  	ClogCtl->PagePrecedes = CLOGPagePrecedes;
! 	/* ClogCtl is an array now, so the line above reaches partition 0 only */
! 	for (partition = 1; partition < NUM_CLOG_PARTITIONS; partition++)
! 		ClogCtl[partition].PagePrecedes = CLOGPagePrecedes;
! 
! 	SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_PARTITIONS, NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
! 				  FirstClogControlLock, "pg_clog");
  }
  
  /*
***************
*** 437,453 **** void
  BootStrapCLOG(void)
  {
  	int			slotno;
  
! 	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
  
! 	/* Create and zero the first page of the commit log */
! 	slotno = ZeroCLOGPage(0, false);
  
! 	/* Make sure it's written out */
! 	SimpleLruWritePage(ClogCtl, slotno);
! 	Assert(!ClogCtl->shared->page_dirty[slotno]);
  
! 	LWLockRelease(CLogControlLock);
  }
  
  /*
--- 483,505 ----
  BootStrapCLOG(void)
  {
  	int			slotno;
+ 	int			partition;
  
! 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
! 	{
! 		SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
! 		LWLockAcquire(ClogCtlP->shared->ControlLock, LW_EXCLUSIVE);
  
! 		/* Create and zero the first page of the commit log */
! 		slotno = ZeroCLOGPage(partition, 0, false);
  
! 		/* Make sure it's written out */
! 		SimpleLruWritePage(ClogCtlP, slotno);
! 		Assert(!ClogCtlP->shared->page_dirty[slotno]);
! 
! 		LWLockRelease(ClogCtlP->shared->ControlLock);
! 	}
  }
  
  /*
***************
*** 460,473 **** BootStrapCLOG(void)
   * Control lock must be held at entry, and will be held at exit.
   */
  static int
! ZeroCLOGPage(int pageno, bool writeXlog)
  {
  	int			slotno;
  
! 	slotno = SimpleLruZeroPage(ClogCtl, pageno);
  
  	if (writeXlog)
! 		WriteZeroPageXlogRec(pageno);
  
  	return slotno;
  }
--- 512,525 ----
   * Control lock must be held at entry, and will be held at exit.
   */
  static int
! ZeroCLOGPage(int partition, int pageno, bool writeXlog)
  {
  	int			slotno;
  
! 	slotno = SimpleLruZeroPage(&ClogCtl[partition], pageno);
  
  	if (writeXlog)
! 		WriteZeroPageXlogRec(partition, pageno);
  
  	return slotno;
  }
***************
*** 481,495 **** StartupCLOG(void)
  {
  	TransactionId xid = ShmemVariableCache->nextXid;
  	int			pageno = TransactionIdToPage(xid);
  
! 	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
  
! 	/*
! 	 * Initialize our idea of the latest page number.
! 	 */
! 	ClogCtl->shared->latest_page_number = pageno;
  
! 	LWLockRelease(CLogControlLock);
  }
  
  /*
--- 533,553 ----
  {
  	TransactionId xid = ShmemVariableCache->nextXid;
  	int			pageno = TransactionIdToPage(xid);
+ 	int			partition;
+ 
+ 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
+ 	{
+ 		SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
! 		LWLockAcquire(ClogCtlP->shared->ControlLock, LW_EXCLUSIVE);
  
! 		/*
! 		 * Initialize this partition's idea of the latest page number.
! 		 */
! 		ClogCtlP->shared->latest_page_number = pageno;
  
! 		LWLockRelease(ClogCtlP->shared->ControlLock);
! 	}
  }
  
  /*
***************
*** 500,544 **** TrimCLOG(void)
  {
  	TransactionId xid = ShmemVariableCache->nextXid;
  	int			pageno = TransactionIdToPage(xid);
  
! 	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
  
! 	/*
! 	 * Re-Initialize our idea of the latest page number.
! 	 */
! 	ClogCtl->shared->latest_page_number = pageno;
  
! 	/*
! 	 * Zero out the remainder of the current clog page.  Under normal
! 	 * circumstances it should be zeroes already, but it seems at least
! 	 * theoretically possible that XLOG replay will have settled on a nextXID
! 	 * value that is less than the last XID actually used and marked by the
! 	 * previous database lifecycle (since subtransaction commit writes clog
! 	 * but makes no WAL entry).  Let's just be safe. (We need not worry about
! 	 * pages beyond the current one, since those will be zeroed when first
! 	 * used.  For the same reason, there is no need to do anything when
! 	 * nextXid is exactly at a page boundary; and it's likely that the
! 	 * "current" page doesn't exist yet in that case.)
! 	 */
! 	if (TransactionIdToPgIndex(xid) != 0)
! 	{
! 		int			byteno = TransactionIdToByte(xid);
! 		int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
! 		int			slotno;
! 		char	   *byteptr;
  
! 		slotno = SimpleLruReadPage(ClogCtl, pageno, false, xid);
! 		byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
  
! 		/* Zero so-far-unused positions in the current byte */
! 		*byteptr &= (1 << bshift) - 1;
! 		/* Zero the rest of the page */
! 		MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1);
  
! 		ClogCtl->shared->page_dirty[slotno] = true;
! 	}
  
! 	LWLockRelease(CLogControlLock);
  }
  
  /*
--- 558,608 ----
  {
  	TransactionId xid = ShmemVariableCache->nextXid;
  	int			pageno = TransactionIdToPage(xid);
+ 	int			partition;
  
! 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
! 	{
! 		SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
! 		LWLockAcquire(ClogCtlP->shared->ControlLock, LW_EXCLUSIVE);
  
! 		/*
! 		 * Re-Initialize our idea of the latest page number.
! 		 */
! 		ClogCtlP->shared->latest_page_number = pageno;
  
! 		/*
! 		 * Zero out the remainder of the current clog page.  Under normal
! 		 * circumstances it should be zeroes already, but it seems at least
! 		 * theoretically possible that XLOG replay will have settled on a nextXID
! 		 * value that is less than the last XID actually used and marked by the
! 		 * previous database lifecycle (since subtransaction commit writes clog
! 		 * but makes no WAL entry).  Let's just be safe. (We need not worry about
! 		 * pages beyond the current one, since those will be zeroed when first
! 		 * used.  For the same reason, there is no need to do anything when
! 		 * nextXid is exactly at a page boundary; and it's likely that the
! 		 * "current" page doesn't exist yet in that case.)
! 		 */
! 		if (TransactionIdToPgIndex(xid) != 0)
! 		{
! 			int			byteno = TransactionIdToByte(xid);	/* XXX fix me! */
! 			int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
! 			int			slotno;
! 			char	   *byteptr;
  
! 			slotno = SimpleLruReadPage(ClogCtlP, pageno, false, xid);
! 			byteptr = ClogCtlP->shared->page_buffer[slotno] + byteno;
  
! 			/* Zero so-far-unused positions in the current byte */
! 			*byteptr &= (1 << bshift) - 1;
! 			/* Zero the rest of the page */
! 			MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1);
  
! 			ClogCtlP->shared->page_dirty[slotno] = true;
! 		}
! 
! 		LWLockRelease(ClogCtlP->shared->ControlLock);
! 	}
  }
  
  /*
***************
*** 547,555 **** TrimCLOG(void)
  void
  ShutdownCLOG(void)
  {
  	/* Flush dirty CLOG pages to disk */
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
! 	SimpleLruFlush(ClogCtl, false);
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
  }
  
--- 611,622 ----
  void
  ShutdownCLOG(void)
  {
+ 	int			partition;
+ 
  	/* Flush dirty CLOG pages to disk */
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
! 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
! 		SimpleLruFlush(&ClogCtl[partition], false);
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
  }
  
***************
*** 559,567 **** ShutdownCLOG(void)
  void
  CheckPointCLOG(void)
  {
  	/* Flush dirty CLOG pages to disk */
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
! 	SimpleLruFlush(ClogCtl, true);
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
  }
  
--- 626,637 ----
  void
  CheckPointCLOG(void)
  {
+ 	int			partition;
+ 
  	/* Flush dirty CLOG pages to disk */
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
! 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
! 		SimpleLruFlush(&ClogCtl[partition], true);
  	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
  }
  
***************
*** 578,583 **** void
--- 648,655 ----
  ExtendCLOG(TransactionId newestXact)
  {
  	int			pageno;
+ 	int			partition = TransactionIdToPartition(newestXact);
+ 	SlruCtlData *ClogCtlP = &ClogCtl[partition];
  
  	/*
  	 * No work except at first XID of a page.  But beware: just after
***************
*** 589,600 **** ExtendCLOG(TransactionId newestXact)
  
  	pageno = TransactionIdToPage(newestXact);
  
! 	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
  
  	/* Zero the page and make an XLOG entry about it */
! 	ZeroCLOGPage(pageno, !InRecovery);
  
! 	LWLockRelease(CLogControlLock);
  }
  
  
--- 661,672 ----
  
  	pageno = TransactionIdToPage(newestXact);
  
! 	LWLockAcquire(ClogCtlP->shared->ControlLock, LW_EXCLUSIVE);
  
  	/* Zero the page and make an XLOG entry about it */
! 	ZeroCLOGPage(partition, pageno, !InRecovery);
  
! 	LWLockRelease(ClogCtlP->shared->ControlLock);
  }
  
  
***************
*** 617,622 **** void
--- 689,695 ----
  TruncateCLOG(TransactionId oldestXact)
  {
  	int			cutoffPage;
+ 	int			partition;
  
  	/*
  	 * The cutoff point is the start of the segment containing oldestXact. We
***************
*** 624,638 **** TruncateCLOG(TransactionId oldestXact)
  	 */
  	cutoffPage = TransactionIdToPage(oldestXact);
  
! 	/* Check to see if there's any files that could be removed */
! 	if (!SlruScanDirectory(ClogCtl, SlruScanDirCbReportPresence, &cutoffPage))
! 		return;					/* nothing to remove */
  
! 	/* Write XLOG record and flush XLOG to disk */
! 	WriteTruncateXlogRec(cutoffPage);
  
! 	/* Now we can remove the old CLOG segment(s) */
! 	SimpleLruTruncate(ClogCtl, cutoffPage);
  }
  
  
--- 697,716 ----
  	 */
  	cutoffPage = TransactionIdToPage(oldestXact);
  
! 	for (partition = 0; partition < NUM_CLOG_PARTITIONS; partition++)
! 	{
! 		SlruCtlData *ClogCtlP = &ClogCtl[partition];
! 
! 		/* Check to see if there's any files that could be removed */
! 		if (!SlruScanDirectory(ClogCtlP, SlruScanDirCbReportPresence, &cutoffPage))
! 			continue;					/* nothing to remove */
  
! 		/* Write XLOG record and flush XLOG to disk */
! 		WriteTruncateXlogRec(partition, cutoffPage);
  
! 		/* Now we can remove the old CLOG segment(s) */
! 		SimpleLruTruncate(ClogCtlP, cutoffPage);
! 	}
  }
  
  
***************
*** 664,675 **** CLOGPagePrecedes(int page1, int page2)
   * Write a ZEROPAGE xlog record
   */
  static void
! WriteZeroPageXlogRec(int pageno)
  {
  	XLogRecData rdata;
  
! 	rdata.data = (char *) (&pageno);
! 	rdata.len = sizeof(int);
  	rdata.buffer = InvalidBuffer;
  	rdata.next = NULL;
  	(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
--- 742,757 ----
   * Write a ZEROPAGE xlog record
   */
  static void
! WriteZeroPageXlogRec(int partition, int pageno)
  {
  	XLogRecData rdata;
+ 	xl_clog_page	cpage;
  
! 	cpage.partition = partition;
! 	cpage.pageno = pageno;
! 
! 	rdata.data = (char *) &cpage;
! 	rdata.len = sizeof(xl_clog_page);
  	rdata.buffer = InvalidBuffer;
  	rdata.next = NULL;
  	(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
***************
*** 682,694 **** WriteZeroPageXlogRec(int pageno)
   * in TruncateCLOG().
   */
  static void
! WriteTruncateXlogRec(int pageno)
  {
  	XLogRecData rdata;
  	XLogRecPtr	recptr;
  
! 	rdata.data = (char *) (&pageno);
! 	rdata.len = sizeof(int);
  	rdata.buffer = InvalidBuffer;
  	rdata.next = NULL;
  	recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
--- 764,780 ----
   * in TruncateCLOG().
   */
  static void
! WriteTruncateXlogRec(int partition, int pageno)
  {
  	XLogRecData rdata;
  	XLogRecPtr	recptr;
+ 	xl_clog_page	cpage;
+ 
+ 	cpage.partition = partition;
+ 	cpage.pageno = pageno;
  
! 	rdata.data = (char *) &cpage;
! 	rdata.len = sizeof(xl_clog_page);
  	rdata.buffer = InvalidBuffer;
  	rdata.next = NULL;
  	recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
***************
*** 702,739 **** void
  clog_redo(XLogRecPtr lsn, XLogRecord *record)
  {
  	uint8		info = record->xl_info & ~XLR_INFO_MASK;
  
  	/* Backup blocks are not used in clog records */
  	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
  
  	if (info == CLOG_ZEROPAGE)
  	{
- 		int			pageno;
  		int			slotno;
  
! 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
  
! 		LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
  
! 		slotno = ZeroCLOGPage(pageno, false);
! 		SimpleLruWritePage(ClogCtl, slotno);
! 		Assert(!ClogCtl->shared->page_dirty[slotno]);
  
! 		LWLockRelease(CLogControlLock);
  	}
  	else if (info == CLOG_TRUNCATE)
  	{
! 		int			pageno;
  
! 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
  
  		/*
  		 * During XLOG replay, latest_page_number isn't set up yet; insert a
  		 * suitable value to bypass the sanity test in SimpleLruTruncate.
  		 */
! 		ClogCtl->shared->latest_page_number = pageno;
  
! 		SimpleLruTruncate(ClogCtl, pageno);
  	}
  	else
  		elog(PANIC, "clog_redo: unknown op code %u", info);
--- 788,828 ----
  clog_redo(XLogRecPtr lsn, XLogRecord *record)
  {
  	uint8		info = record->xl_info & ~XLR_INFO_MASK;
+ 	xl_clog_page	cpage;
+ 	SlruCtlData *ClogCtlP;
  
  	/* Backup blocks are not used in clog records */
  	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
  
  	if (info == CLOG_ZEROPAGE)
  	{
  		int			slotno;
  
! 		memcpy(&cpage, XLogRecGetData(record), sizeof(xl_clog_page));
! 
! 		ClogCtlP = &ClogCtl[cpage.partition];
  
! 		LWLockAcquire(ClogCtlP->shared->ControlLock, LW_EXCLUSIVE);
  
! 		slotno = ZeroCLOGPage(cpage.partition, cpage.pageno, false);
! 		SimpleLruWritePage(ClogCtlP, slotno);
! 		Assert(!ClogCtlP->shared->page_dirty[slotno]);
  
! 		LWLockRelease(ClogCtlP->shared->ControlLock);
  	}
  	else if (info == CLOG_TRUNCATE)
  	{
! 		memcpy(&cpage, XLogRecGetData(record), sizeof(xl_clog_page));
  
! 		ClogCtlP = &ClogCtl[cpage.partition];
  
  		/*
  		 * During XLOG replay, latest_page_number isn't set up yet; insert a
  		 * suitable value to bypass the sanity test in SimpleLruTruncate.
  		 */
! 		ClogCtlP->shared->latest_page_number = cpage.pageno;
  
! 		SimpleLruTruncate(ClogCtlP, cpage.pageno);
  	}
  	else
  		elog(PANIC, "clog_redo: unknown op code %u", info);
***************
*** 743,762 **** void
  clog_desc(StringInfo buf, uint8 xl_info, char *rec)
  {
  	uint8		info = xl_info & ~XLR_INFO_MASK;
  
  	if (info == CLOG_ZEROPAGE)
  	{
! 		int			pageno;
! 
! 		memcpy(&pageno, rec, sizeof(int));
! 		appendStringInfo(buf, "zeropage: %d", pageno);
  	}
  	else if (info == CLOG_TRUNCATE)
  	{
! 		int			pageno;
! 
! 		memcpy(&pageno, rec, sizeof(int));
! 		appendStringInfo(buf, "truncate before: %d", pageno);
  	}
  	else
  		appendStringInfo(buf, "UNKNOWN");
--- 832,848 ----
  clog_desc(StringInfo buf, uint8 xl_info, char *rec)
  {
  	uint8		info = xl_info & ~XLR_INFO_MASK;
+ 	xl_clog_page	cpage;
  
  	if (info == CLOG_ZEROPAGE)
  	{
! 		memcpy(&cpage, rec, sizeof(xl_clog_page));
! 		appendStringInfo(buf, "zeropage: partition %d page %d", cpage.partition, cpage.pageno);
  	}
  	else if (info == CLOG_TRUNCATE)
  	{
! 		memcpy(&cpage, rec, sizeof(xl_clog_page));
! 		appendStringInfo(buf, "truncate before: partition %d page %d", cpage.partition, cpage.pageno);
  	}
  	else
  		appendStringInfo(buf, "UNKNOWN");
*** a/src/backend/access/transam/multixact.c
--- b/src/backend/access/transam/multixact.c
***************
*** 1392,1399 **** MultiXactShmemSize(void)
  			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
  
  	size = SHARED_MULTIXACT_STATE_SIZE;
! 	size = add_size(size, SimpleLruShmemSize(NUM_MXACTOFFSET_BUFFERS, 0));
! 	size = add_size(size, SimpleLruShmemSize(NUM_MXACTMEMBER_BUFFERS, 0));
  
  	return size;
  }
--- 1392,1399 ----
  			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
  
  	size = SHARED_MULTIXACT_STATE_SIZE;
! 	size = add_size(size, SimpleLruShmemSize(1, NUM_MXACTOFFSET_BUFFERS, 0));
! 	size = add_size(size, SimpleLruShmemSize(1, NUM_MXACTMEMBER_BUFFERS, 0));
  
  	return size;
  }
***************
*** 1409,1418 **** MultiXactShmemInit(void)
  	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
  
  	SimpleLruInit(MultiXactOffsetCtl,
! 				  "MultiXactOffset Ctl", NUM_MXACTOFFSET_BUFFERS, 0,
  				  MultiXactOffsetControlLock, "pg_multixact/offsets");
  	SimpleLruInit(MultiXactMemberCtl,
! 				  "MultiXactMember Ctl", NUM_MXACTMEMBER_BUFFERS, 0,
  				  MultiXactMemberControlLock, "pg_multixact/members");
  
  	/* Initialize our shared state struct */
--- 1409,1418 ----
  	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
  
  	SimpleLruInit(MultiXactOffsetCtl,
! 				  "MultiXactOffset Ctl", 1, NUM_MXACTOFFSET_BUFFERS, 0,
  				  MultiXactOffsetControlLock, "pg_multixact/offsets");
  	SimpleLruInit(MultiXactMemberCtl,
! 				  "MultiXactMember Ctl", 1, NUM_MXACTMEMBER_BUFFERS, 0,
  				  MultiXactMemberControlLock, "pg_multixact/members");
  
  	/* Initialize our shared state struct */
*** a/src/backend/access/transam/slru.c
--- b/src/backend/access/transam/slru.c
***************
*** 140,146 **** static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
   */
  
  Size
! SimpleLruShmemSize(int nslots, int nlsns)
  {
  	Size		sz;
  
--- 140,146 ----
   */
  
  Size
! SimpleLruShmemSize(int npartitions, int nslots, int nlsns)
  {
  	Size		sz;
  
***************
*** 156,237 **** SimpleLruShmemSize(int nslots, int nlsns)
  	if (nlsns > 0)
  		sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
  
! 	return BUFFERALIGN(sz) + BLCKSZ * nslots;
  }
  
  void
! SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
  			  LWLockId ctllock, const char *subdir)
  {
! 	SlruShared	shared;
  	bool		found;
  
! 	shared = (SlruShared) ShmemInitStruct(name,
! 										  SimpleLruShmemSize(nslots, nlsns),
  										  &found);
  
! 	if (!IsUnderPostmaster)
  	{
! 		/* Initialize locks and shared memory area */
! 		char	   *ptr;
! 		Size		offset;
! 		int			slotno;
  
! 		Assert(!found);
  
! 		memset(shared, 0, sizeof(SlruSharedData));
  
! 		shared->ControlLock = ctllock;
  
! 		shared->num_slots = nslots;
! 		shared->lsn_groups_per_page = nlsns;
  
! 		shared->cur_lru_count = 0;
  
! 		/* shared->latest_page_number will be set later */
  
! 		ptr = (char *) shared;
! 		offset = MAXALIGN(sizeof(SlruSharedData));
! 		shared->page_buffer = (char **) (ptr + offset);
! 		offset += MAXALIGN(nslots * sizeof(char *));
! 		shared->page_status = (SlruPageStatus *) (ptr + offset);
! 		offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
! 		shared->page_dirty = (bool *) (ptr + offset);
! 		offset += MAXALIGN(nslots * sizeof(bool));
! 		shared->page_number = (int *) (ptr + offset);
! 		offset += MAXALIGN(nslots * sizeof(int));
! 		shared->page_lru_count = (int *) (ptr + offset);
! 		offset += MAXALIGN(nslots * sizeof(int));
! 		shared->buffer_locks = (LWLockId *) (ptr + offset);
! 		offset += MAXALIGN(nslots * sizeof(LWLockId));
  
! 		if (nlsns > 0)
! 		{
! 			shared->group_lsn = (XLogRecPtr *) (ptr + offset);
! 			offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
! 		}
  
! 		ptr += BUFFERALIGN(offset);
! 		for (slotno = 0; slotno < nslots; slotno++)
! 		{
! 			shared->page_buffer[slotno] = ptr;
! 			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
! 			shared->page_dirty[slotno] = false;
! 			shared->page_lru_count[slotno] = 0;
! 			shared->buffer_locks[slotno] = LWLockAssign();
! 			ptr += BLCKSZ;
  		}
! 	}
! 	else
! 		Assert(found);
  
! 	/*
! 	 * Initialize the unshared control struct, including directory path. We
! 	 * assume caller set PagePrecedes.
! 	 */
! 	ctl->shared = shared;
! 	ctl->do_fsync = true;		/* default behavior */
! 	StrNCpy(ctl->Dir, subdir, sizeof(ctl->Dir));
  }
  
  /*
--- 156,252 ----
  	if (nlsns > 0)
  		sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
  
! 	return npartitions * (BUFFERALIGN(sz) + BLCKSZ * nslots);
  }
  
  void
! SimpleLruInit(SlruCtl ctl, const char *name, int npartitions, int nslots, int nlsns,
  			  LWLockId ctllock, const char *subdir)
  {
! 	SlruShared	slrushared;
  	bool		found;
+ 	int			partition;
  
! 	slrushared = (SlruShared) ShmemInitStruct(name,
! 										  SimpleLruShmemSize(npartitions, nslots, nlsns),
  										  &found);
  
! 	for (partition = 0; partition < npartitions; partition++)
  	{
! 		SlruShared	shared = slrushared;
  
! 		if (!IsUnderPostmaster)
! 		{
! 			/* Initialize locks and shared memory area */
! 			char	   *ptr;
! 			Size		offset;
! 			int			slotno;
! 			SlruShared	shared;
  
! 			shared = slrushared + partition;
  
! 			Assert(!found);
  
! 			memset(shared, 0, sizeof(SlruSharedData));
  
! 			shared->ControlLock = ctllock + partition;
  
! 			shared->num_slots = nslots;
! 			shared->lsn_groups_per_page = nlsns;
  
! 			shared->cur_lru_count = 0;
  
! 			/* shared->latest_page_number will be set later */
  
! 			ptr = (char *) shared;
! 			offset = MAXALIGN(sizeof(SlruSharedData));
! 			shared->page_buffer = (char **) (ptr + offset);
! 			offset += MAXALIGN(nslots * sizeof(char *));
! 			shared->page_status = (SlruPageStatus *) (ptr + offset);
! 			offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
! 			shared->page_dirty = (bool *) (ptr + offset);
! 			offset += MAXALIGN(nslots * sizeof(bool));
! 			shared->page_number = (int *) (ptr + offset);
! 			offset += MAXALIGN(nslots * sizeof(int));
! 			shared->page_lru_count = (int *) (ptr + offset);
! 			offset += MAXALIGN(nslots * sizeof(int));
! 			shared->buffer_locks = (LWLockId *) (ptr + offset);
! 			offset += MAXALIGN(nslots * sizeof(LWLockId));
! 
! 			if (nlsns > 0)
! 			{
! 				shared->group_lsn = (XLogRecPtr *) (ptr + offset);
! 				offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
! 			}
! 
! 			ptr += BUFFERALIGN(offset);
! 			for (slotno = 0; slotno < nslots; slotno++)
! 			{
! 				shared->page_buffer[slotno] = ptr;
! 				shared->page_status[slotno] = SLRU_PAGE_EMPTY;
! 				shared->page_dirty[slotno] = false;
! 				shared->page_lru_count[slotno] = 0;
! 				shared->buffer_locks[slotno] = LWLockAssign();
! 				ptr += BLCKSZ;
! 			}
  		}
! 		else
! 			Assert(found);
  
! 		/*
! 		 * Initialize the unshared control struct, including directory path. We
! 		 * assume caller set PagePrecedes.
! 		 */
! 		ctl->shared = shared;
! 		ctl->do_fsync = true;		/* default behavior */
! 		if (npartitions == 1)
! 			sprintf(ctl->Dir, "%s", subdir);
! 		else
! 			sprintf(ctl->Dir, "%s/%i", subdir, partition);
! 
! 		shared++;
! 		ctl++;
! 	}
  }
  
  /*
*** a/src/backend/access/transam/subtrans.c
--- b/src/backend/access/transam/subtrans.c
***************
*** 171,184 **** SubTransGetTopmostTransaction(TransactionId xid)
  Size
  SUBTRANSShmemSize(void)
  {
! 	return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
  }
  
  void
  SUBTRANSShmemInit(void)
  {
  	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
! 	SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", NUM_SUBTRANS_BUFFERS, 0,
  				  SubtransControlLock, "pg_subtrans");
  	/* Override default assumption that writes should be fsync'd */
  	SubTransCtl->do_fsync = false;
--- 171,184 ----
  Size
  SUBTRANSShmemSize(void)
  {
! 	return SimpleLruShmemSize(1, NUM_SUBTRANS_BUFFERS, 0);
  }
  
  void
  SUBTRANSShmemInit(void)
  {
  	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
! 	SimpleLruInit(SubTransCtl, "SUBTRANS Ctl", 1, NUM_SUBTRANS_BUFFERS, 0,
  				  SubtransControlLock, "pg_subtrans");
  	/* Override default assumption that writes should be fsync'd */
  	SubTransCtl->do_fsync = false;
*** a/src/backend/commands/async.c
--- b/src/backend/commands/async.c
***************
*** 422,428 **** AsyncShmemSize(void)
  	size = mul_size(MaxBackends, sizeof(QueueBackendStatus));
  	size = add_size(size, sizeof(AsyncQueueControl));
  
! 	size = add_size(size, SimpleLruShmemSize(NUM_ASYNC_BUFFERS, 0));
  
  	return size;
  }
--- 422,428 ----
  	size = mul_size(MaxBackends, sizeof(QueueBackendStatus));
  	size = add_size(size, sizeof(AsyncQueueControl));
  
! 	size = add_size(size, SimpleLruShmemSize(1, NUM_ASYNC_BUFFERS, 0));
  
  	return size;
  }
***************
*** 470,476 **** AsyncShmemInit(void)
  	 * Set up SLRU management of the pg_notify data.
  	 */
  	AsyncCtl->PagePrecedes = asyncQueuePagePrecedes;
! 	SimpleLruInit(AsyncCtl, "Async Ctl", NUM_ASYNC_BUFFERS, 0,
  				  AsyncCtlLock, "pg_notify");
  	/* Override default assumption that writes should be fsync'd */
  	AsyncCtl->do_fsync = false;
--- 470,476 ----
  	 * Set up SLRU management of the pg_notify data.
  	 */
  	AsyncCtl->PagePrecedes = asyncQueuePagePrecedes;
! 	SimpleLruInit(AsyncCtl, "Async Ctl", 1, NUM_ASYNC_BUFFERS, 0,
  				  AsyncCtlLock, "pg_notify");
  	/* Override default assumption that writes should be fsync'd */
  	AsyncCtl->do_fsync = false;
*** a/src/backend/storage/lmgr/predicate.c
--- b/src/backend/storage/lmgr/predicate.c
***************
*** 788,794 **** OldSerXidInit(void)
  	 */
  	OldSerXidSlruCtl->PagePrecedes = OldSerXidPagePrecedesLogically;
  	SimpleLruInit(OldSerXidSlruCtl, "OldSerXid SLRU Ctl",
! 				  NUM_OLDSERXID_BUFFERS, 0, OldSerXidLock, "pg_serial");
  	/* Override default assumption that writes should be fsync'd */
  	OldSerXidSlruCtl->do_fsync = false;
  
--- 788,794 ----
  	 */
  	OldSerXidSlruCtl->PagePrecedes = OldSerXidPagePrecedesLogically;
  	SimpleLruInit(OldSerXidSlruCtl, "OldSerXid SLRU Ctl",
! 				  1, NUM_OLDSERXID_BUFFERS, 0, OldSerXidLock, "pg_serial");
  	/* Override default assumption that writes should be fsync'd */
  	OldSerXidSlruCtl->do_fsync = false;
  
***************
*** 1334,1340 **** PredicateLockShmemSize(void)
  
  	/* Shared memory structures for SLRU tracking of old committed xids. */
  	size = add_size(size, sizeof(OldSerXidControlData));
! 	size = add_size(size, SimpleLruShmemSize(NUM_OLDSERXID_BUFFERS, 0));
  
  	return size;
  }
--- 1334,1340 ----
  
  	/* Shared memory structures for SLRU tracking of old committed xids. */
  	size = add_size(size, sizeof(OldSerXidControlData));
! 	size = add_size(size, SimpleLruShmemSize(1, NUM_OLDSERXID_BUFFERS, 0));
  
  	return size;
  }
*** a/src/include/access/clog.h
--- b/src/include/access/clog.h
***************
*** 30,35 **** typedef int XidStatus;
--- 30,36 ----
  
  /* Number of SLRU buffers to use for clog */
  #define NUM_CLOG_BUFFERS	8
+ #define NUM_CLOG_PARTITIONS	8
  
  
  extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
*** a/src/include/access/slru.h
--- b/src/include/access/slru.h
***************
*** 134,141 **** typedef struct SlruCtlData
  typedef SlruCtlData *SlruCtl;
  
  
! extern Size SimpleLruShmemSize(int nslots, int nlsns);
! extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
  			  LWLockId ctllock, const char *subdir);
  extern int	SimpleLruZeroPage(SlruCtl ctl, int pageno);
  extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
--- 134,141 ----
  typedef SlruCtlData *SlruCtl;
  
  
! extern Size SimpleLruShmemSize(int npartitions, int nslots, int nlsns);
! extern void SimpleLruInit(SlruCtl ctl, const char *name, int npartitions, int nslots, int nlsns,
  			  LWLockId ctllock, const char *subdir);
  extern int	SimpleLruZeroPage(SlruCtl ctl, int pageno);
  extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
*** a/src/include/storage/lwlock.h
--- b/src/include/storage/lwlock.h
***************
*** 21,26 ****
--- 21,29 ----
   */
  
  /* Number of partitions of the shared buffer mapping hashtable */
+ #define NUM_CLOG_PARTITIONS  8
+ 
+ /* Number of partitions of the shared buffer mapping hashtable */
  #define NUM_BUFFER_PARTITIONS  16
  
  /* Number of partitions the shared lock tables are divided into */
***************
*** 57,63 **** typedef enum LWLockId
  	WALWriteLock,
  	ControlFileLock,
  	CheckpointLock,
! 	CLogControlLock,
  	SubtransControlLock,
  	MultiXactGenLock,
  	MultiXactOffsetControlLock,
--- 60,66 ----
  	WALWriteLock,
  	ControlFileLock,
  	CheckpointLock,
! 	CLogControlLock_NowUnused,
  	SubtransControlLock,
  	MultiXactGenLock,
  	MultiXactOffsetControlLock,
***************
*** 82,88 **** typedef enum LWLockId
  	/* Individual lock IDs end here */
  	FirstBufMappingLock,
  	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
! 	FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,
  
  	/* must be last except for MaxDynamicLWLock: */
  	NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,
--- 85,92 ----
  	/* Individual lock IDs end here */
  	FirstBufMappingLock,
  	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
! 	FirstClogControlLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,	/* NUM_CLOG_PARTITIONS IDs */
! 	FirstPredicateLockMgrLock = FirstClogControlLock + NUM_CLOG_PARTITIONS,
  
  	/* must be last except for MaxDynamicLWLock: */
  	NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,
