diff -rcN postgresql-CVS-01-21.org/src/backend/access/gin/ginxlog.c postgresql-CVS-01-21/src/backend/access/gin/ginxlog.c
*** postgresql-CVS-01-21.org/src/backend/access/gin/ginxlog.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/gin/ginxlog.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 14,19 ****
--- 14,20 ----
  #include "postgres.h"
  
  #include "access/gin.h"
+ #include "access/xlog.h"
  #include "access/xlogutils.h"
  #include "storage/bufmgr.h"
  #include "utils/memutils.h"
***************
*** 521,526 ****
--- 522,631 ----
  	}
  }
  
+ /*
+  *    gin_readahead  - enqueue the data pages named by a GIN WAL record
+  *
+  * Each page is passed, together with lsn.xrecoff, to ReadAheadAddEntry().
+  * Returns false, queuing nothing, when ReadAheadHasRoom() reports too little
+  * room for all pages of the record; returns true otherwise.
+  */
+ bool
+ gin_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info)
+ 	{
+ 		case XLOG_GIN_CREATE_INDEX:
+ 			{
+ 				RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(*node, GIN_ROOT_BLKNO, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_GIN_CREATE_PTREE:
+ 			{
+ 				ginxlogCreatePostingTree *data =
+ 					(ginxlogCreatePostingTree *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_GIN_INSERT:
+ 			{
+ 				ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_GIN_SPLIT:
+ 			{
+ 				int readahead_cnt;
+ 				ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 2;
+ 				if (data->isRootSplit)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 
+ 				ReadAheadAddEntry(data->node, data->lblkno, lsn.xrecoff, false);
+ 				ReadAheadAddEntry(data->node, data->rblkno, lsn.xrecoff, false);
+ 				if (data->isRootSplit)
+ 				{
+ 					ReadAheadAddEntry(data->node, data->rootBlkno,
+ 						lsn.xrecoff, false);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_GIN_VACUUM_PAGE:
+ 			{
+ 				ginxlogVacuumPage *data =
+ 					(ginxlogVacuumPage *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_GIN_DELETE_PAGE:
+ 			{
+ 				int readahead_cnt;
+ 				ginxlogDeletePage *data =
+ 					(ginxlogDeletePage *) XLogRecGetData(record);
+ 				readahead_cnt = 2;
+ 				if (data->leftBlkno != InvalidBlockNumber)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 				ReadAheadAddEntry(data->node, data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				ReadAheadAddEntry(data->node, data->parentBlkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				if (data->leftBlkno != InvalidBlockNumber)
+ 				{
+ 					ReadAheadAddEntry(data->node, data->leftBlkno,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_3);
+ 				}
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
  void
  gin_xlog_startup(void)
  {
diff -rcN postgresql-CVS-01-21.org/src/backend/access/gist/gistxlog.c postgresql-CVS-01-21/src/backend/access/gist/gistxlog.c
*** postgresql-CVS-01-21.org/src/backend/access/gist/gistxlog.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/gist/gistxlog.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 14,19 ****
--- 14,20 ----
  #include "postgres.h"
  
  #include "access/gist_private.h"
+ #include "access/xlog.h"
  #include "access/xlogutils.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
***************
*** 501,506 ****
--- 502,585 ----
  	}
  }
  
+ /*
+  *    gist_readahead   - enqueue the data pages named by a GiST WAL record
+  *
+  * Each page is passed, together with lsn.xrecoff, to ReadAheadAddEntry().
+  * Returns false, queuing nothing, when ReadAheadHasRoom() reports too little
+  * room for all pages of the record; returns true otherwise.
+  */
+ bool
+ gist_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info)
+ 	{
+ 		case XLOG_GIST_PAGE_UPDATE:
+ 		case XLOG_GIST_NEW_ROOT:
+ 			{
+ 				PageUpdateRecord xlrec;
+ 
+ 				decodePageUpdateRecord(&xlrec, record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec.data->node, xlrec.data->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_GIST_PAGE_SPLIT:
+ 			{
+ 				int i;
+ 				PageSplitRecord rec;
+ 				/* NOTE(review): confirm rec.page needs no freeing after use. */
+ 				decodePageSplitRecord(&rec, record);
+ 
+ 				if (!ReadAheadHasRoom(rec.data->npage))
+ 					return false;
+ 				for (i = 0; i < rec.data->npage; i++)
+ 				{
+ 					ReadAheadAddEntry(rec.data->node, rec.page[i].header->blkno,
+ 						lsn.xrecoff, false);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_GIST_INSERT_COMPLETE:
+ 			{
+ 				/*
+ 				 * This WAL record never touches a data page, so there is
+ 				 * nothing to do.
+ 				 */
+ 				break;
+ 			}
+ 		case XLOG_GIST_CREATE_INDEX:
+ 			{
+ 				RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(*node, GIST_ROOT_BLKNO, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_GIST_PAGE_DELETE:
+ 			{
+ 				gistxlogPageDelete *xldata =
+ 					(gistxlogPageDelete *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xldata->node, xldata->blkno,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
  IndexTuple
  gist_form_invalid_tuple(BlockNumber blkno)
  {
diff -rcN postgresql-CVS-01-21.org/src/backend/access/heap/heapam.c postgresql-CVS-01-21/src/backend/access/heap/heapam.c
*** postgresql-CVS-01-21.org/src/backend/access/heap/heapam.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/heap/heapam.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 49,54 ****
--- 49,55 ----
  #include "access/valid.h"
  #include "access/visibilitymap.h"
  #include "access/xact.h"
+ #include "access/xlog.h"
  #include "access/xlogutils.h"
  #include "catalog/catalog.h"
  #include "catalog/namespace.h"
***************
*** 4975,4980 ****
--- 4976,5127 ----
  }
  
  /*
+  *	heap_readahead	- enqueue the data pages named by a heap WAL record
+  *
+  * Each page is passed, together with lsn.xrecoff, to ReadAheadAddEntry().
+  * Returns false, queuing nothing, when ReadAheadHasRoom() reports too little
+  * room for all pages of the record; returns true otherwise.
+  */
+ bool
+ heap_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info & XLOG_HEAP_OPMASK)
+ 	{
+ 		case XLOG_HEAP_INSERT:
+ 			{
+ 				xl_heap_insert *xlrec =
+ 					(xl_heap_insert *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_DELETE:
+ 			{
+ 				xl_heap_delete *xlrec =
+ 					(xl_heap_delete *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_UPDATE:
+ 		case XLOG_HEAP_MOVE:
+ 		case XLOG_HEAP_HOT_UPDATE:
+ 			{
+ 				bool samepage;
+ 				xl_heap_update *xlrec =
+ 					(xl_heap_update *) XLogRecGetData(record);
+ 
+ 				samepage = ItemPointerGetBlockNumber(&xlrec->newtid) ==
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid);
+ 
+ 				if (!ReadAheadHasRoom(1 + (samepage ? 0 : 1)))
+ 					return false;
+ 				/* store page which contains updated tuple. */
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				/* store the page of the new tuple when it is a different one. */
+ 				if (!samepage)
+ 					ReadAheadAddEntry(xlrec->target.node,
+ 						ItemPointerGetBlockNumber(&xlrec->newtid),
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_NEWPAGE:
+ 			{
+ 				xl_heap_newpage *xlrec =
+ 					(xl_heap_newpage *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->blkno,
+ 					lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_LOCK:
+ 			{
+ 				xl_heap_lock *xlrec =
+ 					(xl_heap_lock *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP_INPLACE:
+ 			{
+ 				xl_heap_inplace *xlrec =
+ 					(xl_heap_inplace *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&xlrec->target.tid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
+ /*
+  *	heap2_readahead	- enqueue the data pages named by a heap2 WAL record
+  *
+  * The record type is taken from xl_info with the XLR_INFO_MASK bits removed,
+  * the same way heap_readahead() does.  Returns false, queuing nothing, when
+  * ReadAheadHasRoom() reports no room for the page; returns true otherwise.
+  */
+ bool
+ heap2_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	Assert(record);
+ 
+ 	switch (record->xl_info & ~XLR_INFO_MASK & XLOG_HEAP_OPMASK)
+ 	{
+ 		case XLOG_HEAP2_FREEZE:
+ 			{
+ 				xl_heap_freeze *xlrec =
+ 					(xl_heap_freeze *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->block,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_HEAP2_CLEAN:
+ 		case XLOG_HEAP2_CLEAN_MOVE:
+ 			{
+ 				xl_heap_clean *xlrec =
+ 					(xl_heap_clean *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->block,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
+ /*
   *	heap_sync		- sync a heap, for use when no WAL has been written
   *
   * This forces the heap contents (including TOAST heap if any) down to disk.
diff -rcN postgresql-CVS-01-21.org/src/backend/access/nbtree/nbtxlog.c postgresql-CVS-01-21/src/backend/access/nbtree/nbtxlog.c
*** postgresql-CVS-01-21.org/src/backend/access/nbtree/nbtxlog.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/nbtree/nbtxlog.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "access/nbtree.h"
  #include "access/transam.h"
+ #include "access/xlog.h"
  #include "storage/bufmgr.h"
  
  /*
***************
*** 880,885 ****
--- 881,1016 ----
  	}
  }
  
+ /*
+  * btree_readahead	- enqueue the data pages named by a btree WAL record
+  *
+  * Returns false, queuing nothing, when ReadAheadHasRoom() reports too little
+  * room for all pages of the record; returns true otherwise.
+  */
+ bool
+ btree_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);
+ 	info = record->xl_info & ~XLR_INFO_MASK;
+ 	switch (info)
+ 	{
+ 		case XLOG_BTREE_INSERT_LEAF:
+ 		case XLOG_BTREE_INSERT_UPPER:
+ 		case XLOG_BTREE_INSERT_META:
+ 			{
+ 				int readahead_cnt;
+ 				xl_btree_insert *xlrec =
+ 					(xl_btree_insert *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 1;
+ 				if (info == XLOG_BTREE_INSERT_META)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					BlockIdGetBlockNumber(&xlrec->target.tid.ip_blkid),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				if (info == XLOG_BTREE_INSERT_META)
+ 					ReadAheadAddEntry(xlrec->target.node,
+ 						BTREE_METAPAGE, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_BTREE_SPLIT_L:
+ 		case XLOG_BTREE_SPLIT_L_ROOT:
+ 		case XLOG_BTREE_SPLIT_R:
+ 		case XLOG_BTREE_SPLIT_R_ROOT:
+ 			{
+ 				int readahead_cnt;
+ 				xl_btree_split *xlrec =
+ 					(xl_btree_split *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 2;
+ 				if (xlrec->rnext != P_NONE)
+ 					readahead_cnt++;
+ 
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 
+ 				ReadAheadAddEntry(xlrec->node, xlrec->rightsib,
+ 					lsn.xrecoff, false);
+ 				ReadAheadAddEntry(xlrec->node, xlrec->leftsib,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				if (xlrec->rnext != P_NONE)
+ 				{
+ 					ReadAheadAddEntry(xlrec->node, xlrec->rnext,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				}
+ 				break;
+ 			}
+ 		case XLOG_BTREE_DELETE:
+ 			{
+ 				xl_btree_delete *xlrec =
+ 					(xl_btree_delete *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, xlrec->block,
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				break;
+ 			}
+ 		case XLOG_BTREE_DELETE_PAGE:
+ 		case XLOG_BTREE_DELETE_PAGE_META:
+ 		case XLOG_BTREE_DELETE_PAGE_HALF:
+ 			{
+ 				int readahead_cnt;
+ 				xl_btree_delete_page *xlrec =
+ 					(xl_btree_delete_page *) XLogRecGetData(record);
+ 
+ 				readahead_cnt = 3;
+ 				if (info == XLOG_BTREE_DELETE_PAGE_META)
+ 					readahead_cnt++;
+ 				if (xlrec->leftblk != P_NONE)
+ 					readahead_cnt++;
+ 
+ 				/* check room for every page first, as the other cases do */
+ 				if (!ReadAheadHasRoom(readahead_cnt))
+ 					return false;
+ 
+ 				/* parent page */
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					ItemPointerGetBlockNumber(&(xlrec->target.tid)),
+ 					lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_1);
+ 				/* rightsib page */
+ 				ReadAheadAddEntry(xlrec->target.node, xlrec->rightblk,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_2);
+ 				/* leftsib page, if exists */
+ 				if (xlrec->leftblk != P_NONE)
+ 					ReadAheadAddEntry(xlrec->target.node, xlrec->leftblk,
+ 						lsn.xrecoff, record->xl_info & XLR_BKP_BLOCK_3);
+ 				/* target page */
+ 				ReadAheadAddEntry(xlrec->target.node,
+ 					xlrec->deadblk, lsn.xrecoff, false);
+ 				/* metapage, if exists */
+ 				if (info == XLOG_BTREE_DELETE_PAGE_META)
+ 					ReadAheadAddEntry(xlrec->target.node,
+ 						BTREE_METAPAGE, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 		case XLOG_BTREE_NEWROOT:
+ 			{
+ 				xl_btree_newroot *xlrec =
+ 					(xl_btree_newroot *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				/* FPW does not exist. */
+ 				ReadAheadAddEntry(xlrec->node, xlrec->rootblk,
+ 					lsn.xrecoff, false);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
+ 
  void
  btree_xlog_startup(void)
  {
diff -rcN postgresql-CVS-01-21.org/src/backend/access/transam/Makefile postgresql-CVS-01-21/src/backend/access/transam/Makefile
*** postgresql-CVS-01-21.org/src/backend/access/transam/Makefile	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/transam/Makefile	2009-01-21 10:35:03.000000000 +0900
***************
*** 12,18 ****
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o twophase.o twophase_rmgr.o
  
  include $(top_srcdir)/src/backend/common.mk
  
--- 12,18 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o twophase.o twophase_rmgr.o readahead.o
  
  include $(top_srcdir)/src/backend/common.mk
  
diff -rcN postgresql-CVS-01-21.org/src/backend/access/transam/readahead.c postgresql-CVS-01-21/src/backend/access/transam/readahead.c
*** postgresql-CVS-01-21.org/src/backend/access/transam/readahead.c	1970-01-01 09:00:00.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/transam/readahead.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 0 ****
--- 1,231 ----
+ /*-------------------------------------------------------------------------
+  *
+  * readahead.c
+  *		Store information of data pages which should be read ahead.
+  *
+  * Original coding 2008, Koichi Suzuki.
+  *
+  * Portions Copyright (c) 1998-2009, PostgreSQL Global Development Group
+  *
+  *-------------------------------------------------------------------------
+  */
+ 
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ 
+ #include "postgres.h"
+ #include "access/xlog_internal.h"
+ #include "catalog/catalog.h"
+ #include "storage/relfilenode.h"
+ #include "storage/block.h"
+ #include "storage/smgr.h"
+ 
+ /*
+  * One queued entry: a data page that is a candidate for being read ahead.
+  */
+ struct XLogReadAhead {
+ 	/*
+ 	 * The physical location of the data page: relation file and block.
+ 	 */
+ 	RelFileNode node;
+ 	BlockNumber blkno;
+ 
+ 	/*
+ 	 * xrecoff is the xrecoff part of the LSN given with the entry (see
+ 	 * xlogdefs.h); ReadAheadCompare() uses it as the last sort key.  Only
+ 	 * xrecoff is kept, on the assumption that xlogid is the same for all
+ 	 * entries queued together -- NOTE(review): confirm with the callers.
+ 	 */
+ 	uint32 xrecoff;
+ 
+ 	/*
+ 	 * has_fpw indicates whether the WAL record carries a full page write for
+ 	 * this page.  ReadAheadExecute() does not prefetch such entries.
+ 	 */
+ 	bool has_fpw;
+ }; 
+ typedef struct XLogReadAhead XLogReadAhead;
+ 
+ /*
+  * ReadAheadQueueSize is the size in bytes (16MB) of the XLogReadAhead queue.
+  * The queue holds ReadAheadQueueSize / sizeof(XLogReadAhead) entries; once
+  * all of them are used, ReadAheadAddEntry() executes the readahead first.
+  */
+ #define ReadAheadQueueSize	(16 * 1024 * 1024)
+ 
+ /* The queue for XLogReadAhead entries, allocated by ReadAheadInit(). */
+ static XLogReadAhead *ReadAheadQueue = NULL;
+ 
+ /* The number of XLogReadAhead entries currently used. */
+ static uint32 ReadAheadQueueUsed = 0;
+ 
+ /* prototype of local function: qsort comparator for queue entries */
+ static int ReadAheadCompare(const void *l, const void *r);
+ 
+ /*
+  * ReadAheadInit - allocate the read-ahead queue; FATAL when out of memory.
+  */
+ void
+ ReadAheadInit(void)
+ {
+ 	ReadAheadQueue = (XLogReadAhead *) malloc(ReadAheadQueueSize);
+ 	if (ReadAheadQueue == NULL)	/* an Assert vanishes in production builds */
+ 		ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
+ }
+ 
+ /*
+  * Append a new XLogReadAhead entry to the queue
+  *
+  * If the XLogReadAhead queue is already full, prefetch the queued pages first
+  * (which empties the queue) and then add the new entry to the empty queue.
+  */
+ void
+ ReadAheadAddEntry(RelFileNode node, BlockNumber blkno, uint32 xrecoff,
+ 	bool has_fpw)
+ {
+ 	/* all entries are used, so prefetch pages and make the queue empty */
+ 	if (ReadAheadQueueUsed >= ReadAheadQueueSize / sizeof(XLogReadAhead))
+ 	{
+ 		ReadAheadExecute();
+ 	}
+ 
+ 	/* Store the new entry in the first unused slot of the queue. */
+ 	ReadAheadQueue[ReadAheadQueueUsed].node = node;
+ 	ReadAheadQueue[ReadAheadQueueUsed].blkno = blkno;
+ 	ReadAheadQueue[ReadAheadQueueUsed].xrecoff = xrecoff;
+ 	ReadAheadQueue[ReadAheadQueueUsed].has_fpw = has_fpw;
+ 
+ 	ReadAheadQueueUsed++;
+ }
+ 
+ /*
+  * ReadAhead queue availability check
+  *
+  * Return true if the XLogReadAhead queue has room for num more entries,
+  * false otherwise.  The queue has a fixed capacity of
+  * ReadAheadQueueSize / sizeof(XLogReadAhead) entries; it is never enlarged
+  * here.
+  */
+ bool
+ ReadAheadHasRoom(int num)
+ {
+ 	return (ReadAheadQueueUsed + num <= ReadAheadQueueSize /
+ 			sizeof(XLogReadAhead));
+ }
+ 
+ /*
+  * Check whether info1 and info2 refer to the same relation file and block.
+  */
+ #define IS_SAME_PAGE(info1, info2) \
+ 	(RelFileNodeEquals((info1).node, (info2).node) && \
+ 		(info1).blkno == (info2).blkno)
+  
+ /*
+  * Execute read ahead of the queued data pages, then empty the queue
+  *
+  * The queue is sorted with ReadAheadCompare() so that entries for the same
+  * page become adjacent; each page is then prefetched at most once through
+  * smgrprefetch().  An entry that has a full page write is never prefetched
+  * itself, and an entry that follows another entry for the same page is
+  * skipped as a duplicate.
+  */
+ void
+ ReadAheadExecute(void)
+ {
+ 	uint32 i;
+ 	SMgrRelation reln;
+ 	XLogReadAhead last_entry = { { 0, 0, 0, }, 0, 0, false };
+ 
+ 	ereport(DEBUG1, (errmsg("%u blocks are prefetch candidate",
+ 		ReadAheadQueueUsed)));
+ 
+ 	/* Sort the XLogReadAhead queue for effective disk access. */
+ 	qsort(ReadAheadQueue, ReadAheadQueueUsed, sizeof(XLogReadAhead),
+ 		ReadAheadCompare);
+ 
+ 	for (i = 0; i < ReadAheadQueueUsed; i++)
+ 	{
+ 		/* Skip a repeat of the previous page and any entry with an FPW. */
+ 		if (IS_SAME_PAGE(last_entry, ReadAheadQueue[i]) ||
+ 				ReadAheadQueue[i].has_fpw)
+ 		{
+ 			last_entry = ReadAheadQueue[i];
+ 			continue;
+ 		}
+ 
+ 		/* Create SMgrRelation object */
+ 		reln = smgropen(ReadAheadQueue[i].node);
+ 
+ 		/* Read ahead with prefetch API */
+ 		smgrprefetch(reln, MAIN_FORKNUM, ReadAheadQueue[i].blkno);
+ 
+ 		/* Store XLogReadAhead to skip duplicate pages. */
+ 		last_entry = ReadAheadQueue[i];
+ 	}
+ 	ReadAheadQueueUsed = 0;
+ }
+ 
+ /*
+  * Compare two XLogReadAhead objects (qsort comparator)
+  *
+  * Returns 1 when l > r, 0 when l == r, and -1 when l < r.
+  * The fields are compared in the following order of priority:
+  *    1. node.spcNode
+  *    2. node.dbNode
+  *    3. node.relNode
+  *    4. blkno
+  *    5. xrecoff
+  */
+ static int
+ ReadAheadCompare(const void *l, const void *r)
+ {
+ 	const XLogReadAhead *left = (const XLogReadAhead *) l;
+ 	const XLogReadAhead *right = (const XLogReadAhead *) r;
+ 
+ 	/* compare node.spcNode */
+ 	if (left->node.spcNode > right->node.spcNode)
+ 		return 1;
+ 	else if (left->node.spcNode < right->node.spcNode)
+ 		return -1;
+ 
+ 	/* compare node.dbNode */
+ 	if (left->node.dbNode > right->node.dbNode)
+ 		return 1;
+ 	else if (left->node.dbNode < right->node.dbNode)
+ 		return -1;
+ 
+ 	/* compare node.relNode */
+ 	if (left->node.relNode > right->node.relNode)
+ 		return 1;
+ 	else if (left->node.relNode < right->node.relNode)
+ 		return -1;
+ 
+ 	/* compare blkno */
+ 	if (left->blkno > right->blkno)
+ 		return 1;
+ 	else if (left->blkno < right->blkno)
+ 		return -1;
+ 
+ 	/* compare xrecoff */
+ 	if (left->xrecoff > right->xrecoff)
+ 		return 1;
+ 	else if (left->xrecoff < right->xrecoff)
+ 		return -1;
+ 
+ 	/* All compared fields are equal. */
+ 	return 0;
+ }
+ 
+ /*
+  * ReadAheadFinish - release the queue buffer and reset the queue state, so
+  * that a repeated call cannot free the buffer twice.
+  */
+ void
+ ReadAheadFinish(void)
+ {
+ 	free(ReadAheadQueue);		/* free(NULL) is a no-op */
+ 	ReadAheadQueue = NULL;
+ 	ReadAheadQueueUsed = 0;
+ }
diff -rcN postgresql-CVS-01-21.org/src/backend/access/transam/rmgr.c postgresql-CVS-01-21/src/backend/access/transam/rmgr.c
*** postgresql-CVS-01-21.org/src/backend/access/transam/rmgr.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/transam/rmgr.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 24,43 ****
  
  
  const RmgrData RmgrTable[RM_MAX_ID + 1] = {
! 	{"XLOG", xlog_redo, xlog_desc, NULL, NULL, NULL},
! 	{"Transaction", xact_redo, xact_desc, NULL, NULL, NULL},
! 	{"Storage", smgr_redo, smgr_desc, NULL, NULL, NULL},
! 	{"CLOG", clog_redo, clog_desc, NULL, NULL, NULL},
! 	{"Database", dbase_redo, dbase_desc, NULL, NULL, NULL},
! 	{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL},
! 	{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL},
! 	{"Reserved 7", NULL, NULL, NULL, NULL, NULL},
! 	{"Reserved 8", NULL, NULL, NULL, NULL, NULL},
! 	{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL},
! 	{"Heap", heap_redo, heap_desc, NULL, NULL, NULL},
! 	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint},
! 	{"Hash", hash_redo, hash_desc, NULL, NULL, NULL},
! 	{"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint},
! 	{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, gist_safe_restartpoint},
! 	{"Sequence", seq_redo, seq_desc, NULL, NULL, NULL}
  };
--- 24,43 ----
  
  
  const RmgrData RmgrTable[RM_MAX_ID + 1] = {
! 	{"XLOG", xlog_redo, xlog_desc, NULL, NULL, NULL, NULL},
! 	{"Transaction", xact_redo, xact_desc, NULL, NULL, NULL, NULL},
! 	{"Storage", smgr_redo, smgr_desc, NULL, NULL, NULL, NULL},
! 	{"CLOG", clog_redo, clog_desc, NULL, NULL, NULL, NULL},
! 	{"Database", dbase_redo, dbase_desc, NULL, NULL, NULL, NULL},
! 	{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL, NULL, NULL},
! 	{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL, NULL},
! 	{"Reserved 7", NULL, NULL, NULL, NULL, NULL, NULL},
! 	{"Reserved 8", NULL, NULL, NULL, NULL, NULL, NULL},
! 	{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL, heap2_readahead},
! 	{"Heap", heap_redo, heap_desc, NULL, NULL, NULL, heap_readahead},
! 	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint, btree_readahead},
! 	{"Hash", hash_redo, hash_desc, NULL, NULL, NULL, NULL},
! 	{"Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup, gin_safe_restartpoint, gin_readahead},
! 	{"Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup, gist_safe_restartpoint, gist_readahead},
! 	{"Sequence", seq_redo, seq_desc, NULL, NULL, NULL, seq_readahead}
  };
diff -rcN postgresql-CVS-01-21.org/src/backend/access/transam/xlog.c postgresql-CVS-01-21/src/backend/access/transam/xlog.c
*** postgresql-CVS-01-21.org/src/backend/access/transam/xlog.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/access/transam/xlog.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 382,387 ****
--- 382,399 ----
  static char *readRecordBuf = NULL;
  static uint32 readRecordBufSize = 0;
  
+ /*
+  * Buffer for queued WAL records (fixed size)
+  *
+  * This buffer holds WAL records together with their LSNs.  When all WAL
+  * records of one WAL segment file have been read, they are redone and the
+  * buffer is emptied.  Twice XLogSegSize is therefore assumed to be enough
+  * for the total size of the queued WAL records and LSNs.
+  */
+ #define RECORD_QUEUE_BUF_SIZE	(XLogSegSize * 2)
+ static char *RecordQueueBuf = NULL;
+ static uint32 RecordQueueBufUsed = 0;
+ 
  /* State information for XLOG reading */
  static XLogRecPtr ReadRecPtr;	/* start of last record read */
  static XLogRecPtr EndRecPtr;	/* end+1 of last record read */
***************
*** 442,447 ****
--- 454,462 ----
  static void rm_redo_error_callback(void *arg);
  static int get_sync_bit(int method);
  
+ static void PushRecord(XLogRecPtr lsn, XLogRecord *record);
+ static void PushReadAhead(XLogRecPtr lsn, XLogRecord *record);
+ static void RedoRecords(void);
  
  /*
   * Insert an XLOG record having the specified RMID and info bytes,
***************
*** 2365,2370 ****
--- 2380,2387 ----
  	ListCell   *cell;
  	int			fd;
  
+ 	ereport(DEBUG1, (errmsg("XLOG switch to %X/%X", log, seg)));
+ 
  	/*
  	 * Loop looking for a suitable timeline ID: we might need to read any of
  	 * the timelines listed in expectedTLIs.
***************
*** 2386,2391 ****
--- 2403,2415 ----
  
  		if (InArchiveRecovery)
  		{
+ 			/*
+ 			 * Waiting for the next WAL segment file might take a long time.
+ 			 * Therefore, redo the stored WAL records and LSNs here first.
+ 			 */
+ 			ereport(DEBUG1, (errmsg("XLOG will be switched")));
+ 			RedoRecords();
+ 
  			/* Report recovery progress in PS display */
  			snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
  					 xlogfname);
***************
*** 3424,3429 ****
--- 3448,3460 ----
  	return (XLogRecord *) buffer;
  
  next_record_is_invalid:;
+ 	/*
+ 	 * Reached the unused area of the current WAL segment file, so redo all
+ 	 * of the WAL records in the queue.
+ 	 */
+ 	ereport(DEBUG1, (errmsg("next record is invalid(maybe unused area)")));
+ 	RedoRecords();
+ 
  	if (readFile >= 0)
  	{
  		close(readFile);
***************
*** 4941,4946 ****
--- 4972,4988 ----
  	ValidateXLOGDirectoryStructure();
  
  	/*
+ 	 * To postpone the actual redo, WAL records and their EndRecPtrs are
+ 	 * stored in a buffer.  It must be allocated here, before any record is
+ 	 * read, because ReadRecord() may call RedoRecords(), which uses it.
+ 	 */
+ 	RecordQueueBuf = (char *) malloc(RECORD_QUEUE_BUF_SIZE);
+ 	Assert(RecordQueueBuf != NULL);
+ 
+ 	/* Allocate the buffer for storing information about data pages. */
+ 	ReadAheadInit();
+ 
+ 	/*
  	 * Initialize on the assumption we want to recover to the same timeline
  	 * that's active according to pg_control.
  	 */
***************
*** 5154,5160 ****
  		{
  			bool		recoveryContinue = true;
  			bool		recoveryApply = true;
- 			ErrorContextCallback errcontext;
  
  			InRedo = true;
  			ereport(LOG,
--- 5196,5201 ----
***************
*** 5196,5225 ****
  						break;
  				}
  
! 				/* Setup error traceback support for ereport() */
! 				errcontext.callback = rm_redo_error_callback;
! 				errcontext.arg = (void *) record;
! 				errcontext.previous = error_context_stack;
! 				error_context_stack = &errcontext;
! 
! 				/* nextXid must be beyond record's xid */
! 				if (TransactionIdFollowsOrEquals(record->xl_xid,
! 												 ShmemVariableCache->nextXid))
! 				{
! 					ShmemVariableCache->nextXid = record->xl_xid;
! 					TransactionIdAdvance(ShmemVariableCache->nextXid);
! 				}
! 
! 				RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
  
! 				/* Pop the error context stack */
! 				error_context_stack = errcontext.previous;
  
  				LastRec = ReadRecPtr;
  
  				record = ReadRecord(NULL, LOG);
  			} while (record != NULL && recoveryContinue);
  
  			/*
  			 * end of main redo apply loop
  			 */
--- 5237,5270 ----
  						break;
  				}
  
! 				/*
! 				 * Push WAL record in WAL record buffer with its LSN for
! 				 * delayed redo.
! 				 * If the WAL record queue is full, redo all WAL records in the
! 				 * queue and make the queue empty.
! 				 */
! 				ereport(DEBUG1,
! 					(errmsg("WAL record queue is used %d(%d) bytes at %X/%08X.",
! 						RecordQueueBufUsed, record->xl_tot_len,
! 						EndRecPtr.xlogid, EndRecPtr.xrecoff)));
! 				PushRecord(EndRecPtr, record);
  
! 				/*
! 				 * Push page information to prefetch later.
! 				 * If no more space, redo all records in queue and make the
! 				 * queue empty.
! 				 */
! 				PushReadAhead(EndRecPtr, record);
  
  				LastRec = ReadRecPtr;
  
  				record = ReadRecord(NULL, LOG);
  			} while (record != NULL && recoveryContinue);
  
+ 			/* All WAL records are read, redo all queued WAL records.  */
+ 			ereport(DEBUG1, (errmsg("end of redo apply loop")));
+ 			RedoRecords();
+ 
  			/*
  			 * end of main redo apply loop
  			 */
***************
*** 5441,5446 ****
--- 5486,5587 ----
  		readRecordBuf = NULL;
  		readRecordBufSize = 0;
  	}
+ 	if (RecordQueueBuf)
+ 	{
+ 		free(RecordQueueBuf);
+ 		RecordQueueBufUsed = 0;
+ 		ReadAheadFinish();
+ 	}
+ }
+ 
+ /*
+  * PushRecord - queue an (LSN, WAL record) pair, redoing the queue first if full.
+  */
+ static void
+ PushRecord(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	Size		need;
+ 	Assert(record);
+ 	need = MAXALIGN(sizeof(XLogRecPtr)) + MAXALIGN(record->xl_tot_len);
+ 	if (need > RECORD_QUEUE_BUF_SIZE)	/* can never fit; do not overrun */
+ 		elog(PANIC, "WAL record is too large for the redo queue");
+ 	if (need > RECORD_QUEUE_BUF_SIZE - RecordQueueBufUsed)
+ 	{
+ 		ereport(DEBUG1, (errmsg("WAL record queue is full.")));
+ 		RedoRecords();
+ 	}
+ 	memcpy(RecordQueueBuf + RecordQueueBufUsed, &lsn, sizeof(XLogRecPtr));
+ 	RecordQueueBufUsed += MAXALIGN(sizeof(XLogRecPtr));
+ 	memcpy(RecordQueueBuf + RecordQueueBufUsed, record, record->xl_tot_len);
+ 	RecordQueueBufUsed += MAXALIGN(record->xl_tot_len);
+ }
+ 
+ /*
+  * PushReadAhead - pass the record to its resource manager's rm_readahead.
+  */
+ static void
+ PushReadAhead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	Assert(record);
+ 
+ 	if (!RmgrTable[record->xl_rmid].rm_readahead)	/* no callback set */
+ 		return;
+ 
+ 	while (!RmgrTable[record->xl_rmid].rm_readahead(lsn, record))
+ 	{
+ 		ereport(DEBUG1, (errmsg("ReadAhead queue is full.")));
+ 		RedoRecords();	/* empties the readahead queue; then retry */
+ 	}
+ }
+ 
+ /*
+  * RedoRecords - prefetch queued pages, redo all queued records, empty queue.
+  */
+ static void
+ RedoRecords(void)
+ {
+ 	ErrorContextCallback errcontext;
+ 	uint32 off = 0;
+ 
+ 	/* Readahead data pages which will be modified during redo. */
+ 	ReadAheadExecute();
+ 
+ 	while (off < RecordQueueBufUsed)
+ 	{
+ 		XLogRecPtr lsn;
+ 		XLogRecord *record;
+ 
+ 		/* Extract LSN and WAL record image from local buffer. */
+ 		memcpy(&lsn, RecordQueueBuf + off, sizeof(XLogRecPtr));
+ 		off += MAXALIGN(sizeof(XLogRecPtr));
+ 		record = (XLogRecord *)(RecordQueueBuf + off);
+ 
+ 		/* Setup error traceback support for ereport() */
+ 		errcontext.callback = rm_redo_error_callback;
+ 		errcontext.arg = (void *) record;
+ 		errcontext.previous = error_context_stack;
+ 		error_context_stack = &errcontext;
+ 
+ 		/* nextXid must be beyond record's xid */
+ 		if (TransactionIdFollowsOrEquals(record->xl_xid,
+ 										 ShmemVariableCache->nextXid))
+ 		{
+ 			ShmemVariableCache->nextXid = record->xl_xid;
+ 			TransactionIdAdvance(ShmemVariableCache->nextXid);
+ 		}
+ 
+ 		/* Redo with WAL record and its LSN. */
+ 		RmgrTable[record->xl_rmid].rm_redo(lsn, record);
+ 
+ 		/* Pop the error context stack */
+ 		error_context_stack = errcontext.previous;
+ 
+ 		off += MAXALIGN(record->xl_tot_len);
+ 	}
+ 
+ 	/* Make RecordQueueBuf empty: clear only the bytes that were in use. */
+ 	MemSet(RecordQueueBuf, 0, RecordQueueBufUsed);
+ 	RecordQueueBufUsed = 0;
+ }
  
  /*
diff -rcN postgresql-CVS-01-21.org/src/backend/commands/sequence.c postgresql-CVS-01-21/src/backend/commands/sequence.c
*** postgresql-CVS-01-21.org/src/backend/commands/sequence.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/commands/sequence.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 17,22 ****
--- 17,23 ----
  #include "access/heapam.h"
  #include "access/transam.h"
  #include "access/xact.h"
+ #include "access/xlog.h"
  #include "access/xlogutils.h"
  #include "catalog/dependency.h"
  #include "catalog/namespace.h"
***************
*** 1385,1387 ****
--- 1386,1418 ----
  	appendStringInfo(buf, "rel %u/%u/%u",
  			   xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
  }
+ 
+ /*
+  *    seq_readahead  - enqueue information about data pages
+  *
+  * For XLOG_SEQ_LOG, registers block 0 of the record's relation with the
+  * readahead module.  Returns false, adding nothing, when the readahead queue
+  * has no room for one more entry; returns true otherwise.
+  */
+ bool
+ seq_readahead(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8 info;
+ 
+ 	Assert(record);
+ 	info = record->xl_info & ~XLR_INFO_MASK;	/* dereference only after Assert */
+ 	switch (info)
+ 	{
+ 		case XLOG_SEQ_LOG:
+ 			{
+ 				xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
+ 
+ 				if (!ReadAheadHasRoom(1))
+ 					return false;
+ 				ReadAheadAddEntry(xlrec->node, 0, lsn.xrecoff, false);
+ 				break;
+ 			}
+ 	}
+ 
+ 	return true;
+ }
diff -rcN postgresql-CVS-01-21.org/src/backend/storage/smgr/md.c postgresql-CVS-01-21/src/backend/storage/smgr/md.c
*** postgresql-CVS-01-21.org/src/backend/storage/smgr/md.c	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/backend/storage/smgr/md.c	2009-01-21 10:35:03.000000000 +0900
***************
*** 560,566 ****
  	off_t		seekpos;
  	MdfdVec    *v;
  
! 	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
  
  	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
  	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
--- 560,568 ----
  	off_t		seekpos;
  	MdfdVec    *v;
  
! 	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_RETURN_NULL);
! 	if (!v)
! 		return;
  
  	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
  	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
diff -rcN postgresql-CVS-01-21.org/src/include/access/gin.h postgresql-CVS-01-21/src/include/access/gin.h
*** postgresql-CVS-01-21.org/src/include/access/gin.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/access/gin.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 256,261 ****
--- 256,262 ----
  /* ginxlog.c */
  extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void gin_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool gin_readahead(XLogRecPtr lsn, XLogRecord *record);
  extern void gin_xlog_startup(void);
  extern void gin_xlog_cleanup(void);
  extern bool gin_safe_restartpoint(void);
diff -rcN postgresql-CVS-01-21.org/src/include/access/gist_private.h postgresql-CVS-01-21/src/include/access/gist_private.h
*** postgresql-CVS-01-21.org/src/include/access/gist_private.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/access/gist_private.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 250,255 ****
--- 250,256 ----
  /* gistxlog.c */
  extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void gist_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool gist_readahead(XLogRecPtr lsn, XLogRecord *record);
  extern void gist_xlog_startup(void);
  extern void gist_xlog_cleanup(void);
  extern bool gist_safe_restartpoint(void);
diff -rcN postgresql-CVS-01-21.org/src/include/access/heapam.h postgresql-CVS-01-21/src/include/access/heapam.h
*** postgresql-CVS-01-21.org/src/include/access/heapam.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/access/heapam.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 124,131 ****
--- 124,133 ----
  
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool heap_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool heap2_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  
  extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
  			  ItemPointerData from,
diff -rcN postgresql-CVS-01-21.org/src/include/access/nbtree.h postgresql-CVS-01-21/src/include/access/nbtree.h
*** postgresql-CVS-01-21.org/src/include/access/nbtree.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/access/nbtree.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 591,596 ****
--- 591,597 ----
   */
  extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void btree_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool btree_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  extern void btree_xlog_startup(void);
  extern void btree_xlog_cleanup(void);
  extern bool btree_safe_restartpoint(void);
diff -rcN postgresql-CVS-01-21.org/src/include/access/xlog.h postgresql-CVS-01-21/src/include/access/xlog.h
*** postgresql-CVS-01-21.org/src/include/access/xlog.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/access/xlog.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 14,20 ****
--- 14,23 ----
  #include "access/rmgr.h"
  #include "access/xlogdefs.h"
  #include "lib/stringinfo.h"
+ #include "postgres.h"
  #include "storage/buf.h"
+ #include "storage/relfilenode.h"
+ #include "storage/block.h"
  #include "utils/pg_crc.h"
  #include "utils/timestamp.h"
  
***************
*** 198,203 ****
--- 201,207 ----
  
  extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool xlog_readahead(XLogRecPtr lsn, XLogRecord *rptr);
  
  extern void UpdateControlFile(void);
  extern Size XLOGShmemSize(void);
***************
*** 212,215 ****
--- 216,227 ----
  extern XLogRecPtr GetInsertRecPtr(void);
  extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
  
+ /* Implemented in readahead.c. */
+ extern void ReadAheadInit(void);
+ extern void ReadAheadAddEntry(RelFileNode node, BlockNumber blkno,
+ 								uint32 xrecoff, bool has_fpw);
+ extern bool ReadAheadHasRoom(int num);
+ extern void ReadAheadExecute(void);
+ extern void ReadAheadFinish(void);
+ 
  #endif   /* XLOG_H */
diff -rcN postgresql-CVS-01-21.org/src/include/access/xlog_internal.h postgresql-CVS-01-21/src/include/access/xlog_internal.h
*** postgresql-CVS-01-21.org/src/include/access/xlog_internal.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/access/xlog_internal.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 235,240 ****
--- 235,241 ----
  	void		(*rm_startup) (void);
  	void		(*rm_cleanup) (void);
  	bool		(*rm_safe_restartpoint) (void);
+ 	bool		(*rm_readahead) (XLogRecPtr lsn, XLogRecord *rptr);
  } RmgrData;
  
  extern const RmgrData RmgrTable[];
diff -rcN postgresql-CVS-01-21.org/src/include/commands/sequence.h postgresql-CVS-01-21/src/include/commands/sequence.h
*** postgresql-CVS-01-21.org/src/include/commands/sequence.h	2009-01-21 10:39:04.000000000 +0900
--- postgresql-CVS-01-21/src/include/commands/sequence.h	2009-01-21 10:35:02.000000000 +0900
***************
*** 98,102 ****
--- 98,103 ----
  
  extern void seq_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void seq_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern bool seq_readahead(XLogRecPtr lsn, XLogRecord *record);
  
  #endif   /* SEQUENCE_H */
