*** a/configure
--- b/configure
***************
*** 19937,19943 **** LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
  
  
  
! for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
  do
  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
  { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
--- 19937,19943 ----
  
  
  
! for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fadvise pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
  do
  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
  { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 9119,9125 **** copy_relation_data(SMgrRelation src, SMgrRelation dst,
  		/* If we got a cancel signal during the copy of the data, quit */
  		CHECK_FOR_INTERRUPTS();
  
! 		smgrread(src, forkNum, blkno, buf);
  
  		if (!PageIsVerified(page, blkno))
  			ereport(ERROR,
--- 9119,9125 ----
  		/* If we got a cancel signal during the copy of the data, quit */
  		CHECK_FOR_INTERRUPTS();
  
! 		smgrread(src, forkNum, blkno, buf, (char *) BAS_BULKREAD);
  
  		if (!PageIsVerified(page, blkno))
  			ereport(ERROR,
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 41,46 ****
--- 41,47 ----
  #include "pg_trace.h"
  #include "pgstat.h"
  #include "postmaster/bgwriter.h"
+ #include "storage/buf.h"
  #include "storage/buf_internals.h"
  #include "storage/bufmgr.h"
  #include "storage/ipc.h"
***************
*** 451,457 **** ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
  			if (track_io_timing)
  				INSTR_TIME_SET_CURRENT(io_start);
  
! 			smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
  
  			if (track_io_timing)
  			{
--- 452,458 ----
  			if (track_io_timing)
  				INSTR_TIME_SET_CURRENT(io_start);
  
! 			smgrread(smgr, forkNum, blockNum, (char *) bufBlock, (char *) strategy);
  
  			if (track_io_timing)
  			{
*** a/src/backend/storage/file/fd.c
--- b/src/backend/storage/file/fd.c
***************
*** 73,80 ****
--- 73,82 ----
  #include "catalog/pg_tablespace.h"
  #include "common/relpath.h"
  #include "pgstat.h"
+ #include "storage/buf.h"
  #include "storage/fd.h"
  #include "storage/ipc.h"
+ #include "storage/bufmgr.h"
  #include "utils/guc.h"
  #include "utils/resowner_private.h"
  
***************
*** 123,129 **** int			max_files_per_process = 1000;
   * setting this variable, and so need not be tested separately.
   */
  int			max_safe_fds = 32;	/* default if not changed */
! 
  
  /* Debugging.... */
  
--- 125,131 ----
   * setting this variable, and so need not be tested separately.
   */
  int			max_safe_fds = 32;	/* default if not changed */
! bool			enable_kernel_readahead = true ;
  
  /* Debugging.... */
  
***************
*** 383,388 **** pg_flush_data(int fd, off_t offset, off_t amount)
--- 385,405 ----
  	return 0;
  }
  
+ /*
+  * pg_fadvise --- hand the OS an access-pattern hint for a range of a file
+  *
+  * Returns the result of posix_fadvise().  Not all platforms have it; when
+  * the symbols tested below are not all defined, do nothing and return 0.
+  */
+ int
+ pg_fadvise(int fd, off_t offset, off_t amount, int advise)
+ {
+ #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) && defined(POSIX_FADV_RANDOM) && defined(POSIX_FADV_SEQUENTIAL)
+ 	return posix_fadvise(fd, offset, amount, advise);
+ #else
+ 	return 0;
+ #endif
+ }
  
  /*
   * fsync_fname -- fsync a file or directory, handling errors properly
***************
*** 1142,1147 **** OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
--- 1159,1195 ----
  }
  
  /*
+  * Issue posix_fadvise() advice on a File's cached fd (NOTE: assumes fd is open)
+  */
+ int
+ FileCacheAdvise(File file, off_t offset, off_t amount, int advise)
+ {
+ 	return pg_fadvise(VfdCache[file].fd, offset, amount, advise);
+ }
+ 
+ /*
+  * BufferHintIOAdvise --- choose a kernel readahead mode for an upcoming read.
+  * With enable_kernel_readahead on, request POSIX_FADV_NORMAL.  Otherwise a
+  * non-NULL strategy requests POSIX_FADV_SEQUENTIAL and NULL requests
+  * POSIX_FADV_RANDOM.  NOTE(review): "offset" is a pointer cast to off_t, and
+  * mdread passes its read buffer here, not a file position -- confirm intent.
+  */
+ int
+ BufferHintIOAdvise(File file, char *offset, off_t amount, char *strategy)
+ {
+ 	if(enable_kernel_readahead)
+ 		return FileCacheAdvise(file, (off_t) offset, amount, POSIX_FADV_NORMAL);
+ 
+ 	/* enable_kernel_readahead is off: choose the advice from the hint */
+ 	if(strategy != NULL)
+ 		/* a strategy was supplied: ask the kernel for sequential readahead */
+ 		return FileCacheAdvise(file, (off_t) offset, amount, POSIX_FADV_SEQUENTIAL);
+ 	else
+ 		/* no strategy: advise random access, intended to suppress readahead */
+ 		return FileCacheAdvise(file, (off_t) offset, amount, POSIX_FADV_RANDOM);
+ }
+ 
+ /*
   * close a file when done with it
   */
  void
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 162,168 **** static List *pendingUnlinks = NIL;
  static CycleCtr mdsync_cycle_ctr = 0;
  static CycleCtr mdckpt_cycle_ctr = 0;
  
- 
  typedef enum					/* behavior for mdopen & _mdfd_getseg */
  {
  	EXTENSION_FAIL,				/* ereport if segment not present */
--- 162,167 ----
***************
*** 653,659 **** mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
   */
  void
  mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! 	   char *buffer)
  {
  	off_t		seekpos;
  	int			nbytes;
--- 652,658 ----
   */
  void
  mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! 	   char *buffer, char *strategy)
  {
  	off_t		seekpos;
  	int			nbytes;
***************
*** 677,682 **** mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
--- 676,683 ----
  				 errmsg("could not seek to block %u in file \"%s\": %m",
  						blocknum, FilePathName(v->mdfd_vfd))));
  
+ 	/* Control buffered IO in OS by using posix_fadvise() */
+ 	BufferHintIOAdvise(v->mdfd_vfd, buffer, BLCKSZ, strategy);
  	nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
  
  	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
*** a/src/backend/storage/smgr/smgr.c
--- b/src/backend/storage/smgr/smgr.c
***************
*** 50,56 **** typedef struct f_smgr
  	void		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
  											  BlockNumber blocknum);
  	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
! 										  BlockNumber blocknum, char *buffer);
  	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
  						 BlockNumber blocknum, char *buffer, bool skipFsync);
  	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
--- 50,56 ----
  	void		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
  											  BlockNumber blocknum);
  	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
! 					  BlockNumber blocknum, char *buffer, char *strategy);
  	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
  						 BlockNumber blocknum, char *buffer, bool skipFsync);
  	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
***************
*** 588,596 **** smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
   */
  void
  smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! 		 char *buffer)
  {
! 	(*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
  }
  
  /*
--- 588,596 ----
   */
  void
  smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! 		 char *buffer, char *strategy)
  {
! 	(*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer, strategy);
  }
  
  /*
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 762,767 **** static struct config_bool ConfigureNamesBool[] =
--- 762,776 ----
  		NULL, NULL, NULL
  	},
  	{
+ 		{"enable_kernel_readahead", PGC_USERSET, QUERY_TUNING_METHOD,
+ 			gettext_noop("On is optimize readahead by kernel, off is optimized by postgres."),
+ 			NULL
+ 		},
+ 		&enable_kernel_readahead,
+ 		true,
+ 		NULL, NULL, NULL
+ 	},
+ 	{
  		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
  			gettext_noop("Enables genetic query optimization."),
  			gettext_noop("This algorithm attempts to do planning without "
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 135,140 ****
--- 135,142 ----
  
  #temp_file_limit = -1			# limits per-session temp file space
  					# in kB, or -1 for no limit
+ #enable_kernel_readahead = on		# on: use the kernel's default readahead,
+ 					# off: postgres sets fadvise hints per read
  
  # - Kernel Resource Usage -
  
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 44,55 **** typedef enum
--- 44,58 ----
  /* in globals.c ... this duplicates miscadmin.h */
  extern PGDLLIMPORT int NBuffers;
  
+ 
+ 
  /* in bufmgr.c */
  extern bool zero_damaged_pages;
  extern int	bgwriter_lru_maxpages;
  extern double bgwriter_lru_multiplier;
  extern bool track_io_timing;
  extern int	target_prefetch_pages;
+ extern bool	enable_kernel_readahead;
  
  /* in buf_init.c */
  extern PGDLLIMPORT char *BufferBlocks;
*** a/src/include/storage/fd.h
--- b/src/include/storage/fd.h
***************
*** 68,73 **** extern int	max_safe_fds;
--- 68,74 ----
  extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
  extern File OpenTemporaryFile(bool interXact);
  extern void FileClose(File file);
+ extern int	FileCacheAdvise(File file, off_t offset, off_t amount, int advise);
  extern int	FilePrefetch(File file, off_t offset, int amount);
  extern int	FileRead(File file, char *buffer, int amount);
  extern int	FileWrite(File file, char *buffer, int amount);
***************
*** 75,80 **** extern int	FileSync(File file);
--- 76,82 ----
  extern off_t FileSeek(File file, off_t offset, int whence);
  extern int	FileTruncate(File file, off_t offset);
  extern char *FilePathName(File file);
+ extern int	BufferHintIOAdvise(File file, char *offset, off_t amount, char *strategy);
  
  /* Operations that allow use of regular stdio --- USE WITH CAUTION */
  extern FILE *AllocateFile(const char *name, const char *mode);
***************
*** 113,118 **** extern int	pg_fsync_no_writethrough(int fd);
--- 115,121 ----
  extern int	pg_fsync_writethrough(int fd);
  extern int	pg_fdatasync(int fd);
  extern int	pg_flush_data(int fd, off_t offset, off_t amount);
+ extern int	pg_fadvise(int fd, off_t offset, off_t amount, int advise);
  extern void fsync_fname(char *fname, bool isdir);
  
  /* Filename components for OpenTemporaryFile */
*** a/src/include/storage/smgr.h
--- b/src/include/storage/smgr.h
***************
*** 92,98 **** extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
  extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
  			 BlockNumber blocknum);
  extern void smgrread(SMgrRelation reln, ForkNumber forknum,
! 		 BlockNumber blocknum, char *buffer);
  extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
  		  BlockNumber blocknum, char *buffer, bool skipFsync);
  extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
--- 92,98 ----
  extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
  			 BlockNumber blocknum);
  extern void smgrread(SMgrRelation reln, ForkNumber forknum,
! 			BlockNumber blocknum, char *buffer, char *strategy);
  extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
  		  BlockNumber blocknum, char *buffer, bool skipFsync);
  extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
***************
*** 118,124 **** extern void mdextend(SMgrRelation reln, ForkNumber forknum,
  extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
  		   BlockNumber blocknum);
  extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! 	   char *buffer);
  extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
  		BlockNumber blocknum, char *buffer, bool skipFsync);
  extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
--- 118,124 ----
  extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
  		   BlockNumber blocknum);
  extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! 	   char *buffer, char *strategy);
  extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
  		BlockNumber blocknum, char *buffer, bool skipFsync);
  extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
*** a/src/test/regress/expected/rangefuncs.out
--- b/src/test/regress/expected/rangefuncs.out
***************
*** 1,18 ****
  SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
!          name         | setting 
! ----------------------+---------
!  enable_bitmapscan    | on
!  enable_hashagg       | on
!  enable_hashjoin      | on
!  enable_indexonlyscan | on
!  enable_indexscan     | on
!  enable_material      | on
!  enable_mergejoin     | on
!  enable_nestloop      | on
!  enable_seqscan       | on
!  enable_sort          | on
!  enable_tidscan       | on
! (11 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
--- 1,19 ----
  SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
!           name           | setting 
! -------------------------+---------
!  enable_bitmapscan       | on
!  enable_hashagg          | on
!  enable_hashjoin         | on
!  enable_indexonlyscan    | on
!  enable_indexscan        | on
!  enable_kernel_readahead | on
!  enable_material         | on
!  enable_mergejoin        | on
!  enable_nestloop         | on
!  enable_seqscan          | on
!  enable_sort             | on
!  enable_tidscan          | on
! (12 rows)
  
  CREATE TABLE foo2(fooid int, f2 int);
  INSERT INTO foo2 VALUES(1, 11);
