*** a/src/backend/storage/page/Makefile
--- b/src/backend/storage/page/Makefile
***************
*** 12,17 **** subdir = src/backend/storage/page
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS =  bufpage.o itemptr.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 12,22 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS =  bufpage.o checksum.o itemptr.o
  
  include $(top_srcdir)/src/backend/common.mk
+ 
+ # important optimization flags for checksum.c
+ ifeq ($(GCC),yes)
+ checksum.o: CFLAGS += -msse4.1 -funroll-loops -ftree-vectorize
+ endif
*** a/src/backend/storage/page/README
--- b/src/backend/storage/page/README
***************
*** 61,63 **** checksums are enabled.  Systems in Hot-Standby mode may benefit from hint bits
--- 61,109 ----
  being set, but with checksums enabled, a page cannot be dirtied after setting a
  hint bit (due to the torn page risk). So, it must wait for full-page images
  containing the hint bit updates to arrive from the master.
+ 
+ Checksum algorithm
+ ------------------
+ 
+ The algorithm used to checksum pages is chosen for very fast calculation.
+ Workloads where the database working set fits into OS file cache but not into
+ shared buffers can read in pages at a very fast pace and the checksum
+ algorithm itself can become the largest bottleneck.
+ 
+ The checksum algorithm itself is based on the FNV-1a hash (FNV is shorthand for
+ Fowler/Noll/Vo) The primitive of a plain FNV-1a hash folds in data 4 bytes at
+ a time according to the formula:
+ 
+     hash = (hash ^ value) * FNV_PRIME(16777619)
+ 
+ PostgreSQL doesn't use FNV-1a hash directly because it has bad mixing of high
+ bits - high order bits in input data only affect high order bits in output
+ data. To resolve this we xor in the value prior to multiplication shifted
+ right by 3 bits. The number 3 was chosen as it is a small odd, prime, and
+ experimentally provides enough mixing for the high order bits to avalanche
+ into lower positions. The actual hash formula used as the basis is:
+ 
+     hash = (hash ^ value) * ((hash ^ value) >> 3)
+ 
+ The main bottleneck in this calculation is the multiplication latency. To hide
+ the latency and to make use of SIMD parallelism multiple hash values are
+ calculated in parallel. Each hash function uses a different initial value
+ (offset basis in FNV terminology). The initial values actually used were
+ chosen randomly, as the values themselves don't matter as much as that they
+ are different and don't match anything in real data. The page is then treated
+ as 32 wide array of 32bit values and each column is aggregated according to
+ the above formula. Finally one more iteration of the formula is performed with
+ value 0 to mix the bits of the last value added.
+ 
+ The partial checksums are then aggregated together using xor to form a
+ 32-bit checksum. The caller can safely reduce the value to 16 bits
+ using modulo 2^16-1. That will cause a very slight bias towards lower
+ values but this is not significant for the performance of the
+ checksum.
+ 
+ Vectorization of the algorithm requires 32bit x 32bit -> 32bit integer
+ multiplication instruction. As of 2013 the corresponding instruction is
+ available on x86 SSE4.1 extensions (pmulld) and ARM NEON (vmul.i32).
+ Vectorization requires a compiler to do the vectorization for us. For recent
+ GCC versions the flags -msse4.1 -funroll-loops -ftree-vectorize are enough
+ to achieve vectorization.
*** a/src/backend/storage/page/bufpage.c
--- b/src/backend/storage/page/bufpage.c
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "access/htup_details.h"
  #include "access/xlog.h"
+ #include "storage/checksum.h"
  
  bool ignore_checksum_failure = false;
  
***************
*** 948,980 **** PageSetChecksumInplace(Page page, BlockNumber blkno)
  static uint16
  PageCalcChecksum16(Page page, BlockNumber blkno)
  {
! 	pg_crc32    		crc;
! 	PageHeader	p = (PageHeader) page;
  
  	/* only calculate the checksum for properly-initialized pages */
  	Assert(!PageIsNew(page));
  
- 	INIT_CRC32(crc);
- 
  	/*
! 	 * Initialize the checksum calculation with the block number. This helps
! 	 * catch corruption from whole blocks being transposed with other whole
! 	 * blocks.
  	 */
! 	COMP_CRC32(crc, &blkno, sizeof(blkno));
  
! 	/*
! 	 * Now add in the LSN, which is always the first field on the page.
! 	 */
! 	COMP_CRC32(crc, page, sizeof(p->pd_lsn));
  
  	/*
! 	 * Now add the rest of the page, skipping the pd_checksum field.
  	 */
! 	COMP_CRC32(crc, page + sizeof(p->pd_lsn) + sizeof(p->pd_checksum),
! 				  BLCKSZ - sizeof(p->pd_lsn) - sizeof(p->pd_checksum));
! 
! 	FIN_CRC32(crc);
! 
! 	return (uint16) crc;
  }
--- 949,976 ----
  static uint16
  PageCalcChecksum16(Page page, BlockNumber blkno)
  {
! 	PageHeader	phdr   = (PageHeader) page;
! 	uint16		save_checksum;
! 	uint32		fnv_checksum;
  
  	/* only calculate the checksum for properly-initialized pages */
  	Assert(!PageIsNew(page));
  
  	/*
! 	 * Save pd_checksum and set it to zero, so that the checksum calculation
! 	 * isn't affected by the checksum stored on the page.
  	 */
! 	save_checksum = phdr->pd_checksum;
! 	phdr->pd_checksum = 0;
! 	fnv_checksum = checksum_fnv(page, BLCKSZ);
! 	phdr->pd_checksum = save_checksum;
  
! 	/* mix in the block number to detect transposed pages */
! 	fnv_checksum ^= blkno;
  
  	/*
! 	 * Reduce to a uint16 (to fit in the pd_checksum field) with an offset of
! 	 * one. That avoids checksums of zero, which seems like a good idea.
  	 */
! 	return (fnv_checksum % 65535) + 1;
  }
*** /dev/null
--- b/src/backend/storage/page/checksum.c
***************
*** 0 ****
--- 1,80 ----
+ /*-------------------------------------------------------------------------
+  *
+  * checksum.c
+  *	  Checksum implementation for data pages.
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *	  src/backend/storage/page/checksum.c
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+ 
+ #include "storage/checksum.h"
+ 
+ /*
+  * See src/backend/storage/page/README for specification of the
+  * checksum algorithm used here.
+  */
+ 
+ /* number of checksums to calculate in parallel */
+ #define N_SUMS 32
+ /* prime multiplier of FNV-1a hash */
+ #define FNV_PRIME 16777619
+ 
+ /*
+  * Base offsets to initialize each of the parallel FNV hashes into a
+  * different initial state.
+  */
+ static const uint32 checksumBaseOffsets[N_SUMS] = {
+ 	0x5B1F36E9, 0xB8525960, 0x02AB50AA, 0x1DE66D2A,
+ 	0x79FF467A, 0x9BB9F8A3, 0x217E7CD2, 0x83E13D2C,
+ 	0xF8D4474F, 0xE39EB970, 0x42C6AE16, 0x993216FA,
+ 	0x7B093B5D, 0x98DAFF3C, 0xF718902A, 0x0B1C9CDB,
+ 	0xE58F764B, 0x187636BC, 0x5D7B3BB1, 0xE73DE7DE,
+ 	0x92BEC979, 0xCCA6C0B2, 0x304A0979, 0x85AA43D4,
+ 	0x783125BB, 0x6CA8EAA2, 0xE407EAC6, 0x4B5CFC3E,
+ 	0x9FBF8C76, 0x15CA20BE, 0xF2CA9FD3, 0x959BD756
+ };
+ 
+ /*
+  * Calculate one round of the checksum.
+  */
+ #define CHECKSUM_COMP(checksum, value) do {\
+ 	uint32 __tmp = (checksum) ^ (value);\
+ 	(checksum) = __tmp * FNV_PRIME ^ (__tmp >> 3);\
+ } while (0)
+ 
+ uint32
+ checksum_fnv(char *data, uint32 size)
+ {
+ 	uint32 sums[N_SUMS];
+ 	uint32 (*dataArr)[N_SUMS] = (uint32 (*)[N_SUMS]) data;
+ 	uint32 result = 0;
+ 	int i, j;
+ 
+ 	/* ensure that the size is compatible with the algorithm */
+ 	Assert((size % (sizeof(uint32)*N_SUMS)) == 0);
+ 
+ 	/* initialize partial checksums to their corresponding offsets */
+ 	memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
+ 
+ 	/* main checksum calculation */
+ 	for (i = 0; i < size/sizeof(uint32)/N_SUMS; i++)
+ 		for (j = 0; j < N_SUMS; j++)
+ 			CHECKSUM_COMP(sums[j], dataArr[i][j]);
+ 
+ 	/* finally add in one round of zeroes for one more layer of mixing */
+ 	for (j = 0; j < N_SUMS; j++)
+ 		CHECKSUM_COMP(sums[j], 0);
+ 
+ 	/* xor fold partial checksums together */
+ 	for (i = 0; i < N_SUMS; i++)
+ 		result ^= sums[i];
+ 
+ 	return result;
+ }
*** /dev/null
--- b/src/include/storage/checksum.h
***************
*** 0 ****
--- 1,20 ----
+ /*-------------------------------------------------------------------------
+  *
+  * checksum.h
+  *	  Checksum implementation for data pages.
+  *
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/storage/checksum.h
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef CHECKSUM_H
+ #define CHECKSUM_H
+ 
+ /* Fowler-Noll-Vo 1a block checksum algorithm */
+ extern uint32 checksum_fnv(char *data, uint32 size);
+ 
+ #endif   /* CHECKSUM_H */
