Index: src/backend/commands/cluster.c
===================================================================
RCS file: /home/stark/src/REPOSITORY/pgsql/src/backend/commands/cluster.c,v
retrieving revision 1.177
diff -c -r1.177 cluster.c
*** src/backend/commands/cluster.c	19 Jun 2008 00:46:04 -0000	1.177
--- src/backend/commands/cluster.c	31 Aug 2008 11:36:14 -0000
***************
*** 19,24 ****
--- 19,25 ----
  
  #include "access/genam.h"
  #include "access/heapam.h"
+ #include "access/nbtree.h"
  #include "access/relscan.h"
  #include "access/rewriteheap.h"
  #include "access/transam.h"
***************
*** 46,52 ****
  #include "utils/snapmgr.h"
  #include "utils/syscache.h"
  #include "utils/tqual.h"
! 
  
  /*
   * This struct is used to pass around the information on tables to be
--- 47,53 ----
  #include "utils/snapmgr.h"
  #include "utils/syscache.h"
  #include "utils/tqual.h"
! #include "utils/tuplesort.h"
  
  /*
   * This struct is used to pass around the information on tables to be
***************
*** 713,725 ****
  	int			natts;
  	Datum	   *values;
  	bool	   *isnull;
- 	IndexScanDesc scan;
  	HeapTuple	tuple;
  	bool		use_wal;
  	TransactionId OldestXmin;
  	TransactionId FreezeXid;
  	RewriteState rwstate;
  
  	/*
  	 * Open the relations we need.
  	 */
--- 714,739 ----
  	int			natts;
  	Datum	   *values;
  	bool	   *isnull;
  	HeapTuple	tuple;
  	bool		use_wal;
  	TransactionId OldestXmin;
  	TransactionId FreezeXid;
  	RewriteState rwstate;
  
+ 	bool	do_sort;
+ 
+ 	/* We use this if we're doing an index scan */
+ 	IndexScanDesc scan;
+ 
+ 	/* we need these if we're going to heap scan and sort */
+ 	HeapScanDesc hscan;
+ 	IndexInfo	*ii;
+ 	ScanKey		 scanKeys;
+ 	int			 i;
+ 
+ 	Tuplesortstate *tuplesort;
+ 	bool 		shouldfree;
+ 
  	/*
  	 * Open the relations we need.
  	 */
***************
*** 773,782 ****
  	 * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum for
  	 * the visibility test.
  	 */
- 	scan = index_beginscan(OldHeap, OldIndex,
- 						   SnapshotAny, 0, (ScanKey) NULL);
  
! 	while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
  	{
  		HeapTuple	copiedTuple;
  		bool		isdead;
--- 787,825 ----
  	 * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum for
  	 * the visibility test.
  	 */
  
! 	ii = BuildIndexInfo(OldIndex);
! 
! 	if (ii->ii_Expressions) {
! 		do_sort = false;
! 		shouldfree = false;
! 		scan = index_beginscan(OldHeap, OldIndex,
! 							   SnapshotAny, 0, (ScanKey) NULL);
! 	} else {
! 		do_sort = true;
! 
! 		/* Generate the scan keys for the index */
! 		scanKeys = _bt_mkscankey_nodata(OldIndex);
! 		/* Repoint every key (not just key 0) at its heap column number */
! 		for (i = 0; i < ii->ii_NumIndexAttrs; i++)
! 			scanKeys[i].sk_attno = ii->ii_KeyAttrNumbers[i];
! 		/* sort the raw heap tuples using the index ordering */
! 		tuplesort = tuplesort_begin_rawheap(oldTupDesc,
! 											ii->ii_NumIndexAttrs, scanKeys,
! 											maintenance_work_mem, false);
! 		
! 		hscan = heap_beginscan(OldHeap, SnapshotAny, 0, NULL);
! 		while ((tuple = heap_getnext(hscan, ForwardScanDirection))) {
! 			tuplesort_putrawtuple(tuplesort, tuple);
! 		}
! 		heap_endscan(hscan);
! 		tuplesort_performsort(tuplesort);
! 	}
! 	
! 		
! 	while ((tuple = (do_sort
! 					 ? tuplesort_getrawtuple(tuplesort, true, &shouldfree)
! 					 : index_getnext(scan, ForwardScanDirection))) != NULL)
  	{
  		HeapTuple	copiedTuple;
  		bool		isdead;
***************
*** 784,797 ****
  
  		CHECK_FOR_INTERRUPTS();
  
! 		/* Since we used no scan keys, should never need to recheck */
! 		if (scan->xs_recheck)
! 			elog(ERROR, "CLUSTER does not support lossy index conditions");
! 
! 		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
  
  		switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin,
! 										 scan->xs_cbuf))
  		{
  			case HEAPTUPLE_DEAD:
  				/* Definitely dead */
--- 827,843 ----
  
  		CHECK_FOR_INTERRUPTS();
  
! 		if (!do_sort) {
! 			
! 			/* Since we used no scan keys, should never need to recheck */
! 			if (scan->xs_recheck)
! 				elog(ERROR, "CLUSTER does not support lossy index conditions");
! 			
! 			LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
! 		}
  
  		switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin,
! 										 (!do_sort ? scan->xs_cbuf : InvalidBuffer)))
  		{
  			case HEAPTUPLE_DEAD:
  				/* Definitely dead */
***************
*** 833,839 ****
  				break;
  		}
  
! 		LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
  
  		if (isdead)
  		{
--- 879,886 ----
  				break;
  		}
  
! 		if (!do_sort)
! 			LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
  
  		if (isdead)
  		{
***************
*** 875,883 ****
  		rewrite_heap_tuple(rwstate, tuple, copiedTuple);
  
  		heap_freetuple(copiedTuple);
  	}
  
! 	index_endscan(scan);
  
  	/* Write out any remaining tuples, and fsync if needed */
  	end_heap_rewrite(rwstate);
--- 922,935 ----
  		rewrite_heap_tuple(rwstate, tuple, copiedTuple);
  
  		heap_freetuple(copiedTuple);
+ 		if (shouldfree)
+ 			heap_freetuple(tuple);
  	}
  
! 	if (do_sort)
! 		tuplesort_end(tuplesort);
! 	else
! 		index_endscan(scan);
  
  	/* Write out any remaining tuples, and fsync if needed */
  	end_heap_rewrite(rwstate);
Index: src/backend/utils/error/elog.c
===================================================================
RCS file: /home/stark/src/REPOSITORY/pgsql/src/backend/utils/error/elog.c,v
retrieving revision 1.205
diff -c -r1.205 elog.c
*** src/backend/utils/error/elog.c	9 Jul 2008 15:56:49 -0000	1.205
--- src/backend/utils/error/elog.c	31 Aug 2008 11:21:00 -0000
***************
*** 2008,2017 ****
  		debug_query_string != NULL &&
  		!edata->hide_stmt)
  	{
! 		log_line_prefix(&buf);
! 		appendStringInfoString(&buf, _("STATEMENT:  "));
! 		append_with_tabs(&buf, debug_query_string);
! 		appendStringInfoChar(&buf, '\n');
  	}
  
  #ifdef HAVE_SYSLOG
--- 2008,2023 ----
  		debug_query_string != NULL &&
  		!edata->hide_stmt)
  	{
! 		/* awful kludge: emit STATEMENT only when the query-string pointer changes */
! 		static const char *prev_string = NULL;
! 		if (prev_string != debug_query_string) {
! 			prev_string = debug_query_string;
! 			/* NOTE(review): compares addresses only; a new query at a reused address is skipped */
! 			log_line_prefix(&buf);
! 			appendStringInfoString(&buf, _("STATEMENT:  "));
! 			append_with_tabs(&buf, debug_query_string);
! 			appendStringInfoChar(&buf, '\n');
! 		}
  	}
  
  #ifdef HAVE_SYSLOG
Index: src/backend/utils/sort/tuplesort.c
===================================================================
RCS file: /home/stark/src/REPOSITORY/pgsql/src/backend/utils/sort/tuplesort.c,v
retrieving revision 1.86
diff -c -r1.86 tuplesort.c
*** src/backend/utils/sort/tuplesort.c	1 Aug 2008 13:16:09 -0000	1.86
--- src/backend/utils/sort/tuplesort.c	31 Aug 2008 13:09:14 -0000
***************
*** 443,448 ****
--- 443,449 ----
  static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex);
  static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK);
  static void markrunend(Tuplesortstate *state, int tapenum);
+ 
  static int comparetup_heap(const SortTuple *a, const SortTuple *b,
  				Tuplesortstate *state);
  static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup);
***************
*** 451,456 ****
--- 452,465 ----
  static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
  			 int tapenum, unsigned int len);
  static void reversedirection_heap(Tuplesortstate *state);
+ 
+ static int comparetup_rawheap(const SortTuple *a, const SortTuple *b, 
+ 							  Tuplesortstate *state);
+ static void copytup_rawheap(Tuplesortstate *state, SortTuple *stup, void *tup);
+ static void writetup_rawheap(Tuplesortstate *state, int tapenum, SortTuple *stup);
+ static void readtup_rawheap(Tuplesortstate *state, SortTuple *stup, int tapenum, 
+ 							unsigned int len);
+ 
  static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
  				 Tuplesortstate *state);
  static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
***************
*** 462,467 ****
--- 471,477 ----
  			  int tapenum, unsigned int len);
  static void reversedirection_index_btree(Tuplesortstate *state);
  static void reversedirection_index_hash(Tuplesortstate *state);
+ 
  static int comparetup_datum(const SortTuple *a, const SortTuple *b,
  				 Tuplesortstate *state);
  static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup);
***************
*** 625,630 ****
--- 635,678 ----
  }
  
  Tuplesortstate *
+ tuplesort_begin_rawheap(TupleDesc tupDesc,
+ 						int nkeys, ScanKey scanKeys,
+ 						int workMem, bool randomAccess)
+ {
+ 	/* Set up a sort of whole HeapTuples ordered by the caller's scan keys. */
+ 	Tuplesortstate *sortstate = tuplesort_begin_common(workMem, randomAccess);
+ 	MemoryContext callercxt = MemoryContextSwitchTo(sortstate->sortcontext);
+ 	Size		keybytes = nkeys * sizeof(ScanKeyData);
+ 
+ 	AssertArg(nkeys > 0);
+ 
+ #ifdef TRACE_SORT
+ 	if (trace_sort)
+ 		elog(LOG,
+ 			 "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
+ 			 nkeys, workMem, randomAccess ? 't' : 'f');
+ #endif
+ 
+ 	TRACE_POSTGRESQL_SORT_START(HEAP_SORT, false, nkeys, workMem, randomAccess);
+ 
+ 	/* Private copy of the keys, allocated while in the sort's own context */
+ 	sortstate->scanKeys = (ScanKey) palloc(keybytes);
+ 	memcpy(sortstate->scanKeys, scanKeys, keybytes);
+ 	sortstate->nKeys = nkeys;
+ 	sortstate->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
+ 
+ 	/* Install the raw-heap-tuple method table */
+ 	sortstate->comparetup = comparetup_rawheap;
+ 	sortstate->copytup = copytup_rawheap;
+ 	sortstate->writetup = writetup_rawheap;
+ 	sortstate->readtup = readtup_rawheap;
+ 	sortstate->reversedirection = reversedirection_heap;
+ 
+ 	MemoryContextSwitchTo(callercxt);
+ 	return sortstate;
+ }
+ 
+ Tuplesortstate *
  tuplesort_begin_index_btree(Relation indexRel,
  							bool enforceUnique,
  							int workMem, bool randomAccess)
***************
*** 916,921 ****
--- 964,991 ----
  }
  
  /*
+  * Accept one tuple while collecting input data for sort.
+  *
+  * Note that the input data is always copied; the caller need not save it.
+  */
+ void
+ tuplesort_putrawtuple(Tuplesortstate *state, HeapTuple tup)
+ {
+ 	SortTuple	entry;
+ 	MemoryContext callercxt;
+ 
+ 	/* Do the copy and the insertion inside the sort's own memory context */
+ 	callercxt = MemoryContextSwitchTo(state->sortcontext);
+ 
+ 	/* COPYTUP fills in a sort-owned entry; the caller keeps its original */
+ 	COPYTUP(state, &entry, (void *) tup);
+ 	/* hand the entry to the shared insertion path */
+ 	puttuple_common(state, &entry);
+ 
+ 	MemoryContextSwitchTo(callercxt);
+ }
+ 
+ /*
   * Accept one index tuple while collecting input data for sort.
   *
   * Note that the input tuple is always copied; the caller need not save it.
***************
*** 1414,1419 ****
--- 1484,1509 ----
  }
  
  /*
+  * Fetch the next tuple in either forward or back direction.
+  * Returns NULL if no more tuples.	If *should_free is set, the
+  * caller must pfree the returned tuple when done with it.
+  */
+ HeapTuple
+ tuplesort_getrawtuple(Tuplesortstate *state, bool forward,
+ 					  bool *should_free)
+ {
+ 	MemoryContext callercxt = MemoryContextSwitchTo(state->sortcontext);
+ 	SortTuple	entry;
+ 	HeapTuple	result = NULL;	/* stays NULL if the common fetch finds nothing */
+ 
+ 	if (tuplesort_gettuple_common(state, forward, &entry, should_free))
+ 		result = (HeapTuple) entry.tuple;
+ 
+ 	MemoryContextSwitchTo(callercxt);
+ 	return result;
+ }
+ 
+ /*
   * Fetch the next index tuple in either forward or back direction.
   * Returns NULL if no more tuples.	If *should_free is set, the
   * caller must pfree the returned tuple when done with it.
***************
*** 2704,2709 ****
--- 2794,2908 ----
  
  
  /*
+  * Routines specialized for the raw on-disk HeapTuple case.  These are only
+  * used when we need the visibility info for things like CLUSTER.  Otherwise we
+  * use the regular *tup_heap methods, which actually manipulate MinimalTuples.
+  */
+ 
+ static int
+ comparetup_rawheap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
+ {
+ 	ScanKey		keys = state->scanKeys;
+ 	TupleDesc	desc = state->tupDesc;
+ 	HeapTuple	lhs = (HeapTuple) a->tuple;
+ 	HeapTuple	rhs = (HeapTuple) b->tuple;
+ 	int32		cmp;
+ 	int			k;
+ 
+ 	/* Allow interrupting long sorts */
+ 	CHECK_FOR_INTERRUPTS();
+ 
+ 	/*
+ 	 * Key 0 is compared from the datum1/isnull1 values cached in the
+ 	 * SortTuples, so the heap tuples are not touched for it.
+ 	 */
+ 	cmp = inlineApplySortFunction(&keys[0].sk_func, keys[0].sk_flags,
+ 								  a->datum1, a->isnull1,
+ 								  b->datum1, b->isnull1);
+ 
+ 	/*
+ 	 * While still tied, extract each later key column from both tuples and
+ 	 * compare it with that key's own sort function; the first nonzero
+ 	 * result is the answer.
+ 	 */
+ 	for (k = 1; cmp == 0 && k < state->nKeys; k++)
+ 	{
+ 		AttrNumber	col = keys[k].sk_attno;
+ 		bool		lnull;
+ 		bool		rnull;
+ 		Datum		lval;
+ 		Datum		rval;
+ 
+ 		lval = heap_getattr(lhs, col, desc, &lnull);
+ 		rval = heap_getattr(rhs, col, desc, &rnull);
+ 
+ 		cmp = inlineApplySortFunction(&keys[k].sk_func, keys[k].sk_flags,
+ 									  lval, lnull,
+ 									  rval, rnull);
+ 	}
+ 
+ 	/* zero here means every key compared equal */
+ 	return cmp;
+ }
+ 
+ static void
+ copytup_rawheap(Tuplesortstate *state, SortTuple *stup, void *tup)
+ {
+ 	/* duplicate the caller's tuple so the sort holds its own copy */
+ 	HeapTuple	owned = heap_copytuple((HeapTuple) tup);
+ 	AttrNumber	leadcol = state->scanKeys[0].sk_attno;
+ 
+ 	stup->tuple = (void *) owned;
+ 	/* account for the space taken by the copy's memory chunk */
+ 	USEMEM(state, GetMemoryChunkSpace(owned));
+ 	/* cache the leading key column's value for comparetup_rawheap */
+ 	stup->datum1 = heap_getattr(owned, leadcol, state->tupDesc,
+ 								&stup->isnull1);
+ }
+ 
+ static void
+ writetup_rawheap(Tuplesortstate *state, int tapenum, SortTuple *stup)
+ {
+ 	HeapTuple	tuple = (HeapTuple) stup->tuple;
+ 	tuple->t_len += HEAPTUPLESIZE; /* write out the header as well */
+ 	/* Spill t_len bytes starting at the tuple struct; presumably t_len is its first field */
+ 	LogicalTapeWrite(state->tapeset, tapenum,
+ 					 tuple, tuple->t_len);
+ 	/* NOTE(review): assumes header and data are one contiguous chunk -- confirm */
+ 	if (state->randomAccess)	/* need trailing length word? */
+ 		LogicalTapeWrite(state->tapeset, tapenum,
+ 						 tuple, sizeof(tuple->t_len));
+ 	/* give the space back to the sort and release the in-memory copy */
+ 	FREEMEM(state, GetMemoryChunkSpace(tuple));
+ 	heap_freetuple(tuple);
+ }
+ 
+ static void
+ readtup_rawheap(Tuplesortstate *state, SortTuple *stup,
+ 			 int tapenum, unsigned int tuplen)
+ {
+ 	HeapTuple	tuple = (HeapTuple) palloc(tuplen);
+ 	Size		rest = tuplen - sizeof(tuplen);	/* bytes after the length word */
+ 
+ 	USEMEM(state, GetMemoryChunkSpace(tuple));
+ 	tuple->t_len = tuplen - HEAPTUPLESIZE;
+ 	if (LogicalTapeRead(state->tapeset, tapenum,
+ 						(char *) tuple + sizeof(tuplen), rest) != rest)
+ 		elog(ERROR, "unexpected end of data");
+ 	/* the t_data just read is the freed pre-spill address; aim it at our copy */
+ 	tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
+ 	if (state->randomAccess)	/* need trailing length word? */
+ 		if (LogicalTapeRead(state->tapeset, tapenum, &tuplen,
+ 							sizeof(tuplen)) != sizeof(tuplen))
+ 			elog(ERROR, "unexpected end of data");
+ 	stup->tuple = tuple;
+ 	/* set up first-column key value */
+ 	stup->datum1 = heap_getattr(tuple, state->scanKeys[0].sk_attno,
+ 								state->tupDesc, &stup->isnull1);
+ }
+ 
+ 
+ /*
   * Routines specialized for IndexTuple case
   *
   * The btree and hash cases require separate comparison functions, but the
Index: src/backend/utils/time/tqual.c
===================================================================
RCS file: /home/stark/src/REPOSITORY/pgsql/src/backend/utils/time/tqual.c,v
retrieving revision 1.110
diff -c -r1.110 tqual.c
*** src/backend/utils/time/tqual.c	26 Mar 2008 16:20:47 -0000	1.110
--- src/backend/utils/time/tqual.c	31 Aug 2008 11:34:11 -0000
***************
*** 86,91 ****
--- 86,97 ----
  SetHintBits(HeapTupleHeader tuple, Buffer buffer,
  			uint16 infomask, TransactionId xid)
  {
+ 	/* This is primarily for CLUSTER which needs to do HTSV on tuples copied
+ 	 * into local memory long after the original buffer is gone. Under normal
+ 	 * operation we hope branch prediction will make this zero-cost */
+ 	if (!BufferIsValid(buffer))
+ 		return;
+ 
  	if (TransactionIdIsValid(xid))
  	{
  		/* NB: xid must be known committed here! */
Index: src/include/utils/tuplesort.h
===================================================================
RCS file: /home/stark/src/REPOSITORY/pgsql/src/include/utils/tuplesort.h,v
retrieving revision 1.31
diff -c -r1.31 tuplesort.h
*** src/include/utils/tuplesort.h	19 Jun 2008 00:46:06 -0000	1.31
--- src/include/utils/tuplesort.h	31 Aug 2008 10:37:38 -0000
***************
*** 55,60 ****
--- 55,63 ----
  					 int nkeys, AttrNumber *attNums,
  					 Oid *sortOperators, bool *nullsFirstFlags,
  					 int workMem, bool randomAccess);
+ extern Tuplesortstate * tuplesort_begin_rawheap(TupleDesc tupDesc,
+ 						int nkeys, ScanKey scanKeys,
+ 						int workMem, bool randomAccess);
  extern Tuplesortstate *tuplesort_begin_index_btree(Relation indexRel,
  							bool enforceUnique,
  							int workMem, bool randomAccess);
***************
*** 69,74 ****
--- 72,78 ----
  
  extern void tuplesort_puttupleslot(Tuplesortstate *state,
  					   TupleTableSlot *slot);
+ extern void tuplesort_putrawtuple(Tuplesortstate *state, HeapTuple tup);
  extern void tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple);
  extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
  				   bool isNull);
***************
*** 77,82 ****
--- 81,88 ----
  
  extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
  					   TupleTableSlot *slot);
+ extern HeapTuple tuplesort_getrawtuple(Tuplesortstate *state, bool forward,
+ 									   bool *should_free);
  extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward,
  						bool *should_free);
  extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
