diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 1c0583fe26..6585164ca4 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -846,6 +846,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_sort_optimization", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the sort optimizations."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_sort_optimization,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"enable_incremental_sort", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of incremental sort steps."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..1b49f48457 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -389,6 +389,7 @@
 #enable_presorted_aggregate = on
 #enable_seqscan = on
 #enable_sort = on
+#enable_sort_optimization = on
 #enable_tidscan = on
 
 # - Planner Cost Constants -
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 9ca9835aab..cbaf54437c 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -127,6 +127,8 @@
 bool		trace_sort = false;
 #endif
 
+bool		enable_sort_optimization = true;
+
 #ifdef DEBUG_BOUNDED_SORT
 bool		optimize_bounded_sort = true;
 #endif
@@ -404,6 +406,7 @@ struct Sharedsort
 #define SERIAL(state)		((state)->shared == NULL)
 #define WORKER(state)		((state)->shared && (state)->worker != -1)
 #define LEADER(state)		((state)->shared && (state)->worker == -1)
+#define CACHESIZE 2*1024*1024L
 
 /*
  * NOTES about on-tape representation of tuples:
@@ -2725,6 +2728,8 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 		{
 			if (state->base.sortKeys[0].comparator == ssup_datum_unsigned_cmp)
 			{
+				SortSupport oldonlyKey = state->base.onlyKey;
+				state->base.onlyKey = state->base.sortKeys;
 				qsort_tuple_unsigned(state->memtuples,
 									 state->memtupcount,
 									 state);
@@ -2740,11 +2745,63 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 			}
 #endif
 			else if (state->base.sortKeys[0].comparator == ssup_datum_int32_cmp)
-			{
-				qsort_tuple_int32(state->memtuples,
+			{	
+				if (enable_sort_optimization)
+				{
+					/* backup onlyKey and set it as NON NULL so that tie breaker won't be called
+					 * aim is to sort on first col only now 
+					 */
+					SortSupport oldonlyKey = state->base.onlyKey;
+					state->base.onlyKey = state->base.sortKeys;
+					qsort_tuple_int32(state->memtuples,
+									state->memtupcount,
+									state);
+					int BATCHSIZE = ceil(CACHESIZE/sizeof(state->memtuples[0].tuple));
+					if (state->memtupcount<BATCHSIZE)
+					{
+						BATCHSIZE = state->memtupcount;
+					}
+					int i=0;
+					int j=0;
+					while(i<state->memtupcount)
+					{
+
+						if ( (i + BATCHSIZE) > state->memtupcount)
+						{
+							BATCHSIZE = state->memtupcount - i;
+						}
+						SortTuple  *memtuples = &state->memtuples[i];
+						qsort_tuple_int32(memtuples,
+									BATCHSIZE,
+									state);
+						SortTuple  *tempmemtuples = (SortTuple *) palloc(BATCHSIZE * sizeof(SortTuple));
+						j=i;
+						for (; j < i+BATCHSIZE; j++)
+						{
+							void* temp = state->memtuples[j].tuple;
+							state->memtuples[j].tuple = heap_copy_minimal_tuple((MinimalTuple) state->memtuples[j].tuple);
+							pfree(temp);
+						}
+
+
+							i += BATCHSIZE;
+					}
+					/* revert onlyKey for pass 2 */
+					state->base.onlyKey = oldonlyKey;
+
+					qsort_tuple_int32(state->memtuples,
+									state->memtupcount,
+									state);
+					return;
+				}
+				else
+				{
+					qsort_tuple_int32(state->memtuples,
 								  state->memtupcount,
 								  state);
 				return;
+
+				}
 			}
 		}
 
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index ba89d013e6..2ae1c90420 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -281,6 +281,7 @@ extern PGDLLIMPORT int tcp_keepalives_idle;
 extern PGDLLIMPORT int tcp_keepalives_interval;
 extern PGDLLIMPORT int tcp_keepalives_count;
 extern PGDLLIMPORT int tcp_user_timeout;
+extern PGDLLIMPORT bool enable_sort_optimization;
 
 #ifdef TRACE_SORT
 extern PGDLLIMPORT bool trace_sort;
