From faea51dac940ce6c79235d60ea3b516c186d6e06 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <peter.geoghegan86@gmail.com>
Date: Sun, 16 Aug 2015 21:17:16 -0700
Subject: [PATCH 3/5] Log requirement for multiple external sort passes

The new log message warns users about a sort requiring multiple passes.
This is in the same spirit as checkpoint_warning.  It seems very
ill-advised to ever attempt a sort that will require multiple passes on
contemporary hardware, since that can greatly increase the amount of I/O
required, and yet can only occur when available memory is small fraction
of what is required for a fully internal sort.

A new GUC, multipass_warning controls this log message.  The default is
'on'.  Also, a new debug GUC (not available in a standard build) for
controlling whether replacement selection can be avoided in the first
run is added.

During review, this patch may be useful for highlighting how effectively
replacement selection sort prevents multiple passes during the merge
step (relative to a hybrid sort-merge strategy) in practice.
---
 doc/src/sgml/config.sgml           | 22 +++++++++++++++++++++
 src/backend/utils/misc/guc.c       | 29 +++++++++++++++++++++++++---
 src/backend/utils/sort/tuplesort.c | 39 ++++++++++++++++++++++++++++++++++++--
 src/include/utils/guc.h            |  2 ++
 4 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e3dc23b..4be1ad8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1556,6 +1556,28 @@ include_dir 'conf.d'
      <title>Disk</title>
 
      <variablelist>
+     <varlistentry id="guc-multipass-warning" xreflabel="multipass_warning">
+      <term><varname>multipass_warning</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>multipass_warning</> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Write a message to the server log if an external sort
+        operation requires multiple passes (which suggests that
+        <varname>work_mem</> or <varname>maintenance_work_mem</> may
+        need to be raised).  Only a small fraction of the memory
+        required for an internal sort is required for an external sort
+        that makes no more than a single pass (typically less than
+        1%).  Since multi-pass sorts are often much slower, it is
+        advisable to avoid them altogether whenever possible.
+        The default setting is <literal>on</>.
+        Only superusers can change this setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-temp-file-limit" xreflabel="temp_file_limit">
       <term><varname>temp_file_limit</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index b3dac51..3302648 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -115,8 +115,9 @@ extern bool synchronize_seqscans;
 #ifdef TRACE_SYNCSCAN
 extern bool trace_syncscan;
 #endif
-#ifdef DEBUG_BOUNDED_SORT
+#ifdef DEBUG_SORT
 extern bool optimize_bounded_sort;
+extern bool optimize_avoid_selection;
 #endif
 
 static int	GUC_check_errcode_value;
@@ -1041,6 +1042,16 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
+		{"multipass_warning", PGC_SUSET, LOGGING_WHAT,
+			gettext_noop("Enables warnings if external sorts require more than one pass."),
+			gettext_noop("Write a message to the server log if more than one pass is required "
+						 "for an external sort operation.")
+		},
+		&multipass_warning,
+		true,
+		NULL, NULL, NULL
+	},
+	{
 		{"debug_assertions", PGC_INTERNAL, PRESET_OPTIONS,
 			gettext_noop("Shows whether the running server has assertion checks enabled."),
 			NULL,
@@ -1449,8 +1460,8 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 #endif
 
-#ifdef DEBUG_BOUNDED_SORT
-	/* this is undocumented because not exposed in a standard build */
+#ifdef DEBUG_SORT
+	/* these are undocumented because not exposed in a standard build */
 	{
 		{
 			"optimize_bounded_sort", PGC_USERSET, QUERY_TUNING_METHOD,
@@ -1462,6 +1473,18 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+
+	{
+		{
+			"optimize_avoid_selection", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enable avoiding replacement selection using heap sort."),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&optimize_avoid_selection,
+		true,
+		NULL, NULL, NULL
+	},
 #endif
 
 #ifdef WAL_DEBUG
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index cca9683..abbef6c 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -160,8 +160,11 @@
 bool		trace_sort = false;
 #endif
 
-#ifdef DEBUG_BOUNDED_SORT
+bool		multipass_warning = true;
+
+#ifdef DEBUG_SORT
 bool		optimize_bounded_sort = true;
+bool		optimize_avoid_selection = true;
 #endif
 
 
@@ -250,6 +253,7 @@ struct Tuplesortstate
 {
 	TupSortStatus status;		/* enumerated value as shown above */
 	int			nKeys;			/* number of columns in sort key */
+	bool		querySort;		/* sort associated with query execution */
 	double		rowNumHint;		/* caller's hint of total # of rows */
 	bool		randomAccess;	/* did caller request random access? */
 	bool		bounded;		/* did caller specify a maximum number of
@@ -697,6 +701,7 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 #endif
 
 	state->nKeys = nkeys;
+	state->querySort = true;
 
 	TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
 								false,	/* no unique check */
@@ -771,6 +776,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 #endif
 
 	state->nKeys = RelationGetNumberOfAttributes(indexRel);
+	state->querySort = false;
 
 	TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
 								false,	/* no unique check */
@@ -864,6 +870,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 #endif
 
 	state->nKeys = RelationGetNumberOfAttributes(indexRel);
+	state->querySort = false;
 
 	TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
 								enforceUnique,
@@ -939,6 +946,7 @@ tuplesort_begin_index_hash(Relation heapRel,
 #endif
 
 	state->nKeys = 1;			/* Only one sort column, the hash code */
+	state->querySort = false;
 
 	state->comparetup = comparetup_index_hash;
 	state->copytup = copytup_index;
@@ -976,6 +984,7 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 #endif
 
 	state->nKeys = 1;			/* always a one-column sort */
+	state->querySort = true;
 
 	TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
 								false,	/* no unique check */
@@ -1042,7 +1051,7 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)
 	Assert(state->memtupcount == 0);
 	Assert(!state->bounded);
 
-#ifdef DEBUG_BOUNDED_SORT
+#ifdef DEBUG_SORT
 	/* Honor GUC setting that disables the feature (for easy testing) */
 	if (!optimize_bounded_sort)
 		return;
@@ -2310,6 +2319,12 @@ useselection(Tuplesortstate *state)
 	double		crossover;
 	bool		useSelection;
 
+#ifdef DEBUG_SORT
+	/* Honor GUC setting that disables the feature (for easy testing) */
+	if (!optimize_avoid_selection)
+		return true;
+#endif
+
 	/* For randomAccess callers, "quicksort with spillover" is never used */
 	if (state->randomAccess)
 		return false;
@@ -2528,6 +2543,12 @@ selectnewtape(Tuplesortstate *state)
 static void
 mergeruns(Tuplesortstate *state)
 {
+#ifdef TRACE_SORT
+	bool		multiwarned = !(multipass_warning || trace_sort);
+#else
+	bool		multiwarned = !multipass_warning;
+#endif
+
 	int			tapenum,
 				svTape,
 				svRuns,
@@ -2639,6 +2660,20 @@ mergeruns(Tuplesortstate *state)
 		/* Step D6: decrease level */
 		if (--state->Level == 0)
 			break;
+
+		if (!multiwarned)
+		{
+			int64		memNowUsed = state->allowedMem - state->availMem;
+
+			ereport(LOG,
+					(errmsg("a multi-pass external merge sort is required "
+							"(%d tape maximum)", state->maxTapes),
+					 errhint("Consider increasing the configuration parameter \"%s\".",
+							 state->querySort ?  "work_mem" : "maintenance_work_mem")));
+
+			multiwarned = true;
+		}
+
 		/* rewind output tape T to use as new input */
 		LogicalTapeRewind(state->tapeset, state->tp_tapenum[state->tapeRange],
 						  false);
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index dc167f9..1e1519a 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -272,6 +272,8 @@ extern int	tcp_keepalives_count;
 extern bool trace_sort;
 #endif
 
+extern bool multipass_warning;
+
 /*
  * Functions exported by guc.c
  */
-- 
1.9.1

