>From ba11df016538acb5aa19f438a7122bfa8249e028 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Tue, 14 Jan 2014 19:53:21 -0500
Subject: [PATCH] vacuumdb: Add option --analyze-in-stages

Add vacuumdb option --analyze-in-stages which runs ANALYZE three times
with different configuration settings, adopting the logic from the
analyze_new_cluster.sh script that pg_upgrade generates.  That way,
users of pg_dump/pg_restore can also use that functionality.

Change pg_upgrade to create the script so that it calls vacuumdb instead
of implementing the logic itself.
---
 contrib/pg_upgrade/check.c     | 58 +++-------------------------------
 doc/src/sgml/ref/vacuumdb.sgml | 20 ++++++++++++
 src/bin/scripts/vacuumdb.c     | 70 ++++++++++++++++++++++++++++++++----------
 3 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
index a706708..c01a707 100644
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -503,62 +503,12 @@ static void check_locale_and_encoding(ControlData *oldctrl,
 			"--analyze-only" : "--analyze", ECHO_QUOTE);
 	fprintf(script, "echo%s\n\n", ECHO_BLANK);
 
-#ifndef WIN32
-	fprintf(script, "sleep 2\n");
-	fprintf(script, "PGOPTIONS='-c default_statistics_target=1 -c vacuum_cost_delay=0'\n");
-	/* only need to export once */
-	fprintf(script, "export PGOPTIONS\n");
-#else
-	fprintf(script, "REM simulate sleep 2\n");
-	fprintf(script, "PING 1.1.1.1 -n 1 -w 2000 > nul\n");
-	fprintf(script, "SET PGOPTIONS=-c default_statistics_target=1 -c vacuum_cost_delay=0\n");
-#endif
-
-	fprintf(script, "echo %sGenerating minimal optimizer statistics (1 target)%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %s--------------------------------------------------%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "\"%s/vacuumdb\" %s--all --analyze-only\n",
-			new_cluster.bindir, user_specification);
-	fprintf(script, "echo%s\n", ECHO_BLANK);
-	fprintf(script, "echo %sThe server is now available with minimal optimizer statistics.%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %sQuery performance will be optimal once this script completes.%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo%s\n\n", ECHO_BLANK);
-
-#ifndef WIN32
-	fprintf(script, "sleep 2\n");
-	fprintf(script, "PGOPTIONS='-c default_statistics_target=10'\n");
-#else
-	fprintf(script, "REM simulate sleep\n");
-	fprintf(script, "PING 1.1.1.1 -n 1 -w 2000 > nul\n");
-	fprintf(script, "SET PGOPTIONS=-c default_statistics_target=10\n");
-#endif
-
-	fprintf(script, "echo %sGenerating medium optimizer statistics (10 targets)%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %s---------------------------------------------------%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "\"%s/vacuumdb\" %s--all --analyze-only\n",
+	fprintf(script, "\"%s/vacuumdb\" %s--all --analyze-in-stages\n",
 			new_cluster.bindir, user_specification);
-	fprintf(script, "echo%s\n\n", ECHO_BLANK);
-
-#ifndef WIN32
-	fprintf(script, "unset PGOPTIONS\n");
-#else
-	fprintf(script, "SET PGOPTIONS\n");
-#endif
-
-	fprintf(script, "echo %sGenerating default (full) optimizer statistics (100 targets?)%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "echo %s-------------------------------------------------------------%s\n",
-			ECHO_QUOTE, ECHO_QUOTE);
-	fprintf(script, "\"%s/vacuumdb\" %s--all %s\n", new_cluster.bindir,
-			user_specification,
 	/* Did we copy the free space files? */
-			(GET_MAJOR_VERSION(old_cluster.major_version) >= 804) ?
-			"--analyze-only" : "--analyze");
+	if (GET_MAJOR_VERSION(old_cluster.major_version) < 804)
+		fprintf(script, "\"%s/vacuumdb\" %s--all\n", new_cluster.bindir,
+				user_specification);
 
 	fprintf(script, "echo%s\n\n", ECHO_BLANK);
 	fprintf(script, "echo %sDone%s\n",
diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml
index a5216ec..d743b0d 100644
--- a/doc/src/sgml/ref/vacuumdb.sgml
+++ b/doc/src/sgml/ref/vacuumdb.sgml
@@ -204,6 +204,26 @@ <title>Options</title>
      </varlistentry>
 
      <varlistentry>
+      <term><option>--analyze-in-stages</option></term>
+      <listitem>
+       <para>
+        Only calculate statistics for use by the optimizer (no vacuum),
+        like <option>--analyze-only</option>.  Run several stages of analyze
+        with different configuration settings, to produce usable statistics
+        faster.
+       </para>
+
+       <para>
+        This option is useful to analyze a database that was newly populated
+        from a restored dump or by <command>pg_upgrade</command>.  This option
+        will try to create some statistics as fast as possible, to make the
+        database usable, and then produce full statistics in the subsequent
+        stages.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
        <term><option>-?</></term>
        <term><option>--help</></term>
        <listitem>
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index f034032..2e8bac3 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -16,12 +16,12 @@
 
 
 static void vacuum_one_database(const char *dbname, bool full, bool verbose,
-					bool and_analyze, bool analyze_only, bool freeze,
+					bool and_analyze, bool analyze_only, bool analyze_in_stages, bool freeze,
 					const char *table, const char *host, const char *port,
 					const char *username, enum trivalue prompt_password,
 					const char *progname, bool echo);
 static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
-					 bool analyze_only, bool freeze,
+					 bool analyze_only, bool analyze_in_stages, bool freeze,
 					 const char *maintenance_db,
 					 const char *host, const char *port,
 					 const char *username, enum trivalue prompt_password,
@@ -50,6 +50,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 		{"full", no_argument, NULL, 'f'},
 		{"verbose", no_argument, NULL, 'v'},
 		{"maintenance-db", required_argument, NULL, 2},
+		{"analyze-in-stages", no_argument, NULL, 3},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -67,6 +68,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 	bool		quiet = false;
 	bool		and_analyze = false;
 	bool		analyze_only = false;
+	bool		analyze_in_stages = false;
 	bool		freeze = false;
 	bool		alldb = false;
 	bool		full = false;
@@ -130,6 +132,9 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 			case 2:
 				maintenance_db = pg_strdup(optarg);
 				break;
+			case 3:
+				analyze_in_stages = analyze_only = true;
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -189,7 +194,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 			exit(1);
 		}
 
-		vacuum_all_databases(full, verbose, and_analyze, analyze_only, freeze,
+		vacuum_all_databases(full, verbose, and_analyze, analyze_only, analyze_in_stages, freeze,
 							 maintenance_db, host, port, username,
 							 prompt_password, progname, echo, quiet);
 	}
@@ -212,7 +217,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 			for (cell = tables.head; cell; cell = cell->next)
 			{
 				vacuum_one_database(dbname, full, verbose, and_analyze,
-									analyze_only,
+									analyze_only, analyze_in_stages,
 									freeze, cell->val,
 									host, port, username, prompt_password,
 									progname, echo);
@@ -220,7 +225,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 		}
 		else
 			vacuum_one_database(dbname, full, verbose, and_analyze,
-								analyze_only,
+								analyze_only, analyze_in_stages,
 								freeze, NULL,
 								host, port, username, prompt_password,
 								progname, echo);
@@ -231,8 +236,25 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 
 
 static void
+run_vacuum_command(PGconn *conn, const char *sql, bool echo, const char *dbname, const char *table, const char *progname)
+{
+	if (!executeMaintenanceCommand(conn, sql, echo))
+	{
+		if (table)
+			fprintf(stderr, _("%s: vacuuming of table \"%s\" in database \"%s\" failed: %s"),
+					progname, table, dbname, PQerrorMessage(conn));
+		else
+			fprintf(stderr, _("%s: vacuuming of database \"%s\" failed: %s"),
+					progname, dbname, PQerrorMessage(conn));
+		PQfinish(conn);
+		exit(1);
+	}
+}
+
+
+static void
 vacuum_one_database(const char *dbname, bool full, bool verbose, bool and_analyze,
-					bool analyze_only, bool freeze, const char *table,
+					bool analyze_only, bool analyze_in_stages, bool freeze, const char *table,
 					const char *host, const char *port,
 					const char *username, enum trivalue prompt_password,
 					const char *progname, bool echo)
@@ -300,17 +322,30 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 		appendPQExpBuffer(&sql, " %s", table);
 	appendPQExpBufferStr(&sql, ";\n");
 
-	if (!executeMaintenanceCommand(conn, sql.data, echo))
+	if (analyze_in_stages)
 	{
-		if (table)
-			fprintf(stderr, _("%s: vacuuming of table \"%s\" in database \"%s\" failed: %s"),
-					progname, table, dbname, PQerrorMessage(conn));
-		else
-			fprintf(stderr, _("%s: vacuuming of database \"%s\" failed: %s"),
-					progname, dbname, PQerrorMessage(conn));
-		PQfinish(conn);
-		exit(1);
+		const char *stage_commands[] = {
+			"SET default_statistics_target=1; SET vacuum_cost_delay=0;",
+			"SET default_statistics_target=10; RESET vacuum_cost_delay;",
+			"RESET default_statistics_target;"
+		};
+		const char *stage_messages[] = {
+			gettext_noop("Generating minimal optimizer statistics (1 target)"),
+			gettext_noop("Generating medium optimizer statistics (10 targets)"),
+			gettext_noop("Generating default (full) optimizer statistics (100 targets?)")
+		};
+		int i;
+
+		for (i = 0; i < 3; i++)
+		{
+			puts(gettext(stage_messages[i]));
+			executeCommand(conn, stage_commands[i], progname, echo);
+			run_vacuum_command(conn, sql.data, echo, dbname, table, progname);
+		}
 	}
+	else
+		run_vacuum_command(conn, sql.data, echo, dbname, NULL, progname);
+
 	PQfinish(conn);
 	termPQExpBuffer(&sql);
 }
@@ -318,7 +353,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 
 static void
 vacuum_all_databases(bool full, bool verbose, bool and_analyze, bool analyze_only,
-					 bool freeze, const char *maintenance_db,
+					 bool analyze_in_stages, bool freeze, const char *maintenance_db,
 					 const char *host, const char *port,
 					 const char *username, enum trivalue prompt_password,
 					 const char *progname, bool echo, bool quiet)
@@ -343,6 +378,7 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 		}
 
 		vacuum_one_database(dbname, full, verbose, and_analyze, analyze_only,
+							analyze_in_stages,
 						 freeze, NULL, host, port, username, prompt_password,
 							progname, echo);
 	}
@@ -369,6 +405,8 @@ static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 	printf(_("  -V, --version                   output version information, then exit\n"));
 	printf(_("  -z, --analyze                   update optimizer statistics\n"));
 	printf(_("  -Z, --analyze-only              only update optimizer statistics\n"));
+	printf(_("      --analyze-in-stages         only update optimizer statistics, in multiple\n"
+			 "                                  stages for faster results\n"));
 	printf(_("  -?, --help                      show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -h, --host=HOSTNAME       database server host or socket directory\n"));
-- 
1.8.5.1

