Progress indication prototype

Started by Peter Eisentrautover 15 years ago27 messages
#1Peter Eisentraut
peter_e@gmx.net
1 attachment(s)

Here is a small prototype for a query progress indicator.

Past discussions seemed to indicate that the best place to report this
would be in pg_stat_activity. So that's what this does. You can try it
out with any of the following (on a sufficiently large table): VACUUM
(lazy) (also autovacuum), COPY out from table, COPY in from file,
table-rewriting ALTER TABLE (e.g., add column with default), or a very
simple query. Run the command, and stare at pg_stat_activity (perhaps
via "watch") in a separate session.

More can be added, and the exact placement of the counters is debatable,
but those might be details to be worked out later. Note, my emphasis
here is on maintenance commands; I don't plan to do a progress
estimation of complex queries at this time.

Some thoughts:

- Are the interfaces OK?

- Is this going to be too slow to be useful?

- Should there be a separate switch to turn it on (currently
track_activities)?

- How to handle commands that process multiple tables? For example,
lazy VACUUM on a single table is pretty easy to track (count the block
numbers), but what about a database-wide lazy VACUUM?

Other comments?

Attachments:

progress-indication.patchtext/x-patch; charset=UTF-8; name=progress-indication.patchDownload
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 8852326..8280550 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -341,7 +341,8 @@ CREATE VIEW pg_stat_activity AS
             S.xact_start,
             S.query_start,
             S.waiting,
-            S.current_query
+            S.current_query,
+            S.query_progress
     FROM pg_database D, pg_stat_get_activity(NULL) AS S, pg_authid U
     WHERE S.datid = D.oid AND 
             S.usesysid = U.oid;
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 906d547..67e4fe0 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -35,6 +35,7 @@
 #include "miscadmin.h"
 #include "optimizer/planner.h"
 #include "parser/parse_relation.h"
+#include "pgstat.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/fd.h"
 #include "tcop/tcopprot.h"
@@ -1424,6 +1425,7 @@ CopyTo(CopyState cstate)
 		bool	   *nulls;
 		HeapScanDesc scandesc;
 		HeapTuple	tuple;
+		int64		cnt = 0;
 
 		values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
 		nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
@@ -1439,6 +1441,8 @@ CopyTo(CopyState cstate)
 
 			/* Format and send the data */
 			CopyOneRowTo(cstate, HeapTupleGetOid(tuple), values, nulls);
+
+			pgstat_report_progress(++cnt, cstate->rel->rd_rel->reltuples);
 		}
 
 		heap_endscan(scandesc);
@@ -1695,6 +1699,8 @@ CopyFrom(CopyState cstate)
 	CommandId	mycid = GetCurrentCommandId(true);
 	int			hi_options = 0; /* start with default heap_insert options */
 	BulkInsertState bistate;
+	off_t file_size;
+	off_t file_cnt = 0;
 
 	Assert(cstate->rel);
 
@@ -1776,6 +1782,8 @@ CopyFrom(CopyState cstate)
 			ereport(ERROR,
 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 					 errmsg("\"%s\" is a directory", cstate->filename)));
+
+		file_size = st.st_size;
 	}
 
 	tupDesc = RelationGetDescr(cstate->rel);
@@ -2051,6 +2059,9 @@ CopyFrom(CopyState cstate)
 			}
 
 			Assert(fieldno == nfields);
+
+			file_cnt += cstate->line_buf.len;
+			pgstat_report_progress(file_cnt, file_size);
 		}
 		else
 		{
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 5f6fe41..0d70e86 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -60,6 +60,7 @@
 #include "parser/parse_type.h"
 #include "parser/parse_utilcmd.h"
 #include "parser/parser.h"
+#include "pgstat.h"
 #include "rewrite/rewriteDefine.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/bufmgr.h"
@@ -3126,6 +3127,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap)
 		MemoryContext oldCxt;
 		List	   *dropped_attrs = NIL;
 		ListCell   *lc;
+		int64		cnt = 0;
 
 		econtext = GetPerTupleExprContext(estate);
 
@@ -3253,6 +3255,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap)
 
 			ResetExprContext(econtext);
 
+			pgstat_report_progress(++cnt, oldrel->rd_rel->reltuples);
+
 			CHECK_FOR_INTERRUPTS();
 		}
 
@@ -7064,6 +7068,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
 		 * write; we'll do it ourselves below.
 		 */
 		smgrextend(dst, forkNum, blkno, buf, true);
+
+		if (forkNum == MAIN_FORKNUM)
+			pgstat_report_progress(blkno + 1, nblocks);
 	}
 
 	/*
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 4408db8..2127434 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -350,6 +350,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		bool		all_visible_according_to_vm = false;
 		bool		all_visible;
 
+		pgstat_report_progress(blkno, nblocks);
+
 		/*
 		 * Skip pages that don't require vacuuming according to the visibility
 		 * map. But only if we've seen a streak of at least
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 48599d2..28342bc 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -47,6 +47,7 @@
 #include "optimizer/clauses.h"
 #include "parser/parse_clause.h"
 #include "parser/parsetree.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/smgr.h"
@@ -1181,12 +1182,15 @@ ExecutePlan(EState *estate,
 {
 	TupleTableSlot *slot;
 	long		current_tuple_count;
+	double		planned_tuple_count;
 
 	/*
 	 * initialize local variables
 	 */
 	current_tuple_count = 0;
 
+	planned_tuple_count = planstate->plan->plan_rows;
+
 	/*
 	 * Set the direction.
 	 */
@@ -1246,6 +1250,8 @@ ExecutePlan(EState *estate,
 		current_tuple_count++;
 		if (numberTuples && numberTuples == current_tuple_count)
 			break;
+
+		pgstat_report_progress(current_tuple_count, planned_tuple_count);
 	}
 }
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 894527e..07ad004 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -2328,6 +2328,7 @@ pgstat_bestart(void)
 	/* Also make sure the last byte in each string area is always 0 */
 	beentry->st_appname[NAMEDATALEN - 1] = '\0';
 	beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
+	beentry->st_progress = 0.0;
 
 	beentry->st_changecount++;
 	Assert((beentry->st_changecount & 1) == 0);
@@ -2497,6 +2498,34 @@ pgstat_report_waiting(bool waiting)
 	beentry->st_waiting = waiting;
 }
 
+/* ----------
+ * pgstat_report_progress() -
+ *
+ *	Called from various commands to report their progress.
+ *
+ * NB: this *must* be able to survive being called before MyBEEntry has been
+ * initialized.
+ * ----------
+ */
+void
+pgstat_report_progress(double work_done, double work_total)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!pgstat_track_activities || !beentry)
+		return;
+
+	/*
+	 * Update my status entry, following the protocol of bumping
+	 * st_changecount before and after.  We use a volatile pointer here to
+	 * ensure the compiler doesn't try to get cute.
+	 */
+	beentry->st_changecount++;
+	beentry->st_progress = work_done/work_total;
+	beentry->st_changecount++;
+	Assert((beentry->st_changecount & 1) == 0);
+}
+
 
 /* ----------
  * pgstat_read_current_status() -
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5efad23..2de7e91 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3814,6 +3814,7 @@ PostgresMain(int argc, char *argv[], const char *username)
 				set_ps_display("idle", false);
 				pgstat_report_activity("<IDLE>");
 			}
+			pgstat_report_progress(0, 0);
 
 			ReadyForQuery(whereToSendOutput);
 			send_ready_for_query = false;
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 8379407..a55dfcf 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -50,6 +50,7 @@ extern Datum pg_stat_get_backend_pid(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_backend_dbid(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_backend_userid(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_backend_activity(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_backend_progress(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_backend_waiting(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_backend_activity_start(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_backend_xact_start(PG_FUNCTION_ARGS);
@@ -419,7 +420,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 
 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
 
-		tupdesc = CreateTemplateTupleDesc(11, false);
+		tupdesc = CreateTemplateTupleDesc(12, false);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "datid", OIDOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "procpid", INT4OID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "usesysid", OIDOID, -1, 0);
@@ -431,6 +432,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 9, "backend_start", TIMESTAMPTZOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 10, "client_addr", INETOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 11, "client_port", INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 12, "query_progress", FLOAT8OID, -1, 0);
 
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
@@ -482,8 +484,8 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 	if (funcctx->call_cntr < funcctx->max_calls)
 	{
 		/* for each row */
-		Datum		values[11];
-		bool		nulls[11];
+		Datum		values[12];
+		bool		nulls[12];
 		HeapTuple	tuple;
 		PgBackendStatus *beentry;
 		SockAddr	zero_clientaddr;
@@ -611,6 +613,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 					nulls[10] = true;
 				}
 			}
+			values[11] = Float8GetDatum(beentry->st_progress);
 		}
 		else
 		{
@@ -622,6 +625,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 			nulls[8] = true;
 			nulls[9] = true;
 			nulls[10] = true;
+			nulls[11] = true;
 		}
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
@@ -703,6 +707,24 @@ pg_stat_get_backend_activity(PG_FUNCTION_ARGS)
 
 
 Datum
+pg_stat_get_backend_progress(PG_FUNCTION_ARGS)
+{
+	int32		beid = PG_GETARG_INT32(0);
+	PgBackendStatus *beentry;
+	double		result;
+
+	if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+		PG_RETURN_NULL();
+	else if (!superuser() && beentry->st_userid != GetUserId())
+		PG_RETURN_NULL();
+
+	result = beentry->st_progress;
+
+	PG_RETURN_FLOAT8(result);
+}
+
+
+Datum
 pg_stat_get_backend_waiting(PG_FUNCTION_ARGS)
 {
 	int32		beid = PG_GETARG_INT32(0);
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 6036493..6cd7b62 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3031,7 +3031,7 @@ DATA(insert OID = 2784 (  pg_stat_get_last_autoanalyze_time PGNSP PGUID 12 1 0 0
 DESCR("statistics: last auto analyze time for a table");
 DATA(insert OID = 1936 (  pg_stat_get_backend_idset		PGNSP PGUID 12 1 100 0 f f f t t s 0 0 23 "" _null_ _null_ _null_ _null_ pg_stat_get_backend_idset _null_ _null_ _null_ ));
 DESCR("statistics: currently active backend IDs");
-DATA(insert OID = 2022 (  pg_stat_get_activity			PGNSP PGUID 12 1 100 0 f f f f t s 1 0 2249 "23" "{23,26,23,26,25,25,16,1184,1184,1184,869,23}" "{i,o,o,o,o,o,o,o,o,o,o,o}" "{pid,datid,procpid,usesysid,application_name,current_query,waiting,xact_start,query_start,backend_start,client_addr,client_port}" _null_ pg_stat_get_activity _null_ _null_ _null_ ));
+DATA(insert OID = 2022 (  pg_stat_get_activity			PGNSP PGUID 12 1 100 0 f f f f t s 1 0 2249 "23" "{23,26,23,26,25,25,16,1184,1184,1184,869,23,701}" "{i,o,o,o,o,o,o,o,o,o,o,o,o}" "{pid,datid,procpid,usesysid,application_name,current_query,waiting,xact_start,query_start,backend_start,client_addr,client_port,query_progress}" _null_ pg_stat_get_activity _null_ _null_ _null_ ));
 DESCR("statistics: information about currently active backends");
 DATA(insert OID = 2026 (  pg_backend_pid				PGNSP PGUID 12 1 0 0 f f f t f s 0 0 23 "" _null_ _null_ _null_ _null_ pg_backend_pid _null_ _null_ _null_ ));
 DESCR("statistics: current backend PID");
@@ -3043,6 +3043,8 @@ DATA(insert OID = 1939 (  pg_stat_get_backend_userid	PGNSP PGUID 12 1 0 0 f f f
 DESCR("statistics: user ID of backend");
 DATA(insert OID = 1940 (  pg_stat_get_backend_activity	PGNSP PGUID 12 1 0 0 f f f t f s 1 0 25 "23" _null_ _null_ _null_ _null_ pg_stat_get_backend_activity _null_ _null_ _null_ ));
 DESCR("statistics: current query of backend");
+DATA(insert OID = 2614 (  pg_stat_get_backend_progress	PGNSP PGUID 12 1 0 0 f f f t f s 1 0 701 "23" _null_ _null_ _null_ _null_ pg_stat_get_backend_progress _null_ _null_ _null_ ));
+DESCR("statistics: progress of current query of backend");
 DATA(insert OID = 2853 (  pg_stat_get_backend_waiting	PGNSP PGUID 12 1 0 0 f f f t f s 1 0 16 "23" _null_ _null_ _null_ _null_ pg_stat_get_backend_waiting _null_ _null_ _null_ ));
 DESCR("statistics: is backend currently waiting for a lock");
 DATA(insert OID = 2094 (  pg_stat_get_backend_activity_start PGNSP PGUID 12 1 0 0 f f f t f s 1 0 1184 "23" _null_ _null_ _null_ _null_ pg_stat_get_backend_activity_start _null_ _null_ _null_ ));
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 3dd5f45..98ffbdd 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -610,6 +610,9 @@ typedef struct PgBackendStatus
 
 	/* current command string; MUST be null-terminated */
 	char	   *st_activity;
+
+	/* progress indicator */
+	double		st_progress;
 } PgBackendStatus;
 
 /*
@@ -690,6 +693,7 @@ extern void pgstat_report_activity(const char *cmd_str);
 extern void pgstat_report_appname(const char *appname);
 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
 extern void pgstat_report_waiting(bool waiting);
+extern void pgstat_report_progress(double work_done, double work_total);
 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
 
 extern void pgstat_initstats(Relation rel);
#2Stephen Frost
sfrost@snowman.net
In reply to: Peter Eisentraut (#1)
Re: Progress indication prototype

* Peter Eisentraut (peter_e@gmx.net) wrote:

Other comments?

Will we be able to use it for psql while keeping just one database
connection? I assume the answer is 'no', but it sure would be nice..

Perhaps psql could do something for \copy commands, anyway, but it'd be
independent.

Thanks,

Stephen

#3Erik Rijkers
er@xs4all.nl
In reply to: Peter Eisentraut (#1)
Re: Progress indication prototype

On Tue, August 17, 2010 07:19, Peter Eisentraut wrote:

Here is a small prototype for a query progress indicator.

The patch applies to cvs HEAD (9.1devel) and compiles OK, but make check fails.

./configure --prefix=/var/data1/pg_stuff/pg_installations/pgsql.progress_indicator
--with-pgport=6548 --quiet --enable-depend --enable-cassert --enable-debug --with-openssl
--with-perl --with-libxml

Running initdb manually gives the following error:

$ /var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/bin/initdb -U rijkers -D
/var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/data -E UTF8 -A md5
--pwfile=/var/data1/pg_stuff/.90devel
The files belonging to this database system will be owned by user "rijkers".
This user must also own the server process.

The database cluster will be initialized with locale en_US.UTF-8.
The default text search configuration will be set to "english".

creating directory /var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/data ... ok
creating subdirectories ... ok
selecting default max_connections ... 100
selecting default shared_buffers ... 32MB
creating configuration files ... ok
creating template1 database in
/var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/data/base/1 ... FATAL: could not
create unique index "pg_proc_oid_index"
DETAIL: Key (oid)=(2614) is duplicated.
child process exited with exit code 1
initdb: removing data directory "/var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/data"

this is on centos 5.4 - x86_64 GNU/Linux (2.6.18-164.el5)

Erik Rijkers

#4Peter Eisentraut
peter_e@gmx.net
In reply to: Stephen Frost (#2)
Re: Progress indication prototype

On tis, 2010-08-17 at 08:31 -0400, Stephen Frost wrote:

Will we be able to use it for psql while keeping just one database
connection? I assume the answer is 'no', but it sure would be nice..

How do you expect that to behave? I suppose the backend could send
NOTICE-like messages every 1% or so, and then psql could try to display
that in some way (which?), but then I suspect that a) it will annoy some
people, so b) it will have to be off by default, and then c) it won't be
enabled when you need it.

#5Peter Eisentraut
peter_e@gmx.net
In reply to: Erik Rijkers (#3)
Re: Progress indication prototype

On tis, 2010-08-17 at 15:59 +0200, Erik Rijkers wrote:

creating template1 database in
/var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/data/base/1 ... FATAL: could not
create unique index "pg_proc_oid_index"
DETAIL: Key (oid)=(2614) is duplicated.

Probably merge conflict with parallel developments. Try changing the
OID.

#6Alex Hunsaker
badalex@gmail.com
In reply to: Stephen Frost (#2)
Re: Progress indication prototype

On Tue, Aug 17, 2010 at 06:31, Stephen Frost <sfrost@snowman.net> wrote:

* Peter Eisentraut (peter_e@gmx.net) wrote:

Other comments?

Will we be able to use it for psql while keeping just one database
connection?  I assume the answer is 'no', but it sure would be nice..

I think thats something that could be worked out in libpq after this
patch. Although I'd bump your nice to an awesome.

#7Stephen Frost
sfrost@snowman.net
In reply to: Alex Hunsaker (#6)
Re: Progress indication prototype

* Alex Hunsaker (badalex@gmail.com) wrote:

On Tue, Aug 17, 2010 at 06:31, Stephen Frost <sfrost@snowman.net> wrote:

* Peter Eisentraut (peter_e@gmx.net) wrote:

Other comments?

Will we be able to use it for psql while keeping just one database
connection?  I assume the answer is 'no', but it sure would be nice..

I think thats something that could be worked out in libpq after this
patch. Although I'd bump your nice to an awesome.

If it was configurable via \set and I could drop it in my .psqlrc, and
it knew not to only do it after a few seconds (otherwise it'd be far too
much)...

I don't like how the backend would have to send something NOTICE-like, I
had originally been thinking "gee, it'd be nice if psql could query
pg_stat while doing something else", but that's not really possible...
So, I guess NOTICE-like messages would work, if the backend could be
taught to do it.

Thanks,

Stephen

#8Erik Rijkers
er@xs4all.nl
In reply to: Peter Eisentraut (#5)
Re: Progress indication prototype

On Tue, August 17, 2010 19:13, Peter Eisentraut wrote:

On tis, 2010-08-17 at 15:59 +0200, Erik Rijkers wrote:

creating template1 database in
/var/data1/pg_stuff/pg_installations/pgsql.progress_indicator/data/base/1 ... FATAL: could not
create unique index "pg_proc_oid_index"
DETAIL: Key (oid)=(2614) is duplicated.

Probably merge conflict with parallel developments. Try changing the
OID.

Could you elaborate? What is a 'merge conflict'? Or 'parallel developments'?

Do you mean the current git conversion? (I get source from a local rsync'ed cvs repository)

How can I 'change OID'? This error comes out of an initial initdb run. (There are several other
test-instances on this machine (several running), but with their own $PGDATA, $PGPORT. - they
can't interfere with each other, can they?)

thanks,

Erik Rijkers

#9Bernd Helmle
mailings@oopsware.de
In reply to: Erik Rijkers (#8)
Re: Progress indication prototype

--On 17. August 2010 20:08:51 +0200 Erik Rijkers <er@xs4all.nl> wrote:

How can I 'change OID'? This error comes out of an initial initdb run.
(There are several other test-instances on this machine (several
running), but with their own $PGDATA, $PGPORT. - they can't interfere
with each other, can they?)

I assume Peter means an OID conflict, resulting from concurrent patches or
drifting code.

Looks like pg_stat_get_backend_progress() has a conflict in current HEAD
with xmlexists() (both will get 2614 in my current version of pg_proc.h).
You need to resolve this to have initdb succeed.

--
Thanks

Bernd

#10Greg Stark
stark@mit.edu
In reply to: Peter Eisentraut (#1)
Re: Progress indication prototype

(Sorry for top posting and for any typos -- typing on my phone)

In my earlier patch to do progress indicators for arbitrary SQL queries I
had envisioned a setup similar to how we handle query cancellation. Psql
could support a key like SIGINFO which would make it request an update.
Clients like pgadmin would either do that periodically or set some guc or
protocol option to request periodic updates in advance.

greg

On 17 Aug 2010 19:07, "Stephen Frost" <sfrost@snowman.net> wrote:

* Alex Hunsaker (badalex@gmail.com) wrote:

On Tue, Aug 17, 2010 at 06:31, Stephen Frost <sfrost@sn...

If it was configurable via \set and I could drop it in my .psqlrc, and
it knew not to only do it after a few seconds (otherwise it'd be far too
much)...

I don't like how the backend would have to send something NOTICE-like, I
had originally been thinking "gee, it'd be nice if psql could query
pg_stat while doing something else", but that's not really possible...
So, I guess NOTICE-like messages would work, if the backend could be
taught to do it.

Thanks,

Stephen

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)

iEYEARECAAYFAkxqzFwACgkQrzgMPqB3kijVbACfWkUc/A5+6NaViTf8f9yrN/vT
Y3AAn1eDvj4meqxlr05r0L51j+OypNqs
=f+ya
-----END PGP SIGNATURE-----

#11Dave Page
dpage@pgadmin.org
In reply to: Greg Stark (#10)
Re: Progress indication prototype

On Tue, Aug 17, 2010 at 10:53 PM, Greg Stark <stark@mit.edu> wrote:

(Sorry for top posting and for any typos -- typing on my phone)

In my earlier patch to do progress indicators for arbitrary SQL queries I
had envisioned a setup similar to how we handle query cancellation. Psql
could support a key like SIGINFO which would make it request an update.
Clients like pgadmin would either do that periodically or set some guc or
protocol option to request periodic updates in advance.

Which is ideal for monitoring your own connection - having the info in
the pg_stat_activity is also valuable for monitoring and system
administration. Both would be ideal :-)

--
Dave Page
Blog: http://pgsnake.blogspot.com
Twitter: @pgsnake

EnterpriseDB UK: http://www.enterprisedb.com
The Enterprise Postgres Company

#12Greg Stark
stark@mit.edu
In reply to: Dave Page (#11)
Re: Progress indication prototype

On Tue, Aug 17, 2010 at 11:29 PM, Dave Page <dpage@pgadmin.org> wrote:

Which is ideal for monitoring your own connection - having the info in
the pg_stat_activity is also valuable for monitoring and system
administration. Both would be ideal :-)

Hm, I think I've come around to the idea that having the info in
pg_stat_activity would be very nice. I can just picture sitting in
pgadmin while a bunch of reports are running and seeing progress bars
for all of them...

But progress bars alone aren't really the big prize. I would really
love to see the explain plans for running queries. This would improve
the DBAs view of what's going on in the system immensely. Currently
you have to grab the query and try to set up a similar environment for
it to run explain on it. If analyze has run since or if the tables
have grown or shrank or if the query was run with some constants as
parameters it can be awkward. If some of the tables in the query were
temporary tables it can be impossible. You can never really be sure
you're looking at precisely the same plan than the other user's
session is running.

But stuffing the whole json or xml explain plan into pg_stat_activity
seems like it doesn't really fit the same model that the existing
infrastructure is designed around. It could be quite large and if we
want to support progress feedback it could change quite frequently.

We do stuff the whole query there (up to a limited size) so maybe I'm
all wet and stuffing the explain plan in there would be fine?

--
greg

#13Thom Brown
thom@linux.com
In reply to: Greg Stark (#12)
Re: Progress indication prototype

On 18 August 2010 13:45, Greg Stark <stark@mit.edu> wrote:

On Tue, Aug 17, 2010 at 11:29 PM, Dave Page <dpage@pgadmin.org> wrote:

Which is ideal for monitoring your own connection - having the info in
the pg_stat_activity is also valuable for monitoring and system
administration. Both would be ideal :-)

Hm, I think I've come around to the idea that having the info in
pg_stat_activity would be very nice. I can just picture sitting in
pgadmin while a bunch of reports are running and seeing progress bars
for all of them...

But progress bars alone aren't really the big prize. I would really
love to see the explain plans for running queries.

Do you mean just see the explain plan? Or see at what stage of the
plan the query has reached? I think the latter would be awesome. And
if it's broken down by step, wouldn't it be feasible to knew how far
through that step it's got for some steps? Obviously for ones with a
LIMIT applied it wouldn't know how far through it had got, but for
things like a sequential scan or sort it should be able to indicate
how far through it is.

--
Thom Brown
Registered Linux user: #516935

#14Robert Haas
robertmhaas@gmail.com
In reply to: Greg Stark (#12)
Re: Progress indication prototype

On Wed, Aug 18, 2010 at 8:45 AM, Greg Stark <stark@mit.edu> wrote:

On Tue, Aug 17, 2010 at 11:29 PM, Dave Page <dpage@pgadmin.org> wrote:

Which is ideal for monitoring your own connection - having the info in
the pg_stat_activity is also valuable for monitoring and system
administration. Both would be ideal :-)

Hm, I think I've come around to the idea that having the info in
pg_stat_activity would be very nice. I can just picture sitting in
pgadmin while a bunch of reports are running and seeing progress bars
for all of them...

But progress bars alone aren't really the big prize. I would really
love to see the explain plans for running queries. This would improve
the DBAs view of what's going on in the system immensely. Currently
you have to grab the query and try to set up a similar environment for
it to run explain on it. If analyze has run since or if the tables
have grown or shrank or if the query was run with some constants as
parameters it can be awkward. If some of the tables in the query were
temporary tables it can be impossible. You can never really be sure
you're looking at precisely the same plan than the other user's
session is running.

But stuffing the whole json or xml explain plan into pg_stat_activity
seems like it doesn't really fit the same model that the existing
infrastructure is designed around. It could be quite large and if we
want to support progress feedback it could change quite frequently.

We do stuff the whole query there (up to a limited size) so maybe I'm
all wet and stuffing the explain plan in there would be fine?

It seems to me that progress reporting could add quite a bit of
overhead. For example, in the whole-database vacuum case, the most
logical way to report progress would be to compute pages visited
divided by pages to be visited. But the total number of pages to be
visited is something that doesn't need to be computed in advance
unless someone cares about progress. I don't think we want to incur
that overhead in all cases just on the off chance someone might ask.
We need to think about ways to structure this so that it only costs
when someone's using it.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise Postgres Company

#15Peter Eisentraut
peter_e@gmx.net
In reply to: Stephen Frost (#7)
Re: Progress indication prototype

On tis, 2010-08-17 at 13:52 -0400, Stephen Frost wrote:

I don't like how the backend would have to send something NOTICE-like,
I had originally been thinking "gee, it'd be nice if psql could query
pg_stat while doing something else", but that's not really possible...
So, I guess NOTICE-like messages would work, if the backend could be
taught to do it.

That should be doable; you'd just have to do some ereport(NOTICE)
variant inside pgstat_report_progress and have a switch to turn it on
and off, and have psql do something with it. The latter is really the
interesting part; the former is relatively easy once the general
framework is in place.

#16Peter Eisentraut
peter_e@gmx.net
In reply to: Greg Stark (#12)
Re: Progress indication prototype

On ons, 2010-08-18 at 13:45 +0100, Greg Stark wrote:

But progress bars alone aren't really the big prize. I would really
love to see the explain plans for running queries.

The auto_explain module does that already.

#17A.M.
agentm@themactionfaction.com
In reply to: Robert Haas (#14)
Re: Progress indication prototype

On Aug 18, 2010, at 9:02 AM, Robert Haas wrote:

On Wed, Aug 18, 2010 at 8:45 AM, Greg Stark <stark@mit.edu> wrote:

On Tue, Aug 17, 2010 at 11:29 PM, Dave Page <dpage@pgadmin.org> wrote:

Which is ideal for monitoring your own connection - having the info in
the pg_stat_activity is also valuable for monitoring and system
administration. Both would be ideal :-)

Hm, I think I've come around to the idea that having the info in
pg_stat_activity would be very nice. I can just picture sitting in
pgadmin while a bunch of reports are running and seeing progress bars
for all of them...

But progress bars alone aren't really the big prize. I would really
love to see the explain plans for running queries. This would improve
the DBAs view of what's going on in the system immensely. Currently
you have to grab the query and try to set up a similar environment for
it to run explain on it. If analyze has run since or if the tables
have grown or shrank or if the query was run with some constants as
parameters it can be awkward. If some of the tables in the query were
temporary tables it can be impossible. You can never really be sure
you're looking at precisely the same plan than the other user's
session is running.

But stuffing the whole json or xml explain plan into pg_stat_activity
seems like it doesn't really fit the same model that the existing
infrastructure is designed around. It could be quite large and if we
want to support progress feedback it could change quite frequently.

We do stuff the whole query there (up to a limited size) so maybe I'm
all wet and stuffing the explain plan in there would be fine?

It seems to me that progress reporting could add quite a bit of
overhead. For example, in the whole-database vacuum case, the most
logical way to report progress would be to compute pages visited
divided by pages to be visited. But the total number of pages to be
visited is something that doesn't need to be computed in advance
unless someone cares about progress. I don't think we want to incur
that overhead in all cases just on the off chance someone might ask.
We need to think about ways to structure this so that it only costs
when someone's using it.

I wish that I could get explain analyze output step-by-step while running a long query instead of seeing it jump out at the end of execution. Some queries "never" end and it would be nice to see which step is spinning (explain can be a red herring). To me the "progress bar" is nice, but I don't see how it would be reliable enough to draw any inferences (such as execution time). If I could get the explain analyze results *and* the actual query results, that would be a huge win, too.

Cheers,
M

#18Itagaki Takahiro
itagaki.takahiro@gmail.com
In reply to: Peter Eisentraut (#1)
Re: Progress indication prototype

On Tue, Aug 17, 2010 at 2:19 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

Here is a small prototype for a query progress indicator.

I read and tested the patch. Here are comments to the code itself.

- Oid of pg_stat_get_backend_progress() must be changed because we are using
the id for another function.
- One complier warning:
copy.c:1702: warning: ‘file_size’ may be used uninitialized in this function
- We can move the division "work_done/work_total" to outside of
st_changecount++ block.

Past discussions seemed to indicate that the best place to report this
would be in pg_stat_activity.

Agreed. BTW, "query_progress" column shows NaN if progress
counter is unavailable, but NULL would be better.

VACUUM (lazy) (also autovacuum), table-rewriting ALTER TABLE

We could also support VACUUM FULL, CLUSTER, CREATE INDEX and REINDEX.

COPY out from table, COPY in from file,

COPY FROM STDIN shows Infinity, but NULL might be better, too.

a very simple query.

SELECT * FROM tbl;
can report reasonable progress, but
SELECT count(*) FROM tbl;
cannot, because planned_tuple_count of the aggregation is 1.
I hope better solutions for the grouping case because they are used
in complex queries, where the progress counter is eagerly wanted.

- Are the interfaces OK?

I like the new column in pg_stat_activity to "pull" the progress.
In addition, as previously discussed, we could also have "push"
notifications; Ex. GUC parameter "notice_per_progress" (0.0-1.0),
or periodical NOTIFY messages.

- Is this going to be too slow to be useful?
- Should there be a separate switch to turn it on (currently
track_activities)?

I think we can always track the counters because shared memory
based counters are lightweight enough.

- How to handle commands that process multiple tables?  For example,
lazy VACUUM on a single table is pretty easy to track (count the block
numbers), but what about a database-wide lazy VACUUM?

Not only a database-wide lazy VACUUM but also some of maintenance
commands have non-linear progress -- Progress of index scans in VACUUM
is not linear. ALTER TABLE could have REINDEX after table rewrites.

We might need to have arbitrary knowledges for the non-uniform commands;
For example, "CREATE INDEX assigns 75% of the progress for table scan,
and 25% for the final merging of tapes".

--
Itagaki Takahiro

#19Peter Eisentraut
peter_e@gmx.net
In reply to: Itagaki Takahiro (#18)
Re: Progress indication prototype

On tor, 2010-09-16 at 15:47 +0900, Itagaki Takahiro wrote:

On Tue, Aug 17, 2010 at 2:19 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

VACUUM (lazy) (also autovacuum), table-rewriting ALTER TABLE

We could also support VACUUM FULL, CLUSTER, CREATE INDEX and REINDEX.

Well, yeah, but those are a lot harder to do. ;-)

a very simple query.

SELECT * FROM tbl;
can report reasonable progress, but
SELECT count(*) FROM tbl;
cannot, because planned_tuple_count of the aggregation is 1.
I hope better solutions for the grouping case because they are used
in complex queries, where the progress counter is eagerly wanted.

I think that's a problem for a later day. Once we have the interfaces
to report the progress, someone (else) can investigate how to track
progress of arbitrary queries.

- Are the interfaces OK?

I like the new column in pg_stat_activity to "pull" the progress.
In addition, as previously discussed, we could also have "push"
notifications; Ex. GUC parameter "notice_per_progress" (0.0-1.0),
or periodical NOTIFY messages.

That's a three-line change in pgstat_report_progress() in the simplest
case. Maybe also something to consider later.

- How to handle commands that process multiple tables? For example,
lazy VACUUM on a single table is pretty easy to track (count the block
numbers), but what about a database-wide lazy VACUUM?

Not only a database-wide lazy VACUUM but also some of maintenance
commands have non-linear progress -- Progress of index scans in VACUUM
is not linear. ALTER TABLE could have REINDEX after table rewrites.

We might need to have arbitrary knowledges for the non-uniform commands;
For example, "CREATE INDEX assigns 75% of the progress for table scan,
and 25% for the final merging of tapes".

Maybe another approach is to forget about presenting progress
numerically. Instead, make it a string that saying something like, for
example for database-wide VACUUM, 'table 1/14 block 5/32'. That way you
can cover anything you want, and you give the user the most accurate
information available, but then you can't do things like sort
pg_stat_activitiy by expected end time, or display a progress bar. Or
of course we could do numerically and string, but that might be a bit
too much clutter.

#20Robert Haas
robertmhaas@gmail.com
In reply to: Peter Eisentraut (#19)
Re: Progress indication prototype

On Thu, Sep 16, 2010 at 2:52 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

a very simple query.

  SELECT * FROM tbl;
can report reasonable progress, but
  SELECT count(*) FROM tbl;
cannot, because planned_tuple_count of the aggregation is 1.
I hope better solutions for the grouping case because they are used
in complex queries, where the progress counter is eagerly wanted.

I think that's a problem for a later day.  Once we have the interfaces
to report the progress, someone (else) can investigate how to track
progress of arbitrary queries.

I reiterate my earlier criticism of this whole approach: it seems to
assume that computing query progress is something inexpensive enough
that we can afford to do it regardless of whether anyone is looking.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise Postgres Company

#21Peter Eisentraut
peter_e@gmx.net
In reply to: Robert Haas (#20)
Re: Progress indication prototype

On tor, 2010-09-16 at 15:56 -0400, Robert Haas wrote:

I reiterate my earlier criticism of this whole approach: it seems to
assume that computing query progress is something inexpensive enough
that we can afford to do it regardless of whether anyone is looking.

That assumption appears to hold so far.

Anyway, do you have an alternative suggestion?

#22Robert Haas
robertmhaas@gmail.com
In reply to: Peter Eisentraut (#21)
Re: Progress indication prototype

On Thu, Sep 16, 2010 at 4:57 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

On tor, 2010-09-16 at 15:56 -0400, Robert Haas wrote:

I reiterate my earlier criticism of this whole approach: it seems to
assume that computing query progress is something inexpensive enough
that we can afford to do it regardless of whether anyone is looking.

That assumption appears to hold so far.

It seems unlikely to hold in the general case, though, particularly if
you want to do it to be accurate. The problems with database-wide
VACUUM seem likely to be the tip of the iceberg.

Anyway, do you have an alternative suggestion?

I think that there should be a function which returns just this one
piece of data and is not automatically called as part of select * from
pg_stat_activity. Then we could eventually decide to give backends a
way to know if that function had been invoked on them and how
recently.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise Postgres Company

#23Peter Eisentraut
peter_e@gmx.net
In reply to: Robert Haas (#22)
Re: Progress indication prototype

On tor, 2010-09-16 at 19:14 -0400, Robert Haas wrote:

I think that there should be a function which returns just this one
piece of data and is not automatically called as part of select * from
pg_stat_activity. Then we could eventually decide to give backends a
way to know if that function had been invoked on them and how
recently.

Displaying this as part of pg_stat_activity is completely trivial: it's
just displaying the value of a float variable.

It seems you are advocating a completely different architecture, where
someone can find out on demand what the progress or status of another
session is, without that other session having known about that request
before it started its current command. But that seems pretty outlandish
to me, and I would ask for more details on what you have in mind.

#24Robert Haas
robertmhaas@gmail.com
In reply to: Peter Eisentraut (#23)
Re: Progress indication prototype

On Fri, Sep 17, 2010 at 4:50 PM, Peter Eisentraut <peter_e@gmx.net> wrote:

On tor, 2010-09-16 at 19:14 -0400, Robert Haas wrote:

I think that there should be a function which returns just this one
piece of data and is not automatically called as part of select * from
pg_stat_activity.  Then we could eventually decide to give backends a
way to know if that function had been invoked on them and how
recently.

Displaying this as part of pg_stat_activity is completely trivial: it's
just displaying the value of a float variable.

It seems you are advocating a completely different architecture, where
someone can find out on demand what the progress or status of another
session is, without that other session having known about that request
before it started its current command.  But that seems pretty outlandish
to me, and I would ask for more details on what you have in mind.

What you just said is about what I had in mind. I admit I can't
articulate a more detailed design right off the top of my head, but
the architecture you're proposing seems dead certain to never cover
more than 0.1% of what people actually do. If there's not even an
obvious way of generalizing this to the case of a full-database
VACUUM, let alone actual queries, that seems like a strong hint that
it might be badly designed. Leaving some parts of the problem for
future development is perfectly reasonable, but there should be some
realistic hope that the next guy will be able to make some further
progress.

It seems to me that this is the sort of information that people will
normally never see, and therefore won't be willing to pay a
performance penalty for. But when they need it (because something is
running long) they'll be happy to pay a modest penalty to get it.
Which is good, because the chances that we'll be able to provide this
information "for free" seem very poor even for utility commands. But
it also means that we shouldn't carve the "can get this for free"
aspect of it into stone.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise Postgres Company

#25Dimitri Fontaine
dfontaine@hi-media.com
In reply to: Robert Haas (#24)
Re: Progress indication prototype

Robert Haas <robertmhaas@gmail.com> writes:

What you just said is about what I had in mind. I admit I can't
articulate a more detailed design right off the top of my head, but
the architecture you're proposing seems dead certain to never cover
more than 0.1% of what people actually do.

Well, considering what we have now, the proposal is solid enough for
me. As far as supporting VACUUM or REINDEX operations, my feeling is
that offering a way to report current block being worked on and being
able to see that move is enough a progress indication.

We know for sure we won't be able to provide a reliable progress bar,
and I don't think adopting the famous windows time units (the longest
remaining second known) would do any good for the project.

Regards,
--
dim

#26Tom Lane
tgl@sss.pgh.pa.us
In reply to: Dimitri Fontaine (#25)
Re: Progress indication prototype

Dimitri Fontaine <dfontaine@hi-media.com> writes:

Robert Haas <robertmhaas@gmail.com> writes:

What you just said is about what I had in mind. I admit I can't
articulate a more detailed design right off the top of my head, but
the architecture you're proposing seems dead certain to never cover
more than 0.1% of what people actually do.

Well, considering what we have now, the proposal is solid enough for
me. As far as supporting VACUUM or REINDEX operations, my feeling is
that offering a way to report current block being worked on and being
able to see that move is enough a progress indication.

I don't really think that that would satisfy anybody. If you want to be
reassured that VACUUM is doing something, you can look at iostat
numbers, or strace the process, or something like that. What people
expect from a progress indicator is to get some idea of how much longer
the operation is likely to take, and current block doesn't do it
(mainly because it doesn't account for index cleanup scans). REINDEX
is even worse: how do you estimate progress when there's a table scan,
then a sort, then the actual index build?

I'm with Robert on this. A facility that's limited to information we
can get "for free" (and btw, it isn't even for free, only for relatively
cheap) isn't going to get the job done. We should be looking for
something that expends extra cycles when the information is demanded,
and otherwise not.

regards, tom lane

#27Mark Kirkwood
mark.kirkwood@catalyst.net.nz
In reply to: Peter Eisentraut (#1)
Re: Progress indication prototype

I had a small play with this. Pretty cool to be able to track progress
for COPY and VACUUM jobs. For some reason I could never elicit any
numbers (other than the default Nan) for a query (tried 'SELECT count(*)
FROM pgbench_accounts' but figured aggregates probably don't qualify as
simple, and also 'SELECT * FROM pgbench_accounts').

I'm thinking that complex queries is precisely where people would want
to see this sort of indicator - but maybe have a read of:

http://research.microsoft.com/pubs/76171/progress.pdf

This paper seems to suggest that there are real classes of query where a
useful progress indicator is going to be extremely hard to construct.
However it may still be a useful feature to have for all those other
queries!

Also I'm inclined to agree with Robert and think that a more accurate,
more performance obtrusive but settable on demand implementation is the
way to go.

Cheers

Mark

Show quoted text

On 17/08/10 17:19, Peter Eisentraut wrote:

Here is a small prototype for a query progress indicator.

Past discussions seemed to indicate that the best place to report this
would be in pg_stat_activity. So that's what this does. You can try it
out with any of the following (on a sufficiently large table): VACUUM
(lazy) (also autovacuum), COPY out from table, COPY in from file,
table-rewriting ALTER TABLE (e.g., add column with default), or a very
simple query. Run the command, and stare at pg_stat_activity (perhaps
via "watch") in a separate session.

More can be added, and the exact placement of the counters is debatable,
but those might be details to be worked out later. Note, my emphasis
here is on maintenance commands; I don't plan to do a progress
estimation of complex queries at this time.

Some thoughts:

- Are the interfaces OK?

- Is this going to be too slow to be useful?

- Should there be a separate switch to turn it on (currently
track_activities)?

- How to handle commands that process multiple tables? For example,
lazy VACUUM on a single table is pretty easy to track (count the block
numbers), but what about a database-wide lazy VACUUM?