[patch] executor and slru dtrace probes

Started by Zdenek Kotalaabout 16 years ago15 messages
#1Zdenek Kotala
Zdenek.Kotala@Sun.COM
1 attachment(s)

I attached patch which was already sent on february/april, but it was
lost in time. It is originally from Robert Lor and Theo Schlossnagle.

It contains two DTrace probe groups. One is related to monitoring SLRU
and second is about executor nodes.

I merged it with the head.

Original end of mail thread is here:

http://archives.postgresql.org/pgsql-hackers/2009-04/msg00148.php

Zdenek

Attachments:

slru_executor_dtrace.patchtext/x-patch; CHARSET=US-ASCII; name=slru_executor_dtrace.patchDownload
diff -r 68b8827f4738 src/backend/access/transam/slru.c
--- a/src/backend/access/transam/slru.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/access/transam/slru.c	Fri Nov 13 23:29:14 2009 +0100
@@ -57,6 +57,7 @@
 #include "storage/fd.h"
 #include "storage/shmem.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 
 
 /*
@@ -372,6 +373,7 @@
 {
 	SlruShared	shared = ctl->shared;
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok, xid);
 	/* Outer loop handles restart if we must wait for someone else's I/O */
 	for (;;)
 	{
@@ -399,6 +401,7 @@
 			}
 			/* Otherwise, it's ready to use */
 			SlruRecentlyUsed(shared, slotno);
+			TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno);
 			return slotno;
 		}
 
@@ -446,6 +449,7 @@
 			SlruReportIOError(ctl, pageno, xid);
 
 		SlruRecentlyUsed(shared, slotno);
+		TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno);
 		return slotno;
 	}
 }
@@ -470,6 +474,8 @@
 	SlruShared	shared = ctl->shared;
 	int			slotno;
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid);
+
 	/* Try to find the page while holding only shared lock */
 	LWLockAcquire(shared->ControlLock, LW_SHARED);
 
@@ -511,6 +517,8 @@
 	int			pageno = shared->page_number[slotno];
 	bool		ok;
 
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno);
+
 	/* If a write is in progress, wait for it to finish */
 	while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
 		   shared->page_number[slotno] == pageno)
@@ -525,7 +533,10 @@
 	if (!shared->page_dirty[slotno] ||
 		shared->page_status[slotno] != SLRU_PAGE_VALID ||
 		shared->page_number[slotno] != pageno)
+	{
+		TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE();
 		return;
+	}
 
 	/*
 	 * Mark the slot write-busy, and clear the dirtybit.  After this point, a
@@ -569,6 +580,8 @@
 	/* Now it's okay to ereport if we failed */
 	if (!ok)
 		SlruReportIOError(ctl, pageno, InvalidTransactionId);
+
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE();
 }
 
 /*
@@ -593,6 +606,8 @@
 
 	SlruFileName(ctl, path, segno);
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path, pageno, slotno);
+
 	/*
 	 * In a crash-and-restart situation, it's possible for us to receive
 	 * commands to set the commit status of transactions whose bits are in
@@ -607,6 +622,7 @@
 		{
 			slru_errcause = SLRU_OPEN_FAILED;
 			slru_errno = errno;
+			TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 
@@ -614,6 +630,7 @@
 				(errmsg("file \"%s\" doesn't exist, reading as zeroes",
 						path)));
 		MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
 		return true;
 	}
 
@@ -622,6 +639,7 @@
 		slru_errcause = SLRU_SEEK_FAILED;
 		slru_errno = errno;
 		close(fd);
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -631,6 +649,7 @@
 		slru_errcause = SLRU_READ_FAILED;
 		slru_errno = errno;
 		close(fd);
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -638,9 +657,12 @@
 	{
 		slru_errcause = SLRU_CLOSE_FAILED;
 		slru_errno = errno;
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
+
 	return true;
 }
 
@@ -668,6 +690,8 @@
 	char		path[MAXPGPATH];
 	int			fd = -1;
 
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno, slotno);
+
 	/*
 	 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
 	 * write out data before associated WAL records.  This is the same action
@@ -753,6 +777,7 @@
 		{
 			slru_errcause = SLRU_OPEN_FAILED;
 			slru_errno = errno;
+			TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 
@@ -781,6 +806,7 @@
 		slru_errno = errno;
 		if (!fdata)
 			close(fd);
+		TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -794,6 +820,7 @@
 		slru_errno = errno;
 		if (!fdata)
 			close(fd);
+		TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -808,6 +835,7 @@
 			slru_errcause = SLRU_FSYNC_FAILED;
 			slru_errno = errno;
 			close(fd);
+			TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 
@@ -815,10 +843,12 @@
 		{
 			slru_errcause = SLRU_CLOSE_FAILED;
 			slru_errno = errno;
+			TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 	}
 
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, -1, -1);
 	return true;
 }
 
diff -r 68b8827f4738 src/backend/executor/execScan.c
--- a/src/backend/executor/execScan.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/execScan.c	Fri Nov 13 23:29:14 2009 +0100
@@ -20,6 +20,7 @@
 
 #include "executor/executor.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -121,6 +122,8 @@
 	qual = node->ps.qual;
 	projInfo = node->ps.ps_ProjInfo;
 
+	TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan *)node->ps.plan)->scanrelid, (uintptr_t)accessMtd);
+
 	/*
 	 * If we have neither a qual to check nor a projection to do, just skip
 	 * all the overhead and return the raw scan tuple.
diff -r 68b8827f4738 src/backend/executor/nodeAgg.c
--- a/src/backend/executor/nodeAgg.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeAgg.c	Fri Nov 13 23:29:14 2009 +0100
@@ -81,6 +81,7 @@
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_oper.h"
+#include "pg_trace.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -818,6 +819,8 @@
 		node->ss.ps.ps_TupFromTlist = false;
 	}
 
+	TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *) node->ss.ps.plan)->aggstrategy);
+
 	/*
 	 * Exit if nothing left to do.  (We must do the ps_TupFromTlist check
 	 * first, because in some cases agg_done gets set before we emit the
diff -r 68b8827f4738 src/backend/executor/nodeGroup.c
--- a/src/backend/executor/nodeGroup.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeGroup.c	Fri Nov 13 23:29:14 2009 +0100
@@ -24,6 +24,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeGroup.h"
+#include "pg_trace.h"
 
 
 /*
@@ -49,6 +50,8 @@
 	numCols = ((Group *) node->ss.ps.plan)->numCols;
 	grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx;
 
+	TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols);
+
 	/*
 	 * Check to see if we're still projecting out tuples from a previous group
 	 * tuple (because there is a function-returning-set in the projection
diff -r 68b8827f4738 src/backend/executor/nodeHash.c
--- a/src/backend/executor/nodeHash.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeHash.c	Fri Nov 13 23:29:14 2009 +0100
@@ -33,6 +33,7 @@
 #include "executor/nodeHashjoin.h"
 #include "miscadmin.h"
 #include "parser/parse_expr.h"
+#include "pg_trace.h"
 #include "utils/dynahash.h"
 #include "utils/memutils.h"
 #include "utils/lsyscache.h"
@@ -79,6 +80,8 @@
 	ExprContext *econtext;
 	uint32		hashvalue;
 
+	TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node);
+
 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
 		InstrStartNode(node->ps.instrument);
diff -r 68b8827f4738 src/backend/executor/nodeHashjoin.c
--- a/src/backend/executor/nodeHashjoin.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeHashjoin.c	Fri Nov 13 23:29:14 2009 +0100
@@ -19,6 +19,7 @@
 #include "executor/hashjoin.h"
 #include "executor/nodeHash.h"
 #include "executor/nodeHashjoin.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -61,6 +62,8 @@
 	uint32		hashvalue;
 	int			batchno;
 
+	TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node);
+
 	/*
 	 * get information from HashJoin node
 	 */
diff -r 68b8827f4738 src/backend/executor/nodeLimit.c
--- a/src/backend/executor/nodeLimit.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeLimit.c	Fri Nov 13 23:29:14 2009 +0100
@@ -23,6 +23,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeLimit.h"
+#include "pg_trace.h"
 
 static void recompute_limits(LimitState *node);
 
@@ -41,6 +42,8 @@
 	TupleTableSlot *slot;
 	PlanState  *outerPlan;
 
+	TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node);
+
 	/*
 	 * get information from the node
 	 */
diff -r 68b8827f4738 src/backend/executor/nodeMaterial.c
--- a/src/backend/executor/nodeMaterial.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeMaterial.c	Fri Nov 13 23:29:14 2009 +0100
@@ -24,6 +24,7 @@
 #include "executor/executor.h"
 #include "executor/nodeMaterial.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 
 /* ----------------------------------------------------------------
  *		ExecMaterial
@@ -45,6 +46,8 @@
 	bool		eof_tuplestore;
 	TupleTableSlot *slot;
 
+	TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node);
+
 	/*
 	 * get state info from node
 	 */
diff -r 68b8827f4738 src/backend/executor/nodeMergejoin.c
--- a/src/backend/executor/nodeMergejoin.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeMergejoin.c	Fri Nov 13 23:29:14 2009 +0100
@@ -98,6 +98,7 @@
 #include "executor/execdefs.h"
 #include "executor/nodeMergejoin.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "utils/acl.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -565,6 +566,8 @@
 	bool		doFillOuter;
 	bool		doFillInner;
 
+	TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node);
+
 	/*
 	 * get information from node
 	 */
diff -r 68b8827f4738 src/backend/executor/nodeNestloop.c
--- a/src/backend/executor/nodeNestloop.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeNestloop.c	Fri Nov 13 23:29:14 2009 +0100
@@ -23,6 +23,7 @@
 
 #include "executor/execdebug.h"
 #include "executor/nodeNestloop.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -67,6 +68,8 @@
 	List	   *otherqual;
 	ExprContext *econtext;
 
+	TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node);
+
 	/*
 	 * get information from the node
 	 */
diff -r 68b8827f4738 src/backend/executor/nodeSetOp.c
--- a/src/backend/executor/nodeSetOp.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeSetOp.c	Fri Nov 13 23:29:14 2009 +0100
@@ -46,6 +46,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeSetOp.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -196,6 +197,8 @@
 	SetOp	   *plannode = (SetOp *) node->ps.plan;
 	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
 
+	TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node);
+
 	/*
 	 * If the previously-returned tuple needs to be returned more than once,
 	 * keep returning it.
diff -r 68b8827f4738 src/backend/executor/nodeSort.c
--- a/src/backend/executor/nodeSort.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeSort.c	Fri Nov 13 23:29:14 2009 +0100
@@ -18,6 +18,7 @@
 #include "executor/execdebug.h"
 #include "executor/nodeSort.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "utils/tuplesort.h"
 
 
@@ -53,6 +54,8 @@
 	dir = estate->es_direction;
 	tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
 
+	TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir);
+
 	/*
 	 * If first time through, read all tuples from outer plan and pass them to
 	 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
diff -r 68b8827f4738 src/backend/executor/nodeSubplan.c
--- a/src/backend/executor/nodeSubplan.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeSubplan.c	Fri Nov 13 23:29:14 2009 +0100
@@ -24,6 +24,7 @@
 #include "executor/nodeSubplan.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
+#include "pg_trace.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -92,6 +93,8 @@
 	ExprContext *innerecontext = node->innerecontext;
 	TupleTableSlot *slot;
 
+	TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node);
+
 	/* Shouldn't have any direct correlation Vars */
 	if (subplan->parParam != NIL || node->args != NIL)
 		elog(ERROR, "hashed subplan with direct correlation not supported");
@@ -227,6 +230,8 @@
 	ListCell   *l;
 	ArrayBuildState *astate = NULL;
 
+	TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node);
+
 	/*
 	 * We are probably in a short-lived expression-evaluation context. Switch
 	 * to the per-query context for manipulating the child plan's chgParam,
diff -r 68b8827f4738 src/backend/executor/nodeUnique.c
--- a/src/backend/executor/nodeUnique.c	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/executor/nodeUnique.c	Fri Nov 13 23:29:14 2009 +0100
@@ -35,6 +35,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeUnique.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -50,6 +51,8 @@
 	TupleTableSlot *slot;
 	PlanState  *outerPlan;
 
+	TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node);
+
 	/*
 	 * get information from the node
 	 */
diff -r 68b8827f4738 src/backend/utils/probes.d
--- a/src/backend/utils/probes.d	Fri Nov 13 11:17:04 2009 +0000
+++ b/src/backend/utils/probes.d	Fri Nov 13 23:29:14 2009 +0100
@@ -15,6 +15,7 @@
  * in probe definitions, as they cause compilation errors on Mac OS X 10.5.
  */
 #define LocalTransactionId unsigned int
+#define TransactionId unsigned int
 #define LWLockId int
 #define LWLockMode int
 #define LOCKMODE int
@@ -90,4 +91,29 @@
 	probe xlog__switch();
 	probe wal__buffer__write__dirty__start();
 	probe wal__buffer__write__dirty__done();
+
+	probe slru__readpage__start(unsigned long, int, bool, TransactionId);
+	probe slru__readpage__done(int);
+	probe slru__readpage__readonly(unsigned long, int, TransactionId);
+	probe slru__writepage__start(unsigned long, int, int);
+	probe slru__writepage__done();
+	probe slru__readpage__physical__start(unsigned long, char *, int, int);
+	probe slru__readpage__physical__done(int, int, int);
+	probe slru__writepage__physical__start(unsigned long, int, int);
+	probe slru__writepage__physical__done(int, int, int);
+ 
+	probe executor__scan(unsigned long, unsigned int, unsigned long);
+	probe executor__agg(unsigned long, int);
+	probe executor__group(unsigned long, int);
+	probe executor__hash__multi(unsigned long);
+	probe executor__hashjoin(unsigned long);
+	probe executor__limit(unsigned long);
+	probe executor__material(unsigned long);
+	probe executor__mergejoin(unsigned long);
+	probe executor__nestloop(unsigned long);
+	probe executor__setop(unsigned long);
+	probe executor__sort(unsigned long, int);
+	probe executor__subplan__hash(unsigned long);
+	probe executor__subplan__scan(unsigned long);
+	probe executor__unique(unsigned long);
 };
#2Bernd Helmle
mailings@oopsware.de
In reply to: Zdenek Kotala (#1)
Re: [patch] executor and slru dtrace probes

--On 13. November 2009 23:29:41 +0100 Zdenek Kotala <Zdenek.Kotala@Sun.COM>
wrote:

t contains two DTrace probe groups. One is related to monitoring SLRU
and second is about executor nodes.

I merged it with the head.

Original end of mail thread is here:

http://archives.postgresql.org/pgsql-hackers/2009-04/msg00148.php

I've started to review this.

It seems to me the attached patch wasn't adjusted or discussed again to
address Tom's complaints? At least the executor probes contained here hold
still the same issues mentioned by Tom in the discussion linked here.

--
Thanks

Bernd

#3Zdenek Kotala
Zdenek.Kotala@Sun.COM
In reply to: Bernd Helmle (#2)
Re: [patch] executor and slru dtrace probes

Dne 8.12.09 00:27, Bernd Helmle napsal(a):

--On 13. November 2009 23:29:41 +0100 Zdenek Kotala
<Zdenek.Kotala@Sun.COM> wrote:

t contains two DTrace probe groups. One is related to monitoring SLRU
and second is about executor nodes.

I merged it with the head.

Original end of mail thread is here:

http://archives.postgresql.org/pgsql-hackers/2009-04/msg00148.php

I've started to review this.

It seems to me the attached patch wasn't adjusted or discussed again to
address Tom's complaints? At least the executor probes contained here
hold still the same issues mentioned by Tom in the discussion linked here.

I did not make any change. I only revival patch and merge it with head.
I think that SLRU probes are OK and acceptable.

Tom's issues with executor probes are still there and I expect
discussion about them. IIRC Theo uses these probes in production.

If you think that it is better I could split patch into two separate
patches and both can be reviewed separately.

thanks Zdenek

#4Theo Schlossnagle
jesus@omniti.com
In reply to: Zdenek Kotala (#3)
Re: [patch] executor and slru dtrace probes

On Dec 8, 2009, at 5:10 AM, Zdenek Kotala wrote:

Dne 8.12.09 00:27, Bernd Helmle napsal(a):

--On 13. November 2009 23:29:41 +0100 Zdenek Kotala <Zdenek.Kotala@Sun.COM> wrote:

t contains two DTrace probe groups. One is related to monitoring SLRU
and second is about executor nodes.

I merged it with the head.

Original end of mail thread is here:

http://archives.postgresql.org/pgsql-hackers/2009-04/msg00148.php

I've started to review this.
It seems to me the attached patch wasn't adjusted or discussed again to address Tom's complaints? At least the executor probes contained here hold still the same issues mentioned by Tom in the discussion linked here.

I did not make any change. I only revival patch and merge it with head. I think that SLRU probes are OK and acceptable.

Tom's issues with executor probes are still there and I expect discussion about them. IIRC Theo uses these probes in production.

If you think that it is better I could split patch into two separate patches and both can be reviewed separately.

I suppose I see it as a simple thing. The probes have no performance impact when they are not instrumented. I've used them on rare occasion to understand which exec nodes are causing which disk accesses. Seemed pretty darn useful at the time. There is (of course) some performance overhead when they are enabled, but that is intentionally performed by the operator, so it seems like a non-issue.

Now, there was some indication that there was a better place to probe that would be more comprehensive -- that should be addressed.

--
Theo Schlossnagle
Esoteric Curio -- http://lethargy.org/
OmniTI Computer Consulting, Inc. -- http://omniti.com/

#5Bernd Helmle
mailings@oopsware.de
In reply to: Zdenek Kotala (#3)
2 attachment(s)
Re: [patch] executor and slru dtrace probes

--On 8. Dezember 2009 11:10:44 +0100 Zdenek Kotala <Zdenek.Kotala@Sun.COM>
wrote:

If you think that it is better I could split patch into two separate
patches and both can be reviewed separately.

I split up this patch into two separate patches: one for SLRU and one for
the executor probes. I've done some documentation on the SLRU part, maybe
you can look over it (to make sure i didn't break anything).

I left the executor probes out of the documentation for now, since it seems
not to be clear how they would manifest.

Out of curiosity: Why do we want to pass the SlruCtl pointer down to the
probes? I don't understand what those probes are going to do with those
pointers, can you explain?

--
Thanks

Bernd

Attachments:

dtrace_executor_probes.patchapplication/text; name=dtrace_executor_probes.patchDownload
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index b968c5e..57e73cb 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -20,6 +20,7 @@
 
 #include "executor/executor.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -121,6 +122,8 @@ ExecScan(ScanState *node,
 	qual = node->ps.qual;
 	projInfo = node->ps.ps_ProjInfo;
 
+	TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan *)node->ps.plan)->scanrelid, (uintptr_t)accessMtd);
+
 	/*
 	 * If we have neither a qual to check nor a projection to do, just skip
 	 * all the overhead and return the raw scan tuple.
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 672051f..6c81dc2 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -81,6 +81,7 @@
 #include "parser/parse_agg.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_oper.h"
+#include "pg_trace.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -818,6 +819,8 @@ ExecAgg(AggState *node)
 		node->ss.ps.ps_TupFromTlist = false;
 	}
 
+	TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *) node->ss.ps.plan)->aggstrategy);
+
 	/*
 	 * Exit if nothing left to do.  (We must do the ps_TupFromTlist check
 	 * first, because in some cases agg_done gets set before we emit the
diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c
index 73c62fb..51a9867 100644
--- a/src/backend/executor/nodeGroup.c
+++ b/src/backend/executor/nodeGroup.c
@@ -24,6 +24,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeGroup.h"
+#include "pg_trace.h"
 
 
 /*
@@ -49,6 +50,8 @@ ExecGroup(GroupState *node)
 	numCols = ((Group *) node->ss.ps.plan)->numCols;
 	grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx;
 
+	TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols);
+
 	/*
 	 * Check to see if we're still projecting out tuples from a previous group
 	 * tuple (because there is a function-returning-set in the projection
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 9e32731..8291f2e 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -33,6 +33,7 @@
 #include "executor/nodeHashjoin.h"
 #include "miscadmin.h"
 #include "parser/parse_expr.h"
+#include "pg_trace.h"
 #include "utils/dynahash.h"
 #include "utils/memutils.h"
 #include "utils/lsyscache.h"
@@ -79,6 +80,8 @@ MultiExecHash(HashState *node)
 	ExprContext *econtext;
 	uint32		hashvalue;
 
+	TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node);
+
 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
 		InstrStartNode(node->ps.instrument);
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index e906711..337d1de 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -19,6 +19,7 @@
 #include "executor/hashjoin.h"
 #include "executor/nodeHash.h"
 #include "executor/nodeHashjoin.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -61,6 +62,8 @@ ExecHashJoin(HashJoinState *node)
 	uint32		hashvalue;
 	int			batchno;
 
+	TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node);
+
 	/*
 	 * get information from HashJoin node
 	 */
diff --git a/src/backend/executor/nodeLimit.c b/src/backend/executor/nodeLimit.c
index e594be8..500df8b 100644
--- a/src/backend/executor/nodeLimit.c
+++ b/src/backend/executor/nodeLimit.c
@@ -23,6 +23,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeLimit.h"
+#include "pg_trace.h"
 
 static void recompute_limits(LimitState *node);
 
@@ -41,6 +42,8 @@ ExecLimit(LimitState *node)
 	TupleTableSlot *slot;
 	PlanState  *outerPlan;
 
+	TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node);
+
 	/*
 	 * get information from the node
 	 */
diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c
index 74041e2..df864da 100644
--- a/src/backend/executor/nodeMaterial.c
+++ b/src/backend/executor/nodeMaterial.c
@@ -24,6 +24,7 @@
 #include "executor/executor.h"
 #include "executor/nodeMaterial.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 
 /* ----------------------------------------------------------------
  *		ExecMaterial
@@ -45,6 +46,8 @@ ExecMaterial(MaterialState *node)
 	bool		eof_tuplestore;
 	TupleTableSlot *slot;
 
+	TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node);
+
 	/*
 	 * get state info from node
 	 */
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index b83cbbe..a4e5b07 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -98,6 +98,7 @@
 #include "executor/execdefs.h"
 #include "executor/nodeMergejoin.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "utils/acl.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -565,6 +566,8 @@ ExecMergeJoin(MergeJoinState *node)
 	bool		doFillOuter;
 	bool		doFillInner;
 
+	TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node);
+
 	/*
 	 * get information from node
 	 */
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index e91ac44..028d6bc 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -23,6 +23,7 @@
 
 #include "executor/execdebug.h"
 #include "executor/nodeNestloop.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -67,6 +68,8 @@ ExecNestLoop(NestLoopState *node)
 	List	   *otherqual;
 	ExprContext *econtext;
 
+	TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node);
+
 	/*
 	 * get information from the node
 	 */
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 4a24df5..1efe147 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -46,6 +46,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeSetOp.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -196,6 +197,8 @@ ExecSetOp(SetOpState *node)
 	SetOp	   *plannode = (SetOp *) node->ps.plan;
 	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
 
+	TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node);
+
 	/*
 	 * If the previously-returned tuple needs to be returned more than once,
 	 * keep returning it.
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index f95f74b..7d994c6 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -18,6 +18,7 @@
 #include "executor/execdebug.h"
 #include "executor/nodeSort.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 #include "utils/tuplesort.h"
 
 
@@ -53,6 +54,8 @@ ExecSort(SortState *node)
 	dir = estate->es_direction;
 	tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
 
+	TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir);
+
 	/*
 	 * If first time through, read all tuples from outer plan and pass them to
 	 * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 4cb5459..758ec72 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -24,6 +24,7 @@
 #include "executor/nodeSubplan.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
+#include "pg_trace.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -92,6 +93,8 @@ ExecHashSubPlan(SubPlanState *node,
 	ExprContext *innerecontext = node->innerecontext;
 	TupleTableSlot *slot;
 
+	TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node);
+
 	/* Shouldn't have any direct correlation Vars */
 	if (subplan->parParam != NIL || node->args != NIL)
 		elog(ERROR, "hashed subplan with direct correlation not supported");
@@ -227,6 +230,8 @@ ExecScanSubPlan(SubPlanState *node,
 	ListCell   *l;
 	ArrayBuildState *astate = NULL;
 
+	TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node);
+
 	/*
 	 * We are probably in a short-lived expression-evaluation context. Switch
 	 * to the per-query context for manipulating the child plan's chgParam,
diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c
index 733d85e..f6744a1 100644
--- a/src/backend/executor/nodeUnique.c
+++ b/src/backend/executor/nodeUnique.c
@@ -35,6 +35,7 @@
 
 #include "executor/executor.h"
 #include "executor/nodeUnique.h"
+#include "pg_trace.h"
 #include "utils/memutils.h"
 
 
@@ -50,6 +51,8 @@ ExecUnique(UniqueState *node)
 	TupleTableSlot *slot;
 	PlanState  *outerPlan;
 
+	TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node);
+
 	/*
 	 * get information from the node
 	 */
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index c703c2f..9f80270 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -90,4 +91,19 @@ provider postgresql {
 	probe xlog__switch();
 	probe wal__buffer__write__dirty__start();
 	probe wal__buffer__write__dirty__done();
+
+	probe executor__scan(unsigned long, unsigned int, unsigned long);
+	probe executor__agg(unsigned long, int);
+	probe executor__group(unsigned long, int);
+	probe executor__hash__multi(unsigned long);
+	probe executor__hashjoin(unsigned long);
+	probe executor__limit(unsigned long);
+	probe executor__material(unsigned long);
+	probe executor__mergejoin(unsigned long);
+	probe executor__nestloop(unsigned long);
+	probe executor__setop(unsigned long);
+	probe executor__sort(unsigned long, int);
+	probe executor__subplan__hash(unsigned long);
+	probe executor__subplan__scan(unsigned long);
+	probe executor__unique(unsigned long);
 };
dtrace_slru_probes.patchapplication/octet-stream; name=dtrace_slru_probes.patchDownload
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 1f70fd4..c1bb9fd 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1472,6 +1472,66 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS procpid,
      <entry>Probe that fires when a deadlock is found by the deadlock
       detector.</entry>
     </row>
+    <row>
+      <entry>slru-readpage-start</entry>
+      <entry>(unsigned long, int, bool, TransactionId)</entry>
+      <entry>Probe that fires when a page is looked up in the SLRU buffer. arg0 is a pointer to
+      the current SLRU control structure, arg1 the page number, arg2 indicates when a dirty 
+      read is in progress and arg3 is the transaction id (XID).</entry>
+    </row>
+    <row>
+      <entry>slru-readpage-done</entry>
+      <entry>(int)</entry>
+      <entry>Probe that fires when looking up the buffer page is done. 
+      arg0 is the slot number finally returned.</entry>
+    </row>
+    <row>
+      <entry>slru-readpage-readonly</entry>
+      <entry>(unsigned long, int, TransactionId)</entry>
+      <entry>Probe that fires when a readonly lookup to find a page in the shared buffer is attempted. 
+      arg0 is the pointer to the local SLRU control structure, arg1 defines the pagenumber, arg2 holds the
+      current transaction id.</entry>
+    </row>
+    <row>
+      <entry>slru-writepage-start</entry>
+      <entry>(unsigned long, int, TransactionId)</entry>
+      <entry>Probe that fires when an attempt to write to a page in the buffer is attempted. 
+      arg0 is a pointer to the local SLRU control structure, arg1 the slot number and arg2 the pointer
+      to the data.</entry>
+    </row>
+    <row>
+      <entry>slru-writepage-done</entry>
+      <entry>()</entry>
+      <entry>Probe that fires when an attempt to write to a SLRU page is finished.</entry>
+    </row>    
+    <row>
+      <entry>slru-readpage-physical-start</entry>
+      <entry>(unsigned long, char *, int, int)</entry>
+      <entry>Probe that fires when a page is phyiscally read into a buffer slot. 
+      arg0 is a pointer to the local SLRU control structure, arg1 the path of the file
+      to read the page from, arg2 the page number and arg3 the slot number 
+      of the target slot.</entry>
+    </row>
+    <row>
+      <entry>slru-readpage-physical-done</entry>
+      <entry>(int, int, int)</entry>
+      <entry>Probe that fires when a page is phyiscally read into a buffer slot. 
+      arg0 indicates success or failure of the read attempt, arg1 holds the SLRU error cause and
+      arg2 finally the errno returned by the system call.</entry>
+    </row>
+    <row>
+      <entry>slru-writepage-physical-start</entry>
+      <entry>(unsigned long, int, int)</entry>
+      <entry>Probe that fires when a page is physically written out. arg0 is a pointer to the
+      local SLRU control structure, arg1 the page number and arg2 defines the slot number.</entry>
+    </row>
+    <row>
+      <entry>slru-writepage-physical-done</entry>
+      <entry>(int, int, int)</entry>
+      <entry>Probe that fires when physically writing a page out of the SLRU buffer is finished. arg0 indicates
+      wether the write action was successful, arg1 contains the error flag and arg2 the errno returned
+      by the system call.</entry>
+    </row>
 
    </tbody>
    </tgroup>
@@ -1494,6 +1554,10 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS procpid,
      <entry>unsigned int</entry>
     </row>
     <row>
+     <entry>TransactionId</entry>
+     <entry>unsigned int</entry>
+    </row>
+    <row>
      <entry>LWLockId</entry>
      <entry>int</entry>
     </row>
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 68e3869..235ca40 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -57,6 +57,7 @@
 #include "storage/fd.h"
 #include "storage/shmem.h"
 #include "miscadmin.h"
+#include "pg_trace.h"
 
 
 /*
@@ -372,6 +373,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 {
 	SlruShared	shared = ctl->shared;
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok, xid);
 	/* Outer loop handles restart if we must wait for someone else's I/O */
 	for (;;)
 	{
@@ -399,6 +401,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 			}
 			/* Otherwise, it's ready to use */
 			SlruRecentlyUsed(shared, slotno);
+			TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno);
 			return slotno;
 		}
 
@@ -446,6 +449,7 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 			SlruReportIOError(ctl, pageno, xid);
 
 		SlruRecentlyUsed(shared, slotno);
+		TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno);
 		return slotno;
 	}
 }
@@ -470,6 +474,8 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
 	SlruShared	shared = ctl->shared;
 	int			slotno;
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid);
+
 	/* Try to find the page while holding only shared lock */
 	LWLockAcquire(shared->ControlLock, LW_SHARED);
 
@@ -511,6 +517,8 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
 	int			pageno = shared->page_number[slotno];
 	bool		ok;
 
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno);
+
 	/* If a write is in progress, wait for it to finish */
 	while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
 		   shared->page_number[slotno] == pageno)
@@ -525,7 +533,10 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
 	if (!shared->page_dirty[slotno] ||
 		shared->page_status[slotno] != SLRU_PAGE_VALID ||
 		shared->page_number[slotno] != pageno)
+	{
+		TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE();
 		return;
+	}
 
 	/*
 	 * Mark the slot write-busy, and clear the dirtybit.  After this point, a
@@ -569,6 +580,8 @@ SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
 	/* Now it's okay to ereport if we failed */
 	if (!ok)
 		SlruReportIOError(ctl, pageno, InvalidTransactionId);
+
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE();
 }
 
 /*
@@ -593,6 +606,8 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 
 	SlruFileName(ctl, path, segno);
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path, pageno, slotno);
+
 	/*
 	 * In a crash-and-restart situation, it's possible for us to receive
 	 * commands to set the commit status of transactions whose bits are in
@@ -607,6 +622,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 		{
 			slru_errcause = SLRU_OPEN_FAILED;
 			slru_errno = errno;
+			TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 
@@ -614,6 +630,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 				(errmsg("file \"%s\" doesn't exist, reading as zeroes",
 						path)));
 		MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
 		return true;
 	}
 
@@ -622,6 +639,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 		slru_errcause = SLRU_SEEK_FAILED;
 		slru_errno = errno;
 		close(fd);
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -631,6 +649,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 		slru_errcause = SLRU_READ_FAILED;
 		slru_errno = errno;
 		close(fd);
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -638,9 +657,12 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	{
 		slru_errcause = SLRU_CLOSE_FAILED;
 		slru_errno = errno;
+		TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
+	TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1);
+
 	return true;
 }
 
@@ -668,6 +690,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 	char		path[MAXPGPATH];
 	int			fd = -1;
 
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno, slotno);
+
 	/*
 	 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
 	 * write out data before associated WAL records.  This is the same action
@@ -753,6 +777,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 		{
 			slru_errcause = SLRU_OPEN_FAILED;
 			slru_errno = errno;
+			TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 
@@ -781,6 +806,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 		slru_errno = errno;
 		if (!fdata)
 			close(fd);
+		TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -794,6 +820,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 		slru_errno = errno;
 		if (!fdata)
 			close(fd);
+		TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 		return false;
 	}
 
@@ -808,6 +835,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 			slru_errcause = SLRU_FSYNC_FAILED;
 			slru_errno = errno;
 			close(fd);
+			TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 
@@ -815,10 +843,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 		{
 			slru_errcause = SLRU_CLOSE_FAILED;
 			slru_errno = errno;
+			TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno);
 			return false;
 		}
 	}
 
+	TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, -1, -1);
 	return true;
 }
 
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index c703c2f..9f80270 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -15,6 +15,7 @@
  * in probe definitions, as they cause compilation errors on Mac OS X 10.5.
  */
 #define LocalTransactionId unsigned int
+#define TransactionId unsigned int
 #define LWLockId int
 #define LWLockMode int
 #define LOCKMODE int
@@ -90,4 +91,14 @@ provider postgresql {
 	probe xlog__switch();
 	probe wal__buffer__write__dirty__start();
 	probe wal__buffer__write__dirty__done();
+
+	probe slru__readpage__start(unsigned long, int, bool, TransactionId);
+	probe slru__readpage__done(int);
+	probe slru__readpage__readonly(unsigned long, int, TransactionId);
+	probe slru__writepage__start(unsigned long, int, int);
+	probe slru__writepage__done();
+	probe slru__readpage__physical__start(unsigned long, char *, int, int);
+	probe slru__readpage__physical__done(int, int, int);
+	probe slru__writepage__physical__start(unsigned long, int, int);
+	probe slru__writepage__physical__done(int, int, int);
 };
#6Zdenek Kotala
Zdenek.Kotala@Sun.COM
In reply to: Bernd Helmle (#5)
Re: [patch] executor and slru dtrace probes

Dne 10.12.09 15:51, Bernd Helmle napsal(a):

--On 8. Dezember 2009 11:10:44 +0100 Zdenek Kotala
<Zdenek.Kotala@Sun.COM> wrote:

If you think that it is better I could split patch into two separate
patches and both can be reviewed separately.

I split up this patch into two separate patches: one for SLRU and one
for the executor probes. I've done some documentation on the SLRU part,
maybe you can look over it (to make sure i didn't break anything).

I left the executor probes out of the documentation for now, since it
seems not to be clear how they would manifest.

Out of curiosity: Why do we want to pass the SlruCtl pointer down to the
probes? I don't understand what those probes are going to do with those
pointers, can you explain?

You need to determine which SLRU is used. Because SLRUs are initialized
during startup pointer should be same in all backends. If I think
more about it. Maybe it could be goot to add probe also into
SimpleLruInit to catch name of SLRUs.

Zdenek

#7Bernd Helmle
mailings@oopsware.de
In reply to: Theo Schlossnagle (#4)
Re: [patch] executor and slru dtrace probes

--On 9. Dezember 2009 19:08:07 -0500 Theo Schlossnagle <jesus@omniti.com>
wrote:

Now, there was some indication that there was a better place to probe
that would be more comprehensive -- that should be addressed.

For now there exists no consensus where they should go in. Tom pointed out
various issues with ExecProcNode() and he's worried about the performance
penalty those probes might introduce. I admit I'm not very experienced with
dtrace, but maybe some worries exists because an expensive instrumented
executor probe can cause forged results?

--
Thanks

Bernd

#8Bernd Helmle
mailings@oopsware.de
In reply to: Zdenek Kotala (#6)
Re: [patch] executor and slru dtrace probes

--On 10. Dezember 2009 16:49:50 +0100 Zdenek Kotala <Zdenek.Kotala@Sun.COM>
wrote:

You need to determine which SLRU is used. Because SLRUs are initialized
during startup pointer should be same in all backends. If I think more
about it. Maybe it could be goot to add probe also into SimpleLruInit to
catch name of SLRUs.

Hi Zdenek,

do you plan to work on this further? I was about to mark the SLRU probes as
ready for committer...

--
Thanks

Bernd

#9Robert Haas
robertmhaas@gmail.com
In reply to: Bernd Helmle (#8)
Re: [patch] executor and slru dtrace probes

On Mon, Dec 14, 2009 at 7:19 AM, Bernd Helmle <mailings@oopsware.de> wrote:

--On 10. Dezember 2009 16:49:50 +0100 Zdenek Kotala <Zdenek.Kotala@Sun.COM>
wrote:

You need to determine which SLRU is used. Because SLRUs are initialized
during startup  pointer should be same in all backends. If I think more
about it. Maybe it could be goot to add probe also into SimpleLruInit to
catch name of SLRUs.

Hi Zdenek,

do you plan to work on this further? I was about to mark the SLRU probes as
ready for committer...

Since the author has pretty much admitted he didn't fix any of the
issues that were raised by the last committer review, I'm a little
confused about why you're asking for another one.

...Robert

#10Bernd Helmle
mailings@oopsware.de
In reply to: Robert Haas (#9)
Re: [patch] executor and slru dtrace probes

--On 14. Dezember 2009 07:49:34 -0500 Robert Haas <robertmhaas@gmail.com>
wrote:

Since the author has pretty much admitted he didn't fix any of the
issues that were raised by the last committer review, I'm a little
confused about why you're asking for another one.

It wasn't clear to me what Zdenek meant with "If I think about it".

--
Thanks

Bernd

#11Bernd Helmle
mailings@oopsware.de
In reply to: Bernd Helmle (#10)
Re: [patch] executor and slru dtrace probes

--On 14. Dezember 2009 20:33:12 +0100 Bernd Helmle <mailings@oopsware.de>
wrote:

Since the author has pretty much admitted he didn't fix any of the
issues that were raised by the last committer review, I'm a little
confused about why you're asking for another one.

It wasn't clear to me what Zdenek meant with "If I think about it".

Oh, and i was under the opinion the last discussions were about executor
probes only (note the patch is split up into two parts now, SLRU and
executor probes). The latter won't be fixed, but it seems the SLRU part at
least is ready.

--
Thanks

Bernd

#12Zdenek Kotala
Zdenek.Kotala@Sun.COM
In reply to: Bernd Helmle (#11)
Re: [patch] executor and slru dtrace probes

Bernd Helmle píše v po 14. 12. 2009 v 20:42 +0100:

--On 14. Dezember 2009 20:33:12 +0100 Bernd Helmle <mailings@oopsware.de>
wrote:

Since the author has pretty much admitted he didn't fix any of the
issues that were raised by the last committer review, I'm a little
confused about why you're asking for another one.

It wasn't clear to me what Zdenek meant with "If I think about it".

Oh, and i was under the opinion the last discussions were about executor
probes only (note the patch is split up into two parts now, SLRU and
executor probes). The latter won't be fixed, but it seems the SLRU part at
least is ready.

I would like to add SimpleLruInit probe. I'm busy with PG packaging,
but I hope that I will send updated version tomorrow.

Zdenek

#13Greg Smith
greg@2ndquadrant.com
In reply to: Zdenek Kotala (#12)
Re: [patch] executor and slru dtrace probes

Zdenek Kotala wrote:

Bernd Helmle píše v po 14. 12. 2009 v 20:42 +0100:

Oh, and i was under the opinion the last discussions were about executor
probes only (note the patch is split up into two parts now, SLRU and
executor probes). The latter won't be fixed, but it seems the SLRU part at
least is ready.

I would like to add SimpleLruInit probe. I'm busy with PG packaging,
but I hope that I will send updated version tomorrow.

I'd like to see as many of these DTrace probes as possible make it into
8.5, and it's great that you've picked up these older ones and updated
them. It looks like a lot of progress was made on actually measuring
the overhead of adding these probes in even when they're not enabled,
which is good.

But I'm afraid we're already out of time for this one if you're still
tweaking the probes here. With a functional change like that, our
normal process at this point would be to have the reviewer re-evaluate
things before they head to a committer, and I don't feel like this patch
is quite at 100% yet--in particular, the probe documentation is
improving but still a bit rough. I don't feel like we're quite ready to
mark this one for commit for this one, and today we really want to clear
the queue for things for committers to deal with. Please send that
updated version, and let's keep working on this into the next
CommitFest, where it will be in the front of the queue rather than how
it ended up at the tail of this one just based on its submission date.
You're not really getting a fair chunk of time here between your review
and the end here because of problems lining up reviewer time, that
shouldn't happen next time.

--
Greg Smith 2ndQuadrant Baltimore, MD
PostgreSQL Training, Services and Support
greg@2ndQuadrant.com www.2ndQuadrant.com

#14Bernd Helmle
mailings@oopsware.de
In reply to: Greg Smith (#13)
Re: [patch] executor and slru dtrace probes

--On 15. Dezember 2009 12:10:09 -0500 Greg Smith <greg@2ndquadrant.com>
wrote:

But I'm afraid we're already out of time for this one if you're still
tweaking the probes here.  With a functional change like that, our
normal process at this point would be to have the reviewer re-evaluate
things before they head to a committer, and I don't feel like this patch
is quite at 100% yet--in particular, the probe documentation is improving
but still a bit rough.  I don't feel like we're quite ready to mark this
one for commit for this one, and today we really want to clear the queue
for things for committers to deal with.  Please send that updated
version, and let's keep working on this into the next CommitFest, where
it will be in the front of the queue rather than how it ended up at the
tail of this one just based on its submission date.  You're not really
getting a fair chunk of time here between your review and the end here
because of problems lining up reviewer time, that shouldn't happen next
time.

That seems reasonable.

I hope i could contribute something, even this was the first time i got my
hands on reviewing this DTrace thingie.

--
Thanks

Bernd

#15Zdenek Kotala
Zdenek.Kotala@Sun.COM
In reply to: Greg Smith (#13)
Re: [patch] executor and slru dtrace probes

Greg Smith píše v út 15. 12. 2009 v 12:10 -0500:

Please send that updated version, and let's keep working on this into
the next CommitFest, where it will be in the front of the queue rather
than how it ended up at the tail of this one just based on its
submission date. You're not really getting a fair chunk of time here
between your review and the end here because of problems lining up
reviewer time, that shouldn't happen next time.

Make sense.

Zdenek