Launching debugger on self on SIGSEGV

Started by Gurjeet Singhover 14 years ago3 messages
#1Gurjeet Singh
singh.gurjeet@gmail.com
2 attachment(s)

Hi,

The attached patch registers a signal handler for SIGSEGV and launches
GDB in batch mode on its own pid so that the stack leading to the SEGV can
be dumped in the server logs. Also attached is an example of the stack
dumped by gdb in server log file (caused by a `kill -segv nnn` on the
backend).

Since this patch calls fork() inside a signal handler, I investigated a
bit and found that, per POSIX, fork() is asynch-signal-safe and hence it can
be called inside a handler.

This in itself might not be very useful because I haven't seen many
crash reports in the community, but it can be extended to dump stack on
Assert so that it helps developers and our beta testers. It can also be used
to dump stack of a process we are about to kill for deadlock reasons, and
before certain PANIC conditions too.

Right now it works only for gdb (setting the GUC to true actually check
for the presence of gdb), but it can be made generic, they way our
archive_command etc. work, so that we take a string and replace certain
parameters with binary path and pid so that any debugger can be used.

It also looks pretty easy to port it to Windows since all we really want
to do is create an external process with certain parameters, and
CreateProcess() is all we need. I haven't investigated seriously about that
but of there's interest in this patch then I can spend some time on that
too.

Regards,
--
Gurjeet Singh
EnterpriseDB Corporation
The Enterprise PostgreSQL Company

Attachments:

debugger_on_self.patchtext/x-patch; charset=US-ASCII; name=debugger_on_self.patchDownload
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index dca5efc..04cd900 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -5049,3 +5049,34 @@ InitPostmasterDeathWatchHandle(void)
 								 (int) GetLastError())));
 #endif   /* WIN32 */
 }
+
+/* Fork a gdb process such that it emits my stack trace to the logs */
+static void
+print_self_stack()
+{
+	char pid_buf[30];
+	int child_pid;
+
+	sprintf(pid_buf, "%d", getpid());
+	child_pid = fork();
+
+	if (child_pid == 0)
+	{
+		fprintf(stderr, "stack trace for %s pid=%s\n", my_exec_path, pid_buf);
+		execlp("gdb", "gdb", "--batch", "-n", "-ex", "bt", my_exec_path, pid_buf, NULL);
+		abort(); /* If gdb failed to start */
+	}
+	else
+	{
+		waitpid(child_pid,NULL,0);
+	}
+}
+
+/* SIGSEGV handler, controlled by GUC */
+void
+dump_stack(SIGNAL_ARGS)
+{
+	print_self_stack();
+
+	abort();
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5841631..7262839 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -40,6 +40,7 @@
 #include "libpq/auth.h"
 #include "libpq/be-fsstubs.h"
 #include "libpq/pqformat.h"
+#include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "optimizer/cost.h"
 #include "optimizer/geqo.h"
@@ -167,6 +168,10 @@ static bool call_enum_check_hook(struct config_enum * conf, int *newval,
 static bool check_log_destination(char **newval, void **extra, GucSource source);
 static void assign_log_destination(const char *newval, void *extra);
 
+bool		dump_stack_on_crash = false;
+static bool check_dump_stack_on_crash(bool *newval, void **extra, GucSource source);
+static void assign_dump_stack_on_crash(bool newval, void *extra);
+
 #ifdef HAVE_SYSLOG
 static int	syslog_facility = LOG_LOCAL0;
 #else
@@ -1422,6 +1427,17 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"dump_stack_on_crash", PGC_USERSET, ERROR_HANDLING_OPTIONS,
+			gettext_noop("Use GDB to dump the stack of a crashing backend process."),
+			gettext_noop("This requires that GDB be already installed and accessible to Postgres."),
+		},
+		&quote_all_identifiers,
+		false,
+		check_dump_stack_on_crash, assign_dump_stack_on_crash, NULL
+	},
+
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
@@ -8672,4 +8688,35 @@ show_log_file_mode(void)
 	return buf;
 }
 
+static bool
+check_dump_stack_on_crash(bool *newval, void **extra, GucSource source)
+{
+	/* TODO: Check if GDB is available. If not, then complain and return false */
+
+	char gdb_path[MAXPGPATH];
+
+	if (*newval == false)
+		return true;
+
+	Assert(*newval == true);
+
+	if (find_my_exec("gdb", gdb_path) < 0)
+	{
+		elog(WARNING, "Could not locate gdb.");
+
+		return false;
+	}
+	else
+		return true;
+}
+
+static void
+assign_dump_stack_on_crash(const bool newval, void *extra)
+{
+	if (newval)
+		pqsignal(SIGSEGV, dump_stack);
+	else
+		pqsignal(SIGSEGV, SIG_DFL);
+}
+
 #include "guc-file.c"
diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h
index be4f8a7..f624d51 100644
--- a/src/include/postmaster/postmaster.h
+++ b/src/include/postmaster/postmaster.h
@@ -55,6 +55,8 @@ extern int	SubPostmasterMain(int argc, char *argv[]);
 
 extern Size ShmemBackendArraySize(void);
 extern void ShmemBackendArrayAllocation(void);
+extern void dump_stack(SIGNAL_ARGS);
+
 #endif
 
 #endif   /* _POSTMASTER_H */
sample_gdb_log.logtext/x-log; charset=US-ASCII; name=sample_gdb_log.logDownload
#2Tom Lane
tgl@sss.pgh.pa.us
In reply to: Gurjeet Singh (#1)
Re: Launching debugger on self on SIGSEGV

Gurjeet Singh <singh.gurjeet@gmail.com> writes:

The attached patch registers a signal handler for SIGSEGV and launches
GDB in batch mode on its own pid so that the stack leading to the SEGV can
be dumped in the server logs.

Did you not read the thread last week about how we did not want any such
thing?

Quite aside from any postgres-specific reasons not to have any added
delay in the signal-to-database-shutdown path, this patch makes a bunch
of untenable assumptions about whether or where gdb is installed,
whether there are usable debug symbols available, whether gdb's output
will go somewhere useful, etc etc. And on top of all that, it adds *no
functionality whatsoever* compared to a post-mortem gdb run on the core
file.

regards, tom lane

#3Gurjeet Singh
singh.gurjeet@gmail.com
In reply to: Tom Lane (#2)
Re: Launching debugger on self on SIGSEGV

On Mon, Jul 11, 2011 at 12:56 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:

Gurjeet Singh <singh.gurjeet@gmail.com> writes:

The attached patch registers a signal handler for SIGSEGV and

launches

GDB in batch mode on its own pid so that the stack leading to the SEGV

can

be dumped in the server logs.

Did you not read the thread last week about how we did not want any such
thing?

Unfortunately, I did not. I'll catch up on it.

Quite aside from any postgres-specific reasons not to have any added
delay in the signal-to-database-shutdown path, this patch makes a bunch
of untenable assumptions about whether or where gdb is installed,
whether there are usable debug symbols available, whether gdb's output
will go somewhere useful, etc etc. And on top of all that, it adds *no
functionality whatsoever* compared to a post-mortem gdb run on the core
file.

I agree that it makes a bunch of assumptions, that's why I proposed that we
make it user configurable parameter, like archive_command, so that users (or
their packagers) can provide the command and all the relevant options.

Regards,
--
Gurjeet Singh
EnterpriseDB Corporation
The Enterprise PostgreSQL Company