Improve pg_re_throw: check if sigjmp_buf is valid and report error

Started by Xiaoran Wangover 1 year ago9 messages
#1Xiaoran Wang
fanfuxiaoran@gmail.com
2 attachment(s)

/messages/by-id/CANncwrJTse6WKkUS_Y8Wj2PHVRvaqPxMk_qtEPsf_+NVPYxzyg@mail.gmail.com

As the problem discussed in the above thread, I also run into that. Besides
updating the doc, I would like to report a error for it.

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

In that case, to help developers to figure out the root cause easily, it is
better to report that 'the sigjmp_buf is invalid' rather than letting
the __longjmp report any error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is always
'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the
'sigbuf'
must be invalid.

The related code is in patch 0001

------------------------------
I'm not sure if it is necessary to add a regress test for it. In patch
0002, to test the
patch can work correctly, I have added a function 'pg_re_throw_crash' in
regress.c

create function pg_re_throw_crash()
RETURNS void
AS :'regresslib', 'pg_re_throw_crash'
LANGUAGE C STRICT STABLE PARALLEL SAFE;
create above function and run 'select pg_re_throw_crash()', then will get
the error
'FATAL: Invalid sigjum_buf, code in PG_TRY cannot contain any non local
control flow other than ereport'

--
Best regards !
Xiaoran Wang

Attachments:

0001-Imporve-pg_re_throw-check-if-sigjmp_buf-is-valid-and.patchapplication/octet-stream; name=0001-Imporve-pg_re_throw-check-if-sigjmp_buf-is-valid-and.patchDownload
From c123eb70ec6a31cca579f8fd397e2a4ab61dd713 Mon Sep 17 00:00:00 2001
From: wangxiaoran <fanfuxiaoran@gmail.com>
Date: Mon, 19 Aug 2024 11:44:00 +0800
Subject: [PATCH 1/2] Imporve pg_re_throw: check if sigjmp_buf is valid and
 report error

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

In that case, to help developers to figure out the root cause easily, it is
better to report that 'the sigjmp_buf is invalid' rather than letting
the __longjmp report any error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is always
'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the 'sigbuf'
must be invalid.
---
 src/backend/postmaster/autovacuum.c        |  8 ++++----
 src/backend/postmaster/bgworker.c          |  4 ++--
 src/backend/postmaster/bgwriter.c          |  4 ++--
 src/backend/postmaster/checkpointer.c      |  5 ++---
 src/backend/postmaster/pgarch.c            |  4 ++--
 src/backend/postmaster/walsummarizer.c     |  4 ++--
 src/backend/postmaster/walwriter.c         |  4 ++--
 src/backend/replication/logical/slotsync.c |  4 ++--
 src/backend/tcop/postgres.c                |  5 ++---
 src/backend/utils/error/elog.c             | 10 ++++++++--
 src/include/utils/elog.h                   | 14 ++++++++++----
 11 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 7d0877c95e..4076d99938 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -361,7 +361,7 @@ static void avl_sigusr2_handler(SIGNAL_ARGS);
 void
 AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception	local_sigjmp_buf = {PG_exception_magic};
 
 	Assert(startup_data_len == 0);
 
@@ -436,7 +436,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
@@ -1359,7 +1359,7 @@ avl_sigusr2_handler(SIGNAL_ARGS)
 void
 AutoVacWorkerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	Oid			dbid;
 
 	Assert(startup_data_len == 0);
@@ -1421,7 +1421,7 @@ AutoVacWorkerMain(char *startup_data, size_t startup_data_len)
 	 * seem that this policy makes the HOLD_INTERRUPTS() call redundant, but
 	 * it is not since InterruptPending might be set already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index b83967cda3..d9cb01cd3a 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -716,7 +716,7 @@ bgworker_die(SIGNAL_ARGS)
 void
 BackgroundWorkerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception	local_sigjmp_buf = {PG_exception_magic};
 	BackgroundWorker *worker;
 	bgworker_main_type entrypt;
 
@@ -781,7 +781,7 @@ BackgroundWorkerMain(char *startup_data, size_t startup_data_len)
 	 *
 	 * We just need to clean up, report the error, and go away.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 0f75548759..445b56ac76 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -86,7 +86,7 @@ static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
 void
 BackgroundWriterMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext bgwriter_context;
 	bool		prev_hibernate;
 	WritebackContext wb_context;
@@ -150,7 +150,7 @@ BackgroundWriterMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 199f008bcd..e2a2c7336e 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -172,7 +172,7 @@ static void ReqCheckpointHandler(SIGNAL_ARGS);
 void
 CheckpointerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext checkpointer_context;
 
 	Assert(startup_data_len == 0);
@@ -248,7 +248,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
@@ -311,7 +311,6 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
 
 	/* We can now handle ereport(ERROR) */
 	PG_exception_stack = &local_sigjmp_buf;
-
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
 	 */
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 02f91431f5..3a40334bc4 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -515,7 +515,7 @@ pgarch_ArchiverCopyLoop(void)
 static bool
 pgarch_archiveXlog(char *xlog)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext oldcontext;
 	char		pathname[MAXPGPATH];
 	char		activitymsg[MAXFNAMELEN + 16];
@@ -544,7 +544,7 @@ pgarch_archiveXlog(char *xlog)
 	 * PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to
 	 * avoid the odd archiver restart doesn't seem worth it.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index daa7909382..da58b26aa8 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -210,7 +210,7 @@ WalSummarizerShmemInit(void)
 void
 WalSummarizerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext context;
 
 	/*
@@ -273,7 +273,7 @@ WalSummarizerMain(char *startup_data, size_t startup_data_len)
 	/*
 	 * If an exception is encountered, processing resumes here.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 6e7918a78d..c18e7eb024 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -88,7 +88,7 @@ int			WalWriterFlushAfter = DEFAULT_WAL_WRITER_FLUSH_AFTER;
 void
 WalWriterMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext walwriter_context;
 	int			left_till_hibernate;
 	bool		hibernating;
@@ -147,7 +147,7 @@ WalWriterMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 2d1914ce08..c35c54edab 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -1333,7 +1333,7 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t startup_data_len)
 	WalReceiverConn *wrconn = NULL;
 	char	   *dbname;
 	char	   *err;
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	StringInfoData app_name;
 
 	Assert(startup_data_len == 0);
@@ -1366,7 +1366,7 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t startup_data_len)
 	 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
 	 * Therefore, we create our own exception handler to catch ERRORs.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 8bc6bea113..c2108ef72e 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4220,7 +4220,7 @@ PostgresSingleUserMain(int argc, char *argv[],
 void
 PostgresMain(const char *dbname, const char *username)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 
 	/* these must be volatile to ensure state is preserved across longjmp: */
 	volatile bool send_ready_for_query = true;
@@ -4422,7 +4422,7 @@ PostgresMain(const char *dbname, const char *username)
 	 * were inside a transaction.
 	 */
 
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/*
 		 * NOTE: if you are tempted to add more code in this if-block,
@@ -4537,7 +4537,6 @@ PostgresMain(const char *dbname, const char *username)
 
 	/* We can now handle ereport(ERROR) */
 	PG_exception_stack = &local_sigjmp_buf;
-
 	if (!ignore_till_sync)
 		send_ready_for_query = true;	/* initially, or after error */
 
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 943d8588f3..b06836feb0 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -93,7 +93,7 @@
 /* Global variables */
 ErrorContextCallback *error_context_stack = NULL;
 
-sigjmp_buf *PG_exception_stack = NULL;
+PG_exception *PG_exception_stack = NULL;
 
 /*
  * Hook for intercepting messages before they are sent to the server log.
@@ -1985,7 +1985,13 @@ pg_re_throw(void)
 {
 	/* If possible, throw the error to the next outer setjmp handler */
 	if (PG_exception_stack != NULL)
-		siglongjmp(*PG_exception_stack, 1);
+	{
+		if (PG_exception_stack->magic != PG_exception_magic)
+			ereport(FATAL,
+					(errmsg("Invalid sigjum_buf, code in PG_TRY cannot contain"
+							" any non local control flow other than ereport")));
+		siglongjmp(PG_exception_stack->buf, 1);
+	}
 	else
 	{
 		/*
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 054dd2bf62..d8dc45c1ff 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -383,11 +383,11 @@ extern PGDLLIMPORT ErrorContextCallback *error_context_stack;
  */
 #define PG_TRY(...)  \
 	do { \
-		sigjmp_buf *_save_exception_stack##__VA_ARGS__ = PG_exception_stack; \
+		PG_exception *_save_exception_stack##__VA_ARGS__ = PG_exception_stack; \
 		ErrorContextCallback *_save_context_stack##__VA_ARGS__ = error_context_stack; \
-		sigjmp_buf _local_sigjmp_buf##__VA_ARGS__; \
+		PG_exception _local_sigjmp_buf##__VA_ARGS__ = {PG_exception_magic}; \
 		bool _do_rethrow##__VA_ARGS__ = false; \
-		if (sigsetjmp(_local_sigjmp_buf##__VA_ARGS__, 0) == 0) \
+		if (sigsetjmp(_local_sigjmp_buf##__VA_ARGS__.buf, 0) == 0) \
 		{ \
 			PG_exception_stack = &_local_sigjmp_buf##__VA_ARGS__
 
@@ -426,7 +426,13 @@ extern PGDLLIMPORT ErrorContextCallback *error_context_stack;
 	(pg_re_throw(), pg_unreachable())
 #endif
 
-extern PGDLLIMPORT sigjmp_buf *PG_exception_stack;
+#define PG_exception_magic 0x12345678
+typedef struct PG_exception
+{
+	int magic;
+	sigjmp_buf buf;
+} PG_exception;
+extern PGDLLIMPORT PG_exception *PG_exception_stack;
 
 
 /* Stuff that error handlers might want to use */
-- 
2.39.3 (Apple Git-146)

0002-Test-pg_re_throw-checking-invalid-sigjmp_buf.patchapplication/octet-stream; name=0002-Test-pg_re_throw-checking-invalid-sigjmp_buf.patchDownload
From d2b53713896eb9c9a934760849ac5d2d6757fba9 Mon Sep 17 00:00:00 2001
From: wangxiaoran <fanfuxiaoran@gmail.com>
Date: Mon, 19 Aug 2024 11:49:12 +0800
Subject: [PATCH 2/2] Test pg_re_throw checking invalid sigjmp_buf

---
 src/test/regress/regress.c          | 31 +++++++++++++++++++++++++++++
 src/test/regress/sql/test_setup.sql |  5 +++++
 2 files changed, 36 insertions(+)

diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 14aad5a0c6..b7d5224c5d 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -1136,3 +1136,34 @@ binary_coercible(PG_FUNCTION_ARGS)
 
 	PG_RETURN_BOOL(IsBinaryCoercible(srctype, targettype));
 }
+
+/* Test pg_re_throw check the invalid sigjmp_buf and report error */
+static void
+wrong_pg_try()
+{
+	PG_TRY();
+	{
+		return;
+	}
+	PG_CATCH();
+	{
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+}
+
+PG_FUNCTION_INFO_V1(pg_re_throw_crash);
+Datum
+pg_re_throw_crash(PG_FUNCTION_ARGS)
+{
+	PG_TRY();
+	{
+		wrong_pg_try();
+		ereport(ERROR,(errmsg("called wrong_pg_try")));
+	}
+	PG_CATCH();
+	{
+	}
+	PG_END_TRY();
+	PG_RETURN_NULL();
+}
diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql
index 06b0e2121f..24d9b5e1ac 100644
--- a/src/test/regress/sql/test_setup.sql
+++ b/src/test/regress/sql/test_setup.sql
@@ -294,3 +294,8 @@ create function fipshash(text)
     returns text
     strict immutable parallel safe leakproof
     return substr(encode(sha256($1::bytea), 'hex'), 1, 32);
+
+create function pg_re_throw_crash()
+	RETURNS void
+	AS :'regresslib', 'pg_re_throw_crash'
+    LANGUAGE C STRICT STABLE PARALLEL SAFE;
-- 
2.39.3 (Apple Git-146)

#2Robert Haas
robertmhaas@gmail.com
In reply to: Xiaoran Wang (#1)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

On Mon, Aug 19, 2024 at 2:17 AM Xiaoran Wang <fanfuxiaoran@gmail.com> wrote:

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is always
'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the 'sigbuf'
must be invalid.

The related code is in patch 0001

This is an interesting idea. I suspect if we do this we want a
different magic number and a different error message than what you
chose here, but those are minor details.

I'm not sure how reliable this would be at actually finding problems,
though. It doesn't seem guaranteed that the magic number will get
overwritten if you do something wrong; it's just a possibility. Maybe
that's still useful enough that we should adopt this, but I'm not
sure.

Personally, I don't think I've ever made this particular mistake. I
think a much more common usage error is exiting the catch-block
without either throwing an error or rolling back a subtransaction. But
YMMV, of course.

--
Robert Haas
EDB: http://www.enterprisedb.com

#3Tom Lane
tgl@sss.pgh.pa.us
In reply to: Robert Haas (#2)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Robert Haas <robertmhaas@gmail.com> writes:

On Mon, Aug 19, 2024 at 2:17 AM Xiaoran Wang <fanfuxiaoran@gmail.com> wrote:

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is always
'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the 'sigbuf'
must be invalid.

This is an interesting idea. I suspect if we do this we want a
different magic number and a different error message than what you
chose here, but those are minor details.

I would suggest just adding an Assert; I doubt this check would be
useful in production.

I'm not sure how reliable this would be at actually finding problems,
though.

Yeah, that's the big problem. I don't have any confidence at all
that this would detect misuse. It'd require that the old stack
frame gets overwritten, which might not happen for a long time,
and it'd require that somebody eventually do a longjmp, which again
might not happen for a long time --- and when those did happen, the
error would be detected in someplace far away from the actual bug,
with little evidence remaining to help you localize it.

Also, as soon as some outer level of PG_TRY is exited in the proper
way, the dangling stack pointer will be fixed up. That means there's
a fairly narrow time frame in which the overwrite and longjmp must
happen for this to catch a bug.

So on the whole I doubt this'd be terribly useful in this form;
and I don't like the amount of code churn required.

Personally, I don't think I've ever made this particular mistake. I
think a much more common usage error is exiting the catch-block
without either throwing an error or rolling back a subtransaction. But
YMMV, of course.

We have had multiple instances of code "return"ing out of a PG_TRY,
so I fully agree that some better way to detect that would be good.
But maybe we ought to think about static analysis for that.

regards, tom lane

#4Xiaoran Wang
fanfuxiaoran@gmail.com
In reply to: Tom Lane (#3)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Tom Lane <tgl@sss.pgh.pa.us> 于2024年8月19日周一 22:12写道:

Robert Haas <robertmhaas@gmail.com> writes:

On Mon, Aug 19, 2024 at 2:17 AM Xiaoran Wang <fanfuxiaoran@gmail.com>

wrote:

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is

always

'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the

'sigbuf'

must be invalid.

This is an interesting idea. I suspect if we do this we want a
different magic number and a different error message than what you
chose here, but those are minor details.

I would suggest just adding an Assert; I doubt this check would be
useful in production.

Agree, an Assert is enough.

I'm not sure how reliable this would be at actually finding problems,
though.

Yeah, that's the big problem. I don't have any confidence at all
that this would detect misuse. It'd require that the old stack
frame gets overwritten, which might not happen for a long time,
and it'd require that somebody eventually do a longjmp, which again
might not happen for a long time --- and when those did happen, the
error would be detected in someplace far away from the actual bug,
with little evidence remaining to help you localize it.

Exactly, it cannot tell you which PG_TRY left the invalid sigjmp_buf,
but to implement that is easy I think, recording the line num maybe.

I think this is still useful, at least it tell you that the error is due
to the
PG_TRY.

Also, as soon as some outer level of PG_TRY is exited in the proper
way, the dangling stack pointer will be fixed up. That means there's
a fairly narrow time frame in which the overwrite and longjmp must
happen for this to catch a bug.

Yes, if the outer level PG_TRY call pg_re_throw instead of ereport,
the dangling stack pointer will be fixed up. It's would be great to fix
that
up in any case. But I have no idea how to implement that now.

In pg_re_throw, if we could use '_local_sigjmp_buf' instead of the
global var PG_exception_stack, that would be great. We don't
need to worry about the invalid sigjum_buf.

So on the whole I doubt this'd be terribly useful in this form;

and I don't like the amount of code churn required.

Personally, I don't think I've ever made this particular mistake. I
think a much more common usage error is exiting the catch-block
without either throwing an error or rolling back a subtransaction. But
YMMV, of course.

We have had multiple instances of code "return"ing out of a PG_TRY,
so I fully agree that some better way to detect that would be good.
But maybe we ought to think about static analysis for that.

regards, tom lane

--
Best regards !
Xiaoran Wang

#5Xiaoran Wang
fanfuxiaoran@gmail.com
In reply to: Xiaoran Wang (#4)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Xiaoran Wang <fanfuxiaoran@gmail.com> 于2024年8月20日周二 11:32写道:

Tom Lane <tgl@sss.pgh.pa.us> 于2024年8月19日周一 22:12写道:

Robert Haas <robertmhaas@gmail.com> writes:

On Mon, Aug 19, 2024 at 2:17 AM Xiaoran Wang <fanfuxiaoran@gmail.com>

wrote:

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is

always

'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the

'sigbuf'

must be invalid.

This is an interesting idea. I suspect if we do this we want a
different magic number and a different error message than what you
chose here, but those are minor details.

I would suggest just adding an Assert; I doubt this check would be
useful in production.

Agree, an Assert is enough.

I'm not sure how reliable this would be at actually finding problems,
though.

Yeah, that's the big problem. I don't have any confidence at all
that this would detect misuse. It'd require that the old stack
frame gets overwritten, which might not happen for a long time,
and it'd require that somebody eventually do a longjmp, which again
might not happen for a long time --- and when those did happen, the
error would be detected in someplace far away from the actual bug,
with little evidence remaining to help you localize it.

Exactly, it cannot tell you which PG_TRY left the invalid sigjmp_buf,
but to implement that is easy I think, recording the line num maybe.

I think this is still useful, at least it tell you that the error is due
to the
PG_TRY.

Also, as soon as some outer level of PG_TRY is exited in the proper
way, the dangling stack pointer will be fixed up. That means there's
a fairly narrow time frame in which the overwrite and longjmp must
happen for this to catch a bug.

Yes, if the outer level PG_TRY call pg_re_throw instead of ereport,
the dangling stack pointer will be fixed up. It's would be great to fix
that
up in any case. But I have no idea how to implement that now.

Sorry, "Yes, if the outer level PG_TRY call pg_re_throw instead of
ereport, " should
be "Yes, if the outer level PG_TRY reset the PG_exception_stack."

In pg_re_throw, if we could use '_local_sigjmp_buf' instead of the

global var PG_exception_stack, that would be great. We don't
need to worry about the invalid sigjum_buf.

So on the whole I doubt this'd be terribly useful in this form;

and I don't like the amount of code churn required.

Personally, I don't think I've ever made this particular mistake. I
think a much more common usage error is exiting the catch-block
without either throwing an error or rolling back a subtransaction. But
YMMV, of course.

We have had multiple instances of code "return"ing out of a PG_TRY,
so I fully agree that some better way to detect that would be good.
But maybe we ought to think about static analysis for that.

regards, tom lane

--
Best regards !
Xiaoran Wang

--
Best regards !
Xiaoran Wang

#6Tom Lane
tgl@sss.pgh.pa.us
In reply to: Xiaoran Wang (#4)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Xiaoran Wang <fanfuxiaoran@gmail.com> writes:

Yeah, that's the big problem. I don't have any confidence at all
that this would detect misuse. It'd require that the old stack
frame gets overwritten, which might not happen for a long time,
and it'd require that somebody eventually do a longjmp, which again
might not happen for a long time --- and when those did happen, the
error would be detected in someplace far away from the actual bug,
with little evidence remaining to help you localize it.

Exactly, it cannot tell you which PG_TRY left the invalid sigjmp_buf,
but to implement that is easy I think, recording the line num maybe.

I don't think you get to assume that the canary word gets overwritten
but debug data a few bytes away survives.

regards, tom lane

#7Xiaoran Wang
fanfuxiaoran@gmail.com
In reply to: Tom Lane (#6)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Tom Lane <tgl@sss.pgh.pa.us> 于2024年8月20日周二 11:44写道:

Xiaoran Wang <fanfuxiaoran@gmail.com> writes:

Yeah, that's the big problem. I don't have any confidence at all
that this would detect misuse. It'd require that the old stack
frame gets overwritten, which might not happen for a long time,
and it'd require that somebody eventually do a longjmp, which again
might not happen for a long time --- and when those did happen, the
error would be detected in someplace far away from the actual bug,
with little evidence remaining to help you localize it.

Exactly, it cannot tell you which PG_TRY left the invalid sigjmp_buf,
but to implement that is easy I think, recording the line num maybe.

I don't think you get to assume that the canary word gets overwritten
but debug data a few bytes away survives.

We can have a global var like 'PG_exception_debug' to save the debug info,
not saved in the local stack frame.

1. Before PG_TRY, we can save the debug info as 'save_debug_info', just
like
'_save_exception_stack', but not a pointer, memory copy the info into the
'_save_debug_info'.
2. In PG_TRY, set the new debug info into the global var
'PG_exception_debug'
3. And in PG_CATCH and PG_END_TRY, we can restore it.
So that the debug info will not be overwritten.

It doesn't seem guaranteed that the magic number will get
overwritten if you do something wrong;

That's my concern too. How about besides checking if the
'PG_exception_stack->magic'
is overwritten, also compare the address of 'PG_exception_stack->buf' and
current
stack top address? if the address of 'PG_exception_stack->buf' is lower
than current
stack top address, it must be invalid, otherwise , it must be overwritten.
Just like below

int stack_top;
if (PG_exception_stack->magic <= &stack_top || PG_exception_stack->magic !=
PG_exception_magic)
ereport(FATAL,
(errmsg("Invalid sigjum_buf, code in PG_TRY cannot contain"
" any non local control flow other than
ereport")));

I'm not sure if this can work, any thoughts?

regards, tom lane

--
Best regards !
Xiaoran Wang

#8Xing Guo
higuoxing@gmail.com
In reply to: Tom Lane (#3)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Hi

On Mon, Aug 19, 2024 at 10:12 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

We have had multiple instances of code "return"ing out of a PG_TRY,
so I fully agree that some better way to detect that would be good.
But maybe we ought to think about static analysis for that.

I have some static analysis scripts for detecting this kind of problem
(of mis-using PG_TRY). Not sure if my scripts are helpful here but I
would like to share them.

- A clang plugin for detecting unsafe control flow statements in
PG_TRY. https://github.com/higuoxing/clang-plugins/blob/main/lib/ReturnInPgTryBlockChecker.cpp
- Same as above, but in CodeQL[^1] script.
https://github.com/higuoxing/postgres.ql/blob/main/return-in-PG_TRY.ql
- A CodeQL script for detecting the missing of volatile qualifiers
(objects have been changed between the setjmp invocation and longjmp
call should be qualified with volatile).
https://github.com/higuoxing/postgres.ql/blob/main/volatile-in-PG_TRY.ql

Andres also has some compiler hacking to detect return statements in PG_TRY[^2].

[^1]: https://codeql.github.com/
[^2]: /messages/by-id/20230113054900.b7onkvwtkrykeu3z@awork3.anarazel.de

#9Xiaoran Wang
fanfuxiaoran@gmail.com
In reply to: Xing Guo (#8)
1 attachment(s)
Re: Improve pg_re_throw: check if sigjmp_buf is valid and report error

Hi,

I would like to update something about this idea.
I attached a new
patch 0003-Imporve-pg_re_throw-check-if-sigjmp_buf-is-valid-and.patch.
Not too many updates in it:
- replace the 'ereport' with Assert
- besides checking the PG_exception_stack->magic, also check the address of
PG_exception_stack,
if it is lower than current stack, it means that it is an invalid
longjmp_buf.

There are 2 other things I would like to update with you:
- As you are concerned with that this method is not reliable as the
PG_exception_stack.magic might
not be rewritten even if the longjmp_buf is not invalid anymore. I have
tested hat,
you are right, it is not reliable. I tested it with the flowing function on
my MacOS:
-----------------------
wrong_pg_try(int i)
{
if (i == 100)
ereport(ERROR,(errmsg("called wrong_pg_try")));
if ( i == 0)
{
PG_TRY();
{
return;
}
PG_CATCH();
{
}
PG_END_TRY();
}
else
wrong_pg_try(i + 1) + j;
}
------------------------
First call wrong_pg_try(0); then call wrong_pg_try(1);
It didn't report any error.
I found that is due to the content of PG_exception_stack is not rewritten.
There is no variable saved on the "wrong_pg_try()" function stack, but the
stacks of
the two call are not continuous, looks they are aligned.

Sure there are other cases that the PG_exception_stack.magic would not be
rewritten

- More details about the case that segmentfault occurs from __longjmp.
I have a signal function added in PG, in it the PG_TRY called and returned.
Then it left a invalid sigjmp_buf. It is a custom signal function handler,
can be
triggered by another process.
Then another sql was running and then interrupted it. At that
time, ProcessInterrupts
->ereport->pg_re_throw will called, it crashed

Xing Guo <higuoxing@gmail.com> 于2024年8月20日周二 22:21写道:

Hi

On Mon, Aug 19, 2024 at 10:12 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:

We have had multiple instances of code "return"ing out of a PG_TRY,
so I fully agree that some better way to detect that would be good.
But maybe we ought to think about static analysis for that.

I have some static analysis scripts for detecting this kind of problem
(of mis-using PG_TRY). Not sure if my scripts are helpful here but I
would like to share them.

- A clang plugin for detecting unsafe control flow statements in
PG_TRY.
https://github.com/higuoxing/clang-plugins/blob/main/lib/ReturnInPgTryBlockChecker.cpp
- Same as above, but in CodeQL[^1] script.
https://github.com/higuoxing/postgres.ql/blob/main/return-in-PG_TRY.ql
- A CodeQL script for detecting the missing of volatile qualifiers
(objects have been changed between the setjmp invocation and longjmp
call should be qualified with volatile).
https://github.com/higuoxing/postgres.ql/blob/main/volatile-in-PG_TRY.ql

Andres also has some compiler hacking to detect return statements in
PG_TRY[^2].

[^1]: https://codeql.github.com/
[^2]:
/messages/by-id/20230113054900.b7onkvwtkrykeu3z@awork3.anarazel.de

--
Best regards !
Xiaoran Wang

Attachments:

0003-Imporve-pg_re_throw-check-if-sigjmp_buf-is-valid-and.patchapplication/octet-stream; name=0003-Imporve-pg_re_throw-check-if-sigjmp_buf-is-valid-and.patchDownload
From e217c57624fd9bf952efff1476fec32604ef56d4 Mon Sep 17 00:00:00 2001
From: wangxiaoran <fanfuxiaoran@gmail.com>
Date: Mon, 19 Aug 2024 11:44:00 +0800
Subject: [PATCH] Imporve pg_re_throw: check if sigjmp_buf is valid and report
 error

If the code in PG_TRY contains any non local control flow other than
ereport(ERROR) like goto, break etc., the PG_CATCH or PG_END_TRY cannot
be called, then the PG_exception_stack will point to the memory whose
stack frame has been released. So after that, when the pg_re_throw
called, __longjmp() will crash and report Segmentation fault error.

In that case, to help developers to figure out the root cause easily, it is
better to report that 'the sigjmp_buf is invalid' rather than letting
the __longjmp report any error.

Addition to sigjmp_buf, add another field 'int magic' which is next to
the sigjum_buf in the local stack frame memory. The magic's value is always
'PG_exception_magic 0x12345678'. And in 'pg_re_throw' routine, check if
the magic's value is still '0x12345678', if not, that means the memory
where the 'PG_exception_stack' points to has been released, and the 'sigbuf'
must be invalid.
---
 src/backend/postmaster/autovacuum.c        |  8 ++++----
 src/backend/postmaster/bgworker.c          |  4 ++--
 src/backend/postmaster/bgwriter.c          |  4 ++--
 src/backend/postmaster/checkpointer.c      |  5 ++---
 src/backend/postmaster/pgarch.c            |  4 ++--
 src/backend/postmaster/walsummarizer.c     |  4 ++--
 src/backend/postmaster/walwriter.c         |  4 ++--
 src/backend/replication/logical/slotsync.c |  4 ++--
 src/backend/tcop/postgres.c                |  5 ++---
 src/backend/utils/error/elog.c             | 12 ++++++++++--
 src/include/utils/elog.h                   | 14 ++++++++++----
 11 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 7d0877c95e..4076d99938 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -361,7 +361,7 @@ static void avl_sigusr2_handler(SIGNAL_ARGS);
 void
 AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception	local_sigjmp_buf = {PG_exception_magic};
 
 	Assert(startup_data_len == 0);
 
@@ -436,7 +436,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
@@ -1359,7 +1359,7 @@ avl_sigusr2_handler(SIGNAL_ARGS)
 void
 AutoVacWorkerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	Oid			dbid;
 
 	Assert(startup_data_len == 0);
@@ -1421,7 +1421,7 @@ AutoVacWorkerMain(char *startup_data, size_t startup_data_len)
 	 * seem that this policy makes the HOLD_INTERRUPTS() call redundant, but
 	 * it is not since InterruptPending might be set already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index b83967cda3..d9cb01cd3a 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -716,7 +716,7 @@ bgworker_die(SIGNAL_ARGS)
 void
 BackgroundWorkerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception	local_sigjmp_buf = {PG_exception_magic};
 	BackgroundWorker *worker;
 	bgworker_main_type entrypt;
 
@@ -781,7 +781,7 @@ BackgroundWorkerMain(char *startup_data, size_t startup_data_len)
 	 *
 	 * We just need to clean up, report the error, and go away.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 0f75548759..445b56ac76 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -86,7 +86,7 @@ static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
 void
 BackgroundWriterMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext bgwriter_context;
 	bool		prev_hibernate;
 	WritebackContext wb_context;
@@ -150,7 +150,7 @@ BackgroundWriterMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 199f008bcd..e2a2c7336e 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -172,7 +172,7 @@ static void ReqCheckpointHandler(SIGNAL_ARGS);
 void
 CheckpointerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext checkpointer_context;
 
 	Assert(startup_data_len == 0);
@@ -248,7 +248,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
@@ -311,7 +311,6 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
 
 	/* We can now handle ereport(ERROR) */
 	PG_exception_stack = &local_sigjmp_buf;
-
 	/*
 	 * Unblock signals (they were blocked when the postmaster forked us)
 	 */
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 02f91431f5..3a40334bc4 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -515,7 +515,7 @@ pgarch_ArchiverCopyLoop(void)
 static bool
 pgarch_archiveXlog(char *xlog)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext oldcontext;
 	char		pathname[MAXPGPATH];
 	char		activitymsg[MAXFNAMELEN + 16];
@@ -544,7 +544,7 @@ pgarch_archiveXlog(char *xlog)
 	 * PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to
 	 * avoid the odd archiver restart doesn't seem worth it.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index daa7909382..da58b26aa8 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -210,7 +210,7 @@ WalSummarizerShmemInit(void)
 void
 WalSummarizerMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext context;
 
 	/*
@@ -273,7 +273,7 @@ WalSummarizerMain(char *startup_data, size_t startup_data_len)
 	/*
 	 * If an exception is encountered, processing resumes here.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 6e7918a78d..c18e7eb024 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -88,7 +88,7 @@ int			WalWriterFlushAfter = DEFAULT_WAL_WRITER_FLUSH_AFTER;
 void
 WalWriterMain(char *startup_data, size_t startup_data_len)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	MemoryContext walwriter_context;
 	int			left_till_hibernate;
 	bool		hibernating;
@@ -147,7 +147,7 @@ WalWriterMain(char *startup_data, size_t startup_data_len)
 	 * call redundant, but it is not since InterruptPending might be set
 	 * already.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* Since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 2d1914ce08..c35c54edab 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -1333,7 +1333,7 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t startup_data_len)
 	WalReceiverConn *wrconn = NULL;
 	char	   *dbname;
 	char	   *err;
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 	StringInfoData app_name;
 
 	Assert(startup_data_len == 0);
@@ -1366,7 +1366,7 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t startup_data_len)
 	 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
 	 * Therefore, we create our own exception handler to catch ERRORs.
 	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/* since not using PG_TRY, must reset error stack by hand */
 		error_context_stack = NULL;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 8bc6bea113..c2108ef72e 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4220,7 +4220,7 @@ PostgresSingleUserMain(int argc, char *argv[],
 void
 PostgresMain(const char *dbname, const char *username)
 {
-	sigjmp_buf	local_sigjmp_buf;
+	PG_exception    local_sigjmp_buf = {PG_exception_magic};
 
 	/* these must be volatile to ensure state is preserved across longjmp: */
 	volatile bool send_ready_for_query = true;
@@ -4422,7 +4422,7 @@ PostgresMain(const char *dbname, const char *username)
 	 * were inside a transaction.
 	 */
 
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	if (sigsetjmp(local_sigjmp_buf.buf, 1) != 0)
 	{
 		/*
 		 * NOTE: if you are tempted to add more code in this if-block,
@@ -4537,7 +4537,6 @@ PostgresMain(const char *dbname, const char *username)
 
 	/* We can now handle ereport(ERROR) */
 	PG_exception_stack = &local_sigjmp_buf;
-
 	if (!ignore_till_sync)
 		send_ready_for_query = true;	/* initially, or after error */
 
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 943d8588f3..09fd4a829d 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -93,7 +93,7 @@
 /* Global variables */
 ErrorContextCallback *error_context_stack = NULL;
 
-sigjmp_buf *PG_exception_stack = NULL;
+PG_exception *PG_exception_stack = NULL;
 
 /*
  * Hook for intercepting messages before they are sent to the server log.
@@ -1985,7 +1985,15 @@ pg_re_throw(void)
 {
 	/* If possible, throw the error to the next outer setjmp handler */
 	if (PG_exception_stack != NULL)
-		siglongjmp(*PG_exception_stack, 1);
+	{
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused"
+		void *stackTop = NULL;
+#pragma clang diagnostic pop
+		Assert((void *)PG_exception_stack > &stackTop ||
+				PG_exception_stack->magic == PG_exception_magic);
+		siglongjmp(PG_exception_stack->buf, 1);
+	}
 	else
 	{
 		/*
diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 054dd2bf62..d8dc45c1ff 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -383,11 +383,11 @@ extern PGDLLIMPORT ErrorContextCallback *error_context_stack;
  */
 #define PG_TRY(...)  \
 	do { \
-		sigjmp_buf *_save_exception_stack##__VA_ARGS__ = PG_exception_stack; \
+		PG_exception *_save_exception_stack##__VA_ARGS__ = PG_exception_stack; \
 		ErrorContextCallback *_save_context_stack##__VA_ARGS__ = error_context_stack; \
-		sigjmp_buf _local_sigjmp_buf##__VA_ARGS__; \
+		PG_exception _local_sigjmp_buf##__VA_ARGS__ = {PG_exception_magic}; \
 		bool _do_rethrow##__VA_ARGS__ = false; \
-		if (sigsetjmp(_local_sigjmp_buf##__VA_ARGS__, 0) == 0) \
+		if (sigsetjmp(_local_sigjmp_buf##__VA_ARGS__.buf, 0) == 0) \
 		{ \
 			PG_exception_stack = &_local_sigjmp_buf##__VA_ARGS__
 
@@ -426,7 +426,13 @@ extern PGDLLIMPORT ErrorContextCallback *error_context_stack;
 	(pg_re_throw(), pg_unreachable())
 #endif
 
-extern PGDLLIMPORT sigjmp_buf *PG_exception_stack;
+#define PG_exception_magic 0x12345678
+typedef struct PG_exception
+{
+	int magic;
+	sigjmp_buf buf;
+} PG_exception;
+extern PGDLLIMPORT PG_exception *PG_exception_stack;
 
 
 /* Stuff that error handlers might want to use */
-- 
2.39.3 (Apple Git-146)