Drop replslot after pgstat_shutdown cause assert coredump

Started by houzj.fnst@fujitsu.comover 4 years ago6 messages
#1houzj.fnst@fujitsu.com
houzj.fnst@fujitsu.com

Hi,

When testing logical replication, I found a case which caused assert coredump on
latest HEAD. The reproduction steps are as follows:

1)
----publisher----
create table test(i int);
create publication pub for table test;
begin;

insert into test values(1);

2)
----subscriber----
create table test(i int);
create subscription sub connection 'dbname=postgres port=10000' publication pub;
- wait for a second and Ctrl-C

3)
----publisher----
commit;

I can see the walsender tried to release a not-quite-ready repliaction slot
that was created when create a subscription. But the pgstat has been shutdown
before invoking ReplicationSlotRelease().

The stack is as follows:

#2 in ExceptionalCondition (pgstat_is_initialized && !pgstat_is_shutdown)
#3 in pgstat_assert_is_up () at pgstat.c:4852
#4 in pgstat_send (msg=msg@entry=0x7ffe716f7470, len=len@entry=144) at pgstat.c:3075
#5 in pgstat_report_replslot_drop (slotname=slotname@entry=0x7fbcf57a3c98 "sub") at pgstat.c:1869
#6 in ReplicationSlotDropPtr (slot=0x7fbcf57a3c80) at slot.c:696
#7 in ReplicationSlotDropAcquired () at slot.c:585
#8 in ReplicationSlotRelease () at slot.c:482
#9 in ProcKill (code=<optimized out>, arg=<optimized out>) at proc.c:852
#10 in shmem_exit (code=code@entry=0) at ipc.c:272
#11 in proc_exit_prepare (code=code@entry=0) at ipc.c:194
#12 in proc_exit (code=code@entry=0) at ipc.c:107
#13 in ProcessRepliesIfAny () at walsender.c:1807
#14 in WalSndWaitForWal (loc=loc@entry=22087632) at walsender.c:1417
#15 in logical_read_xlog_page (state=0x2f8c600, targetPagePtr=22085632,
reqLen=, targetRecPtr=, cur_page=0x2f6c1e0 "\016\321\005") at walsender.c:821
#16 in ReadPageInternal (state=state@entry=0x2f8c600,
pageptr=pageptr@entry=22085632, reqLen=reqLen@entry=2000) at xlogreader.c:667
#17 in XLogReadRecord (state=0x2f8c600,
errormsg=errormsg@entry=0x7ffe716f7f98) at xlogreader.c:337
#18 in DecodingContextFindStartpoint (ctx=ctx@entry=0x2f8c240)
at logical.c:606
#19 in CreateReplicationSlot (cmd=cmd@entry=0x2f1aef0)

Is this behavior expected ?

Best regards,
Hou zhijie

#2Greg Nancarrow
gregn4422@gmail.com
In reply to: houzj.fnst@fujitsu.com (#1)
Re: Drop replslot after pgstat_shutdown cause assert coredump

On Mon, Oct 11, 2021 at 6:55 PM houzj.fnst@fujitsu.com
<houzj.fnst@fujitsu.com> wrote:

I can see the walsender tried to release a not-quite-ready repliaction slot
that was created when create a subscription. But the pgstat has been shutdown
before invoking ReplicationSlotRelease().

The stack is as follows:

#2 in ExceptionalCondition (pgstat_is_initialized && !pgstat_is_shutdown)
#3 in pgstat_assert_is_up () at pgstat.c:4852
#4 in pgstat_send (msg=msg@entry=0x7ffe716f7470, len=len@entry=144) at pgstat.c:3075
#5 in pgstat_report_replslot_drop (slotname=slotname@entry=0x7fbcf57a3c98 "sub") at pgstat.c:1869
#6 in ReplicationSlotDropPtr (slot=0x7fbcf57a3c80) at slot.c:696
#7 in ReplicationSlotDropAcquired () at slot.c:585
#8 in ReplicationSlotRelease () at slot.c:482
#9 in ProcKill (code=<optimized out>, arg=<optimized out>) at proc.c:852
#10 in shmem_exit (code=code@entry=0) at ipc.c:272
#11 in proc_exit_prepare (code=code@entry=0) at ipc.c:194
#12 in proc_exit (code=code@entry=0) at ipc.c:107
#13 in ProcessRepliesIfAny () at walsender.c:1807
#14 in WalSndWaitForWal (loc=loc@entry=22087632) at walsender.c:1417
#15 in logical_read_xlog_page (state=0x2f8c600, targetPagePtr=22085632,
reqLen=, targetRecPtr=, cur_page=0x2f6c1e0 "\016\321\005") at walsender.c:821
#16 in ReadPageInternal (state=state@entry=0x2f8c600,
pageptr=pageptr@entry=22085632, reqLen=reqLen@entry=2000) at xlogreader.c:667
#17 in XLogReadRecord (state=0x2f8c600,
errormsg=errormsg@entry=0x7ffe716f7f98) at xlogreader.c:337
#18 in DecodingContextFindStartpoint (ctx=ctx@entry=0x2f8c240)
at logical.c:606
#19 in CreateReplicationSlot (cmd=cmd@entry=0x2f1aef0)

Is this behavior expected ?

I'd say it's not!

Just looking at the stacktrace, I'm thinking that the following commit
may have had a bearing on this problem, by causing pgstat to be
shutdown earlier:

commit fb2c5028e63589c01fbdf8b031be824766dc7a68
Author: Andres Freund <andres@anarazel.de>
Date: Fri Aug 6 10:05:57 2021 -0700

pgstat: Schedule per-backend pgstat shutdown via before_shmem_exit().

Can you see if the problem can be reproduced prior to this commit?

Regards,
Greg Nancarrow
Fujitsu Australia

#3Fujii Masao
masao.fujii@oss.nttdata.com
In reply to: Greg Nancarrow (#2)
Re: Drop replslot after pgstat_shutdown cause assert coredump

On 2021/10/11 22:15, Greg Nancarrow wrote:

On Mon, Oct 11, 2021 at 6:55 PM houzj.fnst@fujitsu.com
<houzj.fnst@fujitsu.com> wrote:

I can see the walsender tried to release a not-quite-ready repliaction slot
that was created when create a subscription. But the pgstat has been shutdown
before invoking ReplicationSlotRelease().

The stack is as follows:

#2 in ExceptionalCondition (pgstat_is_initialized && !pgstat_is_shutdown)
#3 in pgstat_assert_is_up () at pgstat.c:4852
#4 in pgstat_send (msg=msg@entry=0x7ffe716f7470, len=len@entry=144) at pgstat.c:3075
#5 in pgstat_report_replslot_drop (slotname=slotname@entry=0x7fbcf57a3c98 "sub") at pgstat.c:1869
#6 in ReplicationSlotDropPtr (slot=0x7fbcf57a3c80) at slot.c:696
#7 in ReplicationSlotDropAcquired () at slot.c:585
#8 in ReplicationSlotRelease () at slot.c:482
#9 in ProcKill (code=<optimized out>, arg=<optimized out>) at proc.c:852
#10 in shmem_exit (code=code@entry=0) at ipc.c:272
#11 in proc_exit_prepare (code=code@entry=0) at ipc.c:194
#12 in proc_exit (code=code@entry=0) at ipc.c:107
#13 in ProcessRepliesIfAny () at walsender.c:1807
#14 in WalSndWaitForWal (loc=loc@entry=22087632) at walsender.c:1417
#15 in logical_read_xlog_page (state=0x2f8c600, targetPagePtr=22085632,
reqLen=, targetRecPtr=, cur_page=0x2f6c1e0 "\016\321\005") at walsender.c:821
#16 in ReadPageInternal (state=state@entry=0x2f8c600,
pageptr=pageptr@entry=22085632, reqLen=reqLen@entry=2000) at xlogreader.c:667
#17 in XLogReadRecord (state=0x2f8c600,
errormsg=errormsg@entry=0x7ffe716f7f98) at xlogreader.c:337
#18 in DecodingContextFindStartpoint (ctx=ctx@entry=0x2f8c240)
at logical.c:606
#19 in CreateReplicationSlot (cmd=cmd@entry=0x2f1aef0)

Is this behavior expected ?

I'd say it's not!

Yes. I think this is a bug.

Just looking at the stacktrace, I'm thinking that the following commit
may have had a bearing on this problem, by causing pgstat to be
shutdown earlier:

commit fb2c5028e63589c01fbdf8b031be824766dc7a68
Author: Andres Freund <andres@anarazel.de>
Date: Fri Aug 6 10:05:57 2021 -0700

pgstat: Schedule per-backend pgstat shutdown via before_shmem_exit().

Can you see if the problem can be reproduced prior to this commit?

Even in prior to the commit, pgstat_shutdown_hook() can be called
before ProcKill() at the backend exit, so ISTM that the problem can
be reproduced.

Probably we need to make sure that pgstat_shutdown_hook() is called
after ProcKill(), e.g., by registering pgstat_shutdown_hook() into
on_proc_exit_list (I'm not sure if this change is safe, though).
Or maybe pgstat logic for replication slot drop needs to be overhauled.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION

#4Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Fujii Masao (#3)
Re: Drop replslot after pgstat_shutdown cause assert coredump

At Fri, 22 Oct 2021 02:10:21 +0900, Fujii Masao <masao.fujii@oss.nttdata.com> wrote in

Even in prior to the commit, pgstat_shutdown_hook() can be called
before ProcKill() at the backend exit, so ISTM that the problem can
be reproduced.

Probably we need to make sure that pgstat_shutdown_hook() is called
after ProcKill(), e.g., by registering pgstat_shutdown_hook() into

Considering the coming shared-memory based stats collector, pgstat
must be shutdown before shared memory shutdown. Every operation that
requires stats collector also must be shut down before the pgstat
shutdown. A naive solution would be having before-pgstat-shutdown hook
but I'm not sure it's the right direction.

on_proc_exit_list (I'm not sure if this change is safe, though).
Or maybe pgstat logic for replication slot drop needs to be
overhauled.

I think we don't want to lose the stats numbers of the to-be-dropped
slot. So the slot-drop must happen before pgstat shutdown.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

#5Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Kyotaro Horiguchi (#4)
1 attachment(s)
Re: Drop replslot after pgstat_shutdown cause assert coredump

I said:

Considering the coming shared-memory based stats collector, pgstat
must be shutdown before shared memory shutdown. Every operation that
requires stats collector also must be shut down before the pgstat
shutdown. A naive solution would be having before-pgstat-shutdown hook
but I'm not sure it's the right direction.

For this particular issue, we can add an explicit initilization phase
of replication slot per backend, which simply registers before_shmem
callback. It would work fine unless we carelessly place the
initialization before pgstat_initialize() (not pgstat_init()) call.

(Honestly, I haven't been able to reproduce the issue itself for
myself yet..)

on_proc_exit_list (I'm not sure if this change is safe, though).
Or maybe pgstat logic for replication slot drop needs to be
overhauled.

I think we don't want to lose the stats numbers of the to-be-dropped
slot. So the slot-drop must happen before pgstat shutdown.

I haven't sought other similar issues. I'm going to check it if they,
if any, can be fixe the same way.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachments:

replslot_use_shmem_hook.diff.txttext/plain; charset=us-asciiDownload
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b7d0fbaefd..13762f82af 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -306,6 +306,8 @@ static bool pgstat_is_initialized = false;
 static bool pgstat_is_shutdown = false;
 #endif
 
+/* per-backend variable for assertion */
+bool pgstat_initialized PG_USED_FOR_ASSERTS_ONLY = false;
 
 /* ----------
  * Local function forward declarations
@@ -3036,6 +3038,7 @@ pgstat_initialize(void)
 
 	/* Set up a process-exit hook to clean up */
 	before_shmem_exit(pgstat_shutdown_hook, 0);
+	pgstat_initialized = true;
 
 #ifdef USE_ASSERT_CHECKING
 	pgstat_is_initialized = true;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 1c6c0c7ce2..e0430aefa9 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -46,6 +46,7 @@
 #include "pgstat.h"
 #include "replication/slot.h"
 #include "storage/fd.h"
+#include "storage/ipc.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
@@ -160,6 +161,33 @@ ReplicationSlotsShmemInit(void)
 	}
 }
 
+/*
+ * Exit hook to cleanup replication slots.
+ */
+static void
+ReplicationSlotShutdown(int code, Datum arg)
+{
+	/* Make sure active replication slots are released */
+	if (MyReplicationSlot != NULL)
+		ReplicationSlotRelease();
+
+	/* Also cleanup all the temporary slots. */
+	ReplicationSlotCleanup();
+}
+
+/*
+ * Initialize of replication slot facility per backend.
+ */
+void
+ReplicationSlotInit(void)
+{
+	if (max_replication_slots > 0)
+	{
+		assert_pgstat_initialized();
+		before_shmem_exit(ReplicationSlotShutdown, (Datum) 0);
+	}
+}
+
 /*
  * Check whether the passed slot name is valid and report errors at elevel.
  *
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b7d9da0aa9..b593ec8964 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -41,7 +41,6 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
-#include "replication/slot.h"
 #include "replication/syncrep.h"
 #include "replication/walsender.h"
 #include "storage/condition_variable.h"
@@ -847,13 +846,6 @@ ProcKill(int code, Datum arg)
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
 
-	/* Make sure active replication slots are released */
-	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
-
-	/* Also cleanup all the temporary slots. */
-	ReplicationSlotCleanup();
-
 	/*
 	 * Detach from any lock group of which we are a member.  If the leader
 	 * exist before all other group members, its PGPROC will remain allocated
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 78bc64671e..dd83864b54 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -40,6 +40,7 @@
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/postmaster.h"
+#include "replication/slot.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
@@ -531,6 +532,12 @@ BaseInit(void)
 	 */
 	pgstat_initialize();
 
+	/*
+	 * Initialize replication slot. This must be after pgstat_initialize() so
+	 * that the cleanup happnes before the shutdown of pgstat facility.
+	 */
+	ReplicationSlotInit();
+
 	/* Do local initialization of storage and buffer managers */
 	InitSync();
 	smgrinit();
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index bcd3588ea2..f06810c115 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -992,6 +992,14 @@ extern PgStat_Counter pgStatTransactionIdleTime;
  */
 extern SessionEndType pgStatSessionEndCause;
 
+/*
+ * modules requires pgstat required to install their before-shmem hook after
+ * pgstat. This variable is used to make sure that.
+ */
+extern bool pgstat_initialized;
+#define assert_pgstat_initialized() Assert (pgstat_initialized);
+
+
 /* ----------
  * Functions called from postmaster
  * ----------
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index 53d773ccff..124d107662 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -193,6 +193,9 @@ extern PGDLLIMPORT int max_replication_slots;
 extern Size ReplicationSlotsShmemSize(void);
 extern void ReplicationSlotsShmemInit(void);
 
+/* per-backend initialization */
+extern void ReplicationSlotInit(void);
+
 /* management of individual slots */
 extern void ReplicationSlotCreate(const char *name, bool db_specific,
 								  ReplicationSlotPersistency p, bool two_phase);
#6Kyotaro Horiguchi
horikyota.ntt@gmail.com
In reply to: Kyotaro Horiguchi (#5)
1 attachment(s)
Re: Drop replslot after pgstat_shutdown cause assert coredump

At Fri, 22 Oct 2021 11:43:08 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in

(Honestly, I haven't been able to reproduce the issue itself for
myself yet..)

I managed to reproduce it for me.

psql "dbname=postgres replication=database"
postgres=# CREATE_REPLICATION_SLOT "ts1" TEMPORARY LOGICAL "pgoutput";
postgres=# C-d
(crash)

And confirmed that it doesn't happen with the fix.

I haven't sought other similar issues. I'm going to check it if they,
if any, can be fixed the same way.

FileClose calls pgstat_report_tempfile() via
BeforeShmemExit_Files. It is already registered after pgstat.
I added a call to assert_pgstat_initialized() to

All other pgstat functions seem to be called outside shmem_exit.

regards.

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachments:

replslot_use_shmem_hook_2.diff.txttext/plain; charset=us-asciiDownload
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b7d0fbaefd..13762f82af 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -306,6 +306,8 @@ static bool pgstat_is_initialized = false;
 static bool pgstat_is_shutdown = false;
 #endif
 
+/* per-backend variable for assertion */
+bool pgstat_initialized PG_USED_FOR_ASSERTS_ONLY = false;
 
 /* ----------
  * Local function forward declarations
@@ -3036,6 +3038,7 @@ pgstat_initialize(void)
 
 	/* Set up a process-exit hook to clean up */
 	before_shmem_exit(pgstat_shutdown_hook, 0);
+	pgstat_initialized = true;
 
 #ifdef USE_ASSERT_CHECKING
 	pgstat_is_initialized = true;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 1c6c0c7ce2..b2c719d31e 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -46,6 +46,7 @@
 #include "pgstat.h"
 #include "replication/slot.h"
 #include "storage/fd.h"
+#include "storage/ipc.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
@@ -160,6 +161,33 @@ ReplicationSlotsShmemInit(void)
 	}
 }
 
+/*
+ * Exit hook to cleanup replication slots.
+ */
+static void
+ReplicationSlotShutdown(int code, Datum arg)
+{
+	/* Make sure active replication slots are released */
+	if (MyReplicationSlot != NULL)
+		ReplicationSlotRelease();
+
+	/* Also cleanup all the temporary slots. */
+	ReplicationSlotCleanup();
+}
+
+/*
+ * Initialize replication slot facility per backend.
+ */
+void
+ReplicationSlotInit(void)
+{
+	if (max_replication_slots < 1)
+		return;
+
+	assert_pgstat_initialized();	/* the callback requires pgstat */
+	before_shmem_exit(ReplicationSlotShutdown, (Datum) 0);
+}
+
 /*
  * Check whether the passed slot name is valid and report errors at elevel.
  *
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f9cda6906d..8fbacdc86c 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -917,6 +917,7 @@ InitTemporaryFileAccess(void)
 	 * Register before-shmem-exit hook to ensure temp files are dropped while
 	 * we can still report stats.
 	 */
+	assert_pgstat_initialized();	/* the callback requires pgstat */
 	before_shmem_exit(BeforeShmemExit_Files, 0);
 
 #ifdef USE_ASSERT_CHECKING
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b7d9da0aa9..b593ec8964 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -41,7 +41,6 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
-#include "replication/slot.h"
 #include "replication/syncrep.h"
 #include "replication/walsender.h"
 #include "storage/condition_variable.h"
@@ -847,13 +846,6 @@ ProcKill(int code, Datum arg)
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
 
-	/* Make sure active replication slots are released */
-	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
-
-	/* Also cleanup all the temporary slots. */
-	ReplicationSlotCleanup();
-
 	/*
 	 * Detach from any lock group of which we are a member.  If the leader
 	 * exist before all other group members, its PGPROC will remain allocated
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 78bc64671e..b7c1a400f5 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -40,6 +40,7 @@
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "postmaster/postmaster.h"
+#include "replication/slot.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
@@ -531,6 +532,12 @@ BaseInit(void)
 	 */
 	pgstat_initialize();
 
+	/*
+	 * Initialize replication slot. This must be after pgstat_initialize() so
+	 * that the cleanup happens before the shutdown of pgstat facility.
+	 */
+	ReplicationSlotInit();
+
 	/* Do local initialization of storage and buffer managers */
 	InitSync();
 	smgrinit();
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index bcd3588ea2..3727e4cd53 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -992,6 +992,15 @@ extern PgStat_Counter pgStatTransactionIdleTime;
  */
 extern SessionEndType pgStatSessionEndCause;
 
+/*
+ * Modules that require pgstat (at process exit) should install their
+ * before-shmem hook after pgstat. This variable is used to make sure of that
+ * prerequisite.
+ */
+extern bool pgstat_initialized;
+#define assert_pgstat_initialized() Assert (pgstat_initialized);
+
+
 /* ----------
  * Functions called from postmaster
  * ----------
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index 53d773ccff..124d107662 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -193,6 +193,9 @@ extern PGDLLIMPORT int max_replication_slots;
 extern Size ReplicationSlotsShmemSize(void);
 extern void ReplicationSlotsShmemInit(void);
 
+/* per-backend initialization */
+extern void ReplicationSlotInit(void);
+
 /* management of individual slots */
 extern void ReplicationSlotCreate(const char *name, bool db_specific,
 								  ReplicationSlotPersistency p, bool two_phase);