cannot drop replication slot if server is running in single-user mode
Hi,
I found that if server is running in single-user mode , there we can
create replication slot but cannot drop it .
backend> SELECT * FROM pg_create_physical_replication_slot('p');
2018-03-06 13:20:03.441 GMT [14869] LOG: statement: SELECT * FROM
pg_create_physical_replication_slot('p');
1: slot_name (typeid = 19, len = 64, typmod = -1, byval = f)
2: lsn (typeid = 3220, len = 8, typmod = -1, byval = t)
----
1: slot_name = "p" (typeid = 19, len = 64, typmod = -1, byval = f)
----
backend> select pg_drop_replication_slot('p');
2018-03-06 13:20:24.390 GMT [14869] LOG: statement: select
pg_drop_replication_slot('p');
1: pg_drop_replication_slot (typeid = 2278, len = 4, typmod =
-1, byval = t)
----
2018-03-06 13:20:24.391 GMT [14869] ERROR: epoll_ctl() failed: Bad file
descriptor
2018-03-06 13:20:24.391 GMT [14869] STATEMENT: select
pg_drop_replication_slot('p');
--
regards,tushar
EnterpriseDB https://www.enterprisedb.com/
The Enterprise PostgreSQL Company
On Tue, Mar 6, 2018 at 06:59:33PM +0530, tushar wrote:
Hi,
I found that if server is running in single-user mode , there we can create
replication slot but cannot drop it .backend> SELECT * FROM pg_create_physical_replication_slot('p');
2018-03-06 13:20:03.441 GMT [14869] LOG:� statement: SELECT * FROM
pg_create_physical_replication_slot('p');��� �1: slot_name��� (typeid = 19, len = 64, typmod = -1, byval = f)
��� �2: lsn��� (typeid = 3220, len = 8, typmod = -1, byval = t)
��� ----
��� �1: slot_name = "p"��� (typeid = 19, len = 64, typmod = -1, byval = f)
��� ----
backend> select pg_drop_replication_slot('p');
2018-03-06 13:20:24.390 GMT [14869] LOG:� statement: select
pg_drop_replication_slot('p');��� �1: pg_drop_replication_slot��� (typeid = 2278, len = 4, typmod = -1,
byval = t)
��� ----
2018-03-06 13:20:24.391 GMT [14869] ERROR:� epoll_ctl() failed: Bad file
descriptor
2018-03-06 13:20:24.391 GMT [14869] STATEMENT:� select
pg_drop_replication_slot('p');
I can confirm this bug exists in single-user mode.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ As you are, so once was I. As I am, so you will be. +
+ Ancient Roman grave inscription +
On 2018-03-29 17:42:57 -0400, Bruce Momjian wrote:
On Tue, Mar 6, 2018 at 06:59:33PM +0530, tushar wrote:
Hi,
I found that if server is running in single-user mode , there we can create
replication slot but cannot drop it .backend> SELECT * FROM pg_create_physical_replication_slot('p');
2018-03-06 13:20:03.441 GMT [14869] LOG:� statement: SELECT * FROM
pg_create_physical_replication_slot('p');��� �1: slot_name��� (typeid = 19, len = 64, typmod = -1, byval = f)
��� �2: lsn��� (typeid = 3220, len = 8, typmod = -1, byval = t)
��� ----
��� �1: slot_name = "p"��� (typeid = 19, len = 64, typmod = -1, byval = f)
��� ----
backend> select pg_drop_replication_slot('p');
2018-03-06 13:20:24.390 GMT [14869] LOG:� statement: select
pg_drop_replication_slot('p');��� �1: pg_drop_replication_slot��� (typeid = 2278, len = 4, typmod = -1,
byval = t)
��� ----
2018-03-06 13:20:24.391 GMT [14869] ERROR:� epoll_ctl() failed: Bad file
descriptor
2018-03-06 13:20:24.391 GMT [14869] STATEMENT:� select
pg_drop_replication_slot('p');I can confirm this bug exists in single-user mode.
I'm not sure we need to do anything about this, personally. This seems
like a fairly rare thing to do in a mode that's definitely not intended
to be general purpose.
Greetings,
Andres Freund
On Thu, Mar 29, 2018 at 02:51:24PM -0700, Andres Freund wrote:
On 2018-03-29 17:42:57 -0400, Bruce Momjian wrote:
On Tue, Mar 6, 2018 at 06:59:33PM +0530, tushar wrote:
Hi,
I found that if server is running in single-user mode , there we can create
replication slot but cannot drop it .backend> SELECT * FROM pg_create_physical_replication_slot('p');
2018-03-06 13:20:03.441 GMT [14869] LOG:� statement: SELECT * FROM
pg_create_physical_replication_slot('p');��� �1: slot_name��� (typeid = 19, len = 64, typmod = -1, byval = f)
��� �2: lsn��� (typeid = 3220, len = 8, typmod = -1, byval = t)
��� ----
��� �1: slot_name = "p"��� (typeid = 19, len = 64, typmod = -1, byval = f)
��� ----
backend> select pg_drop_replication_slot('p');
2018-03-06 13:20:24.390 GMT [14869] LOG:� statement: select
pg_drop_replication_slot('p');��� �1: pg_drop_replication_slot��� (typeid = 2278, len = 4, typmod = -1,
byval = t)
��� ----
2018-03-06 13:20:24.391 GMT [14869] ERROR:� epoll_ctl() failed: Bad file
descriptor
2018-03-06 13:20:24.391 GMT [14869] STATEMENT:� select
pg_drop_replication_slot('p');I can confirm this bug exists in single-user mode.
I'm not sure we need to do anything about this, personally. This seems
like a fairly rare thing to do in a mode that's definitely not intended
to be general purpose.
I think the question is whether this is exposing a bug or is expected
behavior.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ As you are, so once was I. As I am, so you will be. +
+ Ancient Roman grave inscription +
On 2018-03-29 21:07:36 -0400, Bruce Momjian wrote:
I think the question is whether this is exposing a bug or is expected
behavior.
It's unsurprising. Acquiring a slot uses a condition variable, which
uses latches to sleep. Latches don't work in single user mode. Boom.
Greetings,
Andres Freund
On Thu, Mar 29, 2018 at 06:09:28PM -0700, Andres Freund wrote:
On 2018-03-29 21:07:36 -0400, Bruce Momjian wrote:
I think the question is whether this is exposing a bug or is expected
behavior.It's unsurprising. Acquiring a slot uses a condition variable, which
uses latches to sleep. Latches don't work in single user mode. Boom.
OK, thanks for confirming.
--
Bruce Momjian <bruce@momjian.us> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ As you are, so once was I. As I am, so you will be. +
+ Ancient Roman grave inscription +
On Thu, Mar 29, 2018 at 5:51 PM, Andres Freund <andres@anarazel.de> wrote:
2018-03-06 13:20:24.391 GMT [14869] ERROR: epoll_ctl() failed: Bad file
descriptorI can confirm this bug exists in single-user mode.
I'm not sure we need to do anything about this, personally. This seems
like a fairly rare thing to do in a mode that's definitely not intended
to be general purpose.
Mmmph. I don't really think it's possible to view a user-visible
EBADF as anything other than a coding error.
--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Robert Haas <robertmhaas@gmail.com> writes:
On Thu, Mar 29, 2018 at 5:51 PM, Andres Freund <andres@anarazel.de> wrote:
I'm not sure we need to do anything about this, personally. This seems
like a fairly rare thing to do in a mode that's definitely not intended
to be general purpose.
Mmmph. I don't really think it's possible to view a user-visible
EBADF as anything other than a coding error.
If we think this is worth spending any code at all on, what I'd suggest
is to reject replication-related commands at some early stage if not
IsUnderPostmaster.
regards, tom lane
Robert Haas wrote:
On Thu, Mar 29, 2018 at 5:51 PM, Andres Freund <andres@anarazel.de> wrote:
2018-03-06 13:20:24.391 GMT [14869] ERROR: epoll_ctl() failed: Bad file
descriptorI can confirm this bug exists in single-user mode.
I'm not sure we need to do anything about this, personally. This seems
like a fairly rare thing to do in a mode that's definitely not intended
to be general purpose.Mmmph. I don't really think it's possible to view a user-visible
EBADF as anything other than a coding error.
IMO the problem is not the user-visible EBADF -- the problem is that the
user might be attempting to clean up from some previous mistake by
removing a replication slot. Using single-user mode might be a strange
tool, but it's not completely unreasonable. Is it really all that
difficult to fix slot acquisition for it?
I agree with Tom that for any other operation we can just reject it
early if not under postmaster, but dropping a slot seems a special case.
--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
Alvaro Herrera wrote:
Robert Haas wrote:
On Thu, Mar 29, 2018 at 5:51 PM, Andres Freund <andres@anarazel.de> wrote:
2018-03-06 13:20:24.391 GMT [14869] ERROR: epoll_ctl() failed: Bad file
descriptorI can confirm this bug exists in single-user mode.
I'm not sure we need to do anything about this, personally. This seems
like a fairly rare thing to do in a mode that's definitely not intended
to be general purpose.Mmmph. I don't really think it's possible to view a user-visible
EBADF as anything other than a coding error.IMO the problem is not the user-visible EBADF -- the problem is that the
user might be attempting to clean up from some previous mistake by
removing a replication slot. Using single-user mode might be a strange
tool, but it's not completely unreasonable. Is it really all that
difficult to fix slot acquisition for it?
Here's a patch (against pg10) to fix this problem.
--
�lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
Attachments:
0001-don-t-try-cond-variables-if-single-user-mode.patchtext/plain; charset=us-asciiDownload
From 59eb9253797776f21267d35b608582b5fd3ff67c Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Tue, 8 May 2018 11:48:56 -0300
Subject: [PATCH 1/2] don't try cond variables if single user mode
---
src/backend/replication/slot.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index a8a16f55e9..a43f402214 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -351,20 +351,28 @@ retry:
if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
{
/*
- * This is the slot we want. We don't know yet if it's active, so
- * get ready to sleep on it in case it is. (We may end up not
- * sleeping, but we don't want to do this while holding the
- * spinlock.)
+ * This is the slot we want; check if it's active under some other
+ * process. In single user mode, we don't need this check.
*/
- ConditionVariablePrepareToSleep(&s->active_cv);
+ if (IsUnderPostmaster)
+ {
+ /*
+ * Get ready to sleep on it in case it is active. (We may end
+ * up not sleeping, but we don't want to do this while holding
+ * the spinlock.)
+ */
+ ConditionVariablePrepareToSleep(&s->active_cv);
- SpinLockAcquire(&s->mutex);
+ SpinLockAcquire(&s->mutex);
- active_pid = s->active_pid;
- if (active_pid == 0)
- active_pid = s->active_pid = MyProcPid;
+ active_pid = s->active_pid;
+ if (active_pid == 0)
+ active_pid = s->active_pid = MyProcPid;
- SpinLockRelease(&s->mutex);
+ SpinLockRelease(&s->mutex);
+ }
+ else
+ active_pid = MyProcPid;
slot = s;
break;
--
2.11.0
0002-simplify-baroque-coding.patchtext/plain; charset=us-asciiDownload
From 008ec88995b2ae1d851033222bfbd8dfebd7a368 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Tue, 8 May 2018 11:49:08 -0300
Subject: [PATCH 2/2] simplify baroque coding
---
src/backend/replication/slot.c | 34 +++++++++++-----------------------
1 file changed, 11 insertions(+), 23 deletions(-)
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index a43f402214..664bf2e3ad 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -99,7 +99,6 @@ ReplicationSlot *MyReplicationSlot = NULL;
int max_replication_slots = 0; /* the maximum number of replication
* slots */
-static void ReplicationSlotDropAcquired(void);
static void ReplicationSlotDropPtr(ReplicationSlot *slot);
/* internal persistency functions */
@@ -434,7 +433,8 @@ ReplicationSlotRelease(void)
* fail, all that may happen is an incomplete cleanup of the on-disk
* data.
*/
- ReplicationSlotDropAcquired();
+ ReplicationSlotDropPtr(MyReplicationSlot);
+ MyReplicationSlot = NULL;
}
/*
@@ -520,23 +520,10 @@ ReplicationSlotDrop(const char *name, bool nowait)
ReplicationSlotAcquire(name, nowait);
- ReplicationSlotDropAcquired();
-}
+ ReplicationSlotDropPtr(MyReplicationSlot);
-/*
- * Permanently drop the currently acquired replication slot.
- */
-static void
-ReplicationSlotDropAcquired(void)
-{
- ReplicationSlot *slot = MyReplicationSlot;
-
- Assert(MyReplicationSlot != NULL);
-
- /* slot isn't acquired anymore */
+ /* no longer my slot */
MyReplicationSlot = NULL;
-
- ReplicationSlotDropPtr(slot);
}
/*
@@ -923,7 +910,7 @@ restart:
if (s->data.database != dboid)
continue;
- /* acquire slot, so ReplicationSlotDropAcquired can be reused */
+ /* acquire slot */
SpinLockAcquire(&s->mutex);
/* can't change while ReplicationSlotControlLock is held */
slotname = NameStr(s->data.name);
@@ -949,16 +936,17 @@ restart:
slotname, active_pid)));
/*
- * To avoid duplicating ReplicationSlotDropAcquired() and to avoid
- * holding ReplicationSlotControlLock over filesystem operations,
- * release ReplicationSlotControlLock and use
- * ReplicationSlotDropAcquired.
+ * To avoid holding ReplicationSlotControlLock over filesystem
+ * operations, release ReplicationSlotControlLock while we drop
+ * the slot.
*
* As that means the set of slots could change, restart scan from the
* beginning each time we release the lock.
*/
LWLockRelease(ReplicationSlotControlLock);
- ReplicationSlotDropAcquired();
+ ReplicationSlotDropPtr(MyReplicationSlot);
+ /* no longer my slot */
+ MyReplicationSlot = NULL;
goto restart;
}
LWLockRelease(ReplicationSlotControlLock);
--
2.11.0