old_snapshot_threshold bottleneck on replica
Hi!
One of our customers stumbled onto a significant performance degradation
while running multiple OLAP-like queries on a replica.
After some investigation, it became clear that the problem is in accessing
the old_snapshot_threshold parameter, which is guarded by mutex_threshold.
This is not a problem on the primary server, since we rarely call
GetOldSnapshotThresholdTimestamp:
void
TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
{
    if (RelationAllowsEarlyPruning(relation)
        && (snapshot)->whenTaken < GetOldSnapshotThresholdTimestamp())
        ereport(ERROR,
                (errcode(ERRCODE_SNAPSHOT_TOO_OLD),
                 errmsg("snapshot too old")));
But on a replica, we have to call GetOldSnapshotThresholdTimestamp much
more often, so it becomes a bottleneck. The customer worked around the issue
by setting old_snapshot_threshold to 0, but I think we can
do something about it.
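To illustrate the effect in isolation, here is a small standalone demo (not
PostgreSQL code: a pthread spinlock and a C11 atomic stand in for slock_t and
pg_atomic_uint64) contrasting a spinlock-guarded read with an atomic read of
the same 64-bit value under read-only contention:

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NTHREADS 8
#define NREADS   10000000L

static pthread_spinlock_t lock;
static uint64_t plain_value;
static _Atomic uint64_t atomic_value;

/* Reader that takes the spinlock for every read, like the current code. */
static void *
read_with_spinlock(void *arg)
{
    uint64_t    sum = 0;

    for (long i = 0; i < NREADS; i++)
    {
        pthread_spin_lock(&lock);
        sum += plain_value;
        pthread_spin_unlock(&lock);
    }
    return (void *) (uintptr_t) sum;
}

/* Reader that uses a plain atomic load, like the proposed patch. */
static void *
read_with_atomic(void *arg)
{
    uint64_t    sum = 0;

    for (long i = 0; i < NREADS; i++)
        sum += atomic_load_explicit(&atomic_value, memory_order_relaxed);
    return (void *) (uintptr_t) sum;
}

/* Run NTHREADS copies of the given reader and return the elapsed seconds. */
static double
run(void *(*fn) (void *))
{
    pthread_t   threads[NTHREADS];
    struct timespec start, end;

    clock_gettime(CLOCK_MONOTONIC, &start);
    for (int i = 0; i < NTHREADS; i++)
        pthread_create(&threads[i], NULL, fn, NULL);
    for (int i = 0; i < NTHREADS; i++)
        pthread_join(threads[i], NULL);
    clock_gettime(CLOCK_MONOTONIC, &end);
    return (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) / 1e9;
}

int
main(void)
{
    pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
    plain_value = 42;
    atomic_init(&atomic_value, 42);

    printf("spinlock reads: %.2f s\n", run(read_with_spinlock));
    printf("atomic reads:   %.2f s\n", run(read_with_atomic));

    pthread_spin_destroy(&lock);
    return 0;
}

The exact numbers depend on the hardware, but with enough concurrent readers
the spinlock variant degrades sharply while the atomic variant stays flat,
which is the same pattern the s_lock samples below show.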
Some more investigation:
-- On primary --
$ ./bin/psql postgres -c "create database benchmark"
CREATE DATABASE
$ ./bin/pgbench -i -Uorlov -s300 benchmark
dropping old tables...
NOTICE: table "pgbench_accounts" does not exist, skipping
...
creating tables...
generating data (client-side)...
30000000 of 30000000 tuples (100%) done (elapsed 142.37 s, remaining 0.00 s)
vacuuming...
creating primary keys...
done in 177.67 s (drop tables 0.00 s, create tables 0.01 s, client-side
generate 144.45 s, vacuum 0.59 s, primary keys 32.61 s).
-- On secondary --
$ touch 1.sql
$ vim 1.sql
$ cat 1.sql
\set bid random(1, 300)
BEGIN;
SELECT sum(aid) FROM pgbench_accounts where bid = :bid GROUP BY bid;
END;
$ ./bin/pgbench -f 1.sql -p5433 -Uorlov -j10 -c100 -T720 -P1 -n benchmark
pgbench (16devel)
progress: 1.0 s, 0.0 tps, lat 0.000 ms stddev 0.000, 0 failed
...
progress: 20.0 s, 0.0 tps, lat 0.000 ms stddev 0.000, 0 failed
$ perf record -F 99 -a -g --call-graph=dwarf sleep 5
$ perf script --header --fields comm,pid,tid,time,event,ip,sym,dso > file
$ grep s_lock file | wc -l
3486
My proposal is to use atomics for threshold_timestamp and threshold_xid. PFA
the 0001 patch.
With patch 0001 we got:
$ grep s_lock file2 | wc -l
8
Maybe we should go further and remove mutex_threshold here entirely? That
would allow threshold_timestamp and threshold_xid to become mutually
inconsistent, but is that really a problem?
Thoughts?
--
Best regards,
Maxim Orlov.
Attachments:
0001-PGPRO-7624-use-atomic-old_snapshot_threshold.patch (application/octet-stream)
From 98fa70754a8c68cdfb20f27bd7f6e6e3e5f8ed92 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 11 Jan 2023 18:07:10 +0300
Subject: [PATCH] [PGPRO-7624] use atomic old_snapshot_threshold
Using a spinlock to access old_snapshot_threshold leads to a bottleneck on a
replica, since GetOldSnapshotThresholdTimestamp is called very often there.
So, switch to atomic values.
tags: commitfest_hotfix
---
src/backend/utils/time/snapmgr.c | 31 +++++++++++++++++--------------
src/include/utils/old_snapshot.h | 5 +++--
2 files changed, 20 insertions(+), 16 deletions(-)
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 7d11ae34781..0902e29f224 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -230,8 +230,8 @@ SnapMgrInit(void)
oldSnapshotControl->latest_xmin = InvalidTransactionId;
oldSnapshotControl->next_map_update = 0;
SpinLockInit(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = 0;
- oldSnapshotControl->threshold_xid = InvalidTransactionId;
+ pg_atomic_init_u64(&oldSnapshotControl->threshold_timestamp, 0);
+ pg_atomic_init_u32(&oldSnapshotControl->threshold_xid, InvalidTransactionId);
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->head_timestamp = 0;
oldSnapshotControl->count_used = 0;
@@ -1706,9 +1706,7 @@ GetOldSnapshotThresholdTimestamp(void)
{
TimestampTz threshold_timestamp;
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
return threshold_timestamp;
}
@@ -1716,11 +1714,18 @@ GetOldSnapshotThresholdTimestamp(void)
void
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
{
+ TimestampTz threshold_timestamp;
+ TransactionId threshold_xid;
+
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- Assert(oldSnapshotControl->threshold_timestamp <= ts);
- Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
- oldSnapshotControl->threshold_timestamp = ts;
- oldSnapshotControl->threshold_xid = xlimit;
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
+ threshold_xid = pg_atomic_read_u32(&oldSnapshotControl->threshold_xid);
+
+ Assert(threshold_timestamp <= ts);
+ Assert(TransactionIdPrecedesOrEquals(threshold_xid, xlimit));
+
+ pg_atomic_write_u64(&oldSnapshotControl->threshold_timestamp, ts);
+ pg_atomic_write_u32(&oldSnapshotControl->threshold_xid, xlimit);
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
@@ -1739,9 +1744,7 @@ SnapshotTooOldMagicForTest(void)
ts -= 5 * USECS_PER_SEC;
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = ts;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ pg_atomic_write_u64(&oldSnapshotControl->threshold_timestamp, ts);
}
/*
@@ -1846,8 +1849,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
/* Check for fast exit without LW locking. */
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- threshold_xid = oldSnapshotControl->threshold_xid;
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
+ threshold_xid = pg_atomic_read_u32(&oldSnapshotControl->threshold_xid);
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
if (ts == threshold_timestamp)
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
index f1978a28e1c..3dd31d721c7 100644
--- a/src/include/utils/old_snapshot.h
+++ b/src/include/utils/old_snapshot.h
@@ -16,6 +16,7 @@
#define OLD_SNAPSHOT_H
#include "datatype/timestamp.h"
+#include "port/atomics.h"
#include "storage/s_lock.h"
/*
@@ -33,8 +34,8 @@ typedef struct OldSnapshotControlData
TransactionId latest_xmin; /* latest snapshot xmin */
TimestampTz next_map_update; /* latest snapshot valid up to */
slock_t mutex_threshold; /* protect threshold fields */
- TimestampTz threshold_timestamp; /* earlier snapshot is old */
- TransactionId threshold_xid; /* earlier xid may be gone */
+ pg_atomic_uint64 threshold_timestamp; /* earlier snapshot is old */
+ pg_atomic_uint32 threshold_xid; /* earlier xid may be gone */
/*
* Keep one xid per minute for old snapshot error handling.
--
2.34.1
Hi, Maxim!
On Mon, 23 Jan 2023 at 18:40, Maxim Orlov <orlovmg@gmail.com> wrote:
One of our customers stumbled onto a significant performance degradation
while running multiple OLAP-like queries on a replica. After some
investigation, it became clear that the problem is in accessing the
old_snapshot_threshold parameter.
[...]
Maybe we should go further and remove mutex_threshold here entirely? That
would allow threshold_timestamp and threshold_xid to become mutually
inconsistent, but is that really a problem? Thoughts?

I think optimizing locking and switching to atomics wherever it
improves performance is a good direction. If the performance improvement
could be demonstrated in a more direct way, it would be a good argument
for committing the improvement. Personally I like TPS plots like in [1].
[1]: /messages/by-id/CALT9ZEHSX1Hpz5xjDA62yHAHtpinkA6hg8Zt-odyxqppmKbQFA@mail.gmail.com
Kind regards,
Pavel Borisov,
Supabase
On Mon, Jan 23, 2023 at 9:40 AM Maxim Orlov <orlovmg@gmail.com> wrote:
One of our customers stumbled onto a significant performance degradation while running multiple OLAP-like queries on a replica.
After some investigation, it became clear that the problem is in accessing the old_snapshot_threshold parameter.

It has been suggested that we remove that feature entirely.
My proposal is to use atomics for threshold_timestamp and threshold_xid. PFA the 0001 patch.

This patch changes threshold_timestamp and threshold_xid to use
atomics, but it does not remove mutex_threshold which, according to a
quick glance at the comments, protects exactly those two fields. So,
either:
(1) that mutex also protects something else and the existing comment
is wrong, or
(2) the mutex should have been removed but the patch neglected to do so, or
(3) the mutex is still needed for some reason, in which case either
(3a) the patch isn't actually safe or (3b) the patch needs comments to
explain what the new synchronization model is.
--
Robert Haas
EDB: http://www.enterprisedb.com
On Tue, 24 Jan 2023 at 18:46, Robert Haas <robertmhaas@gmail.com> wrote:
(1) that mutex also protects something else and the existing comment
is wrong, or
(2) the mutex should have been removed but the patch neglected to do so, or
(3) the mutex is still needed for some reason, in which case either
(3a) the patch isn't actually safe or (3b) the patch needs comments to
explain what the new synchronization model is.

Yes, you're absolutely right. My first intention was to remove this
mutex completely. But in TransactionIdLimitedForOldSnapshots these variables
are used together, so I'm not sure it is completely safe to remove the mutex.
Actually, removing the mutex and switching to atomics was my first choice:
I ran all the tests and no problems were found. But at the time I chose to be
more conservative. Anyway, here is the new variant.
--
Best regards,
Maxim Orlov.
Attachments:
v2-0001-Use-atomic-old_snapshot_threshold.patch (application/octet-stream)
From 6bdd3566e9252c4296b0c663786eba6e5bf95c12 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 11 Jan 2023 18:07:10 +0300
Subject: [PATCH v2] Use atomic old_snapshot_threshold
Using a spinlock to access old_snapshot_threshold leads to a bottleneck on a
replica, since GetOldSnapshotThresholdTimestamp is called very often there.
So, switch to atomic values.
---
src/backend/utils/time/snapmgr.c | 36 +++++++++++++++-----------------
src/include/utils/old_snapshot.h | 6 +++---
2 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 7d11ae34781..77f32ae468f 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -229,9 +229,8 @@ SnapMgrInit(void)
SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
oldSnapshotControl->latest_xmin = InvalidTransactionId;
oldSnapshotControl->next_map_update = 0;
- SpinLockInit(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = 0;
- oldSnapshotControl->threshold_xid = InvalidTransactionId;
+ pg_atomic_init_u64(&oldSnapshotControl->threshold_timestamp, 0);
+ pg_atomic_init_u32(&oldSnapshotControl->threshold_xid, InvalidTransactionId);
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->head_timestamp = 0;
oldSnapshotControl->count_used = 0;
@@ -1706,9 +1705,7 @@ GetOldSnapshotThresholdTimestamp(void)
{
TimestampTz threshold_timestamp;
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
return threshold_timestamp;
}
@@ -1716,12 +1713,17 @@ GetOldSnapshotThresholdTimestamp(void)
void
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
{
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- Assert(oldSnapshotControl->threshold_timestamp <= ts);
- Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
- oldSnapshotControl->threshold_timestamp = ts;
- oldSnapshotControl->threshold_xid = xlimit;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ TimestampTz threshold_timestamp;
+ TransactionId threshold_xid;
+
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
+ threshold_xid = pg_atomic_read_u32(&oldSnapshotControl->threshold_xid);
+
+ Assert(threshold_timestamp <= ts);
+ Assert(TransactionIdPrecedesOrEquals(threshold_xid, xlimit));
+
+ pg_atomic_write_u64(&oldSnapshotControl->threshold_timestamp, ts);
+ pg_atomic_write_u32(&oldSnapshotControl->threshold_xid, xlimit);
}
/*
@@ -1739,9 +1741,7 @@ SnapshotTooOldMagicForTest(void)
ts -= 5 * USECS_PER_SEC;
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = ts;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ pg_atomic_write_u64(&oldSnapshotControl->threshold_timestamp, ts);
}
/*
@@ -1845,10 +1845,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
- (old_snapshot_threshold * USECS_PER_MINUTE);
/* Check for fast exit without LW locking. */
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- threshold_xid = oldSnapshotControl->threshold_xid;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
+ threshold_xid = pg_atomic_read_u32(&oldSnapshotControl->threshold_xid);
if (ts == threshold_timestamp)
{
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
index f1978a28e1c..979189a143f 100644
--- a/src/include/utils/old_snapshot.h
+++ b/src/include/utils/old_snapshot.h
@@ -16,6 +16,7 @@
#define OLD_SNAPSHOT_H
#include "datatype/timestamp.h"
+#include "port/atomics.h"
#include "storage/s_lock.h"
/*
@@ -32,9 +33,8 @@ typedef struct OldSnapshotControlData
slock_t mutex_latest_xmin; /* protect latest_xmin and next_map_update */
TransactionId latest_xmin; /* latest snapshot xmin */
TimestampTz next_map_update; /* latest snapshot valid up to */
- slock_t mutex_threshold; /* protect threshold fields */
- TimestampTz threshold_timestamp; /* earlier snapshot is old */
- TransactionId threshold_xid; /* earlier xid may be gone */
+ pg_atomic_uint64 threshold_timestamp; /* earlier snapshot is old */
+ pg_atomic_uint32 threshold_xid; /* earlier xid may be gone */
/*
* Keep one xid per minute for old snapshot error handling.
--
2.38.1
On Wed, Jan 25, 2023 at 3:52 AM Maxim Orlov <orlovmg@gmail.com> wrote:
But in TransactionIdLimitedForOldSnapshots these variables are used together,
so I'm not sure it is completely safe to remove the mutex.
Well, that's something we - and ideally you, as the patch author -
need to analyze and figure out. We can't just take a shot and hope for
the best.
Actually, removing the mutex and switching to atomics was my first choice.
I ran all the tests and no problems were found.
Unfortunately, that kind of testing is not very likely to find a
subtle synchronization problem. That's why a theoretical analysis is
so important.
--
Robert Haas
EDB: http://www.enterprisedb.com
On Wed, 25 Jan 2023 at 16:52, Robert Haas <robertmhaas@gmail.com> wrote:
On Wed, Jan 25, 2023 at 3:52 AM Maxim Orlov <orlovmg@gmail.com> wrote:
Well, that's something we - and ideally you, as the patch author -
need to analyze and figure out. We can't just take a shot and hope for
the best.
Thank you for your advice. I've dived deeper into the problem and I
think the v2 patch is wrong.
Accessing threshold_timestamp and threshold_xid in
TransactionIdLimitedForOldSnapshots without the lock would lead to an
improper xlimit calculation.
So, my choice would be (3b). My goal is to optimize access to
threshold_timestamp to avoid repeated spinlock acquisition on read. At the
same time, simultaneous access to these variables (threshold_timestamp and
threshold_xid) should still be protected by the spinlock.
I removed the atomic for threshold_xid and added comments on mutex_threshold.
PFA v3.
--
Best regards,
Maxim Orlov.
Attachments:
v3-0001-Use-atomic-old_snapshot_threshold.patch (application/octet-stream)
From 7de7eb8a707117a692f3c6489b2b13167bd63516 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 11 Jan 2023 18:07:10 +0300
Subject: [PATCH v3] Use atomic old_snapshot_threshold
Using a spinlock to access old_snapshot_threshold leads to a bottleneck on a
replica, since GetOldSnapshotThresholdTimestamp is called very often there.
So, switch to an atomic value.
---
src/backend/utils/time/snapmgr.c | 16 ++++++----------
src/include/utils/old_snapshot.h | 18 +++++++++++++++---
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 7d11ae3478..e1dc8b0feb 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -230,7 +230,7 @@ SnapMgrInit(void)
oldSnapshotControl->latest_xmin = InvalidTransactionId;
oldSnapshotControl->next_map_update = 0;
SpinLockInit(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = 0;
+ pg_atomic_init_u64(&oldSnapshotControl->threshold_timestamp, 0);
oldSnapshotControl->threshold_xid = InvalidTransactionId;
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->head_timestamp = 0;
@@ -1706,9 +1706,7 @@ GetOldSnapshotThresholdTimestamp(void)
{
TimestampTz threshold_timestamp;
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
return threshold_timestamp;
}
@@ -1717,9 +1715,9 @@ void
SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
{
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- Assert(oldSnapshotControl->threshold_timestamp <= ts);
+ Assert(pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp) <= ts);
Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
- oldSnapshotControl->threshold_timestamp = ts;
+ pg_atomic_write_u64(&oldSnapshotControl->threshold_timestamp, ts);
oldSnapshotControl->threshold_xid = xlimit;
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
@@ -1739,9 +1737,7 @@ SnapshotTooOldMagicForTest(void)
ts -= 5 * USECS_PER_SEC;
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = ts;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+ pg_atomic_write_u64(&oldSnapshotControl->threshold_timestamp, ts);
}
/*
@@ -1846,7 +1842,7 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
/* Check for fast exit without LW locking. */
SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
+ threshold_timestamp = pg_atomic_read_u64(&oldSnapshotControl->threshold_timestamp);
threshold_xid = oldSnapshotControl->threshold_xid;
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
index f1978a28e1..7f26006fdd 100644
--- a/src/include/utils/old_snapshot.h
+++ b/src/include/utils/old_snapshot.h
@@ -16,6 +16,7 @@
#define OLD_SNAPSHOT_H
#include "datatype/timestamp.h"
+#include "port/atomics.h"
#include "storage/s_lock.h"
/*
@@ -32,9 +33,20 @@ typedef struct OldSnapshotControlData
slock_t mutex_latest_xmin; /* protect latest_xmin and next_map_update */
TransactionId latest_xmin; /* latest snapshot xmin */
TimestampTz next_map_update; /* latest snapshot valid up to */
- slock_t mutex_threshold; /* protect threshold fields */
- TimestampTz threshold_timestamp; /* earlier snapshot is old */
- TransactionId threshold_xid; /* earlier xid may be gone */
+ /*
+ * Use the spinlock to protect:
+ * - simultaneous access to threshold_timestamp and
+ * threshold_xid;
+ * - any access to threshold_xid.
+ *
+ * NOTE: if both threshold fields must be accessed together, the spinlock
+ * must be used to protect them. If we only read threshold_timestamp, we
+ * may rely on atomic access and avoid the spinlock. This becomes critical
+ * in some workloads.
+ */
+ slock_t mutex_threshold;
+ pg_atomic_uint64 threshold_timestamp; /* earlier snapshot is old */
+ TransactionId threshold_xid; /* earlier xid may be gone */
/*
* Keep one xid per minute for old snapshot error handling.
--
2.38.1
On Fri, Jan 27, 2023 at 9:30 AM Maxim Orlov <orlovmg@gmail.com> wrote:
Thank you for your advice. I've dived deeper into the problem and I think the v2 patch is wrong.
Cool!
Accessing threshold_timestamp and threshold_xid in TransactionIdLimitedForOldSnapshots
without the lock would lead to an improper xlimit calculation.
That would be a bummer.
So, my choice would be (3b). My goal is to optimize access to threshold_timestamp to avoid
repeated spinlock acquisition on read. At the same time, simultaneous access to these variables
(threshold_timestamp and threshold_xid) should still be protected by the spinlock. I removed the
atomic for threshold_xid and added comments on mutex_threshold. PFA v3.

Interesting, but it's still not entirely clear to me from reading the
comments why we should think that this is safe.
--
Robert Haas
EDB: http://www.enterprisedb.com
On Fri, 27 Jan 2023 at 18:18, Robert Haas <robertmhaas@gmail.com> wrote:
Interesting, but it's still not entirely clear to me from reading the
comments why we should think that this is safe.
Overall, I think this is safe, because we do not change the algorithm here.
More specifically, threshold_timestamp is only used in a few places:
1) To get the latest value by calling GetOldSnapshotThresholdTimestamp.
This still works, since we only change the synchronization there from the
spinlock to an atomic read.
2) In TransactionIdLimitedForOldSnapshots, where the behaviour does not
change: the synchronization model is the same as before the patch.
3) In SnapshotTooOldMagicForTest, which is a stub to make the
old_snapshot_threshold tests appear "working". There is no interaction with
threshold_xid here.
So, we have two use cases for threshold_timestamp:
a) When threshold_timestamp is used in conjunction with threshold_xid, we
must use the spinlock to synchronize.
b) When threshold_timestamp is used on its own, we can rely on atomic reads
and writes.
--
Best regards,
Maxim Orlov.
Hi,
On 2023-01-24 10:46:28 -0500, Robert Haas wrote:
On Mon, Jan 23, 2023 at 9:40 AM Maxim Orlov <orlovmg@gmail.com> wrote:
One of our customers stumbled onto a significant performance degradation while running multiple OLAP-like queries on a replica.
After some investigation, it became clear that the problem is in accessing the old_snapshot_threshold parameter.

It has been suggested that we remove that feature entirely.

Indeed. There are a lot of things wrong with it. We have reproducers for
creating wrong query results. Nobody has shown interest in fixing the
problems for several years now. It costs users that *do not* use the
feature performance (*).
I think we're doing our users a disservice by claiming to have this feature.
I don't think a lot of the existing code would survive if we were to create a
newer, more maintainable / reliable version of the feature.
Greetings,
Andres Freund
(*) E.g. TestForOldSnapshot() is called in a good number of places, and emits
quite a bit of code. It's not executed, but the emitted code is large
enough to lead to worse code being generated.
On Tue, Feb 14, 2023 at 9:45 AM Andres Freund <andres@anarazel.de> wrote:
On 2023-01-24 10:46:28 -0500, Robert Haas wrote:
On Mon, Jan 23, 2023 at 9:40 AM Maxim Orlov <orlovmg@gmail.com> wrote:
One of our customers stumbled onto a significant performance degradation while running multiple OLAP-like queries on a replica.
After some investigation, it became clear that the problem is in accessing the old_snapshot_threshold parameter.

It has been suggested that we remove that feature entirely.

Indeed. There are a lot of things wrong with it. We have reproducers for
creating wrong query results. Nobody has shown interest in fixing the
problems for several years now. It costs users that *do not* use the
feature performance (*).

I think we're doing our users a disservice by claiming to have this feature.
I don't think a lot of the existing code would survive if we were to create a
newer, more maintainable / reliable version of the feature.

I raised this at the recent developer meeting and the assembled
hackers agreed. Does anyone think we *shouldn't* drop the feature? I
volunteered to write a removal patch for v17, so here's a first run
through to find all the traces of this feature. In this first go I
removed everything I could think of, but we might want to keep some
vestiges. I guess we might want to keep the registered error
class/code? Should we invent a place where we keep stuff like #define
TestForOldSnapshot(...) expanding to nothing for some amount of time,
for extensions? I dunno, I bet extensions doing stuff that
sophisticated already have a bunch of version tests anyway. I suppose
keeping the GUC wouldn't really be helpful (if you're using it, you
probably want to know that it isn't available anymore and think about
the implications for your application).
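For what it's worth, the vestigial shim mentioned above would only need a few
lines; something like this (hypothetical, not part of the attached patch, and
extensions could just as well carry it themselves behind their own version
checks):

#if PG_VERSION_NUM >= 170000
/* old_snapshot_threshold is gone; the page check compiles away to nothing. */
#define TestForOldSnapshot(snapshot, relation, page) ((void) 0)
#endif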
Attachments:
0001-Remove-old_snapshot_threshold.patch (text/x-patch; charset=US-ASCII)
From 3e2b3f3b20ce83737f3421ab3c7794853724266d Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Wed, 14 Jun 2023 13:41:37 +1200
Subject: [PATCH] Remove old_snapshot_threshold.
Remove the old_snapshot_threshold feature, better known for its error
"snapshot too old". Unfortunately it had a number of problems, and we
agreed to remove it. There is no doubt that it was useful, and
someone might propose a new implementation in the future.
XXX draft, may contain nuts
Discussion: https://postgr.es/m/CACG%3DezYV%2BEvO135fLRdVn-ZusfVsTY6cH1OZqWtezuEYH6ciQA%40mail.gmail.com
---
contrib/Makefile | 1 -
contrib/bloom/blscan.c | 1 -
contrib/meson.build | 1 -
contrib/old_snapshot/Makefile | 21 -
contrib/old_snapshot/meson.build | 23 -
contrib/old_snapshot/old_snapshot--1.0.sql | 14 -
contrib/old_snapshot/old_snapshot.control | 5 -
contrib/old_snapshot/time_mapping.c | 142 ------
doc/src/sgml/config.sgml | 69 ---
doc/src/sgml/contrib.sgml | 1 -
doc/src/sgml/filelist.sgml | 1 -
doc/src/sgml/monitoring.sgml | 4 -
doc/src/sgml/oldsnapshot.sgml | 33 --
src/backend/access/brin/brin_revmap.c | 7 -
src/backend/access/gin/ginbtree.c | 2 -
src/backend/access/gin/ginget.c | 4 -
src/backend/access/gist/gistget.c | 1 -
src/backend/access/hash/hashsearch.c | 6 -
src/backend/access/heap/heapam.c | 9 -
src/backend/access/heap/pruneheap.c | 120 +----
src/backend/access/heap/vacuumlazy.c | 5 +-
src/backend/access/nbtree/nbtsearch.c | 9 -
src/backend/access/spgist/spgscan.c | 1 -
src/backend/catalog/index.c | 20 +-
src/backend/commands/vacuum.c | 19 -
src/backend/storage/buffer/bufmgr.c | 17 -
src/backend/storage/ipc/ipci.c | 2 -
src/backend/storage/ipc/procarray.c | 36 +-
src/backend/storage/lmgr/lwlocknames.txt | 2 +-
src/backend/utils/errcodes.txt | 4 -
src/backend/utils/misc/guc_tables.c | 11 -
src/backend/utils/misc/postgresql.conf.sample | 2 -
src/backend/utils/time/snapmgr.c | 468 ------------------
src/include/access/heapam.h | 2 -
src/include/storage/bufmgr.h | 36 --
src/include/utils/old_snapshot.h | 75 ---
src/include/utils/snapmgr.h | 49 --
src/test/modules/Makefile | 1 -
src/test/modules/meson.build | 1 -
src/test/modules/snapshot_too_old/.gitignore | 3 -
src/test/modules/snapshot_too_old/Makefile | 28 --
.../expected/sto_using_cursor.out | 19 -
.../expected/sto_using_hash_index.out | 19 -
.../expected/sto_using_select.out | 18 -
src/test/modules/snapshot_too_old/meson.build | 19 -
.../specs/sto_using_cursor.spec | 38 --
.../specs/sto_using_hash_index.spec | 31 --
.../specs/sto_using_select.spec | 37 --
src/test/modules/snapshot_too_old/sto.conf | 2 -
src/tools/pgindent/typedefs.list | 2 -
50 files changed, 19 insertions(+), 1422 deletions(-)
delete mode 100644 contrib/old_snapshot/Makefile
delete mode 100644 contrib/old_snapshot/meson.build
delete mode 100644 contrib/old_snapshot/old_snapshot--1.0.sql
delete mode 100644 contrib/old_snapshot/old_snapshot.control
delete mode 100644 contrib/old_snapshot/time_mapping.c
delete mode 100644 doc/src/sgml/oldsnapshot.sgml
delete mode 100644 src/include/utils/old_snapshot.h
delete mode 100644 src/test/modules/snapshot_too_old/.gitignore
delete mode 100644 src/test/modules/snapshot_too_old/Makefile
delete mode 100644 src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
delete mode 100644 src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
delete mode 100644 src/test/modules/snapshot_too_old/expected/sto_using_select.out
delete mode 100644 src/test/modules/snapshot_too_old/meson.build
delete mode 100644 src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
delete mode 100644 src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
delete mode 100644 src/test/modules/snapshot_too_old/specs/sto_using_select.spec
delete mode 100644 src/test/modules/snapshot_too_old/sto.conf
diff --git a/contrib/Makefile b/contrib/Makefile
index bbf220407b..da4e2316a3 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -29,7 +29,6 @@ SUBDIRS = \
lo \
ltree \
oid2name \
- old_snapshot \
pageinspect \
passwordcheck \
pg_buffercache \
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index 6cc7d07164..61d1f66b38 100644
--- a/contrib/bloom/blscan.c
+++ b/contrib/bloom/blscan.c
@@ -132,7 +132,6 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
if (!PageIsNew(page) && !BloomPageIsDeleted(page))
{
diff --git a/contrib/meson.build b/contrib/meson.build
index bd4a57c43c..84d4e18561 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -37,7 +37,6 @@ subdir('lo')
subdir('ltree')
subdir('ltree_plpython')
subdir('oid2name')
-subdir('old_snapshot')
subdir('pageinspect')
subdir('passwordcheck')
subdir('pg_buffercache')
diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile
deleted file mode 100644
index adb557532f..0000000000
--- a/contrib/old_snapshot/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# contrib/old_snapshot/Makefile
-
-MODULE_big = old_snapshot
-OBJS = \
- $(WIN32RES) \
- time_mapping.o
-
-EXTENSION = old_snapshot
-DATA = old_snapshot--1.0.sql
-PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
-
-ifdef USE_PGXS
-PG_CONFIG = pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
-else
-subdir = contrib/old_snapshot
-top_builddir = ../..
-include $(top_builddir)/src/Makefile.global
-include $(top_srcdir)/contrib/contrib-global.mk
-endif
diff --git a/contrib/old_snapshot/meson.build b/contrib/old_snapshot/meson.build
deleted file mode 100644
index fe5fb9027a..0000000000
--- a/contrib/old_snapshot/meson.build
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright (c) 2022-2023, PostgreSQL Global Development Group
-
-old_snapshot_sources = files(
- 'time_mapping.c',
-)
-
-if host_system == 'windows'
- old_snapshot_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
- '--NAME', 'old_snapshot',
- '--FILEDESC', 'old_snapshot - utilities in support of old_snapshot_threshold',])
-endif
-
-old_snapshot = shared_module('old_snapshot',
- old_snapshot_sources,
- kwargs: contrib_mod_args,
-)
-contrib_targets += old_snapshot
-
-install_data(
- 'old_snapshot.control',
- 'old_snapshot--1.0.sql',
- kwargs: contrib_data_args,
-)
diff --git a/contrib/old_snapshot/old_snapshot--1.0.sql b/contrib/old_snapshot/old_snapshot--1.0.sql
deleted file mode 100644
index 9ebb8829e3..0000000000
--- a/contrib/old_snapshot/old_snapshot--1.0.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-/* contrib/old_snapshot/old_snapshot--1.0.sql */
-
--- complain if script is sourced in psql, rather than via CREATE EXTENSION
-\echo Use "CREATE EXTENSION old_snapshot" to load this file. \quit
-
--- Show visibility map and page-level visibility information for each block.
-CREATE FUNCTION pg_old_snapshot_time_mapping(array_offset OUT int4,
- end_timestamp OUT timestamptz,
- newest_xmin OUT xid)
-RETURNS SETOF record
-AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
-LANGUAGE C STRICT;
-
--- XXX. Do we want REVOKE commands here?
diff --git a/contrib/old_snapshot/old_snapshot.control b/contrib/old_snapshot/old_snapshot.control
deleted file mode 100644
index 491eec536c..0000000000
--- a/contrib/old_snapshot/old_snapshot.control
+++ /dev/null
@@ -1,5 +0,0 @@
-# old_snapshot extension
-comment = 'utilities in support of old_snapshot_threshold'
-default_version = '1.0'
-module_pathname = '$libdir/old_snapshot'
-relocatable = true
diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c
deleted file mode 100644
index 352308cd49..0000000000
--- a/contrib/old_snapshot/time_mapping.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * time_mapping.c
- * time to XID mapping information
- *
- * Copyright (c) 2020-2023, PostgreSQL Global Development Group
- *
- * contrib/old_snapshot/time_mapping.c
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include "funcapi.h"
-#include "storage/lwlock.h"
-#include "utils/old_snapshot.h"
-#include "utils/snapmgr.h"
-#include "utils/timestamp.h"
-
-/*
- * Backend-private copy of the information from oldSnapshotControl which relates
- * to the time to XID mapping, plus an index so that we can iterate.
- *
- * Note that the length of the xid_by_minute array is given by
- * OLD_SNAPSHOT_TIME_MAP_ENTRIES (which is not a compile-time constant).
- */
-typedef struct
-{
- int current_index;
- int head_offset;
- TimestampTz head_timestamp;
- int count_used;
- TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotTimeMapping;
-
-#define NUM_TIME_MAPPING_COLUMNS 3
-
-PG_MODULE_MAGIC;
-PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
-
-static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
-static HeapTuple MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc,
- OldSnapshotTimeMapping *mapping);
-
-/*
- * SQL-callable set-returning function.
- */
-Datum
-pg_old_snapshot_time_mapping(PG_FUNCTION_ARGS)
-{
- FuncCallContext *funcctx;
- OldSnapshotTimeMapping *mapping;
-
- if (SRF_IS_FIRSTCALL())
- {
- MemoryContext oldcontext;
- TupleDesc tupdesc;
-
- funcctx = SRF_FIRSTCALL_INIT();
- oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
- mapping = GetOldSnapshotTimeMapping();
- funcctx->user_fctx = mapping;
- if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
- elog(ERROR, "return type must be a row type");
- funcctx->tuple_desc = tupdesc;
- MemoryContextSwitchTo(oldcontext);
- }
-
- funcctx = SRF_PERCALL_SETUP();
- mapping = (OldSnapshotTimeMapping *) funcctx->user_fctx;
-
- while (mapping->current_index < mapping->count_used)
- {
- HeapTuple tuple;
-
- tuple = MakeOldSnapshotTimeMappingTuple(funcctx->tuple_desc, mapping);
- ++mapping->current_index;
- SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
- }
-
- SRF_RETURN_DONE(funcctx);
-}
-
-/*
- * Get the old snapshot time mapping data from shared memory.
- */
-static OldSnapshotTimeMapping *
-GetOldSnapshotTimeMapping(void)
-{
- OldSnapshotTimeMapping *mapping;
-
- mapping = palloc(offsetof(OldSnapshotTimeMapping, xid_by_minute)
- + sizeof(TransactionId) * OLD_SNAPSHOT_TIME_MAP_ENTRIES);
- mapping->current_index = 0;
-
- LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
- mapping->head_offset = oldSnapshotControl->head_offset;
- mapping->head_timestamp = oldSnapshotControl->head_timestamp;
- mapping->count_used = oldSnapshotControl->count_used;
- for (int i = 0; i < OLD_SNAPSHOT_TIME_MAP_ENTRIES; ++i)
- mapping->xid_by_minute[i] = oldSnapshotControl->xid_by_minute[i];
- LWLockRelease(OldSnapshotTimeMapLock);
-
- return mapping;
-}
-
-/*
- * Convert one entry from the old snapshot time mapping to a HeapTuple.
- */
-static HeapTuple
-MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mapping)
-{
- Datum values[NUM_TIME_MAPPING_COLUMNS];
- bool nulls[NUM_TIME_MAPPING_COLUMNS];
- int array_position;
- TimestampTz timestamp;
-
- /*
- * Figure out the array position corresponding to the current index.
- *
- * Index 0 means the oldest entry in the mapping, which is stored at
- * mapping->head_offset. Index 1 means the next-oldest entry, which is a
- * the following index, and so on. We wrap around when we reach the end of
- * the array.
- */
- array_position = (mapping->head_offset + mapping->current_index)
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- /*
- * No explicit timestamp is stored for any entry other than the oldest
- * one, but each entry corresponds to 1-minute period, so we can just add.
- */
- timestamp = TimestampTzPlusMilliseconds(mapping->head_timestamp,
- mapping->current_index * 60000);
-
- /* Initialize nulls and values arrays. */
- memset(nulls, 0, sizeof(nulls));
- values[0] = Int32GetDatum(array_position);
- values[1] = TimestampTzGetDatum(timestamp);
- values[2] = TransactionIdGetDatum(mapping->xid_by_minute[array_position]);
-
- return heap_form_tuple(tupdesc, values, nulls);
-}
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6262cb7bb2..b9cb8c0f93 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2760,65 +2760,6 @@ include_dir 'conf.d'
</para>
</listitem>
</varlistentry>
-
- <varlistentry id="guc-old-snapshot-threshold" xreflabel="old_snapshot_threshold">
- <term><varname>old_snapshot_threshold</varname> (<type>integer</type>)
- <indexterm>
- <primary><varname>old_snapshot_threshold</varname> configuration parameter</primary>
- </indexterm>
- </term>
- <listitem>
- <para>
- Sets the minimum amount of time that a query snapshot can be used
- without risk of a <quote>snapshot too old</quote> error occurring
- when using the snapshot. Data that has been dead for longer than
- this threshold is allowed to be vacuumed away. This can help
- prevent bloat in the face of snapshots which remain in use for a
- long time. To prevent incorrect results due to cleanup of data which
- would otherwise be visible to the snapshot, an error is generated
- when the snapshot is older than this threshold and the snapshot is
- used to read a page which has been modified since the snapshot was
- built.
- </para>
-
- <para>
- If this value is specified without units, it is taken as minutes.
- A value of <literal>-1</literal> (the default) disables this feature,
- effectively setting the snapshot age limit to infinity.
- This parameter can only be set at server start.
- </para>
-
- <para>
- Useful values for production work probably range from a small number
- of hours to a few days. Small values (such as <literal>0</literal> or
- <literal>1min</literal>) are only allowed because they may sometimes be
- useful for testing. While a setting as high as <literal>60d</literal> is
- allowed, please note that in many workloads extreme bloat or
- transaction ID wraparound may occur in much shorter time frames.
- </para>
-
- <para>
- When this feature is enabled, freed space at the end of a relation
- cannot be released to the operating system, since that could remove
- information needed to detect the <quote>snapshot too old</quote>
- condition. All space allocated to a relation remains associated with
- that relation for reuse only within that relation unless explicitly
- freed (for example, with <command>VACUUM FULL</command>).
- </para>
-
- <para>
- This setting does not attempt to guarantee that an error will be
- generated under any particular circumstances. In fact, if the
- correct results can be generated from (for example) a cursor which
- has materialized a result set, no error will be generated even if the
- underlying rows in the referenced table have been vacuumed away.
- Some tables cannot safely be vacuumed early, and so will not be
- affected by this setting, such as system catalogs. For such tables
- this setting will neither reduce bloat nor create a possibility
- of a <quote>snapshot too old</quote> error on scanning.
- </para>
- </listitem>
- </varlistentry>
</variablelist>
</sect2>
</sect1>
@@ -4834,16 +4775,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
until it eventually reaches the primary. Standbys make no other use
of feedback they receive other than to pass upstream.
</para>
- <para>
- This setting does not override the behavior of
- <varname>old_snapshot_threshold</varname> on the primary; a snapshot on the
- standby which exceeds the primary's age threshold can become invalid,
- resulting in cancellation of transactions on the standby. This is
- because <varname>old_snapshot_threshold</varname> is intended to provide an
- absolute limit on the time which dead rows can contribute to bloat,
- which would otherwise be violated because of the configuration of a
- standby.
- </para>
</listitem>
</varlistentry>
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index 922421a897..ab7e38b52a 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -150,7 +150,6 @@ CREATE EXTENSION <replaceable>extension_name</replaceable>;
&isn;
&lo;
<ree;
- &oldsnapshot;
&pageinspect;
&passwordcheck;
&pgbuffercache;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 0d6be9a2fa..d583399001 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -137,7 +137,6 @@
<!ENTITY lo SYSTEM "lo.sgml">
<!ENTITY ltree SYSTEM "ltree.sgml">
<!ENTITY oid2name SYSTEM "oid2name.sgml">
-<!ENTITY oldsnapshot SYSTEM "oldsnapshot.sgml">
<!ENTITY pageinspect SYSTEM "pageinspect.sgml">
<!ENTITY passwordcheck SYSTEM "passwordcheck.sgml">
<!ENTITY pgbuffercache SYSTEM "pgbuffercache.sgml">
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 5cfdc70c03..b82ff67953 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2089,10 +2089,6 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry><literal>OidGen</literal></entry>
<entry>Waiting to allocate a new OID.</entry>
</row>
- <row>
- <entry><literal>OldSnapshotTimeMap</literal></entry>
- <entry>Waiting to read or update old snapshot control information.</entry>
- </row>
<row>
<entry><literal>ParallelAppend</literal></entry>
<entry>Waiting to choose the next subplan during Parallel Append plan
diff --git a/doc/src/sgml/oldsnapshot.sgml b/doc/src/sgml/oldsnapshot.sgml
deleted file mode 100644
index 2e37087738..0000000000
--- a/doc/src/sgml/oldsnapshot.sgml
+++ /dev/null
@@ -1,33 +0,0 @@
-<!-- doc/src/sgml/oldsnapshot.sgml -->
-
-<sect1 id="oldsnapshot" xreflabel="old_snapshot">
- <title>old_snapshot — inspect <literal>old_snapshot_threshold</literal> state</title>
-
- <indexterm zone="oldsnapshot">
- <primary>old_snapshot</primary>
- </indexterm>
-
- <para>
- The <filename>old_snapshot</filename> module allows inspection
- of the server state that is used to implement
- <xref linkend="guc-old-snapshot-threshold" />.
- </para>
-
- <sect2 id="oldsnapshot-functions">
- <title>Functions</title>
-
- <variablelist>
- <varlistentry>
- <term><function>pg_old_snapshot_time_mapping(array_offset OUT int4, end_timestamp OUT timestamptz, newest_xmin OUT xid) returns setof record</function></term>
- <listitem>
- <para>
- Returns all of the entries in the server's timestamp to XID mapping.
- Each entry represents the newest xmin of any snapshot taken in the
- corresponding minute.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </sect2>
-
-</sect1>
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index f4271ba31c..de90ead228 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -79,7 +79,6 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
LockBuffer(meta, BUFFER_LOCK_SHARE);
page = BufferGetPage(meta);
- TestForOldSnapshot(snapshot, idxrel, page);
metadata = (BrinMetaPageData *) PageGetContents(page);
revmap = palloc(sizeof(BrinRevmap));
@@ -277,7 +276,6 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
}
LockBuffer(*buf, mode);
page = BufferGetPage(*buf);
- TestForOldSnapshot(snapshot, idxRel, page);
/* If we land on a revmap page, start over */
if (BRIN_IS_REGULAR_PAGE(page))
@@ -372,11 +370,6 @@ brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
regPg = BufferGetPage(regBuf);
- /*
- * We're only removing data, not reading it, so there's no need to
- * TestForOldSnapshot here.
- */
-
/* if this is no longer a regular page, tell caller to start over */
if (!BRIN_IS_REGULAR_PAGE(regPg))
{
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 35490c7283..7d097c75e0 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -100,7 +100,6 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
stack->off = InvalidOffsetNumber;
page = BufferGetPage(stack->buffer);
- TestForOldSnapshot(snapshot, btree->index, page);
access = ginTraverseLock(stack->buffer, searchMode);
@@ -127,7 +126,6 @@ ginFindLeafPage(GinBtree btree, bool searchMode,
stack->buffer = ginStepRight(stack->buffer, btree->index, access);
stack->blkno = rightlink;
page = BufferGetPage(stack->buffer);
- TestForOldSnapshot(snapshot, btree->index, page);
if (!searchMode && GinPageIsIncompleteSplit(page))
ginFinishSplit(btree, stack, false, NULL);
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index cb676a710f..bf82aa9b85 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -156,7 +156,6 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
return true;
page = BufferGetPage(stack->buffer);
- TestForOldSnapshot(snapshot, btree->index, page);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
/*
@@ -1458,7 +1457,6 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
for (;;)
{
page = BufferGetPage(pos->pendingBuffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
maxoff = PageGetMaxOffsetNumber(page);
if (pos->firstOffset > maxoff)
@@ -1639,7 +1637,6 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
sizeof(bool) * (pos->lastOffset - pos->firstOffset));
page = BufferGetPage(pos->pendingBuffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
for (i = 0; i < so->nkeys; i++)
{
@@ -1842,7 +1839,6 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
LockBuffer(metabuffer, GIN_SHARE);
page = BufferGetPage(metabuffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
blkno = GinPageGetMeta(page)->head;
/*
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index e2c9b5f069..3134917428 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -346,7 +346,6 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
PredicateLockPage(r, BufferGetBlockNumber(buffer), scan->xs_snapshot);
gistcheckpage(scan->indexRelation, buffer);
page = BufferGetPage(buffer);
- TestForOldSnapshot(scan->xs_snapshot, r, page);
opaque = GistPageGetOpaque(page);
/*
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 9ea2a42a07..0a031bfd18 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -71,7 +71,6 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
if (BlockNumberIsValid(blkno))
{
buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
- TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
if (!_hash_readpage(scan, &buf, dir))
end_of_scan = true;
}
@@ -91,7 +90,6 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
{
buf = _hash_getbuf(rel, blkno, HASH_READ,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
- TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
/*
* We always maintain the pin on bucket page for whole scan
@@ -186,7 +184,6 @@ _hash_readnext(IndexScanDesc scan,
if (block_found)
{
*pagep = BufferGetPage(*bufp);
- TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
*opaquep = HashPageGetOpaque(*pagep);
}
}
@@ -232,7 +229,6 @@ _hash_readprev(IndexScanDesc scan,
*bufp = _hash_getbuf(rel, blkno, HASH_READ,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
*pagep = BufferGetPage(*bufp);
- TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
*opaquep = HashPageGetOpaque(*pagep);
/*
@@ -351,7 +347,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
page = BufferGetPage(buf);
- TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = HashPageGetOpaque(page);
bucket = opaque->hasho_bucket;
@@ -387,7 +382,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
- TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));
/*
* remember the split bucket buffer so as to use it later for
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 7ed72abe59..e002dcbdcd 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -425,7 +425,6 @@ heapgetpage(TableScanDesc sscan, BlockNumber block)
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page);
lines = PageGetMaxOffsetNumber(page);
ntup = 0;
@@ -565,8 +564,6 @@ heapgettup_start_page(HeapScanDesc scan, ScanDirection dir, int *linesleft,
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
-
*linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1;
if (ScanDirectionIsForward(dir))
@@ -598,8 +595,6 @@ heapgettup_continue_page(HeapScanDesc scan, ScanDirection dir, int *linesleft,
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
-
if (ScanDirectionIsForward(dir))
{
*lineoff = OffsetNumberNext(scan->rs_coffset);
@@ -864,7 +859,6 @@ heapgettup_pagemode(HeapScanDesc scan,
/* continue from previously returned page/tuple */
block = scan->rs_cblock; /* current page */
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
lineindex = scan->rs_cindex + dir;
if (ScanDirectionIsForward(dir))
@@ -884,7 +878,6 @@ heapgettup_pagemode(HeapScanDesc scan,
{
heapgetpage((TableScanDesc) scan, block);
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
linesleft = scan->rs_ntuples;
lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1;
@@ -1372,7 +1365,6 @@ heap_fetch(Relation relation,
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, relation, page);
/*
* We'd better check for out-of-range offnum in case of VACUUM since the
@@ -1663,7 +1655,6 @@ heap_get_latest_tid(TableScanDesc sscan,
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, relation, page);
/*
* Check for bogus item number. This is not treated as an error
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 47b9e20915..b0881131c9 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -36,18 +36,6 @@ typedef struct
/* tuple visibility test, initialized for the relation */
GlobalVisState *vistest;
- /*
- * Thresholds set by TransactionIdLimitedForOldSnapshots() if they have
- * been computed (done on demand, and only if
- * OldSnapshotThresholdActive()). The first time a tuple is about to be
- * removed based on the limited horizon, old_snap_used is set to true, and
- * SetOldSnapshotThresholdTimestamp() is called. See
- * heap_prune_satisfies_vacuum().
- */
- TimestampTz old_snap_ts;
- TransactionId old_snap_xmin;
- bool old_snap_used;
-
TransactionId new_prune_xid; /* new prune hint value for page */
TransactionId snapshotConflictHorizon; /* latest xid removed */
int nredirected; /* numbers of entries in arrays below */
@@ -110,8 +98,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
Page page = BufferGetPage(buffer);
TransactionId prune_xid;
GlobalVisState *vistest;
- TransactionId limited_xmin = InvalidTransactionId;
- TimestampTz limited_ts = 0;
Size minfree;
/*
@@ -122,15 +108,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
if (RecoveryInProgress())
return;
- /*
- * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
- * currently are broken, and discussion of what to do about them is
- * ongoing. See
- * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
- */
- if (old_snapshot_threshold == 0)
- SnapshotTooOldMagicForTest();
-
/*
* First check whether there's any chance there's something to prune,
* determining the appropriate horizon is a waste if there's no prune_xid
@@ -143,35 +120,11 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
/*
* Check whether prune_xid indicates that there may be dead rows that can
* be cleaned up.
- *
- * It is OK to check the old snapshot limit before acquiring the cleanup
- * lock because the worst that can happen is that we are not quite as
- * aggressive about the cleanup (by however many transaction IDs are
- * consumed between this point and acquiring the lock). This allows us to
- * save significant overhead in the case where the page is found not to be
- * prunable.
- *
- * Even if old_snapshot_threshold is set, we first check whether the page
- * can be pruned without. Both because
- * TransactionIdLimitedForOldSnapshots() is not cheap, and because not
- * unnecessarily relying on old_snapshot_threshold avoids causing
- * conflicts.
*/
vistest = GlobalVisTestFor(relation);
if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
- {
- if (!OldSnapshotThresholdActive())
- return;
-
- if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest),
- relation,
- &limited_xmin, &limited_ts))
- return;
-
- if (!TransactionIdPrecedes(prune_xid, limited_xmin))
- return;
- }
+ return;
/*
* We prune when a previous UPDATE failed to find enough space on the page
@@ -205,8 +158,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
int ndeleted,
nnewlpdead;
- ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
- limited_ts, &nnewlpdead, NULL);
+ ndeleted = heap_page_prune(relation, buffer, vistest,
+ &nnewlpdead, NULL);
/*
* Report the number of tuples reclaimed to pgstats. This is
@@ -249,9 +202,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
*
* vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
* (see heap_prune_satisfies_vacuum and
- * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
- * either have been set by TransactionIdLimitedForOldSnapshots, or
- * InvalidTransactionId/0 respectively.
+ * HeapTupleSatisfiesVacuum).
*
* Sets *nnewlpdead for caller, indicating the number of items that were
* newly set LP_DEAD during prune operation.
@@ -264,8 +215,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
int
heap_page_prune(Relation relation, Buffer buffer,
GlobalVisState *vistest,
- TransactionId old_snap_xmin,
- TimestampTz old_snap_ts,
int *nnewlpdead,
OffsetNumber *off_loc)
{
@@ -291,9 +240,6 @@ heap_page_prune(Relation relation, Buffer buffer,
prstate.new_prune_xid = InvalidTransactionId;
prstate.rel = relation;
prstate.vistest = vistest;
- prstate.old_snap_xmin = old_snap_xmin;
- prstate.old_snap_ts = old_snap_ts;
- prstate.old_snap_used = false;
prstate.snapshotConflictHorizon = InvalidTransactionId;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
@@ -481,19 +427,6 @@ heap_page_prune(Relation relation, Buffer buffer,
/*
* Perform visibility checks for heap pruning.
- *
- * This is more complicated than just using GlobalVisTestIsRemovableXid()
- * because of old_snapshot_threshold. We only want to increase the threshold
- * that triggers errors for old snapshots when we actually decide to remove a
- * row based on the limited horizon.
- *
- * Due to its cost we also only want to call
- * TransactionIdLimitedForOldSnapshots() if necessary, i.e. we might not have
- * done so in heap_page_prune_opt() if pd_prune_xid was old enough. But we
- * still want to be able to remove rows that are too new to be removed
- * according to prstate->vistest, but that can be removed based on
- * old_snapshot_threshold. So we call TransactionIdLimitedForOldSnapshots() on
- * demand in here, if appropriate.
*/
static HTSV_Result
heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
@@ -507,52 +440,11 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
return res;
/*
- * If we are already relying on the limited xmin, there is no need to
- * delay doing so anymore.
- */
- if (prstate->old_snap_used)
- {
- Assert(TransactionIdIsValid(prstate->old_snap_xmin));
-
- if (TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
- res = HEAPTUPLE_DEAD;
- return res;
- }
-
- /*
- * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
- * row dead. If not, and old_snapshot_threshold is enabled, try to use the
- * lowered horizon.
+ * Check if GlobalVisTestIsRemovableXid() is sufficient to find the row
+ * dead.
*/
if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
res = HEAPTUPLE_DEAD;
- else if (OldSnapshotThresholdActive())
- {
- /* haven't determined limited horizon yet, requests */
- if (!TransactionIdIsValid(prstate->old_snap_xmin))
- {
- TransactionId horizon =
- GlobalVisTestNonRemovableHorizon(prstate->vistest);
-
- TransactionIdLimitedForOldSnapshots(horizon, prstate->rel,
- &prstate->old_snap_xmin,
- &prstate->old_snap_ts);
- }
-
- if (TransactionIdIsValid(prstate->old_snap_xmin) &&
- TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
- {
- /*
- * About to remove row based on snapshot_too_old. Need to raise
- * the threshold so problematic accesses would error.
- */
- Assert(!prstate->old_snap_used);
- SetOldSnapshotThresholdTimestamp(prstate->old_snap_ts,
- prstate->old_snap_xmin);
- prstate->old_snap_used = true;
- res = HEAPTUPLE_DEAD;
- }
- }
return res;
}
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 4eb953f904..d2f5f33e99 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1588,7 +1588,7 @@ retry:
* that were deleted from indexes.
*/
tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
- InvalidTransactionId, 0, &nnewlpdead,
+ &nnewlpdead,
&vacrel->offnum);
/*
@@ -2823,8 +2823,7 @@ should_attempt_truncation(LVRelState *vacrel)
{
BlockNumber possibly_freeable;
- if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
- old_snapshot_threshold >= 0)
+ if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
return false;
possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 7e05e58676..d3ad49733c 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -277,7 +277,6 @@ _bt_moveright(Relation rel,
for (;;)
{
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (P_RIGHTMOST(opaque))
@@ -2016,7 +2015,6 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
/* step right one page */
so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(so->currPos.buf);
- TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = BTPageGetOpaque(page);
/* check for deleted page */
if (!P_IGNORE(opaque))
@@ -2119,7 +2117,6 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
* and do it all again.
*/
page = BufferGetPage(so->currPos.buf);
- TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (!P_IGNORE(opaque))
{
@@ -2225,7 +2222,6 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
CHECK_FOR_INTERRUPTS();
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
/*
@@ -2252,14 +2248,12 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
blkno = opaque->btpo_next;
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
}
/* Return to the original page to see what's up */
buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (P_ISDELETED(opaque))
{
@@ -2277,7 +2271,6 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
blkno = opaque->btpo_next;
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (!P_ISDELETED(opaque))
break;
@@ -2338,7 +2331,6 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
return InvalidBuffer;
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
for (;;)
@@ -2358,7 +2350,6 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
RelationGetRelationName(rel));
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
}
diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c
index cbfaf0c00a..17cab0087f 100644
--- a/src/backend/access/spgist/spgscan.c
+++ b/src/backend/access/spgist/spgscan.c
@@ -862,7 +862,6 @@ redirect:
/* else new pointer points to the same page, no work needed */
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, index, page);
isnull = SpGistPageStoresNulls(page) ? true : false;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 352e43d0e6..42a66b4e76 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3050,8 +3050,7 @@ index_build(Relation heapRelation,
/*
* If we found any potentially broken HOT chains, mark the index as not
* being usable until the current transaction is below the event horizon.
- * See src/backend/access/heap/README.HOT for discussion. Also set this
- * if early pruning/vacuuming is enabled for the heap relation. While it
+ * See src/backend/access/heap/README.HOT for discussion. While it
* might become safe to use the index earlier based on actual cleanup
* activity and other active transactions, the test for that would be much
* more complex and would require some form of blocking, so keep it simple
@@ -3067,7 +3066,7 @@ index_build(Relation heapRelation,
*
* We also need not set indcheckxmin during a concurrent index build,
* because we won't set indisvalid true until all transactions that care
- * about the broken HOT chains or early pruning/vacuuming are gone.
+ * about the broken HOT chains are gone.
*
* Therefore, this code path can only be taken during non-concurrent
* CREATE INDEX. Thus the fact that heap_update will set the pg_index
@@ -3076,7 +3075,7 @@ index_build(Relation heapRelation,
* about any concurrent readers of the tuple; no other transaction can see
* it yet.
*/
- if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
+ if (indexInfo->ii_BrokenHotChain &&
!isreindex &&
!indexInfo->ii_Concurrent)
{
@@ -3761,11 +3760,6 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
* reindexing pg_index itself, we must not try to update tuples in it.
* pg_index's indexes should always have these flags in their clean state,
* so that won't happen.
- *
- * If early pruning/vacuuming is enabled for the heap relation, the
- * usability horizon must be advanced to the current transaction on every
- * build or rebuild. pg_index is OK in this regard because catalog tables
- * are not subject to early cleanup.
*/
if (!skipped_constraint)
{
@@ -3773,7 +3767,6 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
HeapTuple indexTuple;
Form_pg_index indexForm;
bool index_bad;
- bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
pg_index = table_open(IndexRelationId, RowExclusiveLock);
@@ -3787,12 +3780,11 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
!indexForm->indisready ||
!indexForm->indislive);
if (index_bad ||
- (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
- early_pruning_enabled)
+ (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
{
- if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
+ if (!indexInfo->ii_BrokenHotChain)
indexForm->indcheckxmin = false;
- else if (index_bad || early_pruning_enabled)
+ else if (index_bad)
indexForm->indcheckxmin = true;
indexForm->indisvalid = true;
indexForm->indisready = true;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index a843f9ad92..0652cdfdfb 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1113,25 +1113,6 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
*/
cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
- if (OldSnapshotThresholdActive())
- {
- TransactionId limit_xmin;
- TimestampTz limit_ts;
-
- if (TransactionIdLimitedForOldSnapshots(cutoffs->OldestXmin, rel,
- &limit_xmin, &limit_ts))
- {
- /*
- * TODO: We should only set the threshold if we are pruning on the
- * basis of the increased limits. Not as crucial here as it is
- * for opportunistic pruning (which often happens at a much higher
- * frequency), but would still be a significant improvement.
- */
- SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
- cutoffs->OldestXmin = limit_xmin;
- }
- }
-
Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
/* Acquire OldestMxact */
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 3c59bbd04e..8eb6d07943 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5575,20 +5575,3 @@ IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
wb_context->nr_pending = 0;
}
-
-
-/*
- * Implement slower/larger portions of TestForOldSnapshot
- *
- * Smaller/faster portions are put inline, but the entire set of logic is too
- * big for that.
- */
-void
-TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
-{
- if (RelationAllowsEarlyPruning(relation)
- && (snapshot)->whenTaken < GetOldSnapshotThresholdTimestamp())
- ereport(ERROR,
- (errcode(ERRCODE_SNAPSHOT_TOO_OLD),
- errmsg("snapshot too old")));
-}
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 8f1ded7338..a0c7b0f027 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -137,7 +137,6 @@ CalculateShmemSize(int *num_semaphores)
size = add_size(size, WalRcvShmemSize());
size = add_size(size, PgArchShmemSize());
size = add_size(size, ApplyLauncherShmemSize());
- size = add_size(size, SnapMgrShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
@@ -289,7 +288,6 @@ CreateSharedMemoryAndSemaphores(void)
/*
* Set up other modules that need some shared memory space
*/
- SnapMgrInit();
BTreeShmemInit();
SyncScanShmemInit();
AsyncShmemInit();
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 8c8d728ba8..8f68a16051 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -2066,34 +2066,6 @@ GetMaxSnapshotSubxidCount(void)
return TOTAL_MAX_CACHED_SUBXIDS;
}
-/*
- * Initialize old_snapshot_threshold specific parts of a newly build snapshot.
- */
-static void
-GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
-{
- if (!OldSnapshotThresholdActive())
- {
- /*
- * If not using "snapshot too old" feature, fill related fields with
- * dummy values that don't require any locking.
- */
- snapshot->lsn = InvalidXLogRecPtr;
- snapshot->whenTaken = 0;
- }
- else
- {
- /*
- * Capture the current time and WAL stream location in case this
- * snapshot becomes old enough to need to fall back on the special
- * "old snapshot" logic.
- */
- snapshot->lsn = GetXLogInsertRecPtr();
- snapshot->whenTaken = GetSnapshotCurrentTimestamp();
- MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
- }
-}
-
/*
* Helper function for GetSnapshotData() that checks if the bulk of the
* visibility information in the snapshot is still valid. If so, it updates
@@ -2147,8 +2119,8 @@ GetSnapshotDataReuse(Snapshot snapshot)
snapshot->active_count = 0;
snapshot->regd_count = 0;
snapshot->copied = false;
-
- GetSnapshotDataInitOldSnapshot(snapshot);
+ snapshot->lsn = InvalidXLogRecPtr;
+ snapshot->whenTaken = 0;
return true;
}
@@ -2529,8 +2501,8 @@ GetSnapshotData(Snapshot snapshot)
snapshot->active_count = 0;
snapshot->regd_count = 0;
snapshot->copied = false;
-
- GetSnapshotDataInitOldSnapshot(snapshot);
+ snapshot->lsn = InvalidXLogRecPtr;
+ snapshot->whenTaken = 0;
return snapshot;
}
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index 6c7cf6c295..036ee3f49c 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -47,7 +47,7 @@ CommitTsSLRULock 38
CommitTsLock 39
ReplicationOriginLock 40
MultiXactTruncationLock 41
-OldSnapshotTimeMapLock 42
+# 42 was OldSnapshotTimeMapLock
LogicalRepWorkerLock 43
XactTruncationLock 44
# 45 was XactTruncationLock until removal of BackendRandomLock
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt
index 3d244af130..8e97a0150f 100644
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -439,10 +439,6 @@ Section: Class 58 - System Error (errors external to PostgreSQL itself)
58P01 E ERRCODE_UNDEFINED_FILE undefined_file
58P02 E ERRCODE_DUPLICATE_FILE duplicate_file
-Section: Class 72 - Snapshot Failure
-# (class borrowed from Oracle)
-72000 E ERRCODE_SNAPSHOT_TOO_OLD snapshot_too_old
-
Section: Class F0 - Configuration File Error
# (PostgreSQL-specific error class)
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 71e27f8eb0..1513d7e395 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3280,17 +3280,6 @@ struct config_int ConfigureNamesInt[] =
check_autovacuum_work_mem, NULL, NULL
},
- {
- {"old_snapshot_threshold", PGC_POSTMASTER, RESOURCES_ASYNCHRONOUS,
- gettext_noop("Time before a snapshot is too old to read pages changed after the snapshot was taken."),
- gettext_noop("A value of -1 disables this feature."),
- GUC_UNIT_MIN
- },
- &old_snapshot_threshold,
- -1, -1, MINS_PER_HOUR * HOURS_PER_DAY * 60,
- NULL, NULL, NULL
- },
-
{
{"tcp_keepalives_idle", PGC_USERSET, CONN_AUTH_TCP,
gettext_noop("Time between issuing TCP keepalives."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e4c0269fa3..c06c0503a9 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -198,8 +198,6 @@
#max_parallel_workers = 8 # maximum number of max_worker_processes that
# can be used in parallel operations
#parallel_leader_participation = on
-#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
- # (change requires restart)
#------------------------------------------------------------------------------
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 3a419e348f..b20440ee21 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -65,7 +65,6 @@
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
-#include "utils/old_snapshot.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
@@ -73,14 +72,6 @@
#include "utils/timestamp.h"
-/*
- * GUC parameters
- */
-int old_snapshot_threshold; /* number of minutes, -1 disables */
-
-volatile OldSnapshotControlData *oldSnapshotControl;
-
-
/*
* CurrentSnapshot points to the only snapshot taken in transaction-snapshot
* mode, and to the latest one taken in a read-committed transaction.
@@ -170,7 +161,6 @@ typedef struct ExportedSnapshot
static List *exportedSnapshots = NIL;
/* Prototypes for local functions */
-static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
static Snapshot CopySnapshot(Snapshot snapshot);
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
@@ -194,50 +184,6 @@ typedef struct SerializedSnapshotData
XLogRecPtr lsn;
} SerializedSnapshotData;
-Size
-SnapMgrShmemSize(void)
-{
- Size size;
-
- size = offsetof(OldSnapshotControlData, xid_by_minute);
- if (old_snapshot_threshold > 0)
- size = add_size(size, mul_size(sizeof(TransactionId),
- OLD_SNAPSHOT_TIME_MAP_ENTRIES));
-
- return size;
-}
-
-/*
- * Initialize for managing old snapshot detection.
- */
-void
-SnapMgrInit(void)
-{
- bool found;
-
- /*
- * Create or attach to the OldSnapshotControlData structure.
- */
- oldSnapshotControl = (volatile OldSnapshotControlData *)
- ShmemInitStruct("OldSnapshotControlData",
- SnapMgrShmemSize(), &found);
-
- if (!found)
- {
- SpinLockInit(&oldSnapshotControl->mutex_current);
- oldSnapshotControl->current_timestamp = 0;
- SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
- oldSnapshotControl->latest_xmin = InvalidTransactionId;
- oldSnapshotControl->next_map_update = 0;
- SpinLockInit(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = 0;
- oldSnapshotControl->threshold_xid = InvalidTransactionId;
- oldSnapshotControl->head_offset = 0;
- oldSnapshotControl->head_timestamp = 0;
- oldSnapshotControl->count_used = 0;
- }
-}
-
/*
* GetTransactionSnapshot
* Get the appropriate snapshot for a new query in a transaction.
@@ -1656,420 +1602,6 @@ HaveRegisteredOrActiveSnapshot(void)
}
-/*
- * Return a timestamp that is exactly on a minute boundary.
- *
- * If the argument is already aligned, return that value, otherwise move to
- * the next minute boundary following the given time.
- */
-static TimestampTz
-AlignTimestampToMinuteBoundary(TimestampTz ts)
-{
- TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
-
- return retval - (retval % USECS_PER_MINUTE);
-}
-
-/*
- * Get current timestamp for snapshots
- *
- * This is basically GetCurrentTimestamp(), but with a guarantee that
- * the result never moves backward.
- */
-TimestampTz
-GetSnapshotCurrentTimestamp(void)
-{
- TimestampTz now = GetCurrentTimestamp();
-
- /*
- * Don't let time move backward; if it hasn't advanced, use the old value.
- */
- SpinLockAcquire(&oldSnapshotControl->mutex_current);
- if (now <= oldSnapshotControl->current_timestamp)
- now = oldSnapshotControl->current_timestamp;
- else
- oldSnapshotControl->current_timestamp = now;
- SpinLockRelease(&oldSnapshotControl->mutex_current);
-
- return now;
-}
-
-/*
- * Get timestamp through which vacuum may have processed based on last stored
- * value for threshold_timestamp.
- *
- * XXX: So far, we never trust that a 64-bit value can be read atomically; if
- * that ever changes, we could get rid of the spinlock here.
- */
-TimestampTz
-GetOldSnapshotThresholdTimestamp(void)
-{
- TimestampTz threshold_timestamp;
-
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-
- return threshold_timestamp;
-}
-
-void
-SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
-{
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- Assert(oldSnapshotControl->threshold_timestamp <= ts);
- Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
- oldSnapshotControl->threshold_timestamp = ts;
- oldSnapshotControl->threshold_xid = xlimit;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-}
-
-/*
- * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
- * currently are broken, and discussion of what to do about them is
- * ongoing. See
- * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
- */
-void
-SnapshotTooOldMagicForTest(void)
-{
- TimestampTz ts = GetSnapshotCurrentTimestamp();
-
- Assert(old_snapshot_threshold == 0);
-
- ts -= 5 * USECS_PER_SEC;
-
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = ts;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-}
-
-/*
- * If there is a valid mapping for the timestamp, set *xlimitp to
- * that. Returns whether there is such a mapping.
- */
-static bool
-GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
-{
- bool in_mapping = false;
-
- Assert(ts == AlignTimestampToMinuteBoundary(ts));
-
- LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
-
- if (oldSnapshotControl->count_used > 0
- && ts >= oldSnapshotControl->head_timestamp)
- {
- int offset;
-
- offset = ((ts - oldSnapshotControl->head_timestamp)
- / USECS_PER_MINUTE);
- if (offset > oldSnapshotControl->count_used - 1)
- offset = oldSnapshotControl->count_used - 1;
- offset = (oldSnapshotControl->head_offset + offset)
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- *xlimitp = oldSnapshotControl->xid_by_minute[offset];
-
- in_mapping = true;
- }
-
- LWLockRelease(OldSnapshotTimeMapLock);
-
- return in_mapping;
-}
-
-/*
- * TransactionIdLimitedForOldSnapshots
- *
- * Apply old snapshot limit. This is intended to be called for page pruning
- * and table vacuuming, to allow old_snapshot_threshold to override the normal
- * global xmin value. Actual testing for snapshot too old will be based on
- * whether a snapshot timestamp is prior to the threshold timestamp set in
- * this function.
- *
- * If the limited horizon allows a cleanup action that otherwise would not be
- * possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
- * be called before that cleanup action.
- */
-bool
-TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
- Relation relation,
- TransactionId *limit_xid,
- TimestampTz *limit_ts)
-{
- TimestampTz ts;
- TransactionId xlimit = recentXmin;
- TransactionId latest_xmin;
- TimestampTz next_map_update_ts;
- TransactionId threshold_timestamp;
- TransactionId threshold_xid;
-
- Assert(TransactionIdIsNormal(recentXmin));
- Assert(OldSnapshotThresholdActive());
- Assert(limit_ts != NULL && limit_xid != NULL);
-
- /*
- * TestForOldSnapshot() assumes early pruning advances the page LSN, so we
- * can't prune early when skipping WAL.
- */
- if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
- return false;
-
- ts = GetSnapshotCurrentTimestamp();
-
- SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
- latest_xmin = oldSnapshotControl->latest_xmin;
- next_map_update_ts = oldSnapshotControl->next_map_update;
- SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
-
- /*
- * Zero threshold always overrides to latest xmin, if valid. Without some
- * heuristic it will find its own snapshot too old on, for example, a
- * simple UPDATE -- which would make it useless for most testing, but
- * there is no principled way to ensure that it doesn't fail in this way.
- * Use a five-second delay to try to get useful testing behavior, but this
- * may need adjustment.
- */
- if (old_snapshot_threshold == 0)
- {
- if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
- && TransactionIdFollows(latest_xmin, xlimit))
- xlimit = latest_xmin;
-
- ts -= 5 * USECS_PER_SEC;
- }
- else
- {
- ts = AlignTimestampToMinuteBoundary(ts)
- - (old_snapshot_threshold * USECS_PER_MINUTE);
-
- /* Check for fast exit without LW locking. */
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- threshold_xid = oldSnapshotControl->threshold_xid;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-
- if (ts == threshold_timestamp)
- {
- /*
- * Current timestamp is in same bucket as the last limit that was
- * applied. Reuse.
- */
- xlimit = threshold_xid;
- }
- else if (ts == next_map_update_ts)
- {
- /*
- * FIXME: This branch is super iffy - but that should probably
- * fixed separately.
- */
- xlimit = latest_xmin;
- }
- else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
- {
- }
-
- /*
- * Failsafe protection against vacuuming work of active transaction.
- *
- * This is not an assertion because we avoid the spinlock for
- * performance, leaving open the possibility that xlimit could advance
- * and be more current; but it seems prudent to apply this limit. It
- * might make pruning a tiny bit less aggressive than it could be, but
- * protects against data loss bugs.
- */
- if (TransactionIdIsNormal(latest_xmin)
- && TransactionIdPrecedes(latest_xmin, xlimit))
- xlimit = latest_xmin;
- }
-
- if (TransactionIdIsValid(xlimit) &&
- TransactionIdFollowsOrEquals(xlimit, recentXmin))
- {
- *limit_ts = ts;
- *limit_xid = xlimit;
-
- return true;
- }
-
- return false;
-}
-
-/*
- * Take care of the circular buffer that maps time to xid.
- */
-void
-MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
-{
- TimestampTz ts;
- TransactionId latest_xmin;
- TimestampTz update_ts;
- bool map_update_required = false;
-
- /* Never call this function when old snapshot checking is disabled. */
- Assert(old_snapshot_threshold >= 0);
-
- ts = AlignTimestampToMinuteBoundary(whenTaken);
-
- /*
- * Keep track of the latest xmin seen by any process. Update mapping with
- * a new value when we have crossed a bucket boundary.
- */
- SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
- latest_xmin = oldSnapshotControl->latest_xmin;
- update_ts = oldSnapshotControl->next_map_update;
- if (ts > update_ts)
- {
- oldSnapshotControl->next_map_update = ts;
- map_update_required = true;
- }
- if (TransactionIdFollows(xmin, latest_xmin))
- oldSnapshotControl->latest_xmin = xmin;
- SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
-
- /* We only needed to update the most recent xmin value. */
- if (!map_update_required)
- return;
-
- /* No further tracking needed for 0 (used for testing). */
- if (old_snapshot_threshold == 0)
- return;
-
- /*
- * We don't want to do something stupid with unusual values, but we don't
- * want to litter the log with warnings or break otherwise normal
- * processing for this feature; so if something seems unreasonable, just
- * log at DEBUG level and return without doing anything.
- */
- if (whenTaken < 0)
- {
- elog(DEBUG1,
- "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
- (long) whenTaken);
- return;
- }
- if (!TransactionIdIsNormal(xmin))
- {
- elog(DEBUG1,
- "MaintainOldSnapshotTimeMapping called with xmin = %lu",
- (unsigned long) xmin);
- return;
- }
-
- LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
-
- Assert(oldSnapshotControl->head_offset >= 0);
- Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
- Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
- Assert(oldSnapshotControl->count_used >= 0);
- Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
-
- if (oldSnapshotControl->count_used == 0)
- {
- /* set up first entry for empty mapping */
- oldSnapshotControl->head_offset = 0;
- oldSnapshotControl->head_timestamp = ts;
- oldSnapshotControl->count_used = 1;
- oldSnapshotControl->xid_by_minute[0] = xmin;
- }
- else if (ts < oldSnapshotControl->head_timestamp)
- {
- /* old ts; log it at DEBUG */
- LWLockRelease(OldSnapshotTimeMapLock);
- elog(DEBUG1,
- "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
- (long) whenTaken);
- return;
- }
- else if (ts <= (oldSnapshotControl->head_timestamp +
- ((oldSnapshotControl->count_used - 1)
- * USECS_PER_MINUTE)))
- {
- /* existing mapping; advance xid if possible */
- int bucket = (oldSnapshotControl->head_offset
- + ((ts - oldSnapshotControl->head_timestamp)
- / USECS_PER_MINUTE))
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
- oldSnapshotControl->xid_by_minute[bucket] = xmin;
- }
- else
- {
- /* We need a new bucket, but it might not be the very next one. */
- int distance_to_new_tail;
- int distance_to_current_tail;
- int advance;
-
- /*
- * Our goal is for the new "tail" of the mapping, that is, the entry
- * which is newest and thus furthest from the "head" entry, to
- * correspond to "ts". Since there's one entry per minute, the
- * distance between the current head and the new tail is just the
- * number of minutes of difference between ts and the current
- * head_timestamp.
- *
- * The distance from the current head to the current tail is one less
- * than the number of entries in the mapping, because the entry at the
- * head_offset is for 0 minutes after head_timestamp.
- *
- * The difference between these two values is the number of minutes by
- * which we need to advance the mapping, either adding new entries or
- * rotating old ones out.
- */
- distance_to_new_tail =
- (ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
- distance_to_current_tail =
- oldSnapshotControl->count_used - 1;
- advance = distance_to_new_tail - distance_to_current_tail;
- Assert(advance > 0);
-
- if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
- {
- /* Advance is so far that all old data is junk; start over. */
- oldSnapshotControl->head_offset = 0;
- oldSnapshotControl->count_used = 1;
- oldSnapshotControl->xid_by_minute[0] = xmin;
- oldSnapshotControl->head_timestamp = ts;
- }
- else
- {
- /* Store the new value in one or more buckets. */
- int i;
-
- for (i = 0; i < advance; i++)
- {
- if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
- {
- /* Map full and new value replaces old head. */
- int old_head = oldSnapshotControl->head_offset;
-
- if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
- oldSnapshotControl->head_offset = 0;
- else
- oldSnapshotControl->head_offset = old_head + 1;
- oldSnapshotControl->xid_by_minute[old_head] = xmin;
- oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
- }
- else
- {
- /* Extend map to unused entry. */
- int new_tail = (oldSnapshotControl->head_offset
- + oldSnapshotControl->count_used)
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- oldSnapshotControl->count_used++;
- oldSnapshotControl->xid_by_minute[new_tail] = xmin;
- }
- }
- }
- }
-
- LWLockRelease(OldSnapshotTimeMapLock);
-}
-
-
/*
* Setup a snapshot that replaces normal catalog snapshots that allows catalog
* access to behave just like it did at a certain point in the past.
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index faf5026519..6598c4d7d8 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -286,8 +286,6 @@ struct GlobalVisState;
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern int heap_page_prune(Relation relation, Buffer buffer,
struct GlobalVisState *vistest,
- TransactionId old_snap_xmin,
- TimestampTz old_snap_ts,
int *nnewlpdead,
OffsetNumber *off_loc);
extern void heap_page_prune_execute(Buffer buffer,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 0f5fb6be00..09b7f98e2c 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -250,8 +250,6 @@ extern void AbortBufferIO(Buffer buffer);
extern bool BgBufferSync(struct WritebackContext *wb_context);
-extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation);
-
/* in buf_init.c */
extern void InitBufferPool(void);
extern Size BufferShmemSize(void);
@@ -347,9 +345,6 @@ BufferGetPageSize(Buffer buffer)
/*
* BufferGetPage
* Returns the page associated with a buffer.
- *
- * When this is called as part of a scan, there may be a need for a nearby
- * call to TestForOldSnapshot(). See the definition of that for details.
*/
static inline Page
BufferGetPage(Buffer buffer)
@@ -357,37 +352,6 @@ BufferGetPage(Buffer buffer)
return (Page) BufferGetBlock(buffer);
}
-/*
- * Check whether the given snapshot is too old to have safely read the given
- * page from the given table. If so, throw a "snapshot too old" error.
- *
- * This test generally needs to be performed after every BufferGetPage() call
- * that is executed as part of a scan. It is not needed for calls made for
- * modifying the page (for example, to position to the right place to insert a
- * new index tuple or for vacuuming). It may also be omitted where calls to
- * lower-level functions will have already performed the test.
- *
- * Note that a NULL snapshot argument is allowed and causes a fast return
- * without error; this is to support call sites which can be called from
- * either scans or index modification areas.
- *
- * For best performance, keep the tests that are fastest and/or most likely to
- * exclude a page from old snapshot testing near the front.
- */
-static inline void
-TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
-{
- Assert(relation != NULL);
-
- if (old_snapshot_threshold >= 0
- && (snapshot) != NULL
- && ((snapshot)->snapshot_type == SNAPSHOT_MVCC
- || (snapshot)->snapshot_type == SNAPSHOT_TOAST)
- && !XLogRecPtrIsInvalid((snapshot)->lsn)
- && PageGetLSN(page) > (snapshot)->lsn)
- TestForOldSnapshot_impl(snapshot, relation);
-}
-
#endif /* FRONTEND */
#endif /* BUFMGR_H */
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
deleted file mode 100644
index f1978a28e1..0000000000
--- a/src/include/utils/old_snapshot.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * old_snapshot.h
- * Data structures for 'snapshot too old'
- *
- * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- * src/include/utils/old_snapshot.h
- *
- *-------------------------------------------------------------------------
- */
-
-#ifndef OLD_SNAPSHOT_H
-#define OLD_SNAPSHOT_H
-
-#include "datatype/timestamp.h"
-#include "storage/s_lock.h"
-
-/*
- * Structure for dealing with old_snapshot_threshold implementation.
- */
-typedef struct OldSnapshotControlData
-{
- /*
- * Variables for old snapshot handling are shared among processes and are
- * only allowed to move forward.
- */
- slock_t mutex_current; /* protect current_timestamp */
- TimestampTz current_timestamp; /* latest snapshot timestamp */
- slock_t mutex_latest_xmin; /* protect latest_xmin and next_map_update */
- TransactionId latest_xmin; /* latest snapshot xmin */
- TimestampTz next_map_update; /* latest snapshot valid up to */
- slock_t mutex_threshold; /* protect threshold fields */
- TimestampTz threshold_timestamp; /* earlier snapshot is old */
- TransactionId threshold_xid; /* earlier xid may be gone */
-
- /*
- * Keep one xid per minute for old snapshot error handling.
- *
- * Use a circular buffer with a head offset, a count of entries currently
- * used, and a timestamp corresponding to the xid at the head offset. A
- * count_used value of zero means that there are no times stored; a
- * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
- * is full and the head must be advanced to add new entries. Use
- * timestamps aligned to minute boundaries, since that seems less
- * surprising than aligning based on the first usage timestamp. The
- * latest bucket is effectively stored within latest_xmin. The circular
- * buffer is updated when we get a new xmin value that doesn't fall into
- * the same interval.
- *
- * It is OK if the xid for a given time slot is from earlier than
- * calculated by adding the number of minutes corresponding to the
- * (possibly wrapped) distance from the head offset to the time of the
- * head entry, since that just results in the vacuuming of old tuples
- * being slightly less aggressive. It would not be OK for it to be off in
- * the other direction, since it might result in vacuuming tuples that are
- * still expected to be there.
- *
- * Use of an SLRU was considered but not chosen because it is more
- * heavyweight than is needed for this, and would probably not be any less
- * code to implement.
- *
- * Persistence is not needed.
- */
- int head_offset; /* subscript of oldest tracked time */
- TimestampTz head_timestamp; /* time corresponding to head xid */
- int count_used; /* how many slots are in use */
- TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotControlData;
-
-extern PGDLLIMPORT volatile OldSnapshotControlData *oldSnapshotControl;
-
-#endif
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 980d37a194..4dc94e7ec9 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -19,40 +19,6 @@
#include "utils/snapshot.h"
-/*
- * The structure used to map times to TransactionId values for the "snapshot
- * too old" feature must have a few entries at the tail to hold old values;
- * otherwise the lookup will often fail and the expected early pruning or
- * vacuum will not usually occur. It is best if this padding is for a number
- * of minutes greater than a thread would normally be stalled, but it's OK if
- * early vacuum opportunities are occasionally missed, so there's no need to
- * use an extreme value or get too fancy. 10 minutes seems plenty.
- */
-#define OLD_SNAPSHOT_PADDING_ENTRIES 10
-#define OLD_SNAPSHOT_TIME_MAP_ENTRIES (old_snapshot_threshold + OLD_SNAPSHOT_PADDING_ENTRIES)
-
-/*
- * Common definition of relation properties that allow early pruning/vacuuming
- * when old_snapshot_threshold >= 0.
- */
-#define RelationAllowsEarlyPruning(rel) \
-( \
- RelationIsPermanent(rel) && !IsCatalogRelation(rel) \
- && !RelationIsAccessibleInLogicalDecoding(rel) \
-)
-
-#define EarlyPruningEnabled(rel) (old_snapshot_threshold >= 0 && RelationAllowsEarlyPruning(rel))
-
-/* GUC variables */
-extern PGDLLIMPORT int old_snapshot_threshold;
-
-
-extern Size SnapMgrShmemSize(void);
-extern void SnapMgrInit(void);
-extern TimestampTz GetSnapshotCurrentTimestamp(void);
-extern TimestampTz GetOldSnapshotThresholdTimestamp(void);
-extern void SnapshotTooOldMagicForTest(void);
-
extern PGDLLIMPORT bool FirstSnapshotSet;
extern PGDLLIMPORT TransactionId TransactionXmin;
@@ -97,14 +63,6 @@ extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
((snapshot)->snapshot_type == SNAPSHOT_MVCC || \
(snapshot)->snapshot_type == SNAPSHOT_HISTORIC_MVCC)
-#ifndef FRONTEND
-static inline bool
-OldSnapshotThresholdActive(void)
-{
- return old_snapshot_threshold >= 0;
-}
-#endif
-
extern Snapshot GetTransactionSnapshot(void);
extern Snapshot GetLatestSnapshot(void);
extern void SnapshotSetCommandId(CommandId curcid);
@@ -138,13 +96,6 @@ extern void DeleteAllExportedSnapshotFiles(void);
extern void WaitForOlderSnapshots(TransactionId limitXmin, bool progress);
extern bool ThereAreNoPriorRegisteredSnapshots(void);
extern bool HaveRegisteredOrActiveSnapshot(void);
-extern bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
- Relation relation,
- TransactionId *limit_xid,
- TimestampTz *limit_ts);
-extern void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit);
-extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
- TransactionId xmin);
extern char *ExportSnapshot(Snapshot snapshot);
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 6331c976dc..e81873cb5a 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -12,7 +12,6 @@ SUBDIRS = \
dummy_seclabel \
libpq_pipeline \
plsample \
- snapshot_too_old \
spgist_name_ops \
test_bloomfilter \
test_copy_callbacks \
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 17d369e378..fcd643f6f1 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -8,7 +8,6 @@ subdir('dummy_seclabel')
subdir('ldap_password_func')
subdir('libpq_pipeline')
subdir('plsample')
-subdir('snapshot_too_old')
subdir('spgist_name_ops')
subdir('ssl_passphrase_callback')
subdir('test_bloomfilter')
diff --git a/src/test/modules/snapshot_too_old/.gitignore b/src/test/modules/snapshot_too_old/.gitignore
deleted file mode 100644
index ba2160b66c..0000000000
--- a/src/test/modules/snapshot_too_old/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# Generated subdirectories
-/output_iso/
-/tmp_check_iso/
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
deleted file mode 100644
index dfb4537f63..0000000000
--- a/src/test/modules/snapshot_too_old/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-# src/test/modules/snapshot_too_old/Makefile
-
-# Note: because we don't tell the Makefile there are any regression tests,
-# we have to clean those result files explicitly
-EXTRA_CLEAN = $(pg_regress_clean_files)
-
-ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
-ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
-
-# Disabled because these tests require "old_snapshot_threshold" >= 0, which
-# typical installcheck users do not have (e.g. buildfarm clients).
-NO_INSTALLCHECK = 1
-
-ifdef USE_PGXS
-PG_CONFIG = pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
-else
-subdir = src/test/modules/snapshot_too_old
-top_builddir = ../../../..
-include $(top_builddir)/src/Makefile.global
-include $(top_srcdir)/contrib/contrib-global.mk
-endif
-
-# But it can nonetheless be very helpful to run tests on preexisting
-# installation, allow to do so, but only if requested explicitly.
-installcheck-force:
- $(pg_isolation_regress_installcheck) $(ISOLATION)
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out b/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
deleted file mode 100644
index 06fe4d0669..0000000000
--- a/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
+++ /dev/null
@@ -1,19 +0,0 @@
-Parsed test spec with 2 sessions
-
-starting permutation: s1decl s1f1 s1sleep s2u s1f2
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s1f1: FETCH FIRST FROM cursor1;
-c
--
-1
-(1 row)
-
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting|pg_sleep
--------+--------
- 0|
-(1 row)
-
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR: snapshot too old
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out b/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
deleted file mode 100644
index 11f827fbfe..0000000000
--- a/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
+++ /dev/null
@@ -1,19 +0,0 @@
-Parsed test spec with 2 sessions
-
-starting permutation: noseq s1f1 s2sleep s2u s1f2
-step noseq: SET enable_seqscan = false;
-step s1f1: SELECT c FROM sto1 where c = 1000;
- c
-----
-1000
-(1 row)
-
-step s2sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting|pg_sleep
--------+--------
- 0|
-(1 row)
-
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1000;
-step s1f2: SELECT c FROM sto1 where c = 1001;
-ERROR: snapshot too old
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_select.out b/src/test/modules/snapshot_too_old/expected/sto_using_select.out
deleted file mode 100644
index e910e5c71e..0000000000
--- a/src/test/modules/snapshot_too_old/expected/sto_using_select.out
+++ /dev/null
@@ -1,18 +0,0 @@
-Parsed test spec with 2 sessions
-
-starting permutation: s1f1 s1sleep s2u s1f2
-step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-c
--
-1
-(1 row)
-
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting|pg_sleep
--------+--------
- 0|
-(1 row)
-
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-ERROR: snapshot too old
diff --git a/src/test/modules/snapshot_too_old/meson.build b/src/test/modules/snapshot_too_old/meson.build
deleted file mode 100644
index 6a2f4a0b72..0000000000
--- a/src/test/modules/snapshot_too_old/meson.build
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2022-2023, PostgreSQL Global Development Group
-
-tests += {
- 'name': 'snapshot_too_old',
- 'sd': meson.current_source_dir(),
- 'bd': meson.current_build_dir(),
- 'isolation': {
- 'test_kwargs': {'priority': 40}, # sto tests are slow, start early
- 'specs': [
- 'sto_using_cursor',
- 'sto_using_select',
- 'sto_using_hash_index',
- ],
- 'regress_args': ['--temp-config', files('sto.conf')],
- # Disabled because these tests require "old_snapshot_threshold" >= 0, which
- # typical runningcheck users do not have (e.g. buildfarm clients).
- 'runningcheck': false,
- },
-}
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec b/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
deleted file mode 100644
index f3677a8fa9..0000000000
--- a/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
+++ /dev/null
@@ -1,38 +0,0 @@
-# This test provokes a "snapshot too old" error using a cursor.
-#
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made. With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising value isn't so hard
-# to identify.
-
-setup
-{
- CREATE TABLE sto1 (c int NOT NULL);
- INSERT INTO sto1 SELECT generate_series(1, 1000);
-}
-setup
-{
- VACUUM ANALYZE sto1;
-}
-
-teardown
-{
- DROP TABLE sto1;
-}
-
-session "s1"
-setup { BEGIN ISOLATION LEVEL REPEATABLE READ; }
-step "s1decl" { DECLARE cursor1 CURSOR FOR SELECT c FROM sto1; }
-step "s1f1" { FETCH FIRST FROM cursor1; }
-step "s1sleep" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
-step "s1f2" { FETCH FIRST FROM cursor1; }
-teardown { COMMIT; }
-
-session "s2"
-step "s2u" { UPDATE sto1 SET c = 1001 WHERE c = 1; }
-
-permutation "s1decl" "s1f1" "s1sleep" "s2u" "s1f2"
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec b/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
deleted file mode 100644
index 33d91ff852..0000000000
--- a/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
+++ /dev/null
@@ -1,31 +0,0 @@
-# This test is like sto_using_select, except that we test access via a
-# hash index.
-
-setup
-{
- CREATE TABLE sto1 (c int NOT NULL);
- INSERT INTO sto1 SELECT generate_series(1, 1000);
- CREATE INDEX idx_sto1 ON sto1 USING HASH (c);
-}
-setup
-{
- VACUUM ANALYZE sto1;
-}
-
-teardown
-{
- DROP TABLE sto1;
-}
-
-session "s1"
-setup { BEGIN ISOLATION LEVEL REPEATABLE READ; }
-step "noseq" { SET enable_seqscan = false; }
-step "s1f1" { SELECT c FROM sto1 where c = 1000; }
-step "s1f2" { SELECT c FROM sto1 where c = 1001; }
-teardown { ROLLBACK; }
-
-session "s2"
-step "s2sleep" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
-step "s2u" { UPDATE sto1 SET c = 1001 WHERE c = 1000; }
-
-permutation "noseq" "s1f1" "s2sleep" "s2u" "s1f2"
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_select.spec b/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
deleted file mode 100644
index 80a31763ad..0000000000
--- a/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
+++ /dev/null
@@ -1,37 +0,0 @@
-# This test provokes a "snapshot too old" error using SELECT statements.
-#
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made. With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising value isn't so hard
-# to identify.
-
-setup
-{
- CREATE TABLE sto1 (c int NOT NULL);
- INSERT INTO sto1 SELECT generate_series(1, 1000);
-}
-setup
-{
- VACUUM ANALYZE sto1;
-}
-
-teardown
-{
- DROP TABLE sto1;
-}
-
-session "s1"
-setup { BEGIN ISOLATION LEVEL REPEATABLE READ; }
-step "s1f1" { SELECT c FROM sto1 ORDER BY c LIMIT 1; }
-step "s1sleep" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
-step "s1f2" { SELECT c FROM sto1 ORDER BY c LIMIT 1; }
-teardown { COMMIT; }
-
-session "s2"
-step "s2u" { UPDATE sto1 SET c = 1001 WHERE c = 1; }
-
-permutation "s1f1" "s1sleep" "s2u" "s1f2"
diff --git a/src/test/modules/snapshot_too_old/sto.conf b/src/test/modules/snapshot_too_old/sto.conf
deleted file mode 100644
index 7eeaeeb0dc..0000000000
--- a/src/test/modules/snapshot_too_old/sto.conf
+++ /dev/null
@@ -1,2 +0,0 @@
-autovacuum = off
-old_snapshot_threshold = 0
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 260854747b..be7c2f18d5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1656,8 +1656,6 @@ OffsetVarNodes_context
Oid
OidOptions
OkeysState
-OldSnapshotControlData
-OldSnapshotTimeMapping
OldToNewMapping
OldToNewMappingData
OnCommitAction
--
2.39.2
On Wed, Jun 14, 2023 at 3:56 PM Thomas Munro <thomas.munro@gmail.com> wrote:
On Tue, Feb 14, 2023 at 9:45 AM Andres Freund <andres@anarazel.de> wrote:
Indeed. There's a lot of things wrong with it. We have reproducers for
creating wrong query results. Nobody has shown interest in fixing the
problems, for several years by now. It costs users that *do not* use the
feature performance (*). I think we're doing our users a disservice by
claiming to have this feature. I don't think a lot of the existing code
would survive if we were to create a newer, more maintainable / reliable,
version of the feature.

I raised this at the recent developer meeting and the assembled
hackers agreed. Does anyone think we *shouldn't* drop the feature? I
volunteered to write a removal patch for v17, so here's a first run
through to find all the traces of this feature. In this first go I
removed everything I could think of, but we might want to keep some
vestiges. I guess we might want to keep the registered error
class/code? Should we invent a place where we keep stuff like #define
TestForOldSnapshot(...) expanding to nothing for some amount of time,
for extensions? I dunno, I bet extensions doing stuff that
sophisticated already have a bunch of version tests anyway. I suppose
keeping the GUC wouldn't really be helpful (if you're using it, you
probably want to know that it isn't available anymore and think about
the implications for your application).
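A minimal sketch of the sort of do-nothing shim floated above, purely
hypothetical and not part of the attached patch, assuming an extension
chose to carry it in its own header while it still builds against older
sources:

#ifndef TestForOldSnapshot
/*
 * Hypothetical stand-in for the removed check: with the "snapshot too old"
 * machinery gone, the call can expand to a no-op. The arguments are cast to
 * void only so that callers whose variables exist solely for this check
 * don't trip unused-variable warnings.
 */
#define TestForOldSnapshot(snapshot, relation, page) \
	((void) (snapshot), (void) (relation), (void) (page))
#endif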
Done.
I hope we get "snapshot too old" back one day.
On Tue, Sep 5, 2023 at 12:58 AM Thomas Munro <thomas.munro@gmail.com> wrote:
I hope we get "snapshot too old" back one day.
Thanks for working on this. Though I wonder why you didn't do
something closer to a straight revert of the feature. Why is nbtree
still passing around snapshots needlessly?
Also, why are there still many comments referencing the feature?
There's the one above should_attempt_truncation(), for example.
Another appears above init_toast_snapshot(). Are these just
oversights, or was it deliberate? You said something about retaining
vestiges.
--
Peter Geoghegan
On Fri, Sep 8, 2023 at 1:53 PM Peter Geoghegan <pg@bowt.ie> wrote:
On Tue, Sep 5, 2023 at 12:58 AM Thomas Munro <thomas.munro@gmail.com> wrote:
I hope we get "snapshot too old" back one day.
Thanks for working on this. Though I wonder why you didn't do
something closer to a straight revert of the feature. Why is nbtree
still passing around snapshots needlessly?
Also, why are there still many comments referencing the feature?
There's the one above should_attempt_truncation(), for example.
Another appears above init_toast_snapshot(). Are these just
oversights, or was it deliberate? You said something about retaining
vestiges.
Oh. Not intentional. Looking now...
On Fri, Sep 8, 2023 at 2:00 PM Thomas Munro <thomas.munro@gmail.com> wrote:
On Fri, Sep 8, 2023 at 1:53 PM Peter Geoghegan <pg@bowt.ie> wrote:
Thanks for working on this. Though I wonder why you didn't do
something closer to a straight revert of the feature. Why is nbtree
still passing around snapshots needlessly?
The code moved around quite a few times over several commits and quite
a lot since then, which is why I didn't go for straight revert, but
clearly the manual approach risked missing things. I think the
attached removes all unused 'snapshot' arguments from AM-internal
functions. Checked by compiling with clang's -Wunused-parameter, and
then searching for 'snapshot', and excluding the expected cases.
Also, why are there still many comments referencing the feature?
There's the one above should_attempt_truncation(), for example.
Another appears above init_toast_snapshot(). Are these just
oversights, or was it deliberate? You said something about retaining
vestiges.
Stray comments removed.
Attachments:
0001-Remove-some-more-snapshot-too-old-vestiges.patch (text/x-patch; charset=US-ASCII)
From 7d57de24c5a4e9ab3dacca9231a9893b909439f6 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Fri, 8 Sep 2023 14:29:13 +1200
Subject: [PATCH] Remove some more "snapshot too old" vestiges.
Commit f691f5b8 removed the logic, but left behind some now-useless
Snapshot arguments to various AM-internal functions, and missed a couple
of comments.
Reported-by: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-Wznj9qSNXZ1P1uWTUD_FeaTezbUazb416EPwi4Qr_jR_6A%40mail.gmail.com
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 94a9759322..dbb83d80f8 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2694,7 +2694,7 @@ bt_rootdescend(BtreeCheckState *state, IndexTuple itup)
*/
Assert(state->readonly && state->rootdescend);
exists = false;
- stack = _bt_search(state->rel, NULL, key, &lbuf, BT_READ, NULL);
+ stack = _bt_search(state->rel, NULL, key, &lbuf, BT_READ);
if (BufferIsValid(lbuf))
{
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index d4fec654bb..a7538f32c2 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -169,7 +169,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
MemoryContext oldcxt = CurrentMemoryContext;
bool autosummarize = BrinGetAutoSummarize(idxRel);
- revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL);
+ revmap = brinRevmapInitialize(idxRel, &pagesPerRange);
/*
* origHeapBlk is the block number where the insertion occurred. heapBlk
@@ -202,7 +202,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
lastPageTuple =
brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
- NULL, BUFFER_LOCK_SHARE, NULL);
+ NULL, BUFFER_LOCK_SHARE);
if (!lastPageTuple)
{
bool recorded;
@@ -222,7 +222,7 @@ brininsert(Relation idxRel, Datum *values, bool *nulls,
}
brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
- NULL, BUFFER_LOCK_SHARE, NULL);
+ NULL, BUFFER_LOCK_SHARE);
/* if range is unsummarized, there's nothing to do */
if (!brtup)
@@ -332,8 +332,7 @@ brinbeginscan(Relation r, int nkeys, int norderbys)
scan = RelationGetIndexScan(r, nkeys, norderbys);
opaque = palloc_object(BrinOpaque);
- opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange,
- scan->xs_snapshot);
+ opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange);
opaque->bo_bdesc = brin_build_desc(r);
scan->opaque = opaque;
@@ -537,8 +536,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
MemoryContextResetAndDeleteChildren(perRangeCxt);
tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
- &off, &size, BUFFER_LOCK_SHARE,
- scan->xs_snapshot);
+ &off, &size, BUFFER_LOCK_SHARE);
if (tup)
{
gottuple = true;
@@ -880,7 +878,7 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
/*
* Initialize our state, including the deformed tuple state.
*/
- revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
+ revmap = brinRevmapInitialize(index, &pagesPerRange);
state = initialize_brin_buildstate(index, revmap, pagesPerRange);
/*
@@ -1458,8 +1456,7 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
* the same.)
*/
phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
- &offset, &phsz, BUFFER_LOCK_SHARE,
- NULL);
+ &offset, &phsz, BUFFER_LOCK_SHARE);
/* the placeholder tuple must exist */
if (phtup == NULL)
elog(ERROR, "missing placeholder tuple");
@@ -1496,7 +1493,7 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
Buffer buf;
BlockNumber startBlk;
- revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
+ revmap = brinRevmapInitialize(index, &pagesPerRange);
/* determine range of pages to process */
heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
@@ -1537,7 +1534,7 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
CHECK_FOR_INTERRUPTS();
tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
- BUFFER_LOCK_SHARE, NULL);
+ BUFFER_LOCK_SHARE);
if (tup == NULL)
{
/* no revmap entry for this heap range. Summarize it. */
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index 956dd92765..fcea10ccdb 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -68,8 +68,7 @@ static void revmap_physical_extend(BrinRevmap *revmap);
* brinRevmapTerminate when caller is done with it.
*/
BrinRevmap *
-brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange,
- Snapshot snapshot)
+brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
{
BrinRevmap *revmap;
Buffer meta;
@@ -194,8 +193,7 @@ brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
*/
BrinTuple *
brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
- Buffer *buf, OffsetNumber *off, Size *size, int mode,
- Snapshot snapshot)
+ Buffer *buf, OffsetNumber *off, Size *size, int mode)
{
Relation idxRel = revmap->rm_irel;
BlockNumber mapBlk;
@@ -339,7 +337,7 @@ brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
OffsetNumber regOffset;
ItemId lp;
- revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);
+ revmap = brinRevmapInitialize(idxrel, &pagesPerRange);
revmapBlk = revmap_get_blkno(revmap, heapBlk);
if (!BlockNumberIsValid(revmapBlk))
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 588825ed85..371515395a 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -636,8 +636,7 @@ toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
*
* Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot
* to initialize the TOAST snapshot; since we don't know which one to use,
- * just use the oldest one. This is safe: at worst, we will get a "snapshot
- * too old" error that might have been avoided otherwise.
+ * just use the oldest one.
*/
void
init_toast_snapshot(Snapshot toast_snapshot)
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 7d097c75e0..06e97a7fbb 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -78,7 +78,7 @@ ginTraverseLock(Buffer buffer, bool searchMode)
*/
GinBtreeStack *
ginFindLeafPage(GinBtree btree, bool searchMode,
- bool rootConflictCheck, Snapshot snapshot)
+ bool rootConflictCheck)
{
GinBtreeStack *stack;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 9caeac164a..344e1c5e6b 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -1917,7 +1917,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
{
/* search for the leaf page where the first item should go to */
btree.itemptr = insertdata.items[insertdata.curitem];
- stack = ginFindLeafPage(&btree, false, true, NULL);
+ stack = ginFindLeafPage(&btree, false, true);
ginInsertValue(&btree, stack, &insertdata, buildStats);
}
@@ -1927,8 +1927,7 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
* Starts a new scan on a posting tree.
*/
GinBtreeStack *
-ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno,
- Snapshot snapshot)
+ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
{
GinBtreeStack *stack;
@@ -1936,7 +1935,7 @@ ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno,
btree->fullScan = true;
- stack = ginFindLeafPage(btree, true, false, snapshot);
+ stack = ginFindLeafPage(btree, true, false);
return stack;
}
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 10d2bb8900..88d8d0c04b 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -67,7 +67,7 @@ moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot
*/
static void
scanPostingTree(Relation index, GinScanEntry scanEntry,
- BlockNumber rootPostingTree, Snapshot snapshot)
+ BlockNumber rootPostingTree)
{
GinBtreeData btree;
GinBtreeStack *stack;
@@ -75,7 +75,7 @@ scanPostingTree(Relation index, GinScanEntry scanEntry,
Page page;
/* Descend to the leftmost leaf page */
- stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
+ stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
buffer = stack->buffer;
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
@@ -244,8 +244,7 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
PredicateLockPage(btree->index, rootPostingTree, snapshot);
/* Collect all the TIDs in this entry's posting tree */
- scanPostingTree(btree->index, scanEntry, rootPostingTree,
- snapshot);
+ scanPostingTree(btree->index, scanEntry, rootPostingTree);
/*
* We lock again the entry page and while it was unlocked insert
@@ -344,7 +343,7 @@ restartScanEntry:
ginPrepareEntryScan(&btreeEntry, entry->attnum,
entry->queryKey, entry->queryCategory,
ginstate);
- stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot);
+ stackEntry = ginFindLeafPage(&btreeEntry, true, false);
page = BufferGetPage(stackEntry->buffer);
/* ginFindLeafPage() will have already checked snapshot age. */
@@ -419,7 +418,7 @@ restartScanEntry:
needUnlock = false;
stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
- rootPostingTree, snapshot);
+ rootPostingTree);
entry->buffer = stack->buffer;
/*
@@ -652,7 +651,7 @@ startScan(IndexScanDesc scan)
*/
static void
entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
- ItemPointerData advancePast, Snapshot snapshot)
+ ItemPointerData advancePast)
{
Page page;
int i;
@@ -697,7 +696,7 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
}
entry->btree.fullScan = false;
- stack = ginFindLeafPage(&entry->btree, true, false, snapshot);
+ stack = ginFindLeafPage(&entry->btree, true, false);
/* we don't need the stack, just the buffer. */
entry->buffer = stack->buffer;
@@ -807,7 +806,7 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
*/
static void
entryGetItem(GinState *ginstate, GinScanEntry entry,
- ItemPointerData advancePast, Snapshot snapshot)
+ ItemPointerData advancePast)
{
Assert(!entry->isFinished);
@@ -938,7 +937,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
/* If we've processed the current batch, load more items */
while (entry->offset >= entry->nlist)
{
- entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
+ entryLoadMoreItems(ginstate, entry, advancePast);
if (entry->isFinished)
{
@@ -989,7 +988,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
*/
static void
keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
- ItemPointerData advancePast, Snapshot snapshot)
+ ItemPointerData advancePast)
{
ItemPointerData minItem;
ItemPointerData curPageLossy;
@@ -1036,7 +1035,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
*/
if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
{
- entryGetItem(ginstate, entry, advancePast, snapshot);
+ entryGetItem(ginstate, entry, advancePast);
if (entry->isFinished)
continue;
}
@@ -1111,7 +1110,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
{
- entryGetItem(ginstate, entry, advancePast, snapshot);
+ entryGetItem(ginstate, entry, advancePast);
if (entry->isFinished)
continue;
}
@@ -1334,8 +1333,7 @@ scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
}
/* Fetch the next item for this key that is > advancePast. */
- keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
- scan->xs_snapshot);
+ keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
if (key->isFinished)
return false;
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 56968b95ac..b4d216d4c6 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -192,7 +192,7 @@ ginEntryInsert(GinState *ginstate,
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
btree.isBuild = (buildStats != NULL);
- stack = ginFindLeafPage(&btree, false, false, NULL);
+ stack = ginFindLeafPage(&btree, false, false);
page = BufferGetPage(stack->buffer);
if (btree.findItem(&btree, stack))
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 1a05adfa61..b3c24d68f0 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -2865,10 +2865,6 @@ lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
* in effect in any case. lazy_scan_prune makes the optimistic assumption
* that any LP_DEAD items it encounters will always be LP_UNUSED by the time
* we're called.
- *
- * Also don't attempt it if we are doing early pruning/vacuuming, because a
- * scan which cannot find a truncated heap page cannot determine that the
- * snapshot is too old to read that page.
*/
static bool
should_attempt_truncation(LVRelState *vacrel)
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index d33f814a93..9cff4f2931 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -378,7 +378,7 @@ _bt_search_insert(Relation rel, Relation heaprel, BTInsertState insertstate)
/* Cannot use optimization -- descend tree, return proper descent stack */
return _bt_search(rel, heaprel, insertstate->itup_key, &insertstate->buf,
- BT_WRITE, NULL);
+ BT_WRITE);
}
/*
@@ -2165,7 +2165,7 @@ _bt_insert_parent(Relation rel,
BlockNumberIsValid(RelationGetTargetBlock(rel))));
/* Find the leftmost page at the next level up */
- pbuf = _bt_get_endpoint(rel, opaque->btpo_level + 1, false, NULL);
+ pbuf = _bt_get_endpoint(rel, opaque->btpo_level + 1, false);
/* Set up a phony stack entry pointing there */
stack = &fakestack;
stack->bts_blkno = BufferGetBlockNumber(pbuf);
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 6558aea42b..b7660a459e 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1962,8 +1962,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
itup_key = _bt_mkscankey(rel, targetkey);
/* find the leftmost leaf page with matching pivot/high key */
itup_key->pivotsearch = true;
- stack = _bt_search(rel, NULL, itup_key, &sleafbuf, BT_READ,
- NULL);
+ stack = _bt_search(rel, NULL, itup_key, &sleafbuf, BT_READ);
/* won't need a second lock or pin on leafbuf */
_bt_relbuf(rel, sleafbuf);
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 1799089fb4..17ad89749d 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -43,7 +43,7 @@ static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir);
static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno,
ScanDirection dir);
-static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot);
+static Buffer _bt_walk_left(Relation rel, Buffer buf);
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
static inline void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir);
@@ -83,10 +83,6 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
* which is locked and pinned. No locks are held on the parent pages,
* however!
*
- * If the snapshot parameter is not NULL, "old snapshot" checking will take
- * place during the descent through the tree. This is not needed when
- * positioning for an insert or delete, so NULL is used for those cases.
- *
* The returned buffer is locked according to access parameter. Additionally,
* access = BT_WRITE will allow an empty root page to be created and returned.
* When access = BT_READ, an empty index will result in *bufP being set to
@@ -98,7 +94,7 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
*/
BTStack
_bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
- int access, Snapshot snapshot)
+ int access)
{
BTStack stack_in = NULL;
int page_access = BT_READ;
@@ -138,7 +134,7 @@ _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
* opportunity to finish splits of internal pages too.
*/
*bufP = _bt_moveright(rel, heaprel, key, *bufP, (access == BT_WRITE),
- stack_in, page_access, snapshot);
+ stack_in, page_access);
/* if this is a leaf page, we're done */
page = BufferGetPage(*bufP);
@@ -198,8 +194,7 @@ _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
* but before we acquired a write lock. If it has, we may need to
* move right to its new sibling. Do that.
*/
- *bufP = _bt_moveright(rel, heaprel, key, *bufP, true, stack_in, BT_WRITE,
- snapshot);
+ *bufP = _bt_moveright(rel, heaprel, key, *bufP, true, stack_in, BT_WRITE);
}
return stack_in;
@@ -235,10 +230,6 @@ _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
* On entry, we have the buffer pinned and a lock of the type specified by
* 'access'. If we move right, we release the buffer and lock and acquire
* the same on the right sibling. Return value is the buffer we stop at.
- *
- * If the snapshot parameter is not NULL, "old snapshot" checking will take
- * place during the descent through the tree. This is not needed when
- * positioning for an insert or delete, so NULL is used for those cases.
*/
Buffer
_bt_moveright(Relation rel,
@@ -247,8 +238,7 @@ _bt_moveright(Relation rel,
Buffer buf,
bool forupdate,
BTStack stack,
- int access,
- Snapshot snapshot)
+ int access)
{
Page page;
BTPageOpaque opaque;
@@ -1373,7 +1363,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* Use the manufactured insertion scan key to descend the tree and
* position ourselves on the target leaf page.
*/
- stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ, scan->xs_snapshot);
+ stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ);
/* don't need to keep the stack around... */
_bt_freestack(stack);
@@ -1392,8 +1382,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (IsolationIsSerializable())
{
PredicateLockRelation(rel, scan->xs_snapshot);
- stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ,
- scan->xs_snapshot);
+ stack = _bt_search(rel, NULL, &inskey, &buf, BT_READ);
_bt_freestack(stack);
}
@@ -2113,8 +2102,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
}
/* Step to next physical page */
- so->currPos.buf = _bt_walk_left(rel, so->currPos.buf,
- scan->xs_snapshot);
+ so->currPos.buf = _bt_walk_left(rel, so->currPos.buf);
/* if we're physically at end of index, return failure */
if (so->currPos.buf == InvalidBuffer)
@@ -2205,7 +2193,7 @@ _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
* again if it's important.
*/
static Buffer
-_bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
+_bt_walk_left(Relation rel, Buffer buf)
{
Page page;
BTPageOpaque opaque;
@@ -2320,8 +2308,7 @@ _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot)
* The returned buffer is pinned and read-locked.
*/
Buffer
-_bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
- Snapshot snapshot)
+_bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
{
Buffer buf;
Page page;
@@ -2417,7 +2404,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
* version of _bt_search(). We don't maintain a stack since we know we
* won't need it.
*/
- buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir), scan->xs_snapshot);
+ buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));
if (!BufferIsValid(buf))
{
diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c
index 17cab0087f..8c00b724db 100644
--- a/src/backend/access/spgist/spgscan.c
+++ b/src/backend/access/spgist/spgscan.c
@@ -815,7 +815,7 @@ spgTestLeafTuple(SpGistScanOpaque so,
*/
static void
spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex,
- storeRes_func storeRes, Snapshot snapshot)
+ storeRes_func storeRes)
{
Buffer buffer = InvalidBuffer;
bool reportedSome = false;
@@ -949,7 +949,7 @@ spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
so->tbm = tbm;
so->ntids = 0;
- spgWalk(scan->indexRelation, so, true, storeBitmap, scan->xs_snapshot);
+ spgWalk(scan->indexRelation, so, true, storeBitmap);
return so->ntids;
}
@@ -1070,8 +1070,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
}
so->iPtr = so->nPtrs = 0;
- spgWalk(scan->indexRelation, so, false, storeGettuple,
- scan->xs_snapshot);
+ spgWalk(scan->indexRelation, so, false, storeGettuple);
if (so->nPtrs == 0)
break; /* must have completed scan */
diff --git a/src/include/access/brin_revmap.h b/src/include/access/brin_revmap.h
index 75323630e0..79726051a4 100644
--- a/src/include/access/brin_revmap.h
+++ b/src/include/access/brin_revmap.h
@@ -24,7 +24,7 @@
typedef struct BrinRevmap BrinRevmap;
extern BrinRevmap *brinRevmapInitialize(Relation idxrel,
- BlockNumber *pagesPerRange, Snapshot snapshot);
+ BlockNumber *pagesPerRange);
extern void brinRevmapTerminate(BrinRevmap *revmap);
extern void brinRevmapExtend(BrinRevmap *revmap,
@@ -35,7 +35,7 @@ extern void brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
BlockNumber heapBlk, ItemPointerData tid);
extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap,
BlockNumber heapBlk, Buffer *buf, OffsetNumber *off,
- Size *size, int mode, Snapshot snapshot);
+ Size *size, int mode);
extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk);
#endif /* BRIN_REVMAP_H */
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 6da64928b6..76b9923201 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -202,7 +202,7 @@ typedef struct
*/
extern GinBtreeStack *ginFindLeafPage(GinBtree btree, bool searchMode,
- bool rootConflictCheck, Snapshot snapshot);
+ bool rootConflictCheck);
extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
extern void freeGinBtreeStack(GinBtreeStack *stack);
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
@@ -230,7 +230,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats);
-extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot);
+extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno);
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
/*
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 8891fa7973..f5c66964ca 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1231,16 +1231,15 @@ extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate);
* prototypes for functions in nbtsearch.c
*/
extern BTStack _bt_search(Relation rel, Relation heaprel, BTScanInsert key,
- Buffer *bufP, int access, Snapshot snapshot);
+ Buffer *bufP, int access);
extern Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
Buffer buf, bool forupdate, BTStack stack,
- int access, Snapshot snapshot);
+ int access);
extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate);
extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum);
extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
-extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
- Snapshot snapshot);
+extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost);
/*
* prototypes for functions in nbtutils.c
--
2.41.0
On Thu, Sep 7, 2023 at 8:49 PM Thomas Munro <thomas.munro@gmail.com> wrote:
The code moved around quite a few times over several commits and quite
a lot since then, which is why I didn't go for a straight revert, but
clearly the manual approach risked missing things.
It's not a big deal, obviously.
I think the
attached removes all unused 'snapshot' arguments from AM-internal
functions. Checked by compiling with clang's -Wunused-parameter, and
then searching for 'snapshot', and excluding the expected cases.
This looks right to me.
Thanks
--
Peter Geoghegan