Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Started by cca5507over 1 year ago27 messages
#1cca5507
cca5507@qq.com

Hello hackers, I found that we currently don't track txns committed in
BUILDING_SNAPSHOT state because of the code in xact_decode():
/*
* If the snapshot isn't yet fully built, we cannot decode anything, so
* bail out.
*/
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
return;

This can cause a txn to take an incorrect historic snapshot and result in an

interruption of logical replication. Consider the following scenario:
(pub)create table t1 (id int primary key);
(pub)insert into t1 values (1);
(pub)create publication pub for table t1;
(sub)create table t1 (id int primary key);
(pub)begin; insert into t1 values (2); (txn1 in session1)
(sub)create subscription sub connection 'hostaddr=127.0.0.1 port=5432 user=xxx dbname=postgres' publication pub; (pub will switch to BUILDING_SNAPSHOT state soon)
(pub)begin; insert into t1 values (3); (txn2 in session2)
(pub)create table t2 (id int primary key); (session3)
(pub)commit; (commit txn1, and pub will switch to FULL_SNAPSHOT state soon)
(pub)begin; insert into t2 values (1); (txn3 in session3)
(pub)commit; (commit txn2, and pub will switch to CONSISTENT state soon)
(pub)commit; (commit txn3, and replay txn3 will failed because its snapshot cannot see table t2)

The output of pub's log:
ERROR: could not map filenumber "base/5/16395" to relation OID

Is this a bug? Should we also track the txns committed in BUILDING_SNAPSHOT state?

--
Regards,
ChangAo Chen

#2Michael Paquier
michael@paquier.xyz
In reply to: cca5507 (#1)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Sun, Jun 09, 2024 at 11:21:52PM +0800, cca5507 wrote:

Hello hackers, I found that we&nbsp;currently don't track txns committed in
BUILDING_SNAPSHOT state because of the code in xact_decode():
/*
* If the snapshot isn't yet fully built, we cannot decode anything, so
* bail out.
*/
if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
return;

The output of pub's log:
ERROR: could not map filenumber "base/5/16395" to relation OID

Is this a bug? Should we also track the txns committed in BUILDING_SNAPSHOT state?

Clearly, this is not an error you should be able to see as a user. So
yes, that's something that needs to be fixed.
--
Michael

#3cca5507
cca5507@qq.com
In reply to: Michael Paquier (#2)
1 attachment(s)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Thank you for reply!I am trying to fix it. This patch (pass check-world) will track txns
committed in BUILDING_SNAPSHOT state and can fix this bug.

--
Regards,
ChangAo Chen

Attachments:

v1-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchapplication/octet-stream; charset=ISO-8859-1; name=v1-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchDownload
From 53d079af84b8ac44497f27eab52c73cfc2bbd298 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Mon, 10 Jun 2024 20:56:37 +0800
Subject: [PATCH v1] Track transactions committed in BUILDING_SNAPSHOT.

Transactions committed in BUILDING_SNAPSHOT state are useful for
building historic snapshot, but we didn't track them previously.
This results in a transaction taking an incorrect snapshot and
logical replication being interrupted. So it's necessary to track
these transactions.
---
 src/backend/replication/logical/decode.c    | 27 +++++++++++++--------
 src/backend/replication/logical/snapbuild.c |  7 ++++++
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 8ec5adfd90..6159beca4b 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -206,10 +206,11 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK;
 
 	/*
-	 * If the snapshot isn't yet fully built, we cannot decode anything, so
-	 * bail out.
+	 * If the snapshot isn't yet fully built, we cannot decode anything.
+	 * But we need to track transactions committed in BUILDING_SNAPSHOT
+	 * state, or the snapshot will be incorrect.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT)
 		return;
 
 	switch (info)
@@ -282,9 +283,11 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			{
 				TransactionId xid;
 				xl_xact_invals *invals;
+				bool		has_snapshot = false;
 
 				xid = XLogRecGetXid(r);
 				invals = (xl_xact_invals *) XLogRecGetData(r);
+				has_snapshot = SnapBuildCurrentState(builder) >= SNAPBUILD_FULL_SNAPSHOT;
 
 				/*
 				 * Execute the invalidations for xid-less transactions,
@@ -293,7 +296,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 				 */
 				if (TransactionIdIsValid(xid))
 				{
-					if (!ctx->fast_forward)
+					if (!ctx->fast_forward && has_snapshot)
 						ReorderBufferAddInvalidations(reorder, xid,
 													  buf->origptr,
 													  invals->nmsgs,
@@ -301,12 +304,13 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 					ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
 													  buf->origptr);
 				}
-				else if ((!ctx->fast_forward))
+				else if (!ctx->fast_forward && has_snapshot)
 					ReorderBufferImmediateInvalidation(ctx->reorder,
 													   invals->nmsgs,
 													   invals->msgs);
+
+				break;
 			}
-			break;
 		case XLOG_XACT_PREPARE:
 			{
 				xl_xact_parsed_prepare parsed;
@@ -411,9 +415,10 @@ heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
 	/*
 	 * If we don't have snapshot or we are just fast-forwarding, there is no
-	 * point in decoding changes.
+	 * point in decoding changes. If we are building snapshot, we need to
+	 * handle XLOG_HEAP2_NEW_CID which mark a transaction as catalog modifying.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
 		ctx->fast_forward)
 		return;
 
@@ -470,9 +475,10 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
 	/*
 	 * If we don't have snapshot or we are just fast-forwarding, there is no
-	 * point in decoding data changes.
+	 * point in decoding data changes. If we are building snapshot, we need to
+	 * handle XLOG_HEAP_INPLACE which mark a transaction as catalog modifying.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
 		ctx->fast_forward)
 		return;
 
@@ -1300,6 +1306,7 @@ DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
 				  Oid txn_dbid, RepOriginId origin_id)
 {
 	if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+		SnapBuildCurrentState(ctx->snapshot_builder) < SNAPBUILD_CONSISTENT ||
 		(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
 		FilterByOrigin(ctx, origin_id))
 		return true;
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index e37e22f441..8eacede275 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -826,6 +826,13 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
 	 */
 	ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
 
+	/*
+	 * If we don't have snapshot, the transaction won't be
+	 * replayed, just return here.
+	 */
+	if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
+		return;
+
 	ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
 								 xlrec->target_locator, xlrec->target_tid,
 								 xlrec->cmin, xlrec->cmax,
-- 
2.34.1

#4Bertrand Drouvot
bertranddrouvot.pg@gmail.com
In reply to: cca5507 (#3)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Mon, Jun 10, 2024 at 10:04:31PM +0800, cca5507 wrote:

Thank you for reply!I am trying to fix it. This patch (pass check-world) will track txns
committed in BUILDING_SNAPSHOT state and can fix this bug.

Thanks for the report and the patch!

I did not look at the patch in detail but I can see that it does no contain a test
related to this issue.

Would you mind to add a test in say contrib/test_decoding? (see catalog_change_snapshot
for example).

FWIW, to ease the writing of the test, I think you don't need pub/sub to produce
the issue. I think you can reproduce with a single instance and multiple sessions
: replace in your repro the "(sub)create subscription" by a "logical slot creation"
and finish the test by "pg_logical_slot_get_changes" on the slot created above.

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

#5cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#4)
2 attachment(s)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

Thanks for pointing it out!

Here are the new version patches with a test case.

--
Regards,
ChangAo Chen

------------------&nbsp;Original&nbsp;------------------
From: "Bertrand Drouvot" <bertranddrouvot.pg@gmail.com&gt;;
Date:&nbsp;Wed, Aug 7, 2024 06:33 PM
To:&nbsp;"cca5507"<cca5507@qq.com&gt;;
Cc:&nbsp;"Michael Paquier"<michael@paquier.xyz&gt;;"pgsql-hackers"<pgsql-hackers@lists.postgresql.org&gt;;
Subject:&nbsp;Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Mon, Jun 10, 2024 at 10:04:31PM +0800, cca5507 wrote:
&gt; Thank you for reply!I am trying to fix it. This patch (pass check-world) will track txns
&gt; committed in BUILDING_SNAPSHOT state and can fix this bug.

Thanks for the report and the patch!

I did not look at the patch in detail but I can see that it does no contain a test
related to this issue.

Would you mind to add a test in say contrib/test_decoding? (see catalog_change_snapshot
for example).

FWIW, to ease the writing of the test, I think you don't need pub/sub to produce
the issue. I think you can reproduce with a single instance and multiple sessions
: replace in your repro the "(sub)create subscription" by a "logical slot creation"
and finish the test by "pg_logical_slot_get_changes" on the slot created above.

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

Attachments:

v2-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchapplication/octet-stream; charset=ISO-8859-1; name=v2-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchDownload
From 625b98a4030281eae7a1e23be973849200fe5c91 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Thu, 8 Aug 2024 12:21:07 +0800
Subject: [PATCH v2 1/2] Track transactions committed in BUILDING_SNAPSHOT.

Transactions committed in BUILDING_SNAPSHOT are useful for build
historic snapshot, but we didn't track them previously. This results
in a transaction taking an incorrect snapshot and logical decoding
being interrupted. So we must track these transactions.
---
 src/backend/replication/logical/decode.c    | 30 +++++++++++++--------
 src/backend/replication/logical/snapbuild.c |  7 +++++
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index d687ceee33..267e886ca3 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -206,10 +206,11 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK;
 
 	/*
-	 * If the snapshot isn't yet fully built, we cannot decode anything, so
-	 * bail out.
+	 * Before BUILDING_SNAPSHOT, the xlog is useless, so bail out. Note
+	 * that the xlog in BUILDING_SNAPSHOT is only useful for build the
+	 * snapshot and will not be decoded.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT)
 		return;
 
 	switch (info)
@@ -282,9 +283,11 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			{
 				TransactionId xid;
 				xl_xact_invals *invals;
+				bool		has_snapshot;
 
 				xid = XLogRecGetXid(r);
 				invals = (xl_xact_invals *) XLogRecGetData(r);
+				has_snapshot = SnapBuildCurrentState(builder) >= SNAPBUILD_FULL_SNAPSHOT;
 
 				/*
 				 * Execute the invalidations for xid-less transactions,
@@ -293,7 +296,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 				 */
 				if (TransactionIdIsValid(xid))
 				{
-					if (!ctx->fast_forward)
+					if (!ctx->fast_forward && has_snapshot)
 						ReorderBufferAddInvalidations(reorder, xid,
 													  buf->origptr,
 													  invals->nmsgs,
@@ -301,7 +304,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 					ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
 													  buf->origptr);
 				}
-				else if (!ctx->fast_forward)
+				else if (!ctx->fast_forward && has_snapshot)
 					ReorderBufferImmediateInvalidation(ctx->reorder,
 													   invals->nmsgs,
 													   invals->msgs);
@@ -411,10 +414,12 @@ heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
 
 	/*
-	 * If we don't have snapshot or we are just fast-forwarding, there is no
-	 * point in decoding changes.
+	 * Before BUILDING_SNAPSHOT or we are just fast-forwarding, there is no
+	 * point in decoding changes. Note that we only handle XLOG_HEAP2_NEW_CID
+	 * which mark a transaction as catalog modifying in BUILDING_SNAPSHOT, it's
+	 * useful for build the snapshot.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
 		ctx->fast_forward)
 		return;
 
@@ -470,10 +475,12 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
 
 	/*
-	 * If we don't have snapshot or we are just fast-forwarding, there is no
-	 * point in decoding data changes.
+	 * Before BUILDING_SNAPSHOT or we are just fast-forwarding, there is no
+	 * point in decoding changes. Note that we only handle XLOG_HEAP_INPLACE
+	 * which mark a transaction as catalog modifying in BUILDING_SNAPSHOT, it's
+	 * useful for build the snapshot.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
 		ctx->fast_forward)
 		return;
 
@@ -1301,6 +1308,7 @@ DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
 				  Oid txn_dbid, RepOriginId origin_id)
 {
 	if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+		SnapBuildCurrentState(ctx->snapshot_builder) < SNAPBUILD_CONSISTENT ||
 		(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
 		FilterByOrigin(ctx, origin_id))
 		return true;
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index ae676145e6..5443b6dce8 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -836,6 +836,13 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
 	 */
 	ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
 
+	/*
+	 * If we don't have snapshot, the transaction won't be
+	 * replayed, just return here.
+	 */
+	if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
+		return;
+
 	ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
 								 xlrec->target_locator, xlrec->target_tid,
 								 xlrec->cmin, xlrec->cmax,
-- 
2.34.1

v2-0002-Add-test-case-snapshot_build-for-test_decoding.patchapplication/octet-stream; charset=ISO-8859-1; name=v2-0002-Add-test-case-snapshot_build-for-test_decoding.patchDownload
From 0836852a942a37d99510aea8de079979f5c3b632 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Thu, 8 Aug 2024 15:06:24 +0800
Subject: [PATCH v2 2/2] Add test case snapshot_build for test_decoding.

---
 contrib/test_decoding/Makefile                |  2 +-
 .../test_decoding/expected/snapshot_build.out | 33 +++++++++++++
 contrib/test_decoding/meson.build             |  1 +
 .../test_decoding/specs/snapshot_build.spec   | 46 +++++++++++++++++++
 4 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 contrib/test_decoding/expected/snapshot_build.out
 create mode 100644 contrib/test_decoding/specs/snapshot_build.spec

diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index a4ba1a509a..8113e2d99c 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -9,7 +9,7 @@ REGRESS = ddl xact rewrite toast permissions decoding_in_xact \
 ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \
 	oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \
 	twophase_snapshot slot_creation_error catalog_change_snapshot \
-	skip_snapshot_restore
+	skip_snapshot_restore snapshot_build
 
 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
 ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
diff --git a/contrib/test_decoding/expected/snapshot_build.out b/contrib/test_decoding/expected/snapshot_build.out
new file mode 100644
index 0000000000..0fcf20cce8
--- /dev/null
+++ b/contrib/test_decoding/expected/snapshot_build.out
@@ -0,0 +1,33 @@
+Parsed test spec with 4 sessions
+
+starting permutation: s1_begin s1_insert s2_init s3_begin s3_insert s4_create s1_commit s4_begin s4_insert s3_commit s4_commit s2_get_changes
+step s1_begin: BEGIN;
+step s1_insert: INSERT INTO tbl1 VALUES (1);
+step s2_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); <waiting ...>
+step s3_begin: BEGIN;
+step s3_insert: INSERT INTO tbl1 VALUES (1);
+step s4_create: CREATE TABLE tbl2 (val1 integer);
+step s1_commit: COMMIT;
+step s4_begin: BEGIN;
+step s4_insert: INSERT INTO tbl2 VALUES (1);
+step s3_commit: COMMIT;
+step s2_init: <... completed>
+?column?
+--------
+init    
+(1 row)
+
+step s4_commit: COMMIT;
+step s2_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data                                      
+------------------------------------------
+BEGIN                                     
+table public.tbl2: INSERT: val1[integer]:1
+COMMIT                                    
+(3 rows)
+
+?column?
+--------
+stop    
+(1 row)
+
diff --git a/contrib/test_decoding/meson.build b/contrib/test_decoding/meson.build
index f643dc81a2..2b7e80ba71 100644
--- a/contrib/test_decoding/meson.build
+++ b/contrib/test_decoding/meson.build
@@ -63,6 +63,7 @@ tests += {
       'twophase_snapshot',
       'slot_creation_error',
       'skip_snapshot_restore',
+      'snapshot_build',
     ],
     'regress_args': [
       '--temp-config', files('logical.conf'),
diff --git a/contrib/test_decoding/specs/snapshot_build.spec b/contrib/test_decoding/specs/snapshot_build.spec
new file mode 100644
index 0000000000..63c6b21cc1
--- /dev/null
+++ b/contrib/test_decoding/specs/snapshot_build.spec
@@ -0,0 +1,46 @@
+# Test snapshot build correctly
+
+setup
+{
+    DROP TABLE IF EXISTS tbl1;
+    DROP TABLE IF EXISTS tbl2;
+    CREATE TABLE tbl1 (val1 integer);
+}
+
+teardown
+{
+    DROP TABLE tbl1;
+    DROP TABLE tbl2;
+    SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1_begin" { BEGIN; }
+step "s1_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s1_commit" { COMMIT; }
+
+session "s2"
+setup { SET synchronous_commit=on; }
+step "s2_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
+step "s2_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); }
+
+session "s3"
+setup { SET synchronous_commit=on; }
+step "s3_begin" { BEGIN; }
+step "s3_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s3_commit" { COMMIT; }
+
+session "s4"
+setup { SET synchronous_commit=on; }
+step "s4_create" { CREATE TABLE tbl2 (val1 integer); }
+step "s4_begin" { BEGIN; }
+step "s4_insert" { INSERT INTO tbl2 VALUES (1); }
+step "s4_commit" { COMMIT; }
+
+# T1: s1_begin -> s1_insert -> BUILDING_SNAPSHOT -> s1_commit -> FULL_SNAPSHOT
+# T2: BUILDING_SNAPSHOT -> s3_begin -> s3_insert -> FULL_SNAPSHOT -> s3_commit -> CONSISTENT
+# T3: BUILDING_SNAPSHOT -> s4_create -> FULL_SNAPSHOT
+# T4: FULL_SNAPSHOT -> s4_begin -> s4_insert -> CONSISTENT -> s4_commit
+# The snapshot must track T3 or the replay of T4 will fail because its snapshot cannot see tbl2
+permutation "s1_begin" "s1_insert" "s2_init" "s3_begin" "s3_insert" "s4_create" "s1_commit" "s4_begin" "s4_insert" "s3_commit" "s4_commit" "s2_get_changes"
-- 
2.34.1

#6Bertrand Drouvot
bertranddrouvot.pg@gmail.com
In reply to: cca5507 (#5)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Thu, Aug 08, 2024 at 03:53:29PM +0800, cca5507 wrote:

Hi,

Thanks for pointing it out!

Here are the new version patches with a test case.

Thanks!

I think the approach that the patch implements makes sense and that we should
track the transactions that have been commmitted while building the snapshot.

A few random comments:

1 ===

+ * that the xlog in BUILDING_SNAPSHOT is only useful for build

s/for build/to build? (same comment for the commit message)

2 ===

+ * snapshot and will not be decoded.

worth to mention DecodeTXNNeedSkip() here?

3 ===

+        * point in decoding changes. Note that we only handle XLOG_HEAP2_NEW_CID
+        * which mark a transaction as catalog modifying in BUILDING_SNAPSHOT

do you mean? Note that during BUILDING_SNAPSHOT, we only handle XLOG_HEAP2_NEW_CID
as it marks a transaction as catalog modifying. If so, what about making the

-       if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+       if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||

more clear about that? (to avoid any work when it's not needed)

4 ===

+ * useful for build the snapshot.

s/for/to/?

5 ===

+        * point in decoding changes. Note that we only handle XLOG_HEAP_INPLACE
+        * which mark a transaction as catalog modifying in BUILDING_SNAPSHOT, it's

s/which mark/which might mark/?

do you mean? Note that during BUILDING_SNAPSHOT, we only handle XLOG_HEAP_INPLACE
as it might mark a transaction as catalog modifying. If so, what about making the

-       if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+       if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||

more clear about that? (to avoid any work when it's not needed)

Idea of 3 === and 5 === is to proceed further in the SNAPBUILD_BUILDING_SNAPSHOT
case only if we know that the transaction is a catalog changing one (or might
be one).

6 ===

+ * useful for build the snapshot.

s/for/to/?

7 ===

+# Test snapshot build correctly
+

what about? Test tracking of committed transactions during BUILDING_SNAPSHOT

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

#7cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#6)
2 attachment(s)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

Thanks for the comments!

Here are the new version patches, I think it will be more clear.

--
Regards,
ChangAo Chen

Attachments:

v3-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchapplication/octet-stream; charset=ISO-8859-1; name=v3-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchDownload
From 446767f492ad9a893dc5aee8b71ef629e45d5504 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Sat, 10 Aug 2024 17:00:21 +0800
Subject: [PATCH v3 1/2] Track transactions committed in BUILDING_SNAPSHOT.

The historic snapshot previously didn't track transactions
committed in BUILDING_SNAPSHOT, and this might result in a
transaction taking an incorrect snapshot and logical decoding
being interrupted. So we must track these transactions.

Because the historic snapshot only tracks catalog modifying
transactions, we also need handle the xlog that might mark a
transaction as catalog modifying during BUILDING_SNAPSHOT.
---
 src/backend/replication/logical/decode.c | 48 +++++++++++++++++++++---
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index d687ceee33..ec4a43d5a7 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -206,12 +206,17 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK;
 
 	/*
-	 * If the snapshot isn't yet fully built, we cannot decode anything, so
-	 * bail out.
+	 * Before BUILDING_SNAPSHOT, we cannot decode anything because we don't
+	 * have snapshot and the transaction will not be tracked by snapshot
+	 * anyway(see SnapBuildCommitTxn() for details), so bail out.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT)
 		return;
 
+	/*
+	 * Note that during BUILDING_SNAPSHOT, the xlog is only used to build
+	 * snapshot and will not be decoded because we don't have snapshot yet.
+	 */
 	switch (info)
 	{
 		case XLOG_XACT_COMMIT:
@@ -282,9 +287,11 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			{
 				TransactionId xid;
 				xl_xact_invals *invals;
+				bool		has_snapshot;
 
 				xid = XLogRecGetXid(r);
 				invals = (xl_xact_invals *) XLogRecGetData(r);
+				has_snapshot = SnapBuildCurrentState(builder) >= SNAPBUILD_FULL_SNAPSHOT;
 
 				/*
 				 * Execute the invalidations for xid-less transactions,
@@ -293,7 +300,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 				 */
 				if (TransactionIdIsValid(xid))
 				{
-					if (!ctx->fast_forward)
+					if (!ctx->fast_forward && has_snapshot)
 						ReorderBufferAddInvalidations(reorder, xid,
 													  buf->origptr,
 													  invals->nmsgs,
@@ -301,7 +308,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 					ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
 													  buf->origptr);
 				}
-				else if (!ctx->fast_forward)
+				else if (!ctx->fast_forward && has_snapshot)
 					ReorderBufferImmediateInvalidation(ctx->reorder,
 													   invals->nmsgs,
 													   invals->msgs);
@@ -416,7 +423,22 @@ heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	 */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
 		ctx->fast_forward)
+	{
+		/*
+		 * Note that during or after BUILDING_SNAPSHOT, we need handle the xlog
+		 * that might mark a transaction as catalog modifying because the snapshot
+		 * only tracks catalog modifying transactions. The transaction before
+		 * BUILDING_SNAPSHOT will not be tracked anyway(see SnapBuildCommitTxn()
+		 * for details), so just return.
+	 	 */
+		if (SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+		{
+			/* Currently only XLOG_HEAP2_NEW_CID means a catalog modifying */
+			if (info == XLOG_HEAP2_NEW_CID && TransactionIdIsValid(xid))
+				ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+		}
 		return;
+	}
 
 	switch (info)
 	{
@@ -475,7 +497,22 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	 */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
 		ctx->fast_forward)
+	{
+		/*
+		 * Note that during or after BUILDING_SNAPSHOT, we need handle the xlog
+		 * that might mark a transaction as catalog modifying because the snapshot
+		 * only tracks catalog modifying transactions. The transaction before
+		 * BUILDING_SNAPSHOT will not be tracked anyway(see SnapBuildCommitTxn()
+		 * for details), so just return.
+	 	 */
+		if (SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+		{
+			/* Currently only XLOG_HEAP_INPLACE means a catalog modifying */
+			if (info == XLOG_HEAP_INPLACE && TransactionIdIsValid(xid))
+				ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+		}
 		return;
+	}
 
 	switch (info)
 	{
@@ -1301,6 +1338,7 @@ DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
 				  Oid txn_dbid, RepOriginId origin_id)
 {
 	if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+		SnapBuildCurrentState(ctx->snapshot_builder) < SNAPBUILD_CONSISTENT ||
 		(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
 		FilterByOrigin(ctx, origin_id))
 		return true;
-- 
2.34.1

v3-0002-Add-test-case-snapshot_build-for-test_decoding.patchapplication/octet-stream; charset=ISO-8859-1; name=v3-0002-Add-test-case-snapshot_build-for-test_decoding.patchDownload
From 51838f4cba785231ad6bd6d0f1bc105b9778392f Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Sat, 10 Aug 2024 17:34:26 +0800
Subject: [PATCH v3 2/2] Add test case snapshot_build for test_decoding.

---
 contrib/test_decoding/Makefile                |  2 +-
 .../test_decoding/expected/snapshot_build.out | 33 +++++++++++++
 contrib/test_decoding/meson.build             |  1 +
 .../test_decoding/specs/snapshot_build.spec   | 46 +++++++++++++++++++
 4 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 contrib/test_decoding/expected/snapshot_build.out
 create mode 100644 contrib/test_decoding/specs/snapshot_build.spec

diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index a4ba1a509a..8113e2d99c 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -9,7 +9,7 @@ REGRESS = ddl xact rewrite toast permissions decoding_in_xact \
 ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \
 	oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \
 	twophase_snapshot slot_creation_error catalog_change_snapshot \
-	skip_snapshot_restore
+	skip_snapshot_restore snapshot_build
 
 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
 ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
diff --git a/contrib/test_decoding/expected/snapshot_build.out b/contrib/test_decoding/expected/snapshot_build.out
new file mode 100644
index 0000000000..0fcf20cce8
--- /dev/null
+++ b/contrib/test_decoding/expected/snapshot_build.out
@@ -0,0 +1,33 @@
+Parsed test spec with 4 sessions
+
+starting permutation: s1_begin s1_insert s2_init s3_begin s3_insert s4_create s1_commit s4_begin s4_insert s3_commit s4_commit s2_get_changes
+step s1_begin: BEGIN;
+step s1_insert: INSERT INTO tbl1 VALUES (1);
+step s2_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); <waiting ...>
+step s3_begin: BEGIN;
+step s3_insert: INSERT INTO tbl1 VALUES (1);
+step s4_create: CREATE TABLE tbl2 (val1 integer);
+step s1_commit: COMMIT;
+step s4_begin: BEGIN;
+step s4_insert: INSERT INTO tbl2 VALUES (1);
+step s3_commit: COMMIT;
+step s2_init: <... completed>
+?column?
+--------
+init    
+(1 row)
+
+step s4_commit: COMMIT;
+step s2_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data                                      
+------------------------------------------
+BEGIN                                     
+table public.tbl2: INSERT: val1[integer]:1
+COMMIT                                    
+(3 rows)
+
+?column?
+--------
+stop    
+(1 row)
+
diff --git a/contrib/test_decoding/meson.build b/contrib/test_decoding/meson.build
index f643dc81a2..2b7e80ba71 100644
--- a/contrib/test_decoding/meson.build
+++ b/contrib/test_decoding/meson.build
@@ -63,6 +63,7 @@ tests += {
       'twophase_snapshot',
       'slot_creation_error',
       'skip_snapshot_restore',
+      'snapshot_build',
     ],
     'regress_args': [
       '--temp-config', files('logical.conf'),
diff --git a/contrib/test_decoding/specs/snapshot_build.spec b/contrib/test_decoding/specs/snapshot_build.spec
new file mode 100644
index 0000000000..334531dd21
--- /dev/null
+++ b/contrib/test_decoding/specs/snapshot_build.spec
@@ -0,0 +1,46 @@
+# Test snapshot build correctly, it must track committed transactions during BUILDING_SNAPSHOT
+
+setup
+{
+    DROP TABLE IF EXISTS tbl1;
+    DROP TABLE IF EXISTS tbl2;
+    CREATE TABLE tbl1 (val1 integer);
+}
+
+teardown
+{
+    DROP TABLE tbl1;
+    DROP TABLE tbl2;
+    SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1_begin" { BEGIN; }
+step "s1_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s1_commit" { COMMIT; }
+
+session "s2"
+setup { SET synchronous_commit=on; }
+step "s2_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
+step "s2_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); }
+
+session "s3"
+setup { SET synchronous_commit=on; }
+step "s3_begin" { BEGIN; }
+step "s3_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s3_commit" { COMMIT; }
+
+session "s4"
+setup { SET synchronous_commit=on; }
+step "s4_create" { CREATE TABLE tbl2 (val1 integer); }
+step "s4_begin" { BEGIN; }
+step "s4_insert" { INSERT INTO tbl2 VALUES (1); }
+step "s4_commit" { COMMIT; }
+
+# T1: s1_begin -> s1_insert -> BUILDING_SNAPSHOT -> s1_commit -> FULL_SNAPSHOT
+# T2: BUILDING_SNAPSHOT -> s3_begin -> s3_insert -> FULL_SNAPSHOT -> s3_commit -> CONSISTENT
+# T3: BUILDING_SNAPSHOT -> s4_create -> FULL_SNAPSHOT
+# T4: FULL_SNAPSHOT -> s4_begin -> s4_insert -> CONSISTENT -> s4_commit
+# The snapshot must track T3 or the replay of T4 will fail because its snapshot cannot see tbl2
+permutation "s1_begin" "s1_insert" "s2_init" "s3_begin" "s3_insert" "s4_create" "s1_commit" "s4_begin" "s4_insert" "s3_commit" "s4_commit" "s2_get_changes"
-- 
2.34.1

#8Bertrand Drouvot
bertranddrouvot.pg@gmail.com
In reply to: cca5507 (#7)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Sat, Aug 10, 2024 at 06:07:30PM +0800, cca5507 wrote:

Hi,

Thanks for the comments!

Here are the new version patches, I think it will be more clear.

Thanks!

1 ===

When applying I get:

Applying: Track transactions committed in BUILDING_SNAPSHOT.
.git/rebase-apply/patch:71: space before tab in indent.
*/
.git/rebase-apply/patch:94: space before tab in indent.
*/
warning: 2 lines add whitespace errors.

2 ===

+ * have snapshot and the transaction will not be tracked by snapshot

s/have snapshot/have a snapshot/?

3 ===

+ * snapshot and will not be decoded

s/snapshot/a snapshot/?

4 ===

        if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
                ctx->fast_forward)
+       {
+               /*
+                * Note that during or after BUILDING_SNAPSHOT, we need handle the xlog
+                * that might mark a transaction as catalog modifying because the snapshot
+                * only tracks catalog modifying transactions. The transaction before
+                * BUILDING_SNAPSHOT will not be tracked anyway(see SnapBuildCommitTxn()
+                * for details), so just return.
+                */
+               if (SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+               {
+                       /* Currently only XLOG_HEAP2_NEW_CID means a catalog modifying */
+                       if (info == XLOG_HEAP2_NEW_CID && TransactionIdIsValid(xid))

What about?

if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
(SnapBuildCurrentState(builder) == SNAPBUILD_BUILDING_SNAPSHOT && info != XLOG_HEAP2_NEW_CID) ||
ctx->fast_forward)
return;

That way we'd still rely on what's being done in the XLOG_HEAP2_NEW_CID case (
should it change in the future).

5 ===

        if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
                ctx->fast_forward)
+       {
+               /*
+                * Note that during or after BUILDING_SNAPSHOT, we need handle the xlog
+                * that might mark a transaction as catalog modifying because the snapshot
+                * only tracks catalog modifying transactions. The transaction before
+                * BUILDING_SNAPSHOT will not be tracked anyway(see SnapBuildCommitTxn()
+                * for details), so just return.
+                */
+               if (SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+               {

What about?

if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
(SnapBuildCurrentState(builder) == SNAPBUILD_BUILDING_SNAPSHOT && info != XLOG_HEAP_INPLACE) ||
ctx->fast_forward)
return;

That way we'd still rely on what's being done in the XLOG_HEAP_INPLACE case (
should it change in the future).

6 ===

v3-0002 LGTM.

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

#9cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#8)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

4, 5 ===

&gt; if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
&gt;&nbsp; &nbsp; &nbsp;(SnapBuildCurrentState(builder) == SNAPBUILD_BUILDING_SNAPSHOT &amp;&amp; info != XLOG_HEAP_INPLACE) ||
&gt;&nbsp; &nbsp; &nbsp;ctx-&gt;fast_forward)
&gt;&nbsp; &nbsp; &nbsp;return;

I think during fast forward, we also need handle the xlog that marks a transaction
as&nbsp;catalog modifying, or the snapshot might lose some transactions?

&gt; That way we'd still rely on what's being done in the XLOG_HEAP_INPLACE case

+		if (SnapBuildCurrentState(builder) &gt;= SNAPBUILD_BUILDING_SNAPSHOT)
+		{
+			/* Currently only XLOG_HEAP_INPLACE means a catalog modifying */
+			if (info == XLOG_HEAP_INPLACE &amp;&amp; TransactionIdIsValid(xid))
+				ReorderBufferXidSetCatalogChanges(ctx-&gt;reorder, xid, buf-&gt;origptr);
+		}

We only call&nbsp;ReorderBufferXidSetCatalogChanges() for the xlog that marks a transaction as&nbsp;catalog
modifying, and we don't care about the other steps being done in the xlog, so I think the current
approach is ok.

--
Regards,
ChangAo Chen

#10Bertrand Drouvot
bertranddrouvot.pg@gmail.com
In reply to: cca5507 (#9)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Mon, Aug 12, 2024 at 04:34:25PM +0800, cca5507 wrote:

Hi,

4, 5 ===

&gt; if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
&gt;&nbsp; &nbsp; &nbsp;(SnapBuildCurrentState(builder) == SNAPBUILD_BUILDING_SNAPSHOT &amp;&amp; info != XLOG_HEAP_INPLACE) ||
&gt;&nbsp; &nbsp; &nbsp;ctx-&gt;fast_forward)
&gt;&nbsp; &nbsp; &nbsp;return;

I think during fast forward, we also need handle the xlog that marks a transaction
as&nbsp;catalog modifying, or the snapshot might lose some transactions?

I think it's fine to skip during fast forward as we are not generating logical
changes. It's done that way in master, in your proposal and in my "if" proposals.
Note that my proposals related to the if conditions are for heap2_decode and
heap_decode (not xact_decode).

&gt; That way we'd still rely on what's being done in the XLOG_HEAP_INPLACE case

+		if (SnapBuildCurrentState(builder) &gt;= SNAPBUILD_BUILDING_SNAPSHOT)
+		{
+			/* Currently only XLOG_HEAP_INPLACE means a catalog modifying */
+			if (info == XLOG_HEAP_INPLACE &amp;&amp; TransactionIdIsValid(xid))
+				ReorderBufferXidSetCatalogChanges(ctx-&gt;reorder, xid, buf-&gt;origptr);
+		}

We only call&nbsp;ReorderBufferXidSetCatalogChanges() for the xlog that marks a transaction as&nbsp;catalog
modifying, and we don't care about the other steps being done in the xlog, so I think the current
approach is ok.

Yeah, I think your proposal does not do anything wrong. I just prefer to put
everything in a single if condition (as per my proposal) so that we can jump
directly in the appropriate case. I think that way the code is easier to maintain
instead of having to deal with the same info values in distinct places.

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

#11cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#10)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

Thanks for the comments!

- I think it's fine to skip during fast forward as we are not generating logical
- changes. It's done that way in master, in your proposal and in my "if" proposals.
- Note that my proposals related to the if conditions are for heap2_decode and
- heap_decode (not xact_decode).

But note that in xact_decode(), case&nbsp;XLOG_XACT_INVALIDATIONS, we call
ReorderBufferXidSetCatalogChanges() even if we are fast-forwarding, it might
be better to be consistent.

In addition, we don't decode anything during fast forward, but the snapshot might
serialize to disk. If we skip calling ReorderBufferXidSetCatalogChanges(), the snapshot
may be wrong on disk.

--
Regards,
ChangAo Chen

#12cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#10)
2 attachment(s)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

I refactor the code and fix the git apply warning&nbsp;according to [1]&nbsp;/messages/by-id/Zrmh7X8jYCbFYXjH@ip-10-97-1-34.eu-west-3.compute.internal.

Here are the new version patches.

--
Regards,
ChangAo Chen

[1]: &nbsp;/messages/by-id/Zrmh7X8jYCbFYXjH@ip-10-97-1-34.eu-west-3.compute.internal

Attachments:

v3-0002-Add-test-case-snapshot_build-for-test_decoding.patchapplication/octet-stream; charset=ISO-8859-1; name=v3-0002-Add-test-case-snapshot_build-for-test_decoding.patchDownload
From 51838f4cba785231ad6bd6d0f1bc105b9778392f Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Sat, 10 Aug 2024 17:34:26 +0800
Subject: [PATCH v3 2/2] Add test case snapshot_build for test_decoding.

---
 contrib/test_decoding/Makefile                |  2 +-
 .../test_decoding/expected/snapshot_build.out | 33 +++++++++++++
 contrib/test_decoding/meson.build             |  1 +
 .../test_decoding/specs/snapshot_build.spec   | 46 +++++++++++++++++++
 4 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 contrib/test_decoding/expected/snapshot_build.out
 create mode 100644 contrib/test_decoding/specs/snapshot_build.spec

diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index a4ba1a509a..8113e2d99c 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -9,7 +9,7 @@ REGRESS = ddl xact rewrite toast permissions decoding_in_xact \
 ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \
 	oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \
 	twophase_snapshot slot_creation_error catalog_change_snapshot \
-	skip_snapshot_restore
+	skip_snapshot_restore snapshot_build
 
 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
 ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
diff --git a/contrib/test_decoding/expected/snapshot_build.out b/contrib/test_decoding/expected/snapshot_build.out
new file mode 100644
index 0000000000..0fcf20cce8
--- /dev/null
+++ b/contrib/test_decoding/expected/snapshot_build.out
@@ -0,0 +1,33 @@
+Parsed test spec with 4 sessions
+
+starting permutation: s1_begin s1_insert s2_init s3_begin s3_insert s4_create s1_commit s4_begin s4_insert s3_commit s4_commit s2_get_changes
+step s1_begin: BEGIN;
+step s1_insert: INSERT INTO tbl1 VALUES (1);
+step s2_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); <waiting ...>
+step s3_begin: BEGIN;
+step s3_insert: INSERT INTO tbl1 VALUES (1);
+step s4_create: CREATE TABLE tbl2 (val1 integer);
+step s1_commit: COMMIT;
+step s4_begin: BEGIN;
+step s4_insert: INSERT INTO tbl2 VALUES (1);
+step s3_commit: COMMIT;
+step s2_init: <... completed>
+?column?
+--------
+init    
+(1 row)
+
+step s4_commit: COMMIT;
+step s2_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data                                      
+------------------------------------------
+BEGIN                                     
+table public.tbl2: INSERT: val1[integer]:1
+COMMIT                                    
+(3 rows)
+
+?column?
+--------
+stop    
+(1 row)
+
diff --git a/contrib/test_decoding/meson.build b/contrib/test_decoding/meson.build
index f643dc81a2..2b7e80ba71 100644
--- a/contrib/test_decoding/meson.build
+++ b/contrib/test_decoding/meson.build
@@ -63,6 +63,7 @@ tests += {
       'twophase_snapshot',
       'slot_creation_error',
       'skip_snapshot_restore',
+      'snapshot_build',
     ],
     'regress_args': [
       '--temp-config', files('logical.conf'),
diff --git a/contrib/test_decoding/specs/snapshot_build.spec b/contrib/test_decoding/specs/snapshot_build.spec
new file mode 100644
index 0000000000..334531dd21
--- /dev/null
+++ b/contrib/test_decoding/specs/snapshot_build.spec
@@ -0,0 +1,46 @@
+# Test snapshot build correctly, it must track committed transactions during BUILDING_SNAPSHOT
+
+setup
+{
+    DROP TABLE IF EXISTS tbl1;
+    DROP TABLE IF EXISTS tbl2;
+    CREATE TABLE tbl1 (val1 integer);
+}
+
+teardown
+{
+    DROP TABLE tbl1;
+    DROP TABLE tbl2;
+    SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1_begin" { BEGIN; }
+step "s1_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s1_commit" { COMMIT; }
+
+session "s2"
+setup { SET synchronous_commit=on; }
+step "s2_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
+step "s2_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); }
+
+session "s3"
+setup { SET synchronous_commit=on; }
+step "s3_begin" { BEGIN; }
+step "s3_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s3_commit" { COMMIT; }
+
+session "s4"
+setup { SET synchronous_commit=on; }
+step "s4_create" { CREATE TABLE tbl2 (val1 integer); }
+step "s4_begin" { BEGIN; }
+step "s4_insert" { INSERT INTO tbl2 VALUES (1); }
+step "s4_commit" { COMMIT; }
+
+# T1: s1_begin -> s1_insert -> BUILDING_SNAPSHOT -> s1_commit -> FULL_SNAPSHOT
+# T2: BUILDING_SNAPSHOT -> s3_begin -> s3_insert -> FULL_SNAPSHOT -> s3_commit -> CONSISTENT
+# T3: BUILDING_SNAPSHOT -> s4_create -> FULL_SNAPSHOT
+# T4: FULL_SNAPSHOT -> s4_begin -> s4_insert -> CONSISTENT -> s4_commit
+# The snapshot must track T3 or the replay of T4 will fail because its snapshot cannot see tbl2
+permutation "s1_begin" "s1_insert" "s2_init" "s3_begin" "s3_insert" "s4_create" "s1_commit" "s4_begin" "s4_insert" "s3_commit" "s4_commit" "s2_get_changes"
-- 
2.34.1

v4-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchapplication/octet-stream; charset=ISO-8859-1; name=v4-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchDownload
From b35711f5ef0941f79127acbec55a8ede5e27a5eb Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Tue, 13 Aug 2024 11:45:07 +0800
Subject: [PATCH v4] Track transactions committed in BUILDING_SNAPSHOT.

The historic snapshot previously didn't track transactions committed
in BUILDING_SNAPSHOT, and this might result in a transaction taking
an incorrect snapshot and logical decoding being interrupted. So we
need track these transactions.

We also need handle the xlog that marks a transaction as containing
catalog changes in BUILDING_SNAPSHOT because the historic snapshot
only tracks catalog modifying transactions.
---
 src/backend/replication/logical/decode.c | 45 +++++++++++++++++++++---
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index d687ceee33..6df558ad18 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -206,12 +206,16 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK;
 
 	/*
-	 * If the snapshot isn't yet fully built, we cannot decode anything, so
-	 * bail out.
+	 * If the snapshot hasn't started building yet, the transaction won't be
+	 * decoded or tracked by the snapshot, so bail out.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT)
 		return;
 
+	/*
+	 * Note that if the snapshot isn't yet fully built, the xlog is only used
+	 * to build the snapshot and won't be decoded.
+	 */
 	switch (info)
 	{
 		case XLOG_XACT_COMMIT:
@@ -282,9 +286,11 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			{
 				TransactionId xid;
 				xl_xact_invals *invals;
+				bool		has_snapshot;
 
 				xid = XLogRecGetXid(r);
 				invals = (xl_xact_invals *) XLogRecGetData(r);
+				has_snapshot = SnapBuildCurrentState(builder) >= SNAPBUILD_FULL_SNAPSHOT;
 
 				/*
 				 * Execute the invalidations for xid-less transactions,
@@ -293,7 +299,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 				 */
 				if (TransactionIdIsValid(xid))
 				{
-					if (!ctx->fast_forward)
+					if (!ctx->fast_forward && has_snapshot)
 						ReorderBufferAddInvalidations(reorder, xid,
 													  buf->origptr,
 													  invals->nmsgs,
@@ -301,7 +307,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 					ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
 													  buf->origptr);
 				}
-				else if (!ctx->fast_forward)
+				else if (!ctx->fast_forward && has_snapshot)
 					ReorderBufferImmediateInvalidation(ctx->reorder,
 													   invals->nmsgs,
 													   invals->msgs);
@@ -407,6 +413,8 @@ heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
 	TransactionId xid = XLogRecGetXid(buf->record);
 	SnapBuild  *builder = ctx->snapshot_builder;
+	/* True if the xlog marks the transaction as containing catalog changes */
+	bool	set_catalog_changes = (info == XLOG_HEAP2_NEW_CID);
 
 	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
 
@@ -416,7 +424,19 @@ heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	 */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
 		ctx->fast_forward)
+	{
+		/*
+		 * If the transaction contains catalog changes, we need mark it in
+		 * reorder buffer before return as the snapshot only tracks catalog
+		 * modifying transactions. The transaction before BUILDING_SNAPSHOT
+		 * won't be tracked anyway(see SnapBuildCommitTxn), so skip it.
+		 */
+		if (set_catalog_changes && TransactionIdIsValid(xid) &&
+			SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+			ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+
 		return;
+	}
 
 	switch (info)
 	{
@@ -466,6 +486,8 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(buf->record) & XLOG_HEAP_OPMASK;
 	TransactionId xid = XLogRecGetXid(buf->record);
 	SnapBuild  *builder = ctx->snapshot_builder;
+	/* True if the xlog marks the transaction as containing catalog changes */
+	bool	set_catalog_changes = (info == XLOG_HEAP_INPLACE);
 
 	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
 
@@ -475,7 +497,19 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	 */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
 		ctx->fast_forward)
+	{
+		/*
+		 * If the transaction contains catalog changes, we need mark it in
+		 * reorder buffer before return as the snapshot only tracks catalog
+		 * modifying transactions. The transaction before BUILDING_SNAPSHOT
+		 * won't be tracked anyway(see SnapBuildCommitTxn), so skip it.
+		 */
+		if (set_catalog_changes && TransactionIdIsValid(xid) &&
+			SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+			ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+
 		return;
+	}
 
 	switch (info)
 	{
@@ -1301,6 +1335,7 @@ DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
 				  Oid txn_dbid, RepOriginId origin_id)
 {
 	if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+		SnapBuildCurrentState(ctx->snapshot_builder) < SNAPBUILD_CONSISTENT ||
 		(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
 		FilterByOrigin(ctx, origin_id))
 		return true;
-- 
2.34.1

#13Bertrand Drouvot
bertranddrouvot.pg@gmail.com
In reply to: cca5507 (#12)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Tue, Aug 13, 2024 at 12:23:04PM +0800, cca5507 wrote:

Hi,

I refactor the code and fix the git apply warning&nbsp;according to [1].

Here are the new version patches.

Thanks!

1 ===

+       /* True if the xlog marks the transaction as containing catalog changes */
+       bool    set_catalog_changes = (info == XLOG_HEAP2_NEW_CID);
        if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
                ctx->fast_forward)
+       {
+               /*
+                * If the transaction contains catalog changes, we need mark it in
+                * reorder buffer before return as the snapshot only tracks catalog
+                * modifying transactions. The transaction before BUILDING_SNAPSHOT
+                * won't be tracked anyway(see SnapBuildCommitTxn), so skip it.
+                */
+               if (set_catalog_changes && TransactionIdIsValid(xid) &&
+                       SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+                       ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+
                return;
+       }

I still prefer to replace the above with:

if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
(SnapBuildCurrentState(builder) == SNAPBUILD_BUILDING_SNAPSHOT && info != XLOG_HEAP2_NEW_CID) ||
ctx->fast_forward)
return;

Let's see what others think.

2 ===

+       /* True if the xlog marks the transaction as containing catalog changes */
+       bool    set_catalog_changes = (info == XLOG_HEAP_INPLACE);
        if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
                ctx->fast_forward)
+       {
+               /*
+                * If the transaction contains catalog changes, we need mark it in
+                * reorder buffer before return as the snapshot only tracks catalog
+                * modifying transactions. The transaction before BUILDING_SNAPSHOT
+                * won't be tracked anyway(see SnapBuildCommitTxn), so skip it.
+                */
+               if (set_catalog_changes && TransactionIdIsValid(xid) &&
+                       SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+                       ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+
                return;
+       }

I still prefer to replace the above with:

if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT ||
(SnapBuildCurrentState(builder) == SNAPBUILD_BUILDING_SNAPSHOT && info != XLOG_HEAP_INPLACE) ||
ctx->fast_forward)
return;

Let's see what others think.

3 ===

I re-read your comments in [0]/messages/by-id/tencent_8DEC9842690A9B6AFD52D4659EF0700E9409@qq.com and it looks like you've concern about
the 2 "if" I'm proposing above and the fast forward handling. Is that the case
or is your fast forward concern unrelated to my proposals?

Not sure what happened but it looks like your reply in [0]/messages/by-id/tencent_8DEC9842690A9B6AFD52D4659EF0700E9409@qq.com is not part of the
initial thread [1]/messages/by-id/tencent_6AAF072A7623A11A85C0B5FD290232467808@qq.com, but created a new thread instead, making the whole
conversation difficult to follow.

[0]: /messages/by-id/tencent_8DEC9842690A9B6AFD52D4659EF0700E9409@qq.com
[1]: /messages/by-id/tencent_6AAF072A7623A11A85C0B5FD290232467808@qq.com

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

#14cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#13)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

- I re-read your comments in [0] and it looks like you've concern about
- the 2 "if" I'm proposing above and the fast forward handling. Is that the case
- or is your fast forward concern unrelated to my proposals?

In your proposals, we will just return when fast forward. But I think we need
handle&nbsp;XLOG_HEAP2_NEW_CID or&nbsp;XLOG_HEAP_INPLACE even if we are fast
forwarding as it decides whether the snapshot will track the transaction or not.

During fast forward, if there is a transaction that generates XLOG_HEAP2_NEW_CID
but no XLOG_XACT_INVALIDATIONS(I'm not sure), the snapshot won't track this
transaction in your proposals, I think it's wrong from a build snapshot perspective.

Although we don't decode anything during fast forward, the snapshot might be
serialized to disk when CONSISTENT, it would be better to keep the snapshot correct.

- Not sure what happened but it looks like your reply in [0] is not part of the
- initial thread [1], but created a new thread instead, making the whole
- conversation difficult to follow.

I'm not sure what happened but I attach the new thread to the CF:

https://commitfest.postgresql.org/49/5029

--
Regards,
ChangAo Chen

#15Bertrand Drouvot
bertranddrouvot.pg@gmail.com
In reply to: cca5507 (#14)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

On Tue, Aug 13, 2024 at 03:32:42PM +0800, cca5507 wrote:

Hi,

- I re-read your comments in [0] and it looks like you've concern about
- the 2 "if" I'm proposing above and the fast forward handling. Is that the case
- or is your fast forward concern unrelated to my proposals?

In your proposals, we will just return when fast forward. But I think we need
handle&nbsp;XLOG_HEAP2_NEW_CID or&nbsp;XLOG_HEAP_INPLACE even if we are fast
forwarding as it decides whether the snapshot will track the transaction or not.

During fast forward, if there is a transaction that generates XLOG_HEAP2_NEW_CID
but no XLOG_XACT_INVALIDATIONS(I'm not sure), the snapshot won't track this
transaction in your proposals, I think it's wrong from a build snapshot perspective.

Although we don't decode anything during fast forward, the snapshot might be
serialized to disk when CONSISTENT, it would be better to keep the snapshot correct.

IIUC your "fast forward" concern is not related to this particular thread but you
think it's already an issue on the master branch (outside of the BUILDING_SNAPSHOT
handling we are discussing here), is that correct? (that's also what your coding
changes makes me think of). If so, I'd suggest to open a dedicated thread for that
particular "fast forward" point and do the coding in the current thread as if the
fast forward is not an issue.

Does that make sense?

- Not sure what happened but it looks like your reply in [0] is not part of the
- initial thread [1], but created a new thread instead, making the whole
- conversation difficult to follow.

I'm not sure what happened but I attach the new thread to the CF:

Unfortunately your last reply did start a new email thread again.

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com

#16cca5507
cca5507@qq.com
In reply to: Bertrand Drouvot (#15)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

- IIUC your "fast forward" concern is not related to this particular thread but you
- think it's already an issue on the master branch (outside of the BUILDING_SNAPSHOT
- handling we are discussing here), is that correct? (that's also what your coding
- changes makes me think of). If so, I'd suggest to open a dedicated thread for that
- particular "fast forward" point and do the coding in the current thread as if the
- fast forward is not an issue.

- Does that make sense?

Yes.

But I think the v4-0001 in [1]&nbsp;/messages/by-id/tencent_925A991463194F3C97830C3BB7D0A2C2BD07@qq.com is fine.

Let's see what others think.

--
Regards,
ChangAo Chen

[1]: &nbsp;/messages/by-id/tencent_925A991463194F3C97830C3BB7D0A2C2BD07@qq.com

#17Ajin Cherian
itsajin@gmail.com
In reply to: cca5507 (#7)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Wed, Mar 26, 2025 at 9:11 PM cca5507 <cca5507@qq.com> wrote:

Hi,

Thanks for the comments!

Here are the new version patches, I think it will be more clear.

--

+    {
+        /*
+         * Note that during or after BUILDING_SNAPSHOT, we need handle the xlog
+         * that might mark a transaction as catalog modifying because
the snapshot
+         * only tracks catalog modifying transactions. The transaction before
+         * BUILDING_SNAPSHOT will not be tracked anyway(see
SnapBuildCommitTxn()
+         * for details), so just return.
+          */
+        if (SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT)
+        {
+            /* Currently only XLOG_HEAP2_NEW_CID means a catalog modifying */
+            if (info == XLOG_HEAP2_NEW_CID && TransactionIdIsValid(xid))
+                ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
buf->origptr);
+        }

Any reason why you avoided calling SnapBuildProcessNewCid here and
only called ReorderBufferXidSetCatalogChanges? If any, please mention
in the comments the reason.

regards,
Ajin Cherian
Fujitsu Australia

#18cca5507
cca5507@qq.com
In reply to: Ajin Cherian (#17)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state
#19Ajin Cherian
itsajin@gmail.com
In reply to: cca5507 (#16)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Thu, Mar 27, 2025 at 1:13 PM cca5507 <cca5507@qq.com> wrote:

Hi,

- IIUC your "fast forward" concern is not related to this particular thread but you
- think it's already an issue on the master branch (outside of the BUILDING_SNAPSHOT
- handling we are discussing here), is that correct? (that's also what your coding
- changes makes me think of). If so, I'd suggest to open a dedicated thread for that
- particular "fast forward" point and do the coding in the current thread as if the
- fast forward is not an issue.

- Does that make sense?

Yes.

But I think the v4-0001 in [1] is fine.

@@ -1301,6 +1335,7 @@ DecodeTXNNeedSkip(LogicalDecodingContext *ctx,
XLogRecordBuffer *buf,
                   Oid txn_dbid, RepOriginId origin_id)
 {
     if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+        SnapBuildCurrentState(ctx->snapshot_builder) < SNAPBUILD_CONSISTENT ||
         (txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
         FilterByOrigin(ctx, origin_id))
         return true;

I don't see an explanation as to why this change was added? Maybe I
missed it in the multiple threads. With this, we are no longer
decoding changes which were in the state SNAPBUILD_FULL_SNAPSHOT. Why
did that change?

regards,
Ajin Cherian
Fujitsu Australia

#20cca5507
cca5507@qq.com
In reply to: Ajin Cherian (#19)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

According to the comment above DecodeTXNNeedSkip(), transactions committed before SNAPBUILD_CONSISTENT state won't be decoded. It's important for initial table data synchronization because the table sync workers use the consistent snapshot to copy data so transactions before SNAPBUILD_CONSISTENT are already included in the initial data.

This change may be redundant with SnapBuildXactNeedsSkip(), I add just for safe.

--
Regards,
ChangAo Chen

#21Ajin Cherian
itsajin@gmail.com
In reply to: cca5507 (#20)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Thu, Mar 27, 2025 at 2:47 PM cca5507 <cca5507@qq.com> wrote:

Hi,

According to the comment above DecodeTXNNeedSkip(), transactions committed before SNAPBUILD_CONSISTENT state won't be decoded. It's important for initial table data synchronization because the table sync workers use the consistent snapshot to copy data so transactions before SNAPBUILD_CONSISTENT are already included in the initial data.

This change may be redundant with SnapBuildXactNeedsSkip(), I add just for safe.

--

I understand your explanation but was there a bug on HEAD that this is
solving? If so, can you write a test case to show this? This will make
it more convincing that this change as well is required. Your other
change has a test case to confirm, but this doesn't.

regards,
Ajin Cherian
Fujitsu Australia

#22cca5507
cca5507@qq.com
In reply to: Ajin Cherian (#21)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

This change doesn't solve a bug. But I think it makes the code and comments more consistent. I currently have no idea about how to test it.

--
Regards,
ChangAo Chen

#23Masahiko Sawada
sawada.mshk@gmail.com
In reply to: cca5507 (#12)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Mon, Aug 12, 2024 at 9:25 PM cca5507 <cca5507@qq.com> wrote:

Hi,

I refactor the code and fix the git apply warning according to [1].

Here are the new version patches.

I've investigated the reported issue and reviewed the patch. IIUC to
fix this issue, what we need to do is to track catalog-change
transactions even in BUILDING_SNAPSHOT phase so that we can decode
transactions correctly that started in FULL_SNAPSHOT. My question is;
in order to track just catalog-change transactions, whether it's
sufficient to check if XLOG_XACT_COMMIT[_PREPARED] has the
XACT_XINFO_HAS_INVALS flag. If yes, we probably should change only
xact_decode() to check the commit records even in BUILDING_SNAPSHOT.
Otherwise, we would need to change mostly all paths where we mark the
transaction as catalog-change as the patch does.

Regards,

--
Masahiko Sawada
Amazon Web Services: https://aws.amazon.com

#24Ajin Cherian
itsajin@gmail.com
In reply to: cca5507 (#12)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Fri, Jun 27, 2025 at 3:48 PM cca5507 <cca5507@qq.com> wrote:

Hi,

I refactor the code and fix the git apply warning according to [1].

Here are the new version patches.

--
Regards,
ChangAo Chen

[1] /messages/by-id/Zrmh7X8jYCbFYXjH@ip-10-97-1-34.eu-west-3.compute.internal

I see this problem is similar to the bug reported in [1]/messages/by-id/18509-983f064d174ea880@postgresql.org, and your fix
also addresses the issue reported there. Although I like your approach
of tracking changes starting from the BUILDING_SNAPSHOT state, I’d
like to suggest an alternative.

While debugging that issue, my plan was not to track catalog changes
prior to SNAPBUILD_CONSISTENT, but instead to ensure we don’t use
snapshots built before SNAPBUILD_CONSISTENT, since we don’t track
catalog changes in those states. We should discard previously built
snapshots and rebuild them once we reach the SNAPBUILD_CONSISTENT
state. At that point, all necessary transactions would have been
committed, and builder->xmin would have advanced enough to decode all
transactions from then on.

The problem is that previously built snapshots hang around without the
latest xmin and xmax, and we tend to reuse them. We should ensure that
all txn->base_snapshot and builder->snapshot snapshots built in the
SNAPBUILD_FULL_SNAPSHOT state are rebuilt once we reach
SNAPBUILD_CONSISTENT. For this, we need to track when the snapshot was
built. There is already a field in ReorderBufferTXN -
'base_snapshot_lsn' which we can use. If base_snapshot_lsn <
builder->start_decoding_at, then we should rebuild the snapshot. Just
a thought.

regards,
Ajin Cherian
Fujitsu Australia

[1]: /messages/by-id/18509-983f064d174ea880@postgresql.org

#25cca5507
cca5507@qq.com
In reply to: Ajin Cherian (#24)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

Hi,

If I understand correctly, this may break the basic&nbsp;principle:

The aim is to build a&nbsp;snapshot that behaves the same as a freshly taken MVCC snapshot would have&nbsp;at the time the XLogRecord was generated.

--
Regards,
ChangAo Chen

------------------&nbsp;Original&nbsp;------------------
From: "Ajin Cherian" <itsajin@gmail.com&gt;;
Date:&nbsp;Fri, Jun 27, 2025 06:29 PM
To:&nbsp;"cca5507"<cca5507@qq.com&gt;;
Cc:&nbsp;"Bertrand Drouvot"<bertranddrouvot.pg@gmail.com&gt;;"Michael Paquier"<michael@paquier.xyz&gt;;"pgsql-hackers"<pgsql-hackers@lists.postgresql.org&gt;;
Subject:&nbsp;Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Fri, Jun 27, 2025 at 3:48 PM cca5507 <cca5507@qq.com&gt; wrote:
&gt;
&gt; Hi,
&gt;
&gt; I refactor the code and fix the git apply warning according to [1]/messages/by-id/18509-983f064d174ea880@postgresql.org.
&gt;
&gt; Here are the new version patches.
&gt;
&gt; --
&gt; Regards,
&gt; ChangAo Chen
&gt;
&gt; [1]/messages/by-id/18509-983f064d174ea880@postgresql.org /messages/by-id/Zrmh7X8jYCbFYXjH@ip-10-97-1-34.eu-west-3.compute.internal

I see this problem is similar to the bug reported in [1]/messages/by-id/18509-983f064d174ea880@postgresql.org, and your fix
also addresses the issue reported there. Although I like your approach
of tracking changes starting from the BUILDING_SNAPSHOT state, I’d
like to suggest an alternative.

While debugging that issue, my plan was not to track catalog changes
prior to SNAPBUILD_CONSISTENT, but instead to ensure we don’t use
snapshots built before SNAPBUILD_CONSISTENT, since we don’t track
catalog changes in those states. We should discard previously built
snapshots and rebuild them once we reach the SNAPBUILD_CONSISTENT
state. At that point, all necessary transactions would have been
committed, and builder-&gt;xmin would have advanced enough to decode all
transactions from then on.

The problem is that previously built snapshots hang around without the
latest xmin and xmax, and we tend to reuse them. We should ensure that
all txn-&gt;base_snapshot and builder-&gt;snapshot snapshots built in the
SNAPBUILD_FULL_SNAPSHOT state are rebuilt once we reach
SNAPBUILD_CONSISTENT. For this, we need to track when the snapshot was
built. There is already a field in ReorderBufferTXN -
'base_snapshot_lsn' which we can use. If base_snapshot_lsn <
builder-&gt;start_decoding_at, then we should rebuild the snapshot. Just
a thought.

regards,
Ajin Cherian
Fujitsu Australia

[1]: /messages/by-id/18509-983f064d174ea880@postgresql.org

#26Michael Paquier
michael@paquier.xyz
In reply to: cca5507 (#25)
Re: Historic snapshot doesn't track txns committed in BUILDING_SNAPSHOT state

On Sat, Jun 28, 2025 at 11:44:52AM +0800, cca5507 wrote:

If I understand correctly, this may break the basic&nbsp;principle:

The aim is to build a&nbsp;snapshot that behaves the same as a
freshly taken MVCC snapshot would have&nbsp;at the time the
XLogRecord was generated.

Please note that we prefer bottom-posting when sending messages on
pgsql-hackers and the community mailing lists, as defined in this
link:
https://en.wikipedia.org/wiki/Posting_style#Bottom-posting

Top-posting, as you did in your last email, is breaking the logic and
flow of the discussion.

Some more useful documentation from the PostgreSQL wiki:
https://wiki.postgresql.org/wiki/Mailing_Lists
--
Michael

#27cca5507
cca5507@qq.com
In reply to: Michael Paquier (#26)
2 attachment(s)
Re: Historic snapshot doesn't track txns committed inBUILDING_SNAPSHOT state

Attachments:

v6-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchapplication/octet-stream; charset=utf-8; name=v6-0001-Track-transactions-committed-in-BUILDING_SNAPSHOT.patchDownload
From 12dd3434ef13609b324bbbbe68a3f0e2a48934a2 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Fri, 21 Nov 2025 15:19:22 +0800
Subject: [PATCH v6 1/2] Track transactions committed in BUILDING_SNAPSHOT.

The historic snapshot previously didn't track transactions committed
in BUILDING_SNAPSHOT, this might result in a transaction taking an
incorrect snapshot and logical decoding being interrupted. So we need
to track these transactions.

We also need to handle the xlog which means a catalog change in BUILDING_SNAPSHOT
because the historic snapshot only tracks catalog modifying transactions.
---
 src/backend/replication/logical/decode.c | 33 ++++++++++++++++++++----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index cc03f0706e9..de1bed30781 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -206,12 +206,16 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	uint8		info = XLogRecGetInfo(r) & XLOG_XACT_OPMASK;
 
 	/*
-	 * If the snapshot isn't yet fully built, we cannot decode anything, so
-	 * bail out.
+	 * If the snapshot hasn't started building yet, the transaction won't be
+	 * decoded or tracked by the snapshot, so bail out.
 	 */
-	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_BUILDING_SNAPSHOT)
 		return;
 
+	/*
+	 * Note that if the snapshot isn't yet fully built, the xlog is only used
+	 * to build the snapshot and won't be decoded.
+	 */
 	switch (info)
 	{
 		case XLOG_XACT_COMMIT:
@@ -282,18 +286,24 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 			{
 				TransactionId xid;
 				xl_xact_invals *invals;
+				bool has_snapshot;
 
 				xid = XLogRecGetXid(r);
 				invals = (xl_xact_invals *) XLogRecGetData(r);
+				has_snapshot =
+					SnapBuildCurrentState(builder) >= SNAPBUILD_FULL_SNAPSHOT;
 
 				/*
 				 * Execute the invalidations for xid-less transactions,
 				 * otherwise, accumulate them so that they can be processed at
 				 * the commit time.
+				 *
+				 * Note that we only need to do this when we are not fast-forwarding
+				 * and there is a snapshot.
 				 */
 				if (TransactionIdIsValid(xid))
 				{
-					if (!ctx->fast_forward)
+					if (!ctx->fast_forward && has_snapshot)
 						ReorderBufferAddInvalidations(reorder, xid,
 													  buf->origptr,
 													  invals->nmsgs,
@@ -301,7 +311,7 @@ xact_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 					ReorderBufferXidSetCatalogChanges(ctx->reorder, xid,
 													  buf->origptr);
 				}
-				else if (!ctx->fast_forward)
+				else if (!ctx->fast_forward && has_snapshot)
 					ReorderBufferImmediateInvalidation(ctx->reorder,
 													   invals->nmsgs,
 													   invals->msgs);
@@ -419,7 +429,19 @@ heap2_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 	 * SnapBuildProcessRunningXacts().
 	 */
 	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+	{
+		/*
+		 * If we are building snapshot and the xlog means a catalog
+		 * change, we need to mark it in the reorder buffer.
+		 *
+		 * Now only XLOG_HEAP2_NEW_CID means a catalog change.
+		 */
+		if (SnapBuildCurrentState(builder) >= SNAPBUILD_BUILDING_SNAPSHOT &&
+			TransactionIdIsValid(xid) && info == XLOG_HEAP2_NEW_CID)
+			ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+
 		return;
+	}
 
 	switch (info)
 	{
@@ -1306,6 +1328,7 @@ DecodeTXNNeedSkip(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
 				  Oid txn_dbid, RepOriginId origin_id)
 {
 	if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+		SnapBuildCurrentState(ctx->snapshot_builder) < SNAPBUILD_CONSISTENT ||
 		(txn_dbid != InvalidOid && txn_dbid != ctx->slot->data.database) ||
 		FilterByOrigin(ctx, origin_id))
 		return true;
-- 
2.34.1

v6-0002-Add-test-case-snapshot_build-for-test_decoding.patchapplication/octet-stream; charset=utf-8; name=v6-0002-Add-test-case-snapshot_build-for-test_decoding.patchDownload
From d4747efa7c8103ec259a725051fff3bc4849dc17 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Fri, 21 Nov 2025 15:48:27 +0800
Subject: [PATCH v6 2/2] Add test case snapshot_build for test_decoding.

---
 contrib/test_decoding/Makefile                |  3 +-
 .../test_decoding/expected/snapshot_build.out | 33 +++++++++++++
 contrib/test_decoding/meson.build             |  1 +
 .../test_decoding/specs/snapshot_build.spec   | 46 +++++++++++++++++++
 4 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 contrib/test_decoding/expected/snapshot_build.out
 create mode 100644 contrib/test_decoding/specs/snapshot_build.spec

diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index acbcaed2feb..60210726566 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -9,7 +9,8 @@ REGRESS = ddl xact rewrite toast permissions decoding_in_xact \
 ISOLATION = mxact delayed_startup ondisk_startup concurrent_ddl_dml \
 	oldest_xmin snapshot_transfer subxact_without_top concurrent_stream \
 	twophase_snapshot slot_creation_error catalog_change_snapshot \
-	skip_snapshot_restore invalidation_distribution parallel_session_origin
+	skip_snapshot_restore invalidation_distribution parallel_session_origin \
+	snapshot_build
 
 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
 ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf
diff --git a/contrib/test_decoding/expected/snapshot_build.out b/contrib/test_decoding/expected/snapshot_build.out
new file mode 100644
index 00000000000..0fcf20cce86
--- /dev/null
+++ b/contrib/test_decoding/expected/snapshot_build.out
@@ -0,0 +1,33 @@
+Parsed test spec with 4 sessions
+
+starting permutation: s1_begin s1_insert s2_init s3_begin s3_insert s4_create s1_commit s4_begin s4_insert s3_commit s4_commit s2_get_changes
+step s1_begin: BEGIN;
+step s1_insert: INSERT INTO tbl1 VALUES (1);
+step s2_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); <waiting ...>
+step s3_begin: BEGIN;
+step s3_insert: INSERT INTO tbl1 VALUES (1);
+step s4_create: CREATE TABLE tbl2 (val1 integer);
+step s1_commit: COMMIT;
+step s4_begin: BEGIN;
+step s4_insert: INSERT INTO tbl2 VALUES (1);
+step s3_commit: COMMIT;
+step s2_init: <... completed>
+?column?
+--------
+init    
+(1 row)
+
+step s4_commit: COMMIT;
+step s2_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data                                      
+------------------------------------------
+BEGIN                                     
+table public.tbl2: INSERT: val1[integer]:1
+COMMIT                                    
+(3 rows)
+
+?column?
+--------
+stop    
+(1 row)
+
diff --git a/contrib/test_decoding/meson.build b/contrib/test_decoding/meson.build
index 99310555e6c..252a39b7727 100644
--- a/contrib/test_decoding/meson.build
+++ b/contrib/test_decoding/meson.build
@@ -65,6 +65,7 @@ tests += {
       'skip_snapshot_restore',
       'invalidation_distribution',
       'parallel_session_origin',
+      'snapshot_build',
     ],
     'regress_args': [
       '--temp-config', files('logical.conf'),
diff --git a/contrib/test_decoding/specs/snapshot_build.spec b/contrib/test_decoding/specs/snapshot_build.spec
new file mode 100644
index 00000000000..334531dd219
--- /dev/null
+++ b/contrib/test_decoding/specs/snapshot_build.spec
@@ -0,0 +1,46 @@
+# Test snapshot build correctly, it must track committed transactions during BUILDING_SNAPSHOT
+
+setup
+{
+    DROP TABLE IF EXISTS tbl1;
+    DROP TABLE IF EXISTS tbl2;
+    CREATE TABLE tbl1 (val1 integer);
+}
+
+teardown
+{
+    DROP TABLE tbl1;
+    DROP TABLE tbl2;
+    SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1_begin" { BEGIN; }
+step "s1_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s1_commit" { COMMIT; }
+
+session "s2"
+setup { SET synchronous_commit=on; }
+step "s2_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
+step "s2_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); }
+
+session "s3"
+setup { SET synchronous_commit=on; }
+step "s3_begin" { BEGIN; }
+step "s3_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s3_commit" { COMMIT; }
+
+session "s4"
+setup { SET synchronous_commit=on; }
+step "s4_create" { CREATE TABLE tbl2 (val1 integer); }
+step "s4_begin" { BEGIN; }
+step "s4_insert" { INSERT INTO tbl2 VALUES (1); }
+step "s4_commit" { COMMIT; }
+
+# T1: s1_begin -> s1_insert -> BUILDING_SNAPSHOT -> s1_commit -> FULL_SNAPSHOT
+# T2: BUILDING_SNAPSHOT -> s3_begin -> s3_insert -> FULL_SNAPSHOT -> s3_commit -> CONSISTENT
+# T3: BUILDING_SNAPSHOT -> s4_create -> FULL_SNAPSHOT
+# T4: FULL_SNAPSHOT -> s4_begin -> s4_insert -> CONSISTENT -> s4_commit
+# The snapshot must track T3 or the replay of T4 will fail because its snapshot cannot see tbl2
+permutation "s1_begin" "s1_insert" "s2_init" "s3_begin" "s3_insert" "s4_create" "s1_commit" "s4_begin" "s4_insert" "s3_commit" "s4_commit" "s2_get_changes"
-- 
2.34.1