Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Started by Shlok Kyal, about 1 year ago (55 messages)
#1Shlok Kyal
shlok.kyal.oss@gmail.com
1 attachment(s)

Hi,

There was an issue reported recently by Sawada-san on a different thread [1].
I have created this thread to discuss the issue separately.

Currently, generated columns can be published only when we explicitly
specify it in the Publication column list.
An issue was found that UPDATE and DELETE are allowed on the table
even if its replica identity is set to generated columns that are not
published.
For example:
CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 100 WHERE a = 1;

Here the generated column 'b' is set as REPLICA IDENTITY for table
'testpub_gencol'. When we create publication 'pub_gencol' we do not
specify any column list, so column 'b' will not be published.
So, the update message generated by the last UPDATE would have NULL
for column 'b'.

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

[1]: /messages/by-id/CAD21AoA_RBkMa-6nUpBSoEP9s=46r3oq15vQkunVRCsYKXKMnA@mail.gmail.com

Thanks and regards,
Shlok Kyal

Attachments:

v1-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patch (application/octet-stream; name=v1-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patch) [Download]
From 789d3536fe512916838902a10e75329b7d1d1550 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v1] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on table having unpublished generated column as REPLICA
IDENTITY is allowed. UPDATE/DELETE on such tables should not be allowed
---
 src/backend/commands/publicationcmds.c    | 47 +++++++++++++++++++++++
 src/test/regress/expected/publication.out | 13 +++++++
 src/test/regress/sql/publication.sql      | 13 +++++++
 3 files changed, 73 insertions(+)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index d6ffef374e..d72df2b589 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -427,6 +427,53 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 		bms_free(idattrs);
 		bms_free(columns);
 	}
+	else
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			char attgenerated = get_attgenerated(relid, attnum);
+
+			/*
+			 * If pubviaroot is true, we are validating the column list of the
+			 * parent table, but the bitmap contains the replica identity
+			 * information of the child table. The parent/child attnums may
+			 * not match, so translate them to the parent - get the attname
+			 * from the child, and look it up in the parent.
+			 */
+			if (pubviaroot)
+			{
+				/* attribute name in the child table */
+				char	   *colname = get_attname(relid, attnum, false);
+
+				/*
+				 * Determine the attnum for the attribute name in parent (we
+				 * are using the column list defined on the parent).
+				 */
+				attnum = get_attnum(publish_as_relid, colname);
+				attgenerated = get_attgenerated(publish_as_relid, attnum);
+			}
+
+			/*
+			 * For publication with no column list, replica identity having
+			 * generated column is not allowed
+			 */
+			if (attgenerated == ATTRIBUTE_GENERATED_STORED)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
 
 	ReleaseSysCache(tuple);
 
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index d2ed1efc3b..1a2b0ad31a 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -664,6 +664,19 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the replica identity.
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 12aea71c0f..3be0e9ec82 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -394,6 +394,19 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
-- 
2.34.1

#2Aleksander Alekseev
aleksander@timescale.com
In reply to: Shlok Kyal (#1)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Hi Shlok,

Here the generated column 'b' is set as REPLICA IDENTITY for table
'testpub_gencol'. When we create publication 'pub_gencol' we do not
specify any column list, so column 'b' will not be published.
So, the update message generated by the last UPDATE would have NULL
for column 'b'.

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

I don't think this would be a correct fix. Let's say I *don't* have
any publications:

```
=# CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE TABLE

=# CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
CREATE INDEX

=# INSERT INTO testpub_gencol (a) VALUES (1);
INSERT 0 1

=# UPDATE testpub_gencol SET a = 100 WHERE a = 1;
UPDATE 1
eax=# SELECT * FROM testpub_gencol ;
a | b
-----+-----
100 | 101
(1 row)
```

So far everything works fine. You are saying that when one creates a
publication UPDATEs should stop working. That would be rather
surprising behavior for a typical user not to mention that it will
break the current behavior.

I believe one would expect that both UPDATEs and the publication
should continue to work. Perhaps we should forbid the creation of a
publication like this instead. Or alternatively include a generated
column to the publication list if it's used as a replica identity. Or
maybe even keep everything as is.

Thoughts?

--
Best regards,
Aleksander Alekseev

#3Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Aleksander Alekseev (#2)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Hi Aleksander,

Here the generated column 'b' is set as REPLICA IDENTITY for table
'testpub_gencol'. When we create publication 'pub_gencol' we do not
specify any column list, so column 'b' will not be published.
So, the update message generated by the last UPDATE would have NULL
for column 'b'.

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

I don't think this would be a correct fix. Let's say I *don't* have
any publications:

```
=# CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE TABLE

=# CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
CREATE INDEX

=# INSERT INTO testpub_gencol (a) VALUES (1);
INSERT 0 1

=# UPDATE testpub_gencol SET a = 100 WHERE a = 1;
UPDATE 1
eax=# SELECT * FROM testpub_gencol ;
a | b
-----+-----
100 | 101
(1 row)
```

So far everything works fine. You are saying that when one creates a
publication UPDATEs should stop working. That would be rather
surprising behavior for a typical user not to mention that it will
break the current behavior.

I believe one would expect that both UPDATEs and the publication
should continue to work. Perhaps we should forbid the creation of a
publication like this instead. Or alternatively include a generated
column to the publication list if it's used as a replica identity. Or
maybe even keep everything as is.

Thoughts?

While testing I found that similar behaviors already exist in some
cases. Where once we create a publication UPDATES might stop working.
For example:
Case1:
postgres=# create table t1(c1 int);
CREATE TABLE
postgres=# insert into t1 values(1);
INSERT 0 1
postgres=# update t1 set c1 = 100 where c1 = 1;
UPDATE 1
postgres=# create publication pub for table t1;
CREATE PUBLICATION
postgres=# update t1 set c1 = 100 where c1 = 1;
ERROR: cannot update table "t1" because it does not have a replica
identity and publishes updates
HINT: To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.

Case2:
postgres=# create table t2(c1 int, c2 int not null);
CREATE TABLE
postgres=# create unique index t2_idx on t2 (c2);
CREATE INDEX
postgres=# alter table t2 replica identity using index t2_idx;
ALTER TABLE
postgres=# insert into t2 values(1,1);
INSERT 0 1
postgres=# update t2 set c1 = 100 where c1 = 1;
UPDATE 1
postgres=# create publication pub2 for table t2 where (c1 > 10);
CREATE PUBLICATION
postgres=# update t2 set c1 = 100 where c1 = 1;
ERROR: cannot update table "t2"
DETAIL: Column used in the publication WHERE expression is not part
of the replica identity.

Behaviour with the patch provided in [1] to resolve the issue:
postgres=# create table t3(c1 int, c2 INT GENERATED ALWAYS AS (c1 + 1)
STORED NOT NULL);
CREATE TABLE
postgres=# create unique index t3_idx on t3 (c2);
CREATE INDEX
postgres=# alter table t3 replica identity using index t3_idx;
ALTER TABLE
postgres=# insert into t3 values(1);
INSERT 0 1
postgres=# update t3 set c1 = 100 where c1 = 1;
UPDATE 1
postgres=# create publication pub3 for table t3;
CREATE PUBLICATION
postgres=# update t3 set c1 = 100 where c1 = 1;
ERROR: cannot update table "t3"
DETAIL: Column list used by the publication does not cover the
replica identity.

So, I think this behavior would be acceptable. Thoughts?

[1]: /messages/by-id/CANhcyEVw4V2Awe2AB6i0E5AJLNdASShGfdBLbUd1XtWDboymCA@mail.gmail.com

Thanks and Regards,
Shlok Kyal

#4Aleksander Alekseev
aleksander@timescale.com
In reply to: Shlok Kyal (#3)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Hi Shlok,

So, I think this behavior would be acceptable. Thoughts?

That's a fair point, thanks for sharing. Personally I find this
behavior somewhat suboptimal but since we already have it in certain
cases I guess what you propose might be acceptable.

I'm still not entirely happy about breaking the existing behavior in
the discussed case. Not sure what the lesser evil would be - breaking
it or keeping it as is.

Some input from other people on the mailing list would be appreciated.

--
Best regards,
Aleksander Alekseev

#5Amit Kapila
amit.kapila16@gmail.com
In reply to: Aleksander Alekseev (#4)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Wed, Nov 6, 2024 at 5:48 PM Aleksander Alekseev
<aleksander@timescale.com> wrote:

So, I think this behavior would be acceptable. Thoughts?

That's a fair point, thanks for sharing. Personally I find this
behavior somewhat suboptimal but since we already have it in certain
cases I guess what you propose might be acceptable.

This is not a suboptimal behavior but a must to have, otherwise, there
is no way we can identify the row to update on the subscriber side.
Also, this is not in certain cases but in all cases for UPDATE/DELETE,
we need REPLICA IDENTITY to be set. See more about REPLICA IDENTITY in
Alter Table docs [1]. The problem reported by Shlok is that even
though we have a REPLICA IDENTITY defined on a generated column but
still won't send the required column value (as generated columns are
skipped by default) to the subscriber which will lead to ERROR as
mentioned below. Now, one can argue that this is not expected from the
user or why the user would have such a setup but I think we should fix
the problem if it leads to unexpected behavior on the subscriber.

I'm still not entirely happy about breaking the existing behavior in
the discussed case. Not sure what the lesser evil would be - breaking
it or keeping it as is.

The current behavior is not acceptable because it would generate an
ERROR as follows on the subscriber:

2024-11-07 10:50:31.381 IST [16260] ERROR: publisher did not send
replica identity column expected by the logical replication target
relation "public.testpub_gencol"
2024-11-07 10:50:31.381 IST [16260] CONTEXT: processing remote data
for replication origin "pg_16389" during message type "UPDATE" for
replication target relation "public.testpub_gencol" in transaction
748, finished at 0/176D5D8
2024-11-07 10:50:31.398 IST [6216] LOG: background worker "logical
replication apply worker" (PID 16260) exited with exit code 1

Some input from other people on the mailing list would be appreciated.

We should fix this in the HEAD and back branches.

[1]: https://www.postgresql.org/docs/devel/sql-altertable.html

--
With Regards,
Amit Kapila.

#6Amit Kapila
amit.kapila16@gmail.com
In reply to: Shlok Kyal (#1)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

--
With Regards,
Amit Kapila.

#7Amit Kapila
amit.kapila16@gmail.com
In reply to: Amit Kapila (#5)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, Nov 7, 2024 at 11:04 AM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Wed, Nov 6, 2024 at 5:48 PM Aleksander Alekseev
<aleksander@timescale.com> wrote:

We should fix this in the HEAD and back branches.

BTW, I was thinking as to how to fix it on back branches and it seems
we should restrict to define REPLICA IDENTITY on stored generated
columns in the first place in back branches as those can't be
replicated. So, the following should fail:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;

Peter, do you have an opinion on this?

[1]: https://www.postgresql.org/docs/devel/ddl-generated-columns.html

--
With Regards,
Amit Kapila.

#8Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Amit Kapila (#6)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I
am currently working to fix this issue in back branches.

Thanks and Regards,
Shlok Kyal

Attachments:

v2-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patch (application/octet-stream; name=v2-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patch) [Download]
From 867fda5bb3f7e0ef2a8a1c5729fb7eca4d310832 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v2] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on table having unpublished generated column as REPLICA
IDENTITY is allowed. UPDATE/DELETE on such tables should not be allowed
---
 src/backend/commands/publicationcmds.c    | 103 ++++++++++++++++++++++
 src/backend/executor/execReplication.c    |  12 +++
 src/backend/utils/cache/relcache.c        |  16 ++++
 src/include/catalog/pg_publication.h      |   6 ++
 src/include/commands/publicationcmds.h    |   2 +
 src/test/regress/expected/publication.out |  24 +++++
 src/test/regress/sql/publication.sql      |  24 +++++
 7 files changed, 187 insertions(+)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..fb132b96cd 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,109 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	HeapTuple	tuple;
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		isnull;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	tuple = SearchSysCache2(PUBLICATIONRELMAP,
+							ObjectIdGetDatum(publish_as_relid),
+							ObjectIdGetDatum(pubid));
+
+	if (!HeapTupleIsValid(tuple))
+		return false;
+
+	(void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+						   Anum_pg_publication_rel_prattrs,
+						   &isnull);
+
+	if(isnull)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+
+		/*
+		 * Check if any REPLICA IDENTITY column is an generated column.
+		 */
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			char attgenerated = get_attgenerated(relid, attnum);
+
+			/*
+			 * If pubviaroot is true, we are validating the column list of the
+			 * parent table, but the bitmap contains the replica identity
+			 * information of the child table. The parent/child attnums may
+			 * not match, so translate them to the parent - get the attname
+			 * from the child, and look it up in the parent.
+			 */
+			if (pubviaroot)
+			{
+				/* attribute name in the child table */
+				char	   *colname = get_attname(relid, attnum, false);
+
+				/*
+				 * Determine the attnum for the attribute name in parent (we
+				 * are using the column list defined on the parent).
+				 */
+				attnum = get_attnum(publish_as_relid, colname);
+				attgenerated = get_attgenerated(publish_as_relid, attnum);
+			}
+
+			/*
+			 * Check if the column is a generated column.
+			 *
+			 * 'publish_generated_columns = false' and no column list is
+			 * specified for publication. So if the column is a generated
+			 * column, this implies that the REPLICA IDENTITY consists an
+			 * unpublished generated column.
+			 */
+			if (attgenerated == ATTRIBUTE_GENERATED_STORED)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	ReleaseSysCache(tuple);
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..1ba69320ea 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 5bbb654a5d..eea5a9fb52 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5712,6 +5712,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_has_valid_gen_cols = true;
 		return;
 	}
 
@@ -5726,6 +5727,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_has_valid_gen_cols = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5803,6 +5805,20 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * published.
+		 *
+		 * If the publication is FOR ALL TABLES we can skip the validation.
+		 */
+		if (!pubform->puballtables && !pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			pubdesc->replident_has_valid_gen_cols = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..5eeeced91d 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,12 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published.
+	 */
+	bool		replident_has_valid_gen_cols;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..0a862e93a9 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+												List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index a8949ffc2c..d0dfbd969f 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -670,6 +670,30 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  REPLICA IDENTITY consists of an unpublished generated column.
+DELETE FROM testpub_gencol WHERE a = 100;
+ERROR:  cannot delete from table "testpub_gencol"
+DETAIL:  REPLICA IDENTITY consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..d0a31b6c29 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,30 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
-- 
2.34.1

#9Alvaro Herrera
alvherre@alvh.no-ip.org
In reply to: Amit Kapila (#7)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On 2024-Nov-07, Amit Kapila wrote:

BTW, I was thinking as to how to fix it on back branches and it seems
we should restrict to define REPLICA IDENTITY on stored generated
columns in the first place in back branches as those can't be
replicated. So, the following should fail:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;

Peter, do you have an opinion on this?

I think a blanket restriction of this sort is not a good idea (at least
in back branches), because there might be people using replica
identities with stacks other than pgoutput. Would it work to enforce
the restriction when such a table is added to a publication?

--
Álvaro Herrera 48°01'N 7°57'E — https://www.EnterpriseDB.com/
"Nunca confiaré en un traidor. Ni siquiera si el traidor lo he creado yo"
(Barón Vladimir Harkonnen)

#10Amit Kapila
amit.kapila16@gmail.com
In reply to: Alvaro Herrera (#9)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, Nov 8, 2024 at 5:17 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-07, Amit Kapila wrote:

BTW, I was thinking as to how to fix it on back branches and it seems
we should restrict to define REPLICA IDENTITY on stored generated
columns in the first place in back branches as those can't be
replicated. So, the following should fail:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;

Peter, do you have an opinion on this?

I think a blanket restriction of this sort is not a good idea (at least
in back branches), because there might be people using replica
identities with stacks other than pgoutput.

Do you mean to say that people using plugins other than pgoutput may
already be sending generated columns, so defining replica identity
should be okay for them?

Would it work to enforce
the restriction when such a table is added to a publication?

But what if somebody defines REPLICA IDENTITY on the generated column
after adding the table to the publication?

--
With Regards,
Amit Kapila.

#11Amit Kapila
amit.kapila16@gmail.com
In reply to: Amit Kapila (#10)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Sat, Nov 9, 2024 at 8:46 AM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Fri, Nov 8, 2024 at 5:17 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-07, Amit Kapila wrote:

BTW, I was thinking as to how to fix it on back branches and it seems
we should restrict to define REPLICA IDENTITY on stored generated
columns in the first place in back branches as those can't be
replicated. So, the following should fail:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;

Peter, do you have an opinion on this?

I think a blanket restriction of this sort is not a good idea (at least
in back branches), because there might be people using replica
identities with stacks other than pgoutput.

Do you mean to say that people using plugins other than pgoutput may
already be sending generated columns, so defining replica identity
should be okay for them?

If we don't want to add a restriction to not create replica identity
on generated columns then I think the solution similar to HEAD should
be okay which is to restrict UPDATE/DELETE in such cases.

Also, another point against restricting defining REPLICA IDENTITY on
generated columns is that we do allow generated columns to be PRIMARY
KEY which is a DEFAULT for REPLICA IDENTITY, so that also needs to be
restricted. That won't be a good idea.

--
With Regards,
Amit Kapila.

#12Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Shlok Kyal (#8)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; UPDATE
+testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+			char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten anyway if pubviaroot is true. Also, get_attgenerated()
is not cheap.

2.

I think the patch fails to check the case where the table is marked REPLICA
IDENTITY FULL and the generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case, but it can still pass after applying the patch.

3.

+		 * If the publication is FOR ALL TABLES we can skip the validation.
+		 */

This comment is not clear to me; could you elaborate a bit more on it?

4.

Also, I think the patch does not handle the FOR ALL TABLES case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

5.

+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to be consistent
with other existing messages.

Best Regards,
Hou zj

#13Alvaro Herrera
alvherre@alvh.no-ip.org
In reply to: Zhijie Hou (Fujitsu) (#12)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On 2024-Nov-09, Amit Kapila wrote:

On Fri, Nov 8, 2024 at 5:17 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-07, Amit Kapila wrote:

BTW, I was thinking as to how to fix it on back branches and it seems
we should restrict to define REPLICA IDENTITY on stored generated
columns in the first place in back branches as those can't be
replicated.

I think a blanket restriction of this sort is not a good idea (at least
in back branches), because there might be people using replica
identities with stacks other than pgoutput.

Do you mean to say that people using plugins other than pgoutput may
already be sending generated columns, so defining replica identity
should be okay for them?

Yes.

If we don't want to add a restriction to not create replica identity
on generated columns then I think the solution similar to HEAD should
be okay which is to restrict UPDATE/DELETE in such cases.

Hmm, I don't know about this. Maybe nobody cares, but I'm uneasy about
it. I'm wondering about hypothetical cases where people are already
using this combination of features in stable branches, without pgoutput.
I think it's not great to add restrictions that didn't exist when they
upgraded to some stable branch. In branch master it's probably okay,
because they'll have to test before upgrading and they'll realize the
problem and have the chance to adjust (or complain) before calling the
upgrade good. But if we do that for stable branches, we'd deprive them
of the ability to do minor upgrades, which would be Not Good.

So, another option is to do nothing for stable branches.

Would it work to enforce the restriction when such a table is added
to a publication?

But what if somebody defines REPLICA IDENTITY on the generated column
after adding the table to the publication?

Well, maybe we can restrict the change of REPLICA IDENTITY if the table
is already in a pgoutput publication?

On 2024-Nov-12, Amit Kapila wrote:

Also, another point against restricting defining REPLICA IDENTITY on
generated columns is that we do allow generated columns to be PRIMARY
KEY which is a DEFAULT for REPLICA IDENTITY, so that also needs to be
restricted. That won't be a good idea.

Oh, that's a good point too.

It's not clear to me why pgoutput doesn't cope with generated columns in
replica identities. Maybe that can be reconsidered?

--
Álvaro Herrera 48°01'N 7°57'E — https://www.EnterpriseDB.com/
"La persona que no quería pecar / estaba obligada a sentarse
en duras y empinadas sillas / desprovistas, por cierto
de blandos atenuantes" (Patricio Vogel)

#14Amit Kapila
amit.kapila16@gmail.com
In reply to: Alvaro Herrera (#13)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, Nov 12, 2024 at 2:15 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-09, Amit Kapila wrote:

On Fri, Nov 8, 2024 at 5:17 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-07, Amit Kapila wrote:

BTW, I was thinking as to how to fix it on back branches and it seems
we should restrict to define REPLICA IDENTITY on stored generated
columns in the first place in back branches as those can't be
replicated.

I think a blanket restriction of this sort is not a good idea (at least
in back branches), because there might be people using replica
identities with stacks other than pgoutput.

Do you mean to say that people using plugins other than pgoutput may
already be sending generated columns, so defining replica identity
should be okay for them?

Yes.

If we don't want to add a restriction to not create replica identity
on generated columns then I think the solution similar to HEAD should
be okay which is to restrict UPDATE/DELETE in such cases.

Hmm, I don't know about this. Maybe nobody cares, but I'm uneasy about
it. I'm wondering about hypothetical cases where people is already
using this combination of features in stable branches, without pgoutput.
I think it's not great to add restrictions that didn't exist when they
upgraded to some stable branch. In branch master it's probably okay,
because they'll have to test before upgrading and they'll realize the
problem and have the chance to adjust (or complain) before calling the
upgrade good. But if we do that for stable branches, we'd deprive them
of the ability to do minor upgrades, which would be Not Good.

So, another option is to do nothing for stable branches.

Fair enough. The other point in favor of that option is that nobody
has reported this problem yet; my guess is that they have probably
not used such a combination, at least with the pgoutput plugin;
otherwise, they would have faced ERRORs on the subscriber. So, we
can do this only for HEAD and decide on the fix if anyone ever reports
this problem.

Would it work to enforce the restriction when such a table is added
to a publication?

But what if somebody defines REPLICA IDENTITY on the generated column
after adding the table to the publication?

Well, maybe we can restrict the change of REPLICA IDENTITY if the table
is already in a pgoutput publication?

What about the PRIMARY KEY case as shared in my later email? Even
apart from that, the plugin is decided via the slot, so we won't be able
to detect it from the table<->publication relationship.

On 2024-Nov-12, Amit Kapila wrote:

Also, another point against restricting defining REPLICA IDENTITY on
generated columns is that we do allow generated columns to be PRIMARY
KEY which is a DEFAULT for REPLICA IDENTITY, so that also needs to be
restricted. That won't be a good idea.

Oh, that's a good point too.

It's not clear to me why doesn't pgoutput cope with generated columns in
replica identities. Maybe that can be reconsidered?

In stable branches, we intentionally skip publishing generated columns
as we assumed that the subscriber side also had a generated column.
So, sending it would be a waste of network bandwidth. OTOH, when one
tries to replicate the changes to some other database that didn't have
the generated columns concept, it would create a problem. So we
developed a new feature for HEAD as part of commits 745217a051 and
7054186c4e which allows the publication of generated columns when
explicitly specified by the users.

--
With Regards,
Amit Kapila.

#15Alvaro Herrera
alvherre@alvh.no-ip.org
In reply to: Amit Kapila (#14)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On 2024-Nov-12, Amit Kapila wrote:

On Tue, Nov 12, 2024 at 2:15 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

So, another option is to do nothing for stable branches.

Fair enough. The other point in favor of that option is that nobody
has reported this problem yet but my guess is that they would have
probably not used such a combination at least with pgoutput plugin
otherwise, they would have faced the ERRORs on the subscriber. So, we
can do this only for HEAD and decide on the fix if anyone ever reports
this problem.

Right.

Well, maybe we can restrict the change of REPLICA IDENTITY if the table
is already in a pgoutput publication?

What about the PRIMARY KEY case as shared in my later email? Even
apart from that the plugin is decided via slot, so we won't be able to
detect from table<->publication relationship.

I responded to both emails together, my response is what you quoted
below:

On 2024-Nov-12, Amit Kapila wrote:

Also, another point against restricting defining REPLICA IDENTITY on
generated columns is that we do allow generated columns to be PRIMARY
KEY which is a DEFAULT for REPLICA IDENTITY, so that also needs to be
restricted. That won't be a good idea.

Oh, that's a good point too.

(I was acknowledging this as a problem case.)

It's not clear to me why doesn't pgoutput cope with generated columns in
replica identities. Maybe that can be reconsidered?

In stable branches, we intentionally skip publishing generated columns
as we assumed that the subscriber side also had a generated column.
So, sending it would be a waste of network bandwidth. OTOH, when one
tries to replicate the changes to some other database that didn't have
the generated columns concept, it would create a problem. So we
developed a new feature for HEAD as part of commits 745217a051 and
7054186c4e which allows the publication of generated columns when
explicitly specified by the users.

Ah, I think it's good then, we don't need to do anything further on
this. It's just not supported on earlier branches (and it doesn't work
with pgoutput, though it does with other plugins); and master has a
mechanism for it to work with any output plugin.

--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/

#16Amit Kapila
amit.kapila16@gmail.com
In reply to: Alvaro Herrera (#15)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, Nov 12, 2024 at 5:37 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-12, Amit Kapila wrote:

It's not clear to me why doesn't pgoutput cope with generated columns in
replica identities. Maybe that can be reconsidered?

In stable branches, we intentionally skip publishing generated columns
as we assumed that the subscriber side also had a generated column.
So, sending it would be a waste of network bandwidth. OTOH, when one
tries to replicate the changes to some other database that didn't have
the generated columns concept, it would create a problem. So we
developed a new feature for HEAD as part of commits 745217a051 and
7054186c4e which allows the publication of generated columns when
explicitly specified by the users.

Ah, I think it's good then, we don't need to do anything further on
this. It's just not supported on earlier branches (and it doesn't work
with pgoutput, though it does with other plugins); and master has a
mechanism for it to work with any output plugin.

I think we still need a fix on master for the case where generated
columns are not published but are part of the REPLICA IDENTITY, as that
could lead to failures in applying UPDATE and DELETE on the subscriber.
Am I missing something?

--
With Regards,
Amit Kapila.

#17Alvaro Herrera
alvherre@alvh.no-ip.org
In reply to: Amit Kapila (#16)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On 2024-Nov-12, Amit Kapila wrote:

I think we still need a fix for the master for the case when generated
columns are not published but are part of REPLICA IDENTITY as that
could lead to failures in applying UPDATE and DELETE on subscriber.

Ah, I thought that was already in place.

Am, I missing something?

Nope, it's me who was missing something.

--
Álvaro Herrera Breisgau, Deutschland — https://www.EnterpriseDB.com/
"Use it up, wear it out, make it do, or do without"

#18Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Zhijie Hou (Fujitsu) (#12)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; UPDATE
+testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch missed to check the case when table is marked REPLICA
IDENTITY FULL, and generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case, but it can still pass after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment seems not clear to me, could you elaborate a bit more on this ?

I missed handling the FOR ALL TABLES case. I have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLE case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to consistent
with other existing messages.

Fixed

I have attached the updated patch here.

Thanks and Regards,
Shlok Kyal

Attachments:

v3-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/octet-stream; name=v3-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From 0292ed3510a676dfd6f5f77360008de30af16475 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v3] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE is currently allowed on a table whose REPLICA IDENTITY
includes an unpublished generated column. UPDATE/DELETE on such tables
should not be allowed.
---
 src/backend/commands/publicationcmds.c    | 117 ++++++++++++++++++++++
 src/backend/executor/execReplication.c    |  12 +++
 src/backend/utils/cache/relcache.c        |  14 +++
 src/include/catalog/pg_publication.h      |   6 ++
 src/include/commands/publicationcmds.h    |   2 +
 src/test/regress/expected/publication.out |  32 ++++++
 src/test/regress/sql/publication.sql      |  29 ++++++
 src/test/subscription/t/100_bugs.pl       |  16 +--
 8 files changed, 215 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..21ed6344ee 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,123 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot, bool puballtables)
+{
+	HeapTuple	tuple;
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		isnull;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	if (!puballtables)
+	{
+		tuple = SearchSysCache2(PUBLICATIONRELMAP,
+								ObjectIdGetDatum(publish_as_relid),
+								ObjectIdGetDatum(pubid));
+
+		if (!HeapTupleIsValid(tuple))
+			return false;
+
+		(void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+							   Anum_pg_publication_rel_prattrs,
+							   &isnull);
+
+		ReleaseSysCache(tuple);
+	}
+
+	if(puballtables || isnull)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+		   relation->rd_att->constr && relation->rd_att->constr->has_generated_stored)
+			result = true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+
+		/*
+		 * Check if any REPLICA IDENTITY column is an generated column.
+		 */
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			char attgenerated;
+
+			/*
+			 * If pubviaroot is true, we are validating the column list of the
+			 * parent table, but the bitmap contains the replica identity
+			 * information of the child table. The parent/child attnums may
+			 * not match, so translate them to the parent - get the attname
+			 * from the child, and look it up in the parent.
+			 */
+			if (pubviaroot)
+			{
+				/* attribute name in the child table */
+				char	   *colname = get_attname(relid, attnum, false);
+
+				/*
+				 * Determine the attnum for the attribute name in parent (we
+				 * are using the column list defined on the parent).
+				 */
+				attnum = get_attnum(publish_as_relid, colname);
+				attgenerated = get_attgenerated(publish_as_relid, attnum);
+			}
+			else
+				attgenerated = get_attgenerated(relid, attnum);
+
+			/*
+			 * Check if the column is a generated column.
+			 *
+			 * 'publish_generated_columns = false' and no column list is
+			 * specified for publication. So if the column is a generated
+			 * column, this implies that the REPLICA IDENTITY consists an
+			 * unpublished generated column.
+			 */
+			if (attgenerated == ATTRIBUTE_GENERATED_STORED)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..d3517db8b3 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..87e1c12bac 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_has_valid_gen_cols = true;
 		return;
 	}
 
@@ -5750,6 +5751,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_has_valid_gen_cols = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5829,18 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot, pubform->puballtables))
+		{
+			pubdesc->replident_has_valid_gen_cols = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..5eeeced91d 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,12 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published.
+	 */
+	bool		replident_has_valid_gen_cols;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..23153afd8a 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+												List *ancestors, bool pubviaroot, bool puballtables);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..fe17dda5c9 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,38 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+DELETE FROM testpub_gencol WHERE a = 100;
+ERROR:  cannot delete from table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+DELETE FROM testpub_gencol WHERE a = 100;
+ERROR:  cannot delete from table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..edf71e5325 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,35 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#19Amit Kapila
amit.kapila16@gmail.com
In reply to: Alvaro Herrera (#17)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, Nov 12, 2024 at 7:05 PM Alvaro Herrera <alvherre@alvh.no-ip.org> wrote:

On 2024-Nov-12, Amit Kapila wrote:

I think we still need a fix for the master for the case when generated
columns are not published but are part of REPLICA IDENTITY as that
could lead to failures in applying UPDATE and DELETE on subscriber.

Ah, I thought that was already in place.

No, we left it with the thought that we needed something for it in the
back branches as well. But now that we have decided not to do anything
for the back branches, we should fix it in HEAD.

Am I missing something?

Nope, it's me who was missing something.

No problem, thanks for all the feedback and helping us to conclude.

--
With Regards,
Amit Kapila.

#20vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#18)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Wed, 13 Nov 2024 at 11:15, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; UPDATE
+testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch fails to check the case where the table is marked REPLICA
IDENTITY FULL and the generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in the above case, but it still succeeds after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment is not clear to me; could you elaborate a bit more on it?

I missed handling the FOR ALL TABLES case. I have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLES case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to be consistent
with other existing messages.

Fixed

I have attached the updated patch here.

Few comments:
1) In the first check, relation->rd_rel->relispartition is also checked,
whereas in the second one below it is not. Shouldn't the same check be
there as well, to avoid a few function calls that are not required?
+       if (pubviaroot && relation->rd_rel->relispartition)
+       {
+               publish_as_relid =
GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+               if (!OidIsValid(publish_as_relid))
+                       publish_as_relid = relid;
+       }
+
+                       if (pubviaroot)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);
2) I think we could use check_and_fetch_column_list to see that it is
not a column list publication instead of below code:
+       if (!puballtables)
+       {
+               tuple = SearchSysCache2(PUBLICATIONRELMAP,
+
ObjectIdGetDatum(publish_as_relid),
+
ObjectIdGetDatum(pubid));
+
+               if (!HeapTupleIsValid(tuple))
+                       return false;
+
+               (void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+
Anum_pg_publication_rel_prattrs,
+                                                          &isnull);
+
+               ReleaseSysCache(tuple);
+       }
+
+       if(puballtables || isnull)
3) Since there is only a single statement, remove the enclosing braces:
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
                   pubform->pubviaroot, pubform->puballtables))
+               {
+                       pubdesc->replident_has_valid_gen_cols = false;
+               }
4) Pgindent should be run; there are a few issues:
4.a)
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+
                         List *ancestors, bool pubviaroot, bool
puballtables);
4.b)
+       }
+
+       if(puballtables || isnull)
+       {
+               int                     x;
+               Bitmapset  *idattrs = NULL;
4.c)
+                * generated column we should error out.
+                */
+               if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                  relation->rd_att->constr &&
relation->rd_att->constr->has_generated_stored)
+                       result = true;
4.d)
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
+                       AttrNumber      attnum = (x +
FirstLowInvalidHeapAttributeNumber);
+                       char attgenerated;
5) You could do this in a single line comment:
+               /*
+                * Check if any REPLICA IDENTITY column is an generated column.
+                */
+               while ((x = bms_next_member(idattrs, x)) >= 0)
6) I feel that testing one of UPDATE or DELETE is enough in this case, as the code
path is the same:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY
is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with
(publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;

Regards,
Vignesh

#21Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#20)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Thanks for providing the comments.

On Thu, 14 Nov 2024 at 12:22, vignesh C <vignesh21@gmail.com> wrote:

On Wed, 13 Nov 2024 at 11:15, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; UPDATE
+testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch fails to check the case where the table is marked REPLICA
IDENTITY FULL and the generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in the above case, but it still succeeds after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment is not clear to me; could you elaborate a bit more on it?

I missed handling the FOR ALL TABLES case. I have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLES case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to be consistent
with other existing messages.

Fixed

I have attached the updated patch here.

Few comments:
1) In the first check, relation->rd_rel->relispartition is also checked,
whereas in the second one below it is not. Shouldn't the same check be
there as well, to avoid a few function calls that are not required?
+       if (pubviaroot && relation->rd_rel->relispartition)
+       {
+               publish_as_relid =
GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+               if (!OidIsValid(publish_as_relid))
+                       publish_as_relid = relid;
+       }
+
+                       if (pubviaroot)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

I have updated the if condition.

2) I think we could use check_and_fetch_column_list to see that it is
not a column list publication instead of below code:
+       if (!puballtables)
+       {
+               tuple = SearchSysCache2(PUBLICATIONRELMAP,
+
ObjectIdGetDatum(publish_as_relid),
+
ObjectIdGetDatum(pubid));
+
+               if (!HeapTupleIsValid(tuple))
+                       return false;
+
+               (void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+
Anum_pg_publication_rel_prattrs,
+                                                          &isnull);
+
+               ReleaseSysCache(tuple);
+       }
+
+       if(puballtables || isnull)

Yes we can use it. I have updated the patch.

3) Since there is only a single statement, remove the enclosing braces:
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
pubform->pubviaroot, pubform->puballtables))
+               {
+                       pubdesc->replident_has_valid_gen_cols = false;
+               }

Fixed

4) Pgindent should be run; there are a few issues:
4.a)
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+
List *ancestors, bool pubviaroot, bool
puballtables);
4.b)
+       }
+
+       if(puballtables || isnull)
+       {
+               int                     x;
+               Bitmapset  *idattrs = NULL;
4.c)
+                * generated column we should error out.
+                */
+               if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                  relation->rd_att->constr &&
relation->rd_att->constr->has_generated_stored)
+                       result = true;
4.d)
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
+                       AttrNumber      attnum = (x +
FirstLowInvalidHeapAttributeNumber);
+                       char attgenerated;

Fixed

5) You could do this in a single line comment:
+               /*
+                * Check if any REPLICA IDENTITY column is an generated column.
+                */
+               while ((x = bms_next_member(idattrs, x)) >= 0)

Fixed

6) I feel that testing one of UPDATE or DELETE is enough in this case, as the code
path is the same:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY
is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with
(publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;

Removed the 'DELETE' case.

I have addressed the comments and updated the patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v4-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/octet-stream; name=v4-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From 1aaa1cf9379ae1961a05f417c7bf6553bf789cf6 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v4] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on table having unpublished generated column as REPLICA
IDENTITY is allowed. UPDATE/DELETE on such tables should not be allowed
---
 src/backend/commands/publicationcmds.c    | 102 ++++++++++++++++++++++
 src/backend/executor/execReplication.c    |  12 +++
 src/backend/utils/cache/relcache.c        |  12 +++
 src/include/catalog/pg_publication.h      |   6 ++
 src/include/commands/publicationcmds.h    |   2 +
 src/test/regress/expected/publication.out |  25 ++++++
 src/test/regress/sql/publication.sql      |  26 ++++++
 src/test/subscription/t/100_bugs.pl       |  16 +---
 8 files changed, 188 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..a095c61edc 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,108 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+			relation->rd_att->constr && relation->rd_att->constr->has_generated_stored)
+			result = true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+
+		/* Check if any REPLICA IDENTITY column is a generated column. */
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			char		attgenerated;
+
+			/*
+			 * If pubviaroot is true, we are validating the column list of the
+			 * parent table, but the bitmap contains the replica identity
+			 * information of the child table. The parent/child attnums may
+			 * not match, so translate them to the parent - get the attname
+			 * from the child, and look it up in the parent.
+			 */
+			if (pubviaroot && relation->rd_rel->relispartition)
+			{
+				/* attribute name in the child table */
+				char	   *colname = get_attname(relid, attnum, false);
+
+				/*
+				 * Determine the attnum for the attribute name in parent (we
+				 * are using the column list defined on the parent).
+				 */
+				attnum = get_attnum(publish_as_relid, colname);
+				attgenerated = get_attgenerated(publish_as_relid, attnum);
+			}
+			else
+				attgenerated = get_attgenerated(relid, attnum);
+
+			/*
+			 * Check if the column is a generated column.
+			 *
+			 * 'publish_generated_columns = false' and no column list is
+			 * specified for publication. So if the column is a generated
+			 * column, this implies that the REPLICA IDENTITY contains an
+			 * unpublished generated column.
+			 */
+			if (attgenerated == ATTRIBUTE_GENERATED_STORED)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..d3517db8b3 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..02d7fba160 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_has_valid_gen_cols = true;
 		return;
 	}
 
@@ -5750,6 +5751,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_has_valid_gen_cols = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5829,16 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY are
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+			pubdesc->replident_has_valid_gen_cols = false;
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..5eeeced91d 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,12 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published.
+	 */
+	bool		replident_has_valid_gen_cols;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..62198ead23 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#22vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#21)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, 14 Nov 2024 at 15:51, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Thu, 14 Nov 2024 at 12:22, vignesh C <vignesh21@gmail.com> wrote:

On Wed, 13 Nov 2024 at 11:15, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; UPDATE
+testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch missed to check the case when table is marked REPLICA
IDENTITY FULL, and generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case, but it can still pass after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment seems not clear to me, could you elaborate a bit more on this ?

I missed to handle the case FOR ALL TABLES. Have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLE case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to consistent
with other existing messages.

Fixed

I have attached the updated patch here.

Few comments:
1) In the first check relation->rd_rel->relispartition also is checked
whereas in the below it is not checked, shouldn't the same check be
there below to avoid few of the function calls which are not required:
+       if (pubviaroot && relation->rd_rel->relispartition)
+       {
+               publish_as_relid =
GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+               if (!OidIsValid(publish_as_relid))
+                       publish_as_relid = relid;
+       }
+
+                       if (pubviaroot)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

I have updated the if condition.

2) I think we could use check_and_fetch_column_list to see that it is
not a column list publication instead of below code:
+       if (!puballtables)
+       {
+               tuple = SearchSysCache2(PUBLICATIONRELMAP,
+
ObjectIdGetDatum(publish_as_relid),
+
ObjectIdGetDatum(pubid));
+
+               if (!HeapTupleIsValid(tuple))
+                       return false;
+
+               (void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+
Anum_pg_publication_rel_prattrs,
+                                                          &isnull);
+
+               ReleaseSysCache(tuple);
+       }
+
+       if(puballtables || isnull)

Yes we can use it. I have updated the patch.

3) Since there is only a single statement, remove the enclosing braces:
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
pubform->pubviaroot, pubform->puballtables))
+               {
+                       pubdesc->replident_has_valid_gen_cols = false;
+               }

Fixed

4) Pgindent should be run; there are a few issues:
4.a)
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+
List *ancestors, bool pubviaroot, bool
puballtables);
4.b)
+       }
+
+       if(puballtables || isnull)
+       {
+               int                     x;
+               Bitmapset  *idattrs = NULL;
4.c)
+                * generated column we should error out.
+                */
+               if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                  relation->rd_att->constr &&
relation->rd_att->constr->has_generated_stored)
+                       result = true;
4.d)
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
+                       AttrNumber      attnum = (x +
FirstLowInvalidHeapAttributeNumber);
+                       char attgenerated;

Fixed

5) You could do this in a single line comment:
+               /*
+                * Check if any REPLICA IDENTITY column is an generated column.
+                */
+               while ((x = bms_next_member(idattrs, x)) >= 0)

Fixed

6) I felt one of update or delete is enough in this case as the code
path is same:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY
is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with
(publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;

Removed the 'DELETE' case.

I have addressed the comments and updated the patch.

Few comments:
1) Current patch will not handle this scenario where subset of columns
are specified in the replica identity index:
CREATE TABLE t1 (a INT not null, a1 int not null, a2 int not null, b
INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
create unique index idx1_t1 on t1(a, a1);

-- Replica identity will have subset of table columns
alter table t1 replica identity using index idx1_t1 ;
insert into t1 values(1,1,1);
create publication pub1 for table t1;

postgres=# update t1 set a = 2;
UPDATE 1

I felt we should throw an error in this case too.

2) Instead of checking if replica identity has a generated column, can
we check if the columns that will be published and the columns in the
replica identity matches:
+                       if (pubviaroot && relation->rd_rel->relispartition)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);
3) publish_as_relid will be set accordingly based on pubviaroot, so it
need not be initialized:
+       Oid                     relid = RelationGetRelid(relation);
+       Oid                     publish_as_relid = RelationGetRelid(relation);
+       bool            result = false;

Regards,
Vignesh

#23Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#22)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Thanks for providing the comments

On Fri, 15 Nov 2024 at 10:59, vignesh C <vignesh21@gmail.com> wrote:

On Thu, 14 Nov 2024 at 15:51, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Thu, 14 Nov 2024 at 12:22, vignesh C <vignesh21@gmail.com> wrote:

On Wed, 13 Nov 2024 at 11:15, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; UPDATE
+testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the
replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch missed to check the case when table is marked REPLICA
IDENTITY FULL, and generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case, but it can still pass after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment seems not clear to me, could you elaborate a bit more on this ?

I missed to handle the case FOR ALL TABLES. Have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLE case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to consistent
with other existing messages.

Fixed

I have attached the updated patch here.

Few comments:
1) In the first check relation->rd_rel->relispartition also is checked
whereas in the below it is not checked, shouldn't the same check be
there below to avoid few of the function calls which are not required:
+       if (pubviaroot && relation->rd_rel->relispartition)
+       {
+               publish_as_relid =
GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+               if (!OidIsValid(publish_as_relid))
+                       publish_as_relid = relid;
+       }
+
+                       if (pubviaroot)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

I have updated the if condition.

2) I think we could use check_and_fetch_column_list to see that it is
not a column list publication instead of below code:
+       if (!puballtables)
+       {
+               tuple = SearchSysCache2(PUBLICATIONRELMAP,
+
ObjectIdGetDatum(publish_as_relid),
+
ObjectIdGetDatum(pubid));
+
+               if (!HeapTupleIsValid(tuple))
+                       return false;
+
+               (void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+
Anum_pg_publication_rel_prattrs,
+                                                          &isnull);
+
+               ReleaseSysCache(tuple);
+       }
+
+       if(puballtables || isnull)

Yes we can use it. I have updated the patch.

3) Since there is only a single statement, remove the enclosing braces:
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
pubform->pubviaroot, pubform->puballtables))
+               {
+                       pubdesc->replident_has_valid_gen_cols = false;
+               }

Fixed

4) Pgindent should be run; there are a few issues:
4.a)
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+
List *ancestors, bool pubviaroot, bool
puballtables);
4.b)
+       }
+
+       if(puballtables || isnull)
+       {
+               int                     x;
+               Bitmapset  *idattrs = NULL;
4.c)
+                * generated column we should error out.
+                */
+               if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                  relation->rd_att->constr &&
relation->rd_att->constr->has_generated_stored)
+                       result = true;
4.d)
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
+                       AttrNumber      attnum = (x +
FirstLowInvalidHeapAttributeNumber);
+                       char attgenerated;

Fixed

5) You could do this in a single line comment:
+               /*
+                * Check if any REPLICA IDENTITY column is an generated column.
+                */
+               while ((x = bms_next_member(idattrs, x)) >= 0)

Fixed

6) I felt one of update or delete is enough in this case as the code
path is same:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY
is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with
(publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;

Removed the 'DELETE' case.

I have addressed the comments and updated the patch.

Few comments:
1) Current patch will not handle this scenario where subset of columns
are specified in the replica identity index:
CREATE TABLE t1 (a INT not null, a1 int not null, a2 int not null, b
INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
create unique index idx1_t1 on t1(a, a1);

-- Replica identity will have subset of table columns
alter table t1 replica identity using index idx1_t1 ;
insert into t1 values(1,1,1);
create publication pub1 for table t1;

postgres=# update t1 set a = 2;
UPDATE 1

I felt we should throw an error in this case too.

I feel the above behaviour is expected. I think we can specify a
subset of columns in the replica identity index as per documentation
[1]: https://www.postgresql.org/docs/current/logical-replication-publication.html

2) Instead of checking if replica identity has a generated column, can
we check if the columns that will be published and the columns in the
replica identity matches:
+                       if (pubviaroot && relation->rd_rel->relispartition)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

Fixed

3) publish_as_relid will be set accordingly based on pubviaroot, so it
need not be initialized:
+       Oid                     relid = RelationGetRelid(relation);
+       Oid                     publish_as_relid = RelationGetRelid(relation);
+       bool            result = false;

Fixed

I have addressed the comments and attached the updated patch.

[1]: https://www.postgresql.org/docs/current/logical-replication-publication.html

Thanks and Regards,
Shlok Kyal

Attachments:

v5-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/x-patch; name=v5-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From 924891000ac8ff1f4a77dc9c2a0c46ab141e8f5c Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v5] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

Currently, UPDATE/DELETE on a table that has an unpublished generated column
as REPLICA IDENTITY is allowed. Disallow UPDATE/DELETE on such tables.
---
 src/backend/commands/publicationcmds.c    | 88 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 12 ++++
 src/include/catalog/pg_publication.h      |  6 ++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 +++++++
 src/test/regress/sql/publication.sql      | 26 +++++++
 src/test/subscription/t/100_bugs.pl       | 16 +----
 8 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..c354094c7d 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,94 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * The check is only made when no column list is found for the publication;
+ * returns true if any replica identity column is then left unpublished.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid;
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+	else
+		publish_as_relid = relid;
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+		Bitmapset  *columns = NULL;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+			relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			result = true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		/*
+		 * Bitmap of published columns when publish_generated_columns is
+		 * 'false' and no column list is specified.
+		 */
+		columns = pub_form_cols_map(relation, false);
+
+		/*
+		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
+		 * offset (to handle system columns the usual way), while 'columns'
+		 * is looked up below without the offset, so we can't do
+		 * bms_is_subset(). Instead, we have to loop over the idattrs and
+		 * check that each of them is a member of 'columns'.
+		 */
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+
+			/* replica identity column is not among the published columns */
+			if (!bms_is_member(attnum, columns))
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..d3517db8b3 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..02d7fba160 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_has_valid_gen_cols = true;
 		return;
 	}
 
@@ -5750,6 +5751,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_has_valid_gen_cols = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5829,16 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY are
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+			pubdesc->replident_has_valid_gen_cols = false;
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..5eeeced91d 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,12 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published.
+	 */
+	bool		replident_has_valid_gen_cols;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..62198ead23 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#24vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#23)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, 15 Nov 2024 at 16:45, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments

On Fri, 15 Nov 2024 at 10:59, vignesh C <vignesh21@gmail.com> wrote:

On Thu, 14 Nov 2024 at 15:51, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Thu, 14 Nov 2024 at 12:22, vignesh C <vignesh21@gmail.com> wrote:

On Wed, 13 Nov 2024 at 11:15, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch fails to check the case where the table is marked REPLICA
IDENTITY FULL and the generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case, but it can still pass after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment is not clear to me; could you elaborate a bit more on it?

I had missed handling the FOR ALL TABLES case. I have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLE case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to be
consistent with other existing messages.

Fixed

I have attached the updated patch here.

Few comments:
1) The first check also tests relation->rd_rel->relispartition, whereas
the check below does not. Shouldn't the same test be added below to
avoid a few function calls that are not required:
+       if (pubviaroot && relation->rd_rel->relispartition)
+       {
+               publish_as_relid =
GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+               if (!OidIsValid(publish_as_relid))
+                       publish_as_relid = relid;
+       }
+
+                       if (pubviaroot)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

I have updated the if condition.

2) I think we could use check_and_fetch_column_list to see that it is
not a column list publication instead of below code:
+       if (!puballtables)
+       {
+               tuple = SearchSysCache2(PUBLICATIONRELMAP,
+
ObjectIdGetDatum(publish_as_relid),
+
ObjectIdGetDatum(pubid));
+
+               if (!HeapTupleIsValid(tuple))
+                       return false;
+
+               (void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+
Anum_pg_publication_rel_prattrs,
+                                                          &isnull);
+
+               ReleaseSysCache(tuple);
+       }
+
+       if(puballtables || isnull)

Yes we can use it. I have updated the patch.

3) Since there is only a single statement, remove the enclosing braces:
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
pubform->pubviaroot, pubform->puballtables))
+               {
+                       pubdesc->replident_has_valid_gen_cols = false;
+               }

Fixed

4) pgindent should be run; there are a few issues:
4.a)
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+
List *ancestors, bool pubviaroot, bool
puballtables);
4.b)
+       }
+
+       if(puballtables || isnull)
+       {
+               int                     x;
+               Bitmapset  *idattrs = NULL;
4.c)
+                * generated column we should error out.
+                */
+               if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                  relation->rd_att->constr &&
relation->rd_att->constr->has_generated_stored)
+                       result = true;
4.d)
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
+                       AttrNumber      attnum = (x +
FirstLowInvalidHeapAttributeNumber);
+                       char attgenerated;

Fixed

5) You could do this in a single line comment:
+               /*
+                * Check if any REPLICA IDENTITY column is an generated column.
+                */
+               while ((x = bms_next_member(idattrs, x)) >= 0)

Fixed

6) I felt one of update or delete is enough in this case as the code
path is same:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY
is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with
(publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;

Removed the 'DELETE' case.

I have addressed the comments and updated the patch.

Few comments:
1) Current patch will not handle this scenario where subset of columns
are specified in the replica identity index:
CREATE TABLE t1 (a INT not null, a1 int not null, a2 int not null, b
INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
create unique index idx1_t1 on t1(a, a1);

-- Replica identity will have subset of table columns
alter table t1 replica identity using index idx1_t1 ;
insert into t1 values(1,1,1);
create publication pub1 for table t1;

postgres=# update t1 set a = 2;
UPDATE 1

I felt we should throw an error in this case too.

I feel the above behaviour is expected. I think we can specify a
subset of columns in the replica identity index as per documentation
[1]. Thoughts?

2) Instead of checking if replica identity has a generated column, can
we check if the columns that will be published and the columns in the
replica identity match:
+                       if (pubviaroot && relation->rd_rel->relispartition)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

Fixed

3) publish_as_relid will be set accordingly based on pubviaroot, so it
need not be initialized:
+       Oid                     relid = RelationGetRelid(relation);
+       Oid                     publish_as_relid = RelationGetRelid(relation);
+       bool            result = false;

Fixed

I have addressed the comments and attached the updated patch.

Few comments:
1) I felt we can return from here after identifying it is replica
identity full instead of processing further:
+               /*
+                * REPLICA IDENTITY can be FULL only if there is no
column list for
+                * publication. If REPLICA IDENTITY is set as FULL and
relation has a
+                * generated column we should error out.
+                */
+               if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                       relation->rd_att->constr &&
+                       relation->rd_att->constr->has_generated_stored)
+                       result = true;
2) columns bms also should be freed here:
                     /* replica identity column, not covered by the
column list */
+                       if (!bms_is_member(attnum, columns))
+                       {
+                               result = true;
+                               break;
+                       }
+               }
+
+               bms_free(idattrs);
3) Error detail message should begin with upper case:
3.a)
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
                                 errmsg("cannot update table \"%s\"",
                                                RelationGetRelationName(rel)),
                                 errdetail("Column list used by the
publication does not cover the replica identity.")));
+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("replica identity consists
of an unpublished generated column.")));
        else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
                ereport(ERROR,
                                (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
3.b) Similarly here too:
                                 errdetail("Column list used by the
publication does not cover the replica identity.")));
+       else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot delete from table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("replica identity consists
of an unpublished generated column.")));

Regards,
Vignesh

#25Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#24)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, 15 Nov 2024 at 20:31, vignesh C <vignesh21@gmail.com> wrote:

On Fri, 15 Nov 2024 at 16:45, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments

On Fri, 15 Nov 2024 at 10:59, vignesh C <vignesh21@gmail.com> wrote:

On Thu, 14 Nov 2024 at 15:51, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Thu, 14 Nov 2024 at 12:22, vignesh C <vignesh21@gmail.com> wrote:

On Wed, 13 Nov 2024 at 11:15, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Tue, 12 Nov 2024 at 12:52, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 8, 2024 7:06 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Hi Amit,

On Thu, 7 Nov 2024 at 11:37, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Tue, Nov 5, 2024 at 12:53 PM Shlok Kyal <shlok.kyal.oss@gmail.com>

wrote:

To avoid the issue, we can disallow UPDATE/DELETE on table with
unpublished generated column as REPLICA IDENTITY. I have attached a
patch for the same.

+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Column list used by the publication does not cover the replica identity.

This is not a correct ERROR message as the publication doesn't have
any column list associated with it. You have added the code to detect
this in the column list code path which I think is not required. BTW,
you also need to consider the latest commit 7054186c4e for this. I
guess you need to keep another flag in PublicationDesc to detect this
and then give an appropriate ERROR.

I have addressed the comments and provided an updated patch. Also, I am
currently working to fix this issue in back branches.

Thanks for the patch. I am reviewing it and have some initial comments:

1.
+                       char attgenerated = get_attgenerated(relid, attnum);
+

I think it's unnecessary to initialize attgenerated here because the value will
be overwritten if pubviaroot is true anyway. Also, the get_attgenerated()
is not cheap.

Fixed

2.

I think the patch fails to check the case where the table is marked REPLICA
IDENTITY FULL and the generated column is not published:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case, but it can still pass after applying the patch.

Fixed

3.

+                * If the publication is FOR ALL TABLES we can skip the validation.
+                */

This comment is not clear to me; could you elaborate a bit more on it?

I had missed handling the FOR ALL TABLES case. I have removed the comment.

4.

Also, I think the patch does not handle the FOR ALL TABLE case correctly:

CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR ALL TABLEs;
UPDATE testpub_gencol SET a = 2;

I expected the UPDATE to fail in above case as well.

Fixed

5.

+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("REPLICA IDENTITY consists of an unpublished generated column.")));

I think it would be better to use lower case "replica identity" to be
consistent with other existing messages.

Fixed

I have attached the updated patch here.

Few comments:
1) The first check also tests relation->rd_rel->relispartition, whereas
the check below does not. Shouldn't the same test be added below to
avoid a few function calls that are not required:
+       if (pubviaroot && relation->rd_rel->relispartition)
+       {
+               publish_as_relid =
GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+               if (!OidIsValid(publish_as_relid))
+                       publish_as_relid = relid;
+       }
+
+                       if (pubviaroot)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

I have updated the if condition.

2) I think we could use check_and_fetch_column_list to see that it is
not a column list publication instead of below code:
+       if (!puballtables)
+       {
+               tuple = SearchSysCache2(PUBLICATIONRELMAP,
+
ObjectIdGetDatum(publish_as_relid),
+
ObjectIdGetDatum(pubid));
+
+               if (!HeapTupleIsValid(tuple))
+                       return false;
+
+               (void) SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
+
Anum_pg_publication_rel_prattrs,
+                                                          &isnull);
+
+               ReleaseSysCache(tuple);
+       }
+
+       if(puballtables || isnull)

Yes we can use it. I have updated the patch.

3) Since there is only a single statement, remove the enclosing braces:
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
pubform->pubviaroot, pubform->puballtables))
+               {
+                       pubdesc->replident_has_valid_gen_cols = false;
+               }

Fixed

4) pgindent should be run; there are a few issues:
4.a)
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+
List *ancestors, bool pubviaroot, bool
puballtables);
4.b)
+       }
+
+       if(puballtables || isnull)
+       {
+               int                     x;
+               Bitmapset  *idattrs = NULL;
4.c)
+                * generated column we should error out.
+                */
+               if(relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                  relation->rd_att->constr &&
relation->rd_att->constr->has_generated_stored)
+                       result = true;
4.d)
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
+                       AttrNumber      attnum = (x +
FirstLowInvalidHeapAttributeNumber);
+                       char attgenerated;

Fixed

5) You could do this in a single line comment:
+               /*
+                * Check if any REPLICA IDENTITY column is an generated column.
+                */
+               while ((x = bms_next_member(idattrs, x)) >= 0)

Fixed

6) I felt one of update or delete is enough in this case as the code
path is same:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY
is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with
(publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DELETE FROM testpub_gencol WHERE a = 100;

Removed the 'DELETE' case.

I have addressed the comments and updated the patch.

Few comments:
1) Current patch will not handle this scenario where subset of columns
are specified in the replica identity index:
CREATE TABLE t1 (a INT not null, a1 int not null, a2 int not null, b
INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
create unique index idx1_t1 on t1(a, a1);

-- Replica identity will have subset of table columns
alter table t1 replica identity using index idx1_t1 ;
insert into t1 values(1,1,1);
create publication pub1 for table t1;

postgres=# update t1 set a = 2;
UPDATE 1

I felt we should throw an error in this case too.

I feel the above behaviour is expected. I think we can specify a
subset of columns in the replica identity index as per documentation
[1]. Thoughts?

2) Instead of checking if replica identity has a generated column, can
we check if the columns that will be published and the columns in the
replica identity match:
+                       if (pubviaroot && relation->rd_rel->relispartition)
+                       {
+                               /* attribute name in the child table */
+                               char       *colname =
get_attname(relid, attnum, false);
+
+                               /*
+                                * Determine the attnum for the
attribute name in parent (we
+                                * are using the column list defined
on the parent).
+                                */
+                               attnum = get_attnum(publish_as_relid, colname);
+                               attgenerated =
get_attgenerated(publish_as_relid, attnum);
+                       }
+                       else
+                               attgenerated = get_attgenerated(relid, attnum);

Fixed

3) publish_as_relid will be set accordingly based on pubviaroot, so it
need not be initialized:
+       Oid                     relid = RelationGetRelid(relation);
+       Oid                     publish_as_relid = RelationGetRelid(relation);
+       bool            result = false;

Fixed

I have addressed the comments and attached the updated patch.

Few comments:
1) I felt we can return from here after identifying it is replica
identity full instead of processing further:
+               /*
+                * REPLICA IDENTITY can be FULL only if there is no
column list for
+                * publication. If REPLICA IDENTITY is set as FULL and
relation has a
+                * generated column we should error out.
+                */
+               if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+                       relation->rd_att->constr &&
+                       relation->rd_att->constr->has_generated_stored)
+                       result = true;
2) columns bms also should be freed here:
/* replica identity column, not covered by the
column list */
+                       if (!bms_is_member(attnum, columns))
+                       {
+                               result = true;
+                               break;
+                       }
+               }
+
+               bms_free(idattrs);
3) Error detail message should begin with upper case:
3.a)
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
errmsg("cannot update table \"%s\"",
RelationGetRelationName(rel)),
errdetail("Column list used by the
publication does not cover the replica identity.")));
+       else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot update table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("replica identity consists
of an unpublished generated column.")));
else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
3.b) Similarly here too:
errdetail("Column list used by the
publication does not cover the replica identity.")));
+       else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("cannot delete from table \"%s\"",
+                                               RelationGetRelationName(rel)),
+                                errdetail("replica identity consists
of an unpublished generated column.")));

Thanks for providing the comments. I have fixed all the comments and
attached the updated patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v6-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/octet-stream; name=v6-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From 6a59b708494ad8e669a22714f87f41fe08f5cb58 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v6] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on a table having an unpublished generated column as REPLICA
IDENTITY is currently allowed. UPDATE/DELETE on such tables should not be allowed.
---
 src/backend/commands/publicationcmds.c    | 89 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 +++
 src/backend/utils/cache/relcache.c        | 12 +++
 src/include/catalog/pg_publication.h      |  6 ++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 +++++++
 src/test/regress/sql/publication.sql      | 26 +++++++
 src/test/subscription/t/100_bugs.pl       | 16 +---
 8 files changed, 175 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..05debf6d79 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,95 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid;
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+	else
+		publish_as_relid = relid;
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+		Bitmapset  *columns = NULL;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+			relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			return true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		/*
+		 * Bitmap of published columns when publish_generated_columns is
+		 * 'false' and no column list is specified.
+		 */
+		columns = pub_form_cols_map(relation, false);
+
+		/*
+		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
+		 * offset (to handle system columns the usual way), while column list
+		 * does not use offset, so we can't do bms_is_subset(). Instead, we
+		 * have to loop over the idattrs and check all of them are in the
+		 * list.
+		 */
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+
+			/* replica identity column, not covered by the column list */
+			if (!bms_is_member(attnum, columns))
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+		bms_free(columns);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..6e42de104f 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_has_valid_gen_cols)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..02d7fba160 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_has_valid_gen_cols = true;
 		return;
 	}
 
@@ -5750,6 +5751,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_has_valid_gen_cols = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5829,16 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+			pubdesc->replident_has_valid_gen_cols = false;
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..5eeeced91d 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,12 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published.
+	 */
+	bool		replident_has_valid_gen_cols;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..caf6e31554 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#26vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#25)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Sat, 16 Nov 2024 at 00:10, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments. I have fixed all the comments and
attached the updated patch.

Few comments:
1) The replident_has_valid_gen_cols flag is set when either an update
or delete operation is published by the publication.
+               /*
+                * Check if all columns which are part of the REPLICA
IDENTITY is
+                * published.
+                */
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
                   pubform->pubviaroot))
+                       pubdesc->replident_has_valid_gen_cols = false;

You should create two separate flags—one for updates and one for
deletes—and set them accordingly, based on the operation being
published. This is similar to how the cols_valid_for_update and
cols_valid_for_delete flags are handled for column lists.

As shown in the test below, the delete operation fails even though the
delete operation is not published by the pub_gencol publication:
CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;

CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with ( PUBLISH
= 'update');

-- This should be successful, since the publication is not publishing delete:
postgres=# delete from testpub_gencol ;
ERROR: cannot delete from table "testpub_gencol"
DETAIL: Replica identity consists of an unpublished generated column.

2) Since the code in replident_has_unpublished_gen_col and
pub_collist_contains_invalid_column is largely identical, we can
consolidate them into a single function that handles both column lists
and relation columns. The function name, header comments, and internal
comments should be updated accordingly.

Regards,
Vignesh

#27Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Shlok Kyal (#25)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Saturday, November 16, 2024 2:41 AM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments. I have fixed all the comments and attached
the updated patch.

Thanks for the patch. I have one comment on the following code:

+		/*
+		 * Bitmap of published columns when publish_generated_columns is
+		 * 'false' and no column list is specified.
+		 */
+		columns = pub_form_cols_map(relation, false);
+
+		/*
+		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
+		 * offset (to handle system columns the usual way), while column list
+		 * does not use offset, so we can't do bms_is_subset(). Instead, we
+		 * have to loop over the idattrs and check all of them are in the
+		 * list.
+		 */
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
...
+		}

It doesn't seem necessary to build a bitmap and then iterate over the replica
identity bitmap. Instead, we can efficiently traverse the columns as follows:

for (int i = 0; i < desc->natts; i++)
{
Form_pg_attribute att = TupleDescAttr(desc, i);

if (!att->attisdropped && att->attgenerated &&
bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
idattrs))
{
result = true;
break;
}
}

Best Regards,
Hou zj

#28Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#26)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Thanks for providing the comments.

On Sat, 16 Nov 2024 at 17:29, vignesh C <vignesh21@gmail.com> wrote:

On Sat, 16 Nov 2024 at 00:10, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments. I have fixed all the comments and
attached the updated patch.

Few comments:
1) The replident_has_valid_gen_cols flag is set when either an update
or delete operation is published by the publication.
+               /*
+                * Check if all columns which are part of the REPLICA
IDENTITY is
+                * published.
+                */
+               if (!pubform->pubgencols &&
+                       (pubform->pubupdate || pubform->pubdelete) &&
+                       replident_has_unpublished_gen_col(pubid,
relation, ancestors,
+
pubform->pubviaroot))
+                       pubdesc->replident_has_valid_gen_cols = false;

You should create two separate flags—one for updates and one for
deletes—and set them accordingly, based on the operation being
published. This is similar to how the cols_valid_for_update and
cols_valid_for_delete flags are handled for column lists.

As shown in the test below, the delete operation fails even though the
delete operation is not published by the pub_gencol publication:
CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;

CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with ( PUBLISH
= 'update');

-- This should be successful, since the publication is not publishing delete:
postgres=# delete from testpub_gencol ;
ERROR: cannot delete from table "testpub_gencol"
DETAIL: Replica identity consists of an unpublished generated column.

I have fixed the issue.

2) Since the code in replident_has_unpublished_gen_col and
pub_collist_contains_invalid_column is largely identical, we can
consolidate them into a single function that handles both column lists
and relation columns. The function name, header comments, and internal
comments should be updated accordingly.

I tried to merge both functions but it required extra checks. I think
it would make the patch a little complicated to review.

I have attached the updated version of the patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v7-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/octet-stream; name=v7-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From f9b85b8f376456205d6e410ee08d3e083bff4fed Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v7] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on a table having an unpublished generated column as REPLICA
IDENTITY is currently allowed. UPDATE/DELETE on such tables should not be allowed.
---
 src/backend/commands/publicationcmds.c    | 76 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 19 ++++++
 src/include/catalog/pg_publication.h      |  7 +++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 ++++++++
 src/test/regress/sql/publication.sql      | 26 ++++++++
 src/test/subscription/t/100_bugs.pl       | 16 +----
 8 files changed, 170 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..84abcef208 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,82 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid;
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+	else
+		publish_as_relid = relid;
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		int			x;
+		Bitmapset  *idattrs = NULL;
+		TupleDesc	desc = RelationGetDescr(relation);
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+			relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			return true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		for (int i = 0; i < desc->natts; i++)
+		{
+			Form_pg_attribute att = TupleDescAttr(desc, i);
+
+			/* check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated &&
+				bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
+							  idattrs))
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..57599df153 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..ef3203a682 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_valid_for_update = true;
+		pubdesc->replident_valid_for_delete = true;
 		return;
 	}
 
@@ -5750,6 +5752,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_valid_for_update = true;
+	pubdesc->replident_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5831,21 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			if (pubform->pubupdate)
+				pubdesc->replident_valid_for_update = false;
+			if (pubform->pubdelete)
+				pubdesc->replident_valid_for_delete = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..486f609a9a 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		replident_valid_for_update;
+	bool		replident_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..caf6e31554 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#29Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Zhijie Hou (Fujitsu) (#27)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Mon, 18 Nov 2024 at 08:57, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Saturday, November 16, 2024 2:41 AM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments. I have fixed all the comments and attached
the updated patch.

Thanks for the patch. I have one comment on the following code:

+               /*
+                * Bitmap of published columns when publish_generated_columns is
+                * 'false' and no column list is specified.
+                */
+               columns = pub_form_cols_map(relation, false);
+
+               /*
+                * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
+                * offset (to handle system columns the usual way), while column list
+                * does not use offset, so we can't do bms_is_subset(). Instead, we
+                * have to loop over the idattrs and check all of them are in the
+                * list.
+                */
+               x = -1;
+               while ((x = bms_next_member(idattrs, x)) >= 0)
+               {
...
+               }

It doesn't seem necessary to build a bitmap and then iterate over the replica
identity bitmap. Instead, we can efficiently traverse the columns as follows:

for (int i = 0; i < desc->natts; i++)
{
Form_pg_attribute att = TupleDescAttr(desc, i);

if (!att->attisdropped && att->attgenerated &&
bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
idattrs))
{
result = true;
break;
}
}

Best Regards,
Hou zj

Thanks for providing the comments.
I agree with your approach and have made the same change in the v7 patch [1].

[1]: /messages/by-id/CANhcyEUi6T+0O83LEsG6jOJFL3BY_WD=vZ73bt0FRUcJHRt=sQ@mail.gmail.com

Thanks and Regards,
Shlok Kyal

#30vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#28)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Mon, 18 Nov 2024 at 13:07, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Sat, 16 Nov 2024 at 17:29, vignesh C <vignesh21@gmail.com> wrote:

I have attached the updated version of the patch.

Few comments:
1) We have the following check for cols validation and rf validation:
/*
* If we know everything is replicated and the column list is invalid
* for update and delete, there is no point to check for other
* publications.
*/
if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
break;

Should we do this for replident_valid_for_update and
replident_valid_for_delete also?

2) This variable is not required, there is a warning:
publicationcmds.c: In function ‘replident_has_unpublished_gen_col’:
publicationcmds.c:486:41: warning: unused variable ‘x’ [-Wunused-variable]

Regards,
Vignesh

#31Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#30)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Mon, 18 Nov 2024 at 19:19, vignesh C <vignesh21@gmail.com> wrote:

On Mon, 18 Nov 2024 at 13:07, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Sat, 16 Nov 2024 at 17:29, vignesh C <vignesh21@gmail.com> wrote:

I have attached the updated version of the patch.

Few comments:
1) We have the following check for cols validation and rf validation:
/*
* If we know everything is replicated and the column list is invalid
* for update and delete, there is no point to check for other
* publications.
*/
if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
break;

Should we do this for replident_valid_for_update and
replident_valid_for_delete also?

Yes, we can add this check.

2) This variable is not required, there is a warning:
publicationcmds.c: In function ‘replident_has_unpublished_gen_col’:
publicationcmds.c:486:41: warning: unused variable ‘x’ [-Wunused-variable]

Fixed

I have fixed the comments and attached an updated patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v8-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/octet-stream; name=v8-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From f995e00c4527bb88e609f90939366d39cd6e9025 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v8] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on table having unpublished generated column as REPLICA
IDENTITY is allowed. UPDATE/DELETE on such tables should not be allowed
---
 src/backend/commands/publicationcmds.c    | 75 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 30 +++++++++
 src/include/catalog/pg_publication.h      |  7 +++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 ++++++++
 src/test/regress/sql/publication.sql      | 26 ++++++++
 src/test/subscription/t/100_bugs.pl       | 16 +----
 8 files changed, 180 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..8e6a61c997 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,81 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid;
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+	else
+		publish_as_relid = relid;
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		Bitmapset  *idattrs = NULL;
+		TupleDesc	desc = RelationGetDescr(relation);
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
+			relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			return true;
+
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		for (int i = 0; i < desc->natts; i++)
+		{
+			Form_pg_attribute att = TupleDescAttr(desc, i);
+
+			/* check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated &&
+				bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
+							  idattrs))
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..57599df153 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..1e4cf99e85 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_valid_for_update = true;
+		pubdesc->replident_valid_for_delete = true;
 		return;
 	}
 
@@ -5750,6 +5752,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_valid_for_update = true;
+	pubdesc->replident_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5831,21 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			if (pubform->pubupdate)
+				pubdesc->replident_valid_for_update = false;
+			if (pubform->pubdelete)
+				pubdesc->replident_valid_for_delete = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
@@ -5848,6 +5867,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->replident_valid_for_update &&
+			!pubdesc->replident_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..486f609a9a 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		replident_valid_for_update;
+	bool		replident_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..caf6e31554 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#32Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Shlok Kyal (#31)
1 attachment(s)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tuesday, November 19, 2024 3:06 AM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I have fixed the comments and attached an updated patch.

Thanks for the patch.

I refactored the code a bit:

* Make the code in replident_has_unpublished_gen_col()
consistent with other similar functions.

* Avoid unnecessary operations when there are no generated columns
in the table.

* Improve the loop by traversing the replica identity columns instead. I think
it looks clearer this way and better aligns with the purpose of the
replident_has_unpublished_gen_col function.

* Some cosmetic changes in the comments.

Please check the attached diff. Feel free to merge if it looks
acceptable to you.

Best Regards,
Hou zj

Attachments:

v2-0001-improve-logic.patch.txttext/plain; name=v2-0001-improve-logic.patch.txtDownload
From 553dc53ec14b6ed8b898992f6b2085cbe12e5408 Mon Sep 17 00:00:00 2001
From: Hou Zhijie <houzj.fnst@cn.fujitsu.com>
Date: Tue, 19 Nov 2024 11:13:13 +0800
Subject: [PATCH v2] improve logic

---
 src/backend/commands/publicationcmds.c | 29 ++++++++++++++------------
 src/backend/utils/cache/relcache.c     |  2 +-
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 8e6a61c997..053877c524 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -455,11 +455,16 @@ replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
 								  bool pubviaroot)
 {
 	Oid			relid = RelationGetRelid(relation);
-	Oid			publish_as_relid;
+	Oid			publish_as_relid = RelationGetRelid(relation);
 	bool		result = false;
 	bool		found;
 	Publication *pub;
 
+	/* Return if the table does not contain any generated columns */
+	if (!relation->rd_att->constr ||
+		!relation->rd_att->constr->has_generated_stored)
+		return false;
+
 	/*
 	 * For a partition, if pubviaroot is true, find the topmost ancestor that
 	 * is published via this publication as we need to use its column list for
@@ -475,38 +480,36 @@ replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
 		if (!OidIsValid(publish_as_relid))
 			publish_as_relid = relid;
 	}
-	else
-		publish_as_relid = relid;
 
 	pub = GetPublication(pubid);
 	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
 
 	if (!found)
 	{
-		Bitmapset  *idattrs = NULL;
 		TupleDesc	desc = RelationGetDescr(relation);
+		Bitmapset  *idattrs;
+		int			x;
 
 		/*
 		 * REPLICA IDENTITY can be FULL only if there is no column list for
 		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
 		 * generated column we should error out.
 		 */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL &&
-			relation->rd_att->constr &&
-			relation->rd_att->constr->has_generated_stored)
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
 			return true;
 
+		/* Remember columns that are part of the REPLICA IDENTITY */
 		idattrs = RelationGetIndexAttrBitmap(relation,
 											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
 
-		for (int i = 0; i < desc->natts; i++)
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
 		{
-			Form_pg_attribute att = TupleDescAttr(desc, i);
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
 
-			/* check if generated column is part of REPLICA IDENTITY */
-			if (!att->attisdropped && att->attgenerated &&
-				bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
-							  idattrs))
+			/* Check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated)
 			{
 				result = true;
 				break;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 1e4cf99e85..be8f8eea8f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5832,7 +5832,7 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		}
 
 		/*
-		 * Check if all columns which are part of the REPLICA IDENTITY is
+		 * Check if all generated columns included in the REPLICA IDENTITY are
 		 * published.
 		 */
 		if (!pubform->pubgencols &&
-- 
2.30.0.windows.2

#33vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#31)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, 19 Nov 2024 at 00:36, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

On Mon, 18 Nov 2024 at 19:19, vignesh C <vignesh21@gmail.com> wrote:

On Mon, 18 Nov 2024 at 13:07, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Sat, 16 Nov 2024 at 17:29, vignesh C <vignesh21@gmail.com> wrote:

I have attached the updated version of the patch.

Few comments:
1) We have the following check for cols validation and rf validation:
/*
* If we know everything is replicated and the column list is invalid
* for update and delete, there is no point to check for other
* publications.
*/
if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
break;

Should we do this for replident_valid_for_update and
replident_valid_for_delete also?

Yes, we can add this check.

2) This variable is not required, there is a warning:
publicationcmds.c: In function ‘replident_has_unpublished_gen_col’:
publicationcmds.c:486:41: warning: unused variable ‘x’ [-Wunused-variable]

Fixed

I have fixed the comments and attached an updated patch.

To ensure easy backtracking after the patch is committed, we should
include a brief explanation for the test removal in the commit
message:
diff --git a/src/test/subscription/t/100_bugs.pl
b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP
PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
  'postgres', qq(
  CREATE TABLE dropped_cols (a int, b_drop int, c int);
  ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
- CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5
* a) STORED, c int);
- ALTER TABLE generated_cols REPLICA IDENTITY FULL;
- CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+ CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;

Regards,
Vignesh

#34Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Zhijie Hou (Fujitsu) (#32)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, 19 Nov 2024 at 09:50, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Tuesday, November 19, 2024 3:06 AM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I have fixed the comments and attached an updated patch.

Thanks for the patch.

I refactored the code a bit:

* Make the code in replident_has_unpublished_gen_col()
consistent with other similar functions.

* Avoid unnecessary operations when there are no generated columns
in the table.

* Improve the loop by traversing the replica identity columns instead. I think
it looks clearer this way and better aligns with the purpose of the
replident_has_unpublished_gen_col function.

* Some cosmetic changes in the comments.

Please check the attached diff. Feel free to merge if it looks
acceptable to you.

It looks good to me. I have added it to the latest patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v9-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchapplication/octet-stream; name=v9-0001-Disallow-UPDATE-DELETE-on-table-with-generated-co.patchDownload
From fc2a6043cef8a48bf3145e5e1bca90ac33e7efa9 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v9] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE/DELETE on table having unpublished generated column as REPLICA
IDENTITY is allowed. UPDATE/DELETE on such tables should not be allowed
---
 src/backend/commands/publicationcmds.c    | 78 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 30 +++++++++
 src/include/catalog/pg_publication.h      |  7 ++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 ++++++++
 src/test/regress/sql/publication.sql      | 26 ++++++++
 src/test/subscription/t/100_bugs.pl       |  4 +-
 8 files changed, 182 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..053877c524 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,84 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/* Return if the table does not contain any generated columns */
+	if (!relation->rd_att->constr ||
+		!relation->rd_att->constr->has_generated_stored)
+		return false;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		TupleDesc	desc = RelationGetDescr(relation);
+		Bitmapset  *idattrs;
+		int			x;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+			return true;
+
+		/* Remember columns that are part of the REPLICA IDENTITY */
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
+
+			/* Check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..57599df153 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..be8f8eea8f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_valid_for_update = true;
+		pubdesc->replident_valid_for_delete = true;
 		return;
 	}
 
@@ -5750,6 +5752,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_valid_for_update = true;
+	pubdesc->replident_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5831,21 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			if (pubform->pubupdate)
+				pubdesc->replident_valid_for_update = false;
+			if (pubform->pubdelete)
+				pubdesc->replident_valid_for_delete = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
@@ -5848,6 +5867,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->replident_valid_for_update &&
+			!pubdesc->replident_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..486f609a9a 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		replident_valid_for_update;
+	bool		replident_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..caf6e31554 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..64b902db73 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -391,7 +391,7 @@ $node_publisher->safe_psql(
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
 	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols with (publish_generated_columns = true);
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
 	INSERT INTO generated_cols (a, c) VALUES (1, 1);
@@ -400,7 +400,7 @@ $node_publisher->safe_psql(
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
+	 CREATE TABLE generated_cols (a int, b_gen int, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
-- 
2.34.1

#35Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#33)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, 19 Nov 2024 at 10:22, vignesh C <vignesh21@gmail.com> wrote:

On Tue, 19 Nov 2024 at 00:36, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

On Mon, 18 Nov 2024 at 19:19, vignesh C <vignesh21@gmail.com> wrote:

On Mon, 18 Nov 2024 at 13:07, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for providing the comments.

On Sat, 16 Nov 2024 at 17:29, vignesh C <vignesh21@gmail.com> wrote:

I have attached the updated version of the patch.

Few comments:
1) We have the following check for cols validation and rf validation:
/*
* If we know everything is replicated and the column list is invalid
* for update and delete, there is no point to check for other
* publications.
*/
if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
break;

Should we do this for replident_valid_for_update and
replident_valid_for_delete also?

Yes, we can add this check.

2) This variable is not required, there is a warning:
publicationcmds.c: In function ‘replident_has_unpublished_gen_col’:
publicationcmds.c:486:41: warning: unused variable ‘x’ [-Wunused-variable]

Fixed

I have fixed the comments and attached an updated patch.

To ensure easy backtracking after the patch is committed, we should
include a brief explanation for the test removal in the commit
message:
diff --git a/src/test/subscription/t/100_bugs.pl
b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP
PUBLICATION tap_pub_sch");
$node_publisher->stop('fast');
$node_subscriber->stop('fast');
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# we fail to apply updates and deletes
$node_publisher->rotate_logfile();
$node_publisher->start();
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
'postgres', qq(
CREATE TABLE dropped_cols (a int, b_drop int, c int);
ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
- CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5
* a) STORED, c int);
- ALTER TABLE generated_cols REPLICA IDENTITY FULL;
- CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+ CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;

I noticed that we can add 'publish_generated_columns = true' for the
case of generated column. So we won't need to remove the test. I have
made the changes in v9 patch [1].

[1]: /messages/by-id/CANhcyEVCqrSYxAg_s99VYevUc4F-Lb9XowWUC2E5RG0i8RtZwA@mail.gmail.com

Thanks and Regards,
Shlok Kyal

#36Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Shlok Kyal (#35)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tuesday, November 19, 2024 3:15 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I noticed that we can add 'publish_generated_columns = true' for the case of
generated column. So we won't need to remove the test. I have made the
changes in v9 patch [1].

I think this would unexpectedly change the original purpose of that testcase,
which is to test the bug mentioned in commit b797def.

Basically, I expected the new testcase to fail if we remove the code fix added in
b797def, but the new testcase can pass even after that.

If we confirm that that bug will never be triggered after applying the fix in
the thread, it would be better to remove that testcase and mention it in the
commit message.

[1]: /messages/by-id/CANhcyEVCqrSYxAg_s99VYevUc4F-Lb9XowWUC2E5RG0i8RtZwA%40mail.gmail.com

Best Regards,
Hou zj

#37Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Zhijie Hou (Fujitsu) (#36)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tuesday, November 19, 2024 5:10 PM Zhijie Hou (Fujitsu) <houzj.fnst@fujitsu.com> wrote:

On Tuesday, November 19, 2024 3:15 PM Shlok Kyal
<shlok.kyal.oss@gmail.com> wrote:

I noticed that we can add 'publish_generated_columns = true' for the case of
generated column. So we won't need to remove the test. I have made the
changes in v9 patch [1].

I think this would unexpectedly change the original purpose of that testcase,
which is to test the bug mentioned in commit b797def.

Basically, I expected the new testcase to fail if we remove the code fix added in
b797def, but the new testcase can pass even after that.

Sorry, a typo here. I meant the commit adedf54 instead of b797def.

If we confirm that that bug will never be triggered after applying the fix in
the thread, it would be better to remove that testcase and mention it in the
commit message.

Best Regards,
Hou zj

#38Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Zhijie Hou (Fujitsu) (#36)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, 19 Nov 2024 at 14:39, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Tuesday, November 19, 2024 3:15 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I noticed that we can add 'publish_generated_columns = true' for the case of
generated column. So we won't need to remove the test. I have made the
changes in v9 patch [1].

I think this would unexpectedly change the original purpose of that testcase,
which is to test the bug mentioned in commit b797def.

Basically, I expected the new testcase to fail if we remove the code fix added in
b797def, but the new testcase can pass even after that.

If we confirm that that bug will never be triggered after applying the fix in
the thread, it would be better to remove that testcase and mention it in the
commit message.

I agree that we can remove the test. I debugged and found that the test
modified in the above patch does not hit the condition added in commit
adedf54.
Also, as far as I can tell, we cannot trigger the bug after the fix in this
thread. So, I think we can remove the testcase.

I have attached the latest patch with an updated commit message and
also removed the testcase.

Thanks and Regards,
Shlok Kyal

Attachments:

v10-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchapplication/octet-stream; name=v10-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchDownload
From 0d50d01253feeade771f4bc591ac6e61162d08a4 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v10] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 src/backend/commands/publicationcmds.c    | 78 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 30 +++++++++
 src/include/catalog/pg_publication.h      |  7 ++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 25 ++++++++
 src/test/regress/sql/publication.sql      | 26 ++++++++
 src/test/subscription/t/100_bugs.pl       | 16 +----
 8 files changed, 183 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..053877c524 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,84 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/* Return if the table does not contain any generated columns */
+	if (!relation->rd_att->constr ||
+		!relation->rd_att->constr->has_generated_stored)
+		return false;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to use its column list for
+	 * the changes.
+	 *
+	 * Note that even though the column list used is for an ancestor, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		TupleDesc	desc = RelationGetDescr(relation);
+		Bitmapset  *idattrs;
+		int			x;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+			return true;
+
+		/* Remember columns that are part of the REPLICA IDENTITY */
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
+
+			/* Check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..57599df153 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..be8f8eea8f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_valid_for_update = true;
+		pubdesc->replident_valid_for_delete = true;
 		return;
 	}
 
@@ -5750,6 +5752,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_valid_for_update = true;
+	pubdesc->replident_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5831,21 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			if (pubform->pubupdate)
+				pubdesc->replident_valid_for_update = false;
+			if (pubform->pubdelete)
+				pubdesc->replident_valid_for_delete = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
@@ -5848,6 +5867,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->replident_valid_for_update &&
+			!pubdesc->replident_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..486f609a9a 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		replident_valid_for_update;
+	bool		replident_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..caf6e31554 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,31 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..3b1c661440 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,32 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#39Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Shlok Kyal (#38)
1 attachment(s)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tuesday, November 19, 2024 9:42 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I agree that we can remove the test. I debugged and found that the test modified in
the above patch does not hit the condition added in commit adedf54.
Also, as far as I can tell, we cannot trigger the bug after the fix in this thread. So, I
think we can remove the testcase.

I have attached the latest patch with an updated commit message and also
removed the testcase.

Thanks for updating the patch.

Out of curiosity, I tried to merge the pub_collist_contains_invalid_column()
and replident_has_unpublished_gen_col() functions into a single function to
evaluate if this approach might be better.

As shown in the attached diff, it reduces some code redundancy *but* also
introduces additional IF conditions and parameters in the new combined
function, which might complicate the logic a bit.

Personally, I think the existing V10 patch looks good and clear. I am just
sharing the diff for feedback in case others want to have a look.

Best Regards,
Hou zj

Attachments:

v10-0001-combine-functions.patch.txttext/plain; name=v10-0001-combine-functions.patch.txtDownload
From 8640a4605f32f8072d093902da0af23dfa6d119b Mon Sep 17 00:00:00 2001
From: Hou Zhijie <houzj.fnst@cn.fujitsu.com>
Date: Wed, 20 Nov 2024 16:34:04 +0800
Subject: [PATCH v10] combine functions

---
 src/backend/commands/publicationcmds.c | 219 ++++++++++---------------
 src/backend/utils/cache/relcache.c     |  38 ++---
 src/include/commands/publicationcmds.h |   7 +-
 3 files changed, 105 insertions(+), 159 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 053877c524..0d5daf7626 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
 }
 
 /*
- * Check if all columns referenced in the REPLICA IDENTITY are covered by
- * the column list.
+ * Check for invalid columns in the publication table definition.
  *
- * Returns true if any replica identity column is not covered by column list.
+ * This function evaluates two conditions:
+ *
+ * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered
+ *    by the column list. If any column is missing, *invalid_column_list is set
+ *    to true.
+ *
+ * 2. Ensures that the REPLICA IDENTITY does not contain unpublished generated
+ *    columns. If an unpublished generated column is found,
+ *    *unpublished_gen_col is set to true.
+ *
+ * Returns true if any of the above conditions are not met.
  */
 bool
-pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
-									bool pubviaroot)
+pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+							bool pubviaroot, bool pubgencols,
+							bool *invalid_column_list,
+							bool *unpublished_gen_col)
 {
-	HeapTuple	tuple;
 	Oid			relid = RelationGetRelid(relation);
 	Oid			publish_as_relid = RelationGetRelid(relation);
-	bool		result = false;
-	Datum		datum;
-	bool		isnull;
+	Bitmapset  *idattrs;
+	Bitmapset  *columns = NULL;
+	TupleDesc	desc = RelationGetDescr(relation);
+	Publication *pub;
+	int			x;
+
+	*invalid_column_list = false;
+	*unpublished_gen_col = false;
 
 	/*
 	 * For a partition, if pubviaroot is true, find the topmost ancestor that
@@ -368,158 +383,90 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 			publish_as_relid = relid;
 	}
 
-	tuple = SearchSysCache2(PUBLICATIONRELMAP,
-							ObjectIdGetDatum(publish_as_relid),
-							ObjectIdGetDatum(pubid));
-
-	if (!HeapTupleIsValid(tuple))
-		return false;
-
-	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
-							Anum_pg_publication_rel_prattrs,
-							&isnull);
+	/* Fetch the column list */
+	pub = GetPublication(pubid);
+	check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns);
 
-	if (!isnull)
+	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
 	{
-		int			x;
-		Bitmapset  *idattrs;
-		Bitmapset  *columns = NULL;
-
 		/* With REPLICA IDENTITY FULL, no column list is allowed. */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-			result = true;
-
-		/* Transform the column list datum to a bitmapset. */
-		columns = pub_collist_to_bitmapset(NULL, datum, NULL);
-
-		/* Remember columns that are part of the REPLICA IDENTITY */
-		idattrs = RelationGetIndexAttrBitmap(relation,
-											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+		*invalid_column_list = (columns != NULL);
 
 		/*
-		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
-		 * offset (to handle system columns the usual way), while column list
-		 * does not use offset, so we can't do bms_is_subset(). Instead, we
-		 * have to loop over the idattrs and check all of them are in the
-		 * list.
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. When REPLICA IDENTITY is FULL and the relation
+		 * includes a generated column, but the publish_generated_columns
+		 * option is set to false, this scenario is invalid.
 		 */
-		x = -1;
-		while ((x = bms_next_member(idattrs, x)) >= 0)
-		{
-			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
-
-			/*
-			 * If pubviaroot is true, we are validating the column list of the
-			 * parent table, but the bitmap contains the replica identity
-			 * information of the child table. The parent/child attnums may
-			 * not match, so translate them to the parent - get the attname
-			 * from the child, and look it up in the parent.
-			 */
-			if (pubviaroot)
-			{
-				/* attribute name in the child table */
-				char	   *colname = get_attname(relid, attnum, false);
-
-				/*
-				 * Determine the attnum for the attribute name in parent (we
-				 * are using the column list defined on the parent).
-				 */
-				attnum = get_attnum(publish_as_relid, colname);
-			}
-
-			/* replica identity column, not covered by the column list */
-			if (!bms_is_member(attnum, columns))
-			{
-				result = true;
-				break;
-			}
-		}
+		if (!pubgencols && relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			*unpublished_gen_col = true;
 
-		bms_free(idattrs);
-		bms_free(columns);
+		if (*unpublished_gen_col && *invalid_column_list)
+			return true;
 	}
 
-	ReleaseSysCache(tuple);
-
-	return result;
-}
-
-/*
- * Check if REPLICA IDENTITY consists of any unpublished generated column.
- *
- * Returns true if any replica identity column is an unpublished generated
- * column.
- */
-bool
-replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
-								  bool pubviaroot)
-{
-	Oid			relid = RelationGetRelid(relation);
-	Oid			publish_as_relid = RelationGetRelid(relation);
-	bool		result = false;
-	bool		found;
-	Publication *pub;
-
-	/* Return if the table does not contain any generated columns */
-	if (!relation->rd_att->constr ||
-		!relation->rd_att->constr->has_generated_stored)
-		return false;
+	/* Remember columns that are part of the REPLICA IDENTITY */
+	idattrs = RelationGetIndexAttrBitmap(relation,
+										 INDEX_ATTR_BITMAP_IDENTITY_KEY);
 
 	/*
-	 * For a partition, if pubviaroot is true, find the topmost ancestor that
-	 * is published via this publication as we need to use its column list for
-	 * the changes.
-	 *
-	 * Note that even though the column list used is for an ancestor, the
-	 * REPLICA IDENTITY used will be for the actual child table.
+	 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset
+	 * (to handle system columns the usual way), while column list does not
+	 * use offset, so we can't do bms_is_subset(). Instead, we have to loop
+	 * over the idattrs and check all of them are in the list.
 	 */
-	if (pubviaroot && relation->rd_rel->relispartition)
+	x = -1;
+	while ((x = bms_next_member(idattrs, x)) >= 0)
 	{
-		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+		AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+		Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
 
-		if (!OidIsValid(publish_as_relid))
-			publish_as_relid = relid;
-	}
+		Assert(!att->attisdropped);
 
-	pub = GetPublication(pubid);
-	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+		/* Check if generated column is part of REPLICA IDENTITY */
+		*unpublished_gen_col |= att->attgenerated;
 
-	if (!found)
-	{
-		TupleDesc	desc = RelationGetDescr(relation);
-		Bitmapset  *idattrs;
-		int			x;
+		if (columns == NULL)
+		{
+			/* Break the loop if unpublished generated columns exist. */
+			if (*unpublished_gen_col)
+				break;
+
+			/* Skip validating the column list since it is not defined */
+			continue;
+		}
 
 		/*
-		 * REPLICA IDENTITY can be FULL only if there is no column list for
-		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
-		 * generated column we should error out.
+		 * If pubviaroot is true, we are validating the column list of the
+		 * parent table, but the bitmap contains the replica identity
+		 * information of the child table. The parent/child attnums may not
+		 * match, so translate them to the parent - get the attname from the
+		 * child, and look it up in the parent.
 		 */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-			return true;
-
-		/* Remember columns that are part of the REPLICA IDENTITY */
-		idattrs = RelationGetIndexAttrBitmap(relation,
-											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
-
-		x = -1;
-		while ((x = bms_next_member(idattrs, x)) >= 0)
+		if (pubviaroot)
 		{
-			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
-			Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
+			/* attribute name in the child table */
+			char	   *colname = get_attname(relid, attnum, false);
 
-			/* Check if generated column is part of REPLICA IDENTITY */
-			if (!att->attisdropped && att->attgenerated)
-			{
-				result = true;
-				break;
-			}
+			/*
+			 * Determine the attnum for the attribute name in parent (we are
+			 * using the column list defined on the parent).
+			 */
+			attnum = get_attnum(publish_as_relid, colname);
 		}
 
-		bms_free(idattrs);
+		/* replica identity column, not covered by the column list */
+		*invalid_column_list |= !bms_is_member(attnum, columns);
+
+		if (*invalid_column_list && *unpublished_gen_col)
+			break;
 	}
 
-	return result;
+	bms_free(columns);
+	bms_free(idattrs);
+
+	return *invalid_column_list || *unpublished_gen_col;
 }
 
 /* check_functions_in_node callback */
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index be8f8eea8f..c1199596bf 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5783,6 +5783,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		Oid			pubid = lfirst_oid(lc);
 		HeapTuple	tup;
 		Form_pg_publication pubform;
+		bool		invalid_column_list;
+		bool		unpublished_gen_col;
 
 		tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
 
@@ -5817,33 +5819,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		/*
 		 * Check if all columns are part of the REPLICA IDENTITY index or not.
 		 *
-		 * If the publication is FOR ALL TABLES then it means the table has no
-		 * column list and we can skip the validation.
-		 */
-		if (!pubform->puballtables &&
-			(pubform->pubupdate || pubform->pubdelete) &&
-			pub_collist_contains_invalid_column(pubid, relation, ancestors,
-												pubform->pubviaroot))
-		{
-			if (pubform->pubupdate)
-				pubdesc->cols_valid_for_update = false;
-			if (pubform->pubdelete)
-				pubdesc->cols_valid_for_delete = false;
-		}
-
-		/*
 		 * Check if all generated columns included in the REPLICA IDENTITY are
 		 * published.
 		 */
-		if (!pubform->pubgencols &&
-			(pubform->pubupdate || pubform->pubdelete) &&
-			replident_has_unpublished_gen_col(pubid, relation, ancestors,
-											  pubform->pubviaroot))
+		if ((pubform->pubupdate || pubform->pubdelete) &&
+			pub_contains_invalid_column(pubid, relation, ancestors,
+										pubform->pubviaroot,
+										pubform->pubgencols,
+										&invalid_column_list,
+										&unpublished_gen_col))
 		{
 			if (pubform->pubupdate)
-				pubdesc->replident_valid_for_update = false;
+			{
+				pubdesc->cols_valid_for_update = !invalid_column_list;
+				pubdesc->replident_valid_for_update = !unpublished_gen_col;
+			}
+
 			if (pubform->pubdelete)
-				pubdesc->replident_valid_for_delete = false;
+			{
+				pubdesc->cols_valid_for_delete = !invalid_column_list;
+				pubdesc->replident_valid_for_delete = !unpublished_gen_col;
+			}
 		}
 
 		ReleaseSysCache(tup);
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index b18e576b77..fbf78ea08e 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -33,8 +33,11 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId);
 extern void InvalidatePublicationRels(List *relids);
 extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
-extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
-												List *ancestors, bool pubviaroot);
+extern bool pub_contains_invalid_column(Oid pubid, Relation relation,
+										List *ancestors, bool pubviaroot,
+										bool pubgencols,
+										bool *invalid_column_list,
+										bool *unpublished_gen_col);
 extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
 											  List *ancestors, bool pubviaroot);
 
-- 
2.30.0.windows.2

#40vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#38)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, 19 Nov 2024 at 19:12, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

On Tue, 19 Nov 2024 at 14:39, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Tuesday, November 19, 2024 3:15 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I noticed that we can add 'publish_generated_columns = true' for the case of
generated column. So we won't need to remove the test. I have made the
changes in v9 patch [1].

I think this would unexpectedly change the original purpose of that testcase,
which is to test the bug mentioned in commit b797def.

Basically, I expected the new testcase to fail if we remove the code fix added in
b797def, but the new testcase can pass even after that.

If we confirm that the bug can never be triggered after applying the fix in
this thread, it would be better to remove that testcase and mention it in the
commit message.

I agree that we can remove the test. I debugged and found that the test
modified in the above patch does not hit the condition added in commit
adedf54.
Also, as far as I can tell, we cannot trigger the bug after the fix in this
thread. So, I think we can remove the testcase.

I have attached the latest patch with an updated commit message and
also removed the testcase.

Few comments:
1) This seems to have been copied from
pub_collist_contains_invalid_column; the comment should be updated
to describe what this function does:
+       /*
+        * For a partition, if pubviaroot is true, find the topmost ancestor that
+        * is published via this publication as we need to use its column list for
+        * the changes.
+        *
+        * Note that even though the column list used is for an ancestor, the
+        * REPLICA IDENTITY used will be for the actual child table.
+        */
+       if (pubviaroot && relation->rd_rel->relispartition)
2) The DROP INDEX here is not required, as DROP TABLE will also take care of
dropping the index:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;

Regards,
Vignesh

#41Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#40)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, 21 Nov 2024 at 15:26, vignesh C <vignesh21@gmail.com> wrote:

On Tue, 19 Nov 2024 at 19:12, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

On Tue, 19 Nov 2024 at 14:39, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Tuesday, November 19, 2024 3:15 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

I noticed that we can add 'publish_generated_columns = true' for the case of
generated column. So we won't need to remove the test. I have made the
changes in v9 patch [1].

I think this would unexpectedly change the original purpose of that testcase,
which is to test the bug mentioned in commit b797def.

Basically, I expected the new testcase to fail if we remove the code fix added in
b797def, but the new testcase can pass even after that.

If we confirm that the bug can never be triggered after applying the fix in
this thread, it would be better to remove that testcase and mention it in the
commit message.

I agree that we can remove the test. I debugged and found that the test
modified in the above patch does not hit the condition added in commit
adedf54.
Also, as far as I can tell, we cannot trigger the bug after the fix in this
thread. So, I think we can remove the testcase.

I have attached the latest patch with an updated commit message and
also removed the testcase.

Few comments:
1) This seems to have been copied from
pub_collist_contains_invalid_column; the comment should be updated
to describe what this function does:
+       /*
+        * For a partition, if pubviaroot is true, find the topmost ancestor that
+        * is published via this publication as we need to use its column list for
+        * the changes.
+        *
+        * Note that even though the column list used is for an ancestor, the
+        * REPLICA IDENTITY used will be for the actual child table.
+        */
+       if (pubviaroot && relation->rd_rel->relispartition)
2) The DROP INDEX here is not required, as DROP TABLE will also take care of
dropping the index:
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP INDEX testpub_gencol_idx;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;

Thanks for the comments. I have fixed the comments and attached the
updated patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v11-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchapplication/octet-stream; name=v11-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchDownload
From 0e9b329ddb84ebe3f622b3cfdca7800516e68601 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v11] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 src/backend/commands/publicationcmds.c    | 78 +++++++++++++++++++++++
 src/backend/executor/execReplication.c    | 12 ++++
 src/backend/utils/cache/relcache.c        | 30 +++++++++
 src/include/catalog/pg_publication.h      |  7 ++
 src/include/commands/publicationcmds.h    |  2 +
 src/test/regress/expected/publication.out | 24 +++++++
 src/test/regress/sql/publication.sql      | 25 ++++++++
 src/test/subscription/t/100_bugs.pl       | 16 +----
 8 files changed, 181 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0129db18c6..bad5fbba5c 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -444,6 +444,84 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 	return result;
 }
 
+/*
+ * Check if REPLICA IDENTITY consists of any unpublished generated column.
+ *
+ * Returns true if any replica identity column is an unpublished generated
+ * column.
+ */
+bool
+replident_has_unpublished_gen_col(Oid pubid, Relation relation, List *ancestors,
+								  bool pubviaroot)
+{
+	Oid			relid = RelationGetRelid(relation);
+	Oid			publish_as_relid = RelationGetRelid(relation);
+	bool		result = false;
+	bool		found;
+	Publication *pub;
+
+	/* Return if the table does not contain any generated columns */
+	if (!relation->rd_att->constr ||
+		!relation->rd_att->constr->has_generated_stored)
+		return false;
+
+	/*
+	 * For a partition, if pubviaroot is true, find the topmost ancestor that
+	 * is published via this publication as we need to check if its column list
+	 * is specified.
+	 *
+	 * Note that even though the column list of an ancestor is checked, the
+	 * REPLICA IDENTITY used will be for the actual child table.
+	 */
+	if (pubviaroot && relation->rd_rel->relispartition)
+	{
+		publish_as_relid = GetTopMostAncestorInPublication(pubid, ancestors, NULL);
+
+		if (!OidIsValid(publish_as_relid))
+			publish_as_relid = relid;
+	}
+
+	pub = GetPublication(pubid);
+	found = check_and_fetch_column_list(pub, publish_as_relid, NULL, NULL);
+
+	if (!found)
+	{
+		TupleDesc	desc = RelationGetDescr(relation);
+		Bitmapset  *idattrs;
+		int			x;
+
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. If REPLICA IDENTITY is set as FULL and relation has a
+		 * generated column we should error out.
+		 */
+		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+			return true;
+
+		/* Remember columns that are part of the REPLICA IDENTITY */
+		idattrs = RelationGetIndexAttrBitmap(relation,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+		x = -1;
+		while ((x = bms_next_member(idattrs, x)) >= 0)
+		{
+			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
+
+			/* Check if generated column is part of REPLICA IDENTITY */
+			if (!att->attisdropped && att->attgenerated)
+			{
+				result = true;
+				break;
+			}
+		}
+
+		bms_free(idattrs);
+	}
+
+	return result;
+}
+
 /* check_functions_in_node callback */
 static bool
 contain_mutable_or_user_functions_checker(Oid func_id, void *context)
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..57599df153 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -809,6 +809,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.replident_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +827,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.replident_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 342467fd18..be8f8eea8f 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5736,6 +5736,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->replident_valid_for_update = true;
+		pubdesc->replident_valid_for_delete = true;
 		return;
 	}
 
@@ -5750,6 +5752,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->replident_valid_for_update = true;
+	pubdesc->replident_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5827,6 +5831,21 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 				pubdesc->cols_valid_for_delete = false;
 		}
 
+		/*
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
+		 */
+		if (!pubform->pubgencols &&
+			(pubform->pubupdate || pubform->pubdelete) &&
+			replident_has_unpublished_gen_col(pubid, relation, ancestors,
+											  pubform->pubviaroot))
+		{
+			if (pubform->pubupdate)
+				pubdesc->replident_valid_for_update = false;
+			if (pubform->pubdelete)
+				pubdesc->replident_valid_for_delete = false;
+		}
+
 		ReleaseSysCache(tup);
 
 		/*
@@ -5848,6 +5867,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->replident_valid_for_update &&
+			!pubdesc->replident_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..486f609a9a 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		replident_valid_for_update;
+	bool		replident_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..b18e576b77 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -35,5 +35,7 @@ extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
 extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
 												List *ancestors, bool pubviaroot);
+extern bool replident_has_unpublished_gen_col(Oid pubid, Relation relation,
+											  List *ancestors, bool pubviaroot);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..12d0611a7c 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,30 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..a29587b45d 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,31 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#42Amit Kapila
amit.kapila16@gmail.com
In reply to: Shlok Kyal (#41)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, Nov 21, 2024 at 5:30 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Review comments:
===============
1.
+
+ /*
+ * true if all generated columns which are part of replica identity are
+ * published or the publication actions do not include UPDATE or DELETE.
+ */
+ bool replident_valid_for_update;
+ bool replident_valid_for_delete;

These are too generic names for the purpose they are used. How about
instead name them as gencols_valid_for_update and
gencols_valid_for_delete?

2. The comments atop RelationBuildPublicationDesc() are only about the row
filter. We should update them for column list and generated columns as
well.

3. It is better to merge the functionality of the invalid column list
and unpublished generated columns as proposed by Hou-San above.

--
With Regards,
Amit Kapila.

#43Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Amit Kapila (#42)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, 28 Nov 2024 at 16:38, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Nov 21, 2024 at 5:30 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Review comments:
===============
1.
+
+ /*
+ * true if all generated columns which are part of replica identity are
+ * published or the publication actions do not include UPDATE or DELETE.
+ */
+ bool replident_valid_for_update;
+ bool replident_valid_for_delete;

These are too generic names for the purpose they are used. How about
instead name them as gencols_valid_for_update and
gencols_valid_for_delete?

2. The comments atop RelationBuildPublicationDesc() are only about the row
filter. We should update them for column list and generated columns as
well.

3. It is better to merge the functionality of the invalid column list
and unpublished generated columns as proposed by Hou-San above.

Thanks for reviewing the patch. I have addressed the comments and
updated the patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v12-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchapplication/octet-stream; name=v12-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchDownload
From 4dd53e651323c0574531a9b4d79ed31f6af1b2d0 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v12] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 src/backend/commands/publicationcmds.c    | 153 +++++++++++++---------
 src/backend/executor/execReplication.c    |  32 ++++-
 src/backend/utils/cache/relcache.c        |  57 ++++++--
 src/include/catalog/pg_publication.h      |   7 +
 src/include/commands/publicationcmds.h    |   7 +-
 src/test/regress/expected/publication.out |  24 ++++
 src/test/regress/sql/publication.sql      |  25 ++++
 src/test/subscription/t/100_bugs.pl       |  16 +--
 8 files changed, 226 insertions(+), 95 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 031c84ec29..d153ee329a 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
 }
 
 /*
- * Check if all columns referenced in the REPLICA IDENTITY are covered by
- * the column list.
+ * Check for invalid columns in the publication table definition.
  *
- * Returns true if any replica identity column is not covered by column list.
+ * This function evaluates two conditions:
+ *
+ * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered
+ *    by the column list. If any column is missing, *invalid_column_list is set
+ *    to true.
+ *
+ * 2. Ensures that the REPLICA IDENTITY does not contain unpublished generated
+ *    columns. If an unpublished generated column is found,
+ *    *unpublished_gen_col is set to true.
+ *
+ * Returns true if any of the above conditions are not met.
  */
 bool
-pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
-									bool pubviaroot)
+pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+							bool pubviaroot, bool pubgencols,
+							bool *invalid_column_list,
+							bool *unpublished_gen_col)
 {
-	HeapTuple	tuple;
 	Oid			relid = RelationGetRelid(relation);
 	Oid			publish_as_relid = RelationGetRelid(relation);
-	bool		result = false;
-	Datum		datum;
-	bool		isnull;
+	Bitmapset  *idattrs;
+	Bitmapset  *columns = NULL;
+	TupleDesc	desc = RelationGetDescr(relation);
+	Publication *pub;
+	int			x;
+
+	*invalid_column_list = false;
+	*unpublished_gen_col = false;
 
 	/*
 	 * For a partition, if pubviaroot is true, find the topmost ancestor that
@@ -368,80 +383,90 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 			publish_as_relid = relid;
 	}
 
-	tuple = SearchSysCache2(PUBLICATIONRELMAP,
-							ObjectIdGetDatum(publish_as_relid),
-							ObjectIdGetDatum(pubid));
+	/* Fetch the column list */
+	pub = GetPublication(pubid);
+	check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns);
 
-	if (!HeapTupleIsValid(tuple))
-		return false;
+	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+	{
+		/* With REPLICA IDENTITY FULL, no column list is allowed. */
+		*invalid_column_list = (columns != NULL);
 
-	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
-							Anum_pg_publication_rel_prattrs,
-							&isnull);
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. When REPLICA IDENTITY is FULL and the relation
+		 * includes a generated column, but the publish_generated_columns
+		 * option is set to false, this scenario is invalid.
+		 */
+		if (!pubgencols && relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			*unpublished_gen_col = true;
 
-	if (!isnull)
+		if (*unpublished_gen_col && *invalid_column_list)
+			return true;
+	}
+
+	/* Remember columns that are part of the REPLICA IDENTITY */
+	idattrs = RelationGetIndexAttrBitmap(relation,
+										 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+	/*
+	 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset
+	 * (to handle system columns the usual way), while column list does not
+	 * use offset, so we can't do bms_is_subset(). Instead, we have to loop
+	 * over the idattrs and check all of them are in the list.
+	 */
+	x = -1;
+	while ((x = bms_next_member(idattrs, x)) >= 0)
 	{
-		int			x;
-		Bitmapset  *idattrs;
-		Bitmapset  *columns = NULL;
+		AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+		Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
 
-		/* With REPLICA IDENTITY FULL, no column list is allowed. */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-			result = true;
+		Assert(!att->attisdropped);
 
-		/* Transform the column list datum to a bitmapset. */
-		columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+		/* Check if generated column is part of REPLICA IDENTITY */
+		*unpublished_gen_col |= att->attgenerated;
 
-		/* Remember columns that are part of the REPLICA IDENTITY */
-		idattrs = RelationGetIndexAttrBitmap(relation,
-											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+		if (columns == NULL)
+		{
+			/* Break the loop if unpublished generated columns exist. */
+			if (*unpublished_gen_col)
+				break;
+
+			/* Skip validating the column list since it is not defined */
+			continue;
+		}
 
 		/*
-		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
-		 * offset (to handle system columns the usual way), while column list
-		 * does not use offset, so we can't do bms_is_subset(). Instead, we
-		 * have to loop over the idattrs and check all of them are in the
-		 * list.
+		 * If pubviaroot is true, we are validating the column list of the
+		 * parent table, but the bitmap contains the replica identity
+		 * information of the child table. The parent/child attnums may not
+		 * match, so translate them to the parent - get the attname from the
+		 * child, and look it up in the parent.
 		 */
-		x = -1;
-		while ((x = bms_next_member(idattrs, x)) >= 0)
+		if (pubviaroot)
 		{
-			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			/* attribute name in the child table */
+			char	   *colname = get_attname(relid, attnum, false);
 
 			/*
-			 * If pubviaroot is true, we are validating the column list of the
-			 * parent table, but the bitmap contains the replica identity
-			 * information of the child table. The parent/child attnums may
-			 * not match, so translate them to the parent - get the attname
-			 * from the child, and look it up in the parent.
+			 * Determine the attnum for the attribute name in parent (we are
+			 * using the column list defined on the parent).
 			 */
-			if (pubviaroot)
-			{
-				/* attribute name in the child table */
-				char	   *colname = get_attname(relid, attnum, false);
-
-				/*
-				 * Determine the attnum for the attribute name in parent (we
-				 * are using the column list defined on the parent).
-				 */
-				attnum = get_attnum(publish_as_relid, colname);
-			}
-
-			/* replica identity column, not covered by the column list */
-			if (!bms_is_member(attnum, columns))
-			{
-				result = true;
-				break;
-			}
+			attnum = get_attnum(publish_as_relid, colname);
 		}
 
-		bms_free(idattrs);
-		bms_free(columns);
+		/* replica identity column, not covered by the column list */
+		*invalid_column_list |= !bms_is_member(attnum, columns);
+
+		if (*invalid_column_list && *unpublished_gen_col)
+			break;
 	}
 
-	ReleaseSysCache(tuple);
+	bms_free(columns);
+	bms_free(idattrs);
 
-	return result;
+	return *invalid_column_list || *unpublished_gen_col;
 }
 
 /* check_functions_in_node callback */
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..227a8aeea0 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -785,10 +785,22 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 		return;
 
 	/*
-	 * It is only safe to execute UPDATE/DELETE when all columns, referenced
-	 * in the row filters from publications which the relation is in, are
-	 * valid - i.e. when all referenced columns are part of REPLICA IDENTITY
-	 * or the table does not publish UPDATEs or DELETEs.
+	 * It is only safe to execute UPDATE/DELETE when:
+	 *
+	 * 1. All columns, referenced in the row filters from publications which
+	 * the relation is in, are valid - i.e. when all referenced columns are
+	 * part of REPLICA IDENTITY or the table does not publish UPDATEs or
+	 * DELETEs.
+	 *
+	 * 2. The column lists from publications which the relation is in are
+	 * valid - i.e. when all columns referenced in the REPLICA IDENTITY are
+	 * covered by the column list or the table does not publish UPDATEs or
+	 * DELETEs.
+	 *
+	 * 3. All generated columns in REPLICA IDENTITY of the relation, for all
+	 * the publications which the relation is in, are valid - i.e. when
+	 * unpublished generated columns are not part of REPLICA IDENTITY or the
+	 * table does not publish UPDATEs or DELETEs.
 	 *
 	 * XXX We could optimize it by first checking whether any of the
 	 * publications have a row filter for this relation. If not and relation
@@ -809,6 +821,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.gencols_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +839,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.gencols_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index d0892cee24..9205ea86c4 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5706,12 +5706,19 @@ RelationGetExclusionInfo(Relation indexRelation,
  * Get the publication information for the given relation.
  *
  * Traverse all the publications which the relation is in to get the
- * publication actions and validate the row filter expressions for such
- * publications if any. We consider the row filter expression as invalid if it
- * references any column which is not part of REPLICA IDENTITY.
+ * publication actions and validate:
+ * 1. The row filter expressions for such publications if any. We consider the
+ *    row filter expression as invalid if it references any column which is not
+ *    part of REPLICA IDENTITY.
+ * 2. The column list for such publications if any. We consider the column
+ *    list as invalid if it does not cover all columns of REPLICA IDENTITY.
+ * 3. The generated columns of the relation for such publications. We consider
+ *    any reference of an unpublished generated column in REPLICA IDENTITY as
+ *    invalid.
  *
  * To avoid fetching the publication information repeatedly, we cache the
- * publication actions and row filter validation information.
+ * publication actions, row filter validation information, column list
+ * validation information and generated column validation information.
  */
 void
 RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
@@ -5734,6 +5741,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->gencols_valid_for_update = true;
+		pubdesc->gencols_valid_for_delete = true;
 		return;
 	}
 
@@ -5748,6 +5757,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->gencols_valid_for_update = true;
+	pubdesc->gencols_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5777,6 +5788,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		Oid			pubid = lfirst_oid(lc);
 		HeapTuple	tup;
 		Form_pg_publication pubform;
+		bool		invalid_column_list;
+		bool		unpublished_gen_col;
 
 		tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
 
@@ -5811,18 +5824,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		/*
 		 * Check if all columns are part of the REPLICA IDENTITY index or not.
 		 *
-		 * If the publication is FOR ALL TABLES then it means the table has no
-		 * column list and we can skip the validation.
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
 		 */
-		if (!pubform->puballtables &&
-			(pubform->pubupdate || pubform->pubdelete) &&
-			pub_collist_contains_invalid_column(pubid, relation, ancestors,
-												pubform->pubviaroot))
+		if ((pubform->pubupdate || pubform->pubdelete) &&
+			pub_contains_invalid_column(pubid, relation, ancestors,
+										pubform->pubviaroot,
+										pubform->pubgencols,
+										&invalid_column_list,
+										&unpublished_gen_col))
 		{
 			if (pubform->pubupdate)
-				pubdesc->cols_valid_for_update = false;
+			{
+				pubdesc->cols_valid_for_update = !invalid_column_list;
+				pubdesc->gencols_valid_for_update = !unpublished_gen_col;
+			}
+
 			if (pubform->pubdelete)
-				pubdesc->cols_valid_for_delete = false;
+			{
+				pubdesc->cols_valid_for_delete = !invalid_column_list;
+				pubdesc->gencols_valid_for_delete = !unpublished_gen_col;
+			}
 		}
 
 		ReleaseSysCache(tup);
@@ -5846,6 +5868,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->gencols_valid_for_update &&
+			!pubdesc->gencols_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..e167b34461 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		gencols_valid_for_update;
+	bool		gencols_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..c200c19c6d 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -33,7 +33,10 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId);
 extern void InvalidatePublicationRels(List *relids);
 extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
-extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
-												List *ancestors, bool pubviaroot);
+extern bool pub_contains_invalid_column(Oid pubid, Relation relation,
+										List *ancestors, bool pubviaroot,
+										bool pubgencols,
+										bool *invalid_column_list,
+										bool *unpublished_gen_col);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..12d0611a7c 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,30 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..a29587b45d 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,31 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#44vignesh C
vignesh21@gmail.com
In reply to: Shlok Kyal (#43)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, 29 Nov 2024 at 13:38, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

On Thu, 28 Nov 2024 at 16:38, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Nov 21, 2024 at 5:30 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Review comments:
===============
1.
+
+ /*
+ * true if all generated columns which are part of replica identity are
+ * published or the publication actions do not include UPDATE or DELETE.
+ */
+ bool replident_valid_for_update;
+ bool replident_valid_for_delete;

These are too generic names for the purpose they are used. How about
instead name them as gencols_valid_for_update and
gencols_valid_for_delete?

2. The comments atop RelationBuildPublicationDesc() are only about the row
filter. We should update them for column list and generated columns as
well.

3. It is better to merge the functionality of the invalid column list
and unpublished generated columns as proposed by Hou-San above.

Thanks for reviewing the patch. I have addressed the comments and
updated the patch.

Shouldn't unpublished_gen_col be set only if the column list is absent?
-               /* Transform the column list datum to a bitmapset. */
-               columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+               /* Check if generated column is part of REPLICA IDENTITY */
+               *unpublished_gen_col |= att->attgenerated;
-               /* Remember columns that are part of the REPLICA IDENTITY */
-               idattrs = RelationGetIndexAttrBitmap(relation,
-                                                    INDEX_ATTR_BITMAP_IDENTITY_KEY);
+               if (columns == NULL)
+               {
+                       /* Break the loop if unpublished generated columns exist. */
+                       if (*unpublished_gen_col)
+                               break;
+
+                       /* Skip validating the column list since it is not defined */
+                       continue;
+               }

This scenario worked in v11 but fails in v12:
CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol(b);
UPDATE testpub_gencol SET a = 100 WHERE a = 1;

Regards,
Vignesh

#45Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: vignesh C (#44)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, 29 Nov 2024 at 15:49, vignesh C <vignesh21@gmail.com> wrote:

On Fri, 29 Nov 2024 at 13:38, Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

On Thu, 28 Nov 2024 at 16:38, Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Nov 21, 2024 at 5:30 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Review comments:
===============
1.
+
+ /*
+ * true if all generated columns which are part of replica identity are
+ * published or the publication actions do not include UPDATE or DELETE.
+ */
+ bool replident_valid_for_update;
+ bool replident_valid_for_delete;

These are too generic names for the purpose they are used. How about
instead name them as gencols_valid_for_update and
gencols_valid_for_delete?

2. The comment atop RelationBuildPublicationDesc() is only about the row
filter. We should update it to cover the column list and generated columns
as well.

3. It is better to merge the functionality of the invalid column list
and unpublished generated columns as proposed by Hou-San above.

Thanks for reviewing the patch. I have addressed the comments and
updated the patch.

Shouldn't unpublished_gen_col be set only if the column list is absent?
-               /* Transform the column list datum to a bitmapset. */
-               columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+               /* Check if generated column is part of REPLICA IDENTITY */
+               *unpublished_gen_col |= att->attgenerated;
-               /* Remember columns that are part of the REPLICA IDENTITY */
-               idattrs = RelationGetIndexAttrBitmap(relation,
-                                                    INDEX_ATTR_BITMAP_IDENTITY_KEY);
+               if (columns == NULL)
+               {
+                       /* Break the loop if unpublished generated columns exist. */
+                       if (*unpublished_gen_col)
+                               break;
+
+                       /* Skip validating the column list since it is not defined */
+                       continue;
+               }

This scenario worked in v11 but fails in v12:
CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1)
STORED NOT NULL);
CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol(b);
UPDATE testpub_gencol SET a = 100 WHERE a = 1;

Thanks for reviewing the patch. I have fixed the issue and updated the patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v13-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchapplication/octet-stream; name=v13-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchDownload
From 362d0a20ccdace50055e37f996f5e168f6f86873 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v13] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 src/backend/commands/publicationcmds.c    | 159 +++++++++++++---------
 src/backend/executor/execReplication.c    |  32 ++++-
 src/backend/utils/cache/relcache.c        |  57 ++++++--
 src/include/catalog/pg_publication.h      |   7 +
 src/include/commands/publicationcmds.h    |   7 +-
 src/test/regress/expected/publication.out |  24 ++++
 src/test/regress/sql/publication.sql      |  25 ++++
 src/test/subscription/t/100_bugs.pl       |  16 +--
 8 files changed, 232 insertions(+), 95 deletions(-)

diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 031c84ec29..d2b4c8e9a6 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
 }
 
 /*
- * Check if all columns referenced in the REPLICA IDENTITY are covered by
- * the column list.
+ * Check for invalid columns in the publication table definition.
  *
- * Returns true if any replica identity column is not covered by column list.
+ * This function evaluates two conditions:
+ *
+ * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered
+ *    by the column list. If any column is missing, *invalid_column_list is set
+ *    to true.
+ *
+ * 2. Ensures that the REPLICA IDENTITY does not contain unpublished generated
+ *    columns. If an unpublished generated column is found,
+ *    *unpublished_gen_col is set to true.
+ *
+ * Returns true if any of the above conditions are not met.
  */
 bool
-pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
-									bool pubviaroot)
+pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+							bool pubviaroot, bool pubgencols,
+							bool *invalid_column_list,
+							bool *unpublished_gen_col)
 {
-	HeapTuple	tuple;
 	Oid			relid = RelationGetRelid(relation);
 	Oid			publish_as_relid = RelationGetRelid(relation);
-	bool		result = false;
-	Datum		datum;
-	bool		isnull;
+	Bitmapset  *idattrs;
+	Bitmapset  *columns = NULL;
+	TupleDesc	desc = RelationGetDescr(relation);
+	Publication *pub;
+	int			x;
+
+	*invalid_column_list = false;
+	*unpublished_gen_col = false;
 
 	/*
 	 * For a partition, if pubviaroot is true, find the topmost ancestor that
@@ -368,80 +383,96 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 			publish_as_relid = relid;
 	}
 
-	tuple = SearchSysCache2(PUBLICATIONRELMAP,
-							ObjectIdGetDatum(publish_as_relid),
-							ObjectIdGetDatum(pubid));
+	/* Fetch the column list */
+	pub = GetPublication(pubid);
+	check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns);
 
-	if (!HeapTupleIsValid(tuple))
-		return false;
+	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+	{
+		/* With REPLICA IDENTITY FULL, no column list is allowed. */
+		*invalid_column_list = (columns != NULL);
 
-	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
-							Anum_pg_publication_rel_prattrs,
-							&isnull);
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. When REPLICA IDENTITY is FULL and the relation
+		 * includes a generated column, but the publish_generated_columns
+		 * option is set to false, this scenario is invalid.
+		 */
+		if (!pubgencols && relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			*unpublished_gen_col = true;
 
-	if (!isnull)
-	{
-		int			x;
-		Bitmapset  *idattrs;
-		Bitmapset  *columns = NULL;
+		if (*unpublished_gen_col && *invalid_column_list)
+			return true;
+	}
 
-		/* With REPLICA IDENTITY FULL, no column list is allowed. */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-			result = true;
+	/* Remember columns that are part of the REPLICA IDENTITY */
+	idattrs = RelationGetIndexAttrBitmap(relation,
+										 INDEX_ATTR_BITMAP_IDENTITY_KEY);
 
-		/* Transform the column list datum to a bitmapset. */
-		columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+	/*
+	 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset
+	 * (to handle system columns the usual way), while column list does not
+	 * use offset, so we can't do bms_is_subset(). Instead, we have to loop
+	 * over the idattrs and check all of them are in the list.
+	 */
+	x = -1;
+	while ((x = bms_next_member(idattrs, x)) >= 0)
+	{
+		AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+		Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
 
-		/* Remember columns that are part of the REPLICA IDENTITY */
-		idattrs = RelationGetIndexAttrBitmap(relation,
-											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+		Assert(!att->attisdropped);
 
-		/*
-		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
-		 * offset (to handle system columns the usual way), while column list
-		 * does not use offset, so we can't do bms_is_subset(). Instead, we
-		 * have to loop over the idattrs and check all of them are in the
-		 * list.
-		 */
-		x = -1;
-		while ((x = bms_next_member(idattrs, x)) >= 0)
+		if (columns == NULL)
 		{
-			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
-
 			/*
-			 * If pubviaroot is true, we are validating the column list of the
-			 * parent table, but the bitmap contains the replica identity
-			 * information of the child table. The parent/child attnums may
-			 * not match, so translate them to the parent - get the attname
-			 * from the child, and look it up in the parent.
+			 * Check if pubgencols is false and generated column is part of
+			 * REPLICA IDENTITY
 			 */
-			if (pubviaroot)
+			if (!pubgencols)
 			{
-				/* attribute name in the child table */
-				char	   *colname = get_attname(relid, attnum, false);
+				*unpublished_gen_col |= att->attgenerated;
 
-				/*
-				 * Determine the attnum for the attribute name in parent (we
-				 * are using the column list defined on the parent).
-				 */
-				attnum = get_attnum(publish_as_relid, colname);
+				/* Break the loop if unpublished generated columns exist. */
+				if (*unpublished_gen_col)
+					break;
 			}
 
-			/* replica identity column, not covered by the column list */
-			if (!bms_is_member(attnum, columns))
-			{
-				result = true;
-				break;
-			}
+			/* Skip validating the column list since it is not defined */
+			continue;
+		}
+
+		/*
+		 * If pubviaroot is true, we are validating the column list of the
+		 * parent table, but the bitmap contains the replica identity
+		 * information of the child table. The parent/child attnums may not
+		 * match, so translate them to the parent - get the attname from the
+		 * child, and look it up in the parent.
+		 */
+		if (pubviaroot)
+		{
+			/* attribute name in the child table */
+			char	   *colname = get_attname(relid, attnum, false);
+
+			/*
+			 * Determine the attnum for the attribute name in parent (we are
+			 * using the column list defined on the parent).
+			 */
+			attnum = get_attnum(publish_as_relid, colname);
 		}
 
-		bms_free(idattrs);
-		bms_free(columns);
+		/* replica identity column, not covered by the column list */
+		*invalid_column_list |= !bms_is_member(attnum, columns);
+
+		if (*invalid_column_list && *unpublished_gen_col)
+			break;
 	}
 
-	ReleaseSysCache(tuple);
+	bms_free(columns);
+	bms_free(idattrs);
 
-	return result;
+	return *invalid_column_list || *unpublished_gen_col;
 }
 
 /* check_functions_in_node callback */
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..227a8aeea0 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -785,10 +785,22 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 		return;
 
 	/*
-	 * It is only safe to execute UPDATE/DELETE when all columns, referenced
-	 * in the row filters from publications which the relation is in, are
-	 * valid - i.e. when all referenced columns are part of REPLICA IDENTITY
-	 * or the table does not publish UPDATEs or DELETEs.
+	 * It is only safe to execute UPDATE/DELETE when:
+	 *
+	 * 1. All columns, referenced in the row filters from publications which
+	 * the relation is in, are valid - i.e. when all referenced columns are
+	 * part of REPLICA IDENTITY or the table does not publish UPDATEs or
+	 * DELETEs.
+	 *
+	 * 2. All columns, referenced in the column lists from publications which
+	 * the relation is in, are valid - i.e. when all referenced columns are
+	 * part of REPLICA IDENTITY or the table does not publish UPDATEs or
+	 * DELETEs.
+	 *
+	 * 3. All generated columns in REPLICA IDENTITY of the relation, for all
+	 * the publications which the relation is in, are valid - i.e. when
+	 * unpublished generated columns are not part of REPLICA IDENTITY or the
+	 * table does not publish UPDATEs or DELETEs.
 	 *
 	 * XXX We could optimize it by first checking whether any of the
 	 * publications have a row filter for this relation. If not and relation
@@ -809,6 +821,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.gencols_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +839,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.gencols_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index d0892cee24..9205ea86c4 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5706,12 +5706,19 @@ RelationGetExclusionInfo(Relation indexRelation,
  * Get the publication information for the given relation.
  *
  * Traverse all the publications which the relation is in to get the
- * publication actions and validate the row filter expressions for such
- * publications if any. We consider the row filter expression as invalid if it
- * references any column which is not part of REPLICA IDENTITY.
+ * publication actions and validate:
+ * 1. The row filter expressions for such publications if any. We consider the
+ *    row filter expression as invalid if it references any column which is not
+ *    part of REPLICA IDENTITY.
+ * 2. The column list for such publications if any. We consider the column
+ *    list as invalid if it does not cover all REPLICA IDENTITY columns.
+ * 3. The generated columns of the relation for such publications. We consider
+ *    any reference of an unpublished generated column in REPLICA IDENTITY as
+ *    invalid.
  *
  * To avoid fetching the publication information repeatedly, we cache the
- * publication actions and row filter validation information.
+ * publication actions, row filter validation information, column list
+ * validation information and generated column validation information.
  */
 void
 RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
@@ -5734,6 +5741,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->gencols_valid_for_update = true;
+		pubdesc->gencols_valid_for_delete = true;
 		return;
 	}
 
@@ -5748,6 +5757,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->gencols_valid_for_update = true;
+	pubdesc->gencols_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5777,6 +5788,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		Oid			pubid = lfirst_oid(lc);
 		HeapTuple	tup;
 		Form_pg_publication pubform;
+		bool		invalid_column_list;
+		bool		unpublished_gen_col;
 
 		tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
 
@@ -5811,18 +5824,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		/*
 		 * Check if all columns are part of the REPLICA IDENTITY index or not.
 		 *
-		 * If the publication is FOR ALL TABLES then it means the table has no
-		 * column list and we can skip the validation.
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
 		 */
-		if (!pubform->puballtables &&
-			(pubform->pubupdate || pubform->pubdelete) &&
-			pub_collist_contains_invalid_column(pubid, relation, ancestors,
-												pubform->pubviaroot))
+		if ((pubform->pubupdate || pubform->pubdelete) &&
+			pub_contains_invalid_column(pubid, relation, ancestors,
+										pubform->pubviaroot,
+										pubform->pubgencols,
+										&invalid_column_list,
+										&unpublished_gen_col))
 		{
 			if (pubform->pubupdate)
-				pubdesc->cols_valid_for_update = false;
+			{
+				pubdesc->cols_valid_for_update = !invalid_column_list;
+				pubdesc->gencols_valid_for_update = !unpublished_gen_col;
+			}
+
 			if (pubform->pubdelete)
-				pubdesc->cols_valid_for_delete = false;
+			{
+				pubdesc->cols_valid_for_delete = !invalid_column_list;
+				pubdesc->gencols_valid_for_delete = !unpublished_gen_col;
+			}
 		}
 
 		ReleaseSysCache(tup);
@@ -5846,6 +5868,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->gencols_valid_for_update &&
+			!pubdesc->gencols_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..e167b34461 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		gencols_valid_for_update;
+	bool		gencols_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..c200c19c6d 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -33,7 +33,10 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId);
 extern void InvalidatePublicationRels(List *relids);
 extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
-extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
-												List *ancestors, bool pubviaroot);
+extern bool pub_contains_invalid_column(Oid pubid, Relation relation,
+										List *ancestors, bool pubviaroot,
+										bool pubgencols,
+										bool *invalid_column_list,
+										bool *unpublished_gen_col);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..12d0611a7c 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,30 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..a29587b45d 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,31 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#46Zhijie Hou (Fujitsu)
houzj.fnst@fujitsu.com
In reply to: Shlok Kyal (#45)
1 attachment(s)
RE: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Friday, November 29, 2024 9:08 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for reviewing the patch. I have fixed the issue and updated the patch.

Thanks for updating the patch. I have reviewed it and have a few suggestions.

Please check the attached diff which includes:

1) Comments in CheckCmdReplicaIdentity()
* Removed some duplicate descriptions.
* Fixed the comments for the column list which I think is not correct.
* Mentioned the column list and generated column in XXX part as well.

2) Doc
* Since we mention the restriction on UPDATEs and DELETEs when a row filter or
column list is defined in create_publication.sgml, I feel we may need to
mention the generated column part as well. So, I added it in the diff.

3) pub_contains_invalid_column
* Simplified one condition a bit.

Please check and merge if it looks good to you.

Best Regards,
Hou zj

Attachments:

0001-improvements.patch.txttext/plain; name=0001-improvements.patch.txtDownload
From abdc729ab5e880543e4702d65e8ea66533325d71 Mon Sep 17 00:00:00 2001
From: Hou Zhijie <houzj.fnst@cn.fujitsu.com>
Date: Tue, 3 Dec 2024 12:19:50 +0800
Subject: [PATCH] improvements

---
 doc/src/sgml/ref/create_publication.sgml |  9 ++++++++
 src/backend/commands/publicationcmds.c   | 13 +++++-------
 src/backend/executor/execReplication.c   | 27 ++++++++++++------------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index f8e217d661..ae21018697 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -311,6 +311,15 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    system columns.
   </para>
 
+  <para>
+   If tables added to a publication include generated columns that are part of
+   the <literal>REPLICA IDENTITY</literal>, it is essential to publish these
+   columns by explicitly listing them in the column list or by enabling the
+   <literal>publish_generated_columns</literal> option. Otherwise,
+   <command>UPDATE</command> or <command>DELETE</command> operations will be
+   disallowed on those tables.
+  </para>
+
   <para>
    The row filter on a table becomes redundant if
    <literal>FOR TABLES IN SCHEMA</literal> is specified and the table
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index d2b4c8e9a6..323f59fae1 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -427,16 +427,13 @@ pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
 		if (columns == NULL)
 		{
 			/*
-			 * Check if pubgencols is false and generated column is part of
-			 * REPLICA IDENTITY
+			 * Break the loop if an unpublished generated column is part of the
+			 * REPLICA IDENTITY.
 			 */
-			if (!pubgencols)
+			if (!pubgencols && att->attgenerated)
 			{
-				*unpublished_gen_col |= att->attgenerated;
-
-				/* Break the loop if unpublished generated columns exist. */
-				if (*unpublished_gen_col)
-					break;
+				*unpublished_gen_col = true;
+				break;
 			}
 
 			/* Skip validating the column list since it is not defined */
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 227a8aeea0..f66ff21159 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -785,27 +785,26 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 		return;
 
 	/*
-	 * It is only safe to execute UPDATE/DELETE when:
+	 * It is only safe to execute UPDATE/DELETE if the relation does not
+	 * publish UPDATEs or DELETEs, or all the following conditions are
+	 * satisfied:
 	 *
 	 * 1. All columns, referenced in the row filters from publications which
-	 * the relation is in, are valid - i.e. when all referenced columns are
-	 * part of REPLICA IDENTITY or the table does not publish UPDATEs or
-	 * DELETEs.
+	 *    the relation is in, are valid - i.e. when all referenced columns are
+	 *    part of REPLICA IDENTITY.
 	 *
 	 * 2. All columns, referenced in the column lists from publications which
-	 * the relation is in, are valid - i.e. when all referenced columns are
-	 * part of REPLICA IDENTITY or the table does not publish UPDATEs or
-	 * DELETEs.
+	 *    the relation is in, are valid - i.e. when all columns referenced in
+	 *    the REPLICA IDENTITY are covered by the column list.
 	 *
-	 * 3. All generated columns in REPLICA IDENTITY of the relation, for all
-	 * the publications which the relation is in, are valid - i.e. when
-	 * unpublished generated columns are not part of REPLICA IDENTITY or the
-	 * table does not publish UPDATEs or DELETEs.
+	 * 3. All generated columns in REPLICA IDENTITY of the relation, are valid
+	 *    - i.e. when all these generated columns are published.
 	 *
 	 * XXX We could optimize it by first checking whether any of the
-	 * publications have a row filter for this relation. If not and relation
-	 * has replica identity then we can avoid building the descriptor but as
-	 * this happens only one time it doesn't seem worth the additional
+	 * publications have a row filter or column list for this relation, or if
+	 * the relation contains a generated column. If none of these exist and the
+	 * relation has replica identity then we can avoid building the descriptor
+	 * but as this happens only one time it doesn't seem worth the additional
 	 * complexity.
 	 */
 	RelationBuildPublicationDesc(rel, &pubdesc);
-- 
2.30.0.windows.2

#47Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Zhijie Hou (Fujitsu) (#46)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, 3 Dec 2024 at 10:21, Zhijie Hou (Fujitsu)
<houzj.fnst@fujitsu.com> wrote:

On Friday, November 29, 2024 9:08 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks for reviewing the patch. I have fixed the issue and updated the patch.

Thanks for updating the patch. I have reviewed it and have a few suggestions.

Please check the attached diff which includes:

1) Comments in CheckCmdReplicaIdentity()
* Removed some duplicate descriptions.
* Fixed the comments for the column list, which I think were not correct.
* Mentioned the column list and generated column in XXX part as well.

2) Doc
* Since we mention the restriction on UPDATEs and DELETEs when a row filter or
column list is defined in create_publication.sgml, I feel we may need to
mention the generated column case as well. So, I added that in the diff.

3) pub_contains_invalid_column
* Simplified one condition a bit.

Please check and merge if it looks good to you.

The changes look good to me. I have included it in the updated patch.

Thanks and Regards,
Shlok Kyal

Attachments:

v14-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchapplication/octet-stream; name=v14-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchDownload
From 0421de2cd268104b9bb87f12452fa20d5acdaeeb Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v14] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 doc/src/sgml/ref/create_publication.sgml  |   9 ++
 src/backend/commands/publicationcmds.c    | 156 +++++++++++++---------
 src/backend/executor/execReplication.c    |  37 ++++-
 src/backend/utils/cache/relcache.c        |  57 ++++++--
 src/include/catalog/pg_publication.h      |   7 +
 src/include/commands/publicationcmds.h    |   7 +-
 src/test/regress/expected/publication.out |  24 ++++
 src/test/regress/sql/publication.sql      |  25 ++++
 src/test/subscription/t/100_bugs.pl       |  16 +--
 9 files changed, 240 insertions(+), 98 deletions(-)

diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index f8e217d661..ae21018697 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -311,6 +311,15 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    system columns.
   </para>
 
+  <para>
+   If tables added to a publication include generated columns that are part of
+   the <literal>REPLICA IDENTITY</literal>, it is essential to publish these
+   columns by explicitly listing them in the column list or by enabling the
+   <literal>publish_generated_columns</literal> option. Otherwise,
+   <command>UPDATE</command> or <command>DELETE</command> operations will be
+   disallowed on those tables.
+  </para>
+
   <para>
    The row filter on a table becomes redundant if
    <literal>FOR TABLES IN SCHEMA</literal> is specified and the table
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 031c84ec29..323f59fae1 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
 }
 
 /*
- * Check if all columns referenced in the REPLICA IDENTITY are covered by
- * the column list.
+ * Check for invalid columns in the publication table definition.
  *
- * Returns true if any replica identity column is not covered by column list.
+ * This function evaluates two conditions:
+ *
+ * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered
+ *    by the column list. If any column is missing, *invalid_column_list is set
+ *    to true.
+ *
+ * 2. Ensures that the REPLICA IDENTITY does not contain unpublished generated
+ *    columns. If an unpublished generated column is found,
+ *    *unpublished_gen_col is set to true.
+ *
+ * Returns true if any of the above conditions are not met.
  */
 bool
-pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
-									bool pubviaroot)
+pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+							bool pubviaroot, bool pubgencols,
+							bool *invalid_column_list,
+							bool *unpublished_gen_col)
 {
-	HeapTuple	tuple;
 	Oid			relid = RelationGetRelid(relation);
 	Oid			publish_as_relid = RelationGetRelid(relation);
-	bool		result = false;
-	Datum		datum;
-	bool		isnull;
+	Bitmapset  *idattrs;
+	Bitmapset  *columns = NULL;
+	TupleDesc	desc = RelationGetDescr(relation);
+	Publication *pub;
+	int			x;
+
+	*invalid_column_list = false;
+	*unpublished_gen_col = false;
 
 	/*
 	 * For a partition, if pubviaroot is true, find the topmost ancestor that
@@ -368,80 +383,93 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 			publish_as_relid = relid;
 	}
 
-	tuple = SearchSysCache2(PUBLICATIONRELMAP,
-							ObjectIdGetDatum(publish_as_relid),
-							ObjectIdGetDatum(pubid));
+	/* Fetch the column list */
+	pub = GetPublication(pubid);
+	check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns);
 
-	if (!HeapTupleIsValid(tuple))
-		return false;
+	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+	{
+		/* With REPLICA IDENTITY FULL, no column list is allowed. */
+		*invalid_column_list = (columns != NULL);
 
-	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
-							Anum_pg_publication_rel_prattrs,
-							&isnull);
+		/*
+		 * REPLICA IDENTITY can be FULL only if there is no column list for
+		 * publication. When REPLICA IDENTITY is FULL and the relation
+		 * includes a generated column, but the publish_generated_columns
+		 * option is set to false, this scenario is invalid.
+		 */
+		if (!pubgencols && relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			*unpublished_gen_col = true;
 
-	if (!isnull)
+		if (*unpublished_gen_col && *invalid_column_list)
+			return true;
+	}
+
+	/* Remember columns that are part of the REPLICA IDENTITY */
+	idattrs = RelationGetIndexAttrBitmap(relation,
+										 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+	/*
+	 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset
+	 * (to handle system columns the usual way), while column list does not
+	 * use offset, so we can't do bms_is_subset(). Instead, we have to loop
+	 * over the idattrs and check all of them are in the list.
+	 */
+	x = -1;
+	while ((x = bms_next_member(idattrs, x)) >= 0)
 	{
-		int			x;
-		Bitmapset  *idattrs;
-		Bitmapset  *columns = NULL;
+		AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+		Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
 
-		/* With REPLICA IDENTITY FULL, no column list is allowed. */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-			result = true;
+		Assert(!att->attisdropped);
 
-		/* Transform the column list datum to a bitmapset. */
-		columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+		if (columns == NULL)
+		{
+			/*
+			 * Break the loop if an unpublished generated column is part of the
+			 * REPLICA IDENTITY.
+			 */
+			if (!pubgencols && att->attgenerated)
+			{
+				*unpublished_gen_col = true;
+				break;
+			}
 
-		/* Remember columns that are part of the REPLICA IDENTITY */
-		idattrs = RelationGetIndexAttrBitmap(relation,
-											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+			/* Skip validating the column list since it is not defined */
+			continue;
+		}
 
 		/*
-		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
-		 * offset (to handle system columns the usual way), while column list
-		 * does not use offset, so we can't do bms_is_subset(). Instead, we
-		 * have to loop over the idattrs and check all of them are in the
-		 * list.
+		 * If pubviaroot is true, we are validating the column list of the
+		 * parent table, but the bitmap contains the replica identity
+		 * information of the child table. The parent/child attnums may not
+		 * match, so translate them to the parent - get the attname from the
+		 * child, and look it up in the parent.
 		 */
-		x = -1;
-		while ((x = bms_next_member(idattrs, x)) >= 0)
+		if (pubviaroot)
 		{
-			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			/* attribute name in the child table */
+			char	   *colname = get_attname(relid, attnum, false);
 
 			/*
-			 * If pubviaroot is true, we are validating the column list of the
-			 * parent table, but the bitmap contains the replica identity
-			 * information of the child table. The parent/child attnums may
-			 * not match, so translate them to the parent - get the attname
-			 * from the child, and look it up in the parent.
+			 * Determine the attnum for the attribute name in parent (we are
+			 * using the column list defined on the parent).
 			 */
-			if (pubviaroot)
-			{
-				/* attribute name in the child table */
-				char	   *colname = get_attname(relid, attnum, false);
-
-				/*
-				 * Determine the attnum for the attribute name in parent (we
-				 * are using the column list defined on the parent).
-				 */
-				attnum = get_attnum(publish_as_relid, colname);
-			}
-
-			/* replica identity column, not covered by the column list */
-			if (!bms_is_member(attnum, columns))
-			{
-				result = true;
-				break;
-			}
+			attnum = get_attnum(publish_as_relid, colname);
 		}
 
-		bms_free(idattrs);
-		bms_free(columns);
+		/* replica identity column, not covered by the column list */
+		*invalid_column_list |= !bms_is_member(attnum, columns);
+
+		if (*invalid_column_list && *unpublished_gen_col)
+			break;
 	}
 
-	ReleaseSysCache(tuple);
+	bms_free(columns);
+	bms_free(idattrs);
 
-	return result;
+	return *invalid_column_list || *unpublished_gen_col;
 }
 
 /* check_functions_in_node callback */
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..f66ff21159 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -785,15 +785,26 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 		return;
 
 	/*
-	 * It is only safe to execute UPDATE/DELETE when all columns, referenced
-	 * in the row filters from publications which the relation is in, are
-	 * valid - i.e. when all referenced columns are part of REPLICA IDENTITY
-	 * or the table does not publish UPDATEs or DELETEs.
+	 * It is only safe to execute UPDATE/DELETE if the relation does not
+	 * publish UPDATEs or DELETEs, or all the following conditions are
+	 * satisfied:
+	 *
+	 * 1. All columns, referenced in the row filters from publications which
+	 *    the relation is in, are valid - i.e. when all referenced columns are
+	 *    part of REPLICA IDENTITY.
+	 *
+	 * 2. All columns, referenced in the column lists from publications which
+	 *    the relation is in, are valid - i.e. when all columns referenced in
+	 *    the REPLICA IDENTITY are covered by the column list.
+	 *
+	 * 3. All generated columns in REPLICA IDENTITY of the relation, are valid
+	 *    - i.e. when all these generated columns are published.
 	 *
 	 * XXX We could optimize it by first checking whether any of the
-	 * publications have a row filter for this relation. If not and relation
-	 * has replica identity then we can avoid building the descriptor but as
-	 * this happens only one time it doesn't seem worth the additional
+	 * publications have a row filter or column list for this relation, or if
+	 * the relation contains a generated column. If none of these exist and the
+	 * relation has replica identity then we can avoid building the descriptor
+	 * but as this happens only one time it doesn't seem worth the additional
 	 * complexity.
 	 */
 	RelationBuildPublicationDesc(rel, &pubdesc);
@@ -809,6 +820,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.gencols_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +838,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.gencols_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index d0892cee24..9205ea86c4 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5706,12 +5706,19 @@ RelationGetExclusionInfo(Relation indexRelation,
  * Get the publication information for the given relation.
  *
  * Traverse all the publications which the relation is in to get the
- * publication actions and validate the row filter expressions for such
- * publications if any. We consider the row filter expression as invalid if it
- * references any column which is not part of REPLICA IDENTITY.
+ * publication actions and validate:
+ * 1. The row filter expressions for such publications if any. We consider the
+ *    row filter expression as invalid if it references any column which is not
+ *    part of REPLICA IDENTITY.
+ * 2. The column list for such publication if any. We consider the column list
+ *    invalid if REPLICA IDENTITY contains any column that is not part of it.
+ * 3. The generated columns of the relation for such publications. We consider
+ *    any reference of an unpublished generated column in REPLICA IDENTITY as
+ *    invalid.
  *
  * To avoid fetching the publication information repeatedly, we cache the
- * publication actions and row filter validation information.
+ * publication actions, row filter validation information, column list
+ * validation information and generated column validation information.
  */
 void
 RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
@@ -5734,6 +5741,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->gencols_valid_for_update = true;
+		pubdesc->gencols_valid_for_delete = true;
 		return;
 	}
 
@@ -5748,6 +5757,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->gencols_valid_for_update = true;
+	pubdesc->gencols_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5777,6 +5788,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		Oid			pubid = lfirst_oid(lc);
 		HeapTuple	tup;
 		Form_pg_publication pubform;
+		bool		invalid_column_list;
+		bool		unpublished_gen_col;
 
 		tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
 
@@ -5811,18 +5824,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		/*
 		 * Check if all columns are part of the REPLICA IDENTITY index or not.
 		 *
-		 * If the publication is FOR ALL TABLES then it means the table has no
-		 * column list and we can skip the validation.
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
 		 */
-		if (!pubform->puballtables &&
-			(pubform->pubupdate || pubform->pubdelete) &&
-			pub_collist_contains_invalid_column(pubid, relation, ancestors,
-												pubform->pubviaroot))
+		if ((pubform->pubupdate || pubform->pubdelete) &&
+			pub_contains_invalid_column(pubid, relation, ancestors,
+										pubform->pubviaroot,
+										pubform->pubgencols,
+										&invalid_column_list,
+										&unpublished_gen_col))
 		{
 			if (pubform->pubupdate)
-				pubdesc->cols_valid_for_update = false;
+			{
+				pubdesc->cols_valid_for_update = !invalid_column_list;
+				pubdesc->gencols_valid_for_update = !unpublished_gen_col;
+			}
+
 			if (pubform->pubdelete)
-				pubdesc->cols_valid_for_delete = false;
+			{
+				pubdesc->cols_valid_for_delete = !invalid_column_list;
+				pubdesc->gencols_valid_for_delete = !unpublished_gen_col;
+			}
 		}
 
 		ReleaseSysCache(tup);
@@ -5846,6 +5868,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->gencols_valid_for_update &&
+			!pubdesc->gencols_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..e167b34461 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns which are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		gencols_valid_for_update;
+	bool		gencols_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..c200c19c6d 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -33,7 +33,10 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId);
 extern void InvalidatePublicationRels(List *relids);
 extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
-extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
-												List *ancestors, bool pubviaroot);
+extern bool pub_contains_invalid_column(Oid pubid, Relation relation,
+										List *ancestors, bool pubviaroot,
+										bool pubgencols,
+										bool *invalid_column_list,
+										bool *unpublished_gen_col);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..12d0611a7c 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,30 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..a29587b45d 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,31 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.34.1

#48Amit Kapila
amit.kapila16@gmail.com
In reply to: Shlok Kyal (#47)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Tue, Dec 3, 2024 at 12:31 PM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

The changes look good to me. I have included it in the updated patch.

The patch looks mostly good to me. I have updated the docs and comments,
and made some other cosmetic changes. Please see attached and let me know
what you think.

--
With Regards,
Amit Kapila.

Attachments:

v15-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchapplication/octet-stream; name=v15-0001-Disallow-UPDATE-DELETE-on-table-with-generated-c.patchDownload
From 11bfcaa237a9c46d5c6b798d06c188d4e16025da Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Tue, 5 Nov 2024 11:11:25 +0530
Subject: [PATCH v15] Disallow UPDATE/DELETE on table with generated column as
 REPLICA IDENTITY

UPDATE or DELETE operations on tables with unpublished generated columns
set as REPLICA IDENTITY are not permitted. This patch ensures that if an
UPDATE or DELETE command is executed on such tables, an error will be
thrown.
With this patch, the behavior has changed for the test added in commit
adedf54. Additionally, there is no other way to trigger the bug that was
fixed by commit adedf54, so the test has been removed.
---
 doc/src/sgml/ref/create_publication.sgml  |   8 ++
 src/backend/commands/publicationcmds.c    | 156 +++++++++++++---------
 src/backend/executor/execReplication.c    |  37 ++++-
 src/backend/utils/cache/relcache.c        |  57 ++++++--
 src/include/catalog/pg_publication.h      |   7 +
 src/include/commands/publicationcmds.h    |   7 +-
 src/test/regress/expected/publication.out |  24 ++++
 src/test/regress/sql/publication.sql      |  25 ++++
 src/test/subscription/t/100_bugs.pl       |  16 +--
 9 files changed, 238 insertions(+), 99 deletions(-)

diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index f8e217d661..5e25536554 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -311,6 +311,14 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
    system columns.
   </para>
 
+  <para>
+   The generated columns that are part of <literal>REPLICA IDENTITY</literal>
+   must be published explicitly either by listing them in the column list or
+   by enabling the <literal>publish_generated_columns</literal> option, in
+   order for <command>UPDATE</command> and <command>DELETE</command> operations
+   to be published.
+  </para>
+
   <para>
    The row filter on a table becomes redundant if
    <literal>FOR TABLES IN SCHEMA</literal> is specified and the table
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 031c84ec29..5050057a7e 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
 }
 
 /*
- * Check if all columns referenced in the REPLICA IDENTITY are covered by
- * the column list.
+ * Check for invalid columns in the publication table definition.
  *
- * Returns true if any replica identity column is not covered by column list.
+ * This function evaluates two conditions:
+ *
+ * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered
+ *    by the column list. If any column is missing, *invalid_column_list is set
+ *    to true.
+ * 2. Ensures that all the generated columns referenced in the REPLICA IDENTITY
+ *    are published either by listing them in the column list or by enabling
+ *    publish_generated_columns option. If any unpublished generated column is
+ *    found, *invalid_gen_col is set to true.
+ *
+ * Returns true if any of the above conditions are not met.
  */
 bool
-pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
-									bool pubviaroot)
+pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors,
+							bool pubviaroot, bool pubgencols,
+							bool *invalid_column_list,
+							bool *invalid_gen_col)
 {
-	HeapTuple	tuple;
 	Oid			relid = RelationGetRelid(relation);
 	Oid			publish_as_relid = RelationGetRelid(relation);
-	bool		result = false;
-	Datum		datum;
-	bool		isnull;
+	Bitmapset  *idattrs;
+	Bitmapset  *columns = NULL;
+	TupleDesc	desc = RelationGetDescr(relation);
+	Publication *pub;
+	int			x;
+
+	*invalid_column_list = false;
+	*invalid_gen_col = false;
 
 	/*
 	 * For a partition, if pubviaroot is true, find the topmost ancestor that
@@ -368,80 +383,91 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor
 			publish_as_relid = relid;
 	}
 
-	tuple = SearchSysCache2(PUBLICATIONRELMAP,
-							ObjectIdGetDatum(publish_as_relid),
-							ObjectIdGetDatum(pubid));
+	/* Fetch the column list */
+	pub = GetPublication(pubid);
+	check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns);
 
-	if (!HeapTupleIsValid(tuple))
-		return false;
+	if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+	{
+		/* With REPLICA IDENTITY FULL, no column list is allowed. */
+		*invalid_column_list = (columns != NULL);
 
-	datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple,
-							Anum_pg_publication_rel_prattrs,
-							&isnull);
+		/*
+		 * As we don't allow a column list with REPLICA IDENTITY FULL, the
+		 * publish_generated_columns option must be set to true if the table
+		 * has any stored generated columns.
+		 */
+		if (!pubgencols &&
+			relation->rd_att->constr &&
+			relation->rd_att->constr->has_generated_stored)
+			*invalid_gen_col = true;
 
-	if (!isnull)
-	{
-		int			x;
-		Bitmapset  *idattrs;
-		Bitmapset  *columns = NULL;
+		if (*invalid_gen_col && *invalid_column_list)
+			return true;
+	}
 
-		/* With REPLICA IDENTITY FULL, no column list is allowed. */
-		if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-			result = true;
+	/* Remember columns that are part of the REPLICA IDENTITY */
+	idattrs = RelationGetIndexAttrBitmap(relation,
+										 INDEX_ATTR_BITMAP_IDENTITY_KEY);
 
-		/* Transform the column list datum to a bitmapset. */
-		columns = pub_collist_to_bitmapset(NULL, datum, NULL);
+	/*
+	 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset
+	 * (to handle system columns the usual way), while column list does not
+	 * use offset, so we can't do bms_is_subset(). Instead, we have to loop
+	 * over the idattrs and check all of them are in the list.
+	 */
+	x = -1;
+	while ((x = bms_next_member(idattrs, x)) >= 0)
+	{
+		AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+		Form_pg_attribute att = TupleDescAttr(desc, attnum - 1);
 
-		/* Remember columns that are part of the REPLICA IDENTITY */
-		idattrs = RelationGetIndexAttrBitmap(relation,
-											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+		if (columns == NULL)
+		{
+			/*
+			 * The publish_generated_columns option must be set to true if the
+			 * REPLICA IDENTITY contains any stored generated column.
+			 */
+			if (!pubgencols && att->attgenerated)
+			{
+				*invalid_gen_col = true;
+				break;
+			}
+
+			/* Skip validating the column list since it is not defined */
+			continue;
+		}
 
 		/*
-		 * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are
-		 * offset (to handle system columns the usual way), while column list
-		 * does not use offset, so we can't do bms_is_subset(). Instead, we
-		 * have to loop over the idattrs and check all of them are in the
-		 * list.
+		 * If pubviaroot is true, we are validating the column list of the
+		 * parent table, but the bitmap contains the replica identity
+		 * information of the child table. The parent/child attnums may not
+		 * match, so translate them to the parent - get the attname from the
+		 * child, and look it up in the parent.
 		 */
-		x = -1;
-		while ((x = bms_next_member(idattrs, x)) >= 0)
+		if (pubviaroot)
 		{
-			AttrNumber	attnum = (x + FirstLowInvalidHeapAttributeNumber);
+			/* attribute name in the child table */
+			char	   *colname = get_attname(relid, attnum, false);
 
 			/*
-			 * If pubviaroot is true, we are validating the column list of the
-			 * parent table, but the bitmap contains the replica identity
-			 * information of the child table. The parent/child attnums may
-			 * not match, so translate them to the parent - get the attname
-			 * from the child, and look it up in the parent.
+			 * Determine the attnum for the attribute name in parent (we are
+			 * using the column list defined on the parent).
 			 */
-			if (pubviaroot)
-			{
-				/* attribute name in the child table */
-				char	   *colname = get_attname(relid, attnum, false);
-
-				/*
-				 * Determine the attnum for the attribute name in parent (we
-				 * are using the column list defined on the parent).
-				 */
-				attnum = get_attnum(publish_as_relid, colname);
-			}
-
-			/* replica identity column, not covered by the column list */
-			if (!bms_is_member(attnum, columns))
-			{
-				result = true;
-				break;
-			}
+			attnum = get_attnum(publish_as_relid, colname);
 		}
 
-		bms_free(idattrs);
-		bms_free(columns);
+		/* replica identity column, not covered by the column list */
+		*invalid_column_list |= !bms_is_member(attnum, columns);
+
+		if (*invalid_column_list && *invalid_gen_col)
+			break;
 	}
 
-	ReleaseSysCache(tuple);
+	bms_free(columns);
+	bms_free(idattrs);
 
-	return result;
+	return *invalid_column_list || *invalid_gen_col;
 }
 
 /* check_functions_in_node callback */
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 54025c9f15..7f90cd022b 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -785,15 +785,26 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 		return;
 
 	/*
-	 * It is only safe to execute UPDATE/DELETE when all columns, referenced
-	 * in the row filters from publications which the relation is in, are
-	 * valid - i.e. when all referenced columns are part of REPLICA IDENTITY
-	 * or the table does not publish UPDATEs or DELETEs.
+	 * It is only safe to execute UPDATE/DELETE if the relation does not
+	 * publish UPDATEs or DELETEs, or all the following conditions are
+	 * satisfied:
+	 *
+	 * 1. All columns, referenced in the row filters from publications which
+	 *    the relation is in, are valid - i.e. when all referenced columns are
+	 *    part of REPLICA IDENTITY.
+	 *
+	 * 2. All columns, referenced in the column lists are valid - i.e. when all
+	 *    columns referenced in the REPLICA IDENTITY are covered by the column
+	 *    list.
+	 *
+	 * 3. All generated columns in REPLICA IDENTITY of the relation, are valid
+	 *    - i.e. when all these generated columns are published.
 	 *
 	 * XXX We could optimize it by first checking whether any of the
-	 * publications have a row filter for this relation. If not and relation
-	 * has replica identity then we can avoid building the descriptor but as
-	 * this happens only one time it doesn't seem worth the additional
+	 * publications have a row filter or column list for this relation, or if
+	 * the relation contains a generated column. If none of these exist and the
+	 * relation has replica identity then we can avoid building the descriptor
+	 * but as this happens only one time it doesn't seem worth the additional
 	 * complexity.
 	 */
 	RelationBuildPublicationDesc(rel, &pubdesc);
@@ -809,6 +820,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_UPDATE && !pubdesc.gencols_valid_for_update)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot update table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -821,6 +838,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
 				 errdetail("Column list used by the publication does not cover the replica identity.")));
+	else if (cmd == CMD_DELETE && !pubdesc.gencols_valid_for_delete)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("cannot delete from table \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail("Replica identity consists of an unpublished generated column.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index d0892cee24..422509f18d 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5706,12 +5706,19 @@ RelationGetExclusionInfo(Relation indexRelation,
  * Get the publication information for the given relation.
  *
  * Traverse all the publications which the relation is in to get the
- * publication actions and validate the row filter expressions for such
- * publications if any. We consider the row filter expression as invalid if it
- * references any column which is not part of REPLICA IDENTITY.
+ * publication actions and validate:
+ * 1. The row filter expressions for such publications if any. We consider the
+ *    row filter expression as invalid if it references any column which is not
+ *    part of REPLICA IDENTITY.
+ * 2. The column list for such publication if any. We consider the column list
+ * 	  invalid if REPLICA IDENTITY contains any column that is not part of it.
+ * 3. The generated columns of the relation for such publications. We consider
+ *    any reference of an unpublished generated column in REPLICA IDENTITY as
+ *    invalid.
  *
  * To avoid fetching the publication information repeatedly, we cache the
- * publication actions and row filter validation information.
+ * publication actions, row filter validation information, column list
+ * validation information, and generated column validation information.
  */
 void
 RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
@@ -5734,6 +5741,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		pubdesc->rf_valid_for_delete = true;
 		pubdesc->cols_valid_for_update = true;
 		pubdesc->cols_valid_for_delete = true;
+		pubdesc->gencols_valid_for_update = true;
+		pubdesc->gencols_valid_for_delete = true;
 		return;
 	}
 
@@ -5748,6 +5757,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 	pubdesc->rf_valid_for_delete = true;
 	pubdesc->cols_valid_for_update = true;
 	pubdesc->cols_valid_for_delete = true;
+	pubdesc->gencols_valid_for_update = true;
+	pubdesc->gencols_valid_for_delete = true;
 
 	/* Fetch the publication membership info. */
 	puboids = GetRelationPublications(relid);
@@ -5777,6 +5788,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		Oid			pubid = lfirst_oid(lc);
 		HeapTuple	tup;
 		Form_pg_publication pubform;
+		bool		invalid_column_list;
+		bool		invalid_gen_col;
 
 		tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
 
@@ -5811,18 +5824,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 		/*
 		 * Check if all columns are part of the REPLICA IDENTITY index or not.
 		 *
-		 * If the publication is FOR ALL TABLES then it means the table has no
-		 * column list and we can skip the validation.
+		 * Check if all generated columns included in the REPLICA IDENTITY are
+		 * published.
 		 */
-		if (!pubform->puballtables &&
-			(pubform->pubupdate || pubform->pubdelete) &&
-			pub_collist_contains_invalid_column(pubid, relation, ancestors,
-												pubform->pubviaroot))
+		if ((pubform->pubupdate || pubform->pubdelete) &&
+			pub_contains_invalid_column(pubid, relation, ancestors,
+										pubform->pubviaroot,
+										pubform->pubgencols,
+										&invalid_column_list,
+										&invalid_gen_col))
 		{
 			if (pubform->pubupdate)
-				pubdesc->cols_valid_for_update = false;
+			{
+				pubdesc->cols_valid_for_update = !invalid_column_list;
+				pubdesc->gencols_valid_for_update = !invalid_gen_col;
+			}
+
 			if (pubform->pubdelete)
-				pubdesc->cols_valid_for_delete = false;
+			{
+				pubdesc->cols_valid_for_delete = !invalid_column_list;
+				pubdesc->gencols_valid_for_delete = !invalid_gen_col;
+			}
 		}
 
 		ReleaseSysCache(tup);
@@ -5846,6 +5868,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc)
 			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
 			!pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete)
 			break;
+
+		/*
+		 * If we know everything is replicated and replica identity has an
+		 * unpublished generated column, there is no point to check for other
+		 * publications.
+		 */
+		if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate &&
+			pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate &&
+			!pubdesc->gencols_valid_for_update &&
+			!pubdesc->gencols_valid_for_delete)
+			break;
 	}
 
 	if (relation->rd_pubdesc)
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
index 9a83a72d6b..e2d894a2ff 100644
--- a/src/include/catalog/pg_publication.h
+++ b/src/include/catalog/pg_publication.h
@@ -98,6 +98,13 @@ typedef struct PublicationDesc
 	 */
 	bool		cols_valid_for_update;
 	bool		cols_valid_for_delete;
+
+	/*
+	 * true if all generated columns that are part of replica identity are
+	 * published or the publication actions do not include UPDATE or DELETE.
+	 */
+	bool		gencols_valid_for_update;
+	bool		gencols_valid_for_delete;
 } PublicationDesc;
 
 typedef struct Publication
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
index 5487c571f6..19037518e8 100644
--- a/src/include/commands/publicationcmds.h
+++ b/src/include/commands/publicationcmds.h
@@ -33,7 +33,10 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId);
 extern void InvalidatePublicationRels(List *relids);
 extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation,
 										   List *ancestors, bool pubviaroot);
-extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation,
-												List *ancestors, bool pubviaroot);
+extern bool pub_contains_invalid_column(Oid pubid, Relation relation,
+										List *ancestors, bool pubviaroot,
+										bool pubgencols,
+										bool *invalid_column_list,
+										bool *invalid_gen_col);
 
 #endif							/* PUBLICATIONCMDS_H */
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 5de2d64d01..12d0611a7c 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -672,6 +672,30 @@ DROP TABLE rf_tbl_abcd_pk;
 DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+ERROR:  cannot update table "testpub_gencol"
+DETAIL:  Replica identity consists of an unpublished generated column.
+DROP PUBLICATION pub_gencol;
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index 48e68bcca2..a29587b45d 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -396,6 +396,31 @@ DROP TABLE rf_tbl_abcd_nopk;
 DROP TABLE rf_tbl_abcd_part_pk;
 -- ======================================================
 
+-- ======================================================
+-- test with generated column
+SET client_min_messages = 'ERROR';
+CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL);
+CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b);
+ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
+
+-- error - generated column "b" is not published but part of index set as REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+
+-- error - generated column "b" is not published and REPLICA IDENTITY is set FULL
+ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+-- ok - generated column "b" is published and is part of REPLICA IDENTITY
+CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
+UPDATE testpub_gencol SET a = 100 WHERE a = 1;
+DROP PUBLICATION pub_gencol;
+
+DROP TABLE testpub_gencol;
+RESET client_min_messages;
+-- ======================================================
+
 -- fail - duplicate tables are not allowed if that table has any column lists
 SET client_min_messages = 'ERROR';
 CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert');
diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl
index cb36ca7b16..794b928f50 100644
--- a/src/test/subscription/t/100_bugs.pl
+++ b/src/test/subscription/t/100_bugs.pl
@@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch");
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-# The bug was that when the REPLICA IDENTITY FULL is used with dropped or
-# generated columns, we fail to apply updates and deletes
+# The bug was that when the REPLICA IDENTITY FULL is used with dropped
+# columns, we fail to apply updates and deletes
 $node_publisher->rotate_logfile();
 $node_publisher->start();
 
@@ -389,18 +389,14 @@ $node_publisher->safe_psql(
 	'postgres', qq(
 	CREATE TABLE dropped_cols (a int, b_drop int, c int);
 	ALTER TABLE dropped_cols REPLICA IDENTITY FULL;
-	CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
-	ALTER TABLE generated_cols REPLICA IDENTITY FULL;
-	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols;
+	CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols;
 	-- some initial data
 	INSERT INTO dropped_cols VALUES (1, 1, 1);
-	INSERT INTO generated_cols (a, c) VALUES (1, 1);
 ));
 
 $node_subscriber->safe_psql(
 	'postgres', qq(
 	 CREATE TABLE dropped_cols (a int, b_drop int, c int);
-	 CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int);
 ));
 
 $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -421,7 +417,6 @@ $node_subscriber->safe_psql(
 $node_publisher->safe_psql(
 	'postgres', qq(
 		UPDATE dropped_cols SET a = 100;
-		UPDATE generated_cols SET a = 100;
 ));
 $node_publisher->wait_for_catchup('sub_dropped_cols');
 
@@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql(
 	qq(1),
 	'replication with RI FULL and dropped columns');
 
-is( $node_subscriber->safe_psql(
-		'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"),
-	qq(1),
-	'replication with RI FULL and generated columns');
-
 $node_publisher->stop('fast');
 $node_subscriber->stop('fast');
 
-- 
2.28.0.windows.1

#49Peter Smith
smithpb2250@gmail.com
In reply to: Amit Kapila (#48)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

Hi,

I was looking at the recently pushed code [1]. IMO the wording of some
of those new error messages of function CheckCmdReplicaIdentity() is
not quite correct.

According to my understanding, and also according to ChatGPT:
------
The sentence "Replica identity consists of an unpublished generated
column." implies that the entire replica identity is made up of an
unpublished generated column and nothing else.

This is because the phrase "consists of" typically indicates a
complete composition, meaning that the replica identity is exclusively
composed of the unpublished generated column in this context.
------

IIUC, these errors are intended for when there is *any* unpublished
generated column found in the RI, and the RI might also have other
columns in it generated or otherwise. So, I think those error messages
saying "consists of" should be reworded like below, or similar:
* errdetail("Replica identity includes an unpublished generated column.")));
* errdetail("Replica identity has one or more unpublished generated
columns.")));
* errdetail("One or more unpublished generated columns are in the
Replica identity.")));
* ...

======
[1] https://github.com/postgres/postgres/commit/87ce27de6963091f4a365f80bcdb06b9da098f00

Kind Regards,
Peter Smith.
Fujitsu Australia

#50Amit Kapila
amit.kapila16@gmail.com
In reply to: Peter Smith (#49)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, Dec 5, 2024 at 7:34 AM Peter Smith <smithpb2250@gmail.com> wrote:

IIUC, these errors are intended for when there is *any* unpublished
generated column found in the RI, and the RI might also have other
columns in it generated or otherwise. So, I think those error messages
saying "consists of" should be reworded like below, or similar:
* errdetail("Replica identity includes an unpublished generated column.")));
* errdetail("Replica identity has one or more unpublished generated
columns.")));
* errdetail("One or more unpublished generated columns are in the
Replica identity.")));
* ...

How about a bit clearer: "Replica identity must not contain any
unpublished generated column."?

--
With Regards,
Amit Kapila.

#51Peter Smith
smithpb2250@gmail.com
In reply to: Amit Kapila (#50)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, Dec 5, 2024 at 2:41 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Dec 5, 2024 at 7:34 AM Peter Smith <smithpb2250@gmail.com> wrote:

IIUC, these errors are intended for when there is *any* unpublished
generated column found in the RI, and the RI might also have other
columns in it generated or otherwise. So, I think those error messages
saying "consists of" should be reworded like below, or similar:
* errdetail("Replica identity includes an unpublished generated column.")));
* errdetail("Replica identity has one or more unpublished generated
columns.")));
* errdetail("One or more unpublished generated columns are in the
Replica identity.")));
* ...

How about a bit clearer: "Replica identity must not contain any
unpublished generated column."?

Yes, that is better.

Compare:
Replica identity contains unpublished generated columns.
Replica identity must not contain unpublished generated columns.

Maybe it is just my imagination, but the "must not" version feels more
like it implies the Replica Identity is in the wrong, whereas I think
it is most likely that the Replica Identity is correct, and the real
problem is that the user just forgot to publish the generated column.

======
Kind Regards
Peter Smith.
Fujitsu Australia

#52Amit Kapila
amit.kapila16@gmail.com
In reply to: Peter Smith (#51)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, Dec 5, 2024 at 9:32 AM Peter Smith <smithpb2250@gmail.com> wrote:

On Thu, Dec 5, 2024 at 2:41 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Dec 5, 2024 at 7:34 AM Peter Smith <smithpb2250@gmail.com> wrote:

IIUC, these errors are intended for when there is *any* unpublished
generated column found in the RI, and the RI might also have other
columns in it generated or otherwise. So, I think those error messages
saying "consists of" should be reworded like below, or similar:
* errdetail("Replica identity includes an unpublished generated column.")));
* errdetail("Replica identity has one or more unpublished generated
columns.")));
* errdetail("One or more unpublished generated columns are in the
Replica identity.")));
* ...

How about a bit clearer: "Replica identity must not contain any
unpublished generated column."?

Yes, that is better.

Compare:
Replica identity contains unpublished generated columns.
Replica identity must not contain unpublished generated columns.

Maybe it is just my imagination, but the "must not" version feels more
like it implies the Replica Identity is in the wrong, whereas I think
it is most likely that the Replica Identity is correct, and the real
problem is that the user just forgot to publish the generated column.

Either way is possible and I find the message "Replica identity must
not contain unpublished generated columns." clearer as compared to the
other option.

--
With Regards,
Amit Kapila.

#53Peter Smith
smithpb2250@gmail.com
In reply to: Amit Kapila (#52)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Thu, Dec 5, 2024 at 8:49 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Dec 5, 2024 at 9:32 AM Peter Smith <smithpb2250@gmail.com> wrote:

On Thu, Dec 5, 2024 at 2:41 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Dec 5, 2024 at 7:34 AM Peter Smith <smithpb2250@gmail.com> wrote:

IIUC, these errors are intended for when there is *any* unpublished
generated column found in the RI, and the RI might also have other
columns in it generated or otherwise. So, I think those error messages
saying "consists of" should be reworded like below, or similar:
* errdetail("Replica identity includes an unpublished generated column.")));
* errdetail("Replica identity has one or more unpublished generated
columns.")));
* errdetail("One or more unpublished generated columns are in the
Replica identity.")));
* ...

How about a bit clearer: "Replica identity must not contain any
unpublished generated column."?

Yes, that is better.

Compare:
Replica identity contains unpublished generated columns.
Replica identity must not contain unpublished generated columns.

Maybe it is just my imagination, but the "must not" version feels more
like it implies the Replica Identity is in the wrong, whereas I think
it is most likely that the Replica Identity is correct, and the real
problem is that the user just forgot to publish the generated column.

Either way is possible and I find the message "Replica identity must
not contain unpublished generated columns." clearer as compared to the
other option.

OK, let's go with that.

======
Kind Regards
Peter Smith.
Fujitsu Australia.

#54Shlok Kyal
shlok.kyal.oss@gmail.com
In reply to: Peter Smith (#53)
1 attachment(s)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, 6 Dec 2024 at 02:44, Peter Smith <smithpb2250@gmail.com> wrote:

On Thu, Dec 5, 2024 at 8:49 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Dec 5, 2024 at 9:32 AM Peter Smith <smithpb2250@gmail.com> wrote:

On Thu, Dec 5, 2024 at 2:41 PM Amit Kapila <amit.kapila16@gmail.com> wrote:

On Thu, Dec 5, 2024 at 7:34 AM Peter Smith <smithpb2250@gmail.com> wrote:

IIUC, these errors are intended for when there is *any* unpublished
generated column found in the RI, and the RI might also have other
columns in it generated or otherwise. So, I think those error messages
saying "consists of" should be reworded like below, or similar:
* errdetail("Replica identity includes an unpublished generated column.")));
* errdetail("Replica identity has one or more unpublished generated
columns.")));
* errdetail("One or more unpublished generated columns are in the
Replica identity.")));
* ...

How about a bit clearer: "Replica identity must not contain any
unpublished generated column."?

Yes, that is better.

Compare:
Replica identity contains unpublished generated columns.
Replica identity must not contain unpublished generated columns.

Maybe it is just my imagination, but the "must not" version feels more
like it implies the Replica Identity is in the wrong, whereas I think
it is most likely that the Replica Identity is correct, and the real
problem is that the user just forgot to publish the generated column.

Either way is possible and I find the message "Replica identity must
not contain unpublished generated columns." clearer as compared to the
other option.

OK, let's go with that.

Thanks Peter, for pointing this out.
I also feel that the error message suggested by Amit would be better.
I have attached a patch for the same.

Thanks and Regards,
Shlok Kyal

Attachments:

v1-0001-Improve-error-message-introduced-in-commit-87ce27.patchapplication/octet-stream; name=v1-0001-Improve-error-message-introduced-in-commit-87ce27.patchDownload
From d57017700a79935b387a4a0f70d89d6227e622de Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Fri, 6 Dec 2024 10:52:44 +0530
Subject: [PATCH v1] Improve error message introduced in commit 87ce27de696

The sentence "Replica identity consists of an unpublished generated
column." implies that the entire replica identity is made up of an
unpublished generated column and nothing else.

This patch fixes it with a better error message.
---
 src/backend/executor/execReplication.c    | 4 ++--
 src/test/regress/expected/publication.out | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index cfdf2eedf4..368a40337a 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -825,7 +825,7 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 				 errmsg("cannot update table \"%s\"",
 						RelationGetRelationName(rel)),
-				 errdetail("Replica identity consists of an unpublished generated column.")));
+				 errdetail("Replica identity must not contain unpublished generated columns.")));
 	else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
@@ -843,7 +843,7 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 				 errmsg("cannot delete from table \"%s\"",
 						RelationGetRelationName(rel)),
-				 errdetail("Replica identity consists of an unpublished generated column.")));
+				 errdetail("Replica identity must not contain unpublished generated columns.")));
 
 	/* If relation has replica identity we are always good. */
 	if (OidIsValid(RelationGetReplicaIndex(rel)))
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index b44ab007de..c48f11f293 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -683,13 +683,13 @@ ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx;
 CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol;
 UPDATE testpub_gencol SET a = 100 WHERE a = 1;
 ERROR:  cannot update table "testpub_gencol"
-DETAIL:  Replica identity consists of an unpublished generated column.
+DETAIL:  Replica identity must not contain unpublished generated columns.
 -- error - generated column "b" must be published explicitly as it is
 -- part of the REPLICA IDENTITY.
 ALTER TABLE testpub_gencol REPLICA IDENTITY FULL;
 UPDATE testpub_gencol SET a = 100 WHERE a = 1;
 ERROR:  cannot update table "testpub_gencol"
-DETAIL:  Replica identity consists of an unpublished generated column.
+DETAIL:  Replica identity must not contain unpublished generated columns.
 DROP PUBLICATION pub_gencol;
 -- ok - generated column "b" is published explicitly
 CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true);
-- 
2.34.1

#55Amit Kapila
amit.kapila16@gmail.com
In reply to: Shlok Kyal (#54)
Re: Disallow UPDATE/DELETE on table with unpublished generated column as REPLICA IDENTITY

On Fri, Dec 6, 2024 at 11:10 AM Shlok Kyal <shlok.kyal.oss@gmail.com> wrote:

Thanks Peter, for pointing this out.
I also feel that the error message suggested by Amit would be better.
I have attached a patch for the same.

Pushed.

--
With Regards,
Amit Kapila.