From 0eb0846450cb33996d6fb35c19290d297c26fd3c Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Sat, 13 Apr 2024 20:28:25 +0800
Subject: [PATCH v3 1/1] ALTER COLUMN SET DATA TYPE Re-check foreign key ties
 under certain condition

with deterministic collations, we check ties by looking at binary
equality. But nondeterministic collation can allow non-identical values
to be considered equal, e.g. 'a' and 'A' when case-insensitive.
ALTER COLUMN .. SET DATA TYPE make
some references that used to be valid may not be anymore.

for `ALTER COLUMN .. SET DATA TYPE`:
If the changed key columns is part of the primary key columns,
then under the following condition
we need to revalidate the foreign key constraint:
* the primary key is referenced by a foreign key constraint
* the new collation is not the same as the old
* the old collation is nondeterministic.

discussion: https://postgr.es/m/78d824e0-b21e-480d-a252-e4b84bc2c24b%40illuminatedcomputing.com
---
 src/backend/commands/tablecmds.c              | 62 +++++++++++++++----
 src/include/nodes/parsenodes.h                |  3 +-
 .../regress/expected/collate.icu.utf8.out     |  9 +++
 src/test/regress/sql/collate.icu.utf8.sql     |  8 +++
 4 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 000212f2..d5bd61f1 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -198,6 +198,7 @@ typedef struct AlteredTableInfo
 	/* Objects to rebuild after completing ALTER TYPE operations */
 	List	   *changedConstraintOids;	/* OIDs of constraints to rebuild */
 	List	   *changedConstraintDefs;	/* string definitions of same */
+	bool		verify_new_collation; /* T if we should recheck new collation for foreign key */
 	List	   *changedIndexOids;	/* OIDs of indexes to rebuild */
 	List	   *changedIndexDefs;	/* string definitions of same */
 	char	   *replicaIdentityIndex;	/* index to reset as REPLICA IDENTITY */
@@ -583,12 +584,13 @@ static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab,
 								   LOCKMODE lockmode);
 static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId,
 								 char *cmd, List **wqueue, LOCKMODE lockmode,
-								 bool rewrite);
+								 bool rewrite,
+								 bool verify_new_collation);
 static void RebuildConstraintComment(AlteredTableInfo *tab, AlterTablePass pass,
 									 Oid objid, Relation rel, List *domname,
 									 const char *conname);
 static void TryReuseIndex(Oid oldId, IndexStmt *stmt);
-static void TryReuseForeignKey(Oid oldId, Constraint *con);
+static void TryReuseForeignKey(Oid oldId, Constraint *con, bool verify_new_collation);
 static ObjectAddress ATExecAlterColumnGenericOptions(Relation rel, const char *colName,
 													 List *options, LOCKMODE lockmode);
 static void change_owner_fix_column_acls(Oid relationOid,
@@ -10256,6 +10258,15 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
 									old_pfeqop_item);
 		}
 		if (old_check_ok)
+		{
+			/* also see TryReuseForeignKey.
+			 * collation_recheck is true means that, we need to
+			 * recheck the foreign key side value is ok with the
+			 * new primey key collation.
+			*/
+			old_check_ok = !fkconstraint->collation_recheck;
+		}
+		if (old_check_ok)
 		{
 			Oid			old_fktype;
 			Oid			new_fktype;
@@ -10301,10 +10312,6 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
 			 * turn conform to the domain.  Consequently, we need not treat
 			 * domains specially here.
 			 *
-			 * Since we require that all collations share the same notion of
-			 * equality (which they do, because texteq reduces to bitwise
-			 * equality), we don't compare collation here.
-			 *
 			 * We need not directly consider the PK type.  It's necessarily
 			 * binary coercible to the opcintype of the unique index column,
 			 * and ri_triggers.c will only deal with PK datums in terms of
@@ -13738,6 +13745,30 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
 	/* And the collation */
 	targetcollid = GetColumnDefCollation(NULL, def, targettype);
 
+	if (OidIsValid(targetcollid))
+	{
+		Oid			typid;
+		int32		typmod;
+		Oid			collid;
+
+		get_atttypetypmodcoll(RelationGetRelid(tab->rel), attnum,
+							&typid, &typmod, &collid);
+		/*
+		* All deterministic collations use bitwise equality to resolve
+		* ties, but if the previous(source) collation was indeterminstic,
+		* and previous collation is not the same as the target collation then
+		* we must re-check the foreign key, because some references
+		* that use to be "equal" may not be anymore. we first store this
+		* information in AlteredTableInfo, later we pass it to foreign key
+		* Constraint.
+		* also see ATAddForeignKeyConstraint.
+		*/
+		if (OidIsValid(collid))
+		{
+			if (!get_collation_isdeterministic(collid) && targetcollid != collid)
+				tab->verify_new_collation = true;
+		}
+	}
 	/*
 	 * If there is a default expression for the column, get it and ensure we
 	 * can coerce it to the new datatype.  (We must do this before changing
@@ -14406,7 +14437,8 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 
 		ATPostAlterTypeParse(oldId, relid, confrelid,
 							 (char *) lfirst(def_item),
-							 wqueue, lockmode, tab->rewrite);
+							 wqueue, lockmode, tab->rewrite,
+							 tab->verify_new_collation);
 	}
 	forboth(oid_item, tab->changedIndexOids,
 			def_item, tab->changedIndexDefs)
@@ -14417,7 +14449,8 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 		relid = IndexGetRelation(oldId, false);
 		ATPostAlterTypeParse(oldId, relid, InvalidOid,
 							 (char *) lfirst(def_item),
-							 wqueue, lockmode, tab->rewrite);
+							 wqueue, lockmode, tab->rewrite,
+							 tab->verify_new_collation);
 
 		ObjectAddressSet(obj, RelationRelationId, oldId);
 		add_exact_object_address(&obj, objects);
@@ -14433,7 +14466,8 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 		relid = StatisticsGetRelation(oldId, false);
 		ATPostAlterTypeParse(oldId, relid, InvalidOid,
 							 (char *) lfirst(def_item),
-							 wqueue, lockmode, tab->rewrite);
+							 wqueue, lockmode, tab->rewrite,
+							 tab->verify_new_collation);
 
 		ObjectAddressSet(obj, StatisticExtRelationId, oldId);
 		add_exact_object_address(&obj, objects);
@@ -14496,7 +14530,8 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
  */
 static void
 ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
-					 List **wqueue, LOCKMODE lockmode, bool rewrite)
+					 List **wqueue, LOCKMODE lockmode, bool rewrite,
+					 bool verify_new_collation)
 {
 	List	   *raw_parsetree_list;
 	List	   *querytree_list;
@@ -14623,7 +14658,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
 					/* rewriting neither side of a FK */
 					if (con->contype == CONSTR_FOREIGN &&
 						!rewrite && tab->rewrite == 0)
-						TryReuseForeignKey(oldId, con);
+						TryReuseForeignKey(oldId, con, verify_new_collation);
 					con->reset_default_tblspc = true;
 					cmd->subtype = AT_ReAddConstraint;
 					tab->subcmds[AT_PASS_OLD_CONSTR] =
@@ -14784,7 +14819,7 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt)
  * this constraint can be skipped.
  */
 static void
-TryReuseForeignKey(Oid oldId, Constraint *con)
+TryReuseForeignKey(Oid oldId, Constraint *con, bool verify_new_collation)
 {
 	HeapTuple	tup;
 	Datum		adatum;
@@ -14816,6 +14851,9 @@ TryReuseForeignKey(Oid oldId, Constraint *con)
 		con->old_conpfeqop = lappend_oid(con->old_conpfeqop, rawarr[i]);
 
 	ReleaseSysCache(tup);
+
+	/* verify_new_collation is computed at ATExecAlterColumnType */
+	con->collation_recheck = verify_new_collation;
 }
 
 /*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f763f790..0349c61c 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2777,7 +2777,8 @@ typedef struct Constraint
 	List	   *old_conpfeqop;	/* pg_constraint.conpfeqop of my former self */
 	Oid			old_pktable_oid;	/* pg_constraint.confrelid of my former
 									 * self */
-
+	bool		collation_recheck; /* does primary key columns collation change need
+									* foreign key constraint recheck */
 	ParseLoc	location;		/* token location, or -1 if unknown */
 } Constraint;
 
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 4b8c8f14..4119776f 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1933,6 +1933,15 @@ SELECT * FROM test11fk;
 ---
 (0 rows)
 
+-- Test altering the collation of the referenced column.
+CREATE TABLE pktable (x text COLLATE case_insensitive PRIMARY KEY);
+CREATE TABLE fktable (x text REFERENCES pktable);
+INSERT INTO pktable VALUES ('a');
+INSERT INTO fktable VALUES ('A');
+-- should fail:
+ALTER TABLE pktable ALTER COLUMN x TYPE text COLLATE "C";
+ERROR:  insert or update on table "fktable" violates foreign key constraint "fktable_x_fkey"
+DETAIL:  Key (x)=(A) is not present in table "pktable".
 -- partitioning
 CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b);
 CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc');
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 80f28a97..08a69649 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -721,6 +721,14 @@ DELETE FROM test11pk WHERE x = 'abc';
 SELECT * FROM test11pk;
 SELECT * FROM test11fk;
 
+-- Test altering the collation of the referenced column.
+CREATE TABLE pktable (x text COLLATE case_insensitive PRIMARY KEY);
+CREATE TABLE fktable (x text REFERENCES pktable);
+INSERT INTO pktable VALUES ('a');
+INSERT INTO fktable VALUES ('A');
+-- should fail:
+ALTER TABLE pktable ALTER COLUMN x TYPE text COLLATE "C";
+
 -- partitioning
 CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b);
 CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc');

base-commit: 4cc1c76fe9f13aa96bae14f4fcfdf6d508af72a4
-- 
2.34.1

