From c8489365872309b166f40101835586247c137ba0 Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Fri, 3 Oct 2025 20:53:48 +0800
Subject: [PATCH v2 1/1] fix SQL/JSON default expression collation issue

create table t(a jsonb);
select json_value(a, '$.c' returning text default 'A' collate "C" on empty) from t;

As you can see, this query returns a set of rows. If the collation of the
DEFAULT node differs from the default text collation, the resulting set may have
inconsistent collations.

As a result, the query's collation becomes unreliable. For instance, is it valid
to create the following index in this case?
create index xx on t (( json_value(a, '$.c' returning text default 'A' on empty) ));

discussion: https://postgr.es/m/CACJufxHVwYYSyiVQ6o+PsRX6zQ7rAFinh_fv1kCfTsT1xG4Zeg@mail.gmail.com
---
 src/backend/parser/parse_expr.c               | 59 ++++++++++++++++---
 .../regress/expected/sqljson_queryfuncs.out   | 48 +++++++++++++++
 src/test/regress/sql/sqljson_queryfuncs.sql   | 15 +++++
 3 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index e1979a80c19..0296e92e108 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -93,7 +93,8 @@ static Node *transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func);
 static void transformJsonPassingArgs(ParseState *pstate, const char *constructName,
 									 JsonFormatType format, List *args,
 									 List **passing_values, List **passing_names);
-static JsonBehavior *transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
+static JsonBehavior *transformJsonBehavior(ParseState *pstate, JsonExpr *jsexpr,
+										   JsonBehavior *behavior,
 										   JsonBehaviorType default_behavior,
 										   JsonReturning *returning);
 static Node *GetJsonBehaviorConst(JsonBehaviorType btype, int location);
@@ -4528,13 +4529,16 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 			{
 				jsexpr->returning->typid = BOOLOID;
 				jsexpr->returning->typmod = -1;
+				jsexpr->collation = InvalidOid;
 			}
 
 			/* JSON_TABLE() COLUMNS can specify a non-boolean type. */
 			if (jsexpr->returning->typid != BOOLOID)
 				jsexpr->use_json_coercion = true;
 
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_FALSE,
 													 jsexpr->returning);
 			break;
@@ -4549,6 +4553,8 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				ret->typmod = -1;
 			}
 
+			jsexpr->collation = get_typcollation(jsexpr->returning->typid);
+
 			/*
 			 * Keep quotes on scalar strings by default, omitting them only if
 			 * OMIT QUOTES is specified.
@@ -4565,11 +4571,15 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				jsexpr->use_json_coercion = true;
 
 			/* Assume NULL ON EMPTY when ON EMPTY is not specified. */
-			jsexpr->on_empty = transformJsonBehavior(pstate, func->on_empty,
+			jsexpr->on_empty = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_empty,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			/* Assume NULL ON ERROR when ON ERROR is not specified. */
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			break;
@@ -4581,6 +4591,7 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				jsexpr->returning->typid = TEXTOID;
 				jsexpr->returning->typmod = -1;
 			}
+			jsexpr->collation = get_typcollation(jsexpr->returning->typid);
 
 			/*
 			 * Override whatever transformJsonOutput() set these to, which
@@ -4606,11 +4617,15 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 			}
 
 			/* Assume NULL ON EMPTY when ON EMPTY is not specified. */
-			jsexpr->on_empty = transformJsonBehavior(pstate, func->on_empty,
+			jsexpr->on_empty = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_empty,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			/* Assume NULL ON ERROR when ON ERROR is not specified. */
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_NULL,
 													 jsexpr->returning);
 			break;
@@ -4621,6 +4636,7 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 				jsexpr->returning->typid = exprType(jsexpr->formatted_expr);
 				jsexpr->returning->typmod = -1;
 			}
+			jsexpr->collation = get_typcollation(jsexpr->returning->typid);
 
 			/*
 			 * Assume EMPTY ARRAY ON ERROR when ON ERROR is not specified.
@@ -4628,7 +4644,9 @@ transformJsonFuncExpr(ParseState *pstate, JsonFuncExpr *func)
 			 * ON EMPTY cannot be specified at the top level but it can be for
 			 * the individual columns.
 			 */
-			jsexpr->on_error = transformJsonBehavior(pstate, func->on_error,
+			jsexpr->on_error = transformJsonBehavior(pstate,
+													 jsexpr,
+													 func->on_error,
 													 JSON_BEHAVIOR_EMPTY_ARRAY,
 													 jsexpr->returning);
 			break;
@@ -4704,7 +4722,8 @@ ValidJsonBehaviorDefaultExpr(Node *expr, void *context)
  * Transform a JSON BEHAVIOR clause.
  */
 static JsonBehavior *
-transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
+transformJsonBehavior(ParseState *pstate, JsonExpr *jsexpr,
+					  JsonBehavior *behavior,
 					  JsonBehaviorType default_behavior,
 					  JsonReturning *returning)
 {
@@ -4712,6 +4731,7 @@ transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
 	Node	   *expr = NULL;
 	bool		coerce_at_runtime = false;
 	int			location = -1;
+	Oid			exprcoll;
 
 	if (behavior)
 	{
@@ -4720,6 +4740,29 @@ transformJsonBehavior(ParseState *pstate, JsonBehavior *behavior,
 		if (btype == JSON_BEHAVIOR_DEFAULT)
 		{
 			expr = transformExprRecurse(pstate, behavior->expr);
+
+			exprcoll = exprCollation(expr);
+			if (!OidIsValid(exprcoll))
+				exprcoll = get_typcollation(exprType(expr));
+
+			if (jsexpr->collation != exprcoll && OidIsValid(exprcoll) && OidIsValid(jsexpr->collation))
+				ereport(ERROR,
+						errcode(ERRCODE_DATATYPE_MISMATCH),
+						errmsg("the collation of DEFAULT expression conflicts with RETURNING caluse"),
+						errdetail("\"%s\" versus \"%s\"",
+								  get_collation_name(exprcoll),
+								  get_collation_name(jsexpr->collation)),
+						parser_errposition(pstate, exprLocation(expr)));
+
+			/*
+			 * Strip any top-level COLLATE clause because exprSetCollation can
+			 * not cope with explit COLLATE clause.  This is safe because we
+			 * have already verified that the DEFAULT expression's explicit
+			 * collation matches the RETURNING type's collation.
+			 */
+			while (IsA(expr, CollateExpr))
+				expr = (Node *) ((CollateExpr *) expr)->arg;
+
 			if (!ValidJsonBehaviorDefaultExpr(expr, NULL))
 				ereport(ERROR,
 						(errcode(ERRCODE_DATATYPE_MISMATCH),
diff --git a/src/test/regress/expected/sqljson_queryfuncs.out b/src/test/regress/expected/sqljson_queryfuncs.out
index 5a35aeb7bba..871d25e3287 100644
--- a/src/test/regress/expected/sqljson_queryfuncs.out
+++ b/src/test/regress/expected/sqljson_queryfuncs.out
@@ -1464,3 +1464,51 @@ SELECT JSON_VALUE(jsonb '1234', '$' RETURNING bit(3)  DEFAULT 1::bit(3) ON ERROR
 SELECT JSON_VALUE(jsonb '"111"', '$.a'  RETURNING bit(3) DEFAULT '1111' ON EMPTY);
 ERROR:  bit string length 4 does not match type bit(3)
 DROP DOMAIN queryfuncs_d_varbit3;
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+create domain d1 as text collate case_insensitive;
+create domain d2 as text collate "C";
+select json_value('{"a": "A"}', '$.a' returning d1 default ('C' collate "C") collate case_insensitive on empty) = 'a'; --true
+ ?column? 
+----------
+ t
+(1 row)
+
+select json_value('{"a": "A"}', '$.a' returning d1 default 'C' on empty) = 'a'; --true
+ ?column? 
+----------
+ t
+(1 row)
+
+select json_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on empty) = 'a'; --error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING caluse
+LINE 1: ...on_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+select json_value('{"a": "A"}', '$.a' returning d1 default 'C' collate "C" on empty) = 'a'; --error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING caluse
+LINE 1: ...on_value('{"a": "A"}', '$.a' returning d1 default 'C' collat...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A' on empty) = 'a'; --true
+ ?column? 
+----------
+ t
+(1 row)
+
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate case_insensitive on empty) = 'a'; --true
+ ?column? 
+----------
+ t
+(1 row)
+
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A'::d2 on empty) = 'a'; --error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING caluse
+LINE 1: ...on_value('{"a": "A"}', '$.c' returning d1 default 'A'::d2 on...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate "C" on empty) = 'a'; --error
+ERROR:  the collation of DEFAULT expression conflicts with RETURNING caluse
+LINE 1: ...on_value('{"a": "A"}', '$.c' returning d1 default 'A' collat...
+                                                             ^
+DETAIL:  "C" versus "case_insensitive"
+drop domain d1, d2;
diff --git a/src/test/regress/sql/sqljson_queryfuncs.sql b/src/test/regress/sql/sqljson_queryfuncs.sql
index 8d7b225b612..4c600f884b3 100644
--- a/src/test/regress/sql/sqljson_queryfuncs.sql
+++ b/src/test/regress/sql/sqljson_queryfuncs.sql
@@ -500,3 +500,18 @@ SELECT JSON_VALUE(jsonb '1234', '$' RETURNING bit(3)  DEFAULT 1 ON ERROR);
 SELECT JSON_VALUE(jsonb '1234', '$' RETURNING bit(3)  DEFAULT 1::bit(3) ON ERROR);
 SELECT JSON_VALUE(jsonb '"111"', '$.a'  RETURNING bit(3) DEFAULT '1111' ON EMPTY);
 DROP DOMAIN queryfuncs_d_varbit3;
+
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+create domain d1 as text collate case_insensitive;
+create domain d2 as text collate "C";
+
+select json_value('{"a": "A"}', '$.a' returning d1 default ('C' collate "C") collate case_insensitive on empty) = 'a'; --true
+select json_value('{"a": "A"}', '$.a' returning d1 default 'C' on empty) = 'a'; --true
+select json_value('{"a": "A"}', '$.a' returning d1 default 'C'::d2 on empty) = 'a'; --error
+select json_value('{"a": "A"}', '$.a' returning d1 default 'C' collate "C" on empty) = 'a'; --error
+
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A' on empty) = 'a'; --true
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate case_insensitive on empty) = 'a'; --true
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A'::d2 on empty) = 'a'; --error
+select json_value('{"a": "A"}', '$.c' returning d1 default 'A' collate "C" on empty) = 'a'; --error
+drop domain d1, d2;
\ No newline at end of file
-- 
2.34.1

