From 45837d645a276912b400b5d99d80ac290e0ba8ad Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Tue, 10 Sep 2024 17:15:10 +0200
Subject: [PATCH] Fix unique key checks in JSON object constructors

When building a JSON object, the code builds a hash table of keys, to
allow checking if the keys are unique. The uniqueness check and adding
the new key happens in json_unique_check_key(), but this assumes the
pointer to the key remains valid.

Unfortunately, two places passed pointers to keys in a StringInfo
buffer, while also appending additional data (more key/value pairs) to
the buffer. With enough data the buffer would be resized using enlarge
enlargeStringInfo(), which does repalloc, invalidating the earlier key
pointers.

Due to this the uniqueness check may fail with both false negatives and
false positives, producing JSON objects with duplicate keys or failing
to produce a perfectly valid JSON object.

This affects four functions, introduced in PG16 with the new SQL/JSON:

- json_object_agg_unique / jsonb_object_agg_unique
- json_object / jsonb_objectagg

The existing regression tests did not detect this issue, simply because
the initial buffer size is 1024 and the objects were small enough not to
require the repalloc.

Reported by Alexander Lakhin, based on AddressSanitizer failure.
Valgrind notices this too, but both require the JSON object large enough
to require the repalloc.

Fixed by properly copying the key into the aggregate memory context, and
adding regression tests with enough data to repalloc the buffer.

Investigation and initial fix by Junwang Zhao, with various improvements
and tests by me.

Backpatch to 16, where these functions were introduced.

Reported-by: Alexander Lakhin
Author: Junwang Zhao, Tomas Vondra
Discussion: https://postgr.es/m/18598-3279ed972a2347c7@postgresql.org
Discussion: https://postgr.es/m/CAEG8a3JjH0ReJF2_O7-8LuEbO69BxPhYeXs95_x7+H9AMWF1gw@mail.gmail.com
---
 src/backend/utils/adt/json.c          | 6 ++++--
 src/test/regress/expected/json.out    | 3 +++
 src/test/regress/expected/sqljson.out | 5 +++++
 src/test/regress/sql/json.sql         | 2 ++
 src/test/regress/sql/sqljson.sql      | 5 +++++
 5 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 4eeeeaf0a60..3819afdc433 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -1111,7 +1111,8 @@ json_object_agg_transfn_worker(FunctionCallInfo fcinfo,
 
 	if (unique_keys)
 	{
-		const char *key = &out->data[key_offset];
+		const char *key = MemoryContextStrdup(aggcontext,
+											  &out->data[key_offset]);
 
 		if (!json_unique_check_key(&state->unique_check.check, key, 0))
 			ereport(ERROR,
@@ -1275,7 +1276,8 @@ json_build_object_worker(int nargs, const Datum *args, const bool *nulls, const
 		if (unique_keys)
 		{
 			/* check key uniqueness after key appending */
-			const char *key = &out->data[key_offset];
+			const char *key = MemoryContextStrdup(unique_check.mcxt,
+												  &out->data[key_offset]);
 
 			if (!json_unique_check_key(&unique_check.check, key, 0))
 				ereport(ERROR,
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index 7df11c2f385..96c40911cb9 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -2330,6 +2330,9 @@ select json_object('{a,b,"","d e f"}','{1,2,3,"a b c"}');
  {"a" : "1", "b" : "2", "" : "3", "d e f" : "a b c"}
 (1 row)
 
+-- json_object_agg_unique requires unique keys
+select json_object_agg_unique(mod(i,100), i) from generate_series(0, 199) i;
+ERROR:  duplicate JSON object key value: "0"
 -- json_to_record and json_to_recordset
 select * from json_to_record('{"a":1,"b":"foo","c":"bar"}')
     as x(a int, b text, d text);
diff --git a/src/test/regress/expected/sqljson.out b/src/test/regress/expected/sqljson.out
index 4f91e2117ef..9adfbe15f65 100644
--- a/src/test/regress/expected/sqljson.out
+++ b/src/test/regress/expected/sqljson.out
@@ -537,6 +537,8 @@ SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2 ABSENT ON NULL);
  {"a" : "1", "c" : 2}
 (1 row)
 
+SELECT JSON_OBJECT(1: 1, '2': NULL, '3': 1, repeat('x', 1000): 1, 2: repeat('a', 100) WITH UNIQUE);
+ERROR:  duplicate JSON object key value: "2"
 SELECT JSON_OBJECT(1: 1, '1': NULL WITH UNIQUE);
 ERROR:  duplicate JSON object key value: "1"
 SELECT JSON_OBJECT(1: 1, '1': NULL ABSENT ON NULL WITH UNIQUE);
@@ -921,6 +923,9 @@ FROM (VALUES (1, 1), (0, NULL),(4, null), (5, null),(6, null),(2, 2)) foo(k, v);
  {"1": 1, "2": 2}
 (1 row)
 
+SELECT JSON_OBJECTAGG(mod(i,100): (i)::text FORMAT JSON WITH UNIQUE)
+FROM generate_series(0, 199) i;
+ERROR:  duplicate JSON object key value: "0"
 -- Test JSON_OBJECT deparsing
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT JSON_OBJECT('foo' : '1' FORMAT JSON, 'bar' : 'baz' RETURNING json);
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 5c886cd6b33..8251f4f4006 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -755,6 +755,8 @@ select json_object('{a,b,NULL,"d e f"}','{1,2,3,"a b c"}');
 
 select json_object('{a,b,"","d e f"}','{1,2,3,"a b c"}');
 
+-- json_object_agg_unique requires unique keys
+select json_object_agg_unique(mod(i,100), i) from generate_series(0, 199) i;
 
 -- json_to_record and json_to_recordset
 
diff --git a/src/test/regress/sql/sqljson.sql b/src/test/regress/sql/sqljson.sql
index bb2487e8649..42dbec26d6f 100644
--- a/src/test/regress/sql/sqljson.sql
+++ b/src/test/regress/sql/sqljson.sql
@@ -138,6 +138,8 @@ SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2);
 SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2 NULL ON NULL);
 SELECT JSON_OBJECT('a': '1', 'b': NULL, 'c': 2 ABSENT ON NULL);
 
+SELECT JSON_OBJECT(1: 1, '2': NULL, '3': 1, repeat('x', 1000): 1, 2: repeat('a', 100) WITH UNIQUE);
+
 SELECT JSON_OBJECT(1: 1, '1': NULL WITH UNIQUE);
 SELECT JSON_OBJECT(1: 1, '1': NULL ABSENT ON NULL WITH UNIQUE);
 SELECT JSON_OBJECT(1: 1, '1': NULL NULL ON NULL WITH UNIQUE RETURNING jsonb);
@@ -283,6 +285,9 @@ FROM (VALUES (1, 1), (1, NULL), (2, 2)) foo(k, v);
 SELECT JSON_OBJECTAGG(k: v ABSENT ON NULL WITH UNIQUE KEYS RETURNING jsonb)
 FROM (VALUES (1, 1), (0, NULL),(4, null), (5, null),(6, null),(2, 2)) foo(k, v);
 
+SELECT JSON_OBJECTAGG(mod(i,100): (i)::text FORMAT JSON WITH UNIQUE)
+FROM generate_series(0, 199) i;
+
 -- Test JSON_OBJECT deparsing
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT JSON_OBJECT('foo' : '1' FORMAT JSON, 'bar' : 'baz' RETURNING json);
-- 
2.46.0

