From f2d8bfdaab6c1ac815764c658e8144a807e578d3 Mon Sep 17 00:00:00 2001
From: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Date: Mon, 20 Apr 2026 11:16:36 +0530
Subject: [PATCH v3] Apply encoding conversion in COPY TO FORMAT JSON

CopyToJsonOneRow() sent the output of composite_to_json() directly
via CopySendData() without encoding conversion.  The text and CSV
paths convert per-attribute via pg_server_to_any() when
need_transcoding is true, but the JSON path skipped this entirely.

This meant COPY ... TO ... WITH (FORMAT json, ENCODING 'LATIN1') on
a UTF-8 server silently produced UTF-8 output, and COPY TO STDOUT
with a non-UTF-8 client_encoding would send unconverted bytes.

Apply pg_server_to_any() to the whole JSON buffer after
composite_to_json() returns, converting to the requested file
encoding when it differs from the server encoding.

The bug was introduced by commit 7dadd38cda9 (json format for COPY TO).
---
 src/backend/commands/copyto.c              | 20 +++++++++++++++++++-
 src/test/regress/expected/copyencoding.out | 21 +++++++++++++++++++++
 src/test/regress/sql/copyencoding.sql      | 12 ++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 85d15353647..bb28c15339d 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -427,7 +427,25 @@ CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot)
 		}
 	}
 
-	CopySendData(cstate, cstate->json_buf->data, cstate->json_buf->len);
+	/*
+	 * Convert the JSON output to the target encoding if needed.  Unlike the
+	 * text and CSV paths which convert per-attribute via CopyAttributeOut*,
+	 * composite_to_json() emits the whole row as one buffer, so we transcode
+	 * it here in a single call before sending.
+	 */
+	if (cstate->need_transcoding)
+	{
+		char	   *converted;
+
+		converted = pg_server_to_any(cstate->json_buf->data,
+									 cstate->json_buf->len,
+									 cstate->file_encoding);
+		CopySendData(cstate, converted, strlen(converted));
+		if (converted != cstate->json_buf->data)
+			pfree(converted);
+	}
+	else
+		CopySendData(cstate, cstate->json_buf->data, cstate->json_buf->len);
 
 	CopySendTextLikeEndOfRow(cstate);
 }
diff --git a/src/test/regress/expected/copyencoding.out b/src/test/regress/expected/copyencoding.out
index 76ea0e7cf04..fc6ea54b232 100644
--- a/src/test/regress/expected/copyencoding.out
+++ b/src/test/regress/expected/copyencoding.out
@@ -50,4 +50,25 @@ COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
 ERROR:  invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
 CONTEXT:  COPY copy_encoding_tab, line 1
 RESET client_encoding;
+-- JSON format encoding conversion
+\set json_latin1 :abs_builddir '/results/copyencoding_json_latin1.json'
+COPY (SELECT E'\u00e9' AS c) TO :'json_latin1' WITH (FORMAT json, ENCODING 'LATIN1');
+-- Verify the file contains LATIN1 'é' (single byte 0xe9) and not UTF-8 (0xc3 0xa9).
+-- Done as separate position checks to stay independent of the platform's
+-- end-of-line convention.
+SELECT position('\xe9'::bytea  IN pg_read_binary_file(:'json_latin1')) > 0 AS has_latin1_e9,
+       position('\xc3a9'::bytea IN pg_read_binary_file(:'json_latin1')) > 0 AS has_utf8_e9;
+ has_latin1_e9 | has_utf8_e9 
+---------------+-------------
+ t             | f
+(1 row)
+
+-- Round-trip back through COPY FROM with the same encoding.
+COPY copy_encoding_tab FROM :'json_latin1' WITH (ENCODING 'LATIN1');
+SELECT * FROM copy_encoding_tab WHERE t LIKE '%é%';
+     t     
+-----------
+ {"c":"é"}
+(1 row)
+
 DROP TABLE copy_encoding_tab;
diff --git a/src/test/regress/sql/copyencoding.sql b/src/test/regress/sql/copyencoding.sql
index 64718245b94..51b16abf42b 100644
--- a/src/test/regress/sql/copyencoding.sql
+++ b/src/test/regress/sql/copyencoding.sql
@@ -57,4 +57,16 @@ SET client_encoding TO EUC_JP;
 COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
 RESET client_encoding;
 
+-- JSON format encoding conversion
+\set json_latin1 :abs_builddir '/results/copyencoding_json_latin1.json'
+COPY (SELECT E'\u00e9' AS c) TO :'json_latin1' WITH (FORMAT json, ENCODING 'LATIN1');
+-- Verify the file contains LATIN1 'é' (single byte 0xe9) and not UTF-8 (0xc3 0xa9).
+-- Done as separate position checks to stay independent of the platform's
+-- end-of-line convention.
+SELECT position('\xe9'::bytea  IN pg_read_binary_file(:'json_latin1')) > 0 AS has_latin1_e9,
+       position('\xc3a9'::bytea IN pg_read_binary_file(:'json_latin1')) > 0 AS has_utf8_e9;
+-- Round-trip back through COPY FROM with the same encoding.
+COPY copy_encoding_tab FROM :'json_latin1' WITH (ENCODING 'LATIN1');
+SELECT * FROM copy_encoding_tab WHERE t LIKE '%é%';
+
 DROP TABLE copy_encoding_tab;
-- 
2.34.1