From e2c9180a6428044ede3ac5e456ab9b92b2f7cd6f Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Mon, 1 Feb 2021 17:10:01 +0200
Subject: [PATCH v3 1/5] Add regression tests for built-in encoding
 conversions.

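This doesn't cover every conversion, but it covers all the internal
functions in conv.c that are used to implement the conversions.

Note that the test_conv() helper always returns NULL in its "errorat"
output column in this patch (it is set to NULL on both the success and
the error path), so that column is empty in all of the expected output.
The helper's "validlen" variable is likewise assigned but never read.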
---
 src/test/regress/expected/conversion.out | 493 +++++++++++++++++++++++
 src/test/regress/sql/conversion.sql      | 182 +++++++++
 2 files changed, 675 insertions(+)

diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out
index 62c10671685..38f8cef0f38 100644
--- a/src/test/regress/expected/conversion.out
+++ b/src/test/regress/expected/conversion.out
@@ -37,3 +37,496 @@ DROP CONVERSION mydef;
 --
 RESET SESSION AUTHORIZATION;
 DROP USER regress_conversion_user;
+--
+-- Test built-in conversion functions.
+--
+-- helper function to test a conversion
+create or replace function test_conv(
+  input IN bytea,
+  src_encoding IN text,
+  dst_encoding IN text,
+  result OUT bytea,
+  errorat OUT bytea,
+  error OUT text)
+language plpgsql as
+$$
+declare
+  validlen int;
+begin
+  -- Try to perform the conversion. If it fails, catch the error and return
+  -- it to the caller.
+  begin
+    select * into result from convert(input, src_encoding, dst_encoding);
+    validlen = length(input);
+    errorat = NULL;
+    error := NULL;
+  exception when others then
+    result = NULL;
+    errorat = NULL;
+    error := sqlerrm;
+  end;
+  return;
+end;
+$$;
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\xc3a4c3b6',	'valid, extra latin chars'),
+  ('\xd184d0bed0be',	'valid, cyrillic'),
+  ('\x666f6fe8b1a1',	'valid, kanji/Chinese'),
+  ('\xe382abe3829a',	'valid, two chars that combine to one in EUC_JIS_2004'),
+  ('\xe382ab',		'only first half of combined char in EUC_JIS_2004'),
+  ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+  ('\x666f6fefa8aa',	'valid, needs mapping function to convert to GB18030'),
+  ('\x66e8b1ff6f6f',	'invalid byte sequence'),
+  ('\x66006f',		'invalid, NUL byte'),
+  ('\x666f6fe8b100',    'invalid, NUL byte'),
+  ('\x666f6fe8b1',	'incomplete character at end');
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+                     description                      |        result        | errorat |                           error                           
+------------------------------------------------------+----------------------+---------+-----------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           |         | 
+ valid, cyrillic                                      | \xd184d0bed0be       |         | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       |         | 
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       |         | 
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             |         | 
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |         | 
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |         | 
+ invalid byte sequence                                |                      |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    |                      |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    |                      |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          |                      |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+                     description                      |       inbytes        |     result     | errorat |                                                    error                                                    
+------------------------------------------------------+----------------------+----------------+---------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f       |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xa9daa9ec     |         | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \xa7e6a7e0a7e0 |         | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6fbedd   |         | 
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \xa5f7         |         | 
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \xa5ab         |         | 
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |                |         | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |                |         | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ invalid byte sequence                                | \x66e8b1ff6f6f       |                |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             |                |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       |                |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         |                |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  | errorat |                                                 error                                                 
+------------------------------------------------------+----------------------+----------+---------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xe4f6   |         | 
+ valid, cyrillic                                      | \xd184d0bed0be       |          |         | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       |          |         | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |          |         | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |          |         | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ invalid byte sequence                                | \x66e8b1ff6f6f       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             |          |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  | errorat |                                                 error                                                 
+------------------------------------------------------+----------------------+----------+---------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xe4f6   |         | 
+ valid, cyrillic                                      | \xd184d0bed0be       |          |         | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       |          |         | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |          |         | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |          |         | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ invalid byte sequence                                | \x66e8b1ff6f6f       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             |          |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  | errorat |                                                 error                                                 
+------------------------------------------------------+----------------------+----------+---------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \xe4f6   |         | 
+ valid, cyrillic                                      | \xd184d0bed0be       |          |         | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       |          |         | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |          |         | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |          |         | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ invalid byte sequence                                | \x66e8b1ff6f6f       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             |          |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+                     description                      |       inbytes        |  result  | errorat |                                                error                                                 
+------------------------------------------------------+----------------------+----------+---------+------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           |          |         | character with byte sequence 0xc3 0xa4 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, cyrillic                                      | \xd184d0bed0be       | \xc6cfcf |         | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       |          |         | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             |          |         | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac |          |         | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       |          |         | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ invalid byte sequence                                | \x66e8b1ff6f6f       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             |          |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         |          |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+                     description                      |       inbytes        |           result           | errorat |                           error                           
+------------------------------------------------------+----------------------+----------------------------+---------+-----------------------------------------------------------
+ valid, pure ASCII                                    | \x666f6f             | \x666f6f                   |         | 
+ valid, extra latin chars                             | \xc3a4c3b6           | \x81308a3181308b32         |         | 
+ valid, cyrillic                                      | \xd184d0bed0be       | \xa7e6a7e0a7e0             |         | 
+ valid, kanji/Chinese                                 | \x666f6fe8b1a1       | \x666f6fcff3               |         | 
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a       | \xa5ab8139a732             |         | 
+ only first half of combined char in EUC_JIS_2004     | \xe382ab             | \xa5ab                     |         | 
+ valid, Hangul, Korean                                | \xecbd94eb81bceba6ac | \x8334e5398238c4338330b335 |         | 
+ valid, needs mapping function to convert to GB18030  | \x666f6fefa8aa       | \x666f6f84309c38           |         | 
+ invalid byte sequence                                | \x66e8b1ff6f6f       |                            |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte                                    | \x66006f             |                            |         | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte                                    | \x666f6fe8b100       |                            |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end                          | \x666f6fe8b1         |                            |         | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(12 rows)
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6fbedd',	'valid'),
+  ('\xa5f7',		'valid, translates to two UTF-8 chars '),
+  ('\xbeddbe',		'incomplete char '),
+  ('\x666f6f00bedd',	'invalid, NUL byte'),
+  ('\x666f6fbe00dd',	'invalid, NUL byte'),
+  ('\x666f6fbedd00',	'invalid, NUL byte'),
+  ('\xbe04',		'invalid byte sequence');
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+              description              |    inbytes     |    result    | errorat |                            error                             
+---------------------------------------+----------------+--------------+---------+--------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f     |         | 
+ valid                                 | \x666f6fbedd   | \x666f6fbedd |         | 
+ valid, translates to two UTF-8 chars  | \xa5f7         | \xa5f7       |         | 
+ incomplete char                       | \xbeddbe       |              |         | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte                     | \x666f6f00bedd |              |         | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6fbe00dd |              |         | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte                     | \x666f6fbedd00 |              |         | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence                 | \xbe04         |              |         | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+              description              |    inbytes     |     result     | errorat |                            error                             
+---------------------------------------+----------------+----------------+---------+--------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f       |         | 
+ valid                                 | \x666f6fbedd   | \x666f6fe8b1a1 |         | 
+ valid, translates to two UTF-8 chars  | \xa5f7         | \xe382abe3829a |         | 
+ incomplete char                       | \xbeddbe       |                |         | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte                     | \x666f6f00bedd |                |         | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6fbe00dd |                |         | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte                     | \x666f6fbedd00 |                |         | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence                 | \xbe04         |                |         | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6f8fdb',	'valid'),
+  ('\x666f6f81c0',	'valid, no translation to UTF-8'),
+  ('\x666f6f82f5',	'valid, translates to two UTF-8 chars '),
+  ('\x666f6f8fdb8f',	'incomplete char '),
+  ('\x666f6f820a',	'incomplete char, followed by newline '),
+  ('\x666f6f008fdb',	'invalid, NUL byte'),
+  ('\x666f6f8f00db',	'invalid, NUL byte'),
+  ('\x666f6f8fdb00',	'invalid, NUL byte');
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+              description              |    inbytes     |    result    | errorat |                             error                              
+---------------------------------------+----------------+--------------+---------+----------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f     |         | 
+ valid                                 | \x666f6f8fdb   | \x666f6f8fdb |         | 
+ valid, no translation to UTF-8        | \x666f6f81c0   | \x666f6f81c0 |         | 
+ valid, translates to two UTF-8 chars  | \x666f6f82f5   | \x666f6f82f5 |         | 
+ incomplete char                       | \x666f6f8fdb8f |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline  | \x666f6f820a   |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte                     | \x666f6f008fdb |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6f8f00db |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte                     | \x666f6f8fdb00 |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+              description              |    inbytes     |        result        | errorat |                             error                              
+---------------------------------------+----------------+----------------------+---------+----------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f             |         | 
+ valid                                 | \x666f6f8fdb   | \x666f6fe8b1a1       |         | 
+ valid, no translation to UTF-8        | \x666f6f81c0   | \x666f6fe28a84       |         | 
+ valid, translates to two UTF-8 chars  | \x666f6f82f5   | \x666f6fe3818be3829a |         | 
+ incomplete char                       | \x666f6f8fdb8f |                      |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline  | \x666f6f820a   |                      |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte                     | \x666f6f008fdb |                      |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6f8f00db |                      |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte                     | \x666f6f8fdb00 |                      |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+              description              |    inbytes     |    result    | errorat |                             error                              
+---------------------------------------+----------------+--------------+---------+----------------------------------------------------------------
+ valid, pure ASCII                     | \x666f6f       | \x666f6f     |         | 
+ valid                                 | \x666f6f8fdb   | \x666f6fbedd |         | 
+ valid, no translation to UTF-8        | \x666f6f81c0   | \x666f6fa2c2 |         | 
+ valid, translates to two UTF-8 chars  | \x666f6f82f5   | \x666f6fa4f7 |         | 
+ incomplete char                       | \x666f6f8fdb8f |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline  | \x666f6f820a   |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte                     | \x666f6f008fdb |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte                     | \x666f6f8f00db |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte                     | \x666f6f8fdb00 |              |         | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6fcff3',	'valid'),
+  ('\x666f6f8431a530',	'valid, no translation to UTF-8'),
+  ('\x666f6f84309c38',	'valid, translates to UTF-8 by mapping function'),
+  ('\x666f6f84309c',	'incomplete char '),
+  ('\x666f6f84309c0a',	'incomplete char, followed by newline '),
+  ('\x666f6f84309c3800', 'invalid, NUL byte'),
+  ('\x666f6f84309c0038', 'invalid, NUL byte');
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+                  description                   |      inbytes       |      result      | errorat |                               error                               
+------------------------------------------------+--------------------+------------------+---------+-------------------------------------------------------------------
+ valid, pure ASCII                              | \x666f6f           | \x666f6f         |         | 
+ valid                                          | \x666f6fcff3       | \x666f6fcff3     |         | 
+ valid, no translation to UTF-8                 | \x666f6f8431a530   | \x666f6f8431a530 |         | 
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38   | \x666f6f84309c38 |         | 
+ incomplete char                                | \x666f6f84309c     |                  |         | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline           | \x666f6f84309c0a   |                  |         | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte                              | \x666f6f84309c3800 |                  |         | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte                              | \x666f6f84309c0038 |                  |         | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+                  description                   |      inbytes       |     result     | errorat |                                                    error                                                    
+------------------------------------------------+--------------------+----------------+---------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII                              | \x666f6f           | \x666f6f       |         | 
+ valid                                          | \x666f6fcff3       | \x666f6fe8b1a1 |         | 
+ valid, no translation to UTF-8                 | \x666f6f8431a530   |                |         | character with byte sequence 0x84 0x31 0xa5 0x30 in encoding "GB18030" has no equivalent in encoding "UTF8"
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38   | \x666f6fefa8aa |         | 
+ incomplete char                                | \x666f6f84309c     |                |         | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline           | \x666f6f84309c0a   |                |         | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte                              | \x666f6f84309c3800 |                |         | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte                              | \x666f6f84309c0038 |                |         | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\xe4dede',		'valid'),
+  ('\x00',		'invalid, NUL byte'),
+  ('\xe400dede',	'invalid, NUL byte'),
+  ('\xe4dede00',	'invalid, NUL byte');
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+    description    |  inbytes   |  result  | errorat |                         error                         
+-------------------+------------+----------+---------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f |         | 
+ valid             | \xe4dede   | \xe4dede |         | 
+ invalid, NUL byte | \x00       |          |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede |          |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 |          |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+    description    |  inbytes   |     result     | errorat |                         error                         
+-------------------+------------+----------------+---------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f       |         | 
+ valid             | \xe4dede   | \xd184d0bed0be |         | 
+ invalid, NUL byte | \x00       |                |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede |                |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 |                |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+    description    |  inbytes   |  result  | errorat |                         error                         
+-------------------+------------+----------+---------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f |         | 
+ valid             | \xe4dede   | \xc6cfcf |         | 
+ invalid, NUL byte | \x00       |          |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede |          |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 |          |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+    description    |  inbytes   |     result     | errorat |                         error                         
+-------------------+------------+----------------+---------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f   | \x666f6f       |         | 
+ valid             | \xe4dede   | \x8bc68bcf8bcf |         | 
+ invalid, NUL byte | \x00       |                |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede |                |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 |                |         | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6fb648',	'valid'),
+  ('\x666f6fa27f',	'valid, no translation to UTF-8'),
+  ('\x666f6fb60048',	'invalid, NUL byte'),
+  ('\x666f6fb64800',	'invalid, NUL byte');
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+          description           |    inbytes     |    result    | errorat |                        error                         
+--------------------------------+----------------+--------------+---------+------------------------------------------------------
+ valid, pure ASCII              | \x666f6f       | \x666f6f     |         | 
+ valid                          | \x666f6fb648   | \x666f6fb648 |         | 
+ valid, no translation to UTF-8 | \x666f6fa27f   | \x666f6fa27f |         | 
+ invalid, NUL byte              | \x666f6fb60048 |              |         | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte              | \x666f6fb64800 |              |         | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+          description           |    inbytes     |     result     | errorat |                                             error                                              
+--------------------------------+----------------+----------------+---------+------------------------------------------------------------------------------------------------
+ valid, pure ASCII              | \x666f6f       | \x666f6f       |         | 
+ valid                          | \x666f6fb648   | \x666f6fe8b1a1 |         | 
+ valid, no translation to UTF-8 | \x666f6fa27f   |                |         | character with byte sequence 0xa2 0x7f in encoding "BIG5" has no equivalent in encoding "UTF8"
+ invalid, NUL byte              | \x666f6fb60048 |                |         | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte              | \x666f6fb64800 |                |         | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+          description           |    inbytes     |     result     | errorat |                        error                         
+--------------------------------+----------------+----------------+---------+------------------------------------------------------
+ valid, pure ASCII              | \x666f6f       | \x666f6f       |         | 
+ valid                          | \x666f6fb648   | \x666f6f95e2af |         | 
+ valid, no translation to UTF-8 | \x666f6fa27f   | \x666f6f95a3c1 |         | 
+ invalid, NUL byte              | \x666f6fb60048 |                |         | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte              | \x666f6fb64800 |                |         | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+-- MULE_INTERNAL
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x8bc68bcf8bcf',	'valid (in KOI8R)'),
+  ('\x8bc68bcf8b',	'invalid,incomplete char'),
+  ('\x92bedd',		'valid (in SHIFT_JIS)'),
+  ('\x92be',		'invalid, incomplete char)'),
+  ('\x666f6f95a3c1',	'valid (in Big5)'),
+  ('\x666f6f95a3',	'invalid, incomplete char'),
+  ('\x9200bedd',	'invalid, NUL byte'),
+  ('\x92bedd00',	'invalid, NUL byte'),
+  ('\x8b00c68bcf8bcf',	'invalid, NUL byte');
+-- Test MULE_INTERNAL verification (exercised by converting to KOI8R)
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+        description        |     inbytes      |  result  | errorat |                                                     error                                                     
+---------------------------+------------------+----------+---------+---------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |         | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \xc6cfcf |         | 
+ invalid,incomplete char   | \x8bc68bcf8b     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         |          |         | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char) | \x92be           |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   |          |         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char  | \x666f6f95a3     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+        description        |     inbytes      |  result  | errorat |                                                       error                                                        
+---------------------------+------------------+----------+---------+--------------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |         | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   | \xe4dede |         | 
+ invalid,incomplete char   | \x8bc68bcf8b     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         |          |         | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char) | \x92be           |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   |          |         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char  | \x666f6f95a3     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+        description        |     inbytes      |  result  | errorat |                                                    error                                                     
+---------------------------+------------------+----------+---------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |         | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   |          |         | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid,incomplete char   | \x8bc68bcf8b     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         | \x8fdb   |         | 
+ invalid, incomplete char) | \x92be           |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   |          |         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid, incomplete char  | \x666f6f95a3     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+        description        |     inbytes      |    result    | errorat |                                                    error                                                     
+---------------------------+------------------+--------------+---------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f     |         | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   |              |         | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid,incomplete char   | \x8bc68bcf8b     |              |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         |              |         | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, incomplete char) | \x92be           |              |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   | \x666f6fa2a1 |         | 
+ invalid, incomplete char  | \x666f6f95a3     |              |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       |              |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       |              |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf |              |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
+        description        |     inbytes      |  result  | errorat |                                                     error                                                      
+---------------------------+------------------+----------+---------+----------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII         | \x666f6f         | \x666f6f |         | 
+ valid (in KOI8R)          | \x8bc68bcf8bcf   |          |         | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid,incomplete char   | \x8bc68bcf8b     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS)      | \x92bedd         | \xbedd   |         | 
+ invalid, incomplete char) | \x92be           |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5)           | \x666f6f95a3c1   |          |         | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid, incomplete char  | \x666f6f95a3     |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte         | \x9200bedd       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte         | \x92bedd00       |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte         | \x8b00c68bcf8bcf |          |         | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index 02cf39f1ce9..644531d3333 100644
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -34,3 +34,185 @@ DROP CONVERSION mydef;
 --
 RESET SESSION AUTHORIZATION;
 DROP USER regress_conversion_user;
+
+
+
+--
+-- Test built-in conversion functions.
+--
+
+-- helper function to test a conversion
+create or replace function test_conv(
+  input IN bytea,
+  src_encoding IN text,
+  dst_encoding IN text,
+
+  result OUT bytea,
+  errorat OUT bytea,
+  error OUT text)
+language plpgsql as
+$$
+declare
+  validlen int;
+begin
+  -- Try to perform the conversion. If it fails, catch the error and return
+  -- it to the caller.
+  begin
+    select * into result from convert(input, src_encoding, dst_encoding);
+    validlen = length(input);
+    errorat = NULL;
+    error := NULL;
+  exception when others then
+    result = NULL;
+    errorat = NULL;
+    error := sqlerrm;
+  end;
+  return;
+end;
+$$;
+
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\xc3a4c3b6',	'valid, extra latin chars'),
+  ('\xd184d0bed0be',	'valid, cyrillic'),
+  ('\x666f6fe8b1a1',	'valid, kanji/Chinese'),
+  ('\xe382abe3829a',	'valid, two chars that combine to one in EUC_JIS_2004'),
+  ('\xe382ab',		'only first half of combined char in EUC_JIS_2004'),
+  ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+  ('\x666f6fefa8aa',	'valid, needs mapping function to convert to GB18030'),
+  ('\x66e8b1ff6f6f',	'invalid byte sequence'),
+  ('\x66006f',		'invalid, NUL byte'),
+  ('\x666f6fe8b100',    'invalid, NUL byte'),
+  ('\x666f6fe8b1',	'incomplete character at end');
+
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6fbedd',	'valid'),
+  ('\xa5f7',		'valid, translates to two UTF-8 chars '),
+  ('\xbeddbe',		'incomplete char '),
+  ('\x666f6f00bedd',	'invalid, NUL byte'),
+  ('\x666f6fbe00dd',	'invalid, NUL byte'),
+  ('\x666f6fbedd00',	'invalid, NUL byte'),
+  ('\xbe04',		'invalid byte sequence');
+
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6f8fdb',	'valid'),
+  ('\x666f6f81c0',	'valid, no translation to UTF-8'),
+  ('\x666f6f82f5',	'valid, translates to two UTF-8 chars '),
+  ('\x666f6f8fdb8f',	'incomplete char '),
+  ('\x666f6f820a',	'incomplete char, followed by newline '),
+  ('\x666f6f008fdb',	'invalid, NUL byte'),
+  ('\x666f6f8f00db',	'invalid, NUL byte'),
+  ('\x666f6f8fdb00',	'invalid, NUL byte');
+
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6fcff3',	'valid'),
+  ('\x666f6f8431a530',	'valid, no translation to UTF-8'),
+  ('\x666f6f84309c38',	'valid, translates to UTF-8 by mapping function'),
+  ('\x666f6f84309c',	'incomplete char '),
+  ('\x666f6f84309c0a',	'incomplete char, followed by newline '),
+  ('\x666f6f84309c3800', 'invalid, NUL byte'),
+  ('\x666f6f84309c0038', 'invalid, NUL byte');
+
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\xe4dede',		'valid'),
+  ('\x00',		'invalid, NUL byte'),
+  ('\xe400dede',	'invalid, NUL byte'),
+  ('\xe4dede00',	'invalid, NUL byte');
+
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x666f6fb648',	'valid'),
+  ('\x666f6fa27f',	'valid, no translation to UTF-8'),
+  ('\x666f6fb60048',	'invalid, NUL byte'),
+  ('\x666f6fb64800',	'invalid, NUL byte');
+
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+
+
+-- MULE_INTERNAL
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs  values
+  ('\x666f6f',		'valid, pure ASCII'),
+  ('\x8bc68bcf8bcf',	'valid (in KOI8R)'),
+  ('\x8bc68bcf8b',	'invalid,incomplete char'),
+  ('\x92bedd',		'valid (in SHIFT_JIS)'),
+  ('\x92be',		'invalid, incomplete char)'),
+  ('\x666f6f95a3c1',	'valid (in Big5)'),
+  ('\x666f6f95a3',	'invalid, incomplete char'),
+  ('\x9200bedd',	'invalid, NUL byte'),
+  ('\x92bedd00',	'invalid, NUL byte'),
+  ('\x8b00c68bcf8bcf',	'invalid, NUL byte');
+
+-- Test MULE_INTERNAL verification (exercised by converting to KOI8R)
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
-- 
2.29.2

