Moving other hex functions to /common

Started by Bruce Momjianabout 5 years ago22 messages
#1Bruce Momjian
bruce@momjian.us
1 attachment(s)

I now understand the wisdom of moving the remaining hex functions to
/common. I know someone already suggested that, and the attached patch
does this.

I will add the attached patch to the commitfest so I can get cfbot
testing.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

Attachments:

hex.difftext/x-diff; charset=us-asciiDownload
diff --git a/src/backend/replication/backup_manifest.c b/src/backend/replication/backup_manifest.c
new file mode 100644
index c3f339c..716f114
*** a/src/backend/replication/backup_manifest.c
--- b/src/backend/replication/backup_manifest.c
***************
*** 17,23 ****
  #include "libpq/pqformat.h"
  #include "mb/pg_wchar.h"
  #include "replication/backup_manifest.h"
! #include "utils/builtins.h"
  #include "utils/json.h"
  
  static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
--- 17,23 ----
  #include "libpq/pqformat.h"
  #include "mb/pg_wchar.h"
  #include "replication/backup_manifest.h"
! #include "common/hex.h"
  #include "utils/json.h"
  
  static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
new file mode 100644
index a6c65b1..bca941a
*** a/src/backend/utils/adt/encode.c
--- b/src/backend/utils/adt/encode.c
***************
*** 15,21 ****
  
  #include <ctype.h>
  
! #include "common/hex_decode.h"
  #include "mb/pg_wchar.h"
  #include "utils/builtins.h"
  #include "utils/memutils.h"
--- 15,21 ----
  
  #include <ctype.h>
  
! #include "common/hex.h"
  #include "mb/pg_wchar.h"
  #include "utils/builtins.h"
  #include "utils/memutils.h"
*************** binary_decode(PG_FUNCTION_ARGS)
*** 142,179 ****
  
  
  /*
-  * HEX
-  */
- 
- static const char hextbl[] = "0123456789abcdef";
- 
- uint64
- hex_encode(const char *src, size_t len, char *dst)
- {
- 	const char *end = src + len;
- 
- 	while (src < end)
- 	{
- 		*dst++ = hextbl[(*src >> 4) & 0xF];
- 		*dst++ = hextbl[*src & 0xF];
- 		src++;
- 	}
- 	return (uint64) len * 2;
- }
- 
- static uint64
- hex_enc_len(const char *src, size_t srclen)
- {
- 	return (uint64) srclen << 1;
- }
- 
- static uint64
- hex_dec_len(const char *src, size_t srclen)
- {
- 	return (uint64) srclen >> 1;
- }
- 
- /*
   * BASE64
   */
  
--- 142,147 ----
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
new file mode 100644
index 9300d19..79fcdcd
*** a/src/backend/utils/adt/varlena.c
--- b/src/backend/utils/adt/varlena.c
***************
*** 22,28 ****
  #include "catalog/pg_type.h"
  #include "common/hashfn.h"
  #include "common/int.h"
! #include "common/hex_decode.h"
  #include "common/unicode_norm.h"
  #include "lib/hyperloglog.h"
  #include "libpq/pqformat.h"
--- 22,28 ----
  #include "catalog/pg_type.h"
  #include "common/hashfn.h"
  #include "common/int.h"
! #include "common/hex.h"
  #include "common/unicode_norm.h"
  #include "lib/hyperloglog.h"
  #include "libpq/pqformat.h"
diff --git a/src/common/Makefile b/src/common/Makefile
new file mode 100644
index f624977..93eb27a
*** a/src/common/Makefile
--- b/src/common/Makefile
*************** OBJS_COMMON = \
*** 58,64 ****
  	file_perm.o \
  	file_utils.o \
  	hashfn.o \
! 	hex_decode.o \
  	ip.o \
  	jsonapi.o \
  	keywords.o \
--- 58,64 ----
  	file_perm.o \
  	file_utils.o \
  	hashfn.o \
! 	hex.o \
  	ip.o \
  	jsonapi.o \
  	keywords.o \
diff --git a/src/common/hex_decode.c b/src/common/hex_decode.c
new file mode 100644
index 3ecdc73..97f57bc
*** a/src/common/hex_decode.c
--- b/src/common/hex_decode.c
***************
*** 1,7 ****
  /*-------------------------------------------------------------------------
   *
!  * hex_decode.c
!  *		hex decoding
   *
   *
   * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
--- 1,7 ----
  /*-------------------------------------------------------------------------
   *
!  * hex.c
!  *		hex processing
   *
   *
   * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
***************
*** 9,15 ****
   *
   *
   * IDENTIFICATION
!  *	  src/common/hex_decode.c
   *
   *-------------------------------------------------------------------------
   */
--- 9,15 ----
   *
   *
   * IDENTIFICATION
!  *	  src/common/hex.c
   *
   *-------------------------------------------------------------------------
   */
***************
*** 26,32 ****
  #else
  #include "mb/pg_wchar.h"
  #endif
! #include "common/hex_decode.h"
  
  
  static const int8 hexlookup[128] = {
--- 26,32 ----
  #else
  #include "mb/pg_wchar.h"
  #endif
! #include "common/hex.h"
  
  
  static const int8 hexlookup[128] = {
*************** static const int8 hexlookup[128] = {
*** 40,45 ****
--- 40,65 ----
  	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  };
  
+ /*
+  * HEX
+  */
+ 
+ static const char hextbl[] = "0123456789abcdef";
+ 
+ uint64
+ hex_encode(const char *src, size_t len, char *dst)
+ {
+ 	const char *end = src + len;
+ 
+ 	while (src < end)
+ 	{
+ 		*dst++ = hextbl[(*src >> 4) & 0xF];
+ 		*dst++ = hextbl[*src & 0xF];
+ 		src++;
+ 	}
+ 	return (uint64) len * 2;
+ }
+ 
  static inline char
  get_hex(const char *cp)
  {
*************** hex_decode(const char *src, size_t len,
*** 104,106 ****
--- 124,138 ----
  
  	return p - dst;
  }
+ 
+ uint64
+ hex_enc_len(const char *src, size_t srclen)
+ {
+ 	return (uint64) srclen << 1;
+ }
+ 
+ uint64
+ hex_dec_len(const char *src, size_t srclen)
+ {
+ 	return (uint64) srclen >> 1;
+ }
diff --git a/src/include/common/hex.h b/src/include/common/hex.h
new file mode 100644
index ...76154b6
*** a/src/include/common/hex.h
--- b/src/include/common/hex.h
***************
*** 0 ****
--- 1,18 ----
+ /*
+  *	hex.h
+  *		hex processing
+  *
+  *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+  *	Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *	src/include/common/hex.h
+  */
+ #ifndef COMMON_HEX_H
+ #define COMMON_HEX_H
+ 
+ extern uint64 hex_decode(const char *src, size_t len, char *dst);
+ extern uint64 hex_encode(const char *src, size_t len, char *dst);
+ extern uint64 hex_enc_len(const char *src, size_t srclen);
+ extern uint64 hex_dec_len(const char *src, size_t srclen);
+ 
+ #endif							/* COMMON_HEX_H */
diff --git a/src/include/common/hex_decode.h b/src/include/common/hex_decode.h
new file mode .
index 1f99f06..e69de29
*** a/src/include/common/hex_decode.h
--- b/src/include/common/hex_decode.h
***************
*** 1,16 ****
- /*
-  *	hex_decode.h
-  *		hex decoding
-  *
-  *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
-  *	Portions Copyright (c) 1994, Regents of the University of California
-  *
-  *	src/include/common/hex_decode.h
-  */
- #ifndef COMMON_HEX_DECODE_H
- #define COMMON_HEX_DECODE_H
- 
- extern uint64 hex_decode(const char *src, size_t len, char *dst);
- 
- 
- #endif							/* COMMON_HEX_DECODE_H */
--- 0 ----
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
new file mode 100644
index 19271e0..11ba6ae
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
*************** extern void domain_check(Datum value, bo
*** 31,39 ****
  extern int	errdatatype(Oid datatypeOid);
  extern int	errdomainconstraint(Oid datatypeOid, const char *conname);
  
- /* encode.c */
- extern uint64 hex_encode(const char *src, size_t len, char *dst);
- 
  /* int.c */
  extern int2vector *buildint2vector(const int16 *int2s, int n);
  
--- 31,36 ----
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
new file mode 100644
index 7f014a1..60b216c
*** a/src/tools/msvc/Mkvcbuild.pm
--- b/src/tools/msvc/Mkvcbuild.pm
*************** sub mkvcbuild
*** 121,127 ****
  	our @pgcommonallfiles = qw(
  	  archive.c base64.c checksum_helper.c
  	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
! 	  f2s.c file_perm.c file_utils.c hashfn.c hex_decode.c ip.c jsonapi.c
  	  keywords.c kwlookup.c link-canary.c md5_common.c
  	  pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
  	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
--- 121,127 ----
  	our @pgcommonallfiles = qw(
  	  archive.c base64.c checksum_helper.c
  	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
! 	  f2s.c file_perm.c file_utils.c hashfn.c hex.c ip.c jsonapi.c
  	  keywords.c kwlookup.c link-canary.c md5_common.c
  	  pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
  	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
#2Bruce Momjian
bruce@momjian.us
In reply to: Bruce Momjian (#1)
1 attachment(s)
Re: Moving other hex functions to /common

On Wed, Dec 30, 2020 at 07:35:57PM -0500, Bruce Momjian wrote:

I now understand the wisdom of moving the remaining hex functions to
/common. I know someone already suggested that, and the attached patch
does this.

I will add the attached patch to the commitfest so I can get cfbot
testing.

So, I am learning this cfbot thing. Seems I need -M100% to disable
rename detection for diffs to work with cfbot --- makes sense.
New patch attached.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

Attachments:

hex.diff.gzapplication/gzipDownload
#3Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#2)
Re: Moving other hex functions to /common

On Wed, Dec 30, 2020 at 08:22:07PM -0500, Bruce Momjian wrote:

So, I am learning this cfbot thing. Seems I need -M100% to disable
rename detection for diffs to work with cfbot --- makes sense.

A good way to make sure that a patch format is correct for the CF bot
would be to use "git format-patch -1" to generate a patch from a
single commit.

New patch attached.

I think that this patch would have more value if we remove completely
the hex routines from ECPG and have ECPG use what's moved in
src/common/, meaning the following changes:
- Remove the exit(), pg_log_fatal() and ereport() calls from
src/common/hex.c, replace the error code paths with -1, and return a
signed result.
- The ECPG copies make no use of ecpg_raise(), so things map easily.
- This means keeping small wrappers in encode.c able to generate those
ereport(FATAL) in the backend, but that's just necessary for the
decode routine that's the only thing using get_hex().
- Let's prefix the routines in src/common/ with "pg_", to be
consistent with base64.
- It would be good to document the top each routine in hex.c (see
base64.c for a similar reference).
--
Michael

#4Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#3)
1 attachment(s)
Re: Moving other hex functions to /common

On Thu, Dec 31, 2020 at 03:10:29PM +0900, Michael Paquier wrote:

On Wed, Dec 30, 2020 at 08:22:07PM -0500, Bruce Momjian wrote:

So, I am learning this cfbot thing. Seems I need -M100% to disable
rename detection for diffs to work with cfbot --- makes sense.

A good way to make sure that a patch format is correct for the CF bot
would be to use "git format-patch -1" to generate a patch from a
single commit.

Thanks. I had to learn how to squash my commits into a new branch and
then generate a format-patch on that:

https://bugsdb.com/_en/debug/8b648ec395b86be32efa9629cb006d74

I wanted to see how the cfbot liked my original patch with the renames,
and it didn't, so now I know I have to use this method for the
commitfest. Patch attached.

New patch attached.

I think that this patch would have more value if we remove completely
the hex routines from ECPG and have ECPG use what's moved in
src/common/, meaning the following changes:
- Remove the exit(), pg_log_fatal() and ereport() calls from
src/common/hex.c, replace the error code paths with -1, and return a
signed result.
- The ECPG copies make no use of ecpg_raise(), so things map easily.
- This means keeping small wrappers in encode.c able to generate those
ereport(FATAL) in the backend, but that's just necessary for the
decode routine that's the only thing using get_hex().
- Let's prefix the routines in src/common/ with "pg_", to be
consistent with base64.
- It would be good to document the top each routine in hex.c (see
base64.c for a similar reference).

Let me get my patch building on the cfbot and then I will address each
of these. I am trying to do one stage at a time since I am still
learning the process. Thanks.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

Attachments:

hex.difftext/x-diff; charset=us-asciiDownload
From 464746996a22149edc607241d27837bf876cceda Mon Sep 17 00:00:00 2001
From: Bruce Momjian <bruce@momjian.us>
Date: Fri, 1 Jan 2021 15:04:42 -0500
Subject: [PATCH] hex squash commit

---
 src/backend/replication/backup_manifest.c |  2 +-
 src/backend/utils/adt/encode.c            | 34 +------------------
 src/backend/utils/adt/varlena.c           |  2 +-
 src/common/Makefile                       |  2 +-
 src/common/{hex_decode.c => hex.c}        | 40 ++++++++++++++++++++---
 src/include/common/hex.h (new)            | 18 ++++++++++
 src/include/common/hex_decode.h (gone)    | 16 ---------
 src/include/utils/builtins.h              |  3 --
 src/tools/msvc/Mkvcbuild.pm               |  2 +-
 9 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/src/backend/replication/backup_manifest.c b/src/backend/replication/backup_manifest.c
index c3f339c556..716f114d78 100644
--- a/src/backend/replication/backup_manifest.c
+++ b/src/backend/replication/backup_manifest.c
@@ -17,7 +17,7 @@
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "replication/backup_manifest.h"
-#include "utils/builtins.h"
+#include "common/hex.h"
 #include "utils/json.h"
 
 static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index a6c65b1657..bca941a496 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -15,7 +15,7 @@
 
 #include <ctype.h>
 
-#include "common/hex_decode.h"
+#include "common/hex.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
@@ -141,38 +141,6 @@ binary_decode(PG_FUNCTION_ARGS)
 }
 
 
-/*
- * HEX
- */
-
-static const char hextbl[] = "0123456789abcdef";
-
-uint64
-hex_encode(const char *src, size_t len, char *dst)
-{
-	const char *end = src + len;
-
-	while (src < end)
-	{
-		*dst++ = hextbl[(*src >> 4) & 0xF];
-		*dst++ = hextbl[*src & 0xF];
-		src++;
-	}
-	return (uint64) len * 2;
-}
-
-static uint64
-hex_enc_len(const char *src, size_t srclen)
-{
-	return (uint64) srclen << 1;
-}
-
-static uint64
-hex_dec_len(const char *src, size_t srclen)
-{
-	return (uint64) srclen >> 1;
-}
-
 /*
  * BASE64
  */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 9300d19e0c..79fcdcd178 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -22,7 +22,7 @@
 #include "catalog/pg_type.h"
 #include "common/hashfn.h"
 #include "common/int.h"
-#include "common/hex_decode.h"
+#include "common/hex.h"
 #include "common/unicode_norm.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
diff --git a/src/common/Makefile b/src/common/Makefile
index f624977939..93eb27a2aa 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -58,7 +58,7 @@ OBJS_COMMON = \
 	file_perm.o \
 	file_utils.o \
 	hashfn.o \
-	hex_decode.o \
+	hex.o \
 	ip.o \
 	jsonapi.o \
 	keywords.o \
diff --git a/src/common/hex_decode.c b/src/common/hex.c
similarity index 79%
rename from src/common/hex_decode.c
rename to src/common/hex.c
index 3ecdc73b5c..97f57bcc32 100644
--- a/src/common/hex_decode.c
+++ b/src/common/hex.c
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
- * hex_decode.c
- *		hex decoding
+ * hex.c
+ *		hex processing
  *
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  src/common/hex_decode.c
+ *	  src/common/hex.c
  *
  *-------------------------------------------------------------------------
  */
@@ -26,7 +26,7 @@
 #else
 #include "mb/pg_wchar.h"
 #endif
-#include "common/hex_decode.h"
+#include "common/hex.h"
 
 
 static const int8 hexlookup[128] = {
@@ -40,6 +40,26 @@ static const int8 hexlookup[128] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
 };
 
+/*
+ * HEX
+ */
+
+static const char hextbl[] = "0123456789abcdef";
+
+uint64
+hex_encode(const char *src, size_t len, char *dst)
+{
+	const char *end = src + len;
+
+	while (src < end)
+	{
+		*dst++ = hextbl[(*src >> 4) & 0xF];
+		*dst++ = hextbl[*src & 0xF];
+		src++;
+	}
+	return (uint64) len * 2;
+}
+
 static inline char
 get_hex(const char *cp)
 {
@@ -104,3 +124,15 @@ hex_decode(const char *src, size_t len, char *dst)
 
 	return p - dst;
 }
+
+uint64
+hex_enc_len(const char *src, size_t srclen)
+{
+	return (uint64) srclen << 1;
+}
+
+uint64
+hex_dec_len(const char *src, size_t srclen)
+{
+	return (uint64) srclen >> 1;
+}
diff --git a/src/include/common/hex.h b/src/include/common/hex.h
new file mode 100644
index 0000000000..76154b65af
--- /dev/null
+++ b/src/include/common/hex.h
@@ -0,0 +1,18 @@
+/*
+ *	hex.h
+ *		hex processing
+ *
+ *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ *	Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *	src/include/common/hex.h
+ */
+#ifndef COMMON_HEX_H
+#define COMMON_HEX_H
+
+extern uint64 hex_decode(const char *src, size_t len, char *dst);
+extern uint64 hex_encode(const char *src, size_t len, char *dst);
+extern uint64 hex_enc_len(const char *src, size_t srclen);
+extern uint64 hex_dec_len(const char *src, size_t srclen);
+
+#endif							/* COMMON_HEX_H */
diff --git a/src/include/common/hex_decode.h b/src/include/common/hex_decode.h
deleted file mode 100644
index 1f99f069b2..0000000000
--- a/src/include/common/hex_decode.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *	hex_decode.h
- *		hex decoding
- *
- *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- *	Portions Copyright (c) 1994, Regents of the University of California
- *
- *	src/include/common/hex_decode.h
- */
-#ifndef COMMON_HEX_DECODE_H
-#define COMMON_HEX_DECODE_H
-
-extern uint64 hex_decode(const char *src, size_t len, char *dst);
-
-
-#endif							/* COMMON_HEX_DECODE_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 19271e0696..11ba6ae565 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -31,9 +31,6 @@ extern void domain_check(Datum value, bool isnull, Oid domainType,
 extern int	errdatatype(Oid datatypeOid);
 extern int	errdomainconstraint(Oid datatypeOid, const char *conname);
 
-/* encode.c */
-extern uint64 hex_encode(const char *src, size_t len, char *dst);
-
 /* int.c */
 extern int2vector *buildint2vector(const int16 *int2s, int n);
 
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 7f014a12c9..60b216cce0 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -121,7 +121,7 @@ sub mkvcbuild
 	our @pgcommonallfiles = qw(
 	  archive.c base64.c checksum_helper.c
 	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
-	  f2s.c file_perm.c file_utils.c hashfn.c hex_decode.c ip.c jsonapi.c
+	  f2s.c file_perm.c file_utils.c hashfn.c hex.c ip.c jsonapi.c
 	  keywords.c kwlookup.c link-canary.c md5_common.c
 	  pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
-- 
2.20.1

#5Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#4)
1 attachment(s)
Re: Moving other hex functions to /common

On Fri, Jan 01, 2021 at 03:06:13PM -0500, Bruce Momjian wrote:

Thanks. I had to learn how to squash my commits into a new branch and
then generate a format-patch on that:

https://bugsdb.com/_en/debug/8b648ec395b86be32efa9629cb006d74

I wanted to see how the cfbot liked my original patch with the renames,
and it didn't, so now I know I have to use this method for the
commitfest. Patch attached.

There are many ways to do that, indeed. On my end, I use a local
branch. and then apply a set of git reset --soft before recreating a
single commit.

Let me get my patch building on the cfbot and then I will address each
of these. I am trying to do one stage at a time since I am still
learning the process. Thanks.

No problem.  On my end, this stuff has been itching me for a couple of
days and I could not recall why..  Until I remembered that the design
of the hex APIs in your patch is weak with overflow handling because
we don't pass down to the function the size of the destination buffer.
We have finished with a similar set of issues on the past with SCRAM
and base64, with has led to CVE-2019-10164 and the improvements done
in cfc40d3.  So I think that we need to improve things in a safer way.
Mapping with the design for base64, I have finished with the attached
patch, and the following set:
+extern int64 pg_hex_decode(const char *src, int64 len, char *dst, int64 dstlen);
+extern int64 pg_hex_encode(const char *src, int64 len, char *dst, int64 dstlen);
+extern int64 pg_hex_enc_len(int64 srclen);
+extern int64 pg_hex_dec_len(int64 srclen);

This ensures that the result never overflows, which explains the
introduction of an error code for the encoding part, and does not
use elog()/pg_log() so as external libraries can use them. ECPG uses
long variables in a couple of places, explaining why it feels safer to
use int64. int should give enough room to any caller of those APIs,
but there is no drawback in using a 64-bit API either, and I don't
think it is worth the risk to break ECPG either for long handling,
even if I don't believe either that folks are going to work on strings
larger than 2GB.

Things get trickier for the bytea input/output because we want more
generic error messages depending for invalid values or an incorrect
number of digits, which is why I have left the copies in encode.c.
This design could be easily extended with more types of error codes,
though I am not sure if that's worth bothering.

Even with that, this leads to much more sanity for hex buffer
manipulation in backup manifests (I don't think that using
PG_SHAXXX_DIGEST_STRING_LENGTH is a good idea either, I'd like to get
rid of it in the long-term) and ECPG, so that's clearly a gain.

I don't have a Windows host at hand, though I think that it should
work there correctly. What do you think about the ideas in the
attached patch?
--
Michael

Attachments:

0001-Refactor-the-hex-code.patchtext/x-diff; charset=us-asciiDownload
From 8952fb2d5e5a7a01799fe380408a17441a185436 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Sat, 2 Jan 2021 14:00:22 +0900
Subject: [PATCH] Refactor the hex code

---
 src/include/common/hex.h                     |  23 +++
 src/include/common/hex_decode.h              |  16 --
 src/include/utils/builtins.h                 |   1 +
 src/backend/replication/backup_manifest.c    |  39 +++--
 src/backend/utils/adt/encode.c               |  60 ++++++-
 src/backend/utils/adt/varlena.c              |  12 +-
 src/common/Makefile                          |   2 +-
 src/common/hex.c                             | 166 +++++++++++++++++++
 src/common/hex_decode.c                      | 106 ------------
 src/interfaces/ecpg/ecpglib/data.c           |  92 +---------
 src/interfaces/ecpg/ecpglib/ecpglib_extern.h |   3 -
 src/interfaces/ecpg/ecpglib/execute.c        |  28 +++-
 src/interfaces/ecpg/include/ecpgerrno.h      |   1 +
 src/tools/msvc/Mkvcbuild.pm                  |   2 +-
 14 files changed, 321 insertions(+), 230 deletions(-)
 create mode 100644 src/include/common/hex.h
 create mode 100644 src/common/hex.c

diff --git a/src/include/common/hex.h b/src/include/common/hex.h
new file mode 100644
index 0000000000..cf36895ee4
--- /dev/null
+++ b/src/include/common/hex.h
@@ -0,0 +1,23 @@
+/*------------------------------------------------------------------------
+ *
+ *	hex.h
+ *	  Encoding and decoding routines for hex strings.
+ *
+ *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ *	Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *		  src/include/common/hex.h
+ *
+ *------------------------------------------------------------------------
+ */
+
+#ifndef COMMON_HEX_H
+#define COMMON_HEX_H
+
+extern int64 pg_hex_decode(const char *src, int64 len, char *dst, int64 dstlen);
+extern int64 pg_hex_encode(const char *src, int64 len, char *dst, int64 dstlen);
+extern int64 pg_hex_enc_len(int64 srclen);
+extern int64 pg_hex_dec_len(int64 srclen);
+
+#endif							/* COMMON_HEX_H */
diff --git a/src/include/common/hex_decode.h b/src/include/common/hex_decode.h
index 1f99f069b2..e69de29bb2 100644
--- a/src/include/common/hex_decode.h
+++ b/src/include/common/hex_decode.h
@@ -1,16 +0,0 @@
-/*
- *	hex_decode.h
- *		hex decoding
- *
- *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- *	Portions Copyright (c) 1994, Regents of the University of California
- *
- *	src/include/common/hex_decode.h
- */
-#ifndef COMMON_HEX_DECODE_H
-#define COMMON_HEX_DECODE_H
-
-extern uint64 hex_decode(const char *src, size_t len, char *dst);
-
-
-#endif							/* COMMON_HEX_DECODE_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 19271e0696..c3a0601ef5 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -32,6 +32,7 @@ extern int	errdatatype(Oid datatypeOid);
 extern int	errdomainconstraint(Oid datatypeOid, const char *conname);
 
 /* encode.c */
+extern uint64 hex_decode(const char *src, size_t len, char *dst);
 extern uint64 hex_encode(const char *src, size_t len, char *dst);
 
 /* int.c */
diff --git a/src/backend/replication/backup_manifest.c b/src/backend/replication/backup_manifest.c
index c3f339c556..6c1250ab6c 100644
--- a/src/backend/replication/backup_manifest.c
+++ b/src/backend/replication/backup_manifest.c
@@ -13,11 +13,11 @@
 #include "postgres.h"
 
 #include "access/timeline.h"
+#include "common/hex.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "replication/backup_manifest.h"
-#include "utils/builtins.h"
 #include "utils/json.h"
 
 static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
@@ -150,10 +150,17 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
 	}
 	else
 	{
+		int64	dstlen = pg_hex_enc_len(pathlen);
+		int64	res;
+
 		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
-		enlargeStringInfo(&buf, 2 * pathlen);
-		buf.len += hex_encode(pathname, pathlen,
-							  &buf.data[buf.len]);
+		enlargeStringInfo(&buf, dstlen);
+		res = pg_hex_encode(pathname, pathlen,
+							&buf.data[buf.len], dstlen);
+		if (res < 0)
+			elog(ERROR, "could not encode path \"%s\" in backup manifest",
+				 pathname);
+		buf.len += res;
 		appendStringInfoString(&buf, "\", ");
 	}
 
@@ -176,6 +183,8 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
 	{
 		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
 		int			checksumlen;
+		int64		dstlen;
+		int64		res;
 
 		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
 		if (checksumlen < 0)
@@ -185,9 +194,14 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
 		appendStringInfo(&buf,
 						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
 						 pg_checksum_type_name(checksum_ctx->type));
-		enlargeStringInfo(&buf, 2 * checksumlen);
-		buf.len += hex_encode((char *) checksumbuf, checksumlen,
-							  &buf.data[buf.len]);
+		dstlen = pg_hex_enc_len(checksumlen);
+		enlargeStringInfo(&buf, dstlen);
+		res = pg_hex_encode((char *) checksumbuf, checksumlen,
+							&buf.data[buf.len], dstlen);
+		if (res < 0)
+			elog(ERROR, "could not encode checksum algorithm of file \"%s\"",
+				 pathname);
+		buf.len += res;
 		appendStringInfoChar(&buf, '"');
 	}
 
@@ -307,8 +321,9 @@ SendBackupManifest(backup_manifest_info *manifest)
 {
 	StringInfoData protobuf;
 	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
-	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
+	char	   *checksumstringbuf;
 	size_t		manifest_bytes_done = 0;
+	int64		dstlen;
 
 	if (!IsManifestEnabled(manifest))
 		return;
@@ -328,8 +343,12 @@ SendBackupManifest(backup_manifest_info *manifest)
 	if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf) < 0)
 		elog(ERROR, "failed to finalize checksum of backup manifest");
 	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
-	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
-	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
+	dstlen = pg_hex_enc_len(PG_SHA256_DIGEST_LENGTH);
+	checksumstringbuf = palloc0(dstlen + 1);	/* includes \0 */
+	if (pg_hex_encode((char *) checksumbuf, sizeof checksumbuf,
+					  checksumstringbuf, dstlen) < 0)
+		elog(ERROR, "could not encode SHA2 result in backup manifest");
+	checksumstringbuf[dstlen] = '\0';
 	AppendStringToManifest(manifest, checksumstringbuf);
 	AppendStringToManifest(manifest, "\"}\n");
 
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index a6c65b1657..4e8f673335 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -15,7 +15,6 @@
 
 #include <ctype.h>
 
-#include "common/hex_decode.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
@@ -147,6 +146,17 @@ binary_decode(PG_FUNCTION_ARGS)
 
 static const char hextbl[] = "0123456789abcdef";
 
+static const int8 hexlookup[128] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 uint64
 hex_encode(const char *src, size_t len, char *dst)
 {
@@ -161,6 +171,54 @@ hex_encode(const char *src, size_t len, char *dst)
 	return (uint64) len * 2;
 }
 
+static inline char
+get_hex(char c)
+{
+	int			res = -1;
+
+	if (c > 0 && c < 127)
+		res = hexlookup[(unsigned char) c];
+
+	if (res < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid hexadecimal digit: \"%c\"", c)));
+
+	return (char) res;
+}
+
+uint64
+hex_decode(const char *src, size_t len, char *dst)
+{
+	const char *s,
+			   *srcend;
+	char		v1,
+				v2,
+			   *p;
+
+	srcend = src + len;
+	s = src;
+	p = dst;
+	while (s < srcend)
+	{
+		if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
+		{
+			s++;
+			continue;
+		}
+		v1 = get_hex(*s++) << 4;
+		if (s >= srcend)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid hexadecimal data: odd number of digits")));
+
+		v2 = get_hex(*s++);
+		*p++ = v1 | v2;
+	}
+
+	return p - dst;
+}
+
 static uint64
 hex_enc_len(const char *src, size_t srclen)
 {
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 9300d19e0c..35f0e78238 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -21,8 +21,8 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "common/hashfn.h"
+#include "common/hex.h"
 #include "common/int.h"
-#include "common/hex_decode.h"
 #include "common/unicode_norm.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -307,6 +307,11 @@ byteain(PG_FUNCTION_ARGS)
 
 		bc = (len - 2) / 2 + VARHDRSZ;	/* maximum possible length */
 		result = palloc(bc);
+
+		/*
+		 * Note that this does not use the hex implementation in src/common/
+		 * to get a proper error handling.
+		 */
 		bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
 		SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
 
@@ -400,6 +405,11 @@ byteaout(PG_FUNCTION_ARGS)
 		rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
 		*rp++ = '\\';
 		*rp++ = 'x';
+
+		/*
+		 * Note that this does not use the hex implementation in src/common/
+		 * to get a proper error handling.
+		 */
 		rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
 	}
 	else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
diff --git a/src/common/Makefile b/src/common/Makefile
index f624977939..93eb27a2aa 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -58,7 +58,7 @@ OBJS_COMMON = \
 	file_perm.o \
 	file_utils.o \
 	hashfn.o \
-	hex_decode.o \
+	hex.o \
 	ip.o \
 	jsonapi.o \
 	keywords.o \
diff --git a/src/common/hex.c b/src/common/hex.c
new file mode 100644
index 0000000000..1e16c3291c
--- /dev/null
+++ b/src/common/hex.c
@@ -0,0 +1,166 @@
+/*-------------------------------------------------------------------------
+ *
+ * hex.c
+ *	  Encoding and decoding routines for hex.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/common/hex.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/hex.h"
+#include "mb/pg_wchar.h"
+
+
+static const int8 hexlookup[128] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+static const char hextbl[] = "0123456789abcdef";
+
+static inline char
+get_hex(const char *cp)
+{
+	unsigned char c = (unsigned char) *cp;
+	int			res = -1;
+
+	if (c < 127)
+		res = hexlookup[c];
+
+	if (res < 0)
+		return -1;
+
+	return (char) res;
+}
+
+/*
+ * pg_hex_encode
+ *
+ * Encode into hex the given string.  Returns the length of the encoded
+ * string, and -1 in the event of an error with the result buffer zeroed
+ * for safety.
+ */
+int64
+pg_hex_encode(const char *src, int64 len, char *dst, int64 dstlen)
+{
+	const char *end = src + len;
+	char	   *p;
+
+	p = dst;
+
+	while (src < end)
+	{
+		/*
+		 * Leave if there is an overflow in the area allocated for the
+		 * encoded string.
+		 */
+		if ((p - dst + 2) > dstlen)
+			goto error;
+
+		*p++ = hextbl[(*src >> 4) & 0xF];
+		*p++ = hextbl[*src & 0xF];
+		src++;
+	}
+	return len * 2;
+
+error:
+	memset(dst, 0, dstlen);
+	return -1;
+}
+
+/*
+ * pg_hex_decode
+ *
+ * Decode the given hex string.  Returns the length of the decoded
+ * string on success, and -1 in the event of an error with the result
+ * buffer zeroed for safety.
+ */
+int64
+pg_hex_decode(const char *src, int64 len, char *dst, int64 dstlen)
+{
+	const char *s,
+			   *srcend;
+	char		v1,
+				v2,
+			   *p;
+
+	srcend = src + len;
+	s = src;
+	p = dst;
+	while (s < srcend)
+	{
+		if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
+		{
+			s++;
+			continue;
+		}
+		v1 = get_hex(s) << 4;
+		s++;
+
+		if (s >= srcend)
+			goto error;
+
+		v2 = get_hex(s);
+		s++;
+
+		/* overflow check */
+		if ((p - dst + 1) > dstlen)
+			goto error;
+
+		*p++ = v1 | v2;
+	}
+
+
+	Assert((p - dst) <= dstlen);
+	return p - dst;
+
+error:
+	memset(dst, 0, dstlen);
+	return -1;
+}
+
+/*
+ * pg_hex_enc_len
+ *
+ * Returns to caller the length of the string if it were encoded with
+ * hex based on the length provided by caller.  This is useful to* estimate
+ * how large a buffer allocation needs to be done before doing the actual
+ * encoding.
+ */
+int64
+pg_hex_enc_len(int64 srclen)
+{
+	return (srclen << 1);
+}
+
+/*
+ * pg_hex_dec_len
+ *
+ * Returns to caller the length of the string if it were to be decoded
+ * with hex, based on the length given by caller.  This is useful to
+ * estimate how large a buffer allocation needs to be done before doing
+ * the actual decoding.
+ */
+int64
+pg_hex_dec_len(int64 srclen)
+{
+	return (srclen >> 1);
+}
diff --git a/src/common/hex_decode.c b/src/common/hex_decode.c
index 3ecdc73b5c..e69de29bb2 100644
--- a/src/common/hex_decode.c
+++ b/src/common/hex_decode.c
@@ -1,106 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * hex_decode.c
- *		hex decoding
- *
- *
- * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *	  src/common/hex_decode.c
- *
- *-------------------------------------------------------------------------
- */
-
-
-#ifndef FRONTEND
-#include "postgres.h"
-#else
-#include "postgres_fe.h"
-#endif
-
-#ifdef FRONTEND
-#include "common/logging.h"
-#else
-#include "mb/pg_wchar.h"
-#endif
-#include "common/hex_decode.h"
-
-
-static const int8 hexlookup[128] = {
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-static inline char
-get_hex(const char *cp)
-{
-	unsigned char c = (unsigned char) *cp;
-	int			res = -1;
-
-	if (c < 127)
-		res = hexlookup[c];
-
-	if (res < 0)
-	{
-#ifdef FRONTEND
-		pg_log_fatal("invalid hexadecimal digit");
-		exit(EXIT_FAILURE);
-#else
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("invalid hexadecimal digit: \"%.*s\"",
-						pg_mblen(cp), cp)));
-#endif
-	}
-
-	return (char) res;
-}
-
-uint64
-hex_decode(const char *src, size_t len, char *dst)
-{
-	const char *s,
-			   *srcend;
-	char		v1,
-				v2,
-			   *p;
-
-	srcend = src + len;
-	s = src;
-	p = dst;
-	while (s < srcend)
-	{
-		if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
-		{
-			s++;
-			continue;
-		}
-		v1 = get_hex(s) << 4;
-		s++;
-		if (s >= srcend)
-		{
-#ifdef FRONTEND
-			pg_log_fatal("invalid hexadecimal data: odd number of digits");
-			exit(EXIT_FAILURE);
-#else
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("invalid hexadecimal data: odd number of digits")));
-#endif
-		}
-		v2 = get_hex(s);
-		s++;
-		*p++ = v1 | v2;
-	}
-
-	return p - dst;
-}
diff --git a/src/interfaces/ecpg/ecpglib/data.c b/src/interfaces/ecpg/ecpglib/data.c
index 6bc91ef7eb..7b79a6a5de 100644
--- a/src/interfaces/ecpg/ecpglib/data.c
+++ b/src/interfaces/ecpg/ecpglib/data.c
@@ -5,6 +5,7 @@
 
 #include <math.h>
 
+#include "common/hex.h"
 #include "ecpgerrno.h"
 #include "ecpglib.h"
 #include "ecpglib_extern.h"
@@ -122,86 +123,6 @@ check_special_value(char *ptr, double *retval, char **endptr)
 	return false;
 }
 
-/* imported from src/backend/utils/adt/encode.c */
-
-unsigned
-ecpg_hex_enc_len(unsigned srclen)
-{
-	return srclen << 1;
-}
-
-unsigned
-ecpg_hex_dec_len(unsigned srclen)
-{
-	return srclen >> 1;
-}
-
-static inline char
-get_hex(char c)
-{
-	static const int8 hexlookup[128] = {
-		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	};
-	int			res = -1;
-
-	if (c > 0 && c < 127)
-		res = hexlookup[(unsigned char) c];
-
-	return (char) res;
-}
-
-static unsigned
-hex_decode(const char *src, unsigned len, char *dst)
-{
-	const char *s,
-			   *srcend;
-	char		v1,
-				v2,
-			   *p;
-
-	srcend = src + len;
-	s = src;
-	p = dst;
-	while (s < srcend)
-	{
-		if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
-		{
-			s++;
-			continue;
-		}
-		v1 = get_hex(*s++) << 4;
-		if (s >= srcend)
-			return -1;
-
-		v2 = get_hex(*s++);
-		*p++ = v1 | v2;
-	}
-
-	return p - dst;
-}
-
-unsigned
-ecpg_hex_encode(const char *src, unsigned len, char *dst)
-{
-	static const char hextbl[] = "0123456789abcdef";
-	const char *end = src + len;
-
-	while (src < end)
-	{
-		*dst++ = hextbl[(*src >> 4) & 0xF];
-		*dst++ = hextbl[*src & 0xF];
-		src++;
-	}
-	return len * 2;
-}
-
 bool
 ecpg_get_data(const PGresult *results, int act_tuple, int act_field, int lineno,
 			  enum ECPGttype type, enum ECPGttype ind_type,
@@ -529,14 +450,19 @@ ecpg_get_data(const PGresult *results, int act_tuple, int act_field, int lineno,
 									src_size,
 									dec_size;
 
-						dst_size = ecpg_hex_enc_len(varcharsize);
+						dst_size = pg_hex_enc_len(varcharsize);
 						src_size = size - 2;	/* exclude backslash + 'x' */
 						dec_size = src_size < dst_size ? src_size : dst_size;
-						variable->len = hex_decode(pval + 2, dec_size, variable->arr);
+						variable->len = pg_hex_decode(pval + 2, dec_size, variable->arr, dst_size);
+
+						if (variable->len < 0)
+							ecpg_raise(lineno, ECPG_OVERFLOW,
+									   ECPG_SQLSTATE_ECPG_INTERNAL_ERROR,
+									   NULL);
 
 						if (dst_size < src_size)
 						{
-							long		rcv_size = ecpg_hex_dec_len(size - 2);
+							long		rcv_size = pg_hex_dec_len(size - 2);
 
 							/* truncation */
 							switch (ind_type)
diff --git a/src/interfaces/ecpg/ecpglib/ecpglib_extern.h b/src/interfaces/ecpg/ecpglib/ecpglib_extern.h
index 1a98dea1b5..cdde4903ec 100644
--- a/src/interfaces/ecpg/ecpglib/ecpglib_extern.h
+++ b/src/interfaces/ecpg/ecpglib/ecpglib_extern.h
@@ -215,9 +215,6 @@ struct sqlda_compat *ecpg_build_compat_sqlda(int, PGresult *, int, enum COMPAT_M
 void		ecpg_set_compat_sqlda(int, struct sqlda_compat **, const PGresult *, int, enum COMPAT_MODE);
 struct sqlda_struct *ecpg_build_native_sqlda(int, PGresult *, int, enum COMPAT_MODE);
 void		ecpg_set_native_sqlda(int, struct sqlda_struct **, const PGresult *, int, enum COMPAT_MODE);
-unsigned	ecpg_hex_dec_len(unsigned srclen);
-unsigned	ecpg_hex_enc_len(unsigned srclen);
-unsigned	ecpg_hex_encode(const char *src, unsigned len, char *dst);
 
 #ifdef ENABLE_NLS
 extern char *ecpg_gettext(const char *msgid) pg_attribute_format_arg(1);
diff --git a/src/interfaces/ecpg/ecpglib/execute.c b/src/interfaces/ecpg/ecpglib/execute.c
index 930b6adbe4..5b16f5ed2c 100644
--- a/src/interfaces/ecpg/ecpglib/execute.c
+++ b/src/interfaces/ecpg/ecpglib/execute.c
@@ -19,6 +19,7 @@
 #include <math.h>
 
 #include "catalog/pg_type_d.h"
+#include "common/hex.h"
 #include "ecpgerrno.h"
 #include "ecpglib.h"
 #include "ecpglib_extern.h"
@@ -491,16 +492,18 @@ static char *
 convert_bytea_to_string(char *from_data, int from_len, int lineno)
 {
 	char	   *to_data;
-	int			to_len = ecpg_hex_enc_len(from_len) + 4 + 1;	/* backslash + 'x' +
-																 * quote + quote */
+	int			dstlen = pg_hex_enc_len(from_len);
+	int			to_len = dstlen + 4 + 1;	/* backslash + 'x' +
+											 * quote + quote */
 
 	to_data = ecpg_alloc(to_len, lineno);
 	if (!to_data)
 		return NULL;
 
 	strcpy(to_data, "'\\x");
-	ecpg_hex_encode(from_data, from_len, to_data + 3);
-	strcpy(to_data + 3 + ecpg_hex_enc_len(from_len), "\'");
+	if (pg_hex_encode(from_data, from_len, to_data + 3, dstlen) < 0)
+		return NULL;
+	strcpy(to_data + 3 + pg_hex_enc_len(from_len), "\'");
 
 	return to_data;
 }
@@ -1087,12 +1090,21 @@ print_param_value(char *value, int len, int is_binary, int lineno, int nth)
 		value_s = value;
 	else
 	{
-		value_s = ecpg_alloc(ecpg_hex_enc_len(len) + 1, lineno);
+		int		dstlen = pg_hex_enc_len(len);
+
+		value_s = ecpg_alloc(dstlen + 1, lineno);
 		if (value_s != NULL)
 		{
-			ecpg_hex_encode(value, len, value_s);
-			value_s[ecpg_hex_enc_len(len)] = '\0';
-			malloced = true;
+			if (pg_hex_encode(value, len, value_s, dstlen) >= 0)
+			{
+				value_s[dstlen] = '\0';
+				malloced = true;
+			}
+			else
+			{
+				ecpg_free(value_s);
+				value_s = "error when encoding parameter";
+			}
 		}
 		else
 			value_s = "no memory for logging of parameter";
diff --git a/src/interfaces/ecpg/include/ecpgerrno.h b/src/interfaces/ecpg/include/ecpgerrno.h
index c4bc526463..ee88740035 100644
--- a/src/interfaces/ecpg/include/ecpgerrno.h
+++ b/src/interfaces/ecpg/include/ecpgerrno.h
@@ -32,6 +32,7 @@
 #define ECPG_NO_ARRAY			-214
 #define ECPG_DATA_NOT_ARRAY		-215
 #define ECPG_ARRAY_INSERT		-216
+#define ECPG_OVERFLOW		-217
 
 #define ECPG_NO_CONN			-220
 #define ECPG_NOT_CONN			-221
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 7f014a12c9..60b216cce0 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -121,7 +121,7 @@ sub mkvcbuild
 	our @pgcommonallfiles = qw(
 	  archive.c base64.c checksum_helper.c
 	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
-	  f2s.c file_perm.c file_utils.c hashfn.c hex_decode.c ip.c jsonapi.c
+	  f2s.c file_perm.c file_utils.c hashfn.c hex.c ip.c jsonapi.c
 	  keywords.c kwlookup.c link-canary.c md5_common.c
 	  pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
-- 
2.30.0

#6Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#5)
1 attachment(s)
Re: Moving other hex functions to /common

On Sat, Jan 2, 2021 at 02:25:33PM +0900, Michael Paquier wrote:

Let me get my patch building on the cfbot and then I will address each
of these. I am trying to do one stage at a time since I am still
learning the process. Thanks.

No problem.  On my end, this stuff has been itching me for a couple of
days and I could not recall why..  Until I remembered that the design
of the hex APIs in your patch is weak with overflow handling because
we don't pass down to the function the size of the destination buffer.
We have finished with a similar set of issues on the past with SCRAM
and base64, with has led to CVE-2019-10164 and the improvements done
in cfc40d3.  So I think that we need to improve things in a safer way.
Mapping with the design for base64, I have finished with the attached
patch, and the following set:
+extern int64 pg_hex_decode(const char *src, int64 len, char *dst, int64 dstlen);
+extern int64 pg_hex_encode(const char *src, int64 len, char *dst, int64 dstlen);
+extern int64 pg_hex_enc_len(int64 srclen);
+extern int64 pg_hex_dec_len(int64 srclen);

I can see the value of passing the destination length to the hex
functions, and I think you have to pass the src length to pg_hex_encode
since the input can be binary. I assume the pg_hex_decode doesn't need
the source length because it is a null-terminated C string, right?
However, pg_base64_decode(const char *src, size_t len, char *dst) does
pass in the src length, so I can see we should just make it the same. I
also agree it is unclear if 'len' is the src or dst len, so your patch
fixes that with the names. Also, is there a reason you use int64
instead of the size_t used by bytea?

This ensures that the result never overflows, which explains the
introduction of an error code for the encoding part, and does not
use elog()/pg_log() so as external libraries can use them. ECPG uses
long variables in a couple of places, explaining why it feels safer to
use int64. int should give enough room to any caller of those APIs,
but there is no drawback in using a 64-bit API either, and I don't
think it is worth the risk to break ECPG either for long handling,
even if I don't believe either that folks are going to work on strings
larger than 2GB.

Might as well just have hex match what bytea and esc does.

Things get trickier for the bytea input/output because we want more
generic error messages depending for invalid values or an incorrect
number of digits, which is why I have left the copies in encode.c.
This design could be easily extended with more types of error codes,
though I am not sure if that's worth bothering.

I don't think removing the two error type reporting from src/common, and
then having to add it back into adt/encode.c makes sense. There are
two, soon three, that want those two error reports, so I am thinking we
are best to just leave ecpg alone since it is just a library that
doesn't want more than one error reporting. I don't think we will have
many more library call cases.

Even with that, this leads to much more sanity for hex buffer
manipulation in backup manifests (I don't think that using
PG_SHAXXX_DIGEST_STRING_LENGTH is a good idea either, I'd like to get
rid of it in the long-term) and ECPG, so that's clearly a gain.

I don't have a Windows host at hand, though I think that it should
work there correctly. What do you think about the ideas in the
attached patch?

I think your patch has a few mistakes. First, I think you removed the
hex_decode files from the file system, rather than removing it via git,
so the diff didn't apply in the cfbot. Second, I don't think ecpg ever
used libpgcommon before. For non-Windows, libpgcommon got referenced
anyway in the build, so non-Windows compiles worked, but in Windows,
that was not referenced, meaning the cfbot failed on ecpglib. I have
modified the attached patch with both of these fixed --- let's see how
it likes this version.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

Attachments:

hex2.diff.gzapplication/gzipDownload
#7Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#6)
Re: Moving other hex functions to /common

On Mon, Jan 04, 2021 at 10:47:39PM -0500, Bruce Momjian wrote:

I can see the value of passing the destination length to the hex
functions, and I think you have to pass the src length to pg_hex_encode
since the input can be binary. I assume the pg_hex_decode doesn't need
the source length because it is a null-terminated C string, right?

I don't think that we should assume that this is always the case,
actually. That would be an assumption easy to miss for callers of
those routines.

However, pg_base64_decode(const char *src, size_t len, char *dst) does
pass in the src length, so I can see we should just make it the same. I
also agree it is unclear if 'len' is the src or dst len, so your patch
fixes that with the names. Also, is there a reason you use int64
instead of the size_t used by bytea?

For the argument type, sure you could just use size_t, using an int
just looked more consistent to me to match with the style of
base64.c.

I don't think removing the two error type reporting from src/common, and
then having to add it back into adt/encode.c makes sense. There are
two, soon three, that want those two error reports, so I am thinking we
are best to just leave ecpg alone since it is just a library that
doesn't want more than one error reporting. I don't think we will have
many more library call cases.

Hmm. Even for libpq? I have grown allergic to the addition of more
"ifdef FRONTEND" and elog()/pg_log() calls into src/common/.

I think your patch has a few mistakes. First, I think you removed the
hex_decode files from the file system, rather than removing it via git,
so the diff didn't apply in the cfbot.

Oh, indeed, thanks. I missed that the patch should have used
/dev/null for the removed files.

Second, I don't think ecpg ever used libpgcommon before.

Yep. That would be the first time. I don't see anything that would
prevent its use, though I may be missing something.

For non-Windows, libpgcommon got referenced
anyway in the build, so non-Windows compiles worked, but in Windows,
that was not referenced, meaning the cfbot failed on ecpglib. I have
modified the attached patch with both of these fixed --- let's see how
it likes this version.

Indeed. Likely I am to blame for not having my Windows machine at
hand these days. I'll have this environment available only next week
:)

FWIW, I think that this refactoring has more value iff we are able to
remove all the duplicate hex implementations we have in the tree,
while being able to cover the case you are looking for frontend tools
able to do logging. Merging ECPG and the backend requires switching
to a logic where we return more than one error code so we could just
use an enum for the result result à-la-PQping like in libpq.
--
Michael

#8Thomas Munro
thomas.munro@gmail.com
In reply to: Bruce Momjian (#6)
Re: Moving other hex functions to /common

On Tue, Jan 5, 2021 at 4:47 PM Bruce Momjian <bruce@momjian.us> wrote:

... let's see how it likes this version.

cfbot ideally processes a new patch fairly quickly but I didn't think
of ".diff.gz" when writing the regexp to recognise patch files. I
just tweaked the relevant regexp and it's building your patch now.
Sorry about that. :-)

#9Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#7)
1 attachment(s)
Re: Moving other hex functions to /common

On Tue, Jan 5, 2021 at 03:47:59PM +0900, Michael Paquier wrote:

On Mon, Jan 04, 2021 at 10:47:39PM -0500, Bruce Momjian wrote:

I can see the value of passing the destination length to the hex
functions, and I think you have to pass the src length to pg_hex_encode
since the input can be binary. I assume the pg_hex_decode doesn't need
the source length because it is a null-terminated C string, right?

I don't think that we should assume that this is always the case,
actually. That would be an assumption easy to miss for callers of
those routines.

Well, if the backend uses /common for hex like I suggested, and like we
do now, it has to match the function signatures of bytea and esc, see
struct pg_encoding. I don't see the point in changing those.

However, pg_base64_decode(const char *src, size_t len, char *dst) does
pass in the src length, so I can see we should just make it the same. I

Sorry, I was wrong in the above --- len is the destination length.

also agree it is unclear if 'len' is the src or dst len, so your patch
fixes that with the names. Also, is there a reason you use int64
instead of the size_t used by bytea?

For the argument type, sure you could just use size_t, using an int
just looked more consistent to me to match with the style of
base64.c.

I think we would more likely match what adt/encoding.c does, and we
actually must if we are going to use /common for the backend.

I don't think removing the two error type reporting from src/common, and
then having to add it back into adt/encode.c makes sense. There are
two, soon three, that want those two error reports, so I am thinking we
are best to just leave ecpg alone since it is just a library that
doesn't want more than one error reporting. I don't think we will have
many more library call cases.

Hmm. Even for libpq? I have grown allergic to the addition of more
"ifdef FRONTEND" and elog()/pg_log() calls into src/common/.

I can understand the allergic reaction, but the other options seem
worse, and we have multiple places that need that multiple error string
reporting.

I think your patch has a few mistakes. First, I think you removed the
hex_decode files from the file system, rather than removing it via git,
so the diff didn't apply in the cfbot.

Oh, indeed, thanks. I missed that the patch should have used
/dev/null for the removed files.

Second, I don't think ecpg ever used libpgcommon before.

Yep. That would be the first time. I don't see anything that would
prevent its use, though I may be missing something.

The cfbot is all green for this patch now, so we are making progress. ;-)

For non-Windows, libpgcommon got referenced
anyway in the build, so non-Windows compiles worked, but in Windows,
that was not referenced, meaning the cfbot failed on ecpglib. I have
modified the attached patch with both of these fixed --- let's see how
it likes this version.

Indeed. Likely I am to blame for not having my Windows machine at
hand these days. I'll have this environment available only next week
:)

Well, the cfbot gives us all access to Windows compiles, so we are good.

FWIW, I think that this refactoring has more value iff we are able to
remove all the duplicate hex implementations we have in the tree,
while being able to cover the case you are looking for frontend tools
able to do logging. Merging ECPG and the backend requires switching
to a logic where we return more than one error code so we could just
use an enum for the result result �-la-PQping like in libpq.

I think we are best leaving ecpglib alone, since it is a library, and
just have one other hex implementation in /common for all other cases.

I found serious confusion in encoding.c:

* function pointer prototype argument names didn't match function
prototypes
* used dlen for datalen, and data where src was used in real functions
* used len for src and dst len
* used dstlen for srclen

It was very confusing, and this attached patch fixes all of that. I
also added the pg_ prefix you suggrested. If we want to add dstlen to
all the functions, we have to do it for all types --- not sure it is
worth it, now that things are much clearer.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

Attachments:

hex.diff.gzapplication/gzipDownload
#10Bruce Momjian
bruce@momjian.us
In reply to: Thomas Munro (#8)
Re: Moving other hex functions to /common

On Tue, Jan 5, 2021 at 08:54:11PM +1300, Thomas Munro wrote:

On Tue, Jan 5, 2021 at 4:47 PM Bruce Momjian <bruce@momjian.us> wrote:

... let's see how it likes this version.

cfbot ideally processes a new patch fairly quickly but I didn't think
of ".diff.gz" when writing the regexp to recognise patch files. I
just tweaked the relevant regexp and it's building your patch now.
Sorry about that. :-)

Oh, thanks. I have been using gzip since it makes larger patches less
of a burden.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

#11Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#9)
Re: Moving other hex functions to /common

On Tue, Jan 05, 2021 at 12:21:09PM -0500, Bruce Momjian wrote:

Well, if the backend uses /common for hex like I suggested, and like we
do now, it has to match the function signatures of bytea and esc, see
struct pg_encoding. I don't see the point in changing those.

Not necessarily with some thin wrappers to adapt. And we only care
about the source string for the escape format, not for hex. It seems
to me that a huge point in redesigning the hex APIs is that we can
make them more security-aware. We had one CVE in 2019 for SCRAM
because of the base64 ones in src/common/ that got copied from
encode.c. And I'd rather avoid taking any unnecessary risks here,
particularly as this is going to be used for more security-related
features.

The cfbot is all green for this patch now, so we are making progress. ;-)

Ah, cool.

I think we are best leaving ecpglib alone, since it is a library, and
just have one other hex implementation in /common for all other cases.

I found serious confusion in encoding.c:

* function pointer prototype argument names didn't match function
prototypes
* used dlen for datalen, and data where src was used in real functions
* used len for src and dst len
* used dstlen for srclen

It would be as well just a separate cleanup patch?

It was very confusing, and this attached patch fixes all of that. I
also added the pg_ prefix you suggrested. If we want to add dstlen to
all the functions, we have to do it for all types --- not sure it is
worth it, now that things are much clearer.

Overflow protection is very important in my opinion here once we
expose more those functions. Not touching ECPG at all and not making
this refactoring pluggable into shared libs is fine by me at the end
because we don't have a case for libpq yet, but I object to the lack
of protection against overflows.
--
Michael

#12Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#11)
Re: Moving other hex functions to /common

On Wed, Jan 6, 2021 at 01:10:28PM +0900, Michael Paquier wrote:

It was very confusing, and this attached patch fixes all of that. I
also added the pg_ prefix you suggrested. If we want to add dstlen to
all the functions, we have to do it for all types --- not sure it is
worth it, now that things are much clearer.

Overflow protection is very important in my opinion here once we
expose more those functions. Not touching ECPG at all and not making
this refactoring pluggable into shared libs is fine by me at the end
because we don't have a case for libpq yet, but I object to the lack
of protection against overflows.

Fine. Do you want to add the overflow to the patch I posted, for all
encoding types?

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

#13Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#12)
Re: Moving other hex functions to /common

On Wed, Jan 06, 2021 at 08:58:23AM -0500, Bruce Momjian wrote:

Fine. Do you want to add the overflow to the patch I posted, for all
encoding types?

Yeah. It looks that it would be good to be consistent as well for
escape case, so as it is possible to add a dstlen argument to struct
pg_encoding for the encoding and decoding routines. I would also
prefer the option to remove the argument "data" from the encode and
decode length routines for the hex routines part of src/common/, even
if it forces the creation of two small wrappers in encode.c to call
the routines of src/common/. Would you prefer if I send a patch by
myself? Please note that anything I'd send would use directly elog()
and pg_log() instead of returning status codes for the src/common/
routines, and of course not touch ECPG, as that's the approach you are
favoring.
--
Michael

#14Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#13)
Re: Moving other hex functions to /common

On Mon, Jan 11, 2021 at 04:45:14PM +0900, Michael Paquier wrote:

On Wed, Jan 06, 2021 at 08:58:23AM -0500, Bruce Momjian wrote:

Fine. Do you want to add the overflow to the patch I posted, for all
encoding types?

Yeah. It looks that it would be good to be consistent as well for
escape case, so as it is possible to add a dstlen argument to struct
pg_encoding for the encoding and decoding routines. I would also

Sure.

prefer the option to remove the argument "data" from the encode and
decode length routines for the hex routines part of src/common/, even
if it forces the creation of two small wrappers in encode.c to call
the routines of src/common/. Would you prefer if I send a patch by

Agreed. Having an argument that does nothing is odd.

myself? Please note that anything I'd send would use directly elog()
and pg_log() instead of returning status codes for the src/common/
routines, and of course not touch ECPG, as that's the approach you are
favoring.

Sure, I realize the elog/pg_log is odd, but the alternatives seem worse.
You can take ownership of my hex patch and just add to it. I know you
already did the hex length part, and have other ideas of what you want.

My key management patch needs the hex encoding in /common, so it will
wait for the application of your patch.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

#15Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#14)
1 attachment(s)
Re: Moving other hex functions to /common

On Mon, Jan 11, 2021 at 11:27:30AM -0500, Bruce Momjian wrote:

Sure, I realize the elog/pg_log is odd, but the alternatives seem worse.

I guess that it depends on the use cases. If there is no need to
worry about shared libraries, elog/pg_log would do just fine.

You can take ownership of my hex patch and just add to it. I know you
already did the hex length part, and have other ideas of what you want.

OK, thanks. I have been looking at it, and tweaked the patch as per
the attached. That's basically what you did on the following points:
- Use size_t for the arguments, uint64 as return result.
- Leave ECPG out of the equation.
- Use pg_log()/elog() to report failures in src/common/, rather than
error codes.
- Renamed the arguments of encode.c to src, srclen, dst, dstlen.

The two only things that were not present are the set of checks for
overflows, and the adjustments for varlena.c. The first point makes
the code of encode.c safer, as previously the code would issue a FATAL
*after* writing out-of-bound data. Now it issues an ERROR before any
overwrite is done, and I have added some assertions as an extra safety
net. For the second, I think that it makes the allocation pattern
easier to follow, similarly to checksum manifests.

Thoughts?
--
Michael

Attachments:

v5-0001-Refactor-hex-code.patchtext/x-diff; charset=us-asciiDownload
From 3e5c9fa42ff981933bc6eeede92335ae89e94e21 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 12 Jan 2021 11:21:42 +0900
Subject: [PATCH v5] Refactor hex code

---
 src/include/common/hex.h                  |  25 +++
 src/include/common/hex_decode.h           |  16 --
 src/include/utils/builtins.h              |   3 -
 src/backend/replication/backup_manifest.c |  28 ++--
 src/backend/utils/adt/encode.c            |  96 ++++++-----
 src/backend/utils/adt/varlena.c           |  16 +-
 src/common/Makefile                       |   2 +-
 src/common/hex.c                          | 196 ++++++++++++++++++++++
 src/common/hex_decode.c                   | 106 ------------
 src/tools/msvc/Mkvcbuild.pm               |   2 +-
 10 files changed, 308 insertions(+), 182 deletions(-)
 create mode 100644 src/include/common/hex.h
 delete mode 100644 src/include/common/hex_decode.h
 create mode 100644 src/common/hex.c
 delete mode 100644 src/common/hex_decode.c

diff --git a/src/include/common/hex.h b/src/include/common/hex.h
new file mode 100644
index 0000000000..3c3c956bb6
--- /dev/null
+++ b/src/include/common/hex.h
@@ -0,0 +1,25 @@
+/*------------------------------------------------------------------------
+ *
+ *	hex.h
+ *	  Encoding and decoding routines for hex strings.
+ *
+ *	Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ *	Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *		  src/include/common/hex.h
+ *
+ *------------------------------------------------------------------------
+ */
+
+#ifndef COMMON_HEX_H
+#define COMMON_HEX_H
+
+extern uint64 pg_hex_decode(const char *src, size_t srclen,
+							char *dst, size_t dstlen);
+extern uint64 pg_hex_encode(const char *src, size_t srclen,
+							char *dst, size_t dstlen);
+extern uint64 pg_hex_enc_len(size_t srclen);
+extern uint64 pg_hex_dec_len(size_t srclen);
+
+#endif							/* COMMON_HEX_H */
diff --git a/src/include/common/hex_decode.h b/src/include/common/hex_decode.h
deleted file mode 100644
index 29ab248458..0000000000
--- a/src/include/common/hex_decode.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- *	hex_decode.h
- *		hex decoding
- *
- *	Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- *	Portions Copyright (c) 1994, Regents of the University of California
- *
- *	src/include/common/hex_decode.h
- */
-#ifndef COMMON_HEX_DECODE_H
-#define COMMON_HEX_DECODE_H
-
-extern uint64 hex_decode(const char *src, size_t len, char *dst);
-
-
-#endif							/* COMMON_HEX_DECODE_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 6f739a8822..27d2f2ffb3 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -31,9 +31,6 @@ extern void domain_check(Datum value, bool isnull, Oid domainType,
 extern int	errdatatype(Oid datatypeOid);
 extern int	errdomainconstraint(Oid datatypeOid, const char *conname);
 
-/* encode.c */
-extern uint64 hex_encode(const char *src, size_t len, char *dst);
-
 /* int.c */
 extern int2vector *buildint2vector(const int16 *int2s, int n);
 
diff --git a/src/backend/replication/backup_manifest.c b/src/backend/replication/backup_manifest.c
index 8af94610b3..0df5186828 100644
--- a/src/backend/replication/backup_manifest.c
+++ b/src/backend/replication/backup_manifest.c
@@ -13,11 +13,11 @@
 #include "postgres.h"
 
 #include "access/timeline.h"
+#include "common/hex.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "replication/backup_manifest.h"
-#include "utils/builtins.h"
 #include "utils/json.h"
 
 static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
@@ -150,10 +150,12 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
 	}
 	else
 	{
+		uint64	dstlen = pg_hex_enc_len(pathlen);
+
 		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
-		enlargeStringInfo(&buf, 2 * pathlen);
-		buf.len += hex_encode(pathname, pathlen,
-							  &buf.data[buf.len]);
+		enlargeStringInfo(&buf, dstlen);
+		buf.len += pg_hex_encode(pathname, pathlen,
+								 &buf.data[buf.len], dstlen);
 		appendStringInfoString(&buf, "\", ");
 	}
 
@@ -176,6 +178,7 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
 	{
 		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
 		int			checksumlen;
+		uint64		dstlen;
 
 		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
 		if (checksumlen < 0)
@@ -185,9 +188,10 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
 		appendStringInfo(&buf,
 						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
 						 pg_checksum_type_name(checksum_ctx->type));
-		enlargeStringInfo(&buf, 2 * checksumlen);
-		buf.len += hex_encode((char *) checksumbuf, checksumlen,
-							  &buf.data[buf.len]);
+		dstlen = pg_hex_enc_len(checksumlen);
+		enlargeStringInfo(&buf, dstlen);
+		buf.len += pg_hex_encode((char *) checksumbuf, checksumlen,
+								 &buf.data[buf.len], dstlen);
 		appendStringInfoChar(&buf, '"');
 	}
 
@@ -307,8 +311,9 @@ SendBackupManifest(backup_manifest_info *manifest)
 {
 	StringInfoData protobuf;
 	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
-	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
+	char	   *checksumstringbuf;
 	size_t		manifest_bytes_done = 0;
+	uint64		dstlen;
 
 	if (!IsManifestEnabled(manifest))
 		return;
@@ -328,8 +333,11 @@ SendBackupManifest(backup_manifest_info *manifest)
 	if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf) < 0)
 		elog(ERROR, "failed to finalize checksum of backup manifest");
 	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
-	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
-	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
+	dstlen = pg_hex_enc_len(PG_SHA256_DIGEST_LENGTH);
+	checksumstringbuf = palloc0(dstlen + 1);	/* includes \0 */
+	pg_hex_encode((char *) checksumbuf, sizeof checksumbuf,
+				  checksumstringbuf, dstlen);
+	checksumstringbuf[dstlen] = '\0';
 	AppendStringToManifest(manifest, checksumstringbuf);
 	AppendStringToManifest(manifest, "\"}\n");
 
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4759be25e9..6e26f2af8a 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -15,7 +15,7 @@
 
 #include <ctype.h>
 
-#include "common/hex_decode.h"
+#include "common/hex.h"
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
@@ -32,10 +32,12 @@
  */
 struct pg_encoding
 {
-	uint64		(*encode_len) (const char *data, size_t dlen);
-	uint64		(*decode_len) (const char *data, size_t dlen);
-	uint64		(*encode) (const char *data, size_t dlen, char *res);
-	uint64		(*decode) (const char *data, size_t dlen, char *res);
+	uint64		(*encode_len) (const char *src, size_t srclen);
+	uint64		(*decode_len) (const char *src, size_t srclen);
+	uint64		(*encode) (const char *src, size_t srclen,
+						   char *dst, size_t dstlen);
+	uint64		(*decode) (const char *src, size_t srclen,
+						   char *dst, size_t dstlen);
 };
 
 static const struct pg_encoding *pg_find_encoding(const char *name);
@@ -81,11 +83,7 @@ binary_encode(PG_FUNCTION_ARGS)
 
 	result = palloc(VARHDRSZ + resultlen);
 
-	res = enc->encode(dataptr, datalen, VARDATA(result));
-
-	/* Make this FATAL 'cause we've trodden on memory ... */
-	if (res > resultlen)
-		elog(FATAL, "overflow - encode estimate too small");
+	res = enc->encode(dataptr, datalen, VARDATA(result), resultlen);
 
 	SET_VARSIZE(result, VARHDRSZ + res);
 
@@ -129,11 +127,7 @@ binary_decode(PG_FUNCTION_ARGS)
 
 	result = palloc(VARHDRSZ + resultlen);
 
-	res = enc->decode(dataptr, datalen, VARDATA(result));
-
-	/* Make this FATAL 'cause we've trodden on memory ... */
-	if (res > resultlen)
-		elog(FATAL, "overflow - decode estimate too small");
+	res = enc->decode(dataptr, datalen, VARDATA(result), resultlen);
 
 	SET_VARSIZE(result, VARHDRSZ + res);
 
@@ -145,32 +139,20 @@ binary_decode(PG_FUNCTION_ARGS)
  * HEX
  */
 
-static const char hextbl[] = "0123456789abcdef";
-
-uint64
-hex_encode(const char *src, size_t len, char *dst)
-{
-	const char *end = src + len;
-
-	while (src < end)
-	{
-		*dst++ = hextbl[(*src >> 4) & 0xF];
-		*dst++ = hextbl[*src & 0xF];
-		src++;
-	}
-	return (uint64) len * 2;
-}
-
+/*
+ * Those two wrappers are still needed to match with the layer of
+ * src/common/.
+ */
 static uint64
 hex_enc_len(const char *src, size_t srclen)
 {
-	return (uint64) srclen << 1;
+	return pg_hex_enc_len(srclen);
 }
 
 static uint64
 hex_dec_len(const char *src, size_t srclen)
 {
-	return (uint64) srclen >> 1;
+	return pg_hex_dec_len(srclen);
 }
 
 /*
@@ -192,12 +174,12 @@ static const int8 b64lookup[128] = {
 };
 
 static uint64
-pg_base64_encode(const char *src, size_t len, char *dst)
+pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
 {
 	char	   *p,
 			   *lend = dst + 76;
 	const char *s,
-			   *end = src + len;
+			   *end = src + srclen;
 	int			pos = 2;
 	uint32		buf = 0;
 
@@ -213,6 +195,8 @@ pg_base64_encode(const char *src, size_t len, char *dst)
 		/* write it out */
 		if (pos < 0)
 		{
+			if ((p - dst + 4) > dstlen)
+				elog(ERROR, "overflow - encode estimate too small");
 			*p++ = _base64[(buf >> 18) & 0x3f];
 			*p++ = _base64[(buf >> 12) & 0x3f];
 			*p++ = _base64[(buf >> 6) & 0x3f];
@@ -223,25 +207,30 @@ pg_base64_encode(const char *src, size_t len, char *dst)
 		}
 		if (p >= lend)
 		{
+			if ((p - dst + 1) > dstlen)
+				elog(ERROR, "overflow - encode estimate too small");
 			*p++ = '\n';
 			lend = p + 76;
 		}
 	}
 	if (pos != 2)
 	{
+		if ((p - dst + 4) > dstlen)
+			elog(ERROR, "overflow - encode estimate too small");
 		*p++ = _base64[(buf >> 18) & 0x3f];
 		*p++ = _base64[(buf >> 12) & 0x3f];
 		*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
 		*p++ = '=';
 	}
 
+	Assert((p - dst) <= dstlen);
 	return p - dst;
 }
 
 static uint64
-pg_base64_decode(const char *src, size_t len, char *dst)
+pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
 {
-	const char *srcend = src + len,
+	const char *srcend = src + srclen,
 			   *s = src;
 	char	   *p = dst;
 	char		c;
@@ -289,11 +278,21 @@ pg_base64_decode(const char *src, size_t len, char *dst)
 		pos++;
 		if (pos == 4)
 		{
+			if ((p - dst + 1) > dstlen)
+				elog(ERROR, "overflow - decode estimate too small");
 			*p++ = (buf >> 16) & 255;
 			if (end == 0 || end > 1)
+			{
+				if ((p - dst + 1) > dstlen)
+					elog(ERROR, "overflow - decode estimate too small");
 				*p++ = (buf >> 8) & 255;
+			}
 			if (end == 0 || end > 2)
+			{
+				if ((p - dst + 1) > dstlen)
+					elog(ERROR, "overflow - decode estimate too small");
 				*p++ = buf & 255;
+			}
 			buf = 0;
 			pos = 0;
 		}
@@ -305,6 +304,7 @@ pg_base64_decode(const char *src, size_t len, char *dst)
 				 errmsg("invalid base64 end sequence"),
 				 errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
 
+	Assert((p - dst) <= dstlen);
 	return p - dst;
 }
 
@@ -340,7 +340,7 @@ pg_base64_dec_len(const char *src, size_t srclen)
 #define DIG(VAL)		((VAL) + '0')
 
 static uint64
-esc_encode(const char *src, size_t srclen, char *dst)
+esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
 {
 	const char *end = src + srclen;
 	char	   *rp = dst;
@@ -352,6 +352,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
 
 		if (c == '\0' || IS_HIGHBIT_SET(c))
 		{
+			if ((rp - dst + 4) > dstlen)
+				elog(ERROR, "overflow - encode estimate too small");
 			rp[0] = '\\';
 			rp[1] = DIG(c >> 6);
 			rp[2] = DIG((c >> 3) & 7);
@@ -361,6 +363,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
 		}
 		else if (c == '\\')
 		{
+			if ((rp - dst + 2) > dstlen)
+				elog(ERROR, "overflow - encode estimate too small");
 			rp[0] = '\\';
 			rp[1] = '\\';
 			rp += 2;
@@ -368,6 +372,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
 		}
 		else
 		{
+			if ((rp - dst + 1) > dstlen)
+				elog(ERROR, "overflow - encode estimate too small");
 			*rp++ = c;
 			len++;
 		}
@@ -375,11 +381,12 @@ esc_encode(const char *src, size_t srclen, char *dst)
 		src++;
 	}
 
+	Assert((rp - dst) <= dstlen);
 	return len;
 }
 
 static uint64
-esc_decode(const char *src, size_t srclen, char *dst)
+esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
 {
 	const char *end = src + srclen;
 	char	   *rp = dst;
@@ -388,7 +395,11 @@ esc_decode(const char *src, size_t srclen, char *dst)
 	while (src < end)
 	{
 		if (src[0] != '\\')
+		{
+			if ((rp - dst + 1) > dstlen)
+				elog(ERROR, "overflow - decode estimate too small");
 			*rp++ = *src++;
+		}
 		else if (src + 3 < end &&
 				 (src[1] >= '0' && src[1] <= '3') &&
 				 (src[2] >= '0' && src[2] <= '7') &&
@@ -400,12 +411,16 @@ esc_decode(const char *src, size_t srclen, char *dst)
 			val <<= 3;
 			val += VAL(src[2]);
 			val <<= 3;
+			if ((rp - dst + 1) > dstlen)
+				elog(ERROR, "overflow - decode estimate too small");
 			*rp++ = val + VAL(src[3]);
 			src += 4;
 		}
 		else if (src + 1 < end &&
 				 (src[1] == '\\'))
 		{
+			if ((rp - dst + 1) > dstlen)
+				elog(ERROR, "overflow - decode estimate too small");
 			*rp++ = '\\';
 			src += 2;
 		}
@@ -423,6 +438,7 @@ esc_decode(const char *src, size_t srclen, char *dst)
 		len++;
 	}
 
+	Assert((rp - dst) <= dstlen);
 	return len;
 }
 
@@ -504,7 +520,7 @@ static const struct
 	{
 		"hex",
 		{
-			hex_enc_len, hex_dec_len, hex_encode, hex_decode
+			hex_enc_len, hex_dec_len, pg_hex_encode, pg_hex_decode
 		}
 	},
 	{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4838bfb4ff..479ed9ae54 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -21,8 +21,8 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "common/hashfn.h"
+#include "common/hex.h"
 #include "common/int.h"
-#include "common/hex_decode.h"
 #include "common/unicode_norm.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -304,10 +304,12 @@ byteain(PG_FUNCTION_ARGS)
 	if (inputText[0] == '\\' && inputText[1] == 'x')
 	{
 		size_t		len = strlen(inputText);
+		uint64		dstlen = pg_hex_dec_len(len - 2);
 
-		bc = (len - 2) / 2 + VARHDRSZ;	/* maximum possible length */
+		bc = dstlen + VARHDRSZ;	/* maximum possible length */
 		result = palloc(bc);
-		bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
+
+		bc = pg_hex_decode(inputText + 2, len - 2, VARDATA(result), dstlen);
 		SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
 
 		PG_RETURN_BYTEA_P(result);
@@ -396,11 +398,15 @@ byteaout(PG_FUNCTION_ARGS)
 
 	if (bytea_output == BYTEA_OUTPUT_HEX)
 	{
+		uint64		dstlen = pg_hex_enc_len(VARSIZE_ANY_EXHDR(vlena));
+
 		/* Print hex format */
-		rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+		rp = result = palloc(dstlen + 2 + 1);
 		*rp++ = '\\';
 		*rp++ = 'x';
-		rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+
+		rp += pg_hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp,
+							dstlen);
 	}
 	else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
 	{
diff --git a/src/common/Makefile b/src/common/Makefile
index f624977939..93eb27a2aa 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -58,7 +58,7 @@ OBJS_COMMON = \
 	file_perm.o \
 	file_utils.o \
 	hashfn.o \
-	hex_decode.o \
+	hex.o \
 	ip.o \
 	jsonapi.o \
 	keywords.o \
diff --git a/src/common/hex.c b/src/common/hex.c
new file mode 100644
index 0000000000..0123c69697
--- /dev/null
+++ b/src/common/hex.c
@@ -0,0 +1,196 @@
+/*-------------------------------------------------------------------------
+ *
+ * hex.c
+ *	  Encoding and decoding routines for hex.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/common/hex.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include "common/hex.h"
+#ifdef FRONTEND
+#include "common/logging.h"
+#endif
+#include "mb/pg_wchar.h"
+
+
+static const int8 hexlookup[128] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+static const char hextbl[] = "0123456789abcdef";
+
+static inline char
+get_hex(const char *cp)
+{
+	unsigned char c = (unsigned char) *cp;
+	int			res = -1;
+
+	if (c < 127)
+		res = hexlookup[c];
+
+	if (res < 0)
+	{
+#ifdef FRONTEND
+		pg_log_fatal("invalid hexadecimal digit");
+		exit(EXIT_FAILURE);
+#else
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid hexadecimal digit: \"%.*s\"",
+						pg_mblen(cp), cp)));
+#endif
+	}
+
+	return (char) res;
+}
+
+/*
+ * pg_hex_encode
+ *
+ * Encode into hex the given string.  Returns the length of the encoded
+ * string.
+ */
+uint64
+pg_hex_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
+{
+	const char *end = src + srclen;
+	char	   *p;
+
+	p = dst;
+
+	while (src < end)
+	{
+		/*
+		 * Leave if there is an overflow in the area allocated for the
+		 * encoded string.
+		 */
+		if ((p - dst + 2) > dstlen)
+		{
+#ifdef FRONTEND
+			pg_log_fatal("overflow of destination buffer during hex encoding");
+			exit(EXIT_FAILURE);
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("overflow of destination buffer during hex encoding")));
+#endif
+		}
+
+		*p++ = hextbl[(*src >> 4) & 0xF];
+		*p++ = hextbl[*src & 0xF];
+		src++;
+	}
+
+	Assert((p - dst) <= dstlen);
+	return p - dst;
+}
+
+/*
+ * pg_hex_decode
+ *
+ * Decode the given hex string.  Returns the length of the decoded string.
+ */
+uint64
+pg_hex_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
+{
+	const char *s,
+			   *srcend;
+	char		v1,
+				v2,
+			   *p;
+
+	srcend = src + srclen;
+	s = src;
+	p = dst;
+	while (s < srcend)
+	{
+		if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
+		{
+			s++;
+			continue;
+		}
+		v1 = get_hex(s) << 4;
+		s++;
+
+		if (s >= srcend)
+		{
+#ifdef FRONTEND
+			pg_log_fatal("invalid hexadecimal data: odd number of digits");
+			exit(EXIT_FAILURE);
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid hexadecimal data: odd number of digits")));
+#endif
+		}
+
+		v2 = get_hex(s);
+		s++;
+
+		/* overflow check */
+		if ((p - dst + 1) > dstlen)
+		{
+#ifdef FRONTEND
+			pg_log_fatal("overflow of destination buffer during hex decoding");
+			exit(EXIT_FAILURE);
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("overflow of destination buffer during hex decoding")));
+#endif
+		}
+
+		*p++ = v1 | v2;
+	}
+
+	Assert((p - dst) <= dstlen);
+	return p - dst;
+}
+
+/*
+ * pg_hex_enc_len
+ *
+ * Returns to caller the length of the string if it were encoded with
+ * hex based on the length provided by caller.  This is useful to estimate
+ * how large a buffer allocation needs to be done before doing the actual
+ * encoding.
+ */
+uint64
+pg_hex_enc_len(size_t srclen)
+{
+	return (srclen << 1);
+}
+
+/*
+ * pg_hex_dec_len
+ *
+ * Returns to caller the length of the string if it were to be decoded
+ * with hex, based on the length given by caller.  This is useful to
+ * estimate how large a buffer allocation needs to be done before doing
+ * the actual decoding.
+ */
+uint64
+pg_hex_dec_len(size_t srclen)
+{
+	return (srclen >> 1);
+}
diff --git a/src/common/hex_decode.c b/src/common/hex_decode.c
deleted file mode 100644
index dbc0e1c30a..0000000000
--- a/src/common/hex_decode.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * hex_decode.c
- *		hex decoding
- *
- *
- * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *	  src/common/hex_decode.c
- *
- *-------------------------------------------------------------------------
- */
-
-
-#ifndef FRONTEND
-#include "postgres.h"
-#else
-#include "postgres_fe.h"
-#endif
-
-#ifdef FRONTEND
-#include "common/logging.h"
-#else
-#include "mb/pg_wchar.h"
-#endif
-#include "common/hex_decode.h"
-
-
-static const int8 hexlookup[128] = {
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-static inline char
-get_hex(const char *cp)
-{
-	unsigned char c = (unsigned char) *cp;
-	int			res = -1;
-
-	if (c < 127)
-		res = hexlookup[c];
-
-	if (res < 0)
-	{
-#ifdef FRONTEND
-		pg_log_fatal("invalid hexadecimal digit");
-		exit(EXIT_FAILURE);
-#else
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("invalid hexadecimal digit: \"%.*s\"",
-						pg_mblen(cp), cp)));
-#endif
-	}
-
-	return (char) res;
-}
-
-uint64
-hex_decode(const char *src, size_t len, char *dst)
-{
-	const char *s,
-			   *srcend;
-	char		v1,
-				v2,
-			   *p;
-
-	srcend = src + len;
-	s = src;
-	p = dst;
-	while (s < srcend)
-	{
-		if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
-		{
-			s++;
-			continue;
-		}
-		v1 = get_hex(s) << 4;
-		s++;
-		if (s >= srcend)
-		{
-#ifdef FRONTEND
-			pg_log_fatal("invalid hexadecimal data: odd number of digits");
-			exit(EXIT_FAILURE);
-#else
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("invalid hexadecimal data: odd number of digits")));
-#endif
-		}
-		v2 = get_hex(s);
-		s++;
-		*p++ = v1 | v2;
-	}
-
-	return p - dst;
-}
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 7f014a12c9..60b216cce0 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -121,7 +121,7 @@ sub mkvcbuild
 	our @pgcommonallfiles = qw(
 	  archive.c base64.c checksum_helper.c
 	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
-	  f2s.c file_perm.c file_utils.c hashfn.c hex_decode.c ip.c jsonapi.c
+	  f2s.c file_perm.c file_utils.c hashfn.c hex.c ip.c jsonapi.c
 	  keywords.c kwlookup.c link-canary.c md5_common.c
 	  pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
-- 
2.30.0

#16Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#15)
Re: Moving other hex functions to /common

On Tue, Jan 12, 2021 at 11:26:51AM +0900, Michael Paquier wrote:

The two only things that were not present are the set of checks for
overflows, and the adjustments for varlena.c. The first point makes
the code of encode.c safer, as previously the code would issue a FATAL
*after* writing out-of-bound data. Now it issues an ERROR before any
overwrite is done, and I have added some assertions as an extra safety
net. For the second, I think that it makes the allocation pattern
easier to follow, similarly to checksum manifests.

Thanks for you work on this. Looks good. I posted your patch under my
key management patch and the cfbot reports all green:

http://cfbot.cputube.org/index.html

The key management patch calls the src/common hex functions from
src/backend/crypto, pg_alterckey, and the crypto tests, and these are
all tested by make check-world.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

#17Michael Paquier
michael@paquier.xyz
In reply to: Bruce Momjian (#16)
Re: Moving other hex functions to /common

On Tue, Jan 12, 2021 at 01:13:00PM -0500, Bruce Momjian wrote:

Thanks for you work on this. Looks good.

I have been looking again at this patch again for a couple of hours
this morning to double-check if I have not missed anything, and I
think that we should be in good shape. This still needs a pgindent
run but I'll take care of it. Let's wait a bit and see if others have
any comments or objections. If there is nothing, I'll commit what we
have here.

I posted your patch under my key management patch and the cfbot
reports all green:

http://cfbot.cputube.org/index.html

The key management patch calls the src/common hex functions from
src/backend/crypto, pg_alterckey, and the crypto tests, and these are
all tested by make check-world.

Ah, OK. Nice.
--
Michael

#18Sehrope Sarkuni
sehrope@jackdb.com
In reply to: Michael Paquier (#17)
Re: Moving other hex functions to /common

The length functions in src/common/hex.c should cast srclen to uint64 prior
to the shift. The current hex_enc_len(...) in encode.c performs such a
cast.

diff --git a/src/common/hex.c b/src/common/hex.c
index 0123c69697..e87aa1fd7f 100644
--- a/src/common/hex.c
+++ b/src/common/hex.c
@@ -178,7 +178,7 @@ pg_hex_decode(const char *src, size_t srclen, char
*dst, size_t dstlen)
 uint64
 pg_hex_enc_len(size_t srclen)
 {
-       return (srclen << 1);
+       return (uint64) srclen << 1;
 }

/*
@@ -192,5 +192,5 @@ pg_hex_enc_len(size_t srclen)
uint64
pg_hex_dec_len(size_t srclen)
{
- return (srclen >> 1);
+ return (uint64) srclen >> 1;
}

Regards,
-- Sehrope Sarkuni
Founder & CEO | JackDB, Inc. | https://www.jackdb.com/

#19Michael Paquier
michael@paquier.xyz
In reply to: Sehrope Sarkuni (#18)
Re: Moving other hex functions to /common

On Wed, Jan 13, 2021 at 10:00:49AM -0500, Sehrope Sarkuni wrote:

The length functions in src/common/hex.c should cast srclen to uint64 prior
to the shift. The current hex_enc_len(...) in encode.c performs such a
cast.

Thanks, Sehrope. I have reviewed the code this morning and fixed
that, adjusted a couple of elog() strings I found inconsistent after
review and ran pgindent. And applied it.
--
Michael

#20Bruce Momjian
bruce@momjian.us
In reply to: Michael Paquier (#19)
Re: Moving other hex functions to /common

On Thu, Jan 14, 2021 at 11:17:40AM +0900, Michael Paquier wrote:

On Wed, Jan 13, 2021 at 10:00:49AM -0500, Sehrope Sarkuni wrote:

The length functions in src/common/hex.c should cast srclen to uint64 prior
to the shift. The current hex_enc_len(...) in encode.c performs such a
cast.

Thanks, Sehrope. I have reviewed the code this morning and fixed
that, adjusted a couple of elog() strings I found inconsistent after
review and ran pgindent. And applied it.

Thanks. All my key management regression tests pass on top of your
applied patch.

--
Bruce Momjian <bruce@momjian.us> https://momjian.us
EnterpriseDB https://enterprisedb.com

The usefulness of a cup is in its emptiness, Bruce Lee

#21Tom Lane
tgl@sss.pgh.pa.us
In reply to: Bruce Momjian (#20)
Re: Moving other hex functions to /common

Bruce Momjian <bruce@momjian.us> writes:

On Thu, Jan 14, 2021 at 11:17:40AM +0900, Michael Paquier wrote:

Thanks, Sehrope. I have reviewed the code this morning and fixed
that, adjusted a couple of elog() strings I found inconsistent after
review and ran pgindent. And applied it.

Thanks. All my key management regression tests pass on top of your
applied patch.

Should the CF entry for this be closed? The cfbot is still trying to
apply the patch, and unsurprisingly failing.

regards, tom lane

#22Michael Paquier
michael@paquier.xyz
In reply to: Tom Lane (#21)
Re: Moving other hex functions to /common

On Wed, Jan 13, 2021 at 10:11:54PM -0500, Tom Lane wrote:

Should the CF entry for this be closed? The cfbot is still trying to
apply the patch, and unsurprisingly failing.

Yes, I was going to close it once I was sure that the buildfarm had
nothing to say. Done now.
--
Michael