From e25090646f6a0ca619bb9faaa7ff6a330c277b0f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Fri, 12 Mar 2021 14:43:53 -0600
Subject: [PATCH v9 7/9] add wal_compression_method: zstd

TODO: 9ca40dcd4d0cad43d95a9a253fafaa9a9ba7de24
---
 configure                                     | 217 ++++++++++++++++++
 configure.ac                                  |  33 +++
 doc/src/sgml/config.sgml                      |   2 +-
 doc/src/sgml/installation.sgml                |  19 ++
 src/backend/access/transam/xloginsert.c       |  18 +-
 src/backend/access/transam/xlogreader.c       |  18 ++
 src/backend/utils/misc/postgresql.conf.sample |   2 +-
 src/include/access/xlog_internal.h            |   1 +
 src/include/access/xlogrecord.h               |   5 +-
 src/include/pg_config.h.in                    |   3 +
 src/tools/msvc/Solution.pm                    |   1 +
 src/tools/msvc/config_default.pl              |   1 +
 12 files changed, 315 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index 7038b0727c..72bbd719dc 100755
--- a/configure
+++ b/configure
@@ -699,6 +699,9 @@ with_gnu_ld
 LD
 LDFLAGS_SL
 LDFLAGS_EX
+ZSTD_LIBS
+ZSTD_CFLAGS
+with_zstd
 LZ4_LIBS
 LZ4_CFLAGS
 with_lz4
@@ -868,6 +871,7 @@ with_libxslt
 with_system_tzdata
 with_zlib
 with_lz4
+with_zstd
 with_gnu_ld
 with_ssl
 with_openssl
@@ -897,6 +901,8 @@ XML2_CFLAGS
 XML2_LIBS
 LZ4_CFLAGS
 LZ4_LIBS
+ZSTD_CFLAGS
+ZSTD_LIBS
 LDFLAGS_EX
 LDFLAGS_SL
 PERL
@@ -1576,6 +1582,7 @@ Optional Packages:
                           use system time zone data in DIR
   --without-zlib          do not use Zlib
   --without-lz4           build without LZ4 support
+  --with-zstd             build with Zstd compression library
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
   --with-ssl=LIB          use LIB for SSL/TLS support (openssl)
   --with-openssl          obsolete spelling of --with-ssl=openssl
@@ -1605,6 +1612,8 @@ Some influential environment variables:
   XML2_LIBS   linker flags for XML2, overriding pkg-config
   LZ4_CFLAGS  C compiler flags for LZ4, overriding pkg-config
   LZ4_LIBS    linker flags for LZ4, overriding pkg-config
+  ZSTD_CFLAGS C compiler flags for ZSTD, overriding pkg-config
+  ZSTD_LIBS   linker flags for ZSTD, overriding pkg-config
   LDFLAGS_EX  extra linker flags for linking executables only
   LDFLAGS_SL  extra linker flags for linking shared libraries only
   PERL        Perl program
@@ -8715,6 +8724,147 @@ fi
   done
 fi
 
+#
+# ZSTD
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with zstd support" >&5
+$as_echo_n "checking whether to build with zstd support... " >&6; }
+
+
+
+# Check whether --with-zstd was given.
+if test "${with_zstd+set}" = set; then :
+  withval=$with_zstd;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_ZSTD 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_zstd=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_zstd" >&5
+$as_echo "$with_zstd" >&6; }
+
+
+if test "$with_zstd" = yes; then
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libzstd" >&5
+$as_echo_n "checking for libzstd... " >&6; }
+
+if test -n "$ZSTD_CFLAGS"; then
+    pkg_cv_ZSTD_CFLAGS="$ZSTD_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libzstd\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "libzstd") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_ZSTD_CFLAGS=`$PKG_CONFIG --cflags "libzstd" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+if test -n "$ZSTD_LIBS"; then
+    pkg_cv_ZSTD_LIBS="$ZSTD_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libzstd\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "libzstd") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_ZSTD_LIBS=`$PKG_CONFIG --libs "libzstd" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi
+        if test $_pkg_short_errors_supported = yes; then
+	        ZSTD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libzstd" 2>&1`
+        else
+	        ZSTD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libzstd" 2>&1`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$ZSTD_PKG_ERRORS" >&5
+
+	as_fn_error $? "Package requirements (libzstd) were not met:
+
+$ZSTD_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables ZSTD_CFLAGS
+and ZSTD_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables ZSTD_CFLAGS
+and ZSTD_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+	ZSTD_CFLAGS=$pkg_cv_ZSTD_CFLAGS
+	ZSTD_LIBS=$pkg_cv_ZSTD_LIBS
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+  # We only care about -I, -D, and -L switches;
+  # note that -lzstd will be added by AC_CHECK_LIB below.
+  for pgac_option in $ZSTD_CFLAGS; do
+    case $pgac_option in
+      -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";;
+    esac
+  done
+  for pgac_option in $ZSTD_LIBS; do
+    case $pgac_option in
+      -L*) LDFLAGS="$LDFLAGS $pgac_option";;
+    esac
+  done
+fi
+
 #
 # Assignments
 #
@@ -12878,6 +13028,56 @@ fi
 
 fi
 
+if test "$with_zstd" = yes ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_compress in -lzstd" >&5
+$as_echo_n "checking for ZSTD_compress in -lzstd... " >&6; }
+if ${ac_cv_lib_zstd_ZSTD_compress+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lzstd  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ZSTD_compress ();
+int
+main ()
+{
+return ZSTD_compress ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_zstd_ZSTD_compress=yes
+else
+  ac_cv_lib_zstd_ZSTD_compress=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_compress" >&5
+$as_echo "$ac_cv_lib_zstd_ZSTD_compress" >&6; }
+if test "x$ac_cv_lib_zstd_ZSTD_compress" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBZSTD 1
+_ACEOF
+
+  LIBS="-lzstd $LIBS"
+
+else
+  as_fn_error $? "library 'zstd' is required for ZSTD support" "$LINENO" 5
+fi
+
+fi
+
 # Note: We can test for libldap_r only after we know PTHREAD_LIBS
 if test "$with_ldap" = yes ; then
   _LIBS="$LIBS"
@@ -13600,6 +13800,23 @@ done
 
 fi
 
+if test "$with_zstd" = yes; then
+  for ac_header in zstd.h
+do :
+  ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default"
+if test "x$ac_cv_header_zstd_h" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_ZSTD_H 1
+_ACEOF
+
+else
+  as_fn_error $? "zstd.h header file is required for zstd" "$LINENO" 5
+fi
+
+done
+
+fi
+
 if test "$with_gssapi" = yes ; then
   for ac_header in gssapi/gssapi.h
 do :
diff --git a/configure.ac b/configure.ac
index cb0261f179..c348a3ee91 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1011,6 +1011,31 @@ if test "$with_lz4" = yes; then
   done
 fi
 
+#
+# ZSTD
+#
+AC_MSG_CHECKING([whether to build with zstd support])
+PGAC_ARG_BOOL(with, zstd, no, [build with Zstd compression library],
+              [AC_DEFINE([USE_ZSTD], 1, [Define to 1 to build with zstd support. (--with-zstd)])])
+AC_MSG_RESULT([$with_zstd])
+AC_SUBST(with_zstd)
+
+if test "$with_zstd" = yes; then
+  PKG_CHECK_MODULES(ZSTD, libzstd)
+  # We only care about -I, -D, and -L switches;
+  # note that -lzstd will be added by AC_CHECK_LIB below.
+  for pgac_option in $ZSTD_CFLAGS; do
+    case $pgac_option in
+      -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";;
+    esac
+  done
+  for pgac_option in $ZSTD_LIBS; do
+    case $pgac_option in
+      -L*) LDFLAGS="$LDFLAGS $pgac_option";;
+    esac
+  done
+fi
+
 #
 # Assignments
 #
@@ -1285,6 +1310,10 @@ if test "$with_lz4" = yes ; then
   AC_CHECK_LIB(lz4, LZ4_compress_default, [], [AC_MSG_ERROR([library 'lz4' is required for LZ4 support])])
 fi
 
+if test "$with_zstd" = yes ; then
+  AC_CHECK_LIB(zstd, ZSTD_compress, [], [AC_MSG_ERROR([library 'zstd' is required for ZSTD support])])
+fi
+
 # Note: We can test for libldap_r only after we know PTHREAD_LIBS
 if test "$with_ldap" = yes ; then
   _LIBS="$LIBS"
@@ -1443,6 +1472,10 @@ if test "$with_lz4" = yes; then
   AC_CHECK_HEADERS(lz4.h, [], [AC_MSG_ERROR([lz4.h header file is required for LZ4])])
 fi
 
+if test "$with_zstd" = yes; then
+  AC_CHECK_HEADERS(zstd.h, [], [AC_MSG_ERROR([zstd.h header file is required for zstd])])
+fi
+
 if test "$with_gssapi" = yes ; then
   AC_CHECK_HEADERS(gssapi/gssapi.h, [],
 	[AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])])
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index df5ff70d91..ee4c44fb7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3141,7 +3141,7 @@ include_dir 'conf.d'
         <xref linkend="guc-full-page-writes"/> is on or during a base backup.
         A compressed page image will be decompressed during WAL replay.
         The supported methods are pglz, zlib, and (if configured when
-        <productname>PostgreSQL</productname> was built) lz4.
+        <productname>PostgreSQL</productname> was built) lz4 and zstd.
         The default value is <literal>off</literal>.
         Only superusers can change this setting.
        </para>
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index c7673a4dc8..3e985bbd05 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -275,6 +275,14 @@ su - postgres
      </para>
     </listitem>
 
+    <listitem>
+     <para>
+      The <productname>ZSTD</productname> library can be used to enable
+      compression of WAL data with the <literal>zstd</literal> method; see
+      <xref linkend="guc-wal-compression"/>.
+     </para>
+    </listitem>
+
     <listitem>
      <para>
       To build the <productname>PostgreSQL</productname> documentation,
@@ -986,6 +994,17 @@ build-postgresql:
        </listitem>
       </varlistentry>
 
+      <varlistentry>
+       <term><option>--with-zstd</option></term>
+       <listitem>
+        <para>
+         Build with <productname>ZSTD</productname> compression support.
+         This enables use of <productname>ZSTD</productname> for
+         compression of WAL data.
+        </para>
+       </listitem>
+      </varlistentry>
+
       <varlistentry>
        <term><option>--with-ssl=<replaceable>LIBRARY</replaceable></option>
        <indexterm>
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index a8794a941a..96f497d5d6 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -48,10 +48,17 @@
 #define LZ4_MAX_BLCKSZ		0
 #endif
 
+#ifdef USE_ZSTD
+#include "zstd.h"
+#define ZSTD_MAX_BLCKSZ		ZSTD_COMPRESSBOUND(BLCKSZ)
+#else
+#define ZSTD_MAX_BLCKSZ		0
+#endif
+
 /* Buffer size required to store a compressed version of backup block image */
 #define PGLZ_MAX_BLCKSZ		PGLZ_MAX_OUTPUT(BLCKSZ)
 
-#define COMPRESS_BUFSIZE	Max(Max(PGLZ_MAX_BLCKSZ, ZLIB_MAX_BLCKSZ), LZ4_MAX_BLCKSZ)
+#define COMPRESS_BUFSIZE	Max(Max(Max(PGLZ_MAX_BLCKSZ, ZLIB_MAX_BLCKSZ), LZ4_MAX_BLCKSZ), ZSTD_MAX_BLCKSZ)
 
 /*
  * For each block reference registered with XLogRegisterBuffer, we fill in
@@ -904,6 +911,15 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
 		break;
 #endif
 
+#ifdef USE_ZSTD
+	case WAL_COMPRESSION_ZSTD:
+		len = ZSTD_compress(dest, COMPRESS_BUFSIZE, source, orig_len,
+				ZSTD_CLEVEL_DEFAULT);
+		if (ZSTD_isError(len))
+			len = -1;
+		break;
+#endif
+
 	default:
 		/*
 		 * It should be impossible to get here for unsupported algorithms,
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index e44817fece..1b13d1f660 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -42,6 +42,10 @@
 #include "lz4.h"
 #endif
 
+#ifdef USE_ZSTD
+#include "zstd.h"
+#endif
+
 static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
 			pg_attribute_printf(2, 3);
 static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
@@ -83,6 +87,10 @@ const struct config_enum_entry wal_compression_options[] = {
 	{"lz4", WAL_COMPRESSION_LZ4, false},
 #endif
 
+#ifdef  USE_ZSTD
+	{"zstd", WAL_COMPRESSION_ZSTD, false},
+#endif
+
 	{NULL, 0, false}
 };
 
@@ -1639,6 +1647,16 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 			break;
 #endif
 
+#ifdef USE_ZSTD
+		case WAL_COMPRESSION_ZSTD:
+			decomp_result = ZSTD_decompress(tmp.data, BLCKSZ-bkpb->hole_length,
+					ptr, bkpb->bimg_len);
+			/* XXX: could report ZSTD_getErrorName(decomp_result) here */
+			if (ZSTD_isError(decomp_result))
+				decomp_result = -1;
+			break;
+#endif
+
 		default:
 			report_invalid_record(record, "image at %X/%X is compressed with unsupported codec, block %d (%d/%s)",
 								  (uint32) (record->ReadRecPtr >> 32),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 728acef953..818b26faad 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -218,7 +218,7 @@
 #full_page_writes = on			# recover from partial page writes
 #wal_log_hints = off			# also do full page writes of non-critical updates
 					# (change requires restart)
-#wal_compression = off			# enable compression of full-page writes: off, pglz, zlib, lz4
+#wal_compression = off			# enable compression of full-page writes: off, pglz, zlib, lz4, zstd
 #wal_init_zero = on			# zero-fill new WAL files
 #wal_recycle = on			# recycle WAL files
 #wal_buffers = -1			# min 32kB, -1 sets based on shared_buffers
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 0287592cd4..1da965a708 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -331,6 +331,7 @@ typedef enum WalCompression
 	WAL_COMPRESSION_PGLZ,
 	WAL_COMPRESSION_ZLIB,
 	WAL_COMPRESSION_LZ4,
+	WAL_COMPRESSION_ZSTD,
 } WalCompression;
 
 extern const char *wal_compression_name(WalCompression compression);
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index abb42b364d..84ffb17596 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -147,12 +147,13 @@ typedef struct XLogRecordBlockImageHeader
 #define	BKPIMAGE_APPLY		0x02	/* page image should be restored during
 									 * replay */
 #define BKPIMAGE_COMPRESS_METHOD1	0x04	/* bits to encode compression method */
-#define BKPIMAGE_COMPRESS_METHOD2	0x08	/* 0=none, 1=pglz, 2=zlib, 3=lz4 */
+#define BKPIMAGE_COMPRESS_METHOD2	0x08	/* 0=none, 1=pglz, 2=zlib, 3=lz4, 4=zstd */
+#define BKPIMAGE_COMPRESS_METHOD3	0x10	/* third (highest) bit of the method field */
 
 /* How many bits to shift to extract compression */
 #define	BKPIMAGE_COMPRESS_OFFSET_BITS	2
 /* How many bits are for compression */
-#define	BKPIMAGE_COMPRESS_BITS			2
+#define	BKPIMAGE_COMPRESS_BITS			3
 /* Extract the compression from the bimg_info */
 #define	BKPIMAGE_COMPRESSION(info)		((info >> BKPIMAGE_COMPRESS_OFFSET_BITS) & ((1<<BKPIMAGE_COMPRESS_BITS) - 1))
 #define	BKPIMAGE_IS_COMPRESSED(info)	(BKPIMAGE_COMPRESSION(info) != 0)
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 783b8fc1ba..bb44ef2a9d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -917,6 +917,9 @@
 /* Define to 1 to build with LZ4 support. (--with-lz4) */
 #undef USE_LZ4
 
+/* Define to 1 to build with zstd support. (--with-zstd) */
+#undef USE_ZSTD
+
 /* Define to select named POSIX semaphores. */
 #undef USE_NAMED_POSIX_SEMAPHORES
 
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index a7b8f720b5..28ff10f09f 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -494,6 +494,7 @@ sub GenerateFiles
 		USE_LIBXML                 => undef,
 		USE_LIBXSLT                => undef,
 		USE_LZ4                    => undef,
+		USE_ZSTD                   => $self->{options}->{zstd} ? 1 : undef,
 		USE_LDAP                   => $self->{options}->{ldap} ? 1 : undef,
 		USE_LLVM                   => undef,
 		USE_NAMED_POSIX_SEMAPHORES => undef,
diff --git a/src/tools/msvc/config_default.pl b/src/tools/msvc/config_default.pl
index 460c0375d4..b8a1aac3c2 100644
--- a/src/tools/msvc/config_default.pl
+++ b/src/tools/msvc/config_default.pl
@@ -26,6 +26,7 @@ our $config = {
 	xslt      => undef,    # --with-libxslt=<path>
 	iconv     => undef,    # (not in configure, path to iconv)
-	zlib      => undef     # --with-zlib=<path>
+	zlib      => undef,    # --with-zlib=<path>
+	zstd      => undef     # --with-zstd=<path>
 };
 
 1;
-- 
2.17.0

