From f0e5ee4d78dce6bc4d111b8b574c6b75f546ee4a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 27 Mar 2022 11:55:01 -0500
Subject: [PATCH 3/4] basebackup: support -Z zstd:long

---
 doc/src/sgml/protocol.sgml                | 10 +++++-
 doc/src/sgml/ref/pg_basebackup.sgml       |  4 +--
 src/backend/replication/basebackup_zstd.c | 12 +++++++
 src/bin/pg_basebackup/bbstreamer_zstd.c   | 13 +++++++
 src/common/backup_compression.c           | 44 +++++++++++++++++++++++
 src/include/common/backup_compression.h   |  2 ++
 6 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 98f0bc3cc34..80f1a1f9a04 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -2740,7 +2740,8 @@ The commands accepted in replication mode are:
           level.  Otherwise, it should be a comma-separated list of items,
           each of the form <literal>keyword</literal> or
           <literal>keyword=value</literal>. Currently, the supported keywords
-          are <literal>level</literal> and <literal>workers</literal>.
+          are <literal>level</literal>, <literal>long</literal>, and
+          <literal>workers</literal>.
         </para>
 
         <para>
@@ -2751,6 +2752,13 @@ The commands accepted in replication mode are:
           between 1 and 22.
          </para>
 
+        <para>
+          The <literal>long</literal> keyword enables long-distance matching
+          mode, for improved compression ratio, at the expense of higher memory
+          use.  Long-distance mode is supported only for
+          <literal>zstd</literal>.
+         </para>
+
         <para>
           The <literal>workers</literal> keyword sets the number of threads
           that should be used for parallel compression. Parallel compression
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 82f5f606250..014c454bfab 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -424,8 +424,8 @@ PostgreSQL documentation
         integer, it specifies the compression level.  Otherwise, it should be
         a comma-separated list of items, each of the form
         <literal>keyword</literal> or <literal>keyword=value</literal>.
-        Currently, the supported keywords are <literal>level</literal>
-        and <literal>workers</literal>.
+        Currently, the supported keywords are <literal>level</literal>,
+        <literal>long</literal>, and <literal>workers</literal>.
        </para>
        <para>
         If no compression level is specified, the default compression level
diff --git a/src/backend/replication/basebackup_zstd.c b/src/backend/replication/basebackup_zstd.c
index f6876f48118..dc23898f7fd 100644
--- a/src/backend/replication/basebackup_zstd.c
+++ b/src/backend/replication/basebackup_zstd.c
@@ -121,6 +121,18 @@ bbsink_zstd_begin_backup(bbsink *sink)
 						   compress->workers, ZSTD_getErrorName(ret)));
 	}
 
+	if ((compress->options & BACKUP_COMPRESSION_OPTION_ZSTD_LONG) != 0)
+	{
+		ret = ZSTD_CCtx_setParameter(mysink->cctx,
+									 ZSTD_c_enableLongDistanceMatching,
+									 compress->zstd_long);
+		if (ZSTD_isError(ret))
+			ereport(ERROR,
+					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("could not set compression flag for %s: %s",
+						   "long", ZSTD_getErrorName(ret)));
+	}
+
 	/*
 	 * We need our own buffer, because we're going to pass different data to
 	 * the next sink than what gets passed to us.
diff --git a/src/bin/pg_basebackup/bbstreamer_zstd.c b/src/bin/pg_basebackup/bbstreamer_zstd.c
index f94c5c041d3..051b97458ba 100644
--- a/src/bin/pg_basebackup/bbstreamer_zstd.c
+++ b/src/bin/pg_basebackup/bbstreamer_zstd.c
@@ -118,6 +118,19 @@ bbstreamer_zstd_compressor_new(bbstreamer *next, bc_specification *compress)
 		}
 	}
 
+	if ((compress->options & BACKUP_COMPRESSION_OPTION_ZSTD_LONG) != 0)
+	{
+		ret = ZSTD_CCtx_setParameter(streamer->cctx,
+									 ZSTD_c_enableLongDistanceMatching,
+									 compress->zstd_long);
+		if (ZSTD_isError(ret))
+		{
+			pg_log_error("could not set compression flag for %s: %s",
+						 "long", ZSTD_getErrorName(ret));
+			exit(1);
+		}
+	}
+
 	/* Initialize the ZSTD output buffer. */
 	streamer->zstd_outBuf.dst = streamer->base.bbs_buffer.data;
 	streamer->zstd_outBuf.size = streamer->base.bbs_buffer.maxlen;
diff --git a/src/common/backup_compression.c b/src/common/backup_compression.c
index 477dc7eb49b..9fc865ff299 100644
--- a/src/common/backup_compression.c
+++ b/src/common/backup_compression.c
@@ -31,6 +31,8 @@
 
 static int	expect_integer_value(char *keyword, char *value,
 								 bc_specification *result);
+static bool	expect_boolean_value(char *keyword, char *value,
+								 bc_specification *result);
 
 /*
  * Look up a compression algorithm by name. Returns true and sets *algorithm
@@ -182,6 +184,11 @@ parse_bc_specification(bc_algorithm algorithm, char *specification,
 			result->workers = expect_integer_value(keyword, value, result);
 			result->options |= BACKUP_COMPRESSION_OPTION_WORKERS;
 		}
+		else if (strcmp(keyword, "long") == 0)
+		{
+			result->zstd_long = expect_boolean_value(keyword, value, result);
+			result->options |= BACKUP_COMPRESSION_OPTION_ZSTD_LONG;
+		}
 		else
 			result->parse_error =
 				psprintf(_("unknown compression option \"%s\""), keyword);
@@ -235,6 +242,43 @@ expect_integer_value(char *keyword, char *value, bc_specification *result)
 	return ivalue;
 }
 
+/*
+ * Parse 'value' as a boolean and return the result.
+ *
+ * If parsing fails, set result->parse_error to an appropriate message
+ * and return false.  The caller must check result->parse_error to determine
+ * if the call was successful.
+ *
+ * Valid values (case-insensitive): yes, no, on, off, 1, 0.  NULL means true.
+ *
+ * Inspired by ParseVariableBool().
+ */
+static bool
+expect_boolean_value(char *keyword, char *value, bc_specification *result)
+{
+	if (value == NULL)
+		return true;
+
+	if (pg_strcasecmp(value, "yes") == 0)
+		return true;
+	if (pg_strcasecmp(value, "on") == 0)
+		return true;
+	if (pg_strcasecmp(value, "1") == 0)
+		return true;
+
+	if (pg_strcasecmp(value, "no") == 0)
+		return false;
+	if (pg_strcasecmp(value, "off") == 0)
+		return false;
+	if (pg_strcasecmp(value, "0") == 0)
+		return false;
+
+	result->parse_error =
+		psprintf(_("value for compression option \"%s\" must be a boolean"),
+				 keyword);
+	return false;
+}
+
 /*
  * Returns NULL if the compression specification string was syntactically
  * valid and semantically sensible.  Otherwise, returns an error message.
diff --git a/src/include/common/backup_compression.h b/src/include/common/backup_compression.h
index 6a0ecaa99c9..a378631a8da 100644
--- a/src/include/common/backup_compression.h
+++ b/src/include/common/backup_compression.h
@@ -24,6 +24,7 @@ typedef enum bc_algorithm
 
 #define	BACKUP_COMPRESSION_OPTION_LEVEL			(1 << 0)
 #define BACKUP_COMPRESSION_OPTION_WORKERS		(1 << 1)
+#define BACKUP_COMPRESSION_OPTION_ZSTD_LONG		(1 << 2)
 
 typedef struct bc_specification
 {
@@ -31,6 +32,7 @@ typedef struct bc_specification
 	unsigned	options;		/* OR of BACKUP_COMPRESSION_OPTION constants */
 	int			level;
 	int			workers;
+	int			zstd_long;
 	char	   *parse_error;	/* NULL if parsing was OK, else message */
 } bc_specification;
 
-- 
2.17.1

