From 74124b8d69e5fbe632fd51bff0effec81ebdc806 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sun, 27 Mar 2022 11:55:01 -0500
Subject: [PATCH 4/5] basebackup: support -Z zstd:long

---
 doc/src/sgml/protocol.sgml                | 10 +++++++++-
 doc/src/sgml/ref/pg_basebackup.sgml       |  4 ++--
 src/backend/replication/basebackup_zstd.c | 21 +++++++++++++++++++++
 src/bin/pg_basebackup/bbstreamer_zstd.c   | 13 +++++++++++++
 src/common/backup_compression.c           |  5 +++++
 src/include/common/backup_compression.h   |  2 ++
 6 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 98f0bc3cc34..80f1a1f9a04 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -2740,7 +2740,8 @@ The commands accepted in replication mode are:
           level.  Otherwise, it should be a comma-separated list of items,
           each of the form <literal>keyword</literal> or
           <literal>keyword=value</literal>. Currently, the supported keywords
-          are <literal>level</literal> and <literal>workers</literal>.
+          are <literal>level</literal>, <literal>long</literal>, and
+          <literal>workers</literal>.
         </para>
 
         <para>
@@ -2751,6 +2752,13 @@ The commands accepted in replication mode are:
           between 1 and 22.
          </para>
 
+        <para>
+          The <literal>long</literal> keyword enables long-distance matching
+          mode, which can improve the compression ratio at the expense of
+          higher memory use.  Long-distance mode is supported only for
+          <literal>zstd</literal>.
+         </para>
+
         <para>
           The <literal>workers</literal> keyword sets the number of threads
           that should be used for parallel compression. Parallel compression
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 82f5f606250..014c454bfab 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -424,8 +424,8 @@ PostgreSQL documentation
         integer, it specifies the compression level.  Otherwise, it should be
         a comma-separated list of items, each of the form
         <literal>keyword</literal> or <literal>keyword=value</literal>.
-        Currently, the supported keywords are <literal>level</literal>
-        and <literal>workers</literal>.
+        Currently, the supported keywords are <literal>level</literal>,
+        <literal>long</literal>, and <literal>workers</literal>.
        </para>
        <para>
         If no compression level is specified, the default compression level
diff --git a/src/backend/replication/basebackup_zstd.c b/src/backend/replication/basebackup_zstd.c
index a112d6e181e..b900604f59f 100644
--- a/src/backend/replication/basebackup_zstd.c
+++ b/src/backend/replication/basebackup_zstd.c
@@ -31,6 +31,9 @@ typedef struct bbsink_zstd
 	/* Number of parallel workers. */
 	int			workers;
 
+	/* Whether to enable zstd long-distance matching. */
+	bool		zstd_long;
+
 	ZSTD_CCtx  *cctx;
 	ZSTD_outBuffer zstd_outBuf;
 } bbsink_zstd;
@@ -72,6 +75,7 @@ bbsink_zstd_new(bbsink *next, bc_specification *compress)
 	bbsink_zstd *sink;
 	int		compresslevel;
 	int		workers;
+	bool	zstd_long;
 
 	Assert(next != NULL);
 
@@ -88,11 +92,15 @@ bbsink_zstd_new(bbsink *next, bc_specification *compress)
 	else
 		workers = 0;
 
+	zstd_long = (compress->options & BACKUP_COMPRESSION_OPTION_ZSTD_LONG) ?
+				compress->zstd_long : false;
+
 	sink = palloc0(sizeof(bbsink_zstd));
 	*((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_zstd_ops;
 	sink->base.bbs_next = next;
 	sink->compresslevel = compresslevel;
 	sink->workers = workers;
+	sink->zstd_long = zstd_long;
 
 	return &sink->base;
 #endif
@@ -131,6 +139,19 @@ bbsink_zstd_begin_backup(bbsink *sink)
 						   mysink->workers, ZSTD_getErrorName(ret)));
 	}
 
+	if (mysink->zstd_long)
+	{
+		/* Enable long-distance matching, as set in the compression spec. */
+		ret = ZSTD_CCtx_setParameter(mysink->cctx,
+									 ZSTD_c_enableLongDistanceMatching,
+									 mysink->zstd_long);
+		if (ZSTD_isError(ret))
+			ereport(ERROR,
+					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("could not set compression flag for %s: %s",
+						   "long", ZSTD_getErrorName(ret)));
+	}
+
 	/*
 	 * We need our own buffer, because we're going to pass different data to
 	 * the next sink than what gets passed to us.
diff --git a/src/bin/pg_basebackup/bbstreamer_zstd.c b/src/bin/pg_basebackup/bbstreamer_zstd.c
index 678af73e6f0..3c7396a1373 100644
--- a/src/bin/pg_basebackup/bbstreamer_zstd.c
+++ b/src/bin/pg_basebackup/bbstreamer_zstd.c
@@ -114,6 +114,19 @@ bbstreamer_zstd_compressor_new(bbstreamer *next, bc_specification *compress)
 		}
 	}
 
+	if ((compress->options & BACKUP_COMPRESSION_OPTION_ZSTD_LONG) != 0)
+	{
+		ret = ZSTD_CCtx_setParameter(streamer->cctx,
+									 ZSTD_c_enableLongDistanceMatching,
+									 compress->zstd_long);
+		if (ZSTD_isError(ret))
+		{
+			pg_log_error("could not set compression flag for %s: %s",
+						 "long", ZSTD_getErrorName(ret));
+			exit(1);
+		}
+	}
+
 	/* Initialize the ZSTD output buffer. */
 	streamer->zstd_outBuf.dst = streamer->base.bbs_buffer.data;
 	streamer->zstd_outBuf.size = streamer->base.bbs_buffer.maxlen;
diff --git a/src/common/backup_compression.c b/src/common/backup_compression.c
index 969e08cca20..f43a5608e65 100644
--- a/src/common/backup_compression.c
+++ b/src/common/backup_compression.c
@@ -182,6 +182,11 @@ parse_bc_specification(bc_algorithm algorithm, char *specification,
 			result->workers = expect_integer_value(keyword, value, result);
 			result->options |= BACKUP_COMPRESSION_OPTION_WORKERS;
 		}
+		else if (strcmp(keyword, "long") == 0)
+		{
+			result->zstd_long = value ? expect_integer_value(keyword, value, result) : 1;
+			result->options |= BACKUP_COMPRESSION_OPTION_ZSTD_LONG;
+		}
 		else
 			result->parse_error =
 				psprintf(_("unknown compression option \"%s\""), keyword);
diff --git a/src/include/common/backup_compression.h b/src/include/common/backup_compression.h
index 6a0ecaa99c9..a378631a8da 100644
--- a/src/include/common/backup_compression.h
+++ b/src/include/common/backup_compression.h
@@ -24,6 +24,7 @@ typedef enum bc_algorithm
 
 #define	BACKUP_COMPRESSION_OPTION_LEVEL			(1 << 0)
 #define BACKUP_COMPRESSION_OPTION_WORKERS		(1 << 1)
+#define BACKUP_COMPRESSION_OPTION_ZSTD_LONG		(1 << 2)
 
 typedef struct bc_specification
 {
@@ -31,6 +32,7 @@ typedef struct bc_specification
 	unsigned	options;		/* OR of BACKUP_COMPRESSION_OPTION constants */
 	int			level;
 	int			workers;
+	int			zstd_long;
 	char	   *parse_error;	/* NULL if parsing was OK, else message */
 } bc_specification;
 
-- 
2.17.1

