From af502308192d3f316e493682a36f63733881043b Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Tue, 16 Jan 2018 18:31:23 +0300
Subject: [PATCH 4/9] Add opclass parameters to GiST tsvector_ops

---
 doc/src/sgml/textsearch.sgml          |   9 +-
 src/backend/utils/adt/tsgistidx.c     | 269 +++++++++++++++++++---------------
 src/include/catalog/pg_amproc.dat     |   5 +-
 src/include/catalog/pg_proc.dat       |  19 ++-
 src/test/regress/expected/tsearch.out | 176 ++++++++++++++++++++++
 src/test/regress/sql/tsearch.sql      |  45 ++++++
 6 files changed, 392 insertions(+), 131 deletions(-)

diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index ecebade..92abf6e 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -3635,7 +3635,7 @@ SELECT plainto_tsquery('supernovae stars');
       <tertiary>text search</tertiary>
      </indexterm>
 
-      <literal>CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING GIST (<replaceable>column</replaceable>);</literal>
+      <literal>CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING GIST (<replaceable>column</replaceable> [ { DEFAULT | tsvector_ops } (siglen = <replaceable>number</replaceable>) ] );</literal>
      </term>
 
      <listitem>
@@ -3643,6 +3643,8 @@ SELECT plainto_tsquery('supernovae stars');
        Creates a GiST (Generalized Search Tree)-based index.
        The <replaceable>column</replaceable> can be of <type>tsvector</type> or
        <type>tsquery</type> type.
+       The optional integer parameter <literal>siglen</literal> determines
+       the signature length in bytes (see below for details).
       </para>
      </listitem>
     </varlistentry>
@@ -3666,7 +3668,10 @@ SELECT plainto_tsquery('supernovae stars');
    to check the actual table row to eliminate such false matches.
    (<productname>PostgreSQL</productname> does this automatically when needed.)
    GiST indexes are lossy because each document is represented in the
-   index by a fixed-length signature. The signature is generated by hashing
+   index by a fixed-length signature.  The signature length in bytes is determined
+   by the value of the optional integer parameter <literal>siglen</literal>.
+   The default signature length (when <literal>siglen</literal> is not specified) is
+   124 bytes; the maximum length is 484 bytes. The signature is generated by hashing
    each word into a single bit in an n-bit string, with all these bits OR-ed
    together to produce an n-bit document signature.  When two words hash to
    the same bit position there will be a false match.  If all words in
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 2d9ecc4..462791a 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -15,23 +15,29 @@
 #include "postgres.h"
 
 #include "access/gist.h"
+#include "access/reloptions.h"
 #include "access/tuptoaster.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/pg_crc.h"
 
 
-#define SIGLENINT  31			/* >121 => key will toast, so it will not work
-								 * !!! */
+#define SIGLEN_DEFAULT	(31 * 4)	/* default signature length, in bytes */
+#define SIGLEN_MAX		(121 * 4)	/* in bytes; anything longer => key will toast, so it will not work */
 
-#define SIGLEN	( sizeof(int32) * SIGLENINT )
-#define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
+#define SIGLENBIT(siglen) ((siglen) * BITS_PER_BYTE)
+
+/* tsvector_ops opclass options, as returned by gtsvector_options() */
+typedef struct GistTsVectorOptions
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	int			siglen;			/* signature length in bytes (1 .. SIGLEN_MAX) */
+}	GistTsVectorOptions;
 
-typedef char BITVEC[SIGLEN];
 typedef char *BITVECP;
 
-#define LOOPBYTE \
-			for(i=0;i<SIGLEN;i++)
+#define LOOPBYTE(siglen) \
+			for (i = 0; i < siglen; i++)
 
 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
 #define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
@@ -39,8 +45,8 @@ typedef char *BITVECP;
 #define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITS_PER_BYTE ) )
 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
 
-#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
-#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
+#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
+#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
 
 #define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
 
@@ -64,9 +70,10 @@ typedef struct
 #define ISALLTRUE(x)	( ((SignTSVector*)(x))->flag & ALLISTRUE )
 
 #define GTHDRSIZE	( VARHDRSZ + sizeof(int32) )
-#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )
 
 #define GETSIGN(x)	( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETSIGLEN(x)( VARSIZE(x) - GTHDRSIZE )
 #define GETARR(x)	( (int32*)( (char*)(x)+GTHDRSIZE ) )
 #define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) )
 
@@ -90,7 +97,7 @@ static const uint8 number_of_ones[256] = {
 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 };
 
-static int32 sizebitvec(BITVECP sign);
+static int32 sizebitvec(BITVECP sign, int siglen);
 
 Datum
 gtsvectorin(PG_FUNCTION_ARGS)
@@ -121,9 +128,10 @@ gtsvectorout(PG_FUNCTION_ARGS)
 		sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));
 	else
 	{
-		int			cnttrue = (ISALLTRUE(key)) ? SIGLENBIT : sizebitvec(GETSIGN(key));
+		int			siglen = GETSIGLEN(key);
+		int			cnttrue = (ISALLTRUE(key)) ? SIGLENBIT(siglen) : sizebitvec(GETSIGN(key), siglen);
 
-		sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue);
+		sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT(siglen) - cnttrue);
 	}
 
 	PG_FREE_IF_COPY(key, 0);
@@ -167,36 +175,49 @@ uniqueint(int32 *a, int32 l)
 }
 
 static void
-makesign(BITVECP sign, SignTSVector *a)
+makesign(BITVECP sign, SignTSVector *a, int siglen)
 {
 	int32		k,
 				len = ARRNELEM(a);
 	int32	   *ptr = GETARR(a);
 
-	MemSet((void *) sign, 0, sizeof(BITVEC));
+	MemSet((void *) sign, 0, siglen);
 	for (k = 0; k < len; k++)
-		HASH(sign, ptr[k]);
+		HASH(sign, ptr[k], siglen);
+}
+
+static SignTSVector *
+gtsvector_alloc(int flag, int len, BITVECP sign)
+{								/* palloc a key; len = element count (ARRKEY) or signature bytes (SIGNKEY) */
+	int			size = CALCGTSIZE(flag, len);	/* header only when ALLISTRUE is set */
+	SignTSVector *res = palloc(size);
+
+	SET_VARSIZE(res, size);
+	res->flag = flag;
+
+	if ((flag & (SIGNKEY | ALLISTRUE)) == SIGNKEY && sign)	/* with sign == NULL the payload is left for the caller to fill */
+		memcpy(GETSIGN(res), sign, len);
+
+	return res;
 }
+
 Datum
 gtsvector_compress(PG_FUNCTION_ARGS)
 {
 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+	int			siglen = ((GistTsVectorOptions *) PG_GETARG_POINTER(1))->siglen;
 	GISTENTRY  *retval = entry;
 
 	if (entry->leafkey)
 	{							/* tsvector */
-		SignTSVector *res;
 		TSVector	val = DatumGetTSVector(entry->key);
+		SignTSVector *res = gtsvector_alloc(ARRKEY, val->size, NULL);
 		int32		len;
 		int32	   *arr;
 		WordEntry  *ptr = ARRPTR(val);
 		char	   *words = STRPTR(val);
 
-		len = CALCGTSIZE(ARRKEY, val->size);
-		res = (SignTSVector *) palloc(len);
-		SET_VARSIZE(res, len);
-		res->flag = ARRKEY;
 		arr = GETARR(res);
 		len = val->size;
 		while (len--)
@@ -227,13 +248,9 @@ gtsvector_compress(PG_FUNCTION_ARGS)
 		/* make signature, if array is too long */
 		if (VARSIZE(res) > TOAST_INDEX_TARGET)
 		{
-			SignTSVector *ressign;
+			SignTSVector *ressign = gtsvector_alloc(SIGNKEY, siglen, NULL);
 
-			len = CALCGTSIZE(SIGNKEY, 0);
-			ressign = (SignTSVector *) palloc(len);
-			SET_VARSIZE(ressign, len);
-			ressign->flag = SIGNKEY;
-			makesign(GETSIGN(ressign), res);
+			makesign(GETSIGN(ressign), res, siglen);
 			res = ressign;
 		}
 
@@ -245,22 +262,17 @@ gtsvector_compress(PG_FUNCTION_ARGS)
 	else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
 			 !ISALLTRUE(DatumGetPointer(entry->key)))
 	{
-		int32		i,
-					len;
+		int32		i;
 		SignTSVector *res;
 		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));
 
-		LOOPBYTE
+		LOOPBYTE(siglen)
 		{
 			if ((sign[i] & 0xff) != 0xff)
 				PG_RETURN_POINTER(retval);
 		}
 
-		len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
-		res = (SignTSVector *) palloc(len);
-		SET_VARSIZE(res, len);
-		res->flag = SIGNKEY | ALLISTRUE;
-
+		res = gtsvector_alloc(SIGNKEY | ALLISTRUE, siglen, sign);
 		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
 		gistentryinit(*retval, PointerGetDatum(res),
 					  entry->rel, entry->page,
@@ -334,12 +346,14 @@ checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
 static bool
 checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
+	void *key = (SignTSVector *) checkval;
+
 	/*
 	 * we are not able to find a prefix in signature tree
 	 */
 	if (val->prefix)
 		return true;
-	return GETBIT(checkval, HASHVAL(val->valcrc));
+	return GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key)));
 }
 
 Datum
@@ -366,7 +380,7 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
 
 		/* since signature is lossy, cannot specify CALC_NOT here */
 		PG_RETURN_BOOL(TS_execute(GETQUERY(query),
-								  (void *) GETSIGN(key),
+								  key,
 								  TS_EXEC_PHRASE_NO_POS,
 								  checkcondition_bit));
 	}
@@ -384,7 +398,7 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
 }
 
 static int32
-unionkey(BITVECP sbase, SignTSVector *add)
+unionkey(BITVECP sbase, SignTSVector *add, int siglen)
 {
 	int32		i;
 
@@ -395,7 +409,9 @@ unionkey(BITVECP sbase, SignTSVector *add)
 		if (ISALLTRUE(add))
 			return 1;
 
-		LOOPBYTE
+		Assert(GETSIGLEN(add) == siglen);
+
+		LOOPBYTE(siglen)
 			sbase[i] |= sadd[i];
 	}
 	else
@@ -403,7 +419,7 @@ unionkey(BITVECP sbase, SignTSVector *add)
 		int32	   *ptr = GETARR(add);
 
 		for (i = 0; i < ARRNELEM(add); i++)
-			HASH(sbase, ptr[i]);
+			HASH(sbase, ptr[i], siglen);
 	}
 	return 0;
 }
@@ -414,30 +430,24 @@ gtsvector_union(PG_FUNCTION_ARGS)
 {
 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
 	int		   *size = (int *) PG_GETARG_POINTER(1);
-	BITVEC		base;
-	int32		i,
-				len;
-	int32		flag = 0;
-	SignTSVector *result;
+	int			siglen = ((GistTsVectorOptions *) PG_GETARG_POINTER(2))->siglen;
+	SignTSVector *result = gtsvector_alloc(SIGNKEY, siglen, NULL);
+	BITVECP		base = GETSIGN(result);
+	int32		i;
+
+	memset(base, 0, siglen);
 
-	MemSet((void *) base, 0, sizeof(BITVEC));
 	for (i = 0; i < entryvec->n; i++)
 	{
-		if (unionkey(base, GETENTRY(entryvec, i)))
+		if (unionkey(base, GETENTRY(entryvec, i), siglen))
 		{
-			flag = ALLISTRUE;
+			result->flag |= ALLISTRUE;
+			SET_VARSIZE(result, CALCGTSIZE(result->flag, siglen));
 			break;
 		}
 	}
 
-	flag |= SIGNKEY;
-	len = CALCGTSIZE(flag, 0);
-	result = (SignTSVector *) palloc(len);
-	*size = len;
-	SET_VARSIZE(result, len);
-	result->flag = flag;
-	if (!ISALLTRUE(result))
-		memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
+	*size = VARSIZE(result);
 
 	PG_RETURN_POINTER(result);
 }
@@ -448,6 +458,7 @@ gtsvector_same(PG_FUNCTION_ARGS)
 	SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0);
 	SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1);
 	bool	   *result = (bool *) PG_GETARG_POINTER(2);
+	int			siglen = ((GistTsVectorOptions *) PG_GETARG_POINTER(3))->siglen;
 
 	if (ISSIGNKEY(a))
 	{							/* then b also ISSIGNKEY */
@@ -463,8 +474,10 @@ gtsvector_same(PG_FUNCTION_ARGS)
 			BITVECP		sa = GETSIGN(a),
 						sb = GETSIGN(b);
 
+			Assert(GETSIGLEN(a) == siglen && GETSIGLEN(b) == siglen);
+
 			*result = true;
-			LOOPBYTE
+			LOOPBYTE(siglen)
 			{
 				if (sa[i] != sb[i])
 				{
@@ -501,24 +514,24 @@ gtsvector_same(PG_FUNCTION_ARGS)
 }
 
 static int32
-sizebitvec(BITVECP sign)
+sizebitvec(BITVECP sign, int siglen)
 {
 	int32		size = 0,
 				i;
 
-	LOOPBYTE
+	LOOPBYTE(siglen)
 		size += number_of_ones[(unsigned char) sign[i]];
 	return size;
 }
 
 static int
-hemdistsign(BITVECP a, BITVECP b)
+hemdistsign(BITVECP a, BITVECP b, int siglen)
 {
 	int			i,
 				diff,
 				dist = 0;
 
-	LOOPBYTE
+	LOOPBYTE(siglen)
 	{
 		diff = (unsigned char) (a[i] ^ b[i]);
 		dist += number_of_ones[diff];
@@ -529,17 +542,22 @@ hemdistsign(BITVECP a, BITVECP b)
 static int
 hemdist(SignTSVector *a, SignTSVector *b)
 {
+	int siglena = GETSIGLEN(a);
+	int siglenb = GETSIGLEN(b);
+
 	if (ISALLTRUE(a))
 	{
 		if (ISALLTRUE(b))
 			return 0;
 		else
-			return SIGLENBIT - sizebitvec(GETSIGN(b));
+			return SIGLENBIT(siglenb) - sizebitvec(GETSIGN(b), siglenb);
 	}
 	else if (ISALLTRUE(b))
-		return SIGLENBIT - sizebitvec(GETSIGN(a));
+		return SIGLENBIT(siglena) - sizebitvec(GETSIGN(a), siglena);
 
-	return hemdistsign(GETSIGN(a), GETSIGN(b));
+	Assert(siglena == siglenb);
+
+	return hemdistsign(GETSIGN(a), GETSIGN(b), siglena);
 }
 
 Datum
@@ -548,6 +566,7 @@ gtsvector_penalty(PG_FUNCTION_ARGS)
 	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
 	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
 	float	   *penalty = (float *) PG_GETARG_POINTER(2);
+	int			siglen = ((GistTsVectorOptions *) PG_GETARG_POINTER(3))->siglen;
 	SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key);
 	SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key);
 	BITVECP		orig = GETSIGN(origval);
@@ -556,14 +575,22 @@ gtsvector_penalty(PG_FUNCTION_ARGS)
 
 	if (ISARRKEY(newval))
 	{
-		BITVEC		sign;
+		BITVECP		sign = palloc(siglen);
 
-		makesign(sign, newval);
+		makesign(sign, newval, siglen);
 
 		if (ISALLTRUE(origval))
-			*penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
+		{
+			int			siglenbit = SIGLENBIT(siglen);
+
+			*penalty =
+				(float) (siglenbit - sizebitvec(sign, siglen)) /
+				(float) (siglenbit + 1);
+		}
 		else
-			*penalty = hemdistsign(sign, orig);
+			*penalty = hemdistsign(sign, orig, siglen);
+
+		pfree(sign);
 	}
 	else
 		*penalty = hemdist(origval, newval);
@@ -573,19 +600,19 @@ gtsvector_penalty(PG_FUNCTION_ARGS)
 typedef struct
 {
 	bool		allistrue;
-	BITVEC		sign;
+	BITVECP		sign;
 } CACHESIGN;
 
 static void
-fillcache(CACHESIGN *item, SignTSVector *key)
+fillcache(CACHESIGN *item, SignTSVector *key, int siglen)
 {
 	item->allistrue = false;
 	if (ISARRKEY(key))
-		makesign(item->sign, key);
+		makesign(item->sign, key, siglen);
 	else if (ISALLTRUE(key))
 		item->allistrue = true;
 	else
-		memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
+		memcpy((void *) item->sign, (void *) GETSIGN(key), siglen);
 }
 
 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
@@ -609,19 +636,19 @@ comparecost(const void *va, const void *vb)
 
 
 static int
-hemdistcache(CACHESIGN *a, CACHESIGN *b)
+hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
 {
 	if (a->allistrue)
 	{
 		if (b->allistrue)
 			return 0;
 		else
-			return SIGLENBIT - sizebitvec(b->sign);
+			return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen);
 	}
 	else if (b->allistrue)
-		return SIGLENBIT - sizebitvec(a->sign);
+		return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
 
-	return hemdistsign(a->sign, b->sign);
+	return hemdistsign(a->sign, b->sign, siglen);
 }
 
 Datum
@@ -629,6 +656,7 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 {
 	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
 	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+	int			siglen = ((GistTsVectorOptions *) PG_GETARG_POINTER(2))->siglen;
 	OffsetNumber k,
 				j;
 	SignTSVector *datum_l,
@@ -648,6 +676,7 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 	BITVECP		ptr;
 	int			i;
 	CACHESIGN  *cache;
+	char	   *cache_sign;
 	SPLITCOST  *costvector;
 
 	maxoff = entryvec->n - 2;
@@ -656,16 +685,22 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 	v->spl_right = (OffsetNumber *) palloc(nbytes);
 
 	cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
-	fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
+	cache_sign = palloc(siglen * (maxoff + 2));
+
+	for (j = 0; j < maxoff + 2; j++)
+		cache[j].sign = &cache_sign[siglen * j];
+
+	fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber),
+			  siglen);
 
 	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
 	{
 		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
 		{
 			if (k == FirstOffsetNumber)
-				fillcache(&cache[j], GETENTRY(entryvec, j));
+				fillcache(&cache[j], GETENTRY(entryvec, j), siglen);
 
-			size_waste = hemdistcache(&(cache[j]), &(cache[k]));
+			size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
 			if (size_waste > waste)
 			{
 				waste = size_waste;
@@ -687,44 +722,21 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 	}
 
 	/* form initial .. */
-	if (cache[seed_1].allistrue)
-	{
-		datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
-		SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
-		datum_l->flag = SIGNKEY | ALLISTRUE;
-	}
-	else
-	{
-		datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
-		SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
-		datum_l->flag = SIGNKEY;
-		memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
-	}
-	if (cache[seed_2].allistrue)
-	{
-		datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
-		SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
-		datum_r->flag = SIGNKEY | ALLISTRUE;
-	}
-	else
-	{
-		datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
-		SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
-		datum_r->flag = SIGNKEY;
-		memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
-	}
-
+	datum_l = gtsvector_alloc(SIGNKEY | (cache[seed_1].allistrue ? ALLISTRUE : 0),
+							  siglen, cache[seed_1].sign);
+	datum_r = gtsvector_alloc(SIGNKEY | (cache[seed_2].allistrue ? ALLISTRUE : 0),
+							  siglen, cache[seed_2].sign);
 	union_l = GETSIGN(datum_l);
 	union_r = GETSIGN(datum_r);
 	maxoff = OffsetNumberNext(maxoff);
-	fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
+	fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff), siglen);
 	/* sort before ... */
 	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
 	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
 	{
 		costvector[j - 1].pos = j;
-		size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
-		size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
+		size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
+		size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
 		costvector[j - 1].cost = Abs(size_alpha - size_beta);
 	}
 	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
@@ -750,36 +762,34 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 			if (ISALLTRUE(datum_l) && cache[j].allistrue)
 				size_alpha = 0;
 			else
-				size_alpha = SIGLENBIT - sizebitvec(
-													(cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
-					);
+				size_alpha = SIGLENBIT(siglen) -	/* cache[j].sign is a bare bit vector: no GETSIGN() on it */
+					sizebitvec((cache[j].allistrue) ? GETSIGN(datum_l) : cache[j].sign, siglen);
 		}
 		else
-			size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
+			size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
 
 		if (ISALLTRUE(datum_r) || cache[j].allistrue)
 		{
 			if (ISALLTRUE(datum_r) && cache[j].allistrue)
 				size_beta = 0;
 			else
-				size_beta = SIGLENBIT - sizebitvec(
-												   (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
-					);
+				size_beta = SIGLENBIT(siglen) -	/* cache[j].sign is a bare bit vector: no GETSIGN() on it */
+					sizebitvec((cache[j].allistrue) ? GETSIGN(datum_r) : cache[j].sign, siglen);
 		}
 		else
-			size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
+			size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
 
 		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
 		{
 			if (ISALLTRUE(datum_l) || cache[j].allistrue)
 			{
 				if (!ISALLTRUE(datum_l))
-					MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
+					MemSet((void *) GETSIGN(datum_l), 0xff, siglen);
 			}
 			else
 			{
 				ptr = cache[j].sign;
-				LOOPBYTE
+				LOOPBYTE(siglen)
 					union_l[i] |= ptr[i];
 			}
 			*left++ = j;
@@ -790,12 +800,12 @@ gtsvector_picksplit(PG_FUNCTION_ARGS)
 			if (ISALLTRUE(datum_r) || cache[j].allistrue)
 			{
 				if (!ISALLTRUE(datum_r))
-					MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
+					MemSet((void *) GETSIGN(datum_r), 0xff, siglen);
 			}
 			else
 			{
 				ptr = cache[j].sign;
-				LOOPBYTE
+				LOOPBYTE(siglen)
 					union_r[i] |= ptr[i];
 			}
 			*right++ = j;
@@ -822,3 +832,20 @@ gtsvector_consistent_oldsig(PG_FUNCTION_ARGS)
 {
 	return gtsvector_consistent(fcinfo);
 }
+
+Datum
+gtsvector_options(PG_FUNCTION_ARGS)
+{								/* opclass options support function: parses the single "siglen" option */
+	Datum		raw_options = PG_GETARG_DATUM(0);
+	bool		validate = PG_GETARG_BOOL(1);
+	relopt_int	siglen =
+		{ {"siglen", "signature length", 0, 0, 6, RELOPT_TYPE_INT },
+			SIGLEN_DEFAULT, 1, SIGLEN_MAX };	/* default, lower bound, upper bound (bytes) */
+	relopt_gen *optgen[] = { &siglen.gen };
+	int			offsets[] = { offsetof(GistTsVectorOptions, siglen) };	/* where the parsed value is stored */
+	GistTsVectorOptions *options =
+		parseAndFillLocalRelOptions(raw_options, optgen, offsets, 1,
+									sizeof(GistTsVectorOptions), validate);	/* NOTE(review): helper defined outside this patch; presumably reports bad options when validate is true -- confirm */
+
+	PG_RETURN_POINTER(options);
+}
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index 0ef2c08..c1ee3db 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -450,7 +450,7 @@
   amproc => 'gist_circle_distance' },
 { amprocfamily => 'gist/tsvector_ops', amproclefttype => 'tsvector',
   amprocrighttype => 'tsvector', amprocnum => '1',
-  amproc => 'gtsvector_consistent(internal,tsvector,int2,oid,internal)' },
+  amproc => 'gtsvector_consistent(internal,tsvector,int2,oid,internal,internal)' },
 { amprocfamily => 'gist/tsvector_ops', amproclefttype => 'tsvector',
   amprocrighttype => 'tsvector', amprocnum => '2',
   amproc => 'gtsvector_union' },
@@ -468,6 +468,9 @@
   amproc => 'gtsvector_picksplit' },
 { amprocfamily => 'gist/tsvector_ops', amproclefttype => 'tsvector',
   amprocrighttype => 'tsvector', amprocnum => '7', amproc => 'gtsvector_same' },
+{ amprocfamily => 'gist/tsvector_ops', amproclefttype => 'tsvector',
+  amprocrighttype => 'tsvector', amprocnum => '10',
+  amproc => 'gtsvector_options' },
 { amprocfamily => 'gist/tsquery_ops', amproclefttype => 'tsquery',
   amprocrighttype => 'tsquery', amprocnum => '1',
   amproc => 'gtsquery_consistent(internal,tsquery,int2,oid,internal)' },
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 9264a2e..c22d639 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8430,30 +8430,35 @@
 
 { oid => '3648', descr => 'GiST tsvector support',
   proname => 'gtsvector_compress', prorettype => 'internal',
-  proargtypes => 'internal', prosrc => 'gtsvector_compress' },
+  proargtypes => 'internal internal', prosrc => 'gtsvector_compress' },
 { oid => '3649', descr => 'GiST tsvector support',
   proname => 'gtsvector_decompress', prorettype => 'internal',
-  proargtypes => 'internal', prosrc => 'gtsvector_decompress' },
+  proargtypes => 'internal internal', prosrc => 'gtsvector_decompress' },
 { oid => '3650', descr => 'GiST tsvector support',
   proname => 'gtsvector_picksplit', prorettype => 'internal',
-  proargtypes => 'internal internal', prosrc => 'gtsvector_picksplit' },
+  proargtypes => 'internal internal internal', prosrc => 'gtsvector_picksplit' },
 { oid => '3651', descr => 'GiST tsvector support',
   proname => 'gtsvector_union', prorettype => 'gtsvector',
-  proargtypes => 'internal internal', prosrc => 'gtsvector_union' },
+  proargtypes => 'internal internal internal', prosrc => 'gtsvector_union' },
 { oid => '3652', descr => 'GiST tsvector support',
   proname => 'gtsvector_same', prorettype => 'internal',
-  proargtypes => 'gtsvector gtsvector internal', prosrc => 'gtsvector_same' },
+  proargtypes => 'gtsvector gtsvector internal internal',
+  prosrc => 'gtsvector_same' },
 { oid => '3653', descr => 'GiST tsvector support',
   proname => 'gtsvector_penalty', prorettype => 'internal',
-  proargtypes => 'internal internal internal', prosrc => 'gtsvector_penalty' },
+  proargtypes => 'internal internal internal internal',
+  prosrc => 'gtsvector_penalty' },
 { oid => '3654', descr => 'GiST tsvector support',
   proname => 'gtsvector_consistent', prorettype => 'bool',
-  proargtypes => 'internal tsvector int2 oid internal',
+  proargtypes => 'internal tsvector int2 oid internal internal',
   prosrc => 'gtsvector_consistent' },
 { oid => '3790', descr => 'GiST tsvector support (obsolete)',
   proname => 'gtsvector_consistent', prorettype => 'bool',
   proargtypes => 'internal gtsvector int4 oid internal',
   prosrc => 'gtsvector_consistent_oldsig' },
+{ oid => '3996', descr => 'GiST tsvector support',
+  proname => 'gtsvector_options', prorettype => 'internal',
+  proargtypes => 'internal bool', prosrc => 'gtsvector_options' },
 
 { oid => '3656', descr => 'GIN tsvector support',
   proname => 'gin_extract_tsvector', prorettype => 'internal',
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index b088ff0..9d81489 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -260,6 +260,182 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
    508
 (1 row)
 
+-- Test siglen parameter of GiST tsvector_ops
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1));
+ERROR:  unrecognized parameter "foo"
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=0));
+ERROR:  value 0 out of bounds for option "siglen"
+DETAIL:  Valid values are between "1" and "484".
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=485));
+ERROR:  value 485 out of bounds for option "siglen"
+DETAIL:  Valid values are between "1" and "484".
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100,foo='bar'));
+ERROR:  unrecognized parameter "foo"
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100, siglen = 200));
+ERROR:  parameter "siglen" specified more than once
+CREATE INDEX wowidx2 ON test_tsvector USING gist (a tsvector_ops(siglen=1));
+\d test_tsvector
+            Table "public.test_tsvector"
+ Column |   Type   | Collation | Nullable | Default 
+--------+----------+-----------+----------+---------
+ t      | text     |           |          | 
+ a      | tsvector |           |          | 
+Indexes:
+    "wowidx" gist (a)
+    "wowidx2" gist (a tsvector_ops (siglen='1'))
+
+DROP INDEX wowidx;
+EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on test_tsvector
+         Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+         ->  Bitmap Index Scan on wowidx2
+               Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+(5 rows)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count 
+-------
+   494
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+ count 
+-------
+   508
+(1 row)
+
+DROP INDEX wowidx2;
+CREATE INDEX wowidx ON test_tsvector USING gist (a DEFAULT(siglen=484));
+\d test_tsvector
+            Table "public.test_tsvector"
+ Column |   Type   | Collation | Nullable | Default 
+--------+----------+-----------+----------+---------
+ t      | text     |           |          | 
+ a      | tsvector |           |          | 
+Indexes:
+    "wowidx" gist (a tsvector_ops (siglen='484'))
+
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on test_tsvector
+         Recheck Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+         ->  Bitmap Index Scan on wowidx
+               Index Cond: (a @@ '''wr'' | ''qh'''::tsquery)
+(5 rows)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+ count 
+-------
+    17
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+ count 
+-------
+     6
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+ count 
+-------
+    98
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+ count 
+-------
+    23
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+ count 
+-------
+    39
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count 
+-------
+   494
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+ count 
+-------
+   158
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+ count 
+-------
+   508
+(1 row)
+
 RESET enable_seqscan;
 RESET enable_indexscan;
 RESET enable_bitmapscan;
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 637bfb3..740d5e0 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -87,6 +87,51 @@ SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
 SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
 SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
 
+-- Test siglen parameter of GiST tsvector_ops
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(foo=1));
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=0));
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=485));
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100,foo='bar'));
+CREATE INDEX wowidx1 ON test_tsvector USING gist (a tsvector_ops(siglen=100, siglen = 200));
+
+CREATE INDEX wowidx2 ON test_tsvector USING gist (a tsvector_ops(siglen=1));
+
+\d test_tsvector
+
+DROP INDEX wowidx;
+
+EXPLAIN (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+
+DROP INDEX wowidx2;
+
+CREATE INDEX wowidx ON test_tsvector USING gist (a DEFAULT(siglen=484));
+
+\d test_tsvector
+
+explain (costs off) SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+SELECT count(*) FROM test_tsvector WHERE a @@ any ('{wr,qh}');
+SELECT count(*) FROM test_tsvector WHERE a @@ 'no_such_lexeme';
+SELECT count(*) FROM test_tsvector WHERE a @@ '!no_such_lexeme';
+
 RESET enable_seqscan;
 RESET enable_indexscan;
 RESET enable_bitmapscan;
-- 
2.7.4

