Remove 1MB size limit in tsvector

Started by Ildus Kurbangalievover 8 years ago19 messages

Ildus Kurbangaliev

i.kurbangaliev@postgrespro.ru

over 8 years ago

1 attachment(s)

Hello, hackers!

Historically tsvector type can't hold more than 1MB data.
I want to propose a patch that removes that limit.

That limit is created by 'pos' field from WordEntry, which have only
20 bits for storage.

In the proposed patch I removed this field and instead of it I keep
offsets only at each Nth item in WordEntry's array. Now I set N as 4,
because it gave best results in my benchmarks. It can be increased in
the future without affecting already saved data in database. Also
removing the field improves compression of tsvectors.

I simplified the code by creating functions that can be used to
build tsvectors. There were duplicated code fragments in places where
tsvector was built.

Also new patch frees some space in WordEntry that can be used to
save some additional information about saved words.

-
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

Attachments:

tsvector_stretched_v1.patchtext/x-patchDownload

diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
index 34fe4c5b3c..9585a25003 100644
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES))
 OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
 	dict_simple.o dict_synonym.o dict_thesaurus.o \
 	dict_ispell.o regis.o spell.o \
-	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
+	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o ts_compat.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 35d9ab276c..d66b69baea 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -156,13 +156,10 @@ TSVector
 make_tsvector(ParsedText *prs)
 {
 	int			i,
-				j,
 				lenstr = 0,
-				totallen;
+				totallen,
+				stroff = 0;
 	TSVector	in;
-	WordEntry  *ptr;
-	char	   *str;
-	int			stroff;
 
 	/* Merge duplicate words */
 	if (prs->curwords > 0)
@@ -171,12 +168,8 @@ make_tsvector(ParsedText *prs)
 	/* Determine space needed */
 	for (i = 0; i < prs->curwords; i++)
 	{
-		lenstr += prs->words[i].len;
-		if (prs->words[i].alen)
-		{
-			lenstr = SHORTALIGN(lenstr);
-			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
-		}
+		int npos = prs->words[i].alen ? prs->words[i].pos.apos[0] : 0;
+		INCRSIZE(lenstr, i, prs->words[i].len, npos);
 	}
 
 	if (lenstr > MAXSTRPOS)
@@ -187,41 +180,21 @@ make_tsvector(ParsedText *prs)
 	totallen = CALCDATASIZE(prs->curwords, lenstr);
 	in = (TSVector) palloc0(totallen);
 	SET_VARSIZE(in, totallen);
-	in->size = prs->curwords;
+	TS_SETCOUNT(in, prs->curwords);
 
-	ptr = ARRPTR(in);
-	str = STRPTR(in);
-	stroff = 0;
 	for (i = 0; i < prs->curwords; i++)
 	{
-		ptr->len = prs->words[i].len;
-		ptr->pos = stroff;
-		memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
-		stroff += prs->words[i].len;
-		pfree(prs->words[i].word);
+		int npos = 0;
 		if (prs->words[i].alen)
-		{
-			int			k = prs->words[i].pos.apos[0];
-			WordEntryPos *wptr;
+			npos = prs->words[i].pos.apos[0];
 
-			if (k > 0xFFFF)
-				elog(ERROR, "positions array too long");
+		tsvector_addlexeme(in, i, &stroff, prs->words[i].word, prs->words[i].len,
+			prs->words[i].pos.apos + 1, npos);
 
-			ptr->haspos = 1;
-			stroff = SHORTALIGN(stroff);
-			*(uint16 *) (str + stroff) = (uint16) k;
-			wptr = POSDATAPTR(in, ptr);
-			for (j = 0; j < k; j++)
-			{
-				WEP_SETWEIGHT(wptr[j], 0);
-				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
-			}
-			stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
+		pfree(prs->words[i].word);
+		if (prs->words[i].alen)
 			pfree(prs->words[i].pos.apos);
-		}
-		else
-			ptr->haspos = 0;
-		ptr++;
+
 	}
 
 	if (prs->words)
@@ -251,7 +224,6 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
 	PG_FREE_IF_COPY(in, 1);
 
 	out = make_tsvector(&prs);
-
 	PG_RETURN_TSVECTOR(out);
 }
 
diff --git a/src/backend/tsearch/ts_compat.c b/src/backend/tsearch/ts_compat.c
new file mode 100644
index 0000000000..bb2a62eaf7
--- /dev/null
+++ b/src/backend/tsearch/ts_compat.c
@@ -0,0 +1,83 @@
+#include "postgres.h"
+#include "tsearch/ts_type.h"
+
+/*
+ * Definition of old WordEntry struct in TSVector. Because of limitations
+ * in size (max 1MB for lexemes), the format has changed
+ */
+typedef struct
+{
+	uint32
+				haspos:1,
+				len:11,
+				pos:20;
+} OldWordEntry;
+
+typedef struct
+{
+	uint16		npos;
+	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
+} OldWordEntryPosVector;
+
+#define OLDSTRPTR(x)	( (char *) &(x)->entries[x->size_] )
+#define _OLDPOSVECPTR(x, e)	\
+	((OldWordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define OLDPOSDATALEN(x,e) ( ( (e)->haspos ) ? (_OLDPOSVECPTR(x,e)->npos) : 0 )
+#define OLDPOSDATAPTR(x,e) (_OLDPOSVECPTR(x,e)->pos)
+
+/*
+ * Converts tsvector with the old structure to current.
+ * @orig - tsvector to convert,
+ * @copy - return copy of tsvector, it has a meaning when tsvector doensn't
+ * need to be converted.
+ */
+TSVector
+tsvector_upgrade(Datum orig, bool copy)
+{
+	int	       i,
+	           dataoff = 0,
+			   datalen = 0,
+			   totallen;
+	TSVector   in,
+			   out;
+
+	in = (TSVector) PG_DETOAST_DATUM(orig);
+
+	/* If already in new format, return as is */
+	if (in->size_ & TS_FLAG_STRETCHED)
+	{
+		TSVector out;
+
+		if (!copy)
+			return in;
+
+		out = (TSVector) palloc(VARSIZE(in));
+		memcpy(out, in, VARSIZE(in));
+		return out;
+	}
+
+	/*
+	 * Calculate required size.
+	 * We don't check any sizes here because old format was limited with 1MB
+	 */
+	for (i = 0; i < in->size_; i++)
+	{
+		OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+		INCRSIZE(datalen, i, entry->len, OLDPOSDATALEN(in, entry));
+	}
+
+	totallen = CALCDATASIZE(in->size_, datalen);
+	out = (TSVector) palloc0(totallen);
+	SET_VARSIZE(out, totallen);
+	TS_SETCOUNT(out, in->size_);
+
+	for (i = 0; i < in->size_; i++)
+	{
+		OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+		tsvector_addlexeme(out, i, &dataoff,
+				OLDSTRPTR(in) + entry->pos, entry->len,
+				OLDPOSDATAPTR(in, entry), OLDPOSDATALEN(in, entry));
+	}
+
+	return out;
+}
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 320c7f1a61..9b2fc4be04 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -202,7 +202,8 @@ compute_tsvector_stats(VacAttrStats *stats,
 		TSVector	vector;
 		WordEntry  *curentryptr;
 		char	   *lexemesptr;
-		int			j;
+		int			j,
+					pos;
 
 		vacuum_delay_point();
 
@@ -236,7 +237,9 @@ compute_tsvector_stats(VacAttrStats *stats,
 		 */
 		lexemesptr = STRPTR(vector);
 		curentryptr = ARRPTR(vector);
-		for (j = 0; j < vector->size; j++)
+
+		INITPOS(pos);
+		for (j = 0; j < TS_COUNT(vector); j++)
 		{
 			bool		found;
 
@@ -246,8 +249,8 @@ compute_tsvector_stats(VacAttrStats *stats,
 			 * make a copy of it.  This way we can free the tsvector value
 			 * once we've processed all its lexemes.
 			 */
-			hash_key.lexeme = lexemesptr + curentryptr->pos;
-			hash_key.length = curentryptr->len;
+			hash_key.lexeme = lexemesptr + pos;
+			hash_key.length = ENTRY_LEN(vector, curentryptr);
 
 			/* Lookup current lexeme in hashtable, adding it if new */
 			item = (TrackItem *) hash_search(lexemes_tab,
@@ -280,7 +283,7 @@ compute_tsvector_stats(VacAttrStats *stats,
 			}
 
 			/* Advance to the next WordEntry in the tsvector */
-			curentryptr++;
+			INCRPTR(vector, curentryptr, pos);
 		}
 
 		/* If the vector was toasted, free the detoasted copy. */
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 83a939dfd5..fc39dfcb3f 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -67,23 +67,26 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
 	TSVector	vector = PG_GETARG_TSVECTOR(0);
 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	Datum	   *entries = NULL;
+	int			tscount = TS_COUNT(vector);
 
-	*nentries = vector->size;
-	if (vector->size > 0)
+	*nentries = tscount;
+	if (tscount > 0)
 	{
 		int			i;
-		WordEntry  *we = ARRPTR(vector);
+		uint32		pos;
 
-		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
+		WordEntry  *we = ARRPTR(vector);
+		entries = (Datum *) palloc(sizeof(Datum) * tscount);
 
-		for (i = 0; i < vector->size; i++)
+		INITPOS(pos);
+		for (i = 0; i < tscount; i++)
 		{
 			text	   *txt;
 
-			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
+			txt = cstring_to_text_with_len(STRPTR(vector) + pos,
+				ENTRY_LEN(vector, we));
 			entries[i] = PointerGetDatum(txt);
-
-			we++;
+			INCRPTR(vector, we, pos);
 		}
 	}
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 7ce2699b5c..18d3de3725 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -192,28 +192,33 @@ gtsvector_compress(PG_FUNCTION_ARGS)
 		int32	   *arr;
 		WordEntry  *ptr = ARRPTR(val);
 		char	   *words = STRPTR(val);
+		const int	tscount = TS_COUNT(val);
+		uint32		pos;
 
-		len = CALCGTSIZE(ARRKEY, val->size);
+		len = CALCGTSIZE(ARRKEY, tscount);
 		res = (SignTSVector *) palloc(len);
 		SET_VARSIZE(res, len);
 		res->flag = ARRKEY;
 		arr = GETARR(res);
-		len = val->size;
+		len = tscount;
+
+		INITPOS(pos);
 		while (len--)
 		{
 			pg_crc32	c;
 
 			INIT_LEGACY_CRC32(c);
-			COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len);
+			COMP_LEGACY_CRC32(c, words + pos, ENTRY_LEN(val, ptr));
 			FIN_LEGACY_CRC32(c);
 
 			*arr = *(int32 *) &c;
 			arr++;
-			ptr++;
+
+			INCRPTR(val, ptr, pos);
 		}
 
-		len = uniqueint(GETARR(res), val->size);
-		if (len != val->size)
+		len = uniqueint(GETARR(res), tscount);
+		if (len != tscount)
 		{
 			/*
 			 * there is a collision of hash-function; len is always less than
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 4577bcc0b8..26252ca353 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -53,43 +53,38 @@ word_distance(int32 w)
 static int
 cnt_length(TSVector t)
 {
-	WordEntry  *ptr = ARRPTR(t),
-			   *end = (WordEntry *) STRPTR(t);
-	int			len = 0;
+	int		i,
+			len = 0;
 
-	while (ptr < end)
+	for (i = 0; i < TS_COUNT(t); i++)
 	{
-		int			clen = POSDATALEN(t, ptr);
-
-		if (clen == 0)
-			len += 1;
-		else
-			len += clen;
-
-		ptr++;
+		WordEntry *entry = UNWRAP_ENTRY(t, ARRPTR(t) + i);
+		Assert(!entry->hasoff);
+		len += (entry->npos == 0) ? 1 : entry->npos;
 	}
 
 	return len;
 }
 
 
-#define WordECompareQueryItem(e,q,p,i,m) \
-	tsCompareString((q) + (i)->distance, (i)->length,	\
-					(e) + (p)->pos, (p)->len, (m))
-
-
 /*
  * Returns a pointer to a WordEntry's array corresponding to 'item' from
  * tsvector 't'. 'q' is the TSQuery containing 'item'.
  * Returns NULL if not found.
  */
-static WordEntry *
+static int
 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 {
-	WordEntry  *StopLow = ARRPTR(t);
-	WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
-	WordEntry  *StopMiddle = StopHigh;
-	int			difference;
+#define WordECompareQueryItem(s,l,q,i,m) \
+	tsCompareString((q) + (i)->distance, (i)->length,	\
+					s, l, (m))
+
+	int StopLow = 0;
+	int StopHigh = TS_COUNT(t);
+	int StopMiddle = StopHigh;
+	int	difference;
+	char *lexeme;
+	WordEntry *we;
 
 	*nitem = 0;
 
@@ -97,7 +92,12 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
+		lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+		Assert(!we->hasoff);
+		difference = WordECompareQueryItem(lexeme, we->len,
+			GETOPERAND(q), item, false);
+
 		if (difference == 0)
 		{
 			StopHigh = StopMiddle;
@@ -117,18 +117,22 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 
 		*nitem = 0;
 
-		while (StopMiddle < (WordEntry *) STRPTR(t) &&
-			   WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
+		while (StopMiddle < TS_COUNT(t))
 		{
+			lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+			Assert(!we->hasoff);
+			if (WordECompareQueryItem(lexeme, we->len, GETOPERAND(q), item, true) != 0)
+				break;
+
 			(*nitem)++;
 			StopMiddle++;
 		}
 	}
 
-	return (*nitem > 0) ? StopHigh : NULL;
+	return (*nitem > 0) ? StopHigh : -1;
 }
 
-
 /*
  * sort QueryOperands by (length, word)
  */
@@ -200,15 +204,13 @@ SortAndUniqItems(TSQuery q, int *size)
 static float
 calc_rank_and(const float *w, TSVector t, TSQuery q)
 {
-	WordEntryPosVector **pos;
-	WordEntryPosVector1 posnull;
-	WordEntryPosVector *POSNULL;
+	WordEntryPos **pos;
+	uint16 *npos;
+	WordEntryPos posnull[1] = {0};
 	int			i,
 				k,
 				l,
 				p;
-	WordEntry  *entry,
-			   *firstentry;
 	WordEntryPos *post,
 			   *ct;
 	int32		dimt,
@@ -225,41 +227,55 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 		pfree(item);
 		return calc_rank_or(w, t, q);
 	}
-	pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+	pos = (WordEntryPos **) palloc0(sizeof(WordEntryPos *) * q->size);
+	npos = (uint16 *) palloc0(sizeof(uint16) * q->size);
 
-	/* A dummy WordEntryPos array to use when haspos is false */
-	posnull.npos = 1;
-	posnull.pos[0] = 0;
-	WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
-	POSNULL = (WordEntryPosVector *) &posnull;
+	/* posnull is a dummy WordEntryPos array to use when npos == 0 */
+	WEP_SETPOS(posnull[0], MAXENTRYPOS - 1);
 
 	for (i = 0; i < size; i++)
 	{
-		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
-		if (!entry)
+		int   idx = find_wordentry(t, q, item[i], &nitem),
+			  firstidx;
+
+		if (idx == -1)
 			continue;
 
-		while (entry - firstentry < nitem)
+		firstidx = idx;
+
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
-				pos[i] = _POSVECPTR(t, entry);
+			WordEntry *entry;
+
+			char *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
+			{
+				pos[i] = POSDATAPTR(lexeme, entry->len);
+				npos[i] = entry->npos;
+			}
 			else
-				pos[i] = POSNULL;
+			{
+				pos[i] = posnull;
+				npos[i] = 1;
+			}
+
+			post = pos[i];
+			dimt = npos[i];
 
-			dimt = pos[i]->npos;
-			post = pos[i]->pos;
 			for (k = 0; k < i; k++)
 			{
 				if (!pos[k])
 					continue;
-				lenct = pos[k]->npos;
-				ct = pos[k]->pos;
+				lenct = npos[k];
+				ct = pos[k];
 				for (l = 0; l < dimt; l++)
 				{
 					for (p = 0; p < lenct; p++)
 					{
 						dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
-						if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
+						if (dist || (dist == 0 && (pos[i] == posnull || pos[k] == posnull)))
 						{
 							float		curw;
 
@@ -272,10 +288,11 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 				}
 			}
 
-			entry++;
+			idx++;
 		}
 	}
 	pfree(pos);
+	pfree(npos);
 	pfree(item);
 	return res;
 }
@@ -283,9 +300,8 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 static float
 calc_rank_or(const float *w, TSVector t, TSQuery q)
 {
-	WordEntry  *entry,
-			   *firstentry;
-	WordEntryPosVector1 posnull;
+	/* A dummy WordEntryPos array to use when lexeme hasn't positions */
+	WordEntryPos posnull[1] = {0};
 	WordEntryPos *post;
 	int32		dimt,
 				j,
@@ -295,33 +311,36 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 	QueryOperand **item;
 	int			size = q->size;
 
-	/* A dummy WordEntryPos array to use when haspos is false */
-	posnull.npos = 1;
-	posnull.pos[0] = 0;
-
 	item = SortAndUniqItems(q, &size);
 
 	for (i = 0; i < size; i++)
 	{
+		int         idx  , firstidx;
 		float		resj,
 					wjm;
-		int32		jm;
+		int32       jm;
 
-		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
-		if (!entry)
+		idx = find_wordentry(t, q, item[i], &nitem);
+		if (idx == -1)
 			continue;
 
-		while (entry - firstentry < nitem)
+		firstidx = idx;
+
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
+			WordEntry *entry;
+			char *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
 			{
-				dimt = POSDATALEN(t, entry);
-				post = POSDATAPTR(t, entry);
+				dimt = entry->npos;
+				post = POSDATAPTR(lexeme, entry->len);
 			}
 			else
 			{
-				dimt = posnull.npos;
-				post = posnull.pos;
+				dimt = 1;
+				post = posnull;
 			}
 
 			resj = 0.0;
@@ -345,7 +364,7 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 */
 			res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
 
-			entry++;
+			idx++;
 		}
 	}
 	if (size > 0)
@@ -361,7 +380,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 	float		res = 0.0;
 	int			len;
 
-	if (!t->size || !q->size)
+	if (!TS_COUNT(t) || !q->size)
 		return 0.0;
 
 	/* XXX: What about NOT? */
@@ -373,7 +392,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 	if (res < 0)
 		res = 1e-20f;
 
-	if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+	if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(t) > 0)
 		res /= log((double) (cnt_length(t) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_LENGTH)
@@ -385,11 +404,11 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 
 	/* RANK_NORM_EXTDIST not applicable */
 
-	if ((method & RANK_NORM_UNIQ) && t->size > 0)
-		res /= (float) (t->size);
+	if ((method & RANK_NORM_UNIQ) && TS_COUNT(t) > 0)
+		res /= (float) (TS_COUNT(t));
 
-	if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
-		res /= log((double) (t->size + 1)) / log(2.0);
+	if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(t) > 0)
+		res /= log((double) (TS_COUNT(t) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_RDIVRPLUS1)
 		res /= (res + 1);
@@ -504,13 +523,13 @@ typedef struct
 		struct
 		{						/* compiled doc representation */
 			QueryItem **items;
-			int16		nitem;
+			int32		nitem;
 		}			query;
 		struct
 		{						/* struct is used for preparing doc
 								 * representation */
 			QueryItem  *item;
-			WordEntry  *entry;
+			int32		idx;
 		}			map;
 	}			data;
 	WordEntryPos pos;
@@ -526,10 +545,10 @@ compareDocR(const void *va, const void *vb)
 	{
 		if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
 		{
-			if (a->data.map.entry == b->data.map.entry)
+			if (a->data.map.idx == b->data.map.idx)
 				return 0;
 
-			return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+			return (a->data.map.idx > b->data.map.idx) ? 1 : -1;
 		}
 
 		return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
@@ -724,9 +743,6 @@ static DocRepresentation *
 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 {
 	QueryItem  *item = GETQUERY(qr->query);
-	WordEntry  *entry,
-			   *firstentry;
-	WordEntryPos *post;
 	int32		dimt,			/* number of 'post' items */
 				j,
 				i,
@@ -743,29 +759,38 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 	 */
 	for (i = 0; i < qr->query->size; i++)
 	{
-		QueryOperand *curoperand;
+		int				idx,
+						firstidx;
+		QueryOperand   *curoperand;
+		WordEntryPos   *post;
 
 		if (item[i].type != QI_VAL)
 			continue;
 
 		curoperand = &item[i].qoperand;
 
-		firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
-		if (!entry)
+		idx = find_wordentry(txt, qr->query, curoperand, &nitem);
+		if (idx < 0)
 			continue;
 
+		firstidx = idx;
+
 		/* iterations over entries in tsvector */
-		while (entry - firstentry < nitem)
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
+			WordEntry	*entry;
+			char		*lex = tsvector_getlexeme(txt, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
 			{
-				dimt = POSDATALEN(txt, entry);
-				post = POSDATAPTR(txt, entry);
+				dimt = entry->npos;
+				post = POSDATAPTR(lex, entry->len);
 			}
 			else
 			{
 				/* ignore words without positions */
-				entry++;
+				idx++;
 				continue;
 			}
 
@@ -782,13 +807,12 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 					curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
 				{
 					doc[cur].pos = post[j];
-					doc[cur].data.map.entry = entry;
+					doc[cur].data.map.idx = idx;
 					doc[cur].data.map.item = (QueryItem *) curoperand;
 					cur++;
 				}
 			}
-
-			entry++;
+			idx++;
 		}
 	}
 
@@ -814,7 +838,7 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 		while (rptr - doc < cur)
 		{
 			if (rptr->pos == (rptr - 1)->pos &&
-				rptr->data.map.entry == (rptr - 1)->data.map.entry)
+				rptr->data.map.idx == (rptr - 1)->data.map.idx)
 			{
 				storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
 				storage.data.query.nitem++;
@@ -917,7 +941,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 		NExtent++;
 	}
 
-	if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+	if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(txt) > 0)
 		Wdoc /= log((double) (cnt_length(txt) + 1));
 
 	if (method & RANK_NORM_LENGTH)
@@ -930,11 +954,11 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 	if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
 		Wdoc /= ((double) NExtent) / SumDist;
 
-	if ((method & RANK_NORM_UNIQ) && txt->size > 0)
-		Wdoc /= (double) (txt->size);
+	if ((method & RANK_NORM_UNIQ) && TS_COUNT(txt) > 0)
+		Wdoc /= (double) (TS_COUNT(txt));
 
-	if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
-		Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+	if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(txt) > 0)
+		Wdoc /= log((double) (TS_COUNT(txt) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_RDIVRPLUS1)
 		Wdoc /= (Wdoc + 1);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 6f66c1f58c..57f1de8d14 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -22,9 +22,9 @@
 
 typedef struct
 {
-	WordEntry	entry;			/* must be first! */
-	WordEntryPos *pos;
-	int			poslen;			/* number of elements in pos */
+	WordEntry		entry;			/* must be first! */
+	size_t			offset;			/* offset of lexeme in some buffer */
+	WordEntryPos   *pos;
 } WordEntryIN;
 
 
@@ -79,14 +79,30 @@ uniquePos(WordEntryPos *a, int l)
 
 /* Compare two WordEntryIN values for qsort */
 static int
-compareentry(const void *va, const void *vb, void *arg)
+compareentry_in(const void *va, const void *vb, void *arg)
 {
 	const WordEntryIN *a = (const WordEntryIN *) va;
 	const WordEntryIN *b = (const WordEntryIN *) vb;
 	char	   *BufferStr = (char *) arg;
 
-	return tsCompareString(&BufferStr[a->entry.pos], a->entry.len,
-						   &BufferStr[b->entry.pos], b->entry.len,
+	return tsCompareString(&BufferStr[a->offset], a->entry.len,
+						   &BufferStr[b->offset], b->entry.len,
+						   false);
+}
+
+/* Compare two WordEntry values for qsort */
+static int
+compareentry(const void *va, const void *vb, void *arg)
+{
+	const WordEntry *a = (const WordEntry *) va;
+	const WordEntry *b = (const WordEntry *) vb;
+	TSVector	   tsv = (TSVector) arg;
+
+	uint32 offset1 = tsvector_getoffset(tsv, a - ARRPTR(tsv), NULL),
+		   offset2 = tsvector_getoffset(tsv, b - ARRPTR(tsv), NULL);
+
+	return tsCompareString(STRPTR(tsv) + offset1, ENTRY_LEN(tsv, a),
+						   STRPTR(tsv) + offset2, ENTRY_LEN(tsv, b),
 						   false);
 }
 
@@ -97,14 +113,15 @@ compareentry(const void *va, const void *vb, void *arg)
 static int
 uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
 {
-	int			buflen;
+	int			buflen,
+				i = 0;
 	WordEntryIN *ptr,
 			   *res;
 
 	Assert(l >= 1);
 
 	if (l > 1)
-		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
+		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry_in,
 				  (void *) buf);
 
 	buflen = 0;
@@ -112,67 +129,76 @@ uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
 	ptr = a + 1;
 	while (ptr - a < l)
 	{
+		Assert(!ptr->entry.hasoff);
+
 		if (!(ptr->entry.len == res->entry.len &&
-			  strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
-					  res->entry.len) == 0))
+			  strncmp(&buf[ptr->offset], &buf[res->offset], res->entry.len) == 0))
 		{
 			/* done accumulating data into *res, count space needed */
+			buflen = SHORTALIGN(buflen);
+			if (i++ % TS_OFFSET_STRIDE == 0)
+			{
+				buflen = INTALIGN(buflen);
+				buflen += sizeof(WordEntry);
+			}
+
 			buflen += res->entry.len;
-			if (res->entry.haspos)
+			if (res->entry.npos)
 			{
-				res->poslen = uniquePos(res->pos, res->poslen);
+				res->entry.npos = uniquePos(res->pos, res->entry.npos);
 				buflen = SHORTALIGN(buflen);
-				buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+				buflen += res->entry.npos * sizeof(WordEntryPos);
 			}
 			res++;
 			if (res != ptr)
-				memcpy(res, ptr, sizeof(WordEntryIN));
+				*res = *ptr;
 		}
-		else if (ptr->entry.haspos)
+		else if (ptr->entry.npos)
 		{
-			if (res->entry.haspos)
+			if (res->entry.npos)
 			{
 				/* append ptr's positions to res's positions */
-				int			newlen = ptr->poslen + res->poslen;
+				int newlen = ptr->entry.npos + res->entry.npos;
 
 				res->pos = (WordEntryPos *)
 					repalloc(res->pos, newlen * sizeof(WordEntryPos));
-				memcpy(&res->pos[res->poslen], ptr->pos,
-					   ptr->poslen * sizeof(WordEntryPos));
-				res->poslen = newlen;
+				memcpy(&res->pos[res->entry.npos], ptr->pos,
+					   ptr->entry.npos * sizeof(WordEntryPos));
+				res->entry.npos = newlen;
 				pfree(ptr->pos);
 			}
 			else
 			{
 				/* just give ptr's positions to pos */
-				res->entry.haspos = 1;
+				res->entry.npos = ptr->entry.npos;
 				res->pos = ptr->pos;
-				res->poslen = ptr->poslen;
 			}
 		}
 		ptr++;
 	}
 
 	/* count space needed for last item */
+	if (i % TS_OFFSET_STRIDE == 0)
+	{
+		buflen = INTALIGN(buflen);
+		buflen += sizeof(WordEntry);
+	}
+	else
+		buflen = SHORTALIGN(buflen);
+
 	buflen += res->entry.len;
-	if (res->entry.haspos)
+
+	if (res->entry.npos)
 	{
-		res->poslen = uniquePos(res->pos, res->poslen);
+		res->entry.npos = uniquePos(res->pos, res->entry.npos);
 		buflen = SHORTALIGN(buflen);
-		buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+		buflen += res->entry.npos * sizeof(WordEntryPos);
 	}
 
 	*outbuflen = buflen;
-	return res + 1 - a;
+	return res + 1 -a;
 }
 
-static int
-WordEntryCMP(WordEntry *a, WordEntry *b, char *buf)
-{
-	return compareentry(a, b, buf);
-}
-
-
 Datum
 tsvectorin(PG_FUNCTION_ARGS)
 {
@@ -181,7 +207,6 @@ tsvectorin(PG_FUNCTION_ARGS)
 	WordEntryIN *arr;
 	int			totallen;
 	int			arrlen;			/* allocated size of arr */
-	WordEntry  *inarr;
 	int			len = 0;
 	TSVector	in;
 	int			i;
@@ -189,7 +214,6 @@ tsvectorin(PG_FUNCTION_ARGS)
 	int			toklen;
 	WordEntryPos *pos;
 	int			poslen;
-	char	   *strbuf;
 	int			stroff;
 
 	/*
@@ -238,23 +262,13 @@ tsvectorin(PG_FUNCTION_ARGS)
 			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
 			cur = tmpbuf + dist;
 		}
+		arr[len].entry.hasoff = 0;
 		arr[len].entry.len = toklen;
-		arr[len].entry.pos = cur - tmpbuf;
+		arr[len].offset = cur - tmpbuf;
+		arr[len].entry.npos = poslen;
+		arr[len].pos = (poslen != 0)? pos : NULL;
 		memcpy((void *) cur, (void *) token, toklen);
 		cur += toklen;
-
-		if (poslen != 0)
-		{
-			arr[len].entry.haspos = 1;
-			arr[len].pos = pos;
-			arr[len].poslen = poslen;
-		}
-		else
-		{
-			arr[len].entry.haspos = 0;
-			arr[len].pos = NULL;
-			arr[len].poslen = 0;
-		}
 		len++;
 	}
 
@@ -273,36 +287,18 @@ tsvectorin(PG_FUNCTION_ARGS)
 	totallen = CALCDATASIZE(len, buflen);
 	in = (TSVector) palloc0(totallen);
 	SET_VARSIZE(in, totallen);
-	in->size = len;
-	inarr = ARRPTR(in);
-	strbuf = STRPTR(in);
+	TS_SETCOUNT(in, len);
 	stroff = 0;
 	for (i = 0; i < len; i++)
 	{
-		memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
-		arr[i].entry.pos = stroff;
-		stroff += arr[i].entry.len;
-		if (arr[i].entry.haspos)
-		{
-			if (arr[i].poslen > 0xFFFF)
-				elog(ERROR, "positions array too long");
-
-			/* Copy number of positions */
-			stroff = SHORTALIGN(stroff);
-			*(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
-			stroff += sizeof(uint16);
-
-			/* Copy positions */
-			memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
-			stroff += arr[i].poslen * sizeof(WordEntryPos);
+		tsvector_addlexeme(in, i, &stroff, &tmpbuf[arr[i].offset],
+				arr[i].entry.len, arr[i].pos, arr[i].entry.npos);
 
+		if (arr[i].entry.npos)
 			pfree(arr[i].pos);
-		}
-		inarr[i] = arr[i].entry;
 	}
 
-	Assert((strbuf + stroff - (char *) in) == totallen);
-
+	Assert((STRPTR(in) + stroff - (char *) in) == totallen);
 	PG_RETURN_TSVECTOR(in);
 }
 
@@ -313,28 +309,36 @@ tsvectorout(PG_FUNCTION_ARGS)
 	char	   *outbuf;
 	int32		i,
 				lenbuf = 0,
-				pp;
+				pp,
+				tscount = TS_COUNT(out);
+	uint32		pos;
 	WordEntry  *ptr = ARRPTR(out);
 	char	   *curbegin,
 			   *curin,
 			   *curout;
 
-	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
-	for (i = 0; i < out->size; i++)
+	lenbuf = tscount * 2 /* '' */ + tscount - 1 /* space */ + 2 /* \0 */ ;
+	for (i = 0; i < tscount; i++)
 	{
-		lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
-		if (ptr[i].haspos)
-			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
+		int npos = ENTRY_NPOS(out, ptr + i);
+		lenbuf += ENTRY_LEN(out, ptr + i) * 2 * pg_database_encoding_max_length() /* for escape */ ;
+		if (npos)
+			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * npos;
 	}
 
 	curout = outbuf = (char *) palloc(lenbuf);
-	for (i = 0; i < out->size; i++)
+
+	INITPOS(pos);
+	for (i = 0; i < tscount; i++)
 	{
-		curbegin = curin = STRPTR(out) + ptr->pos;
+		int		lex_len = ENTRY_LEN(out, ptr),
+				npos = ENTRY_NPOS(out, ptr);
+
+		curbegin = curin = STRPTR(out) + pos;
 		if (i != 0)
 			*curout++ = ' ';
 		*curout++ = '\'';
-		while (curin - curbegin < ptr->len)
+		while (curin - curbegin < lex_len)
 		{
 			int			len = pg_mblen(curin);
 
@@ -348,12 +352,12 @@ tsvectorout(PG_FUNCTION_ARGS)
 		}
 
 		*curout++ = '\'';
-		if ((pp = POSDATALEN(out, ptr)) != 0)
+		if ((pp = npos) != 0)
 		{
 			WordEntryPos *wptr;
 
 			*curout++ = ':';
-			wptr = POSDATAPTR(out, ptr);
+			wptr = POSDATAPTR(curbegin, lex_len);
 			while (pp)
 			{
 				curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
@@ -379,7 +383,8 @@ tsvectorout(PG_FUNCTION_ARGS)
 				wptr++;
 			}
 		}
-		ptr++;
+
+		INCRPTR(out, ptr, pos);
 	}
 
 	*curout = '\0';
@@ -406,35 +411,38 @@ tsvectorsend(PG_FUNCTION_ARGS)
 	StringInfoData buf;
 	int			i,
 				j;
+	uint32		pos;
 	WordEntry  *weptr = ARRPTR(vec);
 
 	pq_begintypsend(&buf);
+	pq_sendint(&buf, TS_COUNT(vec), sizeof(int32));
 
-	pq_sendint(&buf, vec->size, sizeof(int32));
-	for (i = 0; i < vec->size; i++)
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(vec); i++)
 	{
-		uint16		npos;
+		char   *lexeme	= STRPTR(vec) + pos;
+		int		npos	= ENTRY_NPOS(vec, weptr),
+				lex_len = ENTRY_LEN(vec, weptr);
 
 		/*
 		 * the strings in the TSVector array are not null-terminated, so we
 		 * have to send the null-terminator separately
 		 */
-		pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+		pq_sendtext(&buf, lexeme, lex_len);
 		pq_sendbyte(&buf, '\0');
-
-		npos = POSDATALEN(vec, weptr);
 		pq_sendint(&buf, npos, sizeof(uint16));
 
 		if (npos > 0)
 		{
-			WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
+			WordEntryPos *wepptr = POSDATAPTR(lexeme, lex_len);
 
 			for (j = 0; j < npos; j++)
 				pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
 		}
-		weptr++;
+		INCRPTR(vec, weptr, pos);
 	}
 
+	PG_FREE_IF_COPY(vec, 0);
 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 }
 
@@ -443,14 +451,16 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 	TSVector	vec;
-	int			i;
-	int32		nentries;
-	int			datalen;		/* number of bytes used in the variable size
+	int			i,
+				datalen;		/* number of bytes used in the variable size
 								 * area after fixed size TSVector header and
 								 * WordEntries */
+	int32		nentries;
 	Size		hdrlen;
 	Size		len;			/* allocated size of vec */
 	bool		needSort = false;
+	char	   *prev_lexeme = NULL;
+	int			prev_lex_len;
 
 	nentries = pq_getmsgint(buf, sizeof(int32));
 	if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
@@ -460,16 +470,17 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 
 	len = hdrlen * 2;			/* times two to make room for lexemes */
 	vec = (TSVector) palloc0(len);
-	vec->size = nentries;
+	TS_SETCOUNT(vec, nentries);
 
 	datalen = 0;
 	for (i = 0; i < nentries; i++)
 	{
-		const char *lexeme;
+		char	   *lexeme,
+				   *lexeme_out;
 		uint16		npos;
-		size_t		lex_len;
+		int			lex_len;
 
-		lexeme = pq_getmsgstring(buf);
+		lexeme = (char *) pq_getmsgstring(buf);
 		npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
 
 		/* sanity checks */
@@ -489,62 +500,42 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 		 *
 		 * But make sure the buffer is large enough first.
 		 */
-		while (hdrlen + SHORTALIGN(datalen + lex_len) +
-			   (npos + 1) * sizeof(WordEntryPos) >= len)
+		while (hdrlen + SHORTALIGN(datalen + lex_len) + sizeof(WordEntry) +
+			   npos * sizeof(WordEntryPos) >= len)
 		{
 			len *= 2;
 			vec = (TSVector) repalloc(vec, len);
 		}
 
-		vec->entries[i].haspos = (npos > 0) ? 1 : 0;
-		vec->entries[i].len = lex_len;
-		vec->entries[i].pos = datalen;
-
-		memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
-
-		datalen += lex_len;
-
-		if (i > 0 && WordEntryCMP(&vec->entries[i],
-								  &vec->entries[i - 1],
-								  STRPTR(vec)) <= 0)
+		if (prev_lexeme && tsCompareString(lexeme, lex_len,
+						prev_lexeme, prev_lex_len, false) <= 0)
 			needSort = true;
 
-		/* Receive positions */
+		lexeme_out = tsvector_addlexeme(vec, i, &datalen, lexeme,
+				lex_len, NULL, npos);
 		if (npos > 0)
 		{
-			uint16		j;
-			WordEntryPos *wepptr;
-
-			/*
-			 * Pad to 2-byte alignment if necessary. Though we used palloc0
-			 * for the initial allocation, subsequent repalloc'd memory areas
-			 * are not initialized to zero.
-			 */
-			if (datalen != SHORTALIGN(datalen))
-			{
-				*(STRPTR(vec) + datalen) = '\0';
-				datalen = SHORTALIGN(datalen);
-			}
+			WordEntryPos   *wepptr;
+			int				j;
 
-			memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
-
-			wepptr = POSDATAPTR(vec, &vec->entries[i]);
+			wepptr = POSDATAPTR(lexeme_out, lex_len);
 			for (j = 0; j < npos; j++)
 			{
 				wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
 				if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
 					elog(ERROR, "position information is misordered");
 			}
-
-			datalen += (npos + 1) * sizeof(WordEntry);
 		}
+
+		prev_lexeme = lexeme;
+		prev_lex_len = lex_len;
 	}
 
 	SET_VARSIZE(vec, hdrlen + datalen);
 
 	if (needSort)
-		qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry),
-				  compareentry, (void *) STRPTR(vec));
+		qsort_arg((void *) ARRPTR(vec), TS_COUNT(vec), sizeof(WordEntry),
+				  compareentry, (void *) vec);
 
 	PG_RETURN_TSVECTOR(vec);
 }
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 822520299e..9f53aae357 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -33,10 +33,10 @@
 
 typedef struct
 {
-	WordEntry  *arrb;
-	WordEntry  *arre;
-	char	   *values;
-	char	   *operand;
+	TSVector			vec;
+	int					bidx;
+	int					eidx;
+	char			   *operand;
 } CHKVAL;
 
 
@@ -71,7 +71,7 @@ static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
 static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
 
 /*
- * Order: haspos, len, word, for all positions (pos, weight)
+ * Order: npos, len, word, for all positions (pos, weight)
  */
 static int
 silly_cmp_tsvector(const TSVector a, const TSVector b)
@@ -80,9 +80,9 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 		return -1;
 	else if (VARSIZE(a) > VARSIZE(b))
 		return 1;
-	else if (a->size < b->size)
+	else if (TS_COUNT(a) < TS_COUNT(b))
 		return -1;
-	else if (a->size > b->size)
+	else if (TS_COUNT(a) > TS_COUNT(b))
 		return 1;
 	else
 	{
@@ -90,28 +90,40 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 		WordEntry  *bptr = ARRPTR(b);
 		int			i = 0;
 		int			res;
+		uint32		pos1,
+					pos2;
 
+		INITPOS(pos1);
+		INITPOS(pos2);
 
-		for (i = 0; i < a->size; i++)
+		for (i = 0; i < TS_COUNT(a); i++)
 		{
-			if (aptr->haspos != bptr->haspos)
-			{
-				return (aptr->haspos > bptr->haspos) ? -1 : 1;
-			}
-			else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
+			char   *lex1 = STRPTR(a) + pos1,
+				   *lex2 = STRPTR(b) + pos2;
+			int		npos1 = ENTRY_NPOS(a, aptr),
+					npos2 = ENTRY_NPOS(b, bptr);
+			int		len1 = ENTRY_LEN(a, aptr),
+					len2 = ENTRY_LEN(b, bptr);
+
+			if ((npos1 == 0 || npos2 == 0) && npos1 != npos2)
+				return npos1 > npos2? -1 : 1;
+			else if ((res = tsCompareString(lex1, len1, lex2, len2, false)) != 0)
 			{
 				return res;
 			}
-			else if (aptr->haspos)
+			else if (npos1 > 0)
 			{
-				WordEntryPos *ap = POSDATAPTR(a, aptr);
-				WordEntryPos *bp = POSDATAPTR(b, bptr);
+				WordEntryPos *ap,
+							 *bp;
 				int			j;
 
-				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
-					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
+				ap = POSDATAPTR(lex1, len1);
+				bp = POSDATAPTR(lex2, len2);
+
+				if (npos1 != npos2)
+					return (npos1 > npos2) ? -1 : 1;
 
-				for (j = 0; j < POSDATALEN(a, aptr); j++)
+				for (j = 0; j < npos1; j++)
 				{
 					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
 					{
@@ -125,8 +137,8 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 				}
 			}
 
-			aptr++;
-			bptr++;
+			INCRPTR(a, aptr, pos1);
+			INCRPTR(b, bptr, pos2);
 		}
 	}
 
@@ -161,27 +173,29 @@ tsvector_strip(PG_FUNCTION_ARGS)
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	TSVector	out;
 	int			i,
+				count,
+				posout = 0,
+				pos,
 				len = 0;
-	WordEntry  *arrin = ARRPTR(in),
-			   *arrout;
-	char	   *cur;
+	WordEntry  *entryin = ARRPTR(in);
 
-	for (i = 0; i < in->size; i++)
-		len += arrin[i].len;
+	count = TS_COUNT(in);
+	for (i = 0; i < count; i++)
+		INCRSIZE(len, i, ENTRY_LEN(in, ARRPTR(in) + i), 0);
 
-	len = CALCDATASIZE(in->size, len);
+	len = CALCDATASIZE(count, len);
 	out = (TSVector) palloc0(len);
 	SET_VARSIZE(out, len);
-	out->size = in->size;
-	arrout = ARRPTR(out);
-	cur = STRPTR(out);
-	for (i = 0; i < in->size; i++)
+	TS_SETCOUNT(out, count);
+
+	INITPOS(pos);
+	for (i = 0; i < count; i++)
 	{
-		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
-		arrout[i].haspos = 0;
-		arrout[i].len = arrin[i].len;
-		arrout[i].pos = cur - STRPTR(out);
-		cur += arrout[i].len;
+		tsvector_addlexeme(out, i, &posout,
+				STRPTR(in) + pos, ENTRY_LEN(in, entryin),
+				NULL, 0);
+
+		INCRPTR(in, entryin, pos);
 	}
 
 	PG_FREE_IF_COPY(in, 0);
@@ -192,7 +206,7 @@ Datum
 tsvector_length(PG_FUNCTION_ARGS)
 {
 	TSVector	in = PG_GETARG_TSVECTOR(0);
-	int32		ret = in->size;
+	int32		ret = TS_COUNT(in);
 
 	PG_FREE_IF_COPY(in, 0);
 	PG_RETURN_INT32(ret);
@@ -204,11 +218,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	char		cw = PG_GETARG_CHAR(1);
 	TSVector	out;
-	int			i,
-				j;
-	WordEntry  *entry;
-	WordEntryPos *p;
+	int			i;
+	WordEntry  *weptr;
 	int			w = 0;
+	uint32		pos;
 
 	switch (cw)
 	{
@@ -235,20 +248,21 @@ tsvector_setweight(PG_FUNCTION_ARGS)
 
 	out = (TSVector) palloc(VARSIZE(in));
 	memcpy(out, in, VARSIZE(in));
-	entry = ARRPTR(out);
-	i = out->size;
-	while (i--)
+	weptr = ARRPTR(out);
+
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(out); i++)
 	{
-		if ((j = POSDATALEN(out, entry)) != 0)
+		int j,
+			npos = ENTRY_NPOS(out, weptr);
+
+		if (npos)
 		{
-			p = POSDATAPTR(out, entry);
-			while (j--)
-			{
-				WEP_SETWEIGHT(*p, w);
-				p++;
-			}
+			WordEntryPos *p = POSDATAPTR(STRPTR(out) + pos, ENTRY_LEN(out, weptr));
+			for (j = 0; j < npos; j++)
+				WEP_SETWEIGHT(p[j], w);
 		}
-		entry++;
+		INCRPTR(out, weptr, pos);
 	}
 
 	PG_FREE_IF_COPY(in, 0);
@@ -269,10 +283,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 	TSVector	tsout;
 	int			i,
-				j,
 				nlexemes,
 				weight;
-	WordEntry  *entry;
 	Datum	   *dlexemes;
 	bool	   *nulls;
 
@@ -301,8 +313,6 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 	tsout = (TSVector) palloc(VARSIZE(tsin));
 	memcpy(tsout, tsin, VARSIZE(tsin));
-	entry = ARRPTR(tsout);
-
 	deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
 					  &dlexemes, &nulls, &nlexemes);
 
@@ -315,7 +325,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 	{
 		char	   *lex;
 		int			lex_len,
-					lex_pos;
+					lex_idx,
+					npos;
 
 		if (nulls[i])
 			ereport(ERROR,
@@ -324,17 +335,19 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
-		lex_pos = tsvector_bsearch(tsout, lex, lex_len);
+		lex_idx = tsvector_bsearch(tsin, lex, lex_len);
+		npos = ENTRY_NPOS(tsin, ARRPTR(tsout) + lex_idx);
 
-		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
+		if (lex_idx >= 0 && npos > 0)
 		{
-			WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
+			int			j;
+			WordEntry  *we;
+			char	   *lexeme = tsvector_getlexeme(tsout, lex_idx, &we);
 
-			while (j--)
-			{
-				WEP_SETWEIGHT(*p, weight);
-				p++;
-			}
+			WordEntryPos   *p = POSDATAPTR(lexeme, we->len);
+
+			for (j = 0; j < npos; j++)
+				WEP_SETWEIGHT(p[j], weight);
 		}
 	}
 
@@ -354,34 +367,27 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
  * Return the number added (might be less than expected due to overflow)
  */
 static int32
-add_pos(TSVector src, WordEntry *srcptr,
-		TSVector dest, WordEntry *destptr,
+add_pos(char *src, WordEntry *srcptr,
+		WordEntryPos *dest, int from,
 		int32 maxpos)
 {
-	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
+	uint16	    clen = from;
 	int			i;
-	uint16		slen = POSDATALEN(src, srcptr),
-				startlen;
-	WordEntryPos *spos = POSDATAPTR(src, srcptr),
-			   *dpos = POSDATAPTR(dest, destptr);
+	uint16		slen = srcptr->npos;
+	WordEntryPos *spos = POSDATAPTR(src, srcptr->len);
 
-	if (!destptr->haspos)
-		*clen = 0;
-
-	startlen = *clen;
+	Assert(!srcptr->hasoff);
 	for (i = 0;
-		 i < slen && *clen < MAXNUMPOS &&
-		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
+		 i < slen && clen < MAXNUMPOS &&
+		 (clen == 0 || WEP_GETPOS(dest[clen - 1]) != MAXENTRYPOS - 1);
 		 i++)
 	{
-		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
-		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
-		(*clen)++;
+		WEP_SETWEIGHT(dest[clen], WEP_GETWEIGHT(spos[i]));
+		WEP_SETPOS(dest[clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
+		clen++;
 	}
 
-	if (*clen != startlen)
-		destptr->haspos = 1;
-	return *clen - startlen;
+	return clen - from;
 }
 
 /*
@@ -392,20 +398,20 @@ add_pos(TSVector src, WordEntry *srcptr,
 static int
 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
 {
-	WordEntry  *arrin = ARRPTR(tsv);
 	int			StopLow = 0,
-				StopHigh = tsv->size,
+				StopHigh = TS_COUNT(tsv),
 				StopMiddle,
 				cmp;
 
 	while (StopLow < StopHigh)
 	{
-		StopMiddle = (StopLow + StopHigh) / 2;
+		WordEntry  *entry = NULL;
+		char	   *str;
 
+		StopMiddle = (StopLow + StopHigh) / 2;
+		str = tsvector_getlexeme(tsv, StopMiddle, &entry);
 		cmp = tsCompareString(lexeme, lexeme_len,
-							  STRPTR(tsv) + arrin[StopMiddle].pos,
-							  arrin[StopMiddle].len,
-							  false);
+							  str, entry->len, false);
 
 		if (cmp < 0)
 			StopHigh = StopMiddle;
@@ -460,14 +466,12 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 						   int indices_count)
 {
 	TSVector	tsout;
-	WordEntry  *arrin = ARRPTR(tsv),
-			   *arrout;
-	char	   *data = STRPTR(tsv),
-			   *dataout;
-	int			i,				/* index in arrin */
-				j,				/* index in arrout */
+	WordEntry  *ptr = ARRPTR(tsv);
+	int			i,				/* index in input tsvector */
+				j,				/* index in output tsvector */
 				k,				/* index in indices_to_delete */
-				curoff;			/* index in dataout area */
+				curoff = 0,		/* index in data area of output */
+				pos;
 
 	/*
 	 * Sort the filter array to simplify membership checks below.  Also, get
@@ -495,16 +499,18 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	tsout = (TSVector) palloc0(VARSIZE(tsv));
 
 	/* This count must be correct because STRPTR(tsout) relies on it. */
-	tsout->size = tsv->size - indices_count;
+	TS_SETCOUNT(tsout, TS_COUNT(tsv) - indices_count);
 
 	/*
 	 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
 	 */
-	arrout = ARRPTR(tsout);
-	dataout = STRPTR(tsout);
-	curoff = 0;
-	for (i = j = k = 0; i < tsv->size; i++)
+
+	INITPOS(pos);
+	for (i = j = k = 0; i < TS_COUNT(tsv); i++)
 	{
+		char	*lex = STRPTR(tsv) + pos;
+		int		 lex_len = ENTRY_LEN(tsv, ptr);
+
 		/*
 		 * If current i is present in indices_to_delete, skip this lexeme.
 		 * Since indices_to_delete is already sorted, we only need to check
@@ -513,28 +519,14 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 		if (k < indices_count && i == indices_to_delete[k])
 		{
 			k++;
-			continue;
+			goto next;
 		}
 
-		/* Copy lexeme and its positions and weights */
-		memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
-		arrout[j].haspos = arrin[i].haspos;
-		arrout[j].len = arrin[i].len;
-		arrout[j].pos = curoff;
-		curoff += arrin[i].len;
-		if (arrin[i].haspos)
-		{
-			int			len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
-			+ sizeof(uint16);
-
-			curoff = SHORTALIGN(curoff);
-			memcpy(dataout + curoff,
-				   STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
-				   len);
-			curoff += len;
-		}
+		tsvector_addlexeme(tsout, j++, &curoff, lex, lex_len,
+				POSDATAPTR(lex, lex_len), ENTRY_NPOS(tsv, ptr));
 
-		j++;
+next:
+		INCRPTR(tsv, ptr, pos);
 	}
 
 	/*
@@ -543,8 +535,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	 * estimation of tsout's size is wrong.
 	 */
 	Assert(k == indices_count);
-
-	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+	SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), curoff));
 	return tsout;
 }
 
@@ -635,8 +626,9 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
 Datum
 tsvector_unnest(PG_FUNCTION_ARGS)
 {
-	FuncCallContext *funcctx;
-	TSVector	tsin;
+	FuncCallContext	   *funcctx;
+	TSVector			tsin;
+	uint32				pos;
 
 	if (SRF_IS_FIRSTCALL())
 	{
@@ -655,31 +647,33 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 						   TEXTARRAYOID, -1, 0);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
-		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+		INITPOS(pos);
+		funcctx->user_fctx = list_make2(PG_GETARG_TSVECTOR(0), makeInteger(pos));
 
 		MemoryContextSwitchTo(oldcontext);
 	}
 
 	funcctx = SRF_PERCALL_SETUP();
-	tsin = (TSVector) funcctx->user_fctx;
+	tsin = (TSVector) linitial(funcctx->user_fctx);
+	pos = intVal(lsecond(funcctx->user_fctx));
 
-	if (funcctx->call_cntr < tsin->size)
+	if (funcctx->call_cntr < TS_COUNT(tsin))
 	{
-		WordEntry  *arrin = ARRPTR(tsin);
+		WordEntry  *entry = ARRPTR(tsin) + funcctx->call_cntr;
 		char	   *data = STRPTR(tsin);
 		HeapTuple	tuple;
 		int			j,
-					i = funcctx->call_cntr;
+					npos = ENTRY_NPOS(tsin, entry),
+					lex_len = ENTRY_LEN(tsin, entry);
 		bool		nulls[] = {false, false, false};
 		Datum		values[3];
 
 		values[0] = PointerGetDatum(
-									cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
-			);
+			cstring_to_text_with_len(data + pos, lex_len));
 
-		if (arrin[i].haspos)
+		if (npos)
 		{
-			WordEntryPosVector *posv;
+			WordEntryPos *apos = POSDATAPTR(data + pos, lex_len);
 			Datum	   *positions;
 			Datum	   *weights;
 			char		weight;
@@ -689,28 +683,28 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 			 * uint16 (2 bits for weight, 14 for position). Here we extract
 			 * that in two separate arrays.
 			 */
-			posv = _POSVECPTR(tsin, arrin + i);
-			positions = palloc(posv->npos * sizeof(Datum));
-			weights = palloc(posv->npos * sizeof(Datum));
-			for (j = 0; j < posv->npos; j++)
+			positions = palloc(npos * sizeof(Datum));
+			weights = palloc(npos * sizeof(Datum));
+			for (j = 0; j < npos; j++)
 			{
-				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
-				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
+				positions[j] = Int16GetDatum(WEP_GETPOS(apos[j]));
+				weight = 'D' - WEP_GETWEIGHT(apos[j]);
 				weights[j] = PointerGetDatum(
 											 cstring_to_text_with_len(&weight, 1)
 					);
 			}
 
 			values[1] = PointerGetDatum(
-										construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
+					construct_array(positions, npos, INT2OID, 2, true, 's'));
 			values[2] = PointerGetDatum(
-										construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
+					construct_array(weights, npos, TEXTOID, -1, false, 'i'));
 		}
 		else
 		{
 			nulls[1] = nulls[2] = true;
 		}
 
+		INCRPTR(tsin, entry, intVal(lsecond(funcctx->user_fctx)));
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
@@ -728,21 +722,23 @@ Datum
 tsvector_to_array(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
-	WordEntry  *arrin = ARRPTR(tsin);
+	WordEntry  *entry = ARRPTR(tsin);
 	Datum	   *elements;
 	int			i;
 	ArrayType  *array;
+	long		pos;
 
-	elements = palloc(tsin->size * sizeof(Datum));
+	elements = palloc(TS_COUNT(tsin) * sizeof(Datum));
 
-	for (i = 0; i < tsin->size; i++)
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(tsin); i++)
 	{
 		elements[i] = PointerGetDatum(
-									  cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
-			);
+		  cstring_to_text_with_len(STRPTR(tsin) + pos, ENTRY_LEN(tsin, entry)));
+		INCRPTR(tsin, entry, pos);
 	}
 
-	array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+	array = construct_array(elements, TS_COUNT(tsin), TEXTOID, -1, false, 'i');
 
 	pfree(elements);
 	PG_FREE_IF_COPY(tsin, 0);
@@ -750,6 +746,124 @@ tsvector_to_array(PG_FUNCTION_ARGS)
 }
 
 /*
+ * Returns offset by given index in TSVector,
+ * this function used when we need random access
+ */
+int
+tsvector_getoffset(TSVector vec, int idx, WordEntry **we)
+{
+	int			offset = 0;
+	WordEntry  *entry;
+
+	entry = ARRPTR(vec) + idx;
+	if (we)
+		*we = entry;
+
+	while (!entry->hasoff)
+	{
+		entry--;
+		if (!entry->hasoff)
+			offset += SHORTALIGN(entry->len) + entry->npos * sizeof(WordEntryPos);
+	}
+
+	Assert(entry >= ARRPTR(vec));
+
+	if (idx % TS_OFFSET_STRIDE)
+	{
+		/* if idx is by offset */
+		WordEntry *offset_entry = (WordEntry *) (STRPTR(vec) + entry->offset);
+
+		offset += entry->offset + sizeof(WordEntry);
+		offset += SHORTALIGN(offset_entry->len) + offset_entry->npos * sizeof(WordEntryPos);
+	}
+	else
+	{
+		Assert(entry == ARRPTR(vec) + idx);
+
+		if (we)
+			*we = (WordEntry *) (STRPTR(vec) + entry->offset);
+		offset = entry->offset + sizeof(WordEntry);
+	}
+
+	return offset;
+}
+
+/*
+ * Add lexeme and its positions to tsvector and move dataoff (offset where
+ * data should be added) to new position.
+ * Returns pointer to lexeme start
+ */
+char *
+tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+		char *lexeme, int lexeme_len, WordEntryPos *pos, int npos)
+{
+	int			stroff;
+	WordEntry  *entry;
+	char	   *result;
+
+	/* when idx is 0, dataoff should be 0 too, and otherwise */
+	Assert(!((idx == 0) ^ (*dataoff == 0)));
+
+	stroff = *dataoff;
+	entry = ARRPTR(tsv) + idx;
+
+	if (idx % TS_OFFSET_STRIDE == 0)
+	{
+		/* WordEntry with offset */
+		WordEntry offentry;
+
+		stroff = INTALIGN(stroff);
+		entry->hasoff = 1;
+		entry->offset = stroff;
+
+		/* fill WordEntry for offset */
+		offentry.hasoff = 0;
+		offentry.len = lexeme_len;
+		offentry.npos = npos;
+		memcpy(STRPTR(tsv) + stroff, &offentry, sizeof(WordEntry));
+		stroff += sizeof(WordEntry);
+	}
+	else
+	{
+		stroff = SHORTALIGN(stroff);							\
+		entry->hasoff = 0;
+		entry->len = lexeme_len;
+		entry->npos = npos;
+	}
+
+	memcpy(STRPTR(tsv) + stroff, lexeme, lexeme_len);
+	result = STRPTR(tsv) + stroff;
+	stroff += lexeme_len;
+
+	if (npos)
+	{
+		if (npos > 0xFFFF)
+			elog(ERROR, "positions array too long");
+
+		/*
+		 * Pad to 2-byte alignment if necessary. We don't know how memory was
+		 * allocated, so in case of aligning we need to make sure that unused
+		 * is zero.
+		 */
+		if (stroff != SHORTALIGN(stroff))
+		{
+			*(STRPTR(tsv) + stroff) = '\0';
+			stroff = SHORTALIGN(stroff);
+		}
+
+		/* Copy positions */
+		if (pos)
+			memcpy(STRPTR(tsv) + stroff, pos, npos * sizeof(WordEntryPos));
+
+		stroff += npos * sizeof(WordEntryPos);
+	}
+
+	*dataoff = stroff;
+
+	return result;
+}
+
+/*
  * Build tsvector from array of lexemes.
  */
 Datum
@@ -758,14 +872,13 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 	TSVector	tsout;
 	Datum	   *dlexemes;
-	WordEntry  *arrout;
 	bool	   *nulls;
 	int			nitems,
 				i,
 				j,
 				tslen,
+				cur = 0,
 				datalen = 0;
-	char	   *cur;
 
 	deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
 
@@ -793,26 +906,23 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 
 	/* Calculate space needed for surviving lexemes. */
 	for (i = 0; i < nitems; i++)
-		datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
+	{
+		int	lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+		INCRSIZE(datalen, i, lex_len, 0);
+	}
 	tslen = CALCDATASIZE(nitems, datalen);
 
 	/* Allocate and fill tsvector. */
 	tsout = (TSVector) palloc0(tslen);
 	SET_VARSIZE(tsout, tslen);
-	tsout->size = nitems;
+	TS_SETCOUNT(tsout, nitems);
 
-	arrout = ARRPTR(tsout);
-	cur = STRPTR(tsout);
 	for (i = 0; i < nitems; i++)
 	{
 		char	   *lex = VARDATA(dlexemes[i]);
 		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
 
-		memcpy(cur, lex, lex_len);
-		arrout[i].haspos = 0;
-		arrout[i].len = lex_len;
-		arrout[i].pos = cur - STRPTR(tsout);
-		cur += lex_len;
+		tsvector_addlexeme(tsout, i, &cur, lex, lex_len, NULL, 0);
 	}
 
 	PG_FREE_IF_COPY(v, 0);
@@ -828,17 +938,16 @@ tsvector_filter(PG_FUNCTION_ARGS)
 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
 				tsout;
 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
-	WordEntry  *arrin = ARRPTR(tsin),
-			   *arrout;
-	char	   *datain = STRPTR(tsin),
-			   *dataout;
+	char	   *dataout;
 	Datum	   *dweights;
 	bool	   *nulls;
 	int			nweights;
 	int			i,
-				j;
-	int			cur_pos = 0;
+				j,
+				dataoff = 0,
+				pos;
 	char		mask = 0;
+	WordEntry  *ptr = ARRPTR(tsin);
 
 	deconstruct_array(weights, CHAROID, 1, true, 'c',
 					  &dweights, &nulls, &nweights);
@@ -879,109 +988,112 @@ tsvector_filter(PG_FUNCTION_ARGS)
 	}
 
 	tsout = (TSVector) palloc0(VARSIZE(tsin));
-	tsout->size = tsin->size;
-	arrout = ARRPTR(tsout);
+	TS_SETCOUNT(tsout, TS_COUNT(tsin));
 	dataout = STRPTR(tsout);
 
-	for (i = j = 0; i < tsin->size; i++)
+	INITPOS(pos);
+	for (i = j = 0; i < TS_COUNT(tsin); i++)
 	{
-		WordEntryPosVector *posvin,
-				   *posvout;
-		int			npos = 0;
-		int			k;
-
-		if (!arrin[i].haspos)
-			continue;
-
-		posvin = _POSVECPTR(tsin, arrin + i);
-		posvout = (WordEntryPosVector *)
-			(dataout + SHORTALIGN(cur_pos + arrin[i].len));
-
-		for (k = 0; k < posvin->npos; k++)
+		WordEntryPos   *posin,
+					   *posout;
+		int				k,
+						npos = 0,
+						lex_len = ENTRY_LEN(tsin, ptr);
+		char		   *lex = STRPTR(tsin) + pos,
+					   *lexout;
+
+		posin = POSDATAPTR(lex, lex_len);
+		for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
 		{
-			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
-				posvout->pos[npos++] = posvin->pos[k];
+			if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+				npos++;
 		}
 
-		/* if no satisfactory positions found, skip lexeme */
 		if (!npos)
-			continue;
+			goto next;
 
-		arrout[j].haspos = true;
-		arrout[j].len = arrin[i].len;
-		arrout[j].pos = cur_pos;
+		lexout = tsvector_addlexeme(tsout, j++, &dataoff, lex, lex_len,
+				NULL, npos);
+		posout =  POSDATAPTR(lexout, lex_len);
+		npos = 0;
+		for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
+		{
+			if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+				posout[npos++] = posin[k];
+		}
 
-		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
-		posvout->npos = npos;
-		cur_pos += SHORTALIGN(arrin[i].len);
-		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
-			sizeof(uint16);
-		j++;
+next:
+		INCRPTR(tsin, ptr, pos);
 	}
 
-	tsout->size = j;
+	TS_SETCOUNT(tsout, j);
 	if (dataout != STRPTR(tsout))
-		memmove(STRPTR(tsout), dataout, cur_pos);
+		memmove(STRPTR(tsout), dataout, dataoff);
 
-	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+	SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), dataoff));
 
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(tsout);
 }
 
+/* Get max position in in1; we'll need this to offset in2's positions */
+static int
+get_maxpos(TSVector tsv)
+{
+	int				i,
+					j,
+					maxpos = 0;
+	WordEntry	   *ptr = ARRPTR(tsv);
+	uint32			pos;
+	WordEntryPos   *apos;
+
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(tsv); i++)
+	{
+		apos = POSDATAPTR(STRPTR(tsv) + pos, ENTRY_LEN(tsv, ptr));
+		for (j = 0; j < ENTRY_NPOS(tsv, ptr); j++)
+		{
+			if (WEP_GETPOS(apos[j]) > maxpos)
+				maxpos = WEP_GETPOS(apos[j]);
+		}
+
+		INCRPTR(tsv, ptr, pos);
+	}
+
+	return maxpos;
+}
+
 Datum
 tsvector_concat(PG_FUNCTION_ARGS)
 {
-	TSVector	in1 = PG_GETARG_TSVECTOR(0);
-	TSVector	in2 = PG_GETARG_TSVECTOR(1);
-	TSVector	out;
-	WordEntry  *ptr;
-	WordEntry  *ptr1,
+	TSVector	in1 = PG_GETARG_TSVECTOR(0),
+				in2 = PG_GETARG_TSVECTOR(1),
+				out;
+	WordEntry  *ptr,
+			   *ptr1,
 			   *ptr2;
-	WordEntryPos *p;
 	int			maxpos = 0,
 				i,
-				j,
 				i1,
 				i2,
-				dataoff,
 				output_bytes,
-				output_size;
-	char	   *data,
-			   *data1,
-			   *data2;
-
-	/* Get max position in in1; we'll need this to offset in2's positions */
-	ptr = ARRPTR(in1);
-	i = in1->size;
-	while (i--)
-	{
-		if ((j = POSDATALEN(in1, ptr)) != 0)
-		{
-			p = POSDATAPTR(in1, ptr);
-			while (j--)
-			{
-				if (WEP_GETPOS(*p) > maxpos)
-					maxpos = WEP_GETPOS(*p);
-				p++;
-			}
-		}
-		ptr++;
-	}
+				pos1,
+				pos2,
+				dataoff;
+	char	   *data;
 
 	ptr1 = ARRPTR(in1);
 	ptr2 = ARRPTR(in2);
-	data1 = STRPTR(in1);
-	data2 = STRPTR(in2);
-	i1 = in1->size;
-	i2 = in2->size;
+	i1 = TS_COUNT(in1);
+	i2 = TS_COUNT(in2);
 
 	/*
 	 * Conservative estimate of space needed.  We might need all the data in
-	 * both inputs, and conceivably add a pad byte before position data for
-	 * each item where there was none before.
+	 * both inputs, and conceivably add a pad bytes before lexeme and position
+	 * data, and pad bytes before WordEntry for offset entry.
 	 */
-	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
+	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 * 2 + i2 * 2;
+	output_bytes += 4 * (i1 + i2) / TS_OFFSET_STRIDE;
 
 	out = (TSVector) palloc0(output_bytes);
 	SET_VARSIZE(out, output_bytes);
@@ -990,91 +1102,110 @@ tsvector_concat(PG_FUNCTION_ARGS)
 	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
 	 * collapse out any unused space at the end.
 	 */
-	out->size = in1->size + in2->size;
+	TS_SETCOUNT(out, i1 + i2);
 
-	ptr = ARRPTR(out);
+	ptr = NULL;
 	data = STRPTR(out);
+	i = 0;
 	dataoff = 0;
+
+	INITPOS(pos1);
+	INITPOS(pos2);
+
+	/*
+	 * we will need max position from first tsvector to add it positions of
+	 * second tsvector
+	 */
+	maxpos = get_maxpos(in1);
+
 	while (i1 && i2)
 	{
-		int			cmp = compareEntry(data1, ptr1, data2, ptr2);
+		char   *lex = STRPTR(in1) + pos1,
+			   *lex2 = STRPTR(in2) + pos2;
+
+		int		lex_len = ENTRY_LEN(in1, ptr1),
+				lex2_len = ENTRY_LEN(in2, ptr2);
+
+		int		cmp = tsCompareString(lex, lex_len,	lex2, lex2_len,	false);
 
 		if (cmp < 0)
 		{						/* in1 first */
-			ptr->haspos = ptr1->haspos;
-			ptr->len = ptr1->len;
-			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-			ptr->pos = dataoff;
-			dataoff += ptr1->len;
-			if (ptr->haspos)
-			{
-				dataoff = SHORTALIGN(dataoff);
-				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-			}
+			tsvector_addlexeme(out, i, &dataoff,
+					lex, lex_len,
+					POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
 
-			ptr++;
-			ptr1++;
+			INCRPTR(in1, ptr1, pos1);
 			i1--;
+			i++;
 		}
 		else if (cmp > 0)
 		{						/* in2 first */
-			ptr->haspos = ptr2->haspos;
-			ptr->len = ptr2->len;
-			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
-			ptr->pos = dataoff;
-			dataoff += ptr2->len;
-			if (ptr->haspos)
+			char		*new_lex;
+			WordEntry	*we = UNWRAP_ENTRY(in2, ptr2);
+
+			new_lex = tsvector_addlexeme(out, i, &dataoff, lex2, lex2_len, NULL, 0);
+			if (we->npos > 0)
 			{
-				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+				int				addlen;
+				WordEntryPos   *apos = POSDATAPTR(new_lex, lex2_len);
 
-				if (addlen == 0)
-					ptr->haspos = 0;
-				else
+				addlen = add_pos(lex2, we, apos, 0, maxpos);
+				if (addlen > 0)
 				{
+					ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+					ptr->npos = addlen;
 					dataoff = SHORTALIGN(dataoff);
-					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+					dataoff += ptr->npos * sizeof(WordEntryPos);
 				}
 			}
 
-			ptr++;
-			ptr2++;
+			INCRPTR(in2, ptr2, pos2);
+			i++;
 			i2--;
 		}
 		else
 		{
-			ptr->haspos = ptr1->haspos | ptr2->haspos;
-			ptr->len = ptr1->len;
-			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-			ptr->pos = dataoff;
-			dataoff += ptr1->len;
-			if (ptr->haspos)
+			char		   *new_lex;
+			int				npos1 = ENTRY_NPOS(in1, ptr1),
+							npos2 = ENTRY_NPOS(in2, ptr2);
+			WordEntryPos   *apos;
+
+			new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+			apos = POSDATAPTR(new_lex, lex_len);
+
+			if (npos1 || npos2)
 			{
-				if (ptr1->haspos)
-				{
-					dataoff = SHORTALIGN(dataoff);
-					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-					if (ptr2->haspos)
-						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
-				}
-				else			/* must have ptr2->haspos */
+				int			addlen;
+				char	   *lex2 = STRPTR(in2) + pos2;
+
+				ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+				if (npos1)
 				{
-					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+					/* add positions from left tsvector */
+					addlen = add_pos(lex, UNWRAP_ENTRY(in1, ptr1), apos, 0, 0);
+					ptr->npos = addlen;
 
-					if (addlen == 0)
-						ptr->haspos = 0;
-					else
+					if (npos2)
 					{
-						dataoff = SHORTALIGN(dataoff);
-						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+						/* add positions from right right tsvector */
+						addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, addlen, maxpos);
+						ptr->npos += addlen;
 					}
 				}
+				else	/* npos in second should be > 0 */
+				{
+					/* add positions from right tsvector */
+					addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+					ptr->npos = addlen;
+				}
+
+				dataoff = SHORTALIGN(dataoff);
+				dataoff += ptr->npos * sizeof(WordEntryPos);
 			}
 
-			ptr++;
-			ptr1++;
-			ptr2++;
+			INCRPTR(in1, ptr1, pos1);
+			INCRPTR(in2, ptr2, pos2);
+			i++;
 			i1--;
 			i2--;
 		}
@@ -1082,45 +1213,43 @@ tsvector_concat(PG_FUNCTION_ARGS)
 
 	while (i1)
 	{
-		ptr->haspos = ptr1->haspos;
-		ptr->len = ptr1->len;
-		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-		ptr->pos = dataoff;
-		dataoff += ptr1->len;
-		if (ptr->haspos)
-		{
-			dataoff = SHORTALIGN(dataoff);
-			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-		}
+		char   *lex = STRPTR(in1) + pos1;
+		int		lex_len = ENTRY_LEN(in1, ptr1);
 
-		ptr++;
-		ptr1++;
+		tsvector_addlexeme(out, i, &dataoff,
+				lex, lex_len,
+				POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
+
+		INCRPTR(in1, ptr1, pos1);
+		i++;
 		i1--;
 	}
 
 	while (i2)
 	{
-		ptr->haspos = ptr2->haspos;
-		ptr->len = ptr2->len;
-		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
-		ptr->pos = dataoff;
-		dataoff += ptr2->len;
-		if (ptr->haspos)
+		char   *lex = STRPTR(in2) + pos2,
+			   *new_lex;
+		int		lex_len = ENTRY_LEN(in2, ptr2),
+				npos = ENTRY_NPOS(in2, ptr2);
+
+		new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+		if (npos > 0)
 		{
-			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+			int				addlen;
+			WordEntryPos   *apos = POSDATAPTR(new_lex, lex_len);
 
-			if (addlen == 0)
-				ptr->haspos = 0;
-			else
+			addlen = add_pos(lex, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+			if (addlen > 0)
 			{
+				WordEntry	*ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+				ptr->npos = addlen;
 				dataoff = SHORTALIGN(dataoff);
-				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+				dataoff += npos * sizeof(WordEntryPos);
 			}
 		}
 
-		ptr++;
-		ptr2++;
+		INCRPTR(in2, ptr2, pos2);
+		i++;
 		i2--;
 	}
 
@@ -1137,12 +1266,10 @@ tsvector_concat(PG_FUNCTION_ARGS)
 	 * Adjust sizes (asserting that we didn't overrun the original estimates)
 	 * and collapse out any unused array entries.
 	 */
-	output_size = ptr - ARRPTR(out);
-	Assert(output_size <= out->size);
-	out->size = output_size;
+	TS_SETCOUNT(out, i);
 	if (data != STRPTR(out))
 		memmove(STRPTR(out), data, dataoff);
-	output_bytes = CALCDATASIZE(out->size, dataoff);
+	output_bytes = CALCDATASIZE(TS_COUNT(out), dataoff);
 	Assert(output_bytes <= VARSIZE(out));
 	SET_VARSIZE(out, output_bytes);
 
@@ -1194,35 +1321,26 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
  * Check weight info or/and fill 'data' with the required positions
  */
 static bool
-checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+checkclass_str(WordEntryPos *pv, int npos, QueryOperand *val,
 			   ExecPhraseData *data)
 {
 	bool		result = false;
 
-	if (entry->haspos && (val->weight || data))
+	if (npos && (val->weight || data))
 	{
-		WordEntryPosVector *posvec;
-
-		/*
-		 * We can't use the _POSVECPTR macro here because the pointer to the
-		 * tsvector's lexeme storage is already contained in chkval->values.
-		 */
-		posvec = (WordEntryPosVector *)
-			(chkval->values + SHORTALIGN(entry->pos + entry->len));
-
 		if (val->weight && data)
 		{
-			WordEntryPos *posvec_iter = posvec->pos;
+			WordEntryPos *posvec_iter = pv;
 			WordEntryPos *dptr;
 
 			/*
 			 * Filter position information by weights
 			 */
-			dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+			dptr = data->pos = palloc(sizeof(WordEntryPos) * npos);
 			data->allocated = true;
 
 			/* Is there a position with a matching weight? */
-			while (posvec_iter < posvec->pos + posvec->npos)
+			while (posvec_iter < (pv + npos))
 			{
 				/* If true, append this position to the data->pos */
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
@@ -1241,10 +1359,10 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 		}
 		else if (val->weight)
 		{
-			WordEntryPos *posvec_iter = posvec->pos;
+			WordEntryPos *posvec_iter = pv;
 
 			/* Is there a position with a matching weight? */
-			while (posvec_iter < posvec->pos + posvec->npos)
+			while (posvec_iter < (pv + npos))
 			{
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
 				{
@@ -1257,8 +1375,8 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 		}
 		else					/* data != NULL */
 		{
-			data->npos = posvec->npos;
-			data->pos = posvec->pos;
+			data->npos = npos;
+			data->pos = pv;
 			data->allocated = false;
 			result = true;
 		}
@@ -1311,26 +1429,32 @@ static bool
 checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
 	CHKVAL	   *chkval = (CHKVAL *) checkval;
-	WordEntry  *StopLow = chkval->arrb;
-	WordEntry  *StopHigh = chkval->arre;
-	WordEntry  *StopMiddle = StopHigh;
+	int			StopLow = chkval->bidx;
+	int			StopHigh = chkval->eidx;
+	int			StopMiddle = StopHigh;
 	int			difference = -1;
 	bool		res = false;
+	char	   *lexeme;
+	WordEntry  *entry;
 
 	/* Loop invariant: StopLow <= val < StopHigh */
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+		lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+		Assert(!entry->hasoff);
 		difference = tsCompareString(chkval->operand + val->distance,
 									 val->length,
-									 chkval->values + StopMiddle->pos,
-									 StopMiddle->len,
+									 lexeme,
+									 entry->len,
 									 false);
 
 		if (difference == 0)
 		{
 			/* Check weight info & fill 'data' with positions */
-			res = checkclass_str(chkval, StopMiddle, val, data);
+			res = checkclass_str(POSDATAPTR(lexeme, entry->len),
+								 entry->npos, val, data);
 			break;
 		}
 		else if (difference > 0)
@@ -1352,19 +1476,31 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 		if (StopLow >= StopHigh)
 			StopMiddle = StopHigh;
 
-		while ((!res || data) && StopMiddle < chkval->arre &&
-			   tsCompareString(chkval->operand + val->distance,
-							   val->length,
-							   chkval->values + StopMiddle->pos,
-							   StopMiddle->len,
-							   true) == 0)
+		while ((!res || data) && StopMiddle < chkval->eidx)
 		{
+			char		   *lexeme;
+			int				cmp;
+			WordEntryPos   *pv;
+
+			lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+			Assert(!entry->hasoff);
+			pv = POSDATAPTR(lexeme, entry->len);
+			cmp = tsCompareString(chkval->operand + val->distance,
+								  val->length,
+								  lexeme,
+								  entry->len,
+								  true);
+
+			if (cmp != 0)
+				break;
+
 			if (data)
 			{
 				/*
 				 * We need to join position information
 				 */
-				res = checkclass_str(chkval, StopMiddle, val, data);
+				res = checkclass_str(pv, entry->npos, val, data);
 
 				if (res)
 				{
@@ -1388,7 +1524,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 			}
 			else
 			{
-				res = checkclass_str(chkval, StopMiddle, val, NULL);
+				res = checkclass_str(pv, entry->npos, val, NULL);
 			}
 
 			StopMiddle++;
@@ -1935,9 +2071,9 @@ ts_match_vq(PG_FUNCTION_ARGS)
 		PG_RETURN_BOOL(false);
 	}
 
-	chkval.arrb = ARRPTR(val);
-	chkval.arre = chkval.arrb + val->size;
-	chkval.values = STRPTR(val);
+	chkval.bidx = 0;
+	chkval.eidx = TS_COUNT(val);
+	chkval.vec = val;
 	chkval.operand = GETOPERAND(query);
 	result = TS_execute(GETQUERY(query),
 						&chkval,
@@ -2001,12 +2137,15 @@ ts_match_tq(PG_FUNCTION_ARGS)
  * that have a weight equal to one of the weights in 'weight' bitmask.
  */
 static int
-check_weight(TSVector txt, WordEntry *wptr, int8 weight)
+check_weight(char *lexeme, WordEntry *wptr, int8 weight)
 {
-	int			len = POSDATALEN(txt, wptr);
-	int			num = 0;
-	WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+	int				len;
+	int				num = 0;
+	WordEntryPos   *ptr;
 
+	Assert(!wptr->hasoff);
+	len = wptr->len;
+	ptr = POSDATAPTR(lexeme, len);
 	while (len--)
 	{
 		if (weight & (1 << WEP_GETWEIGHT(*ptr)))
@@ -2017,31 +2156,34 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
 }
 
 #define compareStatWord(a,e,t)							\
-	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
-					STRPTR(t) + (e)->pos, (e)->len,		\
-					false)
+	(tsCompareString((a)->lexeme, (a)->lenlexeme,		\
+					t, (e)->len, false))
 
 static void
 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
 {
-	WordEntry  *we = ARRPTR(txt) + off;
+	WordEntry  *we;
 	StatEntry  *node = stat->root,
 			   *pnode = NULL;
 	int			n,
 				res = 0;
 	uint32		depth = 1;
+	char	   *lexeme;
+
+	lexeme = tsvector_getlexeme(txt, off, &we);
 
+	Assert(!we->hasoff);
 	if (stat->weight == 0)
-		n = (we->haspos) ? POSDATALEN(txt, we) : 1;
+		n = (we->npos) ? we->npos : 1;
 	else
-		n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
+		n = (we->npos) ? check_weight(lexeme, we, stat->weight) : 0;
 
 	if (n == 0)
 		return;					/* nothing to insert */
 
 	while (node)
 	{
-		res = compareStatWord(node, we, txt);
+		res = compareStatWord(node, we, lexeme);
 
 		if (res == 0)
 		{
@@ -2065,7 +2207,7 @@ insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector tx
 		node->ndoc = 1;
 		node->nentry = n;
 		node->lenlexeme = we->len;
-		memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
+		memcpy(node->lexeme, lexeme, node->lenlexeme);
 
 		if (pnode == NULL)
 		{
@@ -2092,13 +2234,14 @@ chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVecto
 					uint32 low, uint32 high, uint32 offset)
 {
 	uint32		pos;
-	uint32		middle = (low + high) >> 1;
+	uint32		middle = (low + high) >> 1,
+				count = TS_COUNT(txt);
 
 	pos = (low + middle) >> 1;
-	if (low != middle && pos >= offset && pos - offset < txt->size)
+	if (low != middle && pos >= offset && pos - offset < count)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 	pos = (high + middle + 1) >> 1;
-	if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
+	if (middle + 1 != high && pos >= offset && pos - offset < count)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 
 	if (low != middle)
@@ -2125,7 +2268,8 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 	TSVector	txt = DatumGetTSVector(data);
 	uint32		i,
 				nbit = 0,
-				offset;
+				offset,
+				count = TS_COUNT(txt);
 
 	if (stat == NULL)
 	{							/* Init in first */
@@ -2134,19 +2278,19 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 	}
 
 	/* simple check of correctness */
-	if (txt == NULL || txt->size == 0)
+	if (txt == NULL || count == 0)
 	{
 		if (txt && txt != (TSVector) DatumGetPointer(data))
 			pfree(txt);
 		return stat;
 	}
 
-	i = txt->size - 1;
+	i = count - 1;
 	for (; i > 0; i >>= 1)
 		nbit++;
 
 	nbit = 1 << nbit;
-	offset = (nbit - txt->size) / 2;
+	offset = (nbit - count) / 2;
 
 	insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
 	chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
@@ -2579,15 +2723,28 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
 	}
 
 	/* make tsvector value */
-	datum = TSVectorGetDatum(make_tsvector(&prs));
-	isnull = false;
-
-	/* and insert it into tuple */
-	rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-										 1, &tsvector_attr_num,
-										 &datum, &isnull);
-
-	pfree(DatumGetPointer(datum));
+	if (prs.curwords)
+	{
+		datum = PointerGetDatum(make_tsvector(&prs));
+		isnull = false;
+		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+											 1, &tsvector_attr_num,
+											 &datum, &isnull);
+		pfree(DatumGetPointer(datum));
+	}
+	else
+	{
+		TSVector	out = palloc(CALCDATASIZE(0, 0));
+
+		SET_VARSIZE(out, CALCDATASIZE(0, 0));
+		TS_SETCOUNT(out, 0);
+		datum = PointerGetDatum(out);
+		isnull = false;
+		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+											 1, &tsvector_attr_num,
+											 &datum, &isnull);
+		pfree(prs.words);
+	}
 
 	return PointerGetDatum(rettuple);
 }
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 30d7c4bccd..47aa498432 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -24,30 +24,38 @@
  * 2) int32		size - number of lexemes (WordEntry array entries)
  * 3) Array of WordEntry - one per lexeme; must be sorted according to
  *				tsCompareString() (ie, memcmp of lexeme strings).
- *				WordEntry->pos gives the number of bytes from end of WordEntry
- *				array to start of lexeme's string, which is of length len.
+ *	  WordEntry have two types: offset or metadata (length of lexeme and number
+ *	  of positions). If it has offset then metadata will be by this offset.
  * 4) Per-lexeme data storage:
- *	  lexeme string (not null-terminated)
- *	  if haspos is true:
+ *    [4-byte aligned WordEntry] (if its WordEntry has offset)
+ *	  2-byte aligned lexeme string (not null-terminated)
+ *	  if it has positions:
  *		padding byte if necessary to make the position data 2-byte aligned
- *		uint16			number of positions that follow
  *		WordEntryPos[]	positions
  *
  * The positions for each lexeme must be sorted.
  *
- * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
+ * Note, tsvector functions believe that sizeof(WordEntry) == 4
  */
 
-typedef struct
+#define TS_OFFSET_STRIDE 4
+
+typedef union
 {
-	uint32
-				haspos:1,
-				len:11,			/* MAX 2Kb */
-				pos:20;			/* MAX 1Mb */
+	struct {
+		uint32 hasoff: 1,
+			   offset: 31;
+	};
+	struct {
+		uint32 hasoff_: 1,
+			   len:11,
+			   npos: 16,
+			   _unused: 4;
+	};
 } WordEntry;
 
 #define MAXSTRLEN ( (1<<11) - 1)
-#define MAXSTRPOS ( (1<<20) - 1)
+#define MAXSTRPOS ( (1<<30) - 1)
 
 extern int	compareWordEntryPos(const void *a, const void *b);
 
@@ -62,19 +70,6 @@ extern int	compareWordEntryPos(const void *a, const void *b);
 
 typedef uint16 WordEntryPos;
 
-typedef struct
-{
-	uint16		npos;
-	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
-} WordEntryPosVector;
-
-/* WordEntryPosVector with exactly 1 entry */
-typedef struct
-{
-	uint16		npos;
-	WordEntryPos pos[1];
-} WordEntryPosVector1;
-
 
 #define WEP_GETWEIGHT(x)	( (x) >> 14 )
 #define WEP_GETPOS(x)		( (x) & 0x3fff )
@@ -90,13 +85,17 @@ typedef struct
 typedef struct
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int32		size;
+	int32		size_;			/* flags and lexemes count */
 	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];
 	/* lexemes follow the entries[] array */
 } TSVectorData;
 
 typedef TSVectorData *TSVector;
 
+#define TS_FLAG_STRETCHED 0x80000000
+#define TS_COUNT(t) ((t)->size_ & 0x0FFFFFFF)
+#define TS_SETCOUNT(t,c) ((t)->size_ = (c) | TS_FLAG_STRETCHED)
+
 #define DATAHDRSIZE (offsetof(TSVectorData, entries))
 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
 
@@ -104,24 +103,65 @@ typedef TSVectorData *TSVector;
 #define ARRPTR(x)	( (x)->entries )
 
 /* pointer to start of a tsvector's lexeme storage */
-#define STRPTR(x)	( (char *) &(x)->entries[(x)->size] )
+#define STRPTR(x)	( (char *) &(x)->entries[TS_COUNT(x)] )
 
-#define _POSVECPTR(x, e)	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
-#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
-#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
+/* for WordEntry with offset return its WordEntry with other properties */
+#define UNWRAP_ENTRY(x,we) \
+	((we)->hasoff? (WordEntry *)(STRPTR(x) + (we)->offset): (we))
+
+/*
+ * helpers used when we're not sure that WordEntry
+ * contains ether offset or len
+ */
+#define ENTRY_NPOS(x,we) (UNWRAP_ENTRY(x,we)->npos)
+#define ENTRY_LEN(x,we) (UNWRAP_ENTRY(x,we)->len)
+
+/* pointer to start of positions */
+#define POSDATAPTR(lex, len) ((WordEntryPos *) (lex + SHORTALIGN(len)))
+
+/* set default offset in tsvector data */
+#define INITPOS(p) ((p) = sizeof(WordEntry))
+
+/* increment entry and offset by given WordEntry */
+#define INCRPTR(x,w,p) \
+do { \
+	WordEntry *y = (w);									\
+	if ((w)->hasoff)									\
+	{													\
+		y = (WordEntry *) (STRPTR(x) + (w)->offset);	\
+		(p) = (w)->offset + sizeof(WordEntry);			\
+	}													\
+	(w)++;												\
+	Assert(!y->hasoff);									\
+	(p) += SHORTALIGN(y->len) + y->npos * sizeof(WordEntryPos); \
+	if ((w) - ARRPTR(x) < TS_COUNT(x) && w->hasoff)		\
+		(p) = INTALIGN(p) + sizeof(WordEntry);			\
+} while (0);
+
+/* used to calculate tsvector size in in tsvector constructors */
+#define INCRSIZE(s,i,l,n) /* size,index,len,npos */		\
+do {													\
+	if ((i) % TS_OFFSET_STRIDE == 0)					\
+		(s) = INTALIGN(s) + sizeof(WordEntry);			\
+	else												\
+		(s) = SHORTALIGN(s);							\
+	(s) += (l);											\
+	(s) = (n)? SHORTALIGN(s) + (n) * sizeof(WordEntryPos) : (s);	\
+} while (0);
 
 /*
  * fmgr interface macros
  */
 
-#define DatumGetTSVector(X)			((TSVector) PG_DETOAST_DATUM(X))
-#define DatumGetTSVectorCopy(X)		((TSVector) PG_DETOAST_DATUM_COPY(X))
+TSVector tsvector_upgrade(Datum orig, bool copy);
+
+#define DatumGetTSVector(X)			tsvector_upgrade((X), false)
+#define DatumGetTSVectorCopy(X)		tsvector_upgrade((X), true)
 #define TSVectorGetDatum(X)			PointerGetDatum(X)
 #define PG_GETARG_TSVECTOR(n)		DatumGetTSVector(PG_GETARG_DATUM(n))
 #define PG_GETARG_TSVECTOR_COPY(n)	DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_TSVECTOR(x)		return TSVectorGetDatum(x)
 
-
 /*
  * TSQuery
  *
@@ -239,4 +279,22 @@ typedef TSQueryData *TSQuery;
 #define PG_GETARG_TSQUERY_COPY(n)	DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_TSQUERY(x)		return TSQueryGetDatum(x)
 
+int tsvector_getoffset(TSVector vec, int idx, WordEntry **we);
+char *tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+		char *lexeme, int lexeme_len, WordEntryPos *pos, int npos);
+
+/* Returns lexeme and its entry by given index from TSVector */
+inline static char *
+tsvector_getlexeme(TSVector vec, int idx, WordEntry **we)
+{
+	Assert(idx >=0 && idx < TS_COUNT(vec));
+
+	/*
+	 * we do not allow we == NULL because returned lexeme is not \0 ended,
+	 * and always should be used with we->len
+	 */
+	Assert(we != NULL);
+	return STRPTR(vec) + tsvector_getoffset(vec, idx, we);
+}
+
 #endif							/* _PG_TSTYPE_H_ */

Robert Haas

robertmhaas@gmail.com

over 8 years ago

In reply to: Ildus Kurbangaliev (#1)

Re: Remove 1MB size limit in tsvector

On Tue, Aug 1, 2017 at 10:08 AM, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

Historically tsvector type can't hold more than 1MB data.
I want to propose a patch that removes that limit.

That limit is created by 'pos' field from WordEntry, which have only
20 bits for storage.

In the proposed patch I removed this field and instead of it I keep
offsets only at each Nth item in WordEntry's array.

So this would break pg_upgrade for tsvector columns?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Ildus K

i.kurbangaliev@postgrespro.ru

over 8 years ago

In reply to: Robert Haas (#2)

Re: Remove 1MB size limit in tsvector

On Tue, 1 Aug 2017 14:56:54 -0400
Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Aug 1, 2017 at 10:08 AM, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

Historically tsvector type can't hold more than 1MB data.
I want to propose a patch that removes that limit.

That limit is created by 'pos' field from WordEntry, which have only
20 bits for storage.

In the proposed patch I removed this field and instead of it I keep
offsets only at each Nth item in WordEntry's array.

So this would break pg_upgrade for tsvector columns?

I added a function that will convert old tsvectors on the fly. It's the
approach used in hstore before.

Regards,
Ildus Kurbangaliev

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Robert Haas

robertmhaas@gmail.com

over 8 years ago

In reply to: Ildus K (#3)

Re: Remove 1MB size limit in tsvector

On Tue, Aug 1, 2017 at 3:10 PM, Ildus K <i.kurbangaliev@postgrespro.ru> wrote:

So this would break pg_upgrade for tsvector columns?

I added a function that will convert old tsvectors on the fly. It's the
approach used in hstore before.

Does that mean the answer to the question that I asked is "yes, but I
have a workaround" or does it mean that the answer is "no"?

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Ildus K

i.kurbangaliev@postgrespro.ru

over 8 years ago

In reply to: Robert Haas (#4)

Re: Remove 1MB size limit in tsvector

On Tue, 1 Aug 2017 15:33:08 -0400
Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Aug 1, 2017 at 3:10 PM, Ildus K
<i.kurbangaliev@postgrespro.ru> wrote:

So this would break pg_upgrade for tsvector columns?

I added a function that will convert old tsvectors on the fly. It's
the approach used in hstore before.

Does that mean the answer to the question that I asked is "yes, but I
have a workaround" or does it mean that the answer is "no"?

It's a workaround. DatumGetTSVector and
DatumGetTSVectorCopy will upgrade tsvector on the fly if it
has old format.

Regards,
Ildus Kurbangaliev

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Torsten Zuehlsdorff

mailinglists@toco-domains.de

over 8 years ago

In reply to: Ildus K (#5)

Re: Remove 1MB size limit in tsvector

On 01.08.2017 22:00, Ildus K wrote:

On Tue, 1 Aug 2017 15:33:08 -0400
Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Aug 1, 2017 at 3:10 PM, Ildus K
<i.kurbangaliev@postgrespro.ru> wrote:

So this would break pg_upgrade for tsvector columns?

I added a function that will convert old tsvectors on the fly. It's
the approach used in hstore before.

Does that mean the answer to the question that I asked is "yes, but I
have a workaround" or does it mean that the answer is "no"?

It's a workaround. DatumGetTSVector and
DatumGetTSVectorCopy will upgrade tsvector on the fly if it
has old format.

I'm not familiar with pg_upgrade, but want to ask: should this
workaround be part of pg_upgrade?

Greetings,
Torsten

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Ildus Kurbangaliev

i.kurbangaliev@postgrespro.ru

over 8 years ago

In reply to: Torsten Zuehlsdorff (#6)

Re: Remove 1MB size limit in tsvector

On Wed, 9 Aug 2017 09:01:44 +0200
Torsten Zuehlsdorff <mailinglists@toco-domains.de> wrote:

On 01.08.2017 22:00, Ildus K wrote:

On Tue, 1 Aug 2017 15:33:08 -0400
Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Aug 1, 2017 at 3:10 PM, Ildus K
<i.kurbangaliev@postgrespro.ru> wrote:

So this would break pg_upgrade for tsvector columns?

I added a function that will convert old tsvectors on the fly.
It's the approach used in hstore before.

Does that mean the answer to the question that I asked is "yes,
but I have a workaround" or does it mean that the answer is "no"?

It's a workaround. DatumGetTSVector and
DatumGetTSVectorCopy will upgrade tsvector on the fly if it
has old format.

I'm not familiar with pg_upgrade, but want to ask: should this
workaround be part of pg_upgrade?

Greetings,
Torsten

I chose the way when the data remains the same, until the user decides
to update it. I'm not so familiar with pg_upgrade myself and I don't
see now how the data will be converted with it, but it will anyway
increase downtime which is the shorter the better.

--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Robert Haas

robertmhaas@gmail.com

over 8 years ago

In reply to: Ildus K (#5)

Re: Remove 1MB size limit in tsvector

On Tue, Aug 1, 2017 at 4:00 PM, Ildus K <i.kurbangaliev@postgrespro.ru> wrote:

It's a workaround. DatumGetTSVector and
DatumGetTSVectorCopy will upgrade tsvector on the fly if it
has old format.

Hmm, that seems like a real fix, not just a workaround. If you can
transparently read the old format, there's no problem. Not sure about
performance, though.

The patch doesn't really conform to our coding standards, though, so
you need to clean it up (or, if you're not sure what you need to do,
you need to have someone who knows how PostgreSQL code needs to look
review it for you).

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Michael Paquier

michael.paquier@gmail.com

over 8 years ago

In reply to: Robert Haas (#8)

Re: Remove 1MB size limit in tsvector

On Wed, Aug 9, 2017 at 6:38 PM, Robert Haas <robertmhaas@gmail.com> wrote:

The patch doesn't really conform to our coding standards, though, so
you need to clean it up (or, if you're not sure what you need to do,
you need to have someone who knows how PostgreSQL code needs to look
review it for you).

The documentation has a couple of rules for coding conventions:
https://www.postgresql.org/docs/9.6/static/source.html
--
Michael

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#10

Alexander Korotkov

a.korotkov@postgrespro.ru

over 8 years ago

In reply to: Michael Paquier (#9)

Re: Remove 1MB size limit in tsvector

On Thu, Aug 10, 2017 at 7:37 AM, Michael Paquier <michael.paquier@gmail.com>
wrote:

On Wed, Aug 9, 2017 at 6:38 PM, Robert Haas <robertmhaas@gmail.com> wrote:

The patch doesn't really conform to our coding standards, though, so
you need to clean it up (or, if you're not sure what you need to do,
you need to have someone who knows how PostgreSQL code needs to look
review it for you).

The documentation has a couple of rules for coding conventions:
https://www.postgresql.org/docs/9.6/static/source.html

Ildus, from the first glance I see at least following violations of
PostgreSQL coding standards in your code.

+/*
+ * Converts tsvector with the old structure to current.
+ * @orig - tsvector to convert,
+ * @copy - return copy of tsvector, it has a meaning when tsvector doensn't
+ * need to be converted.
+ */
This comment will be reflowed by pgindent.  Also we don't use '@' for
parameters description in comments.
https://www.postgresql.org/docs/9.6/static/source-format.html

+TSVector
+tsvector_upgrade(Datum orig, bool copy)
+{
+ int       i,
+           dataoff = 0,
+   datalen = 0,
+   totallen;
+ TSVector   in,
+   out;

You have random mix of tabs and spaces here.

+ {
+ stroff = SHORTALIGN(stroff); \
+ entry->hasoff = 0;
+ entry->len = lexeme_len;
+ entry->npos = npos;
+ }

What this backslash is doing here?
There are other similar (and probably different) violations of coding
standard over the code. Ildus, please check you patches carefully before
publishing.

------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#11

Alexander Korotkov

a.korotkov@postgrespro.ru

over 8 years ago

In reply to: Robert Haas (#8)

Re: Remove 1MB size limit in tsvector

On Wed, Aug 9, 2017 at 7:38 PM, Robert Haas <robertmhaas@gmail.com> wrote:

On Tue, Aug 1, 2017 at 4:00 PM, Ildus K <i.kurbangaliev@postgrespro.ru>
wrote:

It's a workaround. DatumGetTSVector and
DatumGetTSVectorCopy will upgrade tsvector on the fly if it
has old format.

Hmm, that seems like a real fix, not just a workaround. If you can
transparently read the old format, there's no problem. Not sure about
performance, though.

+1
Ildus, I think we need to benchmark reading of the old format. There would
be tradeoff between performance of old format reading and amount of extra
code needed. Once we will have benchmarks we can consider whether this is
the solution we would like to buy.

------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#12

Tom Lane

tgl@sss.pgh.pa.us

over 8 years ago

In reply to: Alexander Korotkov (#10)

Re: Remove 1MB size limit in tsvector

Alexander Korotkov <a.korotkov@postgrespro.ru> writes:

...
You have random mix of tabs and spaces here.

It's worth running pgindent over your code before submitting. It should
be pretty easy to set that up nowadays, see src/tools/pgindent/README.
(If you find any portability problems while trying to install pgindent,
please let me know.)

regards, tom lane

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#13

Ildus Kurbangaliev

i.kurbangaliev@postgrespro.ru

over 8 years ago

In reply to: Tom Lane (#12)

1 attachment(s)

Re: Remove 1MB size limit in tsvector

On Thu, 10 Aug 2017 11:46:55 -0400
Tom Lane <tgl@sss.pgh.pa.us> wrote:

Alexander Korotkov <a.korotkov@postgrespro.ru> writes:

...
You have random mix of tabs and spaces here.

It's worth running pgindent over your code before submitting. It
should be pretty easy to set that up nowadays, see
src/tools/pgindent/README. (If you find any portability problems
while trying to install pgindent, please let me know.)

Attached a new version of the patch. It mostly contains cosmetic
changes. I rebased it to current master, ran pgindent and fixed
formatting errors.

--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

Attachments:

tsvector_mixed_positions_v2.patchtext/x-patchDownload

diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile
index 34fe4c5b3c..9585a25003 100644
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -26,7 +26,7 @@ DICTFILES_PATH=$(addprefix dicts/,$(DICTFILES))
 OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
 	dict_simple.o dict_synonym.o dict_thesaurus.o \
 	dict_ispell.o regis.o spell.o \
-	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
+	to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o ts_compat.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 35d9ab276c..aa87fd8a04 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -156,13 +156,10 @@ TSVector
 make_tsvector(ParsedText *prs)
 {
 	int			i,
-				j,
 				lenstr = 0,
-				totallen;
+				totallen,
+				stroff = 0;
 	TSVector	in;
-	WordEntry  *ptr;
-	char	   *str;
-	int			stroff;
 
 	/* Merge duplicate words */
 	if (prs->curwords > 0)
@@ -171,12 +168,9 @@ make_tsvector(ParsedText *prs)
 	/* Determine space needed */
 	for (i = 0; i < prs->curwords; i++)
 	{
-		lenstr += prs->words[i].len;
-		if (prs->words[i].alen)
-		{
-			lenstr = SHORTALIGN(lenstr);
-			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
-		}
+		int			npos = prs->words[i].alen ? prs->words[i].pos.apos[0] : 0;
+
+		INCRSIZE(lenstr, i, prs->words[i].len, npos);
 	}
 
 	if (lenstr > MAXSTRPOS)
@@ -187,41 +181,21 @@ make_tsvector(ParsedText *prs)
 	totallen = CALCDATASIZE(prs->curwords, lenstr);
 	in = (TSVector) palloc0(totallen);
 	SET_VARSIZE(in, totallen);
-	in->size = prs->curwords;
+	TS_SETCOUNT(in, prs->curwords);
 
-	ptr = ARRPTR(in);
-	str = STRPTR(in);
-	stroff = 0;
 	for (i = 0; i < prs->curwords; i++)
 	{
-		ptr->len = prs->words[i].len;
-		ptr->pos = stroff;
-		memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
-		stroff += prs->words[i].len;
-		pfree(prs->words[i].word);
+		int			npos = 0;
+
 		if (prs->words[i].alen)
-		{
-			int			k = prs->words[i].pos.apos[0];
-			WordEntryPos *wptr;
+			npos = prs->words[i].pos.apos[0];
 
-			if (k > 0xFFFF)
-				elog(ERROR, "positions array too long");
+		tsvector_addlexeme(in, i, &stroff, prs->words[i].word, prs->words[i].len,
+						   prs->words[i].pos.apos + 1, npos);
 
-			ptr->haspos = 1;
-			stroff = SHORTALIGN(stroff);
-			*(uint16 *) (str + stroff) = (uint16) k;
-			wptr = POSDATAPTR(in, ptr);
-			for (j = 0; j < k; j++)
-			{
-				WEP_SETWEIGHT(wptr[j], 0);
-				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
-			}
-			stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
+		pfree(prs->words[i].word);
+		if (prs->words[i].alen)
 			pfree(prs->words[i].pos.apos);
-		}
-		else
-			ptr->haspos = 0;
-		ptr++;
 	}
 
 	if (prs->words)
@@ -251,7 +225,6 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
 	PG_FREE_IF_COPY(in, 1);
 
 	out = make_tsvector(&prs);
-
 	PG_RETURN_TSVECTOR(out);
 }
 
diff --git a/src/backend/tsearch/ts_compat.c b/src/backend/tsearch/ts_compat.c
new file mode 100644
index 0000000000..bc45109241
--- /dev/null
+++ b/src/backend/tsearch/ts_compat.c
@@ -0,0 +1,84 @@
+#include "postgres.h"
+#include "tsearch/ts_type.h"
+
+/*
+ * Definition of old WordEntry struct in TSVector. Because of limitations
+ * in size (max 1MB for lexemes), the format has changed
+ */
+typedef struct
+{
+	uint32
+				haspos:1,
+				len:11,
+				pos:20;
+}			OldWordEntry;
+
+typedef struct
+{
+	uint16		npos;
+	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
+}			OldWordEntryPosVector;
+
+#define OLDSTRPTR(x)	( (char *) &(x)->entries[x->size_] )
+#define _OLDPOSVECPTR(x, e)	\
+	((OldWordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
+#define OLDPOSDATALEN(x,e) ( ( (e)->haspos ) ? (_OLDPOSVECPTR(x,e)->npos) : 0 )
+#define OLDPOSDATAPTR(x,e) (_OLDPOSVECPTR(x,e)->pos)
+
+/*
+ * Converts tsvector with the old structure to current.
+ * Can return copy of tsvector, but it has a meaning when tsvector doensn't
+ * need to be converted.
+ */
+TSVector
+tsvector_upgrade(Datum orig, bool copy)
+{
+	int			i,
+				dataoff = 0,
+				datalen = 0,
+				totallen;
+	TSVector	in,
+				out;
+
+	in = (TSVector) PG_DETOAST_DATUM(orig);
+
+	/* If already in new format, return as is */
+	if (in->size_ & TS_FLAG_STRETCHED)
+	{
+		TSVector	out;
+
+		if (!copy)
+			return in;
+
+		out = (TSVector) palloc(VARSIZE(in));
+		memcpy(out, in, VARSIZE(in));
+		return out;
+	}
+
+	/*
+	 * Calculate required size. We don't check any sizes here because old
+	 * format was limited with 1MB
+	 */
+	for (i = 0; i < in->size_; i++)
+	{
+		OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+
+		INCRSIZE(datalen, i, entry->len, OLDPOSDATALEN(in, entry));
+	}
+
+	totallen = CALCDATASIZE(in->size_, datalen);
+	out = (TSVector) palloc0(totallen);
+	SET_VARSIZE(out, totallen);
+	TS_SETCOUNT(out, in->size_);
+
+	for (i = 0; i < in->size_; i++)
+	{
+		OldWordEntry *entry = (OldWordEntry *) (in->entries + i);
+
+		tsvector_addlexeme(out, i, &dataoff,
+						   OLDSTRPTR(in) + entry->pos, entry->len,
+						   OLDPOSDATAPTR(in, entry), OLDPOSDATALEN(in, entry));
+	}
+
+	return out;
+}
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 320c7f1a61..9b2fc4be04 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -202,7 +202,8 @@ compute_tsvector_stats(VacAttrStats *stats,
 		TSVector	vector;
 		WordEntry  *curentryptr;
 		char	   *lexemesptr;
-		int			j;
+		int			j,
+					pos;
 
 		vacuum_delay_point();
 
@@ -236,7 +237,9 @@ compute_tsvector_stats(VacAttrStats *stats,
 		 */
 		lexemesptr = STRPTR(vector);
 		curentryptr = ARRPTR(vector);
-		for (j = 0; j < vector->size; j++)
+
+		INITPOS(pos);
+		for (j = 0; j < TS_COUNT(vector); j++)
 		{
 			bool		found;
 
@@ -246,8 +249,8 @@ compute_tsvector_stats(VacAttrStats *stats,
 			 * make a copy of it.  This way we can free the tsvector value
 			 * once we've processed all its lexemes.
 			 */
-			hash_key.lexeme = lexemesptr + curentryptr->pos;
-			hash_key.length = curentryptr->len;
+			hash_key.lexeme = lexemesptr + pos;
+			hash_key.length = ENTRY_LEN(vector, curentryptr);
 
 			/* Lookup current lexeme in hashtable, adding it if new */
 			item = (TrackItem *) hash_search(lexemes_tab,
@@ -280,7 +283,7 @@ compute_tsvector_stats(VacAttrStats *stats,
 			}
 
 			/* Advance to the next WordEntry in the tsvector */
-			curentryptr++;
+			INCRPTR(vector, curentryptr, pos);
 		}
 
 		/* If the vector was toasted, free the detoasted copy. */
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 83a939dfd5..75a4364b94 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -67,23 +67,27 @@ gin_extract_tsvector(PG_FUNCTION_ARGS)
 	TSVector	vector = PG_GETARG_TSVECTOR(0);
 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 	Datum	   *entries = NULL;
+	int			tscount = TS_COUNT(vector);
 
-	*nentries = vector->size;
-	if (vector->size > 0)
+	*nentries = tscount;
+	if (tscount > 0)
 	{
 		int			i;
+		uint32		pos;
+
 		WordEntry  *we = ARRPTR(vector);
 
-		entries = (Datum *) palloc(sizeof(Datum) * vector->size);
+		entries = (Datum *) palloc(sizeof(Datum) * tscount);
 
-		for (i = 0; i < vector->size; i++)
+		INITPOS(pos);
+		for (i = 0; i < tscount; i++)
 		{
 			text	   *txt;
 
-			txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
+			txt = cstring_to_text_with_len(STRPTR(vector) + pos,
+										   ENTRY_LEN(vector, we));
 			entries[i] = PointerGetDatum(txt);
-
-			we++;
+			INCRPTR(vector, we, pos);
 		}
 	}
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 7ce2699b5c..18d3de3725 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -192,28 +192,33 @@ gtsvector_compress(PG_FUNCTION_ARGS)
 		int32	   *arr;
 		WordEntry  *ptr = ARRPTR(val);
 		char	   *words = STRPTR(val);
+		const int	tscount = TS_COUNT(val);
+		uint32		pos;
 
-		len = CALCGTSIZE(ARRKEY, val->size);
+		len = CALCGTSIZE(ARRKEY, tscount);
 		res = (SignTSVector *) palloc(len);
 		SET_VARSIZE(res, len);
 		res->flag = ARRKEY;
 		arr = GETARR(res);
-		len = val->size;
+		len = tscount;
+
+		INITPOS(pos);
 		while (len--)
 		{
 			pg_crc32	c;
 
 			INIT_LEGACY_CRC32(c);
-			COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len);
+			COMP_LEGACY_CRC32(c, words + pos, ENTRY_LEN(val, ptr));
 			FIN_LEGACY_CRC32(c);
 
 			*arr = *(int32 *) &c;
 			arr++;
-			ptr++;
+
+			INCRPTR(val, ptr, pos);
 		}
 
-		len = uniqueint(GETARR(res), val->size);
-		if (len != val->size)
+		len = uniqueint(GETARR(res), tscount);
+		if (len != tscount)
 		{
 			/*
 			 * there is a collision of hash-function; len is always less than
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 4577bcc0b8..cb859d9b47 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -53,43 +53,39 @@ word_distance(int32 w)
 static int
 cnt_length(TSVector t)
 {
-	WordEntry  *ptr = ARRPTR(t),
-			   *end = (WordEntry *) STRPTR(t);
-	int			len = 0;
+	int			i,
+				len = 0;
 
-	while (ptr < end)
+	for (i = 0; i < TS_COUNT(t); i++)
 	{
-		int			clen = POSDATALEN(t, ptr);
-
-		if (clen == 0)
-			len += 1;
-		else
-			len += clen;
+		WordEntry  *entry = UNWRAP_ENTRY(t, ARRPTR(t) + i);
 
-		ptr++;
+		Assert(!entry->hasoff);
+		len += (entry->npos == 0) ? 1 : entry->npos;
 	}
 
 	return len;
 }
 
 
-#define WordECompareQueryItem(e,q,p,i,m) \
-	tsCompareString((q) + (i)->distance, (i)->length,	\
-					(e) + (p)->pos, (p)->len, (m))
-
-
 /*
  * Returns a pointer to a WordEntry's array corresponding to 'item' from
  * tsvector 't'. 'q' is the TSQuery containing 'item'.
  * Returns NULL if not found.
  */
-static WordEntry *
+static int
 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 {
-	WordEntry  *StopLow = ARRPTR(t);
-	WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
-	WordEntry  *StopMiddle = StopHigh;
+#define WordECompareQueryItem(s,l,q,i,m) \
+	tsCompareString((q) + (i)->distance, (i)->length,	\
+					s, l, (m))
+
+	int			StopLow = 0;
+	int			StopHigh = TS_COUNT(t);
+	int			StopMiddle = StopHigh;
 	int			difference;
+	char	   *lexeme;
+	WordEntry  *we;
 
 	*nitem = 0;
 
@@ -97,7 +93,12 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-		difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
+		lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+		Assert(!we->hasoff);
+		difference = WordECompareQueryItem(lexeme, we->len,
+										   GETOPERAND(q), item, false);
+
 		if (difference == 0)
 		{
 			StopHigh = StopMiddle;
@@ -117,18 +118,22 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 
 		*nitem = 0;
 
-		while (StopMiddle < (WordEntry *) STRPTR(t) &&
-			   WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
+		while (StopMiddle < TS_COUNT(t))
 		{
+			lexeme = tsvector_getlexeme(t, StopMiddle, &we);
+
+			Assert(!we->hasoff);
+			if (WordECompareQueryItem(lexeme, we->len, GETOPERAND(q), item, true) != 0)
+				break;
+
 			(*nitem)++;
 			StopMiddle++;
 		}
 	}
 
-	return (*nitem > 0) ? StopHigh : NULL;
+	return (*nitem > 0) ? StopHigh : -1;
 }
 
-
 /*
  * sort QueryOperands by (length, word)
  */
@@ -200,15 +205,13 @@ SortAndUniqItems(TSQuery q, int *size)
 static float
 calc_rank_and(const float *w, TSVector t, TSQuery q)
 {
-	WordEntryPosVector **pos;
-	WordEntryPosVector1 posnull;
-	WordEntryPosVector *POSNULL;
+	WordEntryPos **pos;
+	uint16	   *npos;
+	WordEntryPos posnull[1] = {0};
 	int			i,
 				k,
 				l,
 				p;
-	WordEntry  *entry,
-			   *firstentry;
 	WordEntryPos *post,
 			   *ct;
 	int32		dimt,
@@ -225,41 +228,55 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 		pfree(item);
 		return calc_rank_or(w, t, q);
 	}
-	pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+	pos = (WordEntryPos **) palloc0(sizeof(WordEntryPos *) * q->size);
+	npos = (uint16 *) palloc0(sizeof(uint16) * q->size);
 
-	/* A dummy WordEntryPos array to use when haspos is false */
-	posnull.npos = 1;
-	posnull.pos[0] = 0;
-	WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
-	POSNULL = (WordEntryPosVector *) &posnull;
+	/* posnull is a dummy WordEntryPos array to use when npos == 0 */
+	WEP_SETPOS(posnull[0], MAXENTRYPOS - 1);
 
 	for (i = 0; i < size; i++)
 	{
-		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
-		if (!entry)
+		int			idx = find_wordentry(t, q, item[i], &nitem),
+					firstidx;
+
+		if (idx == -1)
 			continue;
 
-		while (entry - firstentry < nitem)
+		firstidx = idx;
+
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
-				pos[i] = _POSVECPTR(t, entry);
+			WordEntry  *entry;
+
+			char	   *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
+			{
+				pos[i] = POSDATAPTR(lexeme, entry->len);
+				npos[i] = entry->npos;
+			}
 			else
-				pos[i] = POSNULL;
+			{
+				pos[i] = posnull;
+				npos[i] = 1;
+			}
+
+			post = pos[i];
+			dimt = npos[i];
 
-			dimt = pos[i]->npos;
-			post = pos[i]->pos;
 			for (k = 0; k < i; k++)
 			{
 				if (!pos[k])
 					continue;
-				lenct = pos[k]->npos;
-				ct = pos[k]->pos;
+				lenct = npos[k];
+				ct = pos[k];
 				for (l = 0; l < dimt; l++)
 				{
 					for (p = 0; p < lenct; p++)
 					{
 						dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
-						if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
+						if (dist || (dist == 0 && (pos[i] == posnull || pos[k] == posnull)))
 						{
 							float		curw;
 
@@ -272,10 +289,11 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 				}
 			}
 
-			entry++;
+			idx++;
 		}
 	}
 	pfree(pos);
+	pfree(npos);
 	pfree(item);
 	return res;
 }
@@ -283,9 +301,8 @@ calc_rank_and(const float *w, TSVector t, TSQuery q)
 static float
 calc_rank_or(const float *w, TSVector t, TSQuery q)
 {
-	WordEntry  *entry,
-			   *firstentry;
-	WordEntryPosVector1 posnull;
+	/* A dummy WordEntryPos array to use when lexeme hasn't positions */
+	WordEntryPos posnull[1] = {0};
 	WordEntryPos *post;
 	int32		dimt,
 				j,
@@ -295,33 +312,37 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 	QueryOperand **item;
 	int			size = q->size;
 
-	/* A dummy WordEntryPos array to use when haspos is false */
-	posnull.npos = 1;
-	posnull.pos[0] = 0;
-
 	item = SortAndUniqItems(q, &size);
 
 	for (i = 0; i < size; i++)
 	{
+		int			idx,
+					firstidx;
 		float		resj,
 					wjm;
 		int32		jm;
 
-		firstentry = entry = find_wordentry(t, q, item[i], &nitem);
-		if (!entry)
+		idx = find_wordentry(t, q, item[i], &nitem);
+		if (idx == -1)
 			continue;
 
-		while (entry - firstentry < nitem)
+		firstidx = idx;
+
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
+			WordEntry  *entry;
+			char	   *lexeme = tsvector_getlexeme(t, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
 			{
-				dimt = POSDATALEN(t, entry);
-				post = POSDATAPTR(t, entry);
+				dimt = entry->npos;
+				post = POSDATAPTR(lexeme, entry->len);
 			}
 			else
 			{
-				dimt = posnull.npos;
-				post = posnull.pos;
+				dimt = 1;
+				post = posnull;
 			}
 
 			resj = 0.0;
@@ -345,7 +366,7 @@ calc_rank_or(const float *w, TSVector t, TSQuery q)
 */
 			res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
 
-			entry++;
+			idx++;
 		}
 	}
 	if (size > 0)
@@ -361,7 +382,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 	float		res = 0.0;
 	int			len;
 
-	if (!t->size || !q->size)
+	if (!TS_COUNT(t) || !q->size)
 		return 0.0;
 
 	/* XXX: What about NOT? */
@@ -373,7 +394,7 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 	if (res < 0)
 		res = 1e-20f;
 
-	if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+	if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(t) > 0)
 		res /= log((double) (cnt_length(t) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_LENGTH)
@@ -385,11 +406,11 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
 
 	/* RANK_NORM_EXTDIST not applicable */
 
-	if ((method & RANK_NORM_UNIQ) && t->size > 0)
-		res /= (float) (t->size);
+	if ((method & RANK_NORM_UNIQ) && TS_COUNT(t) > 0)
+		res /= (float) (TS_COUNT(t));
 
-	if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
-		res /= log((double) (t->size + 1)) / log(2.0);
+	if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(t) > 0)
+		res /= log((double) (TS_COUNT(t) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_RDIVRPLUS1)
 		res /= (res + 1);
@@ -504,13 +525,13 @@ typedef struct
 		struct
 		{						/* compiled doc representation */
 			QueryItem **items;
-			int16		nitem;
+			int32		nitem;
 		}			query;
 		struct
 		{						/* struct is used for preparing doc
 								 * representation */
 			QueryItem  *item;
-			WordEntry  *entry;
+			int32		idx;
 		}			map;
 	}			data;
 	WordEntryPos pos;
@@ -526,10 +547,10 @@ compareDocR(const void *va, const void *vb)
 	{
 		if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
 		{
-			if (a->data.map.entry == b->data.map.entry)
+			if (a->data.map.idx == b->data.map.idx)
 				return 0;
 
-			return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+			return (a->data.map.idx > b->data.map.idx) ? 1 : -1;
 		}
 
 		return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
@@ -724,9 +745,6 @@ static DocRepresentation *
 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 {
 	QueryItem  *item = GETQUERY(qr->query);
-	WordEntry  *entry,
-			   *firstentry;
-	WordEntryPos *post;
 	int32		dimt,			/* number of 'post' items */
 				j,
 				i,
@@ -743,29 +761,38 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 	 */
 	for (i = 0; i < qr->query->size; i++)
 	{
+		int			idx,
+					firstidx;
 		QueryOperand *curoperand;
+		WordEntryPos *post;
 
 		if (item[i].type != QI_VAL)
 			continue;
 
 		curoperand = &item[i].qoperand;
 
-		firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
-		if (!entry)
+		idx = find_wordentry(txt, qr->query, curoperand, &nitem);
+		if (idx < 0)
 			continue;
 
+		firstidx = idx;
+
 		/* iterations over entries in tsvector */
-		while (entry - firstentry < nitem)
+		while (idx - firstidx < nitem)
 		{
-			if (entry->haspos)
+			WordEntry  *entry;
+			char	   *lex = tsvector_getlexeme(txt, idx, &entry);
+
+			Assert(!entry->hasoff);
+			if (entry->npos)
 			{
-				dimt = POSDATALEN(txt, entry);
-				post = POSDATAPTR(txt, entry);
+				dimt = entry->npos;
+				post = POSDATAPTR(lex, entry->len);
 			}
 			else
 			{
 				/* ignore words without positions */
-				entry++;
+				idx++;
 				continue;
 			}
 
@@ -782,13 +809,12 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 					curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
 				{
 					doc[cur].pos = post[j];
-					doc[cur].data.map.entry = entry;
+					doc[cur].data.map.idx = idx;
 					doc[cur].data.map.item = (QueryItem *) curoperand;
 					cur++;
 				}
 			}
-
-			entry++;
+			idx++;
 		}
 	}
 
@@ -814,7 +840,7 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 		while (rptr - doc < cur)
 		{
 			if (rptr->pos == (rptr - 1)->pos &&
-				rptr->data.map.entry == (rptr - 1)->data.map.entry)
+				rptr->data.map.idx == (rptr - 1)->data.map.idx)
 			{
 				storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
 				storage.data.query.nitem++;
@@ -917,7 +943,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 		NExtent++;
 	}
 
-	if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+	if ((method & RANK_NORM_LOGLENGTH) && TS_COUNT(txt) > 0)
 		Wdoc /= log((double) (cnt_length(txt) + 1));
 
 	if (method & RANK_NORM_LENGTH)
@@ -930,11 +956,11 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 	if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
 		Wdoc /= ((double) NExtent) / SumDist;
 
-	if ((method & RANK_NORM_UNIQ) && txt->size > 0)
-		Wdoc /= (double) (txt->size);
+	if ((method & RANK_NORM_UNIQ) && TS_COUNT(txt) > 0)
+		Wdoc /= (double) (TS_COUNT(txt));
 
-	if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
-		Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+	if ((method & RANK_NORM_LOGUNIQ) && TS_COUNT(txt) > 0)
+		Wdoc /= log((double) (TS_COUNT(txt) + 1)) / log(2.0);
 
 	if (method & RANK_NORM_RDIVRPLUS1)
 		Wdoc /= (Wdoc + 1);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 6f66c1f58c..de34df0c3d 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -23,8 +23,8 @@
 typedef struct
 {
 	WordEntry	entry;			/* must be first! */
+	size_t		offset;			/* offset of lexeme in some buffer */
 	WordEntryPos *pos;
-	int			poslen;			/* number of elements in pos */
 } WordEntryIN;
 
 
@@ -79,14 +79,30 @@ uniquePos(WordEntryPos *a, int l)
 
 /* Compare two WordEntryIN values for qsort */
 static int
-compareentry(const void *va, const void *vb, void *arg)
+compareentry_in(const void *va, const void *vb, void *arg)
 {
 	const WordEntryIN *a = (const WordEntryIN *) va;
 	const WordEntryIN *b = (const WordEntryIN *) vb;
 	char	   *BufferStr = (char *) arg;
 
-	return tsCompareString(&BufferStr[a->entry.pos], a->entry.len,
-						   &BufferStr[b->entry.pos], b->entry.len,
+	return tsCompareString(&BufferStr[a->offset], a->entry.len,
+						   &BufferStr[b->offset], b->entry.len,
+						   false);
+}
+
+/* Compare two WordEntry values for qsort */
+static int
+compareentry(const void *va, const void *vb, void *arg)
+{
+	const WordEntry *a = (const WordEntry *) va;
+	const WordEntry *b = (const WordEntry *) vb;
+	TSVector	tsv = (TSVector) arg;
+
+	uint32		offset1 = tsvector_getoffset(tsv, a - ARRPTR(tsv), NULL),
+				offset2 = tsvector_getoffset(tsv, b - ARRPTR(tsv), NULL);
+
+	return tsCompareString(STRPTR(tsv) + offset1, ENTRY_LEN(tsv, a),
+						   STRPTR(tsv) + offset2, ENTRY_LEN(tsv, b),
 						   false);
 }
 
@@ -97,14 +113,15 @@ compareentry(const void *va, const void *vb, void *arg)
 static int
 uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
 {
-	int			buflen;
+	int			buflen,
+				i = 0;
 	WordEntryIN *ptr,
 			   *res;
 
 	Assert(l >= 1);
 
 	if (l > 1)
-		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
+		qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry_in,
 				  (void *) buf);
 
 	buflen = 0;
@@ -112,67 +129,76 @@ uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
 	ptr = a + 1;
 	while (ptr - a < l)
 	{
+		Assert(!ptr->entry.hasoff);
+
 		if (!(ptr->entry.len == res->entry.len &&
-			  strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
-					  res->entry.len) == 0))
+			  strncmp(&buf[ptr->offset], &buf[res->offset], res->entry.len) == 0))
 		{
 			/* done accumulating data into *res, count space needed */
+			buflen = SHORTALIGN(buflen);
+			if (i++ % TS_OFFSET_STRIDE == 0)
+			{
+				buflen = INTALIGN(buflen);
+				buflen += sizeof(WordEntry);
+			}
+
 			buflen += res->entry.len;
-			if (res->entry.haspos)
+			if (res->entry.npos)
 			{
-				res->poslen = uniquePos(res->pos, res->poslen);
+				res->entry.npos = uniquePos(res->pos, res->entry.npos);
 				buflen = SHORTALIGN(buflen);
-				buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+				buflen += res->entry.npos * sizeof(WordEntryPos);
 			}
 			res++;
 			if (res != ptr)
-				memcpy(res, ptr, sizeof(WordEntryIN));
+				*res = *ptr;
 		}
-		else if (ptr->entry.haspos)
+		else if (ptr->entry.npos)
 		{
-			if (res->entry.haspos)
+			if (res->entry.npos)
 			{
 				/* append ptr's positions to res's positions */
-				int			newlen = ptr->poslen + res->poslen;
+				int			newlen = ptr->entry.npos + res->entry.npos;
 
 				res->pos = (WordEntryPos *)
 					repalloc(res->pos, newlen * sizeof(WordEntryPos));
-				memcpy(&res->pos[res->poslen], ptr->pos,
-					   ptr->poslen * sizeof(WordEntryPos));
-				res->poslen = newlen;
+				memcpy(&res->pos[res->entry.npos], ptr->pos,
+					   ptr->entry.npos * sizeof(WordEntryPos));
+				res->entry.npos = newlen;
 				pfree(ptr->pos);
 			}
 			else
 			{
 				/* just give ptr's positions to pos */
-				res->entry.haspos = 1;
+				res->entry.npos = ptr->entry.npos;
 				res->pos = ptr->pos;
-				res->poslen = ptr->poslen;
 			}
 		}
 		ptr++;
 	}
 
 	/* count space needed for last item */
+	if (i % TS_OFFSET_STRIDE == 0)
+	{
+		buflen = INTALIGN(buflen);
+		buflen += sizeof(WordEntry);
+	}
+	else
+		buflen = SHORTALIGN(buflen);
+
 	buflen += res->entry.len;
-	if (res->entry.haspos)
+
+	if (res->entry.npos)
 	{
-		res->poslen = uniquePos(res->pos, res->poslen);
+		res->entry.npos = uniquePos(res->pos, res->entry.npos);
 		buflen = SHORTALIGN(buflen);
-		buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+		buflen += res->entry.npos * sizeof(WordEntryPos);
 	}
 
 	*outbuflen = buflen;
 	return res + 1 - a;
 }
 
-static int
-WordEntryCMP(WordEntry *a, WordEntry *b, char *buf)
-{
-	return compareentry(a, b, buf);
-}
-
-
 Datum
 tsvectorin(PG_FUNCTION_ARGS)
 {
@@ -181,7 +207,6 @@ tsvectorin(PG_FUNCTION_ARGS)
 	WordEntryIN *arr;
 	int			totallen;
 	int			arrlen;			/* allocated size of arr */
-	WordEntry  *inarr;
 	int			len = 0;
 	TSVector	in;
 	int			i;
@@ -189,7 +214,6 @@ tsvectorin(PG_FUNCTION_ARGS)
 	int			toklen;
 	WordEntryPos *pos;
 	int			poslen;
-	char	   *strbuf;
 	int			stroff;
 
 	/*
@@ -238,23 +262,13 @@ tsvectorin(PG_FUNCTION_ARGS)
 			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
 			cur = tmpbuf + dist;
 		}
+		arr[len].entry.hasoff = 0;
 		arr[len].entry.len = toklen;
-		arr[len].entry.pos = cur - tmpbuf;
+		arr[len].offset = cur - tmpbuf;
+		arr[len].entry.npos = poslen;
+		arr[len].pos = (poslen != 0) ? pos : NULL;
 		memcpy((void *) cur, (void *) token, toklen);
 		cur += toklen;
-
-		if (poslen != 0)
-		{
-			arr[len].entry.haspos = 1;
-			arr[len].pos = pos;
-			arr[len].poslen = poslen;
-		}
-		else
-		{
-			arr[len].entry.haspos = 0;
-			arr[len].pos = NULL;
-			arr[len].poslen = 0;
-		}
 		len++;
 	}
 
@@ -273,36 +287,18 @@ tsvectorin(PG_FUNCTION_ARGS)
 	totallen = CALCDATASIZE(len, buflen);
 	in = (TSVector) palloc0(totallen);
 	SET_VARSIZE(in, totallen);
-	in->size = len;
-	inarr = ARRPTR(in);
-	strbuf = STRPTR(in);
+	TS_SETCOUNT(in, len);
 	stroff = 0;
 	for (i = 0; i < len; i++)
 	{
-		memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
-		arr[i].entry.pos = stroff;
-		stroff += arr[i].entry.len;
-		if (arr[i].entry.haspos)
-		{
-			if (arr[i].poslen > 0xFFFF)
-				elog(ERROR, "positions array too long");
-
-			/* Copy number of positions */
-			stroff = SHORTALIGN(stroff);
-			*(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
-			stroff += sizeof(uint16);
-
-			/* Copy positions */
-			memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
-			stroff += arr[i].poslen * sizeof(WordEntryPos);
+		tsvector_addlexeme(in, i, &stroff, &tmpbuf[arr[i].offset],
+						   arr[i].entry.len, arr[i].pos, arr[i].entry.npos);
 
+		if (arr[i].entry.npos)
 			pfree(arr[i].pos);
-		}
-		inarr[i] = arr[i].entry;
 	}
 
-	Assert((strbuf + stroff - (char *) in) == totallen);
-
+	Assert((STRPTR(in) + stroff - (char *) in) == totallen);
 	PG_RETURN_TSVECTOR(in);
 }
 
@@ -313,28 +309,37 @@ tsvectorout(PG_FUNCTION_ARGS)
 	char	   *outbuf;
 	int32		i,
 				lenbuf = 0,
-				pp;
+				pp,
+				tscount = TS_COUNT(out);
+	uint32		pos;
 	WordEntry  *ptr = ARRPTR(out);
 	char	   *curbegin,
 			   *curin,
 			   *curout;
 
-	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
-	for (i = 0; i < out->size; i++)
+	lenbuf = tscount * 2 /* '' */ + tscount - 1 /* space */ + 2 /* \0 */ ;
+	for (i = 0; i < tscount; i++)
 	{
-		lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
-		if (ptr[i].haspos)
-			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
+		int			npos = ENTRY_NPOS(out, ptr + i);
+
+		lenbuf += ENTRY_LEN(out, ptr + i) * 2 * pg_database_encoding_max_length() /* for escape */ ;
+		if (npos)
+			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * npos;
 	}
 
 	curout = outbuf = (char *) palloc(lenbuf);
-	for (i = 0; i < out->size; i++)
+
+	INITPOS(pos);
+	for (i = 0; i < tscount; i++)
 	{
-		curbegin = curin = STRPTR(out) + ptr->pos;
+		int			lex_len = ENTRY_LEN(out, ptr),
+					npos = ENTRY_NPOS(out, ptr);
+
+		curbegin = curin = STRPTR(out) + pos;
 		if (i != 0)
 			*curout++ = ' ';
 		*curout++ = '\'';
-		while (curin - curbegin < ptr->len)
+		while (curin - curbegin < lex_len)
 		{
 			int			len = pg_mblen(curin);
 
@@ -348,12 +353,12 @@ tsvectorout(PG_FUNCTION_ARGS)
 		}
 
 		*curout++ = '\'';
-		if ((pp = POSDATALEN(out, ptr)) != 0)
+		if ((pp = npos) != 0)
 		{
 			WordEntryPos *wptr;
 
 			*curout++ = ':';
-			wptr = POSDATAPTR(out, ptr);
+			wptr = POSDATAPTR(curbegin, lex_len);
 			while (pp)
 			{
 				curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
@@ -379,7 +384,8 @@ tsvectorout(PG_FUNCTION_ARGS)
 				wptr++;
 			}
 		}
-		ptr++;
+
+		INCRPTR(out, ptr, pos);
 	}
 
 	*curout = '\0';
@@ -406,35 +412,38 @@ tsvectorsend(PG_FUNCTION_ARGS)
 	StringInfoData buf;
 	int			i,
 				j;
+	uint32		pos;
 	WordEntry  *weptr = ARRPTR(vec);
 
 	pq_begintypsend(&buf);
+	pq_sendint(&buf, TS_COUNT(vec), sizeof(int32));
 
-	pq_sendint(&buf, vec->size, sizeof(int32));
-	for (i = 0; i < vec->size; i++)
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(vec); i++)
 	{
-		uint16		npos;
+		char	   *lexeme = STRPTR(vec) + pos;
+		int			npos = ENTRY_NPOS(vec, weptr),
+					lex_len = ENTRY_LEN(vec, weptr);
 
 		/*
 		 * the strings in the TSVector array are not null-terminated, so we
 		 * have to send the null-terminator separately
 		 */
-		pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+		pq_sendtext(&buf, lexeme, lex_len);
 		pq_sendbyte(&buf, '\0');
-
-		npos = POSDATALEN(vec, weptr);
 		pq_sendint(&buf, npos, sizeof(uint16));
 
 		if (npos > 0)
 		{
-			WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
+			WordEntryPos *wepptr = POSDATAPTR(lexeme, lex_len);
 
 			for (j = 0; j < npos; j++)
 				pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
 		}
-		weptr++;
+		INCRPTR(vec, weptr, pos);
 	}
 
+	PG_FREE_IF_COPY(vec, 0);
 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 }
 
@@ -443,14 +452,16 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 {
 	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 	TSVector	vec;
-	int			i;
-	int32		nentries;
-	int			datalen;		/* number of bytes used in the variable size
+	int			i,
+				datalen;		/* number of bytes used in the variable size
 								 * area after fixed size TSVector header and
 								 * WordEntries */
+	int32		nentries;
 	Size		hdrlen;
 	Size		len;			/* allocated size of vec */
 	bool		needSort = false;
+	char	   *prev_lexeme = NULL;
+	int			prev_lex_len;
 
 	nentries = pq_getmsgint(buf, sizeof(int32));
 	if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
@@ -460,16 +471,17 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 
 	len = hdrlen * 2;			/* times two to make room for lexemes */
 	vec = (TSVector) palloc0(len);
-	vec->size = nentries;
+	TS_SETCOUNT(vec, nentries);
 
 	datalen = 0;
 	for (i = 0; i < nentries; i++)
 	{
-		const char *lexeme;
+		char	   *lexeme,
+				   *lexeme_out;
 		uint16		npos;
-		size_t		lex_len;
+		int			lex_len;
 
-		lexeme = pq_getmsgstring(buf);
+		lexeme = (char *) pq_getmsgstring(buf);
 		npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
 
 		/* sanity checks */
@@ -489,62 +501,42 @@ tsvectorrecv(PG_FUNCTION_ARGS)
 		 *
 		 * But make sure the buffer is large enough first.
 		 */
-		while (hdrlen + SHORTALIGN(datalen + lex_len) +
-			   (npos + 1) * sizeof(WordEntryPos) >= len)
+		while (hdrlen + SHORTALIGN(datalen + lex_len) + sizeof(WordEntry) +
+			   npos * sizeof(WordEntryPos) >= len)
 		{
 			len *= 2;
 			vec = (TSVector) repalloc(vec, len);
 		}
 
-		vec->entries[i].haspos = (npos > 0) ? 1 : 0;
-		vec->entries[i].len = lex_len;
-		vec->entries[i].pos = datalen;
-
-		memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
-
-		datalen += lex_len;
-
-		if (i > 0 && WordEntryCMP(&vec->entries[i],
-								  &vec->entries[i - 1],
-								  STRPTR(vec)) <= 0)
+		if (prev_lexeme && tsCompareString(lexeme, lex_len,
+										   prev_lexeme, prev_lex_len, false) <= 0)
 			needSort = true;
 
-		/* Receive positions */
+		lexeme_out = tsvector_addlexeme(vec, i, &datalen, lexeme,
+										lex_len, NULL, npos);
 		if (npos > 0)
 		{
-			uint16		j;
 			WordEntryPos *wepptr;
+			int			j;
 
-			/*
-			 * Pad to 2-byte alignment if necessary. Though we used palloc0
-			 * for the initial allocation, subsequent repalloc'd memory areas
-			 * are not initialized to zero.
-			 */
-			if (datalen != SHORTALIGN(datalen))
-			{
-				*(STRPTR(vec) + datalen) = '\0';
-				datalen = SHORTALIGN(datalen);
-			}
-
-			memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
-
-			wepptr = POSDATAPTR(vec, &vec->entries[i]);
+			wepptr = POSDATAPTR(lexeme_out, lex_len);
 			for (j = 0; j < npos; j++)
 			{
 				wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
 				if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
 					elog(ERROR, "position information is misordered");
 			}
-
-			datalen += (npos + 1) * sizeof(WordEntry);
 		}
+
+		prev_lexeme = lexeme;
+		prev_lex_len = lex_len;
 	}
 
 	SET_VARSIZE(vec, hdrlen + datalen);
 
 	if (needSort)
-		qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry),
-				  compareentry, (void *) STRPTR(vec));
+		qsort_arg((void *) ARRPTR(vec), TS_COUNT(vec), sizeof(WordEntry),
+				  compareentry, (void *) vec);
 
 	PG_RETURN_TSVECTOR(vec);
 }
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 822520299e..02e80c4a74 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -33,9 +33,9 @@
 
 typedef struct
 {
-	WordEntry  *arrb;
-	WordEntry  *arre;
-	char	   *values;
+	TSVector	vec;
+	int			bidx;
+	int			eidx;
 	char	   *operand;
 } CHKVAL;
 
@@ -71,7 +71,7 @@ static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
 static int	tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
 
 /*
- * Order: haspos, len, word, for all positions (pos, weight)
+ * Order: npos, len, word, for all positions (pos, weight)
  */
 static int
 silly_cmp_tsvector(const TSVector a, const TSVector b)
@@ -80,9 +80,9 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 		return -1;
 	else if (VARSIZE(a) > VARSIZE(b))
 		return 1;
-	else if (a->size < b->size)
+	else if (TS_COUNT(a) < TS_COUNT(b))
 		return -1;
-	else if (a->size > b->size)
+	else if (TS_COUNT(a) > TS_COUNT(b))
 		return 1;
 	else
 	{
@@ -90,28 +90,40 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 		WordEntry  *bptr = ARRPTR(b);
 		int			i = 0;
 		int			res;
+		uint32		pos1,
+					pos2;
 
+		INITPOS(pos1);
+		INITPOS(pos2);
 
-		for (i = 0; i < a->size; i++)
+		for (i = 0; i < TS_COUNT(a); i++)
 		{
-			if (aptr->haspos != bptr->haspos)
-			{
-				return (aptr->haspos > bptr->haspos) ? -1 : 1;
-			}
-			else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
+			char	   *lex1 = STRPTR(a) + pos1,
+					   *lex2 = STRPTR(b) + pos2;
+			int			npos1 = ENTRY_NPOS(a, aptr),
+						npos2 = ENTRY_NPOS(b, bptr);
+			int			len1 = ENTRY_LEN(a, aptr),
+						len2 = ENTRY_LEN(b, bptr);
+
+			if ((npos1 == 0 || npos2 == 0) && npos1 != npos2)
+				return npos1 > npos2 ? -1 : 1;
+			else if ((res = tsCompareString(lex1, len1, lex2, len2, false)) != 0)
 			{
 				return res;
 			}
-			else if (aptr->haspos)
+			else if (npos1 > 0)
 			{
-				WordEntryPos *ap = POSDATAPTR(a, aptr);
-				WordEntryPos *bp = POSDATAPTR(b, bptr);
+				WordEntryPos *ap,
+						   *bp;
 				int			j;
 
-				if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
-					return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
+				ap = POSDATAPTR(lex1, len1);
+				bp = POSDATAPTR(lex2, len2);
+
+				if (npos1 != npos2)
+					return (npos1 > npos2) ? -1 : 1;
 
-				for (j = 0; j < POSDATALEN(a, aptr); j++)
+				for (j = 0; j < npos1; j++)
 				{
 					if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
 					{
@@ -125,8 +137,8 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
 				}
 			}
 
-			aptr++;
-			bptr++;
+			INCRPTR(a, aptr, pos1);
+			INCRPTR(b, bptr, pos2);
 		}
 	}
 
@@ -161,27 +173,29 @@ tsvector_strip(PG_FUNCTION_ARGS)
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	TSVector	out;
 	int			i,
+				count,
+				posout = 0,
+				pos,
 				len = 0;
-	WordEntry  *arrin = ARRPTR(in),
-			   *arrout;
-	char	   *cur;
+	WordEntry  *entryin = ARRPTR(in);
 
-	for (i = 0; i < in->size; i++)
-		len += arrin[i].len;
+	count = TS_COUNT(in);
+	for (i = 0; i < count; i++)
+		INCRSIZE(len, i, ENTRY_LEN(in, ARRPTR(in) + i), 0);
 
-	len = CALCDATASIZE(in->size, len);
+	len = CALCDATASIZE(count, len);
 	out = (TSVector) palloc0(len);
 	SET_VARSIZE(out, len);
-	out->size = in->size;
-	arrout = ARRPTR(out);
-	cur = STRPTR(out);
-	for (i = 0; i < in->size; i++)
+	TS_SETCOUNT(out, count);
+
+	INITPOS(pos);
+	for (i = 0; i < count; i++)
 	{
-		memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
-		arrout[i].haspos = 0;
-		arrout[i].len = arrin[i].len;
-		arrout[i].pos = cur - STRPTR(out);
-		cur += arrout[i].len;
+		tsvector_addlexeme(out, i, &posout,
+						   STRPTR(in) + pos, ENTRY_LEN(in, entryin),
+						   NULL, 0);
+
+		INCRPTR(in, entryin, pos);
 	}
 
 	PG_FREE_IF_COPY(in, 0);
@@ -192,7 +206,7 @@ Datum
 tsvector_length(PG_FUNCTION_ARGS)
 {
 	TSVector	in = PG_GETARG_TSVECTOR(0);
-	int32		ret = in->size;
+	int32		ret = TS_COUNT(in);
 
 	PG_FREE_IF_COPY(in, 0);
 	PG_RETURN_INT32(ret);
@@ -204,11 +218,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
 	TSVector	in = PG_GETARG_TSVECTOR(0);
 	char		cw = PG_GETARG_CHAR(1);
 	TSVector	out;
-	int			i,
-				j;
-	WordEntry  *entry;
-	WordEntryPos *p;
+	int			i;
+	WordEntry  *weptr;
 	int			w = 0;
+	uint32		pos;
 
 	switch (cw)
 	{
@@ -235,20 +248,22 @@ tsvector_setweight(PG_FUNCTION_ARGS)
 
 	out = (TSVector) palloc(VARSIZE(in));
 	memcpy(out, in, VARSIZE(in));
-	entry = ARRPTR(out);
-	i = out->size;
-	while (i--)
+	weptr = ARRPTR(out);
+
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(out); i++)
 	{
-		if ((j = POSDATALEN(out, entry)) != 0)
+		int			j,
+					npos = ENTRY_NPOS(out, weptr);
+
+		if (npos)
 		{
-			p = POSDATAPTR(out, entry);
-			while (j--)
-			{
-				WEP_SETWEIGHT(*p, w);
-				p++;
-			}
+			WordEntryPos *p = POSDATAPTR(STRPTR(out) + pos, ENTRY_LEN(out, weptr));
+
+			for (j = 0; j < npos; j++)
+				WEP_SETWEIGHT(p[j], w);
 		}
-		entry++;
+		INCRPTR(out, weptr, pos);
 	}
 
 	PG_FREE_IF_COPY(in, 0);
@@ -269,10 +284,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 	TSVector	tsout;
 	int			i,
-				j,
 				nlexemes,
 				weight;
-	WordEntry  *entry;
 	Datum	   *dlexemes;
 	bool	   *nulls;
 
@@ -301,8 +314,6 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 	tsout = (TSVector) palloc(VARSIZE(tsin));
 	memcpy(tsout, tsin, VARSIZE(tsin));
-	entry = ARRPTR(tsout);
-
 	deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
 					  &dlexemes, &nulls, &nlexemes);
 
@@ -315,7 +326,8 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 	{
 		char	   *lex;
 		int			lex_len,
-					lex_pos;
+					lex_idx,
+					npos;
 
 		if (nulls[i])
 			ereport(ERROR,
@@ -324,17 +336,19 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
 
 		lex = VARDATA(dlexemes[i]);
 		lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
-		lex_pos = tsvector_bsearch(tsout, lex, lex_len);
+		lex_idx = tsvector_bsearch(tsin, lex, lex_len);
+		npos = ENTRY_NPOS(tsin, ARRPTR(tsout) + lex_idx);
 
-		if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
+		if (lex_idx >= 0 && npos > 0)
 		{
-			WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
+			int			j;
+			WordEntry  *we;
+			char	   *lexeme = tsvector_getlexeme(tsout, lex_idx, &we);
 
-			while (j--)
-			{
-				WEP_SETWEIGHT(*p, weight);
-				p++;
-			}
+			WordEntryPos *p = POSDATAPTR(lexeme, we->len);
+
+			for (j = 0; j < npos; j++)
+				WEP_SETWEIGHT(p[j], weight);
 		}
 	}
 
@@ -354,34 +368,27 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
  * Return the number added (might be less than expected due to overflow)
  */
 static int32
-add_pos(TSVector src, WordEntry *srcptr,
-		TSVector dest, WordEntry *destptr,
+add_pos(char *src, WordEntry *srcptr,
+		WordEntryPos *dest, int from,
 		int32 maxpos)
 {
-	uint16	   *clen = &_POSVECPTR(dest, destptr)->npos;
+	uint16		clen = from;
 	int			i;
-	uint16		slen = POSDATALEN(src, srcptr),
-				startlen;
-	WordEntryPos *spos = POSDATAPTR(src, srcptr),
-			   *dpos = POSDATAPTR(dest, destptr);
-
-	if (!destptr->haspos)
-		*clen = 0;
+	uint16		slen = srcptr->npos;
+	WordEntryPos *spos = POSDATAPTR(src, srcptr->len);
 
-	startlen = *clen;
+	Assert(!srcptr->hasoff);
 	for (i = 0;
-		 i < slen && *clen < MAXNUMPOS &&
-		 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
+		 i < slen && clen < MAXNUMPOS &&
+		 (clen == 0 || WEP_GETPOS(dest[clen - 1]) != MAXENTRYPOS - 1);
 		 i++)
 	{
-		WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
-		WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
-		(*clen)++;
+		WEP_SETWEIGHT(dest[clen], WEP_GETWEIGHT(spos[i]));
+		WEP_SETPOS(dest[clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
+		clen++;
 	}
 
-	if (*clen != startlen)
-		destptr->haspos = 1;
-	return *clen - startlen;
+	return clen - from;
 }
 
 /*
@@ -392,20 +399,20 @@ add_pos(TSVector src, WordEntry *srcptr,
 static int
 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
 {
-	WordEntry  *arrin = ARRPTR(tsv);
 	int			StopLow = 0,
-				StopHigh = tsv->size,
+				StopHigh = TS_COUNT(tsv),
 				StopMiddle,
 				cmp;
 
 	while (StopLow < StopHigh)
 	{
-		StopMiddle = (StopLow + StopHigh) / 2;
+		WordEntry  *entry = NULL;
+		char	   *str;
 
+		StopMiddle = (StopLow + StopHigh) / 2;
+		str = tsvector_getlexeme(tsv, StopMiddle, &entry);
 		cmp = tsCompareString(lexeme, lexeme_len,
-							  STRPTR(tsv) + arrin[StopMiddle].pos,
-							  arrin[StopMiddle].len,
-							  false);
+							  str, entry->len, false);
 
 		if (cmp < 0)
 			StopHigh = StopMiddle;
@@ -460,14 +467,12 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 						   int indices_count)
 {
 	TSVector	tsout;
-	WordEntry  *arrin = ARRPTR(tsv),
-			   *arrout;
-	char	   *data = STRPTR(tsv),
-			   *dataout;
-	int			i,				/* index in arrin */
-				j,				/* index in arrout */
+	WordEntry  *ptr = ARRPTR(tsv);
+	int			i,				/* index in input tsvector */
+				j,				/* index in output tsvector */
 				k,				/* index in indices_to_delete */
-				curoff;			/* index in dataout area */
+				curoff = 0,		/* index in data area of output */
+				pos;
 
 	/*
 	 * Sort the filter array to simplify membership checks below.  Also, get
@@ -495,16 +500,18 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	tsout = (TSVector) palloc0(VARSIZE(tsv));
 
 	/* This count must be correct because STRPTR(tsout) relies on it. */
-	tsout->size = tsv->size - indices_count;
+	TS_SETCOUNT(tsout, TS_COUNT(tsv) - indices_count);
 
 	/*
 	 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
 	 */
-	arrout = ARRPTR(tsout);
-	dataout = STRPTR(tsout);
-	curoff = 0;
-	for (i = j = k = 0; i < tsv->size; i++)
+
+	INITPOS(pos);
+	for (i = j = k = 0; i < TS_COUNT(tsv); i++)
 	{
+		char	   *lex = STRPTR(tsv) + pos;
+		int			lex_len = ENTRY_LEN(tsv, ptr);
+
 		/*
 		 * If current i is present in indices_to_delete, skip this lexeme.
 		 * Since indices_to_delete is already sorted, we only need to check
@@ -513,28 +520,14 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 		if (k < indices_count && i == indices_to_delete[k])
 		{
 			k++;
-			continue;
+			goto next;
 		}
 
-		/* Copy lexeme and its positions and weights */
-		memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
-		arrout[j].haspos = arrin[i].haspos;
-		arrout[j].len = arrin[i].len;
-		arrout[j].pos = curoff;
-		curoff += arrin[i].len;
-		if (arrin[i].haspos)
-		{
-			int			len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
-			+ sizeof(uint16);
-
-			curoff = SHORTALIGN(curoff);
-			memcpy(dataout + curoff,
-				   STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
-				   len);
-			curoff += len;
-		}
+		tsvector_addlexeme(tsout, j++, &curoff, lex, lex_len,
+						   POSDATAPTR(lex, lex_len), ENTRY_NPOS(tsv, ptr));
 
-		j++;
+next:
+		INCRPTR(tsv, ptr, pos);
 	}
 
 	/*
@@ -543,8 +536,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	 * estimation of tsout's size is wrong.
 	 */
 	Assert(k == indices_count);
-
-	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+	SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), curoff));
 	return tsout;
 }
 
@@ -637,6 +629,7 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 {
 	FuncCallContext *funcctx;
 	TSVector	tsin;
+	uint32		pos;
 
 	if (SRF_IS_FIRSTCALL())
 	{
@@ -655,31 +648,33 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 						   TEXTARRAYOID, -1, 0);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
-		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+		INITPOS(pos);
+		funcctx->user_fctx = list_make2(PG_GETARG_TSVECTOR(0), makeInteger(pos));
 
 		MemoryContextSwitchTo(oldcontext);
 	}
 
 	funcctx = SRF_PERCALL_SETUP();
-	tsin = (TSVector) funcctx->user_fctx;
+	tsin = (TSVector) linitial(funcctx->user_fctx);
+	pos = intVal(lsecond(funcctx->user_fctx));
 
-	if (funcctx->call_cntr < tsin->size)
+	if (funcctx->call_cntr < TS_COUNT(tsin))
 	{
-		WordEntry  *arrin = ARRPTR(tsin);
+		WordEntry  *entry = ARRPTR(tsin) + funcctx->call_cntr;
 		char	   *data = STRPTR(tsin);
 		HeapTuple	tuple;
 		int			j,
-					i = funcctx->call_cntr;
+					npos = ENTRY_NPOS(tsin, entry),
+					lex_len = ENTRY_LEN(tsin, entry);
 		bool		nulls[] = {false, false, false};
 		Datum		values[3];
 
 		values[0] = PointerGetDatum(
-									cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
-			);
+									cstring_to_text_with_len(data + pos, lex_len));
 
-		if (arrin[i].haspos)
+		if (npos)
 		{
-			WordEntryPosVector *posv;
+			WordEntryPos *apos = POSDATAPTR(data + pos, lex_len);
 			Datum	   *positions;
 			Datum	   *weights;
 			char		weight;
@@ -689,28 +684,28 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 			 * uint16 (2 bits for weight, 14 for position). Here we extract
 			 * that in two separate arrays.
 			 */
-			posv = _POSVECPTR(tsin, arrin + i);
-			positions = palloc(posv->npos * sizeof(Datum));
-			weights = palloc(posv->npos * sizeof(Datum));
-			for (j = 0; j < posv->npos; j++)
+			positions = palloc(npos * sizeof(Datum));
+			weights = palloc(npos * sizeof(Datum));
+			for (j = 0; j < npos; j++)
 			{
-				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
-				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
+				positions[j] = Int16GetDatum(WEP_GETPOS(apos[j]));
+				weight = 'D' - WEP_GETWEIGHT(apos[j]);
 				weights[j] = PointerGetDatum(
 											 cstring_to_text_with_len(&weight, 1)
 					);
 			}
 
 			values[1] = PointerGetDatum(
-										construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
+										construct_array(positions, npos, INT2OID, 2, true, 's'));
 			values[2] = PointerGetDatum(
-										construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
+										construct_array(weights, npos, TEXTOID, -1, false, 'i'));
 		}
 		else
 		{
 			nulls[1] = nulls[2] = true;
 		}
 
+		INCRPTR(tsin, entry, intVal(lsecond(funcctx->user_fctx)));
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
 	}
@@ -728,27 +723,147 @@ Datum
 tsvector_to_array(PG_FUNCTION_ARGS)
 {
 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
-	WordEntry  *arrin = ARRPTR(tsin);
+	WordEntry  *entry = ARRPTR(tsin);
 	Datum	   *elements;
 	int			i;
 	ArrayType  *array;
+	long		pos;
 
-	elements = palloc(tsin->size * sizeof(Datum));
+	elements = palloc(TS_COUNT(tsin) * sizeof(Datum));
 
-	for (i = 0; i < tsin->size; i++)
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(tsin); i++)
 	{
 		elements[i] = PointerGetDatum(
-									  cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
-			);
+									  cstring_to_text_with_len(STRPTR(tsin) + pos, ENTRY_LEN(tsin, entry)));
+		INCRPTR(tsin, entry, pos);
 	}
 
-	array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+	array = construct_array(elements, TS_COUNT(tsin), TEXTOID, -1, false, 'i');
 
 	pfree(elements);
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(array);
 }
 
+/*
+ * Returns offset by given index in TSVector,
+ * this function used when we need random access
+ */
+int
+tsvector_getoffset(TSVector vec, int idx, WordEntry **we)
+{
+	int			offset = 0;
+	WordEntry  *entry;
+
+	entry = ARRPTR(vec) + idx;
+	if (we)
+		*we = entry;
+
+	while (!entry->hasoff)
+	{
+		entry--;
+		if (!entry->hasoff)
+			offset += SHORTALIGN(entry->len) + entry->npos * sizeof(WordEntryPos);
+	}
+
+	Assert(entry >= ARRPTR(vec));
+
+	if (idx % TS_OFFSET_STRIDE)
+	{
+		/* if idx is by offset */
+		WordEntry  *offset_entry = (WordEntry *) (STRPTR(vec) + entry->offset);
+
+		offset += entry->offset + sizeof(WordEntry);
+		offset += SHORTALIGN(offset_entry->len) + offset_entry->npos * sizeof(WordEntryPos);
+	}
+	else
+	{
+		Assert(entry == ARRPTR(vec) + idx);
+
+		if (we)
+			*we = (WordEntry *) (STRPTR(vec) + entry->offset);
+		offset = entry->offset + sizeof(WordEntry);
+	}
+
+	return offset;
+}
+
+/*
+ * Add lexeme and its positions to tsvector and move dataoff (offset where
+ * data should be added) to new position.
+ * Returns pointer to lexeme start
+ */
+char *
+tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+				   char *lexeme, int lexeme_len, WordEntryPos *pos, int npos)
+{
+	int			stroff;
+	WordEntry  *entry;
+	char	   *result;
+
+	/* when idx is 0, dataoff should be 0 too, and otherwise */
+	Assert(!((idx == 0) ^ (*dataoff == 0)));
+
+	stroff = *dataoff;
+	entry = ARRPTR(tsv) + idx;
+
+	if (idx % TS_OFFSET_STRIDE == 0)
+	{
+		/* WordEntry with offset */
+		WordEntry	offentry;
+
+		stroff = INTALIGN(stroff);
+		entry->hasoff = 1;
+		entry->offset = stroff;
+
+		/* fill WordEntry for offset */
+		offentry.hasoff = 0;
+		offentry.len = lexeme_len;
+		offentry.npos = npos;
+		memcpy(STRPTR(tsv) + stroff, &offentry, sizeof(WordEntry));
+		stroff += sizeof(WordEntry);
+	}
+	else
+	{
+		stroff = SHORTALIGN(stroff);
+		entry->hasoff = 0;
+		entry->len = lexeme_len;
+		entry->npos = npos;
+	}
+
+	memcpy(STRPTR(tsv) + stroff, lexeme, lexeme_len);
+	result = STRPTR(tsv) + stroff;
+	stroff += lexeme_len;
+
+	if (npos)
+	{
+		if (npos > 0xFFFF)
+			elog(ERROR, "positions array too long");
+
+		/*
+		 * Pad to 2-byte alignment if necessary. We don't know how memory was
+		 * allocated, so in case of aligning we need to make sure that unused
+		 * is zero.
+		 */
+		if (stroff != SHORTALIGN(stroff))
+		{
+			*(STRPTR(tsv) + stroff) = '\0';
+			stroff = SHORTALIGN(stroff);
+		}
+
+		/* Copy positions */
+		if (pos)
+			memcpy(STRPTR(tsv) + stroff, pos, npos * sizeof(WordEntryPos));
+
+		stroff += npos * sizeof(WordEntryPos);
+	}
+
+	*dataoff = stroff;
+
+	return result;
+}
+
 /*
  * Build tsvector from array of lexemes.
  */
@@ -758,14 +873,13 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 	TSVector	tsout;
 	Datum	   *dlexemes;
-	WordEntry  *arrout;
 	bool	   *nulls;
 	int			nitems,
 				i,
 				j,
 				tslen,
+				cur = 0,
 				datalen = 0;
-	char	   *cur;
 
 	deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
 
@@ -793,26 +907,24 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 
 	/* Calculate space needed for surviving lexemes. */
 	for (i = 0; i < nitems; i++)
-		datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
+	{
+		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+		INCRSIZE(datalen, i, lex_len, 0);
+	}
 	tslen = CALCDATASIZE(nitems, datalen);
 
 	/* Allocate and fill tsvector. */
 	tsout = (TSVector) palloc0(tslen);
 	SET_VARSIZE(tsout, tslen);
-	tsout->size = nitems;
+	TS_SETCOUNT(tsout, nitems);
 
-	arrout = ARRPTR(tsout);
-	cur = STRPTR(tsout);
 	for (i = 0; i < nitems; i++)
 	{
 		char	   *lex = VARDATA(dlexemes[i]);
 		int			lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
 
-		memcpy(cur, lex, lex_len);
-		arrout[i].haspos = 0;
-		arrout[i].len = lex_len;
-		arrout[i].pos = cur - STRPTR(tsout);
-		cur += lex_len;
+		tsvector_addlexeme(tsout, i, &cur, lex, lex_len, NULL, 0);
 	}
 
 	PG_FREE_IF_COPY(v, 0);
@@ -828,17 +940,16 @@ tsvector_filter(PG_FUNCTION_ARGS)
 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
 				tsout;
 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
-	WordEntry  *arrin = ARRPTR(tsin),
-			   *arrout;
-	char	   *datain = STRPTR(tsin),
-			   *dataout;
+	char	   *dataout;
 	Datum	   *dweights;
 	bool	   *nulls;
 	int			nweights;
 	int			i,
-				j;
-	int			cur_pos = 0;
+				j,
+				dataoff = 0,
+				pos;
 	char		mask = 0;
+	WordEntry  *ptr = ARRPTR(tsin);
 
 	deconstruct_array(weights, CHAROID, 1, true, 'c',
 					  &dweights, &nulls, &nweights);
@@ -879,109 +990,112 @@ tsvector_filter(PG_FUNCTION_ARGS)
 	}
 
 	tsout = (TSVector) palloc0(VARSIZE(tsin));
-	tsout->size = tsin->size;
-	arrout = ARRPTR(tsout);
+	TS_SETCOUNT(tsout, TS_COUNT(tsin));
 	dataout = STRPTR(tsout);
 
-	for (i = j = 0; i < tsin->size; i++)
+	INITPOS(pos);
+	for (i = j = 0; i < TS_COUNT(tsin); i++)
 	{
-		WordEntryPosVector *posvin,
-				   *posvout;
-		int			npos = 0;
-		int			k;
-
-		if (!arrin[i].haspos)
-			continue;
-
-		posvin = _POSVECPTR(tsin, arrin + i);
-		posvout = (WordEntryPosVector *)
-			(dataout + SHORTALIGN(cur_pos + arrin[i].len));
-
-		for (k = 0; k < posvin->npos; k++)
+		WordEntryPos *posin,
+				   *posout;
+		int			k,
+					npos = 0,
+					lex_len = ENTRY_LEN(tsin, ptr);
+		char	   *lex = STRPTR(tsin) + pos,
+				   *lexout;
+
+		posin = POSDATAPTR(lex, lex_len);
+		for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
 		{
-			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
-				posvout->pos[npos++] = posvin->pos[k];
+			if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+				npos++;
 		}
 
-		/* if no satisfactory positions found, skip lexeme */
 		if (!npos)
-			continue;
+			goto next;
 
-		arrout[j].haspos = true;
-		arrout[j].len = arrin[i].len;
-		arrout[j].pos = cur_pos;
+		lexout = tsvector_addlexeme(tsout, j++, &dataoff, lex, lex_len,
+									NULL, npos);
+		posout = POSDATAPTR(lexout, lex_len);
+		npos = 0;
+		for (k = 0; k < ENTRY_NPOS(tsin, ptr); k++)
+		{
+			if (mask & (1 << WEP_GETWEIGHT(posin[k])))
+				posout[npos++] = posin[k];
+		}
 
-		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
-		posvout->npos = npos;
-		cur_pos += SHORTALIGN(arrin[i].len);
-		cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
-			sizeof(uint16);
-		j++;
+next:
+		INCRPTR(tsin, ptr, pos);
 	}
 
-	tsout->size = j;
+	TS_SETCOUNT(tsout, j);
 	if (dataout != STRPTR(tsout))
-		memmove(STRPTR(tsout), dataout, cur_pos);
+		memmove(STRPTR(tsout), dataout, dataoff);
 
-	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+	SET_VARSIZE(tsout, CALCDATASIZE(TS_COUNT(tsout), dataoff));
 
 	PG_FREE_IF_COPY(tsin, 0);
 	PG_RETURN_POINTER(tsout);
 }
 
+/* Get max position in in1; we'll need this to offset in2's positions */
+static int
+get_maxpos(TSVector tsv)
+{
+	int			i,
+				j,
+				maxpos = 0;
+	WordEntry  *ptr = ARRPTR(tsv);
+	uint32		pos;
+	WordEntryPos *apos;
+
+	INITPOS(pos);
+	for (i = 0; i < TS_COUNT(tsv); i++)
+	{
+		apos = POSDATAPTR(STRPTR(tsv) + pos, ENTRY_LEN(tsv, ptr));
+		for (j = 0; j < ENTRY_NPOS(tsv, ptr); j++)
+		{
+			if (WEP_GETPOS(apos[j]) > maxpos)
+				maxpos = WEP_GETPOS(apos[j]);
+		}
+
+		INCRPTR(tsv, ptr, pos);
+	}
+
+	return maxpos;
+}
+
 Datum
 tsvector_concat(PG_FUNCTION_ARGS)
 {
-	TSVector	in1 = PG_GETARG_TSVECTOR(0);
-	TSVector	in2 = PG_GETARG_TSVECTOR(1);
-	TSVector	out;
-	WordEntry  *ptr;
-	WordEntry  *ptr1,
+	TSVector	in1 = PG_GETARG_TSVECTOR(0),
+				in2 = PG_GETARG_TSVECTOR(1),
+				out;
+	WordEntry  *ptr,
+			   *ptr1,
 			   *ptr2;
-	WordEntryPos *p;
 	int			maxpos = 0,
 				i,
-				j,
 				i1,
 				i2,
-				dataoff,
 				output_bytes,
-				output_size;
-	char	   *data,
-			   *data1,
-			   *data2;
-
-	/* Get max position in in1; we'll need this to offset in2's positions */
-	ptr = ARRPTR(in1);
-	i = in1->size;
-	while (i--)
-	{
-		if ((j = POSDATALEN(in1, ptr)) != 0)
-		{
-			p = POSDATAPTR(in1, ptr);
-			while (j--)
-			{
-				if (WEP_GETPOS(*p) > maxpos)
-					maxpos = WEP_GETPOS(*p);
-				p++;
-			}
-		}
-		ptr++;
-	}
+				pos1,
+				pos2,
+				dataoff;
+	char	   *data;
 
 	ptr1 = ARRPTR(in1);
 	ptr2 = ARRPTR(in2);
-	data1 = STRPTR(in1);
-	data2 = STRPTR(in2);
-	i1 = in1->size;
-	i2 = in2->size;
+	i1 = TS_COUNT(in1);
+	i2 = TS_COUNT(in2);
 
 	/*
 	 * Conservative estimate of space needed.  We might need all the data in
-	 * both inputs, and conceivably add a pad byte before position data for
-	 * each item where there was none before.
+	 * both inputs, and conceivably add a pad bytes before lexeme and position
+	 * data, and pad bytes before WordEntry for offset entry.
 	 */
-	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
+	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 * 2 + i2 * 2;
+	output_bytes += 4 * (i1 + i2) / TS_OFFSET_STRIDE;
 
 	out = (TSVector) palloc0(output_bytes);
 	SET_VARSIZE(out, output_bytes);
@@ -990,91 +1104,110 @@ tsvector_concat(PG_FUNCTION_ARGS)
 	 * We must make out->size valid so that STRPTR(out) is sensible.  We'll
 	 * collapse out any unused space at the end.
 	 */
-	out->size = in1->size + in2->size;
+	TS_SETCOUNT(out, i1 + i2);
 
-	ptr = ARRPTR(out);
+	ptr = NULL;
 	data = STRPTR(out);
+	i = 0;
 	dataoff = 0;
+
+	INITPOS(pos1);
+	INITPOS(pos2);
+
+	/*
+	 * we will need max position from first tsvector to add it positions of
+	 * second tsvector
+	 */
+	maxpos = get_maxpos(in1);
+
 	while (i1 && i2)
 	{
-		int			cmp = compareEntry(data1, ptr1, data2, ptr2);
+		char	   *lex = STRPTR(in1) + pos1,
+				   *lex2 = STRPTR(in2) + pos2;
+
+		int			lex_len = ENTRY_LEN(in1, ptr1),
+					lex2_len = ENTRY_LEN(in2, ptr2);
+
+		int			cmp = tsCompareString(lex, lex_len, lex2, lex2_len, false);
 
 		if (cmp < 0)
 		{						/* in1 first */
-			ptr->haspos = ptr1->haspos;
-			ptr->len = ptr1->len;
-			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-			ptr->pos = dataoff;
-			dataoff += ptr1->len;
-			if (ptr->haspos)
-			{
-				dataoff = SHORTALIGN(dataoff);
-				memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-				dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-			}
+			tsvector_addlexeme(out, i, &dataoff,
+							   lex, lex_len,
+							   POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
 
-			ptr++;
-			ptr1++;
+			INCRPTR(in1, ptr1, pos1);
 			i1--;
+			i++;
 		}
 		else if (cmp > 0)
 		{						/* in2 first */
-			ptr->haspos = ptr2->haspos;
-			ptr->len = ptr2->len;
-			memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
-			ptr->pos = dataoff;
-			dataoff += ptr2->len;
-			if (ptr->haspos)
+			char	   *new_lex;
+			WordEntry  *we = UNWRAP_ENTRY(in2, ptr2);
+
+			new_lex = tsvector_addlexeme(out, i, &dataoff, lex2, lex2_len, NULL, 0);
+			if (we->npos > 0)
 			{
-				int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+				int			addlen;
+				WordEntryPos *apos = POSDATAPTR(new_lex, lex2_len);
 
-				if (addlen == 0)
-					ptr->haspos = 0;
-				else
+				addlen = add_pos(lex2, we, apos, 0, maxpos);
+				if (addlen > 0)
 				{
+					ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+					ptr->npos = addlen;
 					dataoff = SHORTALIGN(dataoff);
-					dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+					dataoff += ptr->npos * sizeof(WordEntryPos);
 				}
 			}
 
-			ptr++;
-			ptr2++;
+			INCRPTR(in2, ptr2, pos2);
+			i++;
 			i2--;
 		}
 		else
 		{
-			ptr->haspos = ptr1->haspos | ptr2->haspos;
-			ptr->len = ptr1->len;
-			memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-			ptr->pos = dataoff;
-			dataoff += ptr1->len;
-			if (ptr->haspos)
+			char	   *new_lex;
+			int			npos1 = ENTRY_NPOS(in1, ptr1),
+						npos2 = ENTRY_NPOS(in2, ptr2);
+			WordEntryPos *apos;
+
+			new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+			apos = POSDATAPTR(new_lex, lex_len);
+
+			if (npos1 || npos2)
 			{
-				if (ptr1->haspos)
-				{
-					dataoff = SHORTALIGN(dataoff);
-					memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-					dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-					if (ptr2->haspos)
-						dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
-				}
-				else			/* must have ptr2->haspos */
+				int			addlen;
+				char	   *lex2 = STRPTR(in2) + pos2;
+
+				ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+				if (npos1)
 				{
-					int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+					/* add positions from left tsvector */
+					addlen = add_pos(lex, UNWRAP_ENTRY(in1, ptr1), apos, 0, 0);
+					ptr->npos = addlen;
 
-					if (addlen == 0)
-						ptr->haspos = 0;
-					else
+					if (npos2)
 					{
-						dataoff = SHORTALIGN(dataoff);
-						dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+						/* add positions from right right tsvector */
+						addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, addlen, maxpos);
+						ptr->npos += addlen;
 					}
 				}
+				else			/* npos in second should be > 0 */
+				{
+					/* add positions from right tsvector */
+					addlen = add_pos(lex2, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+					ptr->npos = addlen;
+				}
+
+				dataoff = SHORTALIGN(dataoff);
+				dataoff += ptr->npos * sizeof(WordEntryPos);
 			}
 
-			ptr++;
-			ptr1++;
-			ptr2++;
+			INCRPTR(in1, ptr1, pos1);
+			INCRPTR(in2, ptr2, pos2);
+			i++;
 			i1--;
 			i2--;
 		}
@@ -1082,45 +1215,44 @@ tsvector_concat(PG_FUNCTION_ARGS)
 
 	while (i1)
 	{
-		ptr->haspos = ptr1->haspos;
-		ptr->len = ptr1->len;
-		memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
-		ptr->pos = dataoff;
-		dataoff += ptr1->len;
-		if (ptr->haspos)
-		{
-			dataoff = SHORTALIGN(dataoff);
-			memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
-			dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
-		}
+		char	   *lex = STRPTR(in1) + pos1;
+		int			lex_len = ENTRY_LEN(in1, ptr1);
 
-		ptr++;
-		ptr1++;
+		tsvector_addlexeme(out, i, &dataoff,
+						   lex, lex_len,
+						   POSDATAPTR(lex, lex_len), ENTRY_NPOS(in1, ptr1));
+
+		INCRPTR(in1, ptr1, pos1);
+		i++;
 		i1--;
 	}
 
 	while (i2)
 	{
-		ptr->haspos = ptr2->haspos;
-		ptr->len = ptr2->len;
-		memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
-		ptr->pos = dataoff;
-		dataoff += ptr2->len;
-		if (ptr->haspos)
+		char	   *lex = STRPTR(in2) + pos2,
+				   *new_lex;
+		int			lex_len = ENTRY_LEN(in2, ptr2),
+					npos = ENTRY_NPOS(in2, ptr2);
+
+		new_lex = tsvector_addlexeme(out, i, &dataoff, lex, lex_len, NULL, 0);
+		if (npos > 0)
 		{
-			int			addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+			int			addlen;
+			WordEntryPos *apos = POSDATAPTR(new_lex, lex_len);
 
-			if (addlen == 0)
-				ptr->haspos = 0;
-			else
+			addlen = add_pos(lex, UNWRAP_ENTRY(in2, ptr2), apos, 0, maxpos);
+			if (addlen > 0)
 			{
+				WordEntry  *ptr = UNWRAP_ENTRY(out, ARRPTR(out) + i);
+
+				ptr->npos = addlen;
 				dataoff = SHORTALIGN(dataoff);
-				dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+				dataoff += npos * sizeof(WordEntryPos);
 			}
 		}
 
-		ptr++;
-		ptr2++;
+		INCRPTR(in2, ptr2, pos2);
+		i++;
 		i2--;
 	}
 
@@ -1137,12 +1269,10 @@ tsvector_concat(PG_FUNCTION_ARGS)
 	 * Adjust sizes (asserting that we didn't overrun the original estimates)
 	 * and collapse out any unused array entries.
 	 */
-	output_size = ptr - ARRPTR(out);
-	Assert(output_size <= out->size);
-	out->size = output_size;
+	TS_SETCOUNT(out, i);
 	if (data != STRPTR(out))
 		memmove(STRPTR(out), data, dataoff);
-	output_bytes = CALCDATASIZE(out->size, dataoff);
+	output_bytes = CALCDATASIZE(TS_COUNT(out), dataoff);
 	Assert(output_bytes <= VARSIZE(out));
 	SET_VARSIZE(out, output_bytes);
 
@@ -1194,35 +1324,26 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
  * Check weight info or/and fill 'data' with the required positions
  */
 static bool
-checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+checkclass_str(WordEntryPos *pv, int npos, QueryOperand *val,
 			   ExecPhraseData *data)
 {
 	bool		result = false;
 
-	if (entry->haspos && (val->weight || data))
+	if (npos && (val->weight || data))
 	{
-		WordEntryPosVector *posvec;
-
-		/*
-		 * We can't use the _POSVECPTR macro here because the pointer to the
-		 * tsvector's lexeme storage is already contained in chkval->values.
-		 */
-		posvec = (WordEntryPosVector *)
-			(chkval->values + SHORTALIGN(entry->pos + entry->len));
-
 		if (val->weight && data)
 		{
-			WordEntryPos *posvec_iter = posvec->pos;
+			WordEntryPos *posvec_iter = pv;
 			WordEntryPos *dptr;
 
 			/*
 			 * Filter position information by weights
 			 */
-			dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+			dptr = data->pos = palloc(sizeof(WordEntryPos) * npos);
 			data->allocated = true;
 
 			/* Is there a position with a matching weight? */
-			while (posvec_iter < posvec->pos + posvec->npos)
+			while (posvec_iter < (pv + npos))
 			{
 				/* If true, append this position to the data->pos */
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
@@ -1241,10 +1362,10 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 		}
 		else if (val->weight)
 		{
-			WordEntryPos *posvec_iter = posvec->pos;
+			WordEntryPos *posvec_iter = pv;
 
 			/* Is there a position with a matching weight? */
-			while (posvec_iter < posvec->pos + posvec->npos)
+			while (posvec_iter < (pv + npos))
 			{
 				if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
 				{
@@ -1257,8 +1378,8 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 		}
 		else					/* data != NULL */
 		{
-			data->npos = posvec->npos;
-			data->pos = posvec->pos;
+			data->npos = npos;
+			data->pos = pv;
 			data->allocated = false;
 			result = true;
 		}
@@ -1311,26 +1432,32 @@ static bool
 checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
 	CHKVAL	   *chkval = (CHKVAL *) checkval;
-	WordEntry  *StopLow = chkval->arrb;
-	WordEntry  *StopHigh = chkval->arre;
-	WordEntry  *StopMiddle = StopHigh;
+	int			StopLow = chkval->bidx;
+	int			StopHigh = chkval->eidx;
+	int			StopMiddle = StopHigh;
 	int			difference = -1;
 	bool		res = false;
+	char	   *lexeme;
+	WordEntry  *entry;
 
 	/* Loop invariant: StopLow <= val < StopHigh */
 	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+		lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+		Assert(!entry->hasoff);
 		difference = tsCompareString(chkval->operand + val->distance,
 									 val->length,
-									 chkval->values + StopMiddle->pos,
-									 StopMiddle->len,
+									 lexeme,
+									 entry->len,
 									 false);
 
 		if (difference == 0)
 		{
 			/* Check weight info & fill 'data' with positions */
-			res = checkclass_str(chkval, StopMiddle, val, data);
+			res = checkclass_str(POSDATAPTR(lexeme, entry->len),
+								 entry->npos, val, data);
 			break;
 		}
 		else if (difference > 0)
@@ -1352,19 +1479,31 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 		if (StopLow >= StopHigh)
 			StopMiddle = StopHigh;
 
-		while ((!res || data) && StopMiddle < chkval->arre &&
-			   tsCompareString(chkval->operand + val->distance,
-							   val->length,
-							   chkval->values + StopMiddle->pos,
-							   StopMiddle->len,
-							   true) == 0)
+		while ((!res || data) && StopMiddle < chkval->eidx)
 		{
+			char	   *lexeme;
+			int			cmp;
+			WordEntryPos *pv;
+
+			lexeme = tsvector_getlexeme(chkval->vec, StopMiddle, &entry);
+
+			Assert(!entry->hasoff);
+			pv = POSDATAPTR(lexeme, entry->len);
+			cmp = tsCompareString(chkval->operand + val->distance,
+								  val->length,
+								  lexeme,
+								  entry->len,
+								  true);
+
+			if (cmp != 0)
+				break;
+
 			if (data)
 			{
 				/*
 				 * We need to join position information
 				 */
-				res = checkclass_str(chkval, StopMiddle, val, data);
+				res = checkclass_str(pv, entry->npos, val, data);
 
 				if (res)
 				{
@@ -1388,7 +1527,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 			}
 			else
 			{
-				res = checkclass_str(chkval, StopMiddle, val, NULL);
+				res = checkclass_str(pv, entry->npos, val, NULL);
 			}
 
 			StopMiddle++;
@@ -1935,9 +2074,9 @@ ts_match_vq(PG_FUNCTION_ARGS)
 		PG_RETURN_BOOL(false);
 	}
 
-	chkval.arrb = ARRPTR(val);
-	chkval.arre = chkval.arrb + val->size;
-	chkval.values = STRPTR(val);
+	chkval.bidx = 0;
+	chkval.eidx = TS_COUNT(val);
+	chkval.vec = val;
 	chkval.operand = GETOPERAND(query);
 	result = TS_execute(GETQUERY(query),
 						&chkval,
@@ -2001,12 +2140,15 @@ ts_match_tq(PG_FUNCTION_ARGS)
  * that have a weight equal to one of the weights in 'weight' bitmask.
  */
 static int
-check_weight(TSVector txt, WordEntry *wptr, int8 weight)
+check_weight(char *lexeme, WordEntry *wptr, int8 weight)
 {
-	int			len = POSDATALEN(txt, wptr);
+	int			len;
 	int			num = 0;
-	WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+	WordEntryPos *ptr;
 
+	Assert(!wptr->hasoff);
+	len = wptr->len;
+	ptr = POSDATAPTR(lexeme, len);
 	while (len--)
 	{
 		if (weight & (1 << WEP_GETWEIGHT(*ptr)))
@@ -2017,31 +2159,34 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
 }
 
 #define compareStatWord(a,e,t)							\
-	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
-					STRPTR(t) + (e)->pos, (e)->len,		\
-					false)
+	(tsCompareString((a)->lexeme, (a)->lenlexeme,		\
+					t, (e)->len, false))
 
 static void
 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
 {
-	WordEntry  *we = ARRPTR(txt) + off;
+	WordEntry  *we;
 	StatEntry  *node = stat->root,
 			   *pnode = NULL;
 	int			n,
 				res = 0;
 	uint32		depth = 1;
+	char	   *lexeme;
+
+	lexeme = tsvector_getlexeme(txt, off, &we);
 
+	Assert(!we->hasoff);
 	if (stat->weight == 0)
-		n = (we->haspos) ? POSDATALEN(txt, we) : 1;
+		n = (we->npos) ? we->npos : 1;
 	else
-		n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
+		n = (we->npos) ? check_weight(lexeme, we, stat->weight) : 0;
 
 	if (n == 0)
 		return;					/* nothing to insert */
 
 	while (node)
 	{
-		res = compareStatWord(node, we, txt);
+		res = compareStatWord(node, we, lexeme);
 
 		if (res == 0)
 		{
@@ -2065,7 +2210,7 @@ insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector tx
 		node->ndoc = 1;
 		node->nentry = n;
 		node->lenlexeme = we->len;
-		memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
+		memcpy(node->lexeme, lexeme, node->lenlexeme);
 
 		if (pnode == NULL)
 		{
@@ -2092,13 +2237,14 @@ chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVecto
 					uint32 low, uint32 high, uint32 offset)
 {
 	uint32		pos;
-	uint32		middle = (low + high) >> 1;
+	uint32		middle = (low + high) >> 1,
+				count = TS_COUNT(txt);
 
 	pos = (low + middle) >> 1;
-	if (low != middle && pos >= offset && pos - offset < txt->size)
+	if (low != middle && pos >= offset && pos - offset < count)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 	pos = (high + middle + 1) >> 1;
-	if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
+	if (middle + 1 != high && pos >= offset && pos - offset < count)
 		insertStatEntry(persistentContext, stat, txt, pos - offset);
 
 	if (low != middle)
@@ -2125,7 +2271,8 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 	TSVector	txt = DatumGetTSVector(data);
 	uint32		i,
 				nbit = 0,
-				offset;
+				offset,
+				count = TS_COUNT(txt);
 
 	if (stat == NULL)
 	{							/* Init in first */
@@ -2134,19 +2281,19 @@ ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 	}
 
 	/* simple check of correctness */
-	if (txt == NULL || txt->size == 0)
+	if (txt == NULL || count == 0)
 	{
 		if (txt && txt != (TSVector) DatumGetPointer(data))
 			pfree(txt);
 		return stat;
 	}
 
-	i = txt->size - 1;
+	i = count - 1;
 	for (; i > 0; i >>= 1)
 		nbit++;
 
 	nbit = 1 << nbit;
-	offset = (nbit - txt->size) / 2;
+	offset = (nbit - count) / 2;
 
 	insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
 	chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
@@ -2579,15 +2726,28 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
 	}
 
 	/* make tsvector value */
-	datum = TSVectorGetDatum(make_tsvector(&prs));
-	isnull = false;
-
-	/* and insert it into tuple */
-	rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-										 1, &tsvector_attr_num,
-										 &datum, &isnull);
-
-	pfree(DatumGetPointer(datum));
+	if (prs.curwords)
+	{
+		datum = PointerGetDatum(make_tsvector(&prs));
+		isnull = false;
+		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+											 1, &tsvector_attr_num,
+											 &datum, &isnull);
+		pfree(DatumGetPointer(datum));
+	}
+	else
+	{
+		TSVector	out = palloc(CALCDATASIZE(0, 0));
+
+		SET_VARSIZE(out, CALCDATASIZE(0, 0));
+		TS_SETCOUNT(out, 0);
+		datum = PointerGetDatum(out);
+		isnull = false;
+		rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+											 1, &tsvector_attr_num,
+											 &datum, &isnull);
+		pfree(prs.words);
+	}
 
 	return PointerGetDatum(rettuple);
 }
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 30d7c4bccd..eb94c595f2 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -24,30 +24,40 @@
  * 2) int32		size - number of lexemes (WordEntry array entries)
  * 3) Array of WordEntry - one per lexeme; must be sorted according to
  *				tsCompareString() (ie, memcmp of lexeme strings).
- *				WordEntry->pos gives the number of bytes from end of WordEntry
- *				array to start of lexeme's string, which is of length len.
+ *	  WordEntry have two types: offset or metadata (length of lexeme and number
+ *	  of positions). If it has offset then metadata will be by this offset.
  * 4) Per-lexeme data storage:
- *	  lexeme string (not null-terminated)
- *	  if haspos is true:
+ *    [4-byte aligned WordEntry] (if its WordEntry has offset)
+ *	  2-byte aligned lexeme string (not null-terminated)
+ *	  if it has positions:
  *		padding byte if necessary to make the position data 2-byte aligned
- *		uint16			number of positions that follow
  *		WordEntryPos[]	positions
  *
  * The positions for each lexeme must be sorted.
  *
- * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
+ * Note, tsvector functions believe that sizeof(WordEntry) == 4
  */
 
-typedef struct
+#define TS_OFFSET_STRIDE 4
+
+typedef union
 {
-	uint32
-				haspos:1,
-				len:11,			/* MAX 2Kb */
-				pos:20;			/* MAX 1Mb */
+	struct
+	{
+		uint32		hasoff:1,
+					offset:31;
+	};
+	struct
+	{
+		uint32		hasoff_:1,
+					len:11,
+					npos:16,
+					_unused:4;
+	};
 } WordEntry;
 
 #define MAXSTRLEN ( (1<<11) - 1)
-#define MAXSTRPOS ( (1<<20) - 1)
+#define MAXSTRPOS ( (1<<30) - 1)
 
 extern int	compareWordEntryPos(const void *a, const void *b);
 
@@ -62,19 +72,6 @@ extern int	compareWordEntryPos(const void *a, const void *b);
 
 typedef uint16 WordEntryPos;
 
-typedef struct
-{
-	uint16		npos;
-	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
-} WordEntryPosVector;
-
-/* WordEntryPosVector with exactly 1 entry */
-typedef struct
-{
-	uint16		npos;
-	WordEntryPos pos[1];
-} WordEntryPosVector1;
-
 
 #define WEP_GETWEIGHT(x)	( (x) >> 14 )
 #define WEP_GETPOS(x)		( (x) & 0x3fff )
@@ -90,13 +87,17 @@ typedef struct
 typedef struct
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int32		size;
+	int32		size_;			/* flags and lexemes count */
 	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];
 	/* lexemes follow the entries[] array */
 } TSVectorData;
 
 typedef TSVectorData *TSVector;
 
+#define TS_FLAG_STRETCHED 0x80000000
+#define TS_COUNT(t) ((t)->size_ & 0x0FFFFFFF)
+#define TS_SETCOUNT(t,c) ((t)->size_ = (c) | TS_FLAG_STRETCHED)
+
 #define DATAHDRSIZE (offsetof(TSVectorData, entries))
 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
 
@@ -104,24 +105,65 @@ typedef TSVectorData *TSVector;
 #define ARRPTR(x)	( (x)->entries )
 
 /* pointer to start of a tsvector's lexeme storage */
-#define STRPTR(x)	( (char *) &(x)->entries[(x)->size] )
+#define STRPTR(x)	( (char *) &(x)->entries[TS_COUNT(x)] )
 
-#define _POSVECPTR(x, e)	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
-#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
-#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
+/* for WordEntry with offset return its WordEntry with other properties */
+#define UNWRAP_ENTRY(x,we) \
+	((we)->hasoff? (WordEntry *)(STRPTR(x) + (we)->offset): (we))
+
+/*
+ * helpers used when we're not sure that WordEntry
+ * contains ether offset or len
+ */
+#define ENTRY_NPOS(x,we) (UNWRAP_ENTRY(x,we)->npos)
+#define ENTRY_LEN(x,we) (UNWRAP_ENTRY(x,we)->len)
+
+/* pointer to start of positions */
+#define POSDATAPTR(lex, len) ((WordEntryPos *) (lex + SHORTALIGN(len)))
+
+/* set default offset in tsvector data */
+#define INITPOS(p) ((p) = sizeof(WordEntry))
+
+/* increment entry and offset by given WordEntry */
+#define INCRPTR(x,w,p) \
+do { \
+	WordEntry *y = (w);									\
+	if ((w)->hasoff)									\
+	{													\
+		y = (WordEntry *) (STRPTR(x) + (w)->offset);	\
+		(p) = (w)->offset + sizeof(WordEntry);			\
+	}													\
+	(w)++;												\
+	Assert(!y->hasoff);									\
+	(p) += SHORTALIGN(y->len) + y->npos * sizeof(WordEntryPos); \
+	if ((w) - ARRPTR(x) < TS_COUNT(x) && w->hasoff)		\
+		(p) = INTALIGN(p) + sizeof(WordEntry);			\
+} while (0);
+
+/* used to calculate tsvector size in in tsvector constructors */
+#define INCRSIZE(s,i,l,n) /* size,index,len,npos */		\
+do {													\
+	if ((i) % TS_OFFSET_STRIDE == 0)					\
+		(s) = INTALIGN(s) + sizeof(WordEntry);			\
+	else												\
+		(s) = SHORTALIGN(s);							\
+	(s) += (l);											\
+	(s) = (n)? SHORTALIGN(s) + (n) * sizeof(WordEntryPos) : (s);	\
+} while (0);
 
 /*
  * fmgr interface macros
  */
 
-#define DatumGetTSVector(X)			((TSVector) PG_DETOAST_DATUM(X))
-#define DatumGetTSVectorCopy(X)		((TSVector) PG_DETOAST_DATUM_COPY(X))
+TSVector	tsvector_upgrade(Datum orig, bool copy);
+
+#define DatumGetTSVector(X)			tsvector_upgrade((X), false)
+#define DatumGetTSVectorCopy(X)		tsvector_upgrade((X), true)
 #define TSVectorGetDatum(X)			PointerGetDatum(X)
 #define PG_GETARG_TSVECTOR(n)		DatumGetTSVector(PG_GETARG_DATUM(n))
 #define PG_GETARG_TSVECTOR_COPY(n)	DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_TSVECTOR(x)		return TSVectorGetDatum(x)
 
-
 /*
  * TSQuery
  *
@@ -239,4 +281,22 @@ typedef TSQueryData *TSQuery;
 #define PG_GETARG_TSQUERY_COPY(n)	DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_TSQUERY(x)		return TSQueryGetDatum(x)
 
+int			tsvector_getoffset(TSVector vec, int idx, WordEntry **we);
+char *tsvector_addlexeme(TSVector tsv, int idx, int *dataoff,
+				   char *lexeme, int lexeme_len, WordEntryPos *pos, int npos);
+
+/* Returns lexeme and its entry by given index from TSVector */
+inline static char *
+tsvector_getlexeme(TSVector vec, int idx, WordEntry **we)
+{
+	Assert(idx >= 0 && idx < TS_COUNT(vec));
+
+	/*
+	 * we do not allow we == NULL because returned lexeme is not \0 ended, and
+	 * always should be used with we->len
+	 */
+	Assert(we != NULL);
+	return STRPTR(vec) + tsvector_getoffset(vec, idx, we);
+}
+
 #endif							/* _PG_TSTYPE_H_ */

#14

Ildus Kurbangaliev

i.kurbangaliev@postgrespro.ru

over 8 years ago

In reply to: Alexander Korotkov (#11)

Re: Remove 1MB size limit in tsvector

On Thu, 10 Aug 2017 18:06:17 +0300
Alexander Korotkov <a.korotkov@postgrespro.ru> wrote:

On Wed, Aug 9, 2017 at 7:38 PM, Robert Haas <robertmhaas@gmail.com>
wrote:

On Tue, Aug 1, 2017 at 4:00 PM, Ildus K
<i.kurbangaliev@postgrespro.ru> wrote:

It's a workaround. DatumGetTSVector and
DatumGetTSVectorCopy will upgrade tsvector on the fly if it
has old format.

Hmm, that seems like a real fix, not just a workaround. If you can
transparently read the old format, there's no problem. Not sure
about performance, though.

+1
Ildus, I think we need to benchmark reading of the old format. There
would be tradeoff between performance of old format reading and
amount of extra code needed. Once we will have benchmarks we can
consider whether this is the solution we would like to buy.

In my benchmarks when database fits into buffers (so it's measurement of
the time required for the tsvectors conversion) it gives me these
results:

Without conversion:

$ ./tsbench2 -database test1 -bench_time 300
2017/08/17 12:04:44 Number of connections: 4
2017/08/17 12:04:44 Database: test1
2017/08/17 12:09:44 Processed: 51419

With conversion:

$ ./tsbench2 -database test1 -bench_time 300
2017/08/17 12:14:31 Number of connections: 4
2017/08/17 12:14:31 Database: test1
2017/08/17 12:19:31 Processed: 43607

I ran a bunch of these tests, and these results are stable on my
machine. So in these specific tests performance regression about 15%.

Same time I think this could be the worst case, because usually data
is on disk and conversion will not affect so much to performance.

--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#15

Tomas Vondra

tomas.vondra@2ndquadrant.com

over 8 years ago

In reply to: Ildus Kurbangaliev (#14)

Re: Remove 1MB size limit in tsvector

Hi,

On 08/17/2017 12:23 PM, Ildus Kurbangaliev wrote:

In my benchmarks when database fits into buffers (so it's measurement of
the time required for the tsvectors conversion) it gives me these
results:

Without conversion:

$ ./tsbench2 -database test1 -bench_time 300
2017/08/17 12:04:44 Number of connections: 4
2017/08/17 12:04:44 Database: test1
2017/08/17 12:09:44 Processed: 51419

With conversion:

$ ./tsbench2 -database test1 -bench_time 300
2017/08/17 12:14:31 Number of connections: 4
2017/08/17 12:14:31 Database: test1
2017/08/17 12:19:31 Processed: 43607

I ran a bunch of these tests, and these results are stable on my
machine. So in these specific tests performance regression about 15%.

Same time I think this could be the worst case, because usually data
is on disk and conversion will not affect so much to performance.

That seems like a fairly significant regression, TBH. I don't quite
agree we can simply assume in-memory workloads don't matter, plenty of
databases have 99% cache hit ratio (particularly when considering not
just shared buffers, but also page cache).

Can you share the benchmarks, so that others can retry running them?

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#16

Ildus Kurbangaliev

i.kurbangaliev@postgrespro.ru

over 8 years ago

In reply to: Tomas Vondra (#15)

Re: Remove 1MB size limit in tsvector

On Thu, 7 Sep 2017 23:08:14 +0200
Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote:

Hi,

On 08/17/2017 12:23 PM, Ildus Kurbangaliev wrote:

In my benchmarks when database fits into buffers (so it's
measurement of the time required for the tsvectors conversion) it
gives me these results:

Without conversion:

$ ./tsbench2 -database test1 -bench_time 300
2017/08/17 12:04:44 Number of connections: 4
2017/08/17 12:04:44 Database: test1
2017/08/17 12:09:44 Processed: 51419

With conversion:

$ ./tsbench2 -database test1 -bench_time 300
2017/08/17 12:14:31 Number of connections: 4
2017/08/17 12:14:31 Database: test1
2017/08/17 12:19:31 Processed: 43607

I ran a bunch of these tests, and these results are stable on my
machine. So in these specific tests performance regression about
15%.

Same time I think this could be the worst case, because usually data
is on disk and conversion will not affect so much to performance.

That seems like a fairly significant regression, TBH. I don't quite
agree we can simply assume in-memory workloads don't matter, plenty of
databases have 99% cache hit ratio (particularly when considering not
just shared buffers, but also page cache).

I think part of this regression is caused by better compression of new
format. I can't say exact percent here, need to check with perf.

If you care about performace, you create indexes, which means that
tsvector will no longer be used for text search (except for ORDER BY
rank). Index machinery will only peek into tsquery. Moreover, RUM index
stores positions + lexemes, so it doesn't need tsvectors for ranked
search. As a result, tsvector becomes a storage for
building indexes (indexable type), not something that should be used at
runtime. And the change of the format doesn't affect index creation
time.

Can you share the benchmarks, so that others can retry running them?

Benchmarks are published at github:
https://github.com/ildus/tsbench . I'm not sure that they are easy to
use.

Best regards,
Ildus Kurbangaliev

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#17

Robert Haas

robertmhaas@gmail.com

over 8 years ago

In reply to: Ildus Kurbangaliev (#16)

Re: Remove 1MB size limit in tsvector

On Mon, Sep 11, 2017 at 5:33 AM, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

Moreover, RUM index
stores positions + lexemes, so it doesn't need tsvectors for ranked
search. As a result, tsvector becomes a storage for
building indexes (indexable type), not something that should be used at
runtime. And the change of the format doesn't affect index creation
time.

RUM indexes, though, are not in core.

--
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#18

Tomas Vondra

tomas.vondra@2ndquadrant.com

over 8 years ago

In reply to: Robert Haas (#17)

Re: Remove 1MB size limit in tsvector

On 09/11/2017 01:54 PM, Robert Haas wrote:

On Mon, Sep 11, 2017 at 5:33 AM, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

Moreover, RUM index
stores positions + lexemes, so it doesn't need tsvectors for ranked
search. As a result, tsvector becomes a storage for
building indexes (indexable type), not something that should be used at
runtime. And the change of the format doesn't affect index creation
time.

RUM indexes, though, are not in core.

Yeah, but I think Ildus has a point that this should not really matter
on indexed tsvectors. So the question is how realistic that benchmark
actually is. How likely are we to do queries on fts directly, not
through a GIN/GiST index? Particularly in performance-sensitive cases?

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

#19

Michael Paquier

michael.paquier@gmail.com

about 8 years ago

In reply to: Tomas Vondra (#18)

Re: [HACKERS] Remove 1MB size limit in tsvector

On Mon, Sep 11, 2017 at 9:51 PM, Tomas Vondra
<tomas.vondra@2ndquadrant.com> wrote:

On 09/11/2017 01:54 PM, Robert Haas wrote:

On Mon, Sep 11, 2017 at 5:33 AM, Ildus Kurbangaliev
<i.kurbangaliev@postgrespro.ru> wrote:

Moreover, RUM index
stores positions + lexemes, so it doesn't need tsvectors for ranked
search. As a result, tsvector becomes a storage for
building indexes (indexable type), not something that should be used at
runtime. And the change of the format doesn't affect index creation
time.

RUM indexes, though, are not in core.

Yeah, but I think Ildus has a point that this should not really matter
on indexed tsvectors. So the question is how realistic that benchmark
actually is. How likely are we to do queries on fts directly, not
through a GIN/GiST index? Particularly in performance-sensitive cases?

So many questions unanswered... I am marking the patch as returned
with feedback as the thread has stalled for two months now.
--
Michael