diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml new file mode 100644 index 60b9a09..62ac7fb *** a/doc/src/sgml/func.sgml --- b/doc/src/sgml/func.sgml *************** CREATE TYPE rainbow AS ENUM ('red', 'ora *** 9096,9101 **** --- 9096,9113 ---- + setweight_by_filter + + setweight(tsvector, "char", "text"[]) + + tsvector + assign weight to the elements of tsvector that are listed in the array given as the third argument + setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A', '{cat,rat}') + 'cat':3A 'fat':2,4 'rat':5A + + + + strip strip(tsvector) *************** CREATE TYPE rainbow AS ENUM ('red', 'ora *** 9108,9113 **** --- 9120,9188 ---- + delete + + delete(tsvector, text) + + tsvector + remove entry from tsvector + delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat') + 'cat':3 'rat':5A + + + + + unnest + + unnest(tsvector) + + setof record + expand a tsvector to a set of rows. Each row has the following columns: lexeme, postings, weights. + unnest('fat:2,4 cat:3 rat:5A'::tsvector) + cat {3} {D} + fat {2,4} {D,D} + rat {5} {A} + (3 rows) + + + + + to_array + + to_array(tsvector) + + text[] + convert tsvector to array of lexemes + to_array('fat:2,4 cat:3 rat:5A'::tsvector) + {cat,fat,rat} + + + + + array_to_tsvector + + to_tsvector(text[]) + + tsvector + convert array of lexemes to tsvector + to_tsvector('{cat,fat,rat}'::text[]) + 'cat' 'fat' 'rat' + + + + + filter + + filter(tsvector, "char"[]) + + tsvector + select only elements with the given weights from tsvector + filter('fat:2,4 cat:3b rat:5A'::tsvector, '{a,b}') + 'cat':3B 'rat':5A + + + + to_tsquery to_tsquery( config regconfig , query text) diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c new file mode 100644 index e822ba8..05beaa9 *** a/src/backend/utils/adt/tsvector_op.c --- b/src/backend/utils/adt/tsvector_op.c *************** *** 14,19 **** --- 14,20 ---- #include "postgres.h" + #include "access/htup_details.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" 
#include "commands/trigger.h"
*************** tsvector_length(PG_FUNCTION_ARGS)
*** 195,200 ****
--- 196,332 ----
  }
  
+ /*
+  * Binary-search a tsvector for a lexeme.
+  *
+  * The search relies on the entries being sorted in tsCompareString()
+  * order.  Returns the index of the matching WordEntry, or -1 if the
+  * lexeme is not present.
+  */
+ static int
+ tsvector_bsearch(TSVector tsv, char *lexeme, int lexeme_len)
+ {
+ 	WordEntry  *arr = ARRPTR(tsv);
+ 	char	   *data = STRPTR(tsv);
+ 	int			StopLow = 0,
+ 				StopHigh = tsv->size;
+ 
+ 	while (StopLow < StopHigh)
+ 	{
+ 		int			StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ 		int			cmp;
+ 
+ 		cmp = tsCompareString(lexeme, lexeme_len,
+ 							  data + arr[StopMiddle].pos,
+ 							  arr[StopMiddle].len,
+ 							  false);
+ 
+ 		if (cmp < 0)
+ 			StopHigh = StopMiddle;
+ 		else if (cmp > 0)
+ 			StopLow = StopMiddle + 1;
+ 		else
+ 			return StopMiddle;	/* found it */
+ 	}
+ 
+ 	return -1;
+ }
+ 
+ /*
+  * setweight(tsvector, "char", text[])
+  *
+  * Return a copy of the tsvector in which every position of each lexeme
+  * listed in the array has been given the specified weight.  Array
+  * lexemes that are absent from the tsvector, or that carry no positions,
+  * are ignored.  NULL array elements are rejected.
+  */
+ Datum
+ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
+ {
+ 	TSVector	in = PG_GETARG_TSVECTOR(0);
+ 	char		cw = PG_GETARG_CHAR(1);
+ 	ArrayType  *lexarr = PG_GETARG_ARRAYTYPE_P(2);
+ 	TSVector	out;
+ 	int			i,
+ 				nlex,
+ 				w = 0;
+ 	WordEntry  *entry;
+ 	Datum	   *dlexemes;
+ 	bool	   *nulls;
+ 
+ 	switch (cw)
+ 	{
+ 		case 'A':
+ 		case 'a':
+ 			w = 3;
+ 			break;
+ 		case 'B':
+ 		case 'b':
+ 			w = 2;
+ 			break;
+ 		case 'C':
+ 		case 'c':
+ 			w = 1;
+ 			break;
+ 		case 'D':
+ 		case 'd':
+ 			w = 0;
+ 			break;
+ 		default:
+ 			/* internal error */
+ 			elog(ERROR, "unrecognized weight: %c", cw);
+ 	}
+ 
+ 	/* only weight bits change, so the result is the same size as the input */
+ 	out = (TSVector) palloc(VARSIZE(in));
+ 	memcpy(out, in, VARSIZE(in));
+ 	entry = ARRPTR(out);
+ 
+ 	deconstruct_array(lexarr, TEXTOID, -1, false, 'i',
+ 					  &dlexemes, &nulls, &nlex);
+ 
+ 	/*
+ 	 * Assuming that lexarr is significantly shorter than tsvector we can
+ 	 * iterate through lexarr performing binary search of each lexeme from
+ 	 * lexarr in tsvector.
+ 	 */
+ 	for (i = 0; i < nlex; i++)
+ 	{
+ 		char	   *lex;
+ 		int			lex_len,
+ 					lex_pos,
+ 					npos;
+ 
+ 		if (nulls[i])
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ 					 errmsg("lexeme array may not contain nulls")));
+ 
+ 		/*
+ 		 * Array elements may have short varlena headers, so use the _ANY
+ 		 * accessors for both the data pointer and the length.
+ 		 */
+ 		lex = VARDATA_ANY(dlexemes[i]);
+ 		lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
+ 		lex_pos = tsvector_bsearch(out, lex, lex_len);
+ 
+ 		if (lex_pos >= 0 && (npos = POSDATALEN(out, entry + lex_pos)) != 0)
+ 		{
+ 			WordEntryPos *p = POSDATAPTR(out, entry + lex_pos);
+ 
+ 			while (npos--)
+ 			{
+ 				WEP_SETWEIGHT(*p, w);
+ 				p++;
+ 			}
+ 		}
+ 	}
+ 
+ 	PG_FREE_IF_COPY(in, 0);
+ 	PG_FREE_IF_COPY(lexarr, 2);
+ 	PG_RETURN_POINTER(out);
+ }
+ 
  Datum
  tsvector_setweight(PG_FUNCTION_ARGS)
  {
  	TSVector	in = PG_GETARG_TSVECTOR(0);
*************** tsvector_setweight(PG_FUNCTION_ARGS)
*** 226,232 ****
  			break;
  		default:
  			/* internal error */
! 			elog(ERROR, "unrecognized weight: %d", cw);
  	}
  
  	out = (TSVector) palloc(VARSIZE(in));
--- 358,364 ----
  			break;
  		default:
  			/* internal error */
! 			elog(ERROR, "unrecognized weight: %c", cw);
  	}
  
  	out = (TSVector) palloc(VARSIZE(in));
*************** add_pos(TSVector src, WordEntry *srcptr,
*** 291,296 ****
--- 423,850 ----
  	return *clen - startlen;
  }
  
+ /*
+  * delete(tsvector, text)
+  *
+  * Return the tsvector with the given lexeme removed.  If the lexeme is
+  * not present, the input is returned as is.
+  */
+ Datum
+ tsvector_delete(PG_FUNCTION_ARGS)
+ {
+ 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
+ 				tsout;
+ 	text	   *lexeme = PG_GETARG_TEXT_PP(1);
+ 	WordEntry  *arrin = ARRPTR(tsin),
+ 			   *arrout;
+ 	char	   *datain = STRPTR(tsin),
+ 			   *dataout;
+ 	int			i,
+ 				j,
+ 				skip_index,
+ 				curoff = 0;
+ 
+ 	/* lexemes are sorted, so we can use binary search */
+ 	skip_index = tsvector_bsearch(tsin, VARDATA_ANY(lexeme),
+ 								  VARSIZE_ANY_EXHDR(lexeme));
+ 	if (skip_index < 0)
+ 		PG_RETURN_POINTER(tsin);
+ 
+ 	/*
+ 	 * The exact result size is known only after repacking: the alignment
+ 	 * padding in front of position data depends on the new offsets, so it
+ 	 * cannot be derived by subtracting the removed entry's size from the
+ 	 * input size.  The input size is a safe upper bound (the dropped
+ 	 * WordEntry alone outweighs any added padding), so allocate that and
+ 	 * set the real length at the end.  palloc0 keeps padding bytes
+ 	 * zeroed.
+ 	 */
+ 	tsout = (TSVector) palloc0(VARSIZE(tsin));
+ 
+ 	/* STRPTR() is computed from size, so set it before taking dataout */
+ 	tsout->size = tsin->size - 1;
+ 	arrout = ARRPTR(tsout);
+ 	dataout = STRPTR(tsout);
+ 
+ 	for (i = 0, j = 0; i < tsin->size; i++)
+ 	{
+ 		if (i == skip_index)
+ 			continue;
+ 
+ 		memcpy(dataout + curoff, datain + arrin[i].pos, arrin[i].len);
+ 		arrout[j].haspos = arrin[i].haspos;
+ 		arrout[j].len = arrin[i].len;
+ 		arrout[j].pos = curoff;
+ 		curoff += arrin[i].len;
+ 
+ 		if (arrin[i].haspos)
+ 		{
+ 			int			len = sizeof(uint16) +
+ 				POSDATALEN(tsin, arrin + i) * sizeof(WordEntryPos);
+ 
+ 			curoff = SHORTALIGN(curoff);
+ 			memcpy(dataout + curoff,
+ 				   datain + SHORTALIGN(arrin[i].pos + arrin[i].len),
+ 				   len);
+ 			curoff += len;
+ 		}
+ 
+ 		j++;
+ 	}
+ 
+ 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+ 
+ 	PG_FREE_IF_COPY(tsin, 0);
+ 	PG_FREE_IF_COPY(lexeme, 1);
+ 	PG_RETURN_POINTER(tsout);
+ }
+ 
+ /*
+  * unnest(tsvector)
+  *
+  * Set-returning function that emits one (lexeme, postings, weights) row
+  * per tsvector entry.  Entries without position data yield empty arrays.
+  */
+ Datum
+ tsvector_unnest(PG_FUNCTION_ARGS)
+ {
+ 	FuncCallContext *funcctx;
+ 	TSVector	tsin;
+ 
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		MemoryContext oldcontext;
+ 		TupleDesc	tupdesc;
+ 
+ 		funcctx = SRF_FIRSTCALL_INIT();
+ 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ 
+ 		tupdesc = CreateTemplateTupleDesc(3, false);
+ 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
+ 						   TEXTOID, -1, 0);
+ 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "postings",
+ 						   INT2ARRAYOID, -1, 0);
+ 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
+ 						   TEXTARRAYOID, -1, 0);
+ 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+ 
+ 		/*
+ 		 * Detoast the argument once, into the multi-call context, instead
+ 		 * of on every call; the copy stays valid until the last row.
+ 		 */
+ 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+ 
+ 		MemoryContextSwitchTo(oldcontext);
+ 	}
+ 
+ 	funcctx = SRF_PERCALL_SETUP();
+ 	tsin = (TSVector) funcctx->user_fctx;
+ 
+ 	if (funcctx->call_cntr < tsin->size)
+ 	{
+ 		WordEntry  *arrin = ARRPTR(tsin);
+ 		char	   *data = STRPTR(tsin);
+ 		HeapTuple	tuple;
+ 		int			j,
+ 					i = funcctx->call_cntr;
+ 		bool		nulls[] = {false, false, false};
+ 		Datum		values[3];
+ 
+ 		values[0] = PointerGetDatum(
+ 			cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
+ 
+ 		if (arrin[i].haspos)
+ 		{
+ 			WordEntryPosVector *posv;
+ 			Datum	   *positions;
+ 			Datum	   *weights;
+ 			char		weight;
+ 
+ 			posv = (WordEntryPosVector *)
+ 				(data + SHORTALIGN(arrin[i].pos + arrin[i].len));
+ 			positions = palloc(posv->npos * sizeof(Datum));
+ 			weights = palloc(posv->npos * sizeof(Datum));
+ 
+ 			for (j = 0; j < posv->npos; j++)
+ 			{
+ 				positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
+ 				/* stored weights 0..3 correspond to 'D'..'A' */
+ 				weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
+ 				weights[j] = PointerGetDatum(
+ 					cstring_to_text_with_len(&weight, 1));
+ 			}
+ 
+ 			values[1] = PointerGetDatum(
+ 				construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
+ 			values[2] = PointerGetDatum(
+ 				construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
+ 		}
+ 		else
+ 		{
+ 			values[1] = PointerGetDatum(
+ 				construct_array(NULL, 0, INT2OID, 2, true, 's'));
+ 			values[2] = PointerGetDatum(
+ 				construct_array(NULL, 0, TEXTOID, -1, false, 'i'));
+ 		}
+ 
+ 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ 	}
+ 	else
+ 	{
+ 		SRF_RETURN_DONE(funcctx);
+ 	}
+ }
+ 
+ /*
+  * to_array(tsvector)
+  *
+  * Return the lexemes of a tsvector as a text array, in the order they are
+  * stored.  Position and weight data are not included.
+  */
+ Datum
+ tsvector_to_array(PG_FUNCTION_ARGS)
+ {
+ 	TSVector	tsin = PG_GETARG_TSVECTOR(0);
+ 	WordEntry  *arrin = ARRPTR(tsin);
+ 	char	   *data = STRPTR(tsin);
+ 	Datum	   *elements;
+ 	int			i;
+ 	ArrayType  *array;
+ 
+ 	/*
+ 	 * The number of lexemes is bounded only by the size of the tsvector,
+ 	 * so keep the scratch array on the heap rather than in a
+ 	 * variable-length array on the stack.
+ 	 */
+ 	elements = palloc(tsin->size * sizeof(Datum));
+ 
+ 	for (i = 0; i < tsin->size; i++)
+ 	{
+ 		elements[i] = PointerGetDatum(
+ 			cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
+ 	}
+ 
+ 	array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+ 
+ 	pfree(elements);
+ 	PG_FREE_IF_COPY(tsin, 0);
+ 	PG_RETURN_POINTER(array);
+ }
+ 
+ /*
+  * qsort comparator for lexemes held as text Datums; uses the same
+  * tsCompareString() ordering as tsvector_bsearch().
+  */
+ static int
+ compare_text_lexemes(const void *va, const void *vb)
+ {
+ 	Datum		a = *((const Datum *) va);
+ 	Datum		b = *((const Datum *) vb);
+ 
+ 	return tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
+ 						   VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
+ 						   false);
+ }
+ 
+ /*
+  * to_tsvector(text[])
+  *
+  * Build a tsvector without position data from an array of lexemes.  The
+  * lexemes are used as given, but are sorted and de-duplicated, so the
+  * result does not keep the order of the array.  NULL array elements are
+  * rejected.
+  */
+ Datum
+ array_to_tsvector(PG_FUNCTION_ARGS)
+ {
+ 	ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
+ 	TSVector	tsout;
+ 	Datum	   *dlexemes;
+ 	WordEntry  *arrout;
+ 	bool	   *nulls;
+ 	int			nitems,
+ 				i,
+ 				j,
+ 				tslen,
+ 				datalen = 0;
+ 	char	   *cur;
+ 
+ 	deconstruct_array(v, TEXTOID, -1, false, 'i',
+ 					  &dlexemes, &nulls, &nitems);
+ 
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		if (nulls[i])
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ 					 errmsg("lexeme array may not contain nulls")));
+ 	}
+ 
+ 	/*
+ 	 * Lookups in a tsvector use binary search, so its entries have to be
+ 	 * sorted and free of duplicates.
+ 	 */
+ 	if (nitems > 1)
+ 	{
+ 		qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
+ 		j = 0;
+ 		for (i = 1; i < nitems; i++)
+ 		{
+ 			if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
+ 				dlexemes[++j] = dlexemes[i];
+ 		}
+ 		nitems = ++j;
+ 	}
+ 
+ 	/* add up the space needed by the surviving lexemes */
+ 	for (i = 0; i < nitems; i++)
+ 		datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
+ 	tslen = CALCDATASIZE(nitems, datalen);
+ 
+ 	tsout = (TSVector) palloc0(tslen);
+ 	SET_VARSIZE(tsout, tslen);
+ 	tsout->size = nitems;
+ 
+ 	arrout = ARRPTR(tsout);
+ 	cur = STRPTR(tsout);
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		char	   *lex = VARDATA_ANY(dlexemes[i]);
+ 		int			lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
+ 
+ 		memcpy(cur, lex, lex_len);
+ 		arrout[i].haspos = 0;
+ 		arrout[i].len = lex_len;
+ 		arrout[i].pos = cur - STRPTR(tsout);
+ 		cur += lex_len;
+ 	}
+ 
+ 	PG_FREE_IF_COPY(v, 0);
+ 	PG_RETURN_POINTER(tsout);
+ }
+ 
+ /*
+  * filter(tsvector, "char"[])
+  *
+  * Return a tsvector that keeps only the positions whose weight is listed
+  * in the array.  Lexemes left with no positions, including those that had
+  * no position data to begin with, are dropped.  NULL array elements are
+  * rejected.
+  */
+ Datum
+ tsvector_filter(PG_FUNCTION_ARGS)
+ {
+ 	TSVector	tsin = PG_GETARG_TSVECTOR(0),
+ 				tsout;
+ 	ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
+ 	WordEntry  *arrin = ARRPTR(tsin),
+ 			   *arrout;
+ 	char	   *datain = STRPTR(tsin),
+ 			   *dataout;
+ 	Datum	   *dweights;
+ 	bool	   *nulls;
+ 	int			nweights;
+ 	int			i,
+ 				j;
+ 	int			cur_pos = 0;	/* byte offset; must not be a char */
+ 	int			mask = 0;
+ 
+ 	deconstruct_array(weights, CHAROID, 1, true, 'c',
+ 					  &dweights, &nulls, &nweights);
+ 
+ 	for (i = 0; i < nweights; i++)
+ 	{
+ 		char		cw;
+ 
+ 		if (nulls[i])
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ 					 errmsg("weight array may not contain nulls")));
+ 
+ 		cw = DatumGetChar(dweights[i]);
+ 		switch (cw)
+ 		{
+ 			case 'A':
+ 			case 'a':
+ 				mask = mask | 8;
+ 				break;
+ 			case 'B':
+ 			case 'b':
+ 				mask = mask | 4;
+ 				break;
+ 			case 'C':
+ 			case 'c':
+ 				mask = mask | 2;
+ 				break;
+ 			case 'D':
+ 			case 'd':
+ 				mask = mask | 1;
+ 				break;
+ 			default:
+ 				/* not one of A, B, C or D */
+ 				elog(ERROR, "unrecognized weight: %c", cw);
+ 		}
+ 	}
+ 
+ 	/* the result can only shrink, so the input size is enough */
+ 	tsout = (TSVector) palloc0(VARSIZE(tsin));
+ 	tsout->size = tsin->size;
+ 	arrout = ARRPTR(tsout);
+ 	dataout = STRPTR(tsout);
+ 
+ 	for (i = j = 0; i < tsin->size; i++)
+ 	{
+ 		WordEntryPosVector *posvin,
+ 				   *posvout;
+ 		int			npos = 0;
+ 		int			k;
+ 
+ 		if (!arrin[i].haspos)
+ 			continue;
+ 
+ 		posvin = (WordEntryPosVector *)
+ 			(datain + SHORTALIGN(arrin[i].pos + arrin[i].len));
+ 		posvout = (WordEntryPosVector *)
+ 			(dataout + SHORTALIGN(cur_pos + arrin[i].len));
+ 
+ 		for (k = 0; k < posvin->npos; k++)
+ 		{
+ 			/* mask bit n is set when stored weight n was requested */
+ 			if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
+ 				posvout->pos[npos++] = posvin->pos[k];
+ 		}
+ 
+ 		/* no satisfactory postings found, so skip that lexeme */
+ 		if (!npos)
+ 			continue;
+ 
+ 		arrout[j].haspos = true;
+ 		arrout[j].len = arrin[i].len;
+ 		arrout[j].pos = cur_pos;
+ 
+ 		memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
+ 		posvout->npos = npos;
+ 		cur_pos += SHORTALIGN(arrin[i].len);
+ 		cur_pos += npos * sizeof(WordEntryPos) + sizeof(uint16);
+ 		j++;
+ 	}
+ 
+ 	/*
+ 	 * The data was written where STRPTR() pointed while size still counted
+ 	 * every input entry; now that size is final, slide it down to follow
+ 	 * the shortened WordEntry array.
+ 	 */
+ 	tsout->size = j;
+ 	if (dataout != STRPTR(tsout))
+ 		memmove(STRPTR(tsout), dataout, cur_pos);
+ 
+ 	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+ 
+ 	PG_FREE_IF_COPY(tsin, 0);
+ 	PG_FREE_IF_COPY(weights, 1);
+ 	PG_RETURN_POINTER(tsout);
+ }
  
  Datum
  tsvector_concat(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index d8640db..28df115
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DATA(insert OID = 3623 ( strip PGNS
*** 4576,4582 ****
  DESCR("strip position information");
  DATA(insert OID = 3624 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 18" _null_ _null_ _null_ _null_ _null_ tsvector_setweight _null_ _null_ _null_ ));
  DESCR("set weight of lexeme's entries");
! DATA(insert OID = 3625 ( tsvector_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
  
  DATA(insert OID = 3634 ( ts_match_vq PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ ));
  DATA(insert OID = 3635 ( ts_match_qv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ ));
--- 4576,4594 ----
  DESCR("strip position information");
  DATA(insert OID = 3624 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 18" _null_ _null_ _null_ _null_ _null_ tsvector_setweight _null_ _null_ _null_ ));
  DESCR("set weight of lexeme's entries");
! DATA(insert OID = 3320 ( setweight PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3614 "3614 18 1009" _null_ _null_ _null_ _null_ _null_ tsvector_setweight_by_filter _null_ _null_ _null_ ));
! DESCR("set weight of lexeme's entries");
! 
DATA(insert OID = 3625 ( tsvector_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ )); ! DATA(insert OID = 3315 ( delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 25" _null_ _null_ _null_ _null_ _null_ tsvector_delete _null_ _null_ _null_ )); ! DESCR("delete lexeme"); ! DATA(insert OID = 3316 ( unnest PGNSP PGUID 12 1 10 0 0 f f f f t t i s 1 0 2249 "3614" "{3614,25,1005,1009}" "{i,o,o,o}" "{tsvector,lexeme,postings,weights}" _null_ _null_ tsvector_unnest _null_ _null_ _null_ )); ! DESCR("expand tsvector to set of rows"); ! DATA(insert OID = 3317 ( to_array PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 1009 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_to_array _null_ _null_ _null_ )); ! DESCR("convert to lexeme's array"); ! DATA(insert OID = 3318 ( to_tsvector PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "1009" _null_ _null_ _null_ _null_ _null_ array_to_tsvector _null_ _null_ _null_ )); ! DESCR("build tsvector from lexeme's array"); ! DATA(insert OID = 3319 ( filter PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 1002" _null_ _null_ _null_ _null_ _null_ tsvector_filter _null_ _null_ _null_ )); ! 
DESCR("returns tsvector that contain only postings with given weights"); DATA(insert OID = 3634 ( ts_match_vq PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ )); DATA(insert OID = 3635 ( ts_match_qv PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ )); diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h new file mode 100644 index 281cdd6..78d3d0d *** a/src/include/tsearch/ts_type.h --- b/src/include/tsearch/ts_type.h *************** extern Datum tsvector_cmp(PG_FUNCTION_AR *** 141,147 **** --- 141,154 ---- extern Datum tsvector_length(PG_FUNCTION_ARGS); extern Datum tsvector_strip(PG_FUNCTION_ARGS); extern Datum tsvector_setweight(PG_FUNCTION_ARGS); + extern Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS); extern Datum tsvector_concat(PG_FUNCTION_ARGS); + extern Datum tsvector_delete(PG_FUNCTION_ARGS); + extern Datum tsvector_unnest(PG_FUNCTION_ARGS); + extern Datum tsvector_to_array(PG_FUNCTION_ARGS); + extern Datum array_to_tsvector(PG_FUNCTION_ARGS); + extern Datum tsvector_filter(PG_FUNCTION_ARGS); + extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS); extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out new file mode 100644 index 6284fb6..a02a56b *** a/src/test/regress/expected/tstypes.out --- b/src/test/regress/expected/tstypes.out *************** SELECT 'a:3A b:2a'::tsvector || 'ba:1234 *** 83,100 **** 'a':3A,4B 'b':2A 'ba':1237 (1 row) - SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); - setweight - ---------------------------------------------------------- - 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C - (1 row) - - SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); - strip - 
--------------- - 'a' 'asd' 'w' - (1 row) - --Base tsquery test SELECT '1'::tsquery; tsquery --- 83,88 ---- *************** SELECT ts_rank_cd(' a:1 s:2 d g'::tsvect *** 625,627 **** --- 613,774 ---- 0.1 (1 row) + -- tsvector editing operations + SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); + strip + --------------- + 'a' 'asd' 'w' + (1 row) + + SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + strip + ---------------------------------------------- + 'base' 'hidden' 'rebel' 'spaceship' 'strike' + (1 row) + + SELECT strip('base hidden rebel spaceship strike'::tsvector); + strip + ---------------------------------------------- + 'base' 'hidden' 'rebel' 'spaceship' 'strike' + (1 row) + + SELECT delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship'); + delete + ------------------------------------------ + 'base':7 'hidden':6 'rebel':1 'strike':3 + (1 row) + + SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base'); + delete + -------------------------------------------------------------- + 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3 + (1 row) + + SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship'); + delete + ------------------------------------------ + 'base':7 'hidden':6 'rebel':1 'strike':3 + (1 row) + + SELECT delete('base hidden rebel spaceship strike'::tsvector, 'spaceship'); + delete + ---------------------------------- + 'base' 'hidden' 'rebel' 'strike' + (1 row) + + SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + unnest + --------------------------------------------- + (base,{7},{D}) + (hidden,{6},{D}) + (rebel,{1},{D}) + (spaceship,"{2,33,34,35,36}","{D,A,B,C,D}") + (strike,{3},{D}) + (5 rows) + + SELECT unnest('base hidden rebel spaceship strike'::tsvector); + unnest + ------------------- + (base,{},{}) + 
(hidden,{},{}) + (rebel,{},{}) + (spaceship,{},{}) + (strike,{},{}) + (5 rows) + + SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + lexeme | postings | weights + -----------+-----------------+------------- + base | {7} | {D} + hidden | {6} | {D} + rebel | {1} | {D} + spaceship | {2,33,34,35,36} | {D,A,B,C,D} + strike | {3} | {D} + (5 rows) + + SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector); + lexeme | postings | weights + -----------+----------+--------- + base | {} | {} + hidden | {} | {} + rebel | {} | {} + spaceship | {} | {} + strike | {} | {} + (5 rows) + + SELECT lexeme, postings[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + lexeme | postings + -----------+---------- + base | 7 + hidden | 6 + rebel | 1 + spaceship | 2 + strike | 3 + (5 rows) + + SELECT to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + to_array + -------------------------------------- + {base,hidden,rebel,spaceship,strike} + (1 row) + + SELECT to_array('base hidden rebel spaceship strike'::tsvector); + to_array + -------------------------------------- + {base,hidden,rebel,spaceship,strike} + (1 row) + + SELECT to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']); + to_tsvector + ---------------------------------------------- + 'base' 'hidden' 'rebel' 'spaceship' 'strike' + (1 row) + + SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); + setweight + ---------------------------------------------------------- + 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C + (1 row) + + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c'); + setweight + ---------------------------------------------------------- + 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C + (1 row) + + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}'); 
+ setweight + ------------------------------------------------------ + 'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567 + (1 row) + + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}'); + setweight + ------------------------------------------------------ + 'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567 + (1 row) + + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}'); + setweight + -------------------------------------------------------- + 'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81C,222C,567C + (1 row) + + SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}'); + setweight + --------------------------------- + 'a' 'asd' 'w':5,6,12B,13A 'zxc' + (1 row) + + SELECT filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}'); + filter + ------------------------------------------------------------- + 'base':7A 'hidden':6A 'rebel':1A 'spaceship':2A 'strike':3A + (1 row) + + SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a}'); + filter + -------- + + (1 row) + diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql new file mode 100644 index fd7c702..2288bbd *** a/src/test/regress/sql/tstypes.sql --- b/src/test/regress/sql/tstypes.sql *************** SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\ *** 14,21 **** SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector)); SELECT '''w'':4A,3B,2C,1D,5 a:8'; SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; - SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); - SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); --Base tsquery test SELECT '1'::tsquery; --- 14,19 ---- *************** SELECT ts_rank_cd(' a:1 s:2 d g'::tsvect *** 115,117 **** --- 113,148 ---- SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2B d 
g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s'); + + -- tsvector editing operations + + SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); + SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + SELECT strip('base hidden rebel spaceship strike'::tsvector); + + SELECT delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship'); + SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base'); + SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship'); + SELECT delete('base hidden rebel spaceship strike'::tsvector, 'spaceship'); + + SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + SELECT unnest('base hidden rebel spaceship strike'::tsvector); + SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector); + SELECT lexeme, postings[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + + SELECT to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector); + SELECT to_array('base hidden rebel spaceship strike'::tsvector); + + SELECT to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']); + + SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c'); + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}'); + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}'); + SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}'); + SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}'); + + SELECT filter('base:7A empir:17 evil:15 first:11 
galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}'); + SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a}'); +