From 49880be8a3acec3b950eba7eba6b23185111c12c Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sun, 2 May 2021 20:40:20 +0300 Subject: [PATCH] Make websearch_to_tsquery() parse text in quotes as a single token --- src/backend/access/gist/gist.c | 14 +++-- src/backend/access/gist/gistbuild.c | 2 +- src/backend/access/gist/gistutil.c | 2 +- src/backend/utils/adt/tsquery.c | 83 ++++++++------------------- src/include/access/gist_private.h | 3 +- src/test/regress/expected/tsearch.out | 24 +++++--- src/test/regress/sql/tsearch.sql | 1 + 7 files changed, 53 insertions(+), 76 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 0683f42c258..98ec8858f1c 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -305,7 +305,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos)); } itvec = gistjoinvector(itvec, &tlen, itup, ntup); - dist = gistSplit(rel, page, itvec, tlen, giststate); + dist = gistSplit(rel, page, itvec, tlen, giststate, freespace); /* * Check that split didn't produce too many pages. 
@@ -1417,7 +1417,8 @@ gistSplit(Relation r, Page page, IndexTuple *itup, /* contains compressed entry */ int len, - GISTSTATE *giststate) + GISTSTATE *giststate, + Size freespace) { IndexTuple *lvectup, *rvectup; @@ -1439,7 +1440,8 @@ gistSplit(Relation r, ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %zu exceeds maximum %zu for index \"%s\"", - IndexTupleSize(itup[0]), GiSTPageSize, + IndexTupleSize(itup[0]), + GiSTPageSize - sizeof(ItemIdData) - freespace, RelationGetRelationName(r)))); memset(v.spl_lisnull, true, @@ -1461,7 +1463,8 @@ gistSplit(Relation r, /* finalize splitting (may need another split) */ if (!gistfitpage(rvectup, v.splitVector.spl_nright)) { - res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate); + res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, + giststate, freespace); } else { @@ -1476,7 +1479,8 @@ gistSplit(Relation r, SplitedPageLayout *resptr, *subres; - resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate); + resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, + giststate, freespace); /* install on list's tail */ while (resptr->next) diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index f46a42197c9..d503ea608eb 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -475,7 +475,7 @@ gist_indexsortbuild_pagestate_add(GISTBuildState *state, Size sizeNeeded; /* Does the tuple fit? 
If not, flush */ - sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace; + sizeNeeded = IndexTupleSize(itup) + state->freespace; if (PageGetFreeSpace(pagestate->page) < sizeNeeded) gist_indexsortbuild_pagestate_flush(state, pagestate); diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 8dcd53c4577..ca5a39d65e5 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -71,7 +71,7 @@ gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size f deleted = IndexTupleSize(itup) + sizeof(ItemIdData); } - return (PageGetFreeSpace(page) + deleted < size); + return (PageGetFreeSpace(page) + deleted < size - sizeof(ItemIdData)); } bool diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index fe4470174f5..f2085594263 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -77,7 +77,6 @@ struct TSQueryParserStateData char *buf; /* current scan point */ int count; /* nesting count, incremented by (, * decremented by ) */ - bool in_quotes; /* phrase in quotes "" */ ts_parserstate state; /* polish (prefix) notation in list, filled in by push* functions */ @@ -235,9 +234,6 @@ parse_or_operator(TSQueryParserState pstate) { char *ptr = pstate->buf; - if (pstate->in_quotes) - return false; - /* it should begin with "OR" literal */ if (pg_strncasecmp(ptr, "or", 2) != 0) return false; @@ -398,42 +394,33 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->buf++; state->state = WAITOPERAND; - if (state->in_quotes) - continue; - *operator = OP_NOT; return PT_OPR; } else if (t_iseq(state->buf, '"')) { + /* Everything in quotes is processed as a single token */ + + /* skip opening quotes */ state->buf++; + *strval = state->buf; - if (!state->in_quotes) - { - state->state = WAITOPERAND; + /* iterate to the closing quotes or end of the string */ + while (*state->buf != '\0' && !t_iseq(state->buf, 
'"')) + state->buf++; + *lenval = state->buf - *strval; - if (strchr(state->buf, '"')) - { - /* quoted text should be ordered <-> */ - state->in_quotes = true; - return PT_OPEN; - } + /* skip closing quotes if not end of the string */ + if (*state->buf != '\0') + state->buf++; - /* web search tolerates missing quotes */ - continue; - } - else - { - /* we have to provide an operand */ - state->in_quotes = false; - state->state = WAITOPERATOR; - pushStop(state); - return PT_CLOSE; - } + state->state = WAITOPERATOR; + state->count++; + return PT_VAL; } else if (ISOPERATOR(state->buf)) { - /* or else gettoken_tsvector() will raise an error */ + /* or else gettoken_tsvector() will raise an error */ state->buf++; state->state = WAITOPERAND; continue; @@ -467,24 +454,13 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, case WAITOPERATOR: if (t_iseq(state->buf, '"')) { - if (!state->in_quotes) - { - /* - * put implicit AND after an operand and handle this - * quote in WAITOPERAND - */ - state->state = WAITOPERAND; - *operator = OP_AND; - return PT_OPR; - } - else - { - state->buf++; - - /* just close quotes */ - state->in_quotes = false; - return PT_CLOSE; - } + /* + * put implicit AND after an operand and handle this + * quote in WAITOPERAND + */ + state->state = WAITOPERAND; + *operator = OP_AND; + return PT_OPR; } else if (parse_or_operator(state)) { @@ -498,18 +474,8 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, } else if (!t_isspace(state->buf)) { - if (state->in_quotes) - { - /* put implicit <-> after an operand */ - *operator = OP_PHRASE; - *weight = 1; - } - else - { - /* put implicit AND after an operand */ - *operator = OP_AND; - } - + /* put implicit AND after an operand */ + *operator = OP_AND; state->state = WAITOPERAND; return PT_OPR; } @@ -846,7 +812,6 @@ parse_tsquery(char *buf, state.buffer = buf; state.buf = buf; state.count = 0; - state.in_quotes = false; state.state = WAITFIRSTOPERAND; state.polstr = NIL; diff --git 
a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 553d364e2d1..5835dd1c7e0 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -433,7 +433,8 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, bool is_build); extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, - int len, GISTSTATE *giststate); + int len, GISTSTATE *giststate, + Size freespace); /* gistxlog.c */ extern XLogRecPtr gistXLogPageDelete(Buffer buffer, diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 4ae62320c9f..45b92a63388 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -2678,9 +2678,9 @@ select websearch_to_tsquery('simple', 'abc OR_abc'); -- test quotes select websearch_to_tsquery('english', '"pg_class pg'); - websearch_to_tsquery -------------------------- - 'pg' <-> 'class' & 'pg' + websearch_to_tsquery +--------------------------- + 'pg' <-> 'class' <-> 'pg' (1 row) select websearch_to_tsquery('english', 'pg_class pg"'); @@ -2695,6 +2695,12 @@ select websearch_to_tsquery('english', '"pg_class pg"'); 'pg' <-> 'class' <-> 'pg' (1 row) +select websearch_to_tsquery('english', '"pg_class : pg"'); + websearch_to_tsquery +--------------------------- + 'pg' <-> 'class' <-> 'pg' +(1 row) + select websearch_to_tsquery('english', 'abc "pg_class pg"'); websearch_to_tsquery ----------------------------------- @@ -2708,15 +2714,15 @@ select websearch_to_tsquery('english', '"pg_class pg" def'); (1 row) select websearch_to_tsquery('english', 'abc "pg pg_class pg" def'); - websearch_to_tsquery --------------------------------------------------------- - 'abc' & 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg' & 'def' + websearch_to_tsquery +---------------------------------------------------- + 'abc' & 'pg' <-> 'pg' <-> 'class' <-> 'pg' & 'def' (1 row) select websearch_to_tsquery('english', ' or "pg pg_class 
pg" or '); - websearch_to_tsquery ----------------------------------------- - 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg' + websearch_to_tsquery +------------------------------------ + 'pg' <-> 'pg' <-> 'class' <-> 'pg' (1 row) select websearch_to_tsquery('english', '""pg pg_class pg""'); diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index b02ed73f6a8..d929210998a 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -759,6 +759,7 @@ select websearch_to_tsquery('simple', 'abc OR_abc'); select websearch_to_tsquery('english', '"pg_class pg'); select websearch_to_tsquery('english', 'pg_class pg"'); select websearch_to_tsquery('english', '"pg_class pg"'); +select websearch_to_tsquery('english', '"pg_class : pg"'); select websearch_to_tsquery('english', 'abc "pg_class pg"'); select websearch_to_tsquery('english', '"pg_class pg" def'); select websearch_to_tsquery('english', 'abc "pg pg_class pg" def'); -- 2.24.3 (Apple Git-128)