Prefix operator for text and spgist support
Hi,
Attached patch introduces prefix operator ^@ for text type. For 'a ^@ b'
it returns true if 'a' starts with 'b'. Also there is spgist index
support for this operator.
It could be useful as an alternative for LIKE for 'something%'
templates. Or even used in LIKE queries instead of ~>=~ and ~<~ in the
future. But it would require new strategy for btree.
--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
Attachments:
prefix_operator_and_spgist.patchtext/x-patchDownload
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..5beb9e33a1 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (strategy > 10 && strategy != RTPrefixStrategyNumber)
{
if (collate_is_c)
strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -601,10 +605,21 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
for (j = 0; j < in->nkeys; j++)
{
StrategyNumber strategy = in->scankeys[j].sk_strategy;
- text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
- int queryLen = VARSIZE_ANY_EXHDR(query);
+ text *query;
+ int queryLen;
int r;
+ /* for this strategy collation is not important */
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ res = DatumGetBool(DirectFunctionCall2(text_startswith,
+ out->leafValue, in->scankeys[j].sk_argument));
+ goto next;
+ }
+
+ query = DatumGetTextPP(in->scankeys[j].sk_argument);
+ queryLen = VARSIZE_ANY_EXHDR(query);
+
if (strategy > 10)
{
/* Collation-aware comparison */
@@ -655,6 +670,7 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
break;
}
+next:
if (!res)
break; /* no need to consider remaining conditions */
}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fcc8323f62..be91082b56 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1315,8 +1315,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
* right cache key, so that they can be re-used at runtime.
*/
patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, collation,
- &prefix, &rest_selec);
+
+ if (ptype == Pattern_Type_Prefix)
+ {
+ char *s = TextDatumGetCString(constval);
+ prefix = string_to_const(s, vartype);
+ pstatus = Pattern_Prefix_Partial;
+ rest_selec = 1.0; /* all */
+ pfree(s);
+ }
+ else
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
/*
* If necessary, coerce the prefix constant to the right type.
@@ -1486,6 +1496,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2904,6 +2924,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 304cb26691..050875e0bb 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1762,6 +1762,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len2 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
#define RTSuperStrategyNumber 26 /* for inet << */
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
+#define RTPrefixStrategyNumber 28 /* for text ^@ */
-#define RTMaxStrategyNumber 27
+#define RTMaxStrategyNumber 28
#endif /* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..7f7ffcfecb 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert ( 4017 25 25 2 s 2315 4000 0 ));
DATA(insert ( 4017 25 25 3 s 98 4000 0 ));
DATA(insert ( 4017 25 25 4 s 2317 4000 0 ));
DATA(insert ( 4017 25 25 5 s 2318 4000 0 ));
+DATA(insert ( 4017 25 25 28 s 315 4000 0 ));
DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..414a967557 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 315 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index f01648c961..34a3529c70 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -211,6 +211,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2548,6 +2549,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5
DESCR("join selectivity of NOT LIKE");
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
/* Aggregate-related functions */
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
typedef enum
{
- Pattern_Type_Like, Pattern_Type_Like_IC,
- Pattern_Type_Regex, Pattern_Type_Regex_IC
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
} Pattern_Type;
typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 031a0bcec9..ff6c9cb5dc 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
f1
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Index Only Scan using sp_radix_ind on radix_text_tbl
+ Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
QUERY PLAN
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 684f7f20a8..89007d61f9 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -711,6 +711,7 @@ uuid_gt(uuid,uuid)
uuid_ne(uuid,uuid)
xidneq(xid,xid)
xidneqint4(xid,integer)
+text_startswith(text,text)
macaddr8_eq(macaddr8,macaddr8)
macaddr8_lt(macaddr8,macaddr8)
macaddr8_le(macaddr8,macaddr8)
@@ -1880,7 +1881,8 @@ ORDER BY 1, 2, 3;
4000 | 25 | <<=
4000 | 26 | >>
4000 | 27 | >>=
-(121 rows)
+ 4000 | 28 | ^@
+(122 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+ QUERY PLAN
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+ Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+ -> Bitmap Index Scan on spgist_text_idx
+ Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index a45e8ebeff..4776f7bb69 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
Hello,
On Fri, Feb 02, 2018 at 06:03:27PM +0300, Ildus Kurbangaliev wrote:
Hi,
Attached patch introduces prefix operator ^@ for text type. For 'a ^@ b'
it returns true if 'a' starts with 'b'. Also there is spgist index
support for this operator.It could be useful as an alternative for LIKE for 'something%'
templates. Or even used in LIKE queries instead of ~>=~ and ~<~ in the
future. But it would require new strategy for btree.
I've looked at the patch. It is applied and tests pass.
I have a couple comments:
+ if (ptype == Pattern_Type_Prefix) + { + char *s = TextDatumGetCString(constval); + prefix = string_to_const(s, vartype); + pstatus = Pattern_Prefix_Partial; + rest_selec = 1.0; /* all */ + pfree(s); + } + else + pstatus = pattern_fixed_prefix(patt, ptype, collation, + &prefix, &rest_selec);
I think it is better to put Pattern_Type_Prefix processing into
pattern_fixed_prefix() as another case entry.
Secondly, it is worth to fix the documentation. At least here [1]. Maybe
there are another places where documentation should be fixed.
1 - https://www.postgresql.org/docs/current/static/spgist-builtin-opclasses.html
--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
On Mon, 19 Feb 2018 15:06:51 +0300
Arthur Zakirov <a.zakirov@postgrespro.ru> wrote:
Hello,
On Fri, Feb 02, 2018 at 06:03:27PM +0300, Ildus Kurbangaliev wrote:
Hi,
Attached patch introduces prefix operator ^@ for text type. For 'a
^@ b' it returns true if 'a' starts with 'b'. Also there is spgist
index support for this operator.It could be useful as an alternative for LIKE for 'something%'
templates. Or even used in LIKE queries instead of ~>=~ and ~<~ in
the future. But it would require new strategy for btree.I've looked at the patch. It is applied and tests pass.
Hi, thanks for the review.
I have a couple comments:
+ if (ptype == Pattern_Type_Prefix) + { + char *s = TextDatumGetCString(constval); + prefix = string_to_const(s, vartype); + pstatus = Pattern_Prefix_Partial; + rest_selec = 1.0; /* all */ + pfree(s); + } + else + pstatus = pattern_fixed_prefix(patt, ptype, collation, + &prefix, &rest_selec);I think it is better to put Pattern_Type_Prefix processing into
pattern_fixed_prefix() as another case entry.
At brief look at this place seems better to move this block into
pattern_fixed_prefix function. But there is also `vartype` variable
which used to in prefix construction, and it would require pass this
variable too. And since pattern_fixed_prefix called in 10 other places
and vartype is required only for this ptype it seems better just keep
this block outside of this function.
Secondly, it is worth to fix the documentation. At least here [1].
Maybe there are another places where documentation should be fixed.1 -
https://www.postgresql.org/docs/current/static/spgist-builtin-opclasses.html
I've added documentation in current version of the patch.
--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
Attachments:
prefix_operator_and_spgist_v2.patchtext/x-patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 1e535cf215..403d0193ce 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
<entry><literal>ph</literal></entry>
</row>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>text_startswith</primary>
+ </indexterm>
+ <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+ </entry>
+ <entry><type>bool</type></entry>
+ <entry>
+ Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+ </entry>
+ <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+ <entry><literal>t</literal></entry>
+ </row>
+
<row>
<entry>
<indexterm>
@@ -3964,6 +3979,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
ILIKE</function>, respectively. All of these operators are
<productname>PostgreSQL</productname>-specific.
</para>
+
+ <para>
+ There is also the prefix operator <literal>^@</literal> and corresponding
+ <literal>text_startswith</literal> function which covers cases when only
+ searching by beginning of the string is needed.
+ </para>
</sect2>
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index 51bb60c92a..00ef9a92b0 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -179,6 +179,7 @@
<literal>~<~</literal>
<literal>~>=~</literal>
<literal>~>~</literal>
+ <literal>^@</literal>
</entry>
</row>
<row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..5beb9e33a1 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (strategy > 10 && strategy != RTPrefixStrategyNumber)
{
if (collate_is_c)
strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -601,10 +605,21 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
for (j = 0; j < in->nkeys; j++)
{
StrategyNumber strategy = in->scankeys[j].sk_strategy;
- text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
- int queryLen = VARSIZE_ANY_EXHDR(query);
+ text *query;
+ int queryLen;
int r;
+ /* for this strategy collation is not important */
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ res = DatumGetBool(DirectFunctionCall2(text_startswith,
+ out->leafValue, in->scankeys[j].sk_argument));
+ goto next;
+ }
+
+ query = DatumGetTextPP(in->scankeys[j].sk_argument);
+ queryLen = VARSIZE_ANY_EXHDR(query);
+
if (strategy > 10)
{
/* Collation-aware comparison */
@@ -655,6 +670,7 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
break;
}
+next:
if (!res)
break; /* no need to consider remaining conditions */
}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index fcc8323f62..be91082b56 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1315,8 +1315,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
* right cache key, so that they can be re-used at runtime.
*/
patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, collation,
- &prefix, &rest_selec);
+
+ if (ptype == Pattern_Type_Prefix)
+ {
+ char *s = TextDatumGetCString(constval);
+ prefix = string_to_const(s, vartype);
+ pstatus = Pattern_Prefix_Partial;
+ rest_selec = 1.0; /* all */
+ pfree(s);
+ }
+ else
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
/*
* If necessary, coerce the prefix constant to the right type.
@@ -1486,6 +1496,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2904,6 +2924,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 304cb26691..050875e0bb 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1762,6 +1762,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len2 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
#define RTSuperStrategyNumber 26 /* for inet << */
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
+#define RTPrefixStrategyNumber 28 /* for text ^@ */
-#define RTMaxStrategyNumber 27
+#define RTMaxStrategyNumber 28
#endif /* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..7f7ffcfecb 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert ( 4017 25 25 2 s 2315 4000 0 ));
DATA(insert ( 4017 25 25 3 s 98 4000 0 ));
DATA(insert ( 4017 25 25 4 s 2317 4000 0 ));
DATA(insert ( 4017 25 25 5 s 2318 4000 0 ));
+DATA(insert ( 4017 25 25 28 s 315 4000 0 ));
DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..414a967557 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 315 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 2a5321315a..2df784e22e 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -211,6 +211,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2564,6 +2565,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5
DESCR("join selectivity of NOT LIKE");
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
/* Aggregate-related functions */
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
typedef enum
{
- Pattern_Type_Like, Pattern_Type_Like_IC,
- Pattern_Type_Regex, Pattern_Type_Regex_IC
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
} Pattern_Type;
typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 031a0bcec9..ff6c9cb5dc 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
f1
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Index Only Scan using sp_radix_ind on radix_text_tbl
+ Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
QUERY PLAN
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 684f7f20a8..89007d61f9 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -711,6 +711,7 @@ uuid_gt(uuid,uuid)
uuid_ne(uuid,uuid)
xidneq(xid,xid)
xidneqint4(xid,integer)
+text_startswith(text,text)
macaddr8_eq(macaddr8,macaddr8)
macaddr8_lt(macaddr8,macaddr8)
macaddr8_le(macaddr8,macaddr8)
@@ -1880,7 +1881,8 @@ ORDER BY 1, 2, 3;
4000 | 25 | <<=
4000 | 26 | >>
4000 | 27 | >>=
-(121 rows)
+ 4000 | 28 | ^@
+(122 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+ QUERY PLAN
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+ Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+ -> Bitmap Index Scan on spgist_text_idx
+ Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index a45e8ebeff..4776f7bb69 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
On Mon, Feb 19, 2018 at 05:19:15PM +0300, Ildus Kurbangaliev wrote:
At brief look at this place seems better to move this block into
pattern_fixed_prefix function. But there is also `vartype` variable
which used to in prefix construction, and it would require pass this
variable too. And since pattern_fixed_prefix called in 10 other places
and vartype is required only for this ptype it seems better just keep
this block outside of this function.
Understood.
I've added documentation in current version of the patch.
Thank you.
Can you rebase the patch due to changes within pg_proc.h?
Also here
+ <para>
+ There is also the prefix operator <literal>^@</literal> and corresponding
+ <literal>text_startswith</literal> function which covers cases when only
+ searching by beginning of the string is needed.
+ </para>
I think text_startswith should be enclosed with the <function> tag. I'm
not sure, but I think <literal> used for operators, keywords, etc. I
haven't found a manual which describes how to use tags, but after looking
at the documentation where <function> is used, I think that for function
<function> should be used.
--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
On Tue, 6 Mar 2018 19:27:21 +0300
Arthur Zakirov <a.zakirov@postgrespro.ru> wrote:
On Mon, Feb 19, 2018 at 05:19:15PM +0300, Ildus Kurbangaliev wrote:
At brief look at this place seems better to move this block into
pattern_fixed_prefix function. But there is also `vartype` variable
which used to in prefix construction, and it would require pass this
variable too. And since pattern_fixed_prefix called in 10 other
places and vartype is required only for this ptype it seems better
just keep this block outside of this function.Understood.
I've added documentation in current version of the patch.
Thank you.
Can you rebase the patch due to changes within pg_proc.h?
Also here
+ <para> + There is also the prefix operator <literal>^@</literal> and corresponding + <literal>text_startswith</literal> function which covers cases when only + searching by beginning of the string is needed. + </para>I think text_startswith should be enclosed with the <function> tag.
I'm not sure, but I think <literal> used for operators, keywords,
etc. I haven't found a manual which describes how to use tags, but
after looking at the documentation where <function> is used, I think
that for function <function> should be used.
Hi, thanks for the review. I've fixed documentation as you said and
also rebased to current master.
--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
Attachments:
prefix_operator_and_spgist_v3.patchtext/x-patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2f59af25a6..4dc11d8df2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
<entry><literal>ph</literal></entry>
</row>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>text_startswith</primary>
+ </indexterm>
+ <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+ </entry>
+ <entry><type>bool</type></entry>
+ <entry>
+ Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+ </entry>
+ <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+ <entry><literal>t</literal></entry>
+ </row>
+
<row>
<entry>
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
ILIKE</function>, respectively. All of these operators are
<productname>PostgreSQL</productname>-specific.
</para>
+
+ <para>
+ There is also the prefix operator <literal>^@</literal> and corresponding
+ <function>text_startswith</function> function which covers cases when only
+ searching by beginning of the string is needed.
+ </para>
</sect2>
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index e47f70be89..06b7519052 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -161,6 +161,7 @@
<literal>~<~</literal>
<literal>~>=~</literal>
<literal>~>~</literal>
+ <literal>^@</literal>
</entry>
</row>
<row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..5beb9e33a1 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (strategy > 10 && strategy != RTPrefixStrategyNumber)
{
if (collate_is_c)
strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -601,10 +605,21 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
for (j = 0; j < in->nkeys; j++)
{
StrategyNumber strategy = in->scankeys[j].sk_strategy;
- text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
- int queryLen = VARSIZE_ANY_EXHDR(query);
+ text *query;
+ int queryLen;
int r;
+ /* for this strategy collation is not important */
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ res = DatumGetBool(DirectFunctionCall2(text_startswith,
+ out->leafValue, in->scankeys[j].sk_argument));
+ goto next;
+ }
+
+ query = DatumGetTextPP(in->scankeys[j].sk_argument);
+ queryLen = VARSIZE_ANY_EXHDR(query);
+
if (strategy > 10)
{
/* Collation-aware comparison */
@@ -655,6 +670,7 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
break;
}
+next:
if (!res)
break; /* no need to consider remaining conditions */
}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5..50b2583ca0 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1317,8 +1317,18 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
* right cache key, so that they can be re-used at runtime.
*/
patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, collation,
- &prefix, &rest_selec);
+
+ if (ptype == Pattern_Type_Prefix)
+ {
+ char *s = TextDatumGetCString(constval);
+ prefix = string_to_const(s, vartype);
+ pstatus = Pattern_Prefix_Partial;
+ rest_selec = 1.0; /* all */
+ pfree(s);
+ }
+ else
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
/*
* If necessary, coerce the prefix constant to the right type.
@@ -1488,6 +1498,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2906,6 +2926,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a..cb721e9a57 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len2 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
#define RTSuperStrategyNumber 26 /* for inet << */
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
+#define RTPrefixStrategyNumber 28 /* for text ^@ */
-#define RTMaxStrategyNumber 27
+#define RTMaxStrategyNumber 28
#endif /* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..7f7ffcfecb 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert ( 4017 25 25 2 s 2315 4000 0 ));
DATA(insert ( 4017 25 25 3 s 98 4000 0 ));
DATA(insert ( 4017 25 25 4 s 2317 4000 0 ));
DATA(insert ( 4017 25 25 5 s 2318 4000 0 ));
+DATA(insert ( 4017 25 25 28 s 315 4000 0 ));
DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..414a967557 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 315 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 0fdb42f639..4e62efbb46 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
DESCR("join selectivity of NOT LIKE");
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
/* Aggregate-related functions */
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
typedef enum
{
- Pattern_Type_Like, Pattern_Type_Like_IC,
- Pattern_Type_Regex, Pattern_Type_Regex_IC
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
} Pattern_Type;
typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 057faff2e5..6d32d4cb5b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
f1
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Index Only Scan using sp_radix_ind on radix_text_tbl
+ Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
QUERY PLAN
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 01608d2c04..919248cdd2 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -718,6 +718,7 @@ sha224(bytea)
sha256(bytea)
sha384(bytea)
sha512(bytea)
+text_startswith(text,text)
macaddr8_eq(macaddr8,macaddr8)
macaddr8_lt(macaddr8,macaddr8)
macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
4000 | 25 | <<=
4000 | 26 | >>
4000 | 27 | >>=
-(121 rows)
+ 4000 | 28 | ^@
+(122 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+ QUERY PLAN
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+ Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+ -> Bitmap Index Scan on spgist_text_idx
+ Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7f17588b0d..28c71abd74 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
Hi!
Patch looks resonable, but I see some place to improvement:
spg_text_leaf_consistent() only needs to check with text_startswith() if
reconstucted value came to leaf consistent is shorter than given prefix. For
example, if level >= length of prefix then we guarantee that fully reconstracted
is matched too. But do not miss that you may need to return value for index only
scan, consult returnData field
In attachment rebased and minorly edited version of your patch.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
Attachments:
prefix_operator_and_spgist_v4.patchtext/x-patch; name=prefix_operator_and_spgist_v4.patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2f59af25a6..4dc11d8df2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
<entry><literal>ph</literal></entry>
</row>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>text_startswith</primary>
+ </indexterm>
+ <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+ </entry>
+ <entry><type>bool</type></entry>
+ <entry>
+ Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+ </entry>
+ <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+ <entry><literal>t</literal></entry>
+ </row>
+
<row>
<entry>
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
ILIKE</function>, respectively. All of these operators are
<productname>PostgreSQL</productname>-specific.
</para>
+
+ <para>
+ There is also the prefix operator <literal>^@</literal> and corresponding
+ <function>text_startswith</function> function which covers cases when only
+ searching by beginning of the string is needed.
+ </para>
</sect2>
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index e47f70be89..06b7519052 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -161,6 +161,7 @@
<literal>~<~</literal>
<literal>~>=~</literal>
<literal>~>~</literal>
+ <literal>^@</literal>
</entry>
</row>
<row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..f92ff68a5f 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -496,7 +496,7 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (strategy > 10 && strategy != RTPrefixStrategyNumber)
{
if (collate_is_c)
strategy -= 10;
@@ -526,6 +526,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -601,10 +605,25 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
for (j = 0; j < in->nkeys; j++)
{
StrategyNumber strategy = in->scankeys[j].sk_strategy;
- text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
- int queryLen = VARSIZE_ANY_EXHDR(query);
+ text *query;
+ int queryLen;
int r;
+ /* for this strategy collation is not important */
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ res = DatumGetBool(DirectFunctionCall2(text_startswith,
+ out->leafValue, in->scankeys[j].sk_argument));
+
+ if (res) /* no need to consider remaining conditions */
+ break;
+
+ continue;
+ }
+
+ query = DatumGetTextPP(in->scankeys[j].sk_argument);
+ queryLen = VARSIZE_ANY_EXHDR(query);
+
if (strategy > 10)
{
/* Collation-aware comparison */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5..de3ace442a 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1204,7 +1204,6 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid vartype;
Oid opfamily;
Pattern_Prefix_Status pstatus;
- Const *patt;
Const *prefix = NULL;
Selectivity rest_selec = 0;
double nullfrac = 0.0;
@@ -1316,9 +1315,21 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
* but because we want to be sure we cache compiled regexps under the
* right cache key, so that they can be re-used at runtime.
*/
- patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, collation,
- &prefix, &rest_selec);
+ if (ptype == Pattern_Type_Prefix)
+ {
+ char *s = TextDatumGetCString(constval);
+ prefix = string_to_const(s, vartype);
+ pstatus = Pattern_Prefix_Partial;
+ rest_selec = 1.0; /* all */
+ pfree(s);
+ }
+ else
+ {
+ Const *patt = (Const *) other;
+
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
+ }
/*
* If necessary, coerce the prefix constant to the right type.
@@ -1488,6 +1499,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2906,6 +2927,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a..cb721e9a57 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len2 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
#define RTSuperStrategyNumber 26 /* for inet << */
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
+#define RTPrefixStrategyNumber 28 /* for text ^@ */
-#define RTMaxStrategyNumber 27
+#define RTMaxStrategyNumber 28
#endif /* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..a01033b405 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -795,6 +795,7 @@ DATA(insert ( 4017 25 25 2 s 2315 4000 0 ));
DATA(insert ( 4017 25 25 3 s 98 4000 0 ));
DATA(insert ( 4017 25 25 4 s 2317 4000 0 ));
DATA(insert ( 4017 25 25 5 s 2318 4000 0 ));
+DATA(insert ( 4017 25 25 28 s 3449 4000 0 ));
DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..4f7f4812fe 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 3449 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..a99a628be9 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
DESCR("join selectivity of NOT LIKE");
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 2579 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 2580 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
/* Aggregate-related functions */
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
typedef enum
{
- Pattern_Type_Like, Pattern_Type_Like_IC,
- Pattern_Type_Regex, Pattern_Type_Regex_IC
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
} Pattern_Type;
typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 057faff2e5..6d32d4cb5b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
f1
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Index Only Scan using sp_radix_ind on radix_text_tbl
+ Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
QUERY PLAN
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 01608d2c04..919248cdd2 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -718,6 +718,7 @@ sha224(bytea)
sha256(bytea)
sha384(bytea)
sha512(bytea)
+text_startswith(text,text)
macaddr8_eq(macaddr8,macaddr8)
macaddr8_lt(macaddr8,macaddr8)
macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
4000 | 25 | <<=
4000 | 26 | >>
4000 | 27 | >>=
-(121 rows)
+ 4000 | 28 | ^@
+(122 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/expected/spgist.out b/src/test/regress/expected/spgist.out
index 2d75bbf8dc..0011634b76 100644
--- a/src/test/regress/expected/spgist.out
+++ b/src/test/regress/expected/spgist.out
@@ -32,6 +32,15 @@ insert into spgist_text_tbl (id, t)
select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+ QUERY PLAN
+-------------------------------------------------------
+ Bitmap Heap Scan on spgist_text_tbl
+ Recheck Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+ -> Bitmap Index Scan on spgist_text_idx
+ Index Cond: (t ^@ 'baaaaaaaaaaaaaar11'::text)
+(4 rows)
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7f17588b0d..28c71abd74 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
diff --git a/src/test/regress/sql/spgist.sql b/src/test/regress/sql/spgist.sql
index 77b43a2d3e..2b57fc836c 100644
--- a/src/test/regress/sql/spgist.sql
+++ b/src/test/regress/sql/spgist.sql
@@ -43,6 +43,8 @@ select g, 'f' || repeat('o', 100) || g from generate_series(1, 10000) g
union all
select g, 'baaaaaaaaaaaaaar' || g from generate_series(1, 1000) g;
+explain (costs off) select * from spgist_text_tbl where t ^@ 'baaaaaaaaaaaaaar11';
+
-- Do a lot of insertions that have to split an existing node. Hopefully
-- one of these will cause the page to run out of space, causing the inner
-- tuple to be moved to another page.
On Thu, Mar 22, 2018 at 7:09 PM, Teodor Sigaev <teodor@sigaev.ru> wrote:
Patch looks resonable, but I see some place to improvement:
spg_text_leaf_consistent() only needs to check with text_startswith() if
reconstucted value came to leaf consistent is shorter than given prefix.
For example, if level >= length of prefix then we guarantee that fully
reconstracted is matched too. But do not miss that you may need to return
value for index only scan, consult returnData fieldIn attachment rebased and minorly edited version of your patch.
I took a look at this patch. In addition to Teodor's comments I can note
following.
* This line looks strange for me.
- if (strategy > 10)
+ if (strategy > 10 && strategy != RTPrefixStrategyNumber)
It's not because we added strategy != RTPrefixStrategyNumber condition
there.
It's because we already used magic number here and now have a mix of magic
number and macro constant in one line. Once we anyway touch this place,
could we get rid of magic numbers here?
* I'm a little concern about operator name. We're going to choose @^
operator for
prefix search without any preliminary discussion. However, personally I
don't
have better ideas :)
------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
On Fri, 23 Mar 2018 11:45:33 +0300
Alexander Korotkov <a.korotkov@postgrespro.ru> wrote:
On Thu, Mar 22, 2018 at 7:09 PM, Teodor Sigaev <teodor@sigaev.ru>
wrote:Patch looks resonable, but I see some place to improvement:
spg_text_leaf_consistent() only needs to check with
text_startswith() if reconstucted value came to leaf consistent is
shorter than given prefix. For example, if level >= length of
prefix then we guarantee that fully reconstracted is matched too.
But do not miss that you may need to return value for index only
scan, consult returnData fieldIn attachment rebased and minorly edited version of your patch.
I took a look at this patch. In addition to Teodor's comments I can
note following.* This line looks strange for me.
- if (strategy > 10) + if (strategy > 10 && strategy != RTPrefixStrategyNumber)It's not because we added strategy != RTPrefixStrategyNumber condition
there.
It's because we already used magic number here and now have a mix of
magic number and macro constant in one line. Once we anyway touch
this place, could we get rid of magic numbers here?* I'm a little concern about operator name. We're going to choose @^
operator for
prefix search without any preliminary discussion. However,
personally I don't
have better ideas :)
Teodor, Alexander, thanks for review. In new version I have added the
optimization in spgist using level variable and also got rid of magic
numbers.
About the operator it's actually ^@ (not @^ :)), I thought about it and
don't really have any idea what operator can be used instead.
Attached version 5 of the patch.
--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
Attachments:
prefix_operator_and_spgist_v5.patchtext/x-patchDownload
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9d1772f349..a1b4724a88 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2274,6 +2274,21 @@
<entry><literal>ph</literal></entry>
</row>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>text_startswith</primary>
+ </indexterm>
+ <literal><function>text_startswith(<parameter>string</parameter>, <parameter>prefix</parameter>)</function></literal>
+ </entry>
+ <entry><type>bool</type></entry>
+ <entry>
+ Returns true if <parameter>string</parameter> starts with <parameter>prefix</parameter>.
+ </entry>
+ <entry><literal>text_startswith('alphabet', 'alph')</literal></entry>
+ <entry><literal>t</literal></entry>
+ </row>
+
<row>
<entry>
<indexterm>
@@ -4033,6 +4048,12 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
ILIKE</function>, respectively. All of these operators are
<productname>PostgreSQL</productname>-specific.
</para>
+
+ <para>
+ There is also the prefix operator <literal>^@</literal> and corresponding
+ <function>text_startswith</function> function which covers cases when only
+ searching by beginning of the string is needed.
+ </para>
</sect2>
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index e47f70be89..06b7519052 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -161,6 +161,7 @@
<literal>~<~</literal>
<literal>~>=~</literal>
<literal>~>~</literal>
+ <literal>^@</literal>
</entry>
</row>
<row>
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e..c5e27dd7aa 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -67,6 +67,20 @@
*/
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
+/*
+ * Strategy for collation aware operator on text is equal to btree strategy
+ * plus value of 10.
+ *
+ * Current collation aware strategies and their corresponding btree strategies:
+ * 11 BTLessStrategyNumber
+ * 12 BTLessEqualStrategyNumber
+ * 14 BTGreaterEqualStrategyNumber
+ * 15 BTGreaterStrategyNumber
+ */
+#define SPG_STRATEGY_ADDITION (10)
+#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
+ && (s) != RTPrefixStrategyNumber)
+
/* Struct for sorting values in picksplit */
typedef struct spgNodePtr
{
@@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
{
if (collate_is_c)
- strategy -= 10;
+ strategy -= SPG_STRATEGY_ADDITION;
else
continue;
}
@@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -605,10 +623,31 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
int queryLen = VARSIZE_ANY_EXHDR(query);
int r;
- if (strategy > 10)
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ if (!in->returnData && level >= queryLen)
+ {
+ /*
+ * If we got to the leaf when level is equal or longer than
+ * query then it is a prefix. We skip cases when returnData is
+ * true, it would mean then leaf could be visited anyway.
+ */
+ continue;
+ }
+
+ res = DatumGetBool(DirectFunctionCall2(text_startswith,
+ out->leafValue, PointerGetDatum(query)));
+
+ if (!res) /* no need to consider remaining conditions */
+ break;
+
+ continue;
+ }
+
+ if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
{
/* Collation-aware comparison */
- strategy -= 10;
+ strategy -= SPG_STRATEGY_ADDITION;
/* If asserts enabled, verify encoding of reconstructed string */
Assert(pg_verifymbstr(fullValue, fullLen, false));
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5..2bde1d48e9 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1204,7 +1204,6 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid vartype;
Oid opfamily;
Pattern_Prefix_Status pstatus;
- Const *patt;
Const *prefix = NULL;
Selectivity rest_selec = 0;
double nullfrac = 0.0;
@@ -1307,18 +1306,28 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
nullfrac = stats->stanullfrac;
}
- /*
- * Pull out any fixed prefix implied by the pattern, and estimate the
- * fractional selectivity of the remainder of the pattern. Unlike many of
- * the other functions in this file, we use the pattern operator's actual
- * collation for this step. This is not because we expect the collation
- * to make a big difference in the selectivity estimate (it seldom would),
- * but because we want to be sure we cache compiled regexps under the
- * right cache key, so that they can be re-used at runtime.
- */
- patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, collation,
- &prefix, &rest_selec);
+ if (ptype == Pattern_Type_Prefix)
+ {
+ Assert(consttype == TEXTOID);
+ pstatus = Pattern_Prefix_Partial;
+ rest_selec = 1.0; /* all */
+ prefix = (Const *) other;
+ }
+ else
+ {
+ /*
+ * Pull out any fixed prefix implied by the pattern, and estimate the
+ * fractional selectivity of the remainder of the pattern. Unlike
+ * many of the other functions in this file, we use the pattern
+ * operator's actual collation for this step. This is not because
+ * we expect the collation to make a big difference in the selectivity
+ * estimate (it seldom would), but because we want to be sure we cache
+ * compiled regexps under the right cache key, so that they can be
+ * re-used at runtime.
+ */
+ pstatus = pattern_fixed_prefix((Const *) other, ptype, collation,
+ &prefix, &rest_selec);
+ }
/*
* If necessary, coerce the prefix constant to the right type.
@@ -1334,7 +1343,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
break;
case BYTEAOID:
prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
- prefix->constvalue));
+ prefix->constvalue));
break;
default:
elog(ERROR, "unrecognized consttype: %u",
@@ -1449,7 +1458,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(result);
- if (prefix)
+ if (prefix && prefix->constvalue != constval)
{
pfree(DatumGetPointer(prefix->constvalue));
pfree(prefix);
@@ -1488,6 +1497,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2906,6 +2925,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a..cb721e9a57 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+Datum
+text_startswith(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len2 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h
index bddfac4c10..0db11a1117 100644
--- a/src/include/access/stratnum.h
+++ b/src/include/access/stratnum.h
@@ -68,8 +68,9 @@ typedef uint16 StrategyNumber;
#define RTSubEqualStrategyNumber 25 /* for inet <<= */
#define RTSuperStrategyNumber 26 /* for inet << */
#define RTSuperEqualStrategyNumber 27 /* for inet >>= */
+#define RTPrefixStrategyNumber 28 /* for text ^@ */
-#define RTMaxStrategyNumber 27
+#define RTMaxStrategyNumber 28
#endif /* STRATNUM_H */
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 03af581df4..8eb74a21ca 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -799,6 +799,7 @@ DATA(insert ( 4017 25 25 11 s 664 4000 0 ));
DATA(insert ( 4017 25 25 12 s 665 4000 0 ));
DATA(insert ( 4017 25 25 14 s 667 4000 0 ));
DATA(insert ( 4017 25 25 15 s 666 4000 0 ));
+DATA(insert ( 4017 25 25 28 s 3449 4000 0 ));
/*
* btree jsonb_ops
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index e74f963eb5..4f7f4812fe 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -134,6 +134,8 @@ DESCR("less than");
DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel ));
DESCR("equal");
#define TextEqualOperator 98
+DATA(insert OID = 3449 ( "^@" PGNSP PGUID b f f 25 25 16 0 0 text_startswith prefixsel prefixjoinsel ));
+DESCR("starts with");
DATA(insert OID = 349 ( "||" PGNSP PGUID b f f 2277 2283 2277 0 0 array_append - - ));
DESCR("append element onto end of array");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..d672b33cc9 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -209,6 +209,7 @@ DATA(insert OID = 64 ( int2lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16
DATA(insert OID = 65 ( int4eq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4eq _null_ _null_ _null_ ));
DATA(insert OID = 66 ( int4lt PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "23 23" _null_ _null_ _null_ _null_ _null_ int4lt _null_ _null_ _null_ ));
DATA(insert OID = 67 ( texteq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ texteq _null_ _null_ _null_ ));
+DATA(insert OID = 3450 ( text_startswith PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "25 25" _null_ _null_ _null_ _null_ _null_ text_startswith _null_ _null_ _null_ ));
DATA(insert OID = 68 ( xideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ ));
DATA(insert OID = 3308 ( xidneq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "28 28" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ ));
DATA(insert OID = 69 ( cideq PGNSP PGUID 12 1 0 0 0 f f t t f i s 2 0 16 "29 29" _null_ _null_ _null_ _null_ _null_ cideq _null_ _null_ _null_ ));
@@ -2568,6 +2569,10 @@ DATA(insert OID = 1828 ( nlikejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0
DESCR("join selectivity of NOT LIKE");
DATA(insert OID = 1829 ( icregexnejoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ icregexnejoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of case-insensitive regex non-match");
+DATA(insert OID = 3437 ( prefixsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ _null_ prefixsel _null_ _null_ _null_ ));
+DESCR("restriction selectivity of exact prefix");
+DATA(insert OID = 3438 ( prefixjoinsel PGNSP PGUID 12 1 0 0 0 f f f t f s s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ _null_ prefixjoinsel _null_ _null_ _null_ ));
+DESCR("join selectivity of exact prefix");
/* Aggregate-related functions */
DATA(insert OID = 1830 ( float8_avg PGNSP PGUID 12 1 0 0 0 f f f t f i s 1 0 701 "1022" _null_ _null_ _null_ _null_ _null_ float8_avg _null_ _null_ _null_ ));
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 299c9f846a..95e44280c4 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -87,8 +87,11 @@ typedef struct VariableStatData
typedef enum
{
- Pattern_Type_Like, Pattern_Type_Like_IC,
- Pattern_Type_Regex, Pattern_Type_Regex_IC
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
} Pattern_Type;
typedef enum
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 057faff2e5..09757c5a74 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -372,6 +372,12 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
f1
-------------------------------------------------
@@ -1182,6 +1188,21 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------------------
+ Aggregate
+ -> Index Only Scan using sp_radix_ind on radix_text_tbl
+ Index Cond: (t ^@ 'Worth'::text)
+(3 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
QUERY PLAN
@@ -1763,6 +1784,23 @@ SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth
48
(1 row)
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ QUERY PLAN
+------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on radix_text_tbl
+ Recheck Cond: (t ^@ 'Worth'::text)
+ -> Bitmap Index Scan on sp_radix_ind
+ Index Cond: (t ^@ 'Worth'::text)
+(5 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+ count
+-------
+ 2
+(1 row)
+
RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 01608d2c04..919248cdd2 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -718,6 +718,7 @@ sha224(bytea)
sha256(bytea)
sha384(bytea)
sha512(bytea)
+text_startswith(text,text)
macaddr8_eq(macaddr8,macaddr8)
macaddr8_lt(macaddr8,macaddr8)
macaddr8_le(macaddr8,macaddr8)
@@ -1887,7 +1888,8 @@ ORDER BY 1, 2, 3;
4000 | 25 | <<=
4000 | 26 | >>
4000 | 27 | >>=
-(121 rows)
+ 4000 | 28 | ^@
+(122 rows)
-- Check that all opclass search operators have selectivity estimators.
-- This is not absolutely required, but it seems a reasonable thing
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7f17588b0d..c9671a4e13 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -224,6 +224,8 @@ SELECT count(*) FROM radix_text_tbl WHERE t > 'Worth
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT circle_center(f1), round(radius(f1)) as radius FROM gcircle_tbl ORDER BY f1 <-> '(200,300)'::point LIMIT 10;
@@ -441,6 +443,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
EXPLAIN (COSTS OFF)
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
SELECT * FROM gpolygon_tbl ORDER BY f1 <-> '(0,0)'::point LIMIT 10;
@@ -578,6 +584,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
SELECT count(*) FROM radix_text_tbl WHERE t ~>~ 'Worth St ';
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
+
RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;
Thank you, pushed with some editorization and renaming text_startswith to
starts_with
Ildus Kurbangaliev wrote:
On Fri, 23 Mar 2018 11:45:33 +0300
Alexander Korotkov <a.korotkov@postgrespro.ru> wrote:On Thu, Mar 22, 2018 at 7:09 PM, Teodor Sigaev <teodor@sigaev.ru>
wrote:Patch looks resonable, but I see some place to improvement:
spg_text_leaf_consistent() only needs to check with
text_startswith() if reconstucted value came to leaf consistent is
shorter than given prefix. For example, if level >= length of
prefix then we guarantee that fully reconstracted is matched too.
But do not miss that you may need to return value for index only
scan, consult returnData fieldIn attachment rebased and minorly edited version of your patch.
I took a look at this patch. In addition to Teodor's comments I can
note following.* This line looks strange for me.
- if (strategy > 10) + if (strategy > 10 && strategy != RTPrefixStrategyNumber)It's not because we added strategy != RTPrefixStrategyNumber condition
there.
It's because we already used magic number here and now have a mix of
magic number and macro constant in one line. Once we anyway touch
this place, could we get rid of magic numbers here?* I'm a little concern about operator name. We're going to choose @^
operator for
prefix search without any preliminary discussion. However,
personally I don't
have better ideas :)Teodor, Alexander, thanks for review. In new version I have added the
optimization in spgist using level variable and also got rid of magic
numbers.About the operator it's actually ^@ (not @^ :)), I thought about it and
don't really have any idea what operator can be used instead.Attached version 5 of the patch.
--
Teodor Sigaev E-mail: teodor@sigaev.ru
WWW: http://www.sigaev.ru/
Thank you, pushed with some editorization and renaming text_startswith to
starts_with
I am sorry for not noticing this before, but what is the point of this
operator? It seems to me we are only making the prefix searching
business, which is already complicated, more complicated.
Also, the new operator is not documented on SQL String Functions and
Operators table. It is not supported by btree text_pattern_ops or
btree indexes with COLLATE "C". It is not defined for "citext", so
people would get wrong results. It doesn't use pg_trgm indexes
whereas LIKE can.
On Mon, 16 Apr 2018 12:45:23 +0200
Emre Hasegeli <emre@hasegeli.com> wrote:
Thank you, pushed with some editorization and renaming
text_startswith to starts_withI am sorry for not noticing this before, but what is the point of this
operator? It seems to me we are only making the prefix searching
business, which is already complicated, more complicated.
Hi.
Also, the new operator is not documented on SQL String Functions and
Operators table. It is not supported by btree text_pattern_ops or
btree indexes with COLLATE "C". It is not defined for "citext", so
people would get wrong results. It doesn't use pg_trgm indexes
whereas LIKE can.
It is mentioned in documentation, look for "starts_with" function.
Currently it's working with spgist indexes which fact is pointed out in
the documentation too. I was going to add btree support but it would
require a new strategy so it will be matter of another patch. I think
this operator could be used in LIKE instead of current weird comparison
operators.
--
---
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company