diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 08f08322ca..34a338fb21 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3140,6 +3140,29 @@ repeat('Pg', 4) PgPgPgPg
+
+
+ regexp_positions
+
+ regexp_positions ( string text, pattern text [, flags text ] )
+ setof int4range[]
+
+
+ Returns start and end positions of captured substring(s) resulting from matching a POSIX regular
+ expression to the string; see
+ <xref linkend="functions-posix-regexp"/>.
+
+
+ regexp_positions('foobarbequebaz', 'ba.', 'g')
+
+
+ {"[4,7)"}
+ {"[12,15)"}
+
+
+
+
+
regexp_replace
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a32c5c82ab..fde3d1b80f 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -37,6 +37,7 @@
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/varlena.h"
+#include "utils/rangetypes.h"
#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
(PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
@@ -118,6 +119,7 @@ static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
bool ignore_degenerate,
bool fetching_unmatched);
static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
+static ArrayType *build_regexp_positions_result(regexp_matches_ctx *matchctx);
static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
@@ -1056,6 +1058,58 @@ regexp_matches(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+/*
+ * regexp_positions()
+ *		Set-returning function that emits one int4range[] per match of a
+ *		pattern within a string.
+ *
+ * Arguments are (string text, pattern text [, flags text]).  The first
+ * call runs setup_regexp_matches() and stashes the resulting context in
+ * the multi-call memory context; every call (including the first) then
+ * returns the array for the next stored match, or signals completion
+ * once next_match has reached nmatches.
+ */
+Datum
+regexp_positions(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	regexp_matches_ctx *matchctx;
+	ArrayType  *positions;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *pattern = PG_GETARG_TEXT_PP(1);
+		text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+		pg_re_flags re_flags;
+		MemoryContext oldcontext;
+		int			nslots;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		/* Everything allocated below has to survive across calls */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* Decode the optional flags argument */
+		parse_re_flags(&re_flags, flags);
+
+		/* The input string is copied so that it lives in the multi-call ctx */
+		matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0),
+										pattern,
+										&re_flags,
+										PG_GET_COLLATION(),
+										true, false, false);
+
+		/* Scratch arrays that build_regexp_positions_result fills per row */
+		nslots = matchctx->npatterns;
+		matchctx->elems = (Datum *) palloc(sizeof(Datum) * nslots);
+		matchctx->nulls = (bool *) palloc(sizeof(bool) * nslots);
+
+		funcctx->user_fctx = (void *) matchctx;
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
+
+	/* All stored matches have been handed out: end the result set */
+	if (matchctx->next_match >= matchctx->nmatches)
+	{
+		SRF_RETURN_DONE(funcctx);
+	}
+
+	/* Build the row for the current match, then advance to the next one */
+	positions = build_regexp_positions_result(matchctx);
+	matchctx->next_match++;
+	SRF_RETURN_NEXT(funcctx, PointerGetDatum(positions));
+}
+
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_matches_no_flags(PG_FUNCTION_ARGS)
@@ -1063,6 +1117,13 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
return regexp_matches(fcinfo);
}
+/*
+ * regexp_positions_no_flags()
+ *		Entry point used by the two-argument (text, text) catalog entry.
+ *		It forwards fcinfo unchanged to regexp_positions(), which fetches
+ *		the flags argument only if it exists.
+ *
+ * This is separate to keep the opr_sanity regression test from complaining
+ */
+Datum
+regexp_positions_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_positions(fcinfo);
+}
+
/*
* setup_regexp_matches --- do the initial matching for regexp_match
* and regexp_split functions
@@ -1332,6 +1393,64 @@ build_regexp_match_result(regexp_matches_ctx *matchctx)
TEXTOID, -1, false, TYPALIGN_INT);
}
+/*
+ * build_regexp_positions_result - build output array for current match
+ *
+ * Returns a one-dimensional int4range[] (lower bound 1) holding
+ * matchctx->npatterns elements for the match numbered
+ * matchctx->next_match.  Each element is derived from a (start, end)
+ * offset pair in matchctx->match_locs:
+ *
+ *	- if either offset is negative the element is NULL (presumably a
+ *	  subpattern that did not take part in the match);
+ *	- otherwise the zero-based offsets, with the end offset pointing one
+ *	  past the last matched position, are reported as the one-based
+ *	  half-open range [start + 1, end + 1).
+ *
+ * A zero-length match therefore yields an empty range.  Note that an
+ * empty range does not carry the position at which the empty match
+ * occurred.
+ *
+ * matchctx->elems and matchctx->nulls are used as workspace and must
+ * already have room for npatterns entries.
+ */
+static ArrayType *
+build_regexp_positions_result(regexp_matches_ctx *matchctx)
+{
+	Datum	   *elems = matchctx->elems;
+	bool	   *nulls = matchctx->nulls;
+	int			dims[1];
+	int			lbs[1];
+	int			loc;
+	int			i;
+	TypeCacheEntry *typcache;
+
+	typcache = lookup_type_cache(INT4RANGEOID, TYPECACHE_RANGE_INFO);
+
+	/* Convert each pair of match offsets into an int4range */
+	loc = matchctx->next_match * matchctx->npatterns * 2;
+	for (i = 0; i < matchctx->npatterns; i++)
+	{
+		int			so = matchctx->match_locs[loc++];
+		int			eo = matchctx->match_locs[loc++];
+
+		if (so < 0 || eo < 0)
+		{
+			elems[i] = (Datum) 0;
+			nulls[i] = true;
+		}
+		else
+		{
+			RangeBound	lower;
+			RangeBound	upper;
+
+			lower.val = Int32GetDatum(so + 1);
+			lower.infinite = false;
+			lower.inclusive = true;
+			lower.lower = true;
+
+			/*
+			 * Use an exclusive upper bound.  With an inclusive bound of
+			 * "eo", a zero-length match (so == eo) would ask for a range
+			 * whose lower bound exceeds its upper bound, which
+			 * make_range() rejects with an error.  For non-empty matches
+			 * [so + 1, eo + 1) is the same set of integers as
+			 * [so + 1, eo], so the visible result does not change.
+			 */
+			upper.val = Int32GetDatum(eo + 1);
+			upper.infinite = false;
+			upper.inclusive = false;
+			upper.lower = false;
+
+			elems[i] = RangeTypePGetDatum(make_range(typcache,
+													 &lower, &upper,
+													 false));
+			nulls[i] = false;
+		}
+	}
+
+	/* And form an array */
+	dims[0] = matchctx->npatterns;
+	lbs[0] = 1;
+	/* element storage properties are taken from the type cache entry */
+	return construct_md_array(elems, nulls, 1, dims, lbs,
+							  INT4RANGEOID,
+							  typcache->typlen,
+							  typcache->typbyval,
+							  typcache->typalign);
+}
+
/*
* regexp_split_to_table()
* Split the string at matches of the pattern, returning the
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 1487710d59..e2e76935a0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3557,6 +3557,14 @@
proname => 'regexp_matches', prorows => '10', proretset => 't',
prorettype => '_text', proargtypes => 'text text text',
prosrc => 'regexp_matches' },
+{ oid => '8104', descr => 'find matching position(s) for regexp',
+ proname => 'regexp_positions', prorows => '10', proretset => 't',
+ prorettype => '_int4range', proargtypes => 'text text text',
+ prosrc => 'regexp_positions' },
+{ oid => '8105', descr => 'find matching position(s) for regexp',
+ proname => 'regexp_positions', prorows => '1', proretset => 't',
+ prorettype => '_int4range', proargtypes => 'text text',
+ prosrc => 'regexp_positions_no_flags' },
{ oid => '2088', descr => 'split string by field_sep and return field_num',
proname => 'split_part', prorettype => 'text',
proargtypes => 'text text int4', prosrc => 'split_part' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index fb4573d85f..5071165cd3 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -601,6 +601,12 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
{bar,beque}
(1 row)
+SELECT regexp_positions('foobarbequebaz', $re$(bar)(beque)$re$);
+ regexp_positions
+--------------------
+ {"[4,7)","[7,12)"}
+(1 row)
+
-- test case insensitive
SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
regexp_matches
@@ -616,6 +622,13 @@ SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g')
{bazil,barf}
(2 rows)
+SELECT regexp_positions('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+ regexp_positions
+-----------------------
+ {"[4,7)","[7,12)"}
+ {"[12,17)","[17,21)"}
+(2 rows)
+
-- empty capture group (matched empty string)
SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
regexp_matches
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 57a48c9d0b..aa8b0553f0 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -198,12 +198,14 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
-- return all matches from regexp
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+SELECT regexp_positions('foobarbequebaz', $re$(bar)(beque)$re$);
-- test case insensitive
SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
-- global option - more than one match
SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+SELECT regexp_positions('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
-- empty capture group (matched empty string)
SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);