diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index fee0561961..b761a49d4e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3140,6 +3140,31 @@ repeat('Pg', 4) PgPgPgPg
+
+
+ regexp_positions
+
+ regexp_positions ( string text, pattern text [, flags text ] )
+ setof record
+ ( start_pos integer[],
+ end_pos integer[] )
+
+
+ Returns start and end positions of captured substring(s) resulting from matching a POSIX regular
+ expression to the string; see
+ .
+
+
+ regexp_positions('foobarbequebaz', 'ba.', 'g')
+
+
+ ({3},{6})
+ ({11},{14})
+
+
+
+
+
regexp_replace
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a32c5c82ab..af3564e3e9 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -118,6 +118,7 @@ static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
bool ignore_degenerate,
bool fetching_unmatched);
static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
+static Datum build_regexp_positions_result(regexp_matches_ctx *matchctx, TupleDesc tupdesc);
static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
@@ -1056,6 +1057,66 @@ regexp_matches(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+/*
+ * regexp_positions()
+ * Return a setof record of positions where a pattern matches within a string.
+ */
+Datum
+regexp_positions(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ regexp_matches_ctx *matchctx;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+ pg_re_flags re_flags;
+ MemoryContext oldcontext;
+ TupleDesc tupdesc;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ tupdesc = CreateTemplateTupleDesc(2);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "start_pos",
+ INT4ARRAYOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "end_pos",
+ INT4ARRAYOID, -1, 0);
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+
+ /* be sure to copy the input string into the multi-call ctx */
+ matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
+ &re_flags,
+ PG_GET_COLLATION(),
+ true, false, false);
+
+ /* Pre-create workspace that build_regexp_positions_result needs */
+ matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns * 2);
+ matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) matchctx;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
+
+ if (matchctx->next_match < matchctx->nmatches)
+ {
+ Datum tuple;
+
+ tuple = build_regexp_positions_result(matchctx, funcctx->tuple_desc);
+ matchctx->next_match++;
+ SRF_RETURN_NEXT(funcctx, tuple);
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_matches_no_flags(PG_FUNCTION_ARGS)
@@ -1063,6 +1124,13 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
return regexp_matches(fcinfo);
}
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_positions_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_positions(fcinfo);
+}
+
/*
* setup_regexp_matches --- do the initial matching for regexp_match
* and regexp_split functions
@@ -1332,6 +1400,59 @@ build_regexp_match_result(regexp_matches_ctx *matchctx)
TEXTOID, -1, false, TYPALIGN_INT);
}
+/*
+ * build_regexp_positions_result - build output array for current match
+ */
+static Datum
+build_regexp_positions_result(regexp_matches_ctx *matchctx, TupleDesc tupdesc)
+{
+ Datum *elems = matchctx->elems;
+ bool *nulls = matchctx->nulls;
+ int dims[1];
+ int lbs[1];
+ int loc;
+ int i;
+ ArrayType *so_ary;
+ ArrayType *eo_ary;
+ Datum values[2];
+ bool tuple_nulls[2] = {false, false};
+ HeapTuple tuple;
+
+ /* Extract matching positions from the original string */
+ loc = matchctx->next_match * matchctx->npatterns * 2;
+ for (i = 0; i < matchctx->npatterns; i++)
+ {
+ int so = matchctx->match_locs[loc++];
+ int eo = matchctx->match_locs[loc++];
+
+ if (so < 0 || eo < 0)
+ {
+ elems[i] = (Datum) 0;
+ elems[i + matchctx->npatterns] = (Datum) 0;
+ nulls[i] = true;
+ }
+ else
+ {
+ elems[i] = Int32GetDatum(so);
+ elems[i + matchctx->npatterns] = Int32GetDatum(eo);
+ nulls[i] = false;
+ }
+ }
+
+ /* And form two arrays */
+ dims[0] = matchctx->npatterns;
+ lbs[0] = 1;
+ so_ary = construct_md_array(elems, nulls, 1, dims, lbs,
+ INT4OID, 4, true, TYPALIGN_INT);
+ eo_ary = construct_md_array(elems + matchctx->npatterns, nulls, 1, dims, lbs,
+ INT4OID, 4, true, TYPALIGN_INT);
+ values[0] = PointerGetDatum(so_ary);
+ values[1] = PointerGetDatum(eo_ary);
+ tuple = heap_form_tuple(tupdesc, values, tuple_nulls);
+ return HeapTupleGetDatum(tuple);
+
+}
+
/*
* regexp_split_to_table()
* Split the string at matches of the pattern, returning the
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 59d2b71ca9..51ac045b70 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3560,6 +3560,23 @@
proname => 'regexp_matches', prorows => '10', proretset => 't',
prorettype => '_text', proargtypes => 'text text text',
prosrc => 'regexp_matches' },
+
+{ oid => '8104', descr => 'find matching position(s) for regexp',
+ proname => 'regexp_positions', prorows => '10',
+ proretset => 't', prorettype => 'record',
+ proargtypes => 'text text', proallargtypes => '{text,text,_int4,_int4}',
+ proargmodes => '{i,i,o,o}',
+ proargnames => '{string,pattern,start_pos,end_pos}',
+ prosrc => 'regexp_positions_no_flags' },
+
+{ oid => '8105', descr => 'find matching position(s) for regexp',
+ proname => 'regexp_positions', prorows => '10',
+ proretset => 't', prorettype => 'record',
+ proargtypes => 'text text text', proallargtypes => '{text,text,text,_int4,_int4}',
+ proargmodes => '{i,i,i,o,o}',
+ proargnames => '{string,pattern,flags,start_pos,end_pos}',
+ prosrc => 'regexp_positions' },
+
{ oid => '2088', descr => 'split string by field_sep and return field_num',
proname => 'split_part', prorettype => 'text',
proargtypes => 'text text int4', prosrc => 'split_part' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index fb4573d85f..adcc2bea50 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -601,6 +601,12 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
{bar,beque}
(1 row)
+SELECT regexp_positions('foobarbequebaz', $re$(bar)(beque)$re$);
+ regexp_positions
+--------------------
+ ("{3,6}","{6,11}")
+(1 row)
+
-- test case insensitive
SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
regexp_matches
@@ -616,6 +622,13 @@ SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g')
{bazil,barf}
(2 rows)
+SELECT regexp_positions('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+ regexp_positions
+-----------------------
+ ("{3,6}","{6,11}")
+ ("{11,16}","{16,20}")
+(2 rows)
+
-- empty capture group (matched empty string)
SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
regexp_matches
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 57a48c9d0b..aa8b0553f0 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -198,12 +198,14 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
-- return all matches from regexp
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+SELECT regexp_positions('foobarbequebaz', $re$(bar)(beque)$re$);
-- test case insensitive
SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
-- global option - more than one match
SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+SELECT regexp_positions('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
-- empty capture group (matched empty string)
SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);