From df3f0c76eec2314ab7a23ef99d4e390d2456a1ce Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Thu, 22 May 2025 23:05:30 +0200
Subject: [PATCH v1] Fix SIMILAR TO regex translation

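The code that translates SIMILAR TO patterns into regular expressions
did not consider that brackets can be nested inside a bracket
expression, as in [/_[:alpha:]@%].  It treated the first "]" as the end
of the bracket expression and therefore translated the wildcards _ and %
that followed, although they were still inside the brackets and should
have been left alone.

Track the bracket nesting depth instead of a single flag, and do not
treat a "]" that directly follows "[" or "[^" as closing the bracket
expression.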
---
 src/backend/utils/adt/regexp.c        | 19 +++++++++++++------
 src/test/regress/expected/strings.out | 12 ++++++++++++
 src/test/regress/sql/strings.sql      |  3 +++
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index edee1f7880b..a23c4c959e4 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -773,7 +773,8 @@ similar_escape_internal(text *pat_text, text *esc_text)
 	int			plen,
 				elen;
 	bool		afterescape = false;
-	bool		incharclass = false;
+	int			incharclass = 0;
+	bool		charclass_start = false;
 	int			nquotes = 0;
 
 	p = VARDATA_ANY(pat_text);
@@ -904,7 +905,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
 		/* fast path */
 		if (afterescape)
 		{
-			if (pchar == '"' && !incharclass)	/* escape-double-quote? */
+			if (pchar == '"' && incharclass < 1)	/* escape-double-quote? */
 			{
 				/* emit appropriate part separator, per notes above */
 				if (nquotes == 0)
@@ -953,18 +954,24 @@ similar_escape_internal(text *pat_text, text *esc_text)
 			/* SQL escape character; do not send to output */
 			afterescape = true;
 		}
-		else if (incharclass)
+		else if (incharclass > 0)
 		{
 			if (pchar == '\\')
 				*r++ = '\\';
 			*r++ = pchar;
-			if (pchar == ']')
-				incharclass = false;
+			if (pchar == '[')
+				incharclass++;
+			else if (pchar == ']' && !charclass_start)
+				incharclass--;
+
+			if (pchar != '^' && charclass_start)
+				charclass_start = false;
 		}
 		else if (pchar == '[')
 		{
 			*r++ = pchar;
-			incharclass = true;
+			incharclass++;
+			charclass_start = true;
 		}
 		else if (pchar == '%')
 		{
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 174f0a68331..aa0bddd682c 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -614,6 +614,18 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
 SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
 ERROR:  invalid escape string
 HINT:  Escape string must be empty or one character.
+-- ".", "_" and "%" should be left alone in character classes
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT (SELECT '') SIMILAR TO '._[._[:alnum:]._]*._[]._]._[^].%].%';
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Result
+   Output: ((InitPlan 1).col1 ~ '^(?:\..[._[:alnum:]._]*\..[]._]\..[^].%]\..*)$'::text)
+   InitPlan 1
+     ->  Result
+           Output: ''::text
+(5 rows)
+
 -- Test backslash escapes in regexp_replace's replacement string
 SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
  regexp_replace 
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index f7b325baadf..eb743d9e3af 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -196,6 +196,9 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true;
 -- these behaviors are per spec, though:
 SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
 SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
+-- ".", "_" and "%" should be left alone in character classes
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT (SELECT '') SIMILAR TO '._[._[:alnum:]._]*._[]._]._[^].%].%';
 
 -- Test backslash escapes in regexp_replace's replacement string
 SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
-- 
2.49.0

