From 0a1879892d21f7df1cb11e6c98c559609faf41a7 Mon Sep 17 00:00:00 2001 From: Mark Dilger Date: Sun, 22 Aug 2021 14:51:37 -0700 Subject: [PATCH v1] Distinguish regular expression backref errors To make debugging regular expressions easier, use a different error message for backreferences within lookaround assertions than for backreferences with no valid referent. The error message "invalid backreference number" was being used for both, which is confusing when the backreference number is valid. The lookaround case now gets its own error code, REG_ENOBREF; the numeric values of the existing codes after REG_ESUBREG are shifted up to make room for it. --- src/backend/regex/regcomp.c | 2 +- src/include/regex/regerrs.h | 4 +++ src/include/regex/regex.h | 29 ++++++++++--------- .../test_regex/expected/test_regex.out | 4 +-- .../modules/test_regex/sql/test_regex.sql | 2 +- src/test/regress/expected/regex.out | 4 +-- 6 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index ae3a7b6a38..9a9b57e307 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1013,7 +1013,7 @@ parseqatom(struct vars *v, /* postpone everything else pending possible {0} */ break; case BACKREF: /* the Feature From The Black Lagoon */ - INSIST(type != LACON, REG_ESUBREG); + INSIST(type != LACON, REG_ENOBREF); subno = v->nextvalue; assert(subno > 0); INSIST(subno < v->nsubs, REG_ESUBREG); diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h index 41e25f7ff0..09c3f6f41c 100644 --- a/src/include/regex/regerrs.h +++ b/src/include/regex/regerrs.h @@ -30,6 +30,10 @@ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, +{ + REG_ENOBREF, "REG_ENOBREF", "backreference in lookaround assertion" +}, + { REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" }, diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index 0455ae8069..d38ef4ab6a 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -143,20 +143,21 @@ typedef struct #define REG_ECTYPE 4 /* invalid character class */ #define REG_EESCAPE 5 /* invalid escape 
\ sequence */ #define REG_ESUBREG 6 /* invalid backreference number */ -#define REG_EBRACK 7 /* brackets [] not balanced */ -#define REG_EPAREN 8 /* parentheses () not balanced */ -#define REG_EBRACE 9 /* braces {} not balanced */ -#define REG_BADBR 10 /* invalid repetition count(s) */ -#define REG_ERANGE 11 /* invalid character range */ -#define REG_ESPACE 12 /* out of memory */ -#define REG_BADRPT 13 /* quantifier operand invalid */ -#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ -#define REG_INVARG 16 /* invalid argument to regex function */ -#define REG_MIXED 17 /* character widths of regex and string differ */ -#define REG_BADOPT 18 /* invalid embedded option */ -#define REG_ETOOBIG 19 /* regular expression is too complex */ -#define REG_ECOLORS 20 /* too many colors */ -#define REG_CANCEL 21 /* operation cancelled */ +#define REG_ENOBREF 7 /* backreference in lookaround assertion */ +#define REG_EBRACK 8 /* brackets [] not balanced */ +#define REG_EPAREN 9 /* parentheses () not balanced */ +#define REG_EBRACE 10 /* braces {} not balanced */ +#define REG_BADBR 11 /* invalid repetition count(s) */ +#define REG_ERANGE 12 /* invalid character range */ +#define REG_ESPACE 13 /* out of memory */ +#define REG_BADRPT 15 /* quantifier operand invalid */ +#define REG_ASSERT 16 /* "can't happen" -- you found a bug */ +#define REG_INVARG 17 /* invalid argument to regex function */ +#define REG_MIXED 18 /* character widths of regex and string differ */ +#define REG_BADOPT 19 /* invalid embedded option */ +#define REG_ETOOBIG 20 /* regular expression is too complex */ +#define REG_ECOLORS 21 /* too many colors */ +#define REG_CANCEL 22 /* operation cancelled */ /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 
6242d0baa9..20cbd5108d 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -507,9 +507,9 @@ select * from test_regex('?', '', '-'); ERROR: invalid regular expression: quantifier operand invalid -- These two are not yet incorporated in Tcl, cf -- https://core.tcl-lang.org/tcl/tktview?name=5ea71fdcd3291c38 --- expectError 6.21 - {x(\w)(?=(\1))} ESUBREG +-- expectError 6.21 - {x(\w)(?=(\1))} ENOBREF select * from test_regex('x(\w)(?=(\1))', '', '-'); -ERROR: invalid regular expression: invalid backreference number +ERROR: invalid regular expression: backreference in lookaround assertion -- expectMatch 6.22 HP {x(?=((foo)))} xfoo x select * from test_regex('x(?=((foo)))', 'xfoo', 'HP'); test_regex diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 389b8b61b3..bfaec8865f 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -172,7 +172,7 @@ select * from test_regex('?', '', '-'); -- These two are not yet incorporated in Tcl, cf -- https://core.tcl-lang.org/tcl/tktview?name=5ea71fdcd3291c38 --- expectError 6.21 - {x(\w)(?=(\1))} ESUBREG +-- expectError 6.21 - {x(\w)(?=(\1))} ENOBREF select * from test_regex('x(\w)(?=(\1))', '', '-'); -- expectMatch 6.22 HP {x(?=((foo)))} xfoo x select * from test_regex('x(?=((foo)))', 'xfoo', 'HP'); diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index 86477cc506..7074d876ae 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -606,8 +606,8 @@ select regexp_match('foo', '(?:.|){99}'); -- Error conditions select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs -ERROR: invalid regular expression: invalid backreference number +ERROR: invalid regular expression: backreference in lookaround assertion select 'xyz' ~ 'x(\w)(?=(\1))'; -ERROR: invalid regular expression: 
invalid backreference number +ERROR: invalid regular expression: backreference in lookaround assertion select 'a' ~ '\x7fffffff'; -- invalid chr code ERROR: invalid regular expression: invalid escape \ sequence -- 2.21.1 (Apple Git-122.3)