fix for BUG #3720: wrong results at using ltree

Started by Filip Rembiałkowskialmost 7 years ago25 messages

filip.rembialkowski@gmail.com

almost 7 years ago

1 attachment(s)

Hi all,

Here is my attempt to fix a 12-years old ltree bug (which is a todo item).

I see it's not backward-compatible, but in my understanding that's
what is documented. Previous behavior was inconsistent with
documentation (where single asterisk should match zero or more
labels).

http://archives.postgresql.org/pgsql-bugs/2007-11/msg00044.php

Attachments:

postgresql-ltree-exclam.20190305.patchtext/x-patch; charset=US-ASCII; name=postgresql-ltree-exclam.20190305.patchDownload

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..b1878a99b3 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -676,7 +676,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d';
@@ -706,7 +706,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!e';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!e.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!e';
@@ -724,7 +724,7 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!f.*';
@@ -742,7 +742,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.!d.*';
@@ -766,13 +766,13 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
@@ -784,7 +784,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*.c.*';
@@ -832,31 +832,31 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*.e';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.!c.*';
@@ -886,19 +886,19 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
@@ -909,6 +909,18 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  ?column? 
+----------
+ t
+(1 row)
+
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b'::ltree ~ '!a.!a';
+ ?column? 
 ----------
  f
 (1 row)
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index b6d2deb1af..392c193fd8 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -19,16 +19,6 @@ PG_FUNCTION_INFO_V1(lt_q_rregex);
 
 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 
-typedef struct
-{
-	lquery_level *q;
-	int			nq;
-	ltree_level *t;
-	int			nt;
-	int			posq;
-	int			post;
-} FieldNot;
-
 static char *
 getlexeme(char *start, char *end, int *len)
 {
@@ -50,7 +40,7 @@ getlexeme(char *start, char *end, int *len)
 }
 
 bool
-			compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
+compare_subnode(ltree_level *t, char *qn, int len, int (*cmpptr) (const char *, const char *, size_t), bool anyend)
 {
 	char	   *endt = t->name + t->len;
 	char	   *endq = qn + len;
@@ -108,6 +98,9 @@ checkLevel(lquery_level *curq, ltree_level *curt)
 	int			(*cmpptr) (const char *, const char *, size_t);
 	lquery_variant *curvar = LQL_FIRST(curq);
 	int			i;
+	bool        success;
+
+	success = (curq->flag & LQL_NOT) ? false : true;
 
 	for (i = 0; i < curq->numvar; i++)
 	{
@@ -115,8 +108,9 @@ checkLevel(lquery_level *curq, ltree_level *curt)
 
 		if (curvar->flag & LVAR_SUBLEXEME)
 		{
-			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr, (curvar->flag & LVAR_ANYEND)))
-				return true;
+			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+						(curvar->flag & LVAR_ANYEND)))
+				return success;
 		}
 		else if (
 				 (
@@ -126,22 +120,12 @@ checkLevel(lquery_level *curq, ltree_level *curt)
 				 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
 		{
 
-			return true;
+			return success;
 		}
 		curvar = LVAR_NEXT(curvar);
 	}
-	return false;
-}
-
-/*
-void
-printFieldNot(FieldNot *fn ) {
-	while(fn->q) {
-		elog(NOTICE,"posQ:%d lenQ:%d posT:%d lenT:%d", fn->posq,fn->nq,fn->post,fn->nt);
-		fn++;
-	}
+	return !success;
 }
-*/
 
 static struct
 {
@@ -154,7 +138,7 @@ static struct
 };
 
 static bool
-checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_numlevel, FieldNot *ptr)
+checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_numlevel)
 {
 	uint32		low_pos = 0,
 				high_pos = 0,
@@ -163,7 +147,6 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 				qlen = query_numlevel;
 	int			isok;
 	lquery_level *prevq = NULL;
-	ltree_level *prevt = NULL;
 
 	if (SomeStack.muse)
 	{
@@ -178,7 +161,6 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 	{
 		if (curq->numvar)
 		{
-			prevt = curt;
 			while (cur_tpos < low_pos)
 			{
 				curt = LEVEL_NEXT(curt);
@@ -186,83 +168,33 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 				cur_tpos++;
 				if (tlen == 0)
 					return false;
-				if (ptr && ptr->q)
-					ptr->nt++;
 			}
 
-			if (ptr && curq->flag & LQL_NOT)
+			isok = false;
+			while (cur_tpos <= high_pos && tlen > 0 && !isok)
 			{
-				if (!(prevq && prevq->numvar == 0))
-					prevq = curq;
-				if (ptr->q == NULL)
-				{
-					ptr->t = prevt;
-					ptr->q = prevq;
-					ptr->nt = 1;
-					ptr->nq = 1 + ((prevq == curq) ? 0 : 1);
-					ptr->posq = query_numlevel - qlen - ((prevq == curq) ? 0 : 1);
-					ptr->post = cur_tpos;
-				}
-				else
-				{
-					ptr->nt++;
-					ptr->nq++;
-				}
-
-				if (qlen == 1 && ptr->q->numvar == 0)
-					ptr->nt = tree_numlevel - ptr->post;
+				isok = checkLevel(curq, curt);
 				curt = LEVEL_NEXT(curt);
 				tlen--;
 				cur_tpos++;
-				if (high_pos < cur_tpos)
-					high_pos++;
-			}
-			else
-			{
-				isok = false;
-				while (cur_tpos <= high_pos && tlen > 0 && !isok)
-				{
-					isok = checkLevel(curq, curt);
-					curt = LEVEL_NEXT(curt);
-					tlen--;
-					cur_tpos++;
-					if (isok && prevq && prevq->numvar == 0 && tlen > 0 && cur_tpos <= high_pos)
-					{
-						FieldNot	tmpptr;
-
-						if (ptr)
-							memcpy(&tmpptr, ptr, sizeof(FieldNot));
-						SomeStack.high_pos = high_pos - cur_tpos;
-						SomeStack.muse = true;
-						if (checkCond(prevq, qlen + 1, curt, tlen, (ptr) ? &tmpptr : NULL))
-							return true;
-					}
-					if (!isok && ptr)
-						ptr->nt++;
-				}
-				if (!isok)
-					return false;
-
-				if (ptr && ptr->q)
+				if (isok && prevq && prevq->numvar == 0 && tlen > 0 && cur_tpos <= high_pos)
 				{
-					if (checkCond(ptr->q, ptr->nq, ptr->t, ptr->nt, NULL))
-						return false;
-					ptr->q = NULL;
+					SomeStack.high_pos = high_pos - cur_tpos;
+					SomeStack.muse = true;
+					if (checkCond(prevq, qlen + 1, curt, tlen))
+						return true;
 				}
-				low_pos = cur_tpos;
-				high_pos = cur_tpos;
 			}
+			if (!isok)
+				return false;
+
+			low_pos = cur_tpos;
+			high_pos = cur_tpos;
 		}
 		else
 		{
 			low_pos = cur_tpos + curq->low;
 			high_pos = cur_tpos + curq->high;
-			if (ptr && ptr->q)
-			{
-				ptr->nq++;
-				if (qlen == 1)
-					ptr->nt = tree_numlevel - ptr->post;
-			}
 		}
 
 		prevq = curq;
@@ -293,9 +225,6 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
 		return false;
 
-	if (ptr && ptr->q && checkCond(ptr->q, ptr->nq, ptr->t, ptr->nt, NULL))
-		return false;
-
 	return true;
 }
 
@@ -306,20 +235,8 @@ ltq_regex(PG_FUNCTION_ARGS)
 	lquery	   *query = PG_GETARG_LQUERY_P(1);
 	bool		res = false;
 
-	if (query->flag & LQUERY_HASNOT)
-	{
-		FieldNot	fn;
-
-		fn.q = NULL;
-
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel, &fn);
-	}
-	else
-	{
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel, NULL);
-	}
+	res = checkCond(LQUERY_FIRST(query), query->numlevel,
+						LTREE_FIRST(tree), tree->numlevel);
 
 	PG_FREE_IF_COPY(tree, 0);
 	PG_FREE_IF_COPY(query, 1);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index e4b8c84fa6..f3c6501309 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -78,8 +78,6 @@ typedef struct
 #define LQUERY_HDRSIZE	 MAXALIGN( offsetof(lquery, data) )
 #define LQUERY_FIRST(x)   ( (lquery_level*)( ((char*)(x))+LQUERY_HDRSIZE ) )
 
-#define LQUERY_HASNOT		0x01
-
 #define ISALNUM(x)	( t_isalpha(x) || t_isdigit(x)	|| ( pg_mblen(x) == 1 && t_iseq((x), '_') ) )
 
 /* full text query */
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index f54f037443..2e95e8a1e0 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -205,7 +205,6 @@ lquery_in(PG_FUNCTION_ARGS)
 			   *curqlevel,
 			   *tmpql;
 	lquery_variant *lrptr = NULL;
-	bool		hasnot = false;
 	bool		wasbad = false;
 	int			charlen;
 	int			pos = 0;
@@ -254,7 +253,6 @@ lquery_in(PG_FUNCTION_ARGS)
 				state = LQPRS_WAITDELIM;
 				curqlevel->numvar = 1;
 				curqlevel->flag |= LQL_NOT;
-				hasnot = true;
 			}
 			else if (charlen == 1 && t_iseq(ptr, '*'))
 				state = LQPRS_WAITOPEN;
@@ -480,8 +478,6 @@ lquery_in(PG_FUNCTION_ARGS)
 	result->numlevel = num;
 	result->firstgood = 0;
 	result->flag = 0;
-	if (hasnot)
-		result->flag |= LQUERY_HASNOT;
 	cur = LQUERY_FIRST(result);
 	curqlevel = tmpql;
 	while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 846b04e48e..4f734953e2 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -168,7 +168,8 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
-
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+SELECT 'a.b'::ltree ~ '!a.!a';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 3ddd335b8c..df8b4b042f 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -598,7 +598,7 @@ ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.Astronomy.*';
  Top.Collections.Pictures.Astronomy.Astronauts
 (7 rows)
 
-ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.*.Astronomy.*';
+ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.Astronomy.*';
                 path
 ------------------------------------
  Top.Science.Astronomy

Tom Lane

tgl@sss.pgh.pa.us

almost 7 years ago

In reply to: Filip Rembiałkowski (#1)

Re: fix for BUG #3720: wrong results at using ltree

=?UTF-8?Q?Filip_Rembia=C5=82kowski?= <filip.rembialkowski@gmail.com> writes:

Here is my attempt to fix a 12-years old ltree bug (which is a todo item).
I see it's not backward-compatible, but in my understanding that's
what is documented. Previous behavior was inconsistent with
documentation (where single asterisk should match zero or more
labels).
http://archives.postgresql.org/pgsql-bugs/2007-11/msg00044.php

I took a quick look through this. I see where you're going with this,
and I agree that this coding seems like a better match to what it says
in the documentation:

... you can put ! (NOT) at the start to match any label that
doesn't match any of the alternatives.

However, it seems like Teodor and Oleg went to an awful lot of trouble
to implement some other behavior. It looks like what's there is trying
to do something like "true if this pattern does not match any label
between the matches for whatever's around it", rather than just "true
if this pattern does not match at one specific position". The number
of changes in the expected output for existing regression test cases
is also disheartening: it's fairly hard to believe that whoever wrote
the test cases didn't think those expected outputs were correct.

In short, I'm wondering if we should treat this as a documentation
bug not a code bug. But to do that, we'd need a more accurate
description of what the code is supposed to do, because the statement
quoted above is certainly not a match to the actual behavior.

BTW, if we do proceed in this direction, I wonder whether the
ltree_gist code needs any adjustments.

Thoughts?

regards, tom lane

Thomas Munro

thomas.munro@gmail.com

over 6 years ago

In reply to: Tom Lane (#2)

Re: fix for BUG #3720: wrong results at using ltree

On Sun, Apr 7, 2019 at 3:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

=?UTF-8?Q?Filip_Rembia=C5=82kowski?= <filip.rembialkowski@gmail.com> writes:

Here is my attempt to fix a 12-years old ltree bug (which is a todo item).
I see it's not backward-compatible, but in my understanding that's
what is documented. Previous behavior was inconsistent with
documentation (where single asterisk should match zero or more
labels).
http://archives.postgresql.org/pgsql-bugs/2007-11/msg00044.php

[...]

In short, I'm wondering if we should treat this as a documentation
bug not a code bug. But to do that, we'd need a more accurate
description of what the code is supposed to do, because the statement
quoted above is certainly not a match to the actual behavior.

This patch doesn't apply. More importantly, it seems like we don't
have a consensus on whether we want it.

Teodor, Oleg, would you like to offer an opinion here? If I
understand correctly, the choices are doc change, code/comment change
or WONT_FIX. This seems to be an entry that we can bring to a
conclusion in this CF with some input from the ltree experts.

--
Thomas Munro
https://enterprisedb.com

Oleg Bartunov

obartunov@postgrespro.ru

over 6 years ago

In reply to: Thomas Munro (#3)

Re: fix for BUG #3720: wrong results at using ltree

On Mon, Jul 8, 2019 at 7:22 AM Thomas Munro <thomas.munro@gmail.com> wrote:

On Sun, Apr 7, 2019 at 3:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

=?UTF-8?Q?Filip_Rembia=C5=82kowski?= <filip.rembialkowski@gmail.com> writes:

Here is my attempt to fix a 12-years old ltree bug (which is a todo item).
I see it's not backward-compatible, but in my understanding that's
what is documented. Previous behavior was inconsistent with
documentation (where single asterisk should match zero or more
labels).
http://archives.postgresql.org/pgsql-bugs/2007-11/msg00044.php

[...]

In short, I'm wondering if we should treat this as a documentation
bug not a code bug. But to do that, we'd need a more accurate
description of what the code is supposed to do, because the statement
quoted above is certainly not a match to the actual behavior.

This patch doesn't apply. More importantly, it seems like we don't
have a consensus on whether we want it.

Teodor, Oleg, would you like to offer an opinion here? If I
understand correctly, the choices are doc change, code/comment change
or WONT_FIX. This seems to be an entry that we can bring to a
conclusion in this CF with some input from the ltree experts.

We are currently very busy and will look at the problem (and dig into
our memory)
later. There is also another ltree patch
(https://commitfest.postgresql.org/23/1977/), it would be
nice if Filip try it.

--
Thomas Munro
https://enterprisedb.com

--
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Nikita Glukhov

n.gluhov@postgrespro.ru

over 6 years ago

In reply to: Oleg Bartunov (#4)

2 attachment(s)

Re: fix for BUG #3720: wrong results at using ltree

On 09.07.2019 17:57, Oleg Bartunov wrote:

On Mon, Jul 8, 2019 at 7:22 AM Thomas Munro <thomas.munro@gmail.com> wrote:

On Sun, Apr 7, 2019 at 3:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

=?UTF-8?Q?Filip_Rembia=C5=82kowski?= <filip.rembialkowski@gmail.com> writes:

Here is my attempt to fix a 12-years old ltree bug (which is a todo item).
I see it's not backward-compatible, but in my understanding that's
what is documented. Previous behavior was inconsistent with
documentation (where single asterisk should match zero or more
labels).
http://archives.postgresql.org/pgsql-bugs/2007-11/msg00044.php

[...]

In short, I'm wondering if we should treat this as a documentation
bug not a code bug. But to do that, we'd need a more accurate
description of what the code is supposed to do, because the statement
quoted above is certainly not a match to the actual behavior.

This patch doesn't apply. More importantly, it seems like we don't
have a consensus on whether we want it.

Teodor, Oleg, would you like to offer an opinion here? If I
understand correctly, the choices are doc change, code/comment change
or WONT_FIX. This seems to be an entry that we can bring to a
conclusion in this CF with some input from the ltree experts.

We are currently very busy and will look at the problem (and dig into
our memory) later. There is also another ltree patch
(https://commitfest.postgresql.org/23/1977/), it would be nice if
Filip try it.

I looked at "ltree syntax improvement" patch and found two more very
old bugs in ltree/lquery (fixes are attached):

1. ltree/lquery level counter overflow is wrongly checked:

SELECT nlevel((repeat('a.', 65534) || 'a')::ltree);
nlevel
--------
65535
(1 row)

-- expected 65536 or error
SELECT nlevel((repeat('a.', 65535) || 'a')::ltree);
nlevel
--------
0
(1 row)

-- expected 65537 or error
SELECT nlevel((repeat('a.', 65536) || 'a')::ltree);
nlevel
--------
1
(1 row)

-- expected 'aaaaa...' or error
SELECT (repeat('a.', 65535) || 'a')::ltree;
ltree
-------

(1 row)

-- expected 'aaaaa...' or error
SELECT (repeat('a.', 65536) || 'a')::ltree;
ltree
-------
a
(1 row)

2. '*{a}.*{b}.*{c}' is not equivalent to '*{a+b+c}' (as I expect):

SELECT ltree '1.2' ~ '*{2}';
?column?
----------
t
(1 row)

-- expected true
SELECT ltree '1.2' ~ '*{1}.*{1}';
?column?
----------
f
(1 row)

Maybe these two bugs need a separate thread?

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

0001-Fix-max-size-checking-for-ltree-and-lquery.patchtext/x-patch; name=0001-Fix-max-size-checking-for-ltree-and-lquery.patchDownload

From 5334c92406fd281409594a8861cbb40ca9a8c0a9 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Thu, 7 Mar 2019 17:45:44 +0300
Subject: [PATCH 1/2] Fix max size checking for ltree and lquery

---
 contrib/ltree/expected/ltree.out | 16 ++++++++++++++++
 contrib/ltree/ltree.h            |  2 ++
 contrib/ltree/ltree_io.c         | 12 ++++++------
 contrib/ltree/sql/ltree.sql      |  5 +++++
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930..30b8247 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -457,6 +457,22 @@ SELECT nlevel('1.2.3.4');
       4
 (1 row)
 
+SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
+ nlevel 
+--------
+  65535
+(1 row)
+
+SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
+ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
+ERROR:  number of lquery levels (65536) exceeds the maximum allowed (65535)
 SELECT '1.2'::ltree  < '2.2'::ltree;
  ?column? 
 ----------
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 366e580..0e843e3 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -25,6 +25,7 @@ typedef struct
 
 #define LTREE_HDRSIZE	MAXALIGN( offsetof(ltree, data) )
 #define LTREE_FIRST(x)	( (ltree_level*)( ((char*)(x))+LTREE_HDRSIZE ) )
+#define LTREE_MAX_LEVELS	Min(PG_UINT16_MAX, MaxAllocSize / sizeof(nodeitem))
 
 
 /* lquery */
@@ -77,6 +78,7 @@ typedef struct
 
 #define LQUERY_HDRSIZE	 MAXALIGN( offsetof(lquery, data) )
 #define LQUERY_FIRST(x)   ( (lquery_level*)( ((char*)(x))+LQUERY_HDRSIZE ) )
+#define LQUERY_MAX_LEVELS	Min(PG_UINT16_MAX, MaxAllocSize / ITEMSIZE)
 
 #define LQUERY_HASNOT		0x01
 
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index f54f037..460ed1a 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -58,11 +58,11 @@ ltree_in(PG_FUNCTION_ARGS)
 		ptr += charlen;
 	}
 
-	if (num + 1 > MaxAllocSize / sizeof(nodeitem))
+	if (num + 1 > LTREE_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of levels (%d) exceeds the maximum allowed (%d)",
-						num + 1, (int) (MaxAllocSize / sizeof(nodeitem)))));
+				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+						num + 1, (int) LTREE_MAX_LEVELS)));
 	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
 	ptr = buf;
 	while (*ptr)
@@ -227,11 +227,11 @@ lquery_in(PG_FUNCTION_ARGS)
 	}
 
 	num++;
-	if (num > MaxAllocSize / ITEMSIZE)
+	if (num > LQUERY_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of levels (%d) exceeds the maximum allowed (%d)",
-						num, (int) (MaxAllocSize / ITEMSIZE))));
+				 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
+						num, (int) LQUERY_MAX_LEVELS)));
 	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
 	ptr = buf;
 	while (*ptr)
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 846b04e..20b0928 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -90,6 +90,11 @@ SELECT '1.*.4|3|2.*{1}'::lquery;
 SELECT 'qwerty%@*.tu'::lquery;
 
 SELECT nlevel('1.2.3.4');
+SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
+SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
+SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
+SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
+
 SELECT '1.2'::ltree  < '2.2'::ltree;
 SELECT '1.2'::ltree  <= '2.2'::ltree;
 SELECT '2.2'::ltree  = '2.2'::ltree;
-- 
2.7.4

0002-Fix-successive-lquery-ops.patchtext/x-patch; name=0002-Fix-successive-lquery-ops.patchDownload

From 073db1359564f1e9ce48ad553ce2c4d09a36d07c Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Thu, 7 Mar 2019 16:47:10 +0300
Subject: [PATCH 2/2] Fix successive lquery * ops

---
 contrib/ltree/expected/ltree.out | 24 ++++++++++++++++++++++++
 contrib/ltree/lquery_op.c        |  8 ++++----
 contrib/ltree/sql/ltree.sql      |  5 ++++-
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 30b8247..b7f2ec6 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -929,6 +929,30 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  f
 (1 row)
 
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+ ?column? 
+----------
+ f
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column? 
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 62172d5..d4f4941 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -255,8 +255,8 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 		}
 		else
 		{
-			low_pos = cur_tpos + curq->low;
-			high_pos = cur_tpos + curq->high;
+			low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+			high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
 			if (ptr && ptr->q)
 			{
 				ptr->nq++;
@@ -282,8 +282,8 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 		}
 		else
 		{
-			low_pos = cur_tpos + curq->low;
-			high_pos = cur_tpos + curq->high;
+			low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+			high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
 		}
 
 		curq = LQL_NEXT(curq);
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 20b0928..07cdf61 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -173,7 +173,10 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
-
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
-- 
2.7.4

Alexander Korotkov

a.korotkov@postgrespro.ru

over 6 years ago

In reply to: Nikita Glukhov (#5)

Re: fix for BUG #3720: wrong results at using ltree

Hi Nikita,

On Tue, Jul 16, 2019 at 6:52 PM Nikita Glukhov <n.gluhov@postgrespro.ru> wrote:

I looked at "ltree syntax improvement" patch and found two more very
old bugs in ltree/lquery (fixes are attached):

Thank you for the fixes. I've couple notes on them.

0001-Fix-max-size-checking-for-ltree-and-lquery.patch

+#define LTREE_MAX_LEVELS Min(PG_UINT16_MAX, MaxAllocSize / sizeof(nodeitem))
+#define LQUERY_MAX_LEVELS Min(PG_UINT16_MAX, MaxAllocSize / ITEMSIZE)

Looks over caution. PG_UINT16_MAX is not even close to MaxAllocSize /
sizeof(nodeitem) or MaxAllocSize / ITEMSIZE.

0002-Fix-successive-lquery-ops.patch

diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 62172d5..d4f4941 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -255,8 +255,8 @@ checkCond(lquery_level *curq, int query_numlevel,
ltree_level *curt, int tree_nu
  }
  else
  {
- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+ high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
  if (ptr && ptr->q)
  {
  ptr->nq++;
@@ -282,8 +282,8 @@ checkCond(lquery_level *curq, int query_numlevel,
ltree_level *curt, int tree_nu
  }
  else
  {
- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+ high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
  }

curq = LQL_NEXT(curq);

I'm not sure what do these checks do. Code around is uncommented and
puzzled. But could we guarantee the same invariant on the stage of
ltree/lquery parsing?

------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Ibrar Ahmed

ibrar.ahmad@gmail.com

over 6 years ago

In reply to: Nikita Glukhov (#5)

Re: fix for BUG #3720: wrong results at using ltree

Please create separate commitfest entry.

Ibrar Ahmed

ibrar.ahmad@gmail.com

over 6 years ago

In reply to: Nikita Glukhov (#5)

Re: fix for BUG #3720: wrong results at using ltree

On Tue, Jul 16, 2019 at 8:52 PM Nikita Glukhov <n.gluhov@postgrespro.ru>
wrote:

On 09.07.2019 17:57, Oleg Bartunov wrote:

On Mon, Jul 8, 2019 at 7:22 AM Thomas Munro <thomas.munro@gmail.com> <thomas.munro@gmail.com> wrote:

On Sun, Apr 7, 2019 at 3:46 AM Tom Lane <tgl@sss.pgh.pa.us> <tgl@sss.pgh.pa.us> wrote:

=?UTF-8?Q?Filip_Rembia=C5=82kowski?= <filip.rembialkowski@gmail.com> <filip.rembialkowski@gmail.com> writes:

Here is my attempt to fix a 12-years old ltree bug (which is a todo item).
I see it's not backward-compatible, but in my understanding that's
what is documented. Previous behavior was inconsistent with
documentation (where single asterisk should match zero or more
labels).http://archives.postgresql.org/pgsql-bugs/2007-11/msg00044.php

[...]

In short, I'm wondering if we should treat this as a documentation
bug not a code bug. But to do that, we'd need a more accurate
description of what the code is supposed to do, because the statement
quoted above is certainly not a match to the actual behavior.

This patch doesn't apply. More importantly, it seems like we don't
have a consensus on whether we want it.

Teodor, Oleg, would you like to offer an opinion here? If I
understand correctly, the choices are doc change, code/comment change
or WONT_FIX. This seems to be an entry that we can bring to a
conclusion in this CF with some input from the ltree experts.

We are currently very busy and will look at the problem (and dig into
our memory) later. There is also another ltree patch
(https://commitfest.postgresql.org/23/1977/), it would be nice if
Filip try it.

I looked at "ltree syntax improvement" patch and found two more very
old bugs in ltree/lquery (fixes are attached):

1. ltree/lquery level counter overflow is wrongly checked:

SELECT nlevel((repeat('a.', 65534) || 'a')::ltree);
nlevel
--------
65535
(1 row)

-- expected 65536 or error
SELECT nlevel((repeat('a.', 65535) || 'a')::ltree);
nlevel
--------
0
(1 row)

-- expected 65537 or error
SELECT nlevel((repeat('a.', 65536) || 'a')::ltree);
nlevel
--------
1
(1 row)

-- expected 'aaaaa...' or error
SELECT (repeat('a.', 65535) || 'a')::ltree;
ltree
-------

(1 row)

-- expected 'aaaaa...' or error
SELECT (repeat('a.', 65536) || 'a')::ltree;
ltree
-------
a
(1 row)

2. '*{a}.*{b}.*{c}' is not equivalent to '*{a+b+c}' (as I expect):

SELECT ltree '1.2' ~ '*{2}';
?column?
----------
t
(1 row)

-- expected true
SELECT ltree '1.2' ~ '*{1}.*{1}';
?column?
----------
f
(1 row)

Maybe these two bugs need a separate thread?

Please create separate commitfest entry.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

--
Ibrar Ahmed

Alvaro Herrera from 2ndQuadrant

alvherre@alvh.no-ip.org

over 6 years ago

In reply to: Oleg Bartunov (#4)

Re: fix for BUG #3720: wrong results at using ltree

On 2019-Jul-09, Oleg Bartunov wrote:

On Mon, Jul 8, 2019 at 7:22 AM Thomas Munro <thomas.munro@gmail.com> wrote:

On Sun, Apr 7, 2019 at 3:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:

In short, I'm wondering if we should treat this as a documentation
bug not a code bug. But to do that, we'd need a more accurate
description of what the code is supposed to do, because the statement
quoted above is certainly not a match to the actual behavior.

Teodor, Oleg, would you like to offer an opinion here?

We are currently very busy and will look at the problem (and dig into
our memory) later.

Hi Oleg, Teodor. Did you find time to refresh your memory on these things?
It would be good to have these bugfixes sorted out.

--
ï¿½lvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#10

Benjie Gillam

benjie@jemjie.com

about 6 years ago

In reply to: Alvaro Herrera from 2ndQuadrant (#9)

Re: fix for BUG #3720: wrong results at using ltree

The following review has been posted through the commitfest application:
make installcheck-world: tested, passed
Implements feature: tested, passed
Spec compliant: not tested
Documentation: tested, failed

This is my first PostgreSQL commitfest and review, guidance welcome.

This patch is straightforward, it applies cleanly, and it includes tests (I've also tested the feature manually).

The (existing) documentation states "The length of a label path must be less than 65kB," I believe that the 65kB mentioned here should instead be 64kB - perhaps the patch could be updated with this single-character fix? At first I thought the 65kB limit would be applied to the label path string (e.g. 'Top.Countries.Europe.Russia' would be 27 bytes), but it seems the limit applies to the number of labels in the path - perhaps `kB` is not the right measurement here and it should explicitly state 65536?

It is not stated in the documentation what should happen if the label path length is greater than 65535, so raising an error makes sense (but may be a breaking change).

The new status of this patch is: Waiting on Author

#11

Michael Paquier

michael@paquier.xyz

about 6 years ago

In reply to: Alvaro Herrera from 2ndQuadrant (#9)

Re: fix for BUG #3720: wrong results at using ltree

On Thu, Sep 05, 2019 at 04:50:58PM -0400, Alvaro Herrera from 2ndQuadrant wrote:

Hi Oleg, Teodor. Did you find time to refresh your memory on these things?
It would be good to have these bugfixes sorted out.

Two months later. Now would be a good time as well! Alexander, you
have also looked at two patches from Nikita upthread. If these look
good enough for you, are you working on merging them into the tree?
--
Michael

#12

Tomas Vondra

tomas.vondra@2ndquadrant.com

almost 6 years ago

In reply to: Alexander Korotkov (#6)

Re: fix for BUG #3720: wrong results at using ltree

Hi Nikita,

This patch seems inactive / stuck in "waiting on author" since November.
It's marked as bugfix, so it'd be good to get it committed instead of
just punting it to the next CF.

I did a quick review, and I came mostly with the same two complaints as
Alexander ...

On Wed, Jul 17, 2019 at 09:33:46PM +0300, Alexander Korotkov wrote:

Hi Nikita,

On Tue, Jul 16, 2019 at 6:52 PM Nikita Glukhov <n.gluhov@postgrespro.ru> wrote:

I looked at "ltree syntax improvement" patch and found two more very
old bugs in ltree/lquery (fixes are attached):

Thank you for the fixes. I've couple notes on them.

0001-Fix-max-size-checking-for-ltree-and-lquery.patch
+#define LTREE_MAX_LEVELS Min(PG_UINT16_MAX, MaxAllocSize / sizeof(nodeitem))
+#define LQUERY_MAX_LEVELS Min(PG_UINT16_MAX, MaxAllocSize / ITEMSIZE)
Looks over caution. PG_UINT16_MAX is not even close to MaxAllocSize /
sizeof(nodeitem) or MaxAllocSize / ITEMSIZE.

Yeah, I'm also puzzled by the usage of PG_UINT16_MAX here. It's so much
lower than the other values we could jut use the constant directly, but
let's say the structs could grow from the ~16B to chnge this.

The main question is why we need PG_UINT16_MAX at all? It kinda implies
we need to squish the value into a 2B counter or something, but is that
actually true? I don't see anything like that in ltree_io.c.

So it seems more like an arbitrary value considered "sane" - which is
fine, but then a comment saying so would be nice, and we could pick a
value that is "nicer" for humans. Or just use value computed from the
MaxAllocSize limit, without the Min().

0002-Fix-successive-lquery-ops.patch

diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 62172d5..d4f4941 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -255,8 +255,8 @@ checkCond(lquery_level *curq, int query_numlevel,
ltree_level *curt, int tree_nu
}
else
{
- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+ high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
if (ptr && ptr->q)
{
ptr->nq++;
@@ -282,8 +282,8 @@ checkCond(lquery_level *curq, int query_numlevel,
ltree_level *curt, int tree_nu
}
else
{
- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+ high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
}

curq = LQL_NEXT(curq);

I'm not sure what do these checks do. Code around is uncommented and
puzzled. But could we guarantee the same invariant on the stage of
ltree/lquery parsing?

Unfortunately, the current code is somewhat undercommented :-(

Anyway, I don't quite understand why we need these caps. It kinda seems
like a band-aid for potential overflow.

Why should it be OK for the values to even get past the maximum, with
sane input data? And isn't there a better upper limit (e.g. based on
how much space we actually allocated)?

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#13

Tomas Vondra

tomas.vondra@2ndquadrant.com

almost 6 years ago

In reply to: Tomas Vondra (#12)

Re: fix for BUG #3720: wrong results at using ltree

Hi,

I've moved this patch to the next CF - it's still in WoA state, but it's
supposedly a bugfix so I've decided not to return it with feedback.

regards

--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

#14

Nikita Glukhov

n.gluhov@postgrespro.ru

almost 6 years ago

In reply to: Tomas Vondra (#12)

2 attachment(s)

Re: fix for BUG #3720: wrong results at using ltree

On 24.01.2020 21:29, Tomas Vondra wrote:

Hi Nikita,

This patch seems inactive / stuck in "waiting on author" since November.
It's marked as bugfix, so it'd be good to get it committed instead of
just punting it to the next CF.

I did a quick review, and I came mostly with the same two complaints as
Alexander ...

On Wed, Jul 17, 2019 at 09:33:46PM +0300, Alexander Korotkov wrote:
Hi Nikita,

On Tue, Jul 16, 2019 at 6:52 PM Nikita Glukhov
<n.gluhov@postgrespro.ru> wrote:

I looked at "ltree syntax improvement" patch and found two more very
old bugs in ltree/lquery (fixes are attached):

Thank you for the fixes. I've couple notes on them.

0001-Fix-max-size-checking-for-ltree-and-lquery.patch
+#define LTREE_MAX_LEVELS Min(PG_UINT16_MAX, MaxAllocSize / 
sizeof(nodeitem))
+#define LQUERY_MAX_LEVELS Min(PG_UINT16_MAX, MaxAllocSize / ITEMSIZE)
Looks over caution. PG_UINT16_MAX is not even close to MaxAllocSize /
sizeof(nodeitem) or MaxAllocSize / ITEMSIZE.
Yeah, I'm also puzzled by the usage of PG_UINT16_MAX here. It's so much
lower than the other values we could jut use the constant directly, but
let's say the structs could grow from the ~16B to chnge this.

Ok, LTREE_MAX_LEVELS and LQUERY_MAX_LEVELS are defined simply as PG_UINT16_MAX now.

The main question is why we need PG_UINT16_MAX at all? It kinda implies
we need to squish the value into a 2B counter or something, but is that
actually true? I don't see anything like that in ltree_io.c.

ltree.numlevel and lquery.numlevel are uint16 fields.

I also found two places in ltree_concat() where numlevel can overflow.

The first is ltree_concat() (operator ||(ltree, ltree)):

=# SELECT nlevel(('a' || repeat('.a', 65533))::ltree || 'a');
nlevel
--------
65535
(1 row)

=# SELECT nlevel(('a' || repeat('.a', 65534))::ltree || 'a');
nlevel
--------
0
(1 row)

The second is parsing of low and high level limits in lquery_in():

=# SELECT '*{65535}'::lquery;
lquery
----------
*{65535}
(1 row)

=# SELECT '*{65536}'::lquery;
lquery
--------
*{0}
(1 row)

=# SELECT '*{65537}'::lquery;
lquery
--------
*{1}
(1 row)

The both problems are fixed in the new version of the patch.

So it seems more like an arbitrary value considered "sane" - which is
fine, but then a comment saying so would be nice, and we could pick a
value that is "nicer" for humans. Or just use value computed from the
MaxAllocSize limit, without the Min().
0002-Fix-successive-lquery-ops.patch
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 62172d5..d4f4941 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -255,8 +255,8 @@ checkCond(lquery_level *curq, int query_numlevel,
ltree_level *curt, int tree_nu
 }
 else
 {
- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+ high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
 if (ptr && ptr->q)
 {
 ptr->nq++;
@@ -282,8 +282,8 @@ checkCond(lquery_level *curq, int query_numlevel,
ltree_level *curt, int tree_nu
 }
 else
 {
- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = Min(low_pos + curq->low, PG_UINT16_MAX);
+ high_pos = Min(high_pos + curq->high, PG_UINT16_MAX);
 }
curq = LQL_NEXT(curq);

I'm not sure what do these checks do. Code around is uncommented and
puzzled. But could we guarantee the same invariant on the stage of
ltree/lquery parsing?
Unfortunately, the current code is somewhat undercommented :-(

The main problem is that no one really understands how it works now.

low_pos and high_pos seem to be a range of tree levels, from which is allowed
to match the rest of lquery.

For example, when we are matching '.b' in the 'a.*{2,3}.*{4,5}.b'::lquery,
low_pos = 1 + 2 + 4 = 7 and high_pos = 1 + 3 + 5 = 9.

The main goal of the patch is to fix calculation of low_pos and high_pos:

- low_pos = cur_tpos + curq->low;
- high_pos = cur_tpos + curq->high;
+ low_pos = low_pos + curq->low;
+ high_pos = high_pos + curq->high;

Anyway, I don't quite understand why we need these caps. It kinda seems
like a band-aid for potential overflow.

Why should it be OK for the values to even get past the maximum, with
sane input data? And isn't there a better upper limit (e.g. based on
how much space we actually allocated)?

We can compare low_pos to tree_numlevel and return false earlier, if it is
greater. And it seems that high_pos we can also limit to tree_numlevel.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

v2-0001-Fix-max-size-checking-for-ltree-and-lquery.patchtext/x-patch; name=v2-0001-Fix-max-size-checking-for-ltree-and-lquery.patchDownload

From 2bb5d618ca7f0ac3997a2e34e3a06dbd404ca26f Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Thu, 7 Mar 2019 17:45:44 +0300
Subject: [PATCH v2 1/2] Fix max size checking for ltree and lquery

---
 contrib/ltree/expected/ltree.out | 46 ++++++++++++++++++++++++++++++++++++++++
 contrib/ltree/ltree.h            |  2 ++
 contrib/ltree/ltree_io.c         | 44 ++++++++++++++++++++++++++------------
 contrib/ltree/ltree_op.c         |  9 +++++++-
 contrib/ltree/sql/ltree.sql      | 11 ++++++++++
 5 files changed, 98 insertions(+), 14 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930..41e7f94 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -457,6 +457,52 @@ SELECT nlevel('1.2.3.4');
       4
 (1 row)
 
+SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
+ nlevel 
+--------
+  65535
+(1 row)
+
+SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
+ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+SELECT nlevel(('1' || repeat('.1', 65534))::ltree || '1');
+ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
+ERROR:  number of lquery levels (65536) exceeds the maximum allowed (65535)
+SELECT '*{65535}'::lquery;
+  lquery  
+----------
+ *{65535}
+(1 row)
+
+SELECT '*{65536}'::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT '*{65536}'::lquery;
+               ^
+DETAIL:  Low limit (65536) exceeds the maximum allowed (65535).
+SELECT '*{,65534}'::lquery;
+  lquery   
+-----------
+ *{,65534}
+(1 row)
+
+SELECT '*{,65535}'::lquery;
+ lquery 
+--------
+ *
+(1 row)
+
+SELECT '*{,65536}'::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT '*{,65536}'::lquery;
+               ^
+DETAIL:  High limit (65536) exceeds the maximum allowed (65535).
 SELECT '1.2'::ltree  < '2.2'::ltree;
  ?column? 
 ----------
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 366e580..a5608ca 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -25,6 +25,7 @@ typedef struct
 
 #define LTREE_HDRSIZE	MAXALIGN( offsetof(ltree, data) )
 #define LTREE_FIRST(x)	( (ltree_level*)( ((char*)(x))+LTREE_HDRSIZE ) )
+#define LTREE_MAX_LEVELS	PG_UINT16_MAX	/* ltree.numlevel is uint16 */
 
 
 /* lquery */
@@ -77,6 +78,7 @@ typedef struct
 
 #define LQUERY_HDRSIZE	 MAXALIGN( offsetof(lquery, data) )
 #define LQUERY_FIRST(x)   ( (lquery_level*)( ((char*)(x))+LQUERY_HDRSIZE ) )
+#define LQUERY_MAX_LEVELS	PG_UINT16_MAX	/* lquery.numlevel is uint16 */
 
 #define LQUERY_HASNOT		0x01
 
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 900a46a..3d2d1b3 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -58,11 +58,11 @@ ltree_in(PG_FUNCTION_ARGS)
 		ptr += charlen;
 	}
 
-	if (num + 1 > MaxAllocSize / sizeof(nodeitem))
+	if (num + 1 > LTREE_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of levels (%d) exceeds the maximum allowed (%d)",
-						num + 1, (int) (MaxAllocSize / sizeof(nodeitem)))));
+				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+						num + 1, LTREE_MAX_LEVELS)));
 	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
 	ptr = buf;
 	while (*ptr)
@@ -227,11 +227,11 @@ lquery_in(PG_FUNCTION_ARGS)
 	}
 
 	num++;
-	if (num > MaxAllocSize / ITEMSIZE)
+	if (num > LQUERY_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of levels (%d) exceeds the maximum allowed (%d)",
-						num, (int) (MaxAllocSize / ITEMSIZE))));
+				 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
+						num, LQUERY_MAX_LEVELS)));
 	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
 	ptr = buf;
 	while (*ptr)
@@ -344,7 +344,7 @@ lquery_in(PG_FUNCTION_ARGS)
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
 				curqlevel->low = 0;
-				curqlevel->high = 0xffff;
+				curqlevel->high = LTREE_MAX_LEVELS;
 				curqlevel = NEXTLEV(curqlevel);
 				state = LQPRS_WAITLEVEL;
 			}
@@ -357,7 +357,16 @@ lquery_in(PG_FUNCTION_ARGS)
 				state = LQPRS_WAITSNUM;
 			else if (t_isdigit(ptr))
 			{
-				curqlevel->low = atoi(ptr);
+				int			low = atoi(ptr);
+
+				if (low > PG_UINT16_MAX)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("lquery syntax error"),
+							 errdetail("Low limit (%d) exceeds the maximum allowed (%d).",
+									   low, PG_UINT16_MAX)));
+
+				curqlevel->low = (uint16) low;
 				state = LQPRS_WAITND;
 			}
 			else
@@ -367,12 +376,21 @@ lquery_in(PG_FUNCTION_ARGS)
 		{
 			if (t_isdigit(ptr))
 			{
-				curqlevel->high = atoi(ptr);
+				int			high = atoi(ptr);
+
+				if (high > LTREE_MAX_LEVELS)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("lquery syntax error"),
+							 errdetail("High limit (%d) exceeds the maximum allowed (%d).",
+									   high, LTREE_MAX_LEVELS)));
+
+				curqlevel->high = high;
 				state = LQPRS_WAITCLOSE;
 			}
 			else if (charlen == 1 && t_iseq(ptr, '}'))
 			{
-				curqlevel->high = 0xffff;
+				curqlevel->high = LTREE_MAX_LEVELS;
 				state = LQPRS_WAITEND;
 			}
 			else
@@ -444,7 +462,7 @@ lquery_in(PG_FUNCTION_ARGS)
 							   lptr->wlen, pos)));
 	}
 	else if (state == LQPRS_WAITOPEN)
-		curqlevel->high = 0xffff;
+		curqlevel->high = LTREE_MAX_LEVELS;
 	else if (state != LQPRS_WAITEND)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
@@ -593,7 +611,7 @@ lquery_out(PG_FUNCTION_ARGS)
 			}
 			else if (curqlevel->low == 0)
 			{
-				if (curqlevel->high == 0xffff)
+				if (curqlevel->high == LTREE_MAX_LEVELS)
 				{
 					*ptr = '*';
 					*(ptr + 1) = '\0';
@@ -601,7 +619,7 @@ lquery_out(PG_FUNCTION_ARGS)
 				else
 					sprintf(ptr, "*{,%d}", curqlevel->high);
 			}
-			else if (curqlevel->high == 0xffff)
+			else if (curqlevel->high == LTREE_MAX_LEVELS)
 			{
 				sprintf(ptr, "*{%d,}", curqlevel->low);
 			}
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index 070868f..34e6e4b 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -274,10 +274,17 @@ static ltree *
 ltree_concat(ltree *a, ltree *b)
 {
 	ltree	   *r;
+	int			numlevel = (int) a->numlevel + b->numlevel;
+
+	if (numlevel > LTREE_MAX_LEVELS)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+						numlevel, LTREE_MAX_LEVELS)));
 
 	r = (ltree *) palloc0(VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
 	SET_VARSIZE(r, VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
-	r->numlevel = a->numlevel + b->numlevel;
+	r->numlevel = (uint16) numlevel;
 
 	memcpy(LTREE_FIRST(r), LTREE_FIRST(a), VARSIZE(a) - LTREE_HDRSIZE);
 	memcpy(((char *) LTREE_FIRST(r)) + VARSIZE(a) - LTREE_HDRSIZE,
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 846b04e..fca3ae6 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -90,6 +90,17 @@ SELECT '1.*.4|3|2.*{1}'::lquery;
 SELECT 'qwerty%@*.tu'::lquery;
 
 SELECT nlevel('1.2.3.4');
+SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
+SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
+SELECT nlevel(('1' || repeat('.1', 65534))::ltree || '1');
+SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
+SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
+SELECT '*{65535}'::lquery;
+SELECT '*{65536}'::lquery;
+SELECT '*{,65534}'::lquery;
+SELECT '*{,65535}'::lquery;
+SELECT '*{,65536}'::lquery;
+
 SELECT '1.2'::ltree  < '2.2'::ltree;
 SELECT '1.2'::ltree  <= '2.2'::ltree;
 SELECT '2.2'::ltree  = '2.2'::ltree;
-- 
2.7.4

v2-0002-Fix-successive-lquery-ops.patchtext/x-patch; name=v2-0002-Fix-successive-lquery-ops.patchDownload

From cc3496ddfc72920e0d546f5e6fe7f76952e6484e Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Thu, 7 Mar 2019 16:47:10 +0300
Subject: [PATCH v2 2/2] Fix successive lquery * ops

---
 contrib/ltree/expected/ltree.out | 24 ++++++++++++++++++++++++
 contrib/ltree/lquery_op.c        | 17 +++++++++++++----
 contrib/ltree/sql/ltree.sql      |  5 ++++-
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 41e7f94..1b31dbc 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -959,6 +959,30 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  f
 (1 row)
 
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+ ?column? 
+----------
+ f
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column? 
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index bfbcee6..1d2614b 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -248,8 +248,13 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 		}
 		else
 		{
-			low_pos = cur_tpos + curq->low;
-			high_pos = cur_tpos + curq->high;
+			low_pos += curq->low;
+
+			if (low_pos > tree_numlevel)
+				return false;
+
+			high_pos = Min(high_pos + curq->high, tree_numlevel);
+
 			if (ptr && ptr->q)
 			{
 				ptr->nq++;
@@ -275,8 +280,12 @@ checkCond(lquery_level *curq, int query_numlevel, ltree_level *curt, int tree_nu
 		}
 		else
 		{
-			low_pos = cur_tpos + curq->low;
-			high_pos = cur_tpos + curq->high;
+			low_pos += curq->low;
+
+			if (low_pos > tree_numlevel)
+				return false;
+
+			high_pos = Min(high_pos + curq->high, tree_numlevel);
 		}
 
 		curq = LQL_NEXT(curq);
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index fca3ae6..1de0b2a 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -179,7 +179,10 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
-
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
-- 
2.7.4

#15

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Nikita Glukhov (#14)

1 attachment(s)

Re: fix for BUG #3720: wrong results at using ltree

Nikita Glukhov <n.gluhov@postgrespro.ru> writes:

On 24.01.2020 21:29, Tomas Vondra wrote:

Unfortunately, the current code is somewhat undercommented :-(

The main problem is that no one really understands how it works now.

Indeed. I was disturbed to realize that lquery_op.c, despite being
far from trivial code, contained NOT ONE SINGLE COMMENT before today,
other than the content-free file header and a commented-out (visibly
unsafe, too) debugging printing function. This is a long way south
of minimally acceptable, in my book.

Anyway, I concur that Nikita's two patches are bug fixes, so I pushed
them. Nonetheless, he *did* hijack this thread, so in hopes of restoring
attention to the original topic, here's a rebased version of the original
patch.

My main complaint about it remains the same, that it changes a
disturbingly large number of existing regression-test results,
suggesting that there's not a meeting of the minds about what
this logic is supposed to do. Maybe it's okay or maybe it's
not, but who's going to decide?

Also, now that I've looked at it a bit more, I'd be inclined to
strip out the parts of the patch that remove setting up the
LQUERY_HASNOT flag. Even if we're not using that right now,
we might want it again someday, and we're not saving much of
anything by introducing a minor on-disk incompatibility.

regards, tom lane

Attachments:

ltree-not-fixes-2.patchtext/x-diff; charset=us-ascii; name=ltree-not-fixes-2.patchDownload

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 1b31dbc..1eef086 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -722,7 +722,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d';
@@ -752,7 +752,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!e';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!e.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!e';
@@ -770,7 +770,7 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!f.*';
@@ -788,7 +788,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.!d.*';
@@ -812,13 +812,13 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
@@ -830,7 +830,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*.c.*';
@@ -878,31 +878,31 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*.e';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.!c.*';
@@ -932,19 +932,19 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
@@ -956,7 +956,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
@@ -983,6 +983,18 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
  f
 (1 row)
 
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b'::ltree ~ '!a.!a';
+ ?column? 
+----------
+ f
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column? 
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 5c7afe5..81fcb0e 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -19,16 +19,6 @@ PG_FUNCTION_INFO_V1(lt_q_rregex);
 
 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 
-typedef struct
-{
-	lquery_level *q;
-	int			nq;
-	ltree_level *t;
-	int			nt;
-	int			posq;
-	int			post;
-} FieldNot;
-
 static char *
 getlexeme(char *start, char *end, int *len)
 {
@@ -109,6 +99,9 @@ checkLevel(lquery_level *curq, ltree_level *curt)
 	int			(*cmpptr) (const char *, const char *, size_t);
 	lquery_variant *curvar = LQL_FIRST(curq);
 	int			i;
+	bool		success;
+
+	success = (curq->flag & LQL_NOT) ? false : true;
 
 	for (i = 0; i < curq->numvar; i++)
 	{
@@ -116,39 +109,26 @@ checkLevel(lquery_level *curq, ltree_level *curt)
 
 		if (curvar->flag & LVAR_SUBLEXEME)
 		{
-			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr, (curvar->flag & LVAR_ANYEND)))
-				return true;
+			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+								(curvar->flag & LVAR_ANYEND)))
+				return success;
 		}
 		else if ((curvar->len == curt->len ||
 				  (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
 				 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
 		{
 
-			return true;
+			return success;
 		}
 		curvar = LVAR_NEXT(curvar);
 	}
-	return false;
+	return !success;
 }
 
 /*
-void
-printFieldNot(FieldNot *fn ) {
-	while(fn->q) {
-		elog(NOTICE,"posQ:%d lenQ:%d posT:%d lenT:%d", fn->posq,fn->nq,fn->post,fn->nt);
-		fn++;
-	}
-}
-*/
-
-/*
  * Try to match an lquery (of query_numlevel items) to an ltree (of
  * tree_numlevel items)
  *
- * If the query contains any NOT flags, "ptr" must point to a FieldNot
- * workspace initialized with ptr->q == NULL.  Otherwise it can be NULL.
- * (LQL_NOT flags will be ignored if ptr == NULL.)
- *
  * high_pos is the last ltree position the first lquery item is allowed
  * to match at; it should be zero for external calls.
  *
@@ -157,7 +137,6 @@ printFieldNot(FieldNot *fn ) {
 static bool
 checkCond(lquery_level *curq, int query_numlevel,
 		  ltree_level *curt, int tree_numlevel,
-		  FieldNot *ptr,
 		  uint32 high_pos,
 		  bool force_advance)
 {
@@ -181,7 +160,7 @@ checkCond(lquery_level *curq, int query_numlevel,
 		if (curq->numvar)
 		{
 			/* Current query item is not '*' */
-			ltree_level *prevt = curt;
+			bool		isok = false;
 
 			/* skip tree items that must be ignored due to prior * items */
 			while (cur_tpos < low_pos)
@@ -191,82 +170,28 @@ checkCond(lquery_level *curq, int query_numlevel,
 				cur_tpos++;
 				if (tlen == 0)
 					return false;
-				if (ptr && ptr->q)
-					ptr->nt++;
 			}
 
-			if (ptr && (curq->flag & LQL_NOT))
+			while (cur_tpos <= high_pos && tlen > 0 && !isok)
 			{
-				/* Deal with a NOT item */
-				if (!(prevq && prevq->numvar == 0))
-					prevq = curq;
-				if (ptr->q == NULL)
-				{
-					ptr->t = prevt;
-					ptr->q = prevq;
-					ptr->nt = 1;
-					ptr->nq = 1 + ((prevq == curq) ? 0 : 1);
-					ptr->posq = query_numlevel - qlen - ((prevq == curq) ? 0 : 1);
-					ptr->post = cur_tpos;
-				}
-				else
-				{
-					ptr->nt++;
-					ptr->nq++;
-				}
-
-				if (qlen == 1 && ptr->q->numvar == 0)
-					ptr->nt = tree_numlevel - ptr->post;
+				isok = checkLevel(curq, curt);
 				curt = LEVEL_NEXT(curt);
 				tlen--;
 				cur_tpos++;
-				if (high_pos < cur_tpos)
-					high_pos++;
-			}
-			else
-			{
-				/* Not a NOT item, check for normal match */
-				bool		isok = false;
-
-				while (cur_tpos <= high_pos && tlen > 0 && !isok)
+				if (isok && prevq && prevq->numvar == 0 && tlen > 0 && cur_tpos <= high_pos)
 				{
-					isok = checkLevel(curq, curt);
-					curt = LEVEL_NEXT(curt);
-					tlen--;
-					cur_tpos++;
-					if (isok && prevq && prevq->numvar == 0 &&
-						tlen > 0 && cur_tpos <= high_pos)
-					{
-						FieldNot	tmpptr;
-
-						if (ptr)
-							memcpy(&tmpptr, ptr, sizeof(FieldNot));
-						if (checkCond(prevq, qlen + 1,
-									  curt, tlen,
-									  (ptr) ? &tmpptr : NULL,
-									  high_pos - cur_tpos,
-									  true))
-							return true;
-					}
-					if (!isok && ptr && ptr->q)
-						ptr->nt++;
+					if (checkCond(prevq, qlen + 1,
+								  curt, tlen,
+								  high_pos - cur_tpos,
+								  true))
+						return true;
 				}
-				if (!isok)
-					return false;
-
-				if (ptr && ptr->q)
-				{
-					if (checkCond(ptr->q, ptr->nq,
-								  ptr->t, ptr->nt,
-								  NULL,
-								  0,
-								  false))
-						return false;
-					ptr->q = NULL;
-				}
-				low_pos = cur_tpos;
-				high_pos = cur_tpos;
 			}
+			if (!isok)
+				return false;
+
+			low_pos = cur_tpos;
+			high_pos = cur_tpos;
 		}
 		else
 		{
@@ -277,13 +202,6 @@ checkCond(lquery_level *curq, int query_numlevel,
 				return false;
 
 			high_pos = Min(high_pos + curq->high, tree_numlevel);
-
-			if (ptr && ptr->q)
-			{
-				ptr->nq++;
-				if (qlen == 1)
-					ptr->nt = tree_numlevel - ptr->post;
-			}
 		}
 
 		prevq = curq;
@@ -321,15 +239,6 @@ checkCond(lquery_level *curq, int query_numlevel,
 	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
 		return false;
 
-	/* Finish pending NOT check, if any */
-	if (ptr && ptr->q &&
-		checkCond(ptr->q, ptr->nq,
-				  ptr->t, ptr->nt,
-				  NULL,
-				  0,
-				  false))
-		return false;
-
 	return true;
 }
 
@@ -340,26 +249,10 @@ ltq_regex(PG_FUNCTION_ARGS)
 	lquery	   *query = PG_GETARG_LQUERY_P(1);
 	bool		res = false;
 
-	if (query->flag & LQUERY_HASNOT)
-	{
-		FieldNot	fn;
-
-		fn.q = NULL;
-
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						&fn,
-						0,
-						false);
-	}
-	else
-	{
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						NULL,
-						0,
-						false);
-	}
+	res = checkCond(LQUERY_FIRST(query), query->numlevel,
+					LTREE_FIRST(tree), tree->numlevel,
+					0,
+					false);
 
 	PG_FREE_IF_COPY(tree, 0);
 	PG_FREE_IF_COPY(query, 1);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 2c48304..02f05fc 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -83,8 +83,6 @@ typedef struct
 #define LQUERY_FIRST(x)   ( (lquery_level*)( ((char*)(x))+LQUERY_HDRSIZE ) )
 #define LQUERY_MAX_LEVELS	PG_UINT16_MAX	/* lquery.numlevel is uint16 */
 
-#define LQUERY_HASNOT		0x01
-
 #define ISALNUM(x)	( t_isalpha(x) || t_isdigit(x)	|| ( pg_mblen(x) == 1 && t_iseq((x), '_') ) )
 
 /* full text query */
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 3ae7dc0..43cbb66 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -205,7 +205,6 @@ lquery_in(PG_FUNCTION_ARGS)
 			   *curqlevel,
 			   *tmpql;
 	lquery_variant *lrptr = NULL;
-	bool		hasnot = false;
 	bool		wasbad = false;
 	int			charlen;
 	int			pos = 0;
@@ -254,7 +253,6 @@ lquery_in(PG_FUNCTION_ARGS)
 				state = LQPRS_WAITDELIM;
 				curqlevel->numvar = 1;
 				curqlevel->flag |= LQL_NOT;
-				hasnot = true;
 			}
 			else if (charlen == 1 && t_iseq(ptr, '*'))
 				state = LQPRS_WAITOPEN;
@@ -498,8 +496,6 @@ lquery_in(PG_FUNCTION_ARGS)
 	result->numlevel = num;
 	result->firstgood = 0;
 	result->flag = 0;
-	if (hasnot)
-		result->flag |= LQUERY_HASNOT;
 	cur = LQUERY_FIRST(result);
 	curqlevel = tmpql;
 	while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 1de0b2a..d1a4d6f 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -183,6 +183,8 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+SELECT 'a.b'::ltree ~ '!a.!a';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 0c74aa7..7794068 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -603,7 +603,7 @@ ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.Astronomy.*';
  Top.Collections.Pictures.Astronomy.Astronauts
 (7 rows)
 
-ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.*.Astronomy.*';
+ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.Astronomy.*';
                 path
 ------------------------------------
  Top.Science.Astronomy

#16

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Tom Lane (#15)

2 attachment(s)

Re: fix for BUG #3720: wrong results at using ltree

I wrote:

My main complaint about it remains the same, that it changes a
disturbingly large number of existing regression-test results,
suggesting that there's not a meeting of the minds about what
this logic is supposed to do. Maybe it's okay or maybe it's
not, but who's going to decide?

Well ... *somebody's* got to decide, and since Oleg and Teodor aren't
stepping up, I took it on myself to study this more closely.

It seems to me that indeed the existing behavior is broken: given
what the documentation has to say, it's really hard to argue that
an lquery like "*.!foo.*" means something other than "match any
ltree that has at least one label that is not 'foo'". But the
existing code produces

regression=# select 'bar.foo.baz'::ltree ~ '*.!foo.*'::lquery;
?column?
----------
f
(1 row)

I agree that's just plain wrong, and so are all the regression
test cases that this patch is changing the results of.

However, I think there is a valid use-case that the existing
code is trying to solve: how can you say "match any ltree in
which no label is 'foo'"? That is the effective behavior right
now of a pattern like this, and it seems useful. So if we change
this, we ought to provide some other way to get that result.

What I propose we do about that is allow lquery quantifiers to
be attached to regular items as well as star items, so that the
need is met by saying this:

regression=# select 'bar.foo.baz'::ltree ~ '!foo{,}'::lquery;
?column?
----------
f
(1 row)

regression=# select 'bar.fool.baz'::ltree ~ '!foo{,}'::lquery;
?column?
----------
t
(1 row)

Also, once we do that, it's possible to treat star and non-star items
basically alike in checkCond, with checkLevel being the place that
accounts for them being different. This results in logic that's far
simpler and much more nearly like the way that LIKE patterns are
implemented, which seems like a good thing to me.

Hence, attached are two revised patches that attack the problem
this way. The first one is somewhat unrelated to the original
point --- it's trying to clean up the error messages in ltree_in
and lquery_in so that they are more consistent and agree with
the terminology used in the documentation. (Notably, the term
"level" is used nowhere in the ltree docs, except in the legacy
function name nlevel().) However its movement of the check for
high < low is needed to make that cover the case of a bogus non-star
quantifier in patch 0002. These also depend on the cosmetic
patches I just committed, so you need HEAD as of today to get
them to apply.

regards, tom lane

Attachments:

0001-rationalize-ltree-input-errors-1.patchtext/x-diff; charset=us-ascii; name=0001-rationalize-ltree-input-errors-1.patchDownload

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 1b31dbc..cd18516 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -464,7 +464,7 @@ SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
 (1 row)
 
 SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
-ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of ltree labels (65536) exceeds the maximum allowed (65535)
 SELECT nlevel(('1' || repeat('.1', 65534))::ltree || '1');
 ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
 SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
@@ -474,7 +474,7 @@ SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
 (1 row)
 
 SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
-ERROR:  number of lquery levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of lquery items (65536) exceeds the maximum allowed (65535)
 SELECT '*{65535}'::lquery;
   lquery  
 ----------
@@ -485,7 +485,7 @@ SELECT '*{65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{65536}'::lquery;
                ^
-DETAIL:  Low limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  Low limit (65536) exceeds the maximum allowed (65535), at character 2.
 SELECT '*{,65534}'::lquery;
   lquery   
 -----------
@@ -502,7 +502,12 @@ SELECT '*{,65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{,65536}'::lquery;
                ^
-DETAIL:  High limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  High limit (65536) exceeds the maximum allowed (65535), at character 3.
+SELECT '*{4,3}'::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT '*{4,3}'::lquery;
+               ^
+DETAIL:  Low limit (4) is greater than high limit (3), at character 4.
 SELECT '1.2'::ltree  < '2.2'::ltree;
  ?column? 
 ----------
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 2503d47..2136715 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -17,12 +17,6 @@ PG_FUNCTION_INFO_V1(lquery_in);
 PG_FUNCTION_INFO_V1(lquery_out);
 
 
-#define UNCHAR ereport(ERROR, \
-					   (errcode(ERRCODE_SYNTAX_ERROR), \
-						errmsg("syntax error at position %d", \
-						pos)));
-
-
 typedef struct
 {
 	char	   *start;
@@ -49,6 +43,11 @@ ltree_in(PG_FUNCTION_ARGS)
 	int			charlen;
 	int			pos = 0;
 
+#define UNCHAR ereport(ERROR, \
+					   errcode(ERRCODE_SYNTAX_ERROR), \
+					   errmsg("ltree syntax error at character %d", \
+							  pos))
+
 	ptr = buf;
 	while (*ptr)
 	{
@@ -61,7 +60,7 @@ ltree_in(PG_FUNCTION_ARGS)
 	if (num + 1 > LTREE_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+				 errmsg("number of ltree labels (%d) exceeds the maximum allowed (%d)",
 						num + 1, LTREE_MAX_LEVELS)));
 	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
 	ptr = buf;
@@ -88,10 +87,10 @@ ltree_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
 				lptr++;
@@ -115,10 +114,9 @@ ltree_in(PG_FUNCTION_ARGS)
 		if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 			ereport(ERROR,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("name of level is too long"),
-					 errdetail("Name length is %d, must "
-							   "be < 256, in position %d.",
-							   lptr->wlen, pos)));
+					 errmsg("label string is too long"),
+					 errdetail("Label length is %d, must be at most %d, at character %d.",
+							   lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
 
 		totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
 		lptr++;
@@ -126,8 +124,8 @@ ltree_in(PG_FUNCTION_ARGS)
 	else if (!(state == LTPRS_WAITNAME && lptr == list))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("syntax error"),
-				 errdetail("Unexpected end of line.")));
+				 errmsg("ltree syntax error"),
+				 errdetail("Unexpected end of input.")));
 
 	result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
 	SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
@@ -144,6 +142,8 @@ ltree_in(PG_FUNCTION_ARGS)
 
 	pfree(list);
 	PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }
 
 Datum
@@ -210,6 +210,11 @@ lquery_in(PG_FUNCTION_ARGS)
 	int			charlen;
 	int			pos = 0;
 
+#define UNCHAR ereport(ERROR, \
+					   errcode(ERRCODE_SYNTAX_ERROR), \
+					   errmsg("lquery syntax error at character %d", \
+							  pos))
+
 	ptr = buf;
 	while (*ptr)
 	{
@@ -230,7 +235,7 @@ lquery_in(PG_FUNCTION_ARGS)
 	if (num > LQUERY_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
+				 errmsg("number of lquery items (%d) exceeds the maximum allowed (%d)",
 						num, LQUERY_MAX_LEVELS)));
 	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
 	ptr = buf;
@@ -305,10 +310,10 @@ lquery_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				state = LQPRS_WAITVAR;
 			}
@@ -321,10 +326,10 @@ lquery_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				state = LQPRS_WAITLEVEL;
 				curqlevel = NEXTLEV(curqlevel);
@@ -361,10 +366,10 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				if (low < 0 || low > LTREE_MAX_LEVELS)
 					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 							 errmsg("lquery syntax error"),
-							 errdetail("Low limit (%d) exceeds the maximum allowed (%d).",
-									   low, LTREE_MAX_LEVELS)));
+							 errdetail("Low limit (%d) exceeds the maximum allowed (%d), at character %d.",
+									   low, LTREE_MAX_LEVELS, pos)));
 
 				curqlevel->low = (uint16) low;
 				state = LQPRS_WAITND;
@@ -380,10 +385,16 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				if (high < 0 || high > LTREE_MAX_LEVELS)
 					ereport(ERROR,
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+							 errmsg("lquery syntax error"),
+							 errdetail("High limit (%d) exceeds the maximum allowed (%d), at character %d.",
+									   high, LTREE_MAX_LEVELS, pos)));
+				else if (curqlevel->low > high)
+					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("lquery syntax error"),
-							 errdetail("High limit (%d) exceeds the maximum allowed (%d).",
-									   high, LTREE_MAX_LEVELS)));
+							 errdetail("Low limit (%d) is greater than high limit (%d), at character %d.",
+									   curqlevel->low, high, pos)));
 
 				curqlevel->high = (uint16) high;
 				state = LQPRS_WAITCLOSE;
@@ -441,7 +452,7 @@ lquery_in(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("lquery syntax error"),
-					 errdetail("Unexpected end of line.")));
+					 errdetail("Unexpected end of input.")));
 
 		lptr->len = ptr - lptr->start -
 			((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
@@ -451,15 +462,14 @@ lquery_in(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("lquery syntax error"),
-					 errdetail("Unexpected end of line.")));
+					 errdetail("Unexpected end of input.")));
 
 		if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 			ereport(ERROR,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("name of level is too long"),
-					 errdetail("Name length is %d, must "
-							   "be < 256, in position %d.",
-							   lptr->wlen, pos)));
+					 errmsg("label string is too long"),
+					 errdetail("Label length is %d, must be at most %d, at character %d.",
+							   lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
 	}
 	else if (state == LQPRS_WAITOPEN)
 		curqlevel->high = LTREE_MAX_LEVELS;
@@ -467,7 +477,7 @@ lquery_in(PG_FUNCTION_ARGS)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("lquery syntax error"),
-				 errdetail("Unexpected end of line.")));
+				 errdetail("Unexpected end of input.")));
 
 	curqlevel = tmpql;
 	totallen = LQUERY_HDRSIZE;
@@ -483,13 +493,6 @@ lquery_in(PG_FUNCTION_ARGS)
 				lptr++;
 			}
 		}
-		else if (curqlevel->low > curqlevel->high)
-			ereport(ERROR,
-					(errcode(ERRCODE_SYNTAX_ERROR),
-					 errmsg("lquery syntax error"),
-					 errdetail("Low limit (%d) is greater than upper (%d).",
-							   curqlevel->low, curqlevel->high)));
-
 		curqlevel = NEXTLEV(curqlevel);
 	}
 
@@ -543,6 +546,8 @@ lquery_in(PG_FUNCTION_ARGS)
 
 	pfree(tmpql);
 	PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }
 
 Datum
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 1de0b2a..827f264 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -100,6 +100,7 @@ SELECT '*{65536}'::lquery;
 SELECT '*{,65534}'::lquery;
 SELECT '*{,65535}'::lquery;
 SELECT '*{,65536}'::lquery;
+SELECT '*{4,3}'::lquery;
 
 SELECT '1.2'::ltree  < '2.2'::ltree;
 SELECT '1.2'::ltree  <= '2.2'::ltree;
diff --git a/contrib/ltree_plpython/expected/ltree_plpython.out b/contrib/ltree_plpython/expected/ltree_plpython.out
index 4779755..81c0b97 100644
--- a/contrib/ltree_plpython/expected/ltree_plpython.out
+++ b/contrib/ltree_plpython/expected/ltree_plpython.out
@@ -38,6 +38,6 @@ $$;
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
+ERROR:  ltree syntax error at character 0
 CONTEXT:  while creating return value
 PL/Python function "test2"

0002-ltree-not-fixes-and-better-quantifiers-1.patchtext/x-diff; charset=us-ascii; name=0002-ltree-not-fixes-and-better-quantifiers-1.patchDownload

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index cd18516..2eee98f 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -445,6 +445,12 @@ SELECT '1.*.4|3|2.*{1}'::lquery;
  1.*.4|3|2.*{1}
 (1 row)
 
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
+               lquery               
+------------------------------------
+ foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}
+(1 row)
+
 SELECT 'qwerty%@*.tu'::lquery;
     lquery    
 --------------
@@ -727,7 +733,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d';
@@ -757,7 +763,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!e';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!e.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!e';
@@ -775,7 +781,7 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!f.*';
@@ -793,7 +799,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.!d.*';
@@ -817,13 +823,13 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
@@ -835,7 +841,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*.c.*';
@@ -883,31 +889,31 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*.e';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.!c.*';
@@ -937,19 +943,19 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
@@ -961,7 +967,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
@@ -988,6 +994,78 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
  f
 (1 row)
 
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b'::ltree ~ '!a.!a';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
+ ?column? 
+----------
+ t
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column? 
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 5c7afe5..e7a7f7f 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -9,6 +9,7 @@
 
 #include "catalog/pg_collation.h"
 #include "ltree.h"
+#include "miscadmin.h"
 #include "utils/formatting.h"
 
 PG_FUNCTION_INFO_V1(ltq_regex);
@@ -19,16 +20,6 @@ PG_FUNCTION_INFO_V1(lt_q_rregex);
 
 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 
-typedef struct
-{
-	lquery_level *q;
-	int			nq;
-	ltree_level *t;
-	int			nt;
-	int			posq;
-	int			post;
-} FieldNot;
-
 static char *
 getlexeme(char *start, char *end, int *len)
 {
@@ -99,238 +90,145 @@ ltree_strncasecmp(const char *a, const char *b, size_t s)
 }
 
 /*
- * See if a (non-star) lquery_level matches an ltree_level
+ * See if an lquery_level matches an ltree_level
  *
- * Does not consider level's possible LQL_NOT flag.
+ * This accounts for all flags including LQL_NOT, but does not
+ * consider repetition counts.
  */
 static bool
 checkLevel(lquery_level *curq, ltree_level *curt)
 {
-	int			(*cmpptr) (const char *, const char *, size_t);
 	lquery_variant *curvar = LQL_FIRST(curq);
-	int			i;
+	bool		success;
+
+	success = (curq->flag & LQL_NOT) ? false : true;
+
+	/* numvar == 0 means '*' which matches anything */
+	if (curq->numvar == 0)
+		return success;
 
-	for (i = 0; i < curq->numvar; i++)
+	for (int i = 0; i < curq->numvar; i++)
 	{
+		int			(*cmpptr) (const char *, const char *, size_t);
+
 		cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
 
 		if (curvar->flag & LVAR_SUBLEXEME)
 		{
-			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr, (curvar->flag & LVAR_ANYEND)))
-				return true;
+			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+								(curvar->flag & LVAR_ANYEND)))
+				return success;
 		}
 		else if ((curvar->len == curt->len ||
 				  (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
 				 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
-		{
+			return success;
 
-			return true;
-		}
 		curvar = LVAR_NEXT(curvar);
 	}
-	return false;
+	return !success;
 }
 
 /*
-void
-printFieldNot(FieldNot *fn ) {
-	while(fn->q) {
-		elog(NOTICE,"posQ:%d lenQ:%d posT:%d lenT:%d", fn->posq,fn->nq,fn->post,fn->nt);
-		fn++;
-	}
-}
-*/
-
-/*
- * Try to match an lquery (of query_numlevel items) to an ltree (of
- * tree_numlevel items)
- *
- * If the query contains any NOT flags, "ptr" must point to a FieldNot
- * workspace initialized with ptr->q == NULL.  Otherwise it can be NULL.
- * (LQL_NOT flags will be ignored if ptr == NULL.)
- *
- * high_pos is the last ltree position the first lquery item is allowed
- * to match at; it should be zero for external calls.
- *
- * force_advance must be false except in internal recursive calls.
+ * Try to match an lquery (of qlen items) to an ltree (of tlen items)
  */
 static bool
-checkCond(lquery_level *curq, int query_numlevel,
-		  ltree_level *curt, int tree_numlevel,
-		  FieldNot *ptr,
-		  uint32 high_pos,
-		  bool force_advance)
+checkCond(lquery_level *curq, int qlen,
+		  ltree_level *curt, int tlen)
 {
-	uint32		low_pos = 0,	/* first allowed ltree position for match */
-				cur_tpos = 0;	/* ltree position of curt */
-	int			tlen = tree_numlevel,	/* counts of remaining items */
-				qlen = query_numlevel;
-	lquery_level *prevq = NULL;
-
-	/* advance curq (setting up prevq) if requested */
-	if (force_advance)
-	{
-		Assert(qlen > 0);
-		prevq = curq;
-		curq = LQL_NEXT(curq);
-		qlen--;
-	}
+	/* Since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
 
-	while (tlen > 0 && qlen > 0)
+	/* Normal case where we have some query and text items to match */
+	if (tlen > 0 && qlen > 0)
 	{
-		if (curq->numvar)
+		int			low,
+					high,
+					matchcnt;
+		lquery_level *nextq;
+		ltree_level *nextt;
+
+		/*
+		 * Get min and max repetition counts for this query item, dealing with
+		 * the backwards-compatibility hack that the low/high fields aren't
+		 * meaningful for non-'*' items unless LQL_COUNT is set.
+		 */
+		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+			low = curq->low, high = curq->high;
+		else
+			low = high = 1;
+
+		/* Fail if a match of required number of items is impossible */
+		if (tlen < low || high < low)
+			return false;
+
+		/* Check minimum number of matches */
+		nextt = curt;
+		for (matchcnt = 0; matchcnt < low; matchcnt++)
+		{
+			if (!checkLevel(curq, nextt))
+				return false;
+			nextt = LEVEL_NEXT(nextt);
+		}
+
+		/*
+		 * Recursively check the rest of the pattern against each possible
+		 * start point following this item's match(es).
+		 */
+		nextq = LQL_NEXT(curq);
+		for (;;)
 		{
-			/* Current query item is not '*' */
-			ltree_level *prevt = curt;
+			/*
+			 * If the rest of the pattern matches beginning here, we're good.
+			 */
+			if (checkCond(nextq, qlen - 1,
+						  nextt, tlen - matchcnt))
+				return true;
 
-			/* skip tree items that must be ignored due to prior * items */
-			while (cur_tpos < low_pos)
+			/*
+			 * Otherwise, try to match one more text item to this query item.
+			 */
+			if (matchcnt < high && matchcnt < tlen)
 			{
-				curt = LEVEL_NEXT(curt);
-				tlen--;
-				cur_tpos++;
-				if (tlen == 0)
+				if (!checkLevel(curq, nextt))
 					return false;
-				if (ptr && ptr->q)
-					ptr->nt++;
-			}
-
-			if (ptr && (curq->flag & LQL_NOT))
-			{
-				/* Deal with a NOT item */
-				if (!(prevq && prevq->numvar == 0))
-					prevq = curq;
-				if (ptr->q == NULL)
-				{
-					ptr->t = prevt;
-					ptr->q = prevq;
-					ptr->nt = 1;
-					ptr->nq = 1 + ((prevq == curq) ? 0 : 1);
-					ptr->posq = query_numlevel - qlen - ((prevq == curq) ? 0 : 1);
-					ptr->post = cur_tpos;
-				}
-				else
-				{
-					ptr->nt++;
-					ptr->nq++;
-				}
-
-				if (qlen == 1 && ptr->q->numvar == 0)
-					ptr->nt = tree_numlevel - ptr->post;
-				curt = LEVEL_NEXT(curt);
-				tlen--;
-				cur_tpos++;
-				if (high_pos < cur_tpos)
-					high_pos++;
+				nextt = LEVEL_NEXT(nextt);
+				matchcnt++;
 			}
 			else
 			{
-				/* Not a NOT item, check for normal match */
-				bool		isok = false;
-
-				while (cur_tpos <= high_pos && tlen > 0 && !isok)
-				{
-					isok = checkLevel(curq, curt);
-					curt = LEVEL_NEXT(curt);
-					tlen--;
-					cur_tpos++;
-					if (isok && prevq && prevq->numvar == 0 &&
-						tlen > 0 && cur_tpos <= high_pos)
-					{
-						FieldNot	tmpptr;
-
-						if (ptr)
-							memcpy(&tmpptr, ptr, sizeof(FieldNot));
-						if (checkCond(prevq, qlen + 1,
-									  curt, tlen,
-									  (ptr) ? &tmpptr : NULL,
-									  high_pos - cur_tpos,
-									  true))
-							return true;
-					}
-					if (!isok && ptr && ptr->q)
-						ptr->nt++;
-				}
-				if (!isok)
-					return false;
-
-				if (ptr && ptr->q)
-				{
-					if (checkCond(ptr->q, ptr->nq,
-								  ptr->t, ptr->nt,
-								  NULL,
-								  0,
-								  false))
-						return false;
-					ptr->q = NULL;
-				}
-				low_pos = cur_tpos;
-				high_pos = cur_tpos;
-			}
-		}
-		else
-		{
-			/* Current query item is '*' */
-			low_pos += curq->low;
-
-			if (low_pos > tree_numlevel)
+				/* No more text, or max match count exceeded, so fail */
 				return false;
-
-			high_pos = Min(high_pos + curq->high, tree_numlevel);
-
-			if (ptr && ptr->q)
-			{
-				ptr->nq++;
-				if (qlen == 1)
-					ptr->nt = tree_numlevel - ptr->post;
 			}
 		}
-
-		prevq = curq;
-		curq = LQL_NEXT(curq);
-		qlen--;
 	}
 
-	/* Fail if we've already run out of ltree items */
-	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-		return false;
-
-	/* Remaining lquery items must be NOT or '*' items */
+	/*
+	 * If we're out of text, but query items remain, we can match only if all
+	 * remaining query items permit zero matches.
+	 */
 	while (qlen > 0)
 	{
-		if (curq->numvar)
-		{
-			if (!(curq->flag & LQL_NOT))
-				return false;
-		}
-		else
-		{
-			low_pos += curq->low;
+		int			low;
 
-			if (low_pos > tree_numlevel)
-				return false;
+		/* As above, extract the correct minimum match count. */
+		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+			low = curq->low;
+		else
+			low = 1;
 
-			high_pos = Min(high_pos + curq->high, tree_numlevel);
-		}
+		if (low > 0)
+			return false;
 
 		curq = LQL_NEXT(curq);
 		qlen--;
 	}
 
-	/* Fail if trailing '*'s require more ltree items than we have */
-	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-		return false;
-
-	/* Finish pending NOT check, if any */
-	if (ptr && ptr->q &&
-		checkCond(ptr->q, ptr->nq,
-				  ptr->t, ptr->nt,
-				  NULL,
-				  0,
-				  false))
-		return false;
-
-	return true;
+	/*
+	 * Once we're out of query items, we match only if there's no remaining
+	 * text either.
+	 */
+	return (tlen == 0);
 }
 
 Datum
@@ -338,28 +236,10 @@ ltq_regex(PG_FUNCTION_ARGS)
 {
 	ltree	   *tree = PG_GETARG_LTREE_P(0);
 	lquery	   *query = PG_GETARG_LQUERY_P(1);
-	bool		res = false;
-
-	if (query->flag & LQUERY_HASNOT)
-	{
-		FieldNot	fn;
+	bool		res;
 
-		fn.q = NULL;
-
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						&fn,
-						0,
-						false);
-	}
-	else
-	{
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						NULL,
-						0,
-						false);
-	}
+	res = checkCond(LQUERY_FIRST(query), query->numlevel,
+					LTREE_FIRST(tree), tree->numlevel);
 
 	PG_FREE_IF_COPY(tree, 0);
 	PG_FREE_IF_COPY(query, 1);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index e5f2710..6dcd363 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -65,14 +65,20 @@ typedef struct
 /*
  * In an lquery_level, "flag" contains the union of the variants' flags
  * along with possible LQL_xxx flags; so those bit sets can't overlap.
+ *
+ * "low" and "high" are nominally the minimum and maximum number of matches.
+ * However, for backwards compatibility with pre-v13 on-disk lqueries,
+ * non-'*' levels (those with numvar > 0) only have valid low/high if the
+ * LQL_COUNT flag is set; otherwise those fields are zero, but the behavior
+ * is as if they were both 1.
  */
 typedef struct
 {
 	uint16		totallen;		/* total length of this level, in bytes */
 	uint16		flag;			/* see LQL_xxx and LVAR_xxx flags */
 	uint16		numvar;			/* number of variants; 0 means '*' */
-	uint16		low;			/* minimum repeat count for '*' */
-	uint16		high;			/* maximum repeat count for '*' */
+	uint16		low;			/* minimum repeat count */
+	uint16		high;			/* maximum repeat count */
 	/* Array of maxalign'd lquery_variant structs follows: */
 	char		variants[FLEXIBLE_ARRAY_MEMBER];
 } lquery_level;
@@ -82,6 +88,7 @@ typedef struct
 #define LQL_FIRST(x)	( (lquery_variant*)( ((char*)(x))+LQL_HDRSIZE ) )
 
 #define LQL_NOT		0x10		/* level has '!' (NOT) prefix */
+#define LQL_COUNT	0x20		/* level is non-'*' and has repeat counts */
 
 #ifdef LOWER_NODE
 #define FLG_CANLOOKSIGN(x) ( ( (x) & ( LQL_NOT | LVAR_ANYEND | LVAR_SUBLEXEME ) ) == 0 )
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 2136715..3dd15d8 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -317,6 +317,23 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				state = LQPRS_WAITVAR;
 			}
+			else if (charlen == 1 && t_iseq(ptr, '{'))
+			{
+				lptr->len = ptr - lptr->start -
+					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
+					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
+					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
+				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
+					ereport(ERROR,
+							(errcode(ERRCODE_NAME_TOO_LONG),
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
+
+				curqlevel->flag |= LQL_COUNT;
+				state = LQPRS_WAITFNUM;
+			}
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
 				lptr->len = ptr - lptr->start -
@@ -348,6 +365,7 @@ lquery_in(PG_FUNCTION_ARGS)
 				state = LQPRS_WAITFNUM;
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
+				/* We only get here for '*', so these are correct defaults */
 				curqlevel->low = 0;
 				curqlevel->high = LTREE_MAX_LEVELS;
 				curqlevel = NEXTLEV(curqlevel);
@@ -567,7 +585,11 @@ lquery_out(PG_FUNCTION_ARGS)
 	{
 		totallen++;
 		if (curqlevel->numvar)
+		{
 			totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
+			if (curqlevel->flag & LQL_COUNT)
+				totallen += 2 * 11 + 3;
+		}
 		else
 			totallen += 2 * 11 + 4;
 		curqlevel = LQL_NEXT(curqlevel);
@@ -619,26 +641,37 @@ lquery_out(PG_FUNCTION_ARGS)
 		}
 		else
 		{
+			*ptr = '*';
+			ptr++;
+		}
+
+		if ((curqlevel->flag & LQL_COUNT) || curqlevel->numvar == 0)
+		{
 			if (curqlevel->low == curqlevel->high)
 			{
-				sprintf(ptr, "*{%d}", curqlevel->low);
+				sprintf(ptr, "{%d}", curqlevel->low);
 			}
 			else if (curqlevel->low == 0)
 			{
 				if (curqlevel->high == LTREE_MAX_LEVELS)
 				{
-					*ptr = '*';
-					*(ptr + 1) = '\0';
+					if (curqlevel->numvar == 0)
+					{
+						/* This is default for '*', so print nothing */
+						*ptr = '\0';
+					}
+					else
+						sprintf(ptr, "{,}");
 				}
 				else
-					sprintf(ptr, "*{,%d}", curqlevel->high);
+					sprintf(ptr, "{,%d}", curqlevel->high);
 			}
 			else if (curqlevel->high == LTREE_MAX_LEVELS)
 			{
-				sprintf(ptr, "*{%d,}", curqlevel->low);
+				sprintf(ptr, "{%d,}", curqlevel->low);
 			}
 			else
-				sprintf(ptr, "*{%d,%d}", curqlevel->low, curqlevel->high);
+				sprintf(ptr, "{%d,%d}", curqlevel->low, curqlevel->high);
 			ptr = strchr(ptr, '\0');
 		}
 
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index 827f264..7cb5a36 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -87,6 +87,7 @@ SELECT '1.*.4|3|2.*{1,4}'::lquery;
 SELECT '1.*.4|3|2.*{,4}'::lquery;
 SELECT '1.*.4|3|2.*{1,}'::lquery;
 SELECT '1.*.4|3|2.*{1}'::lquery;
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
 SELECT 'qwerty%@*.tu'::lquery;
 
 SELECT nlevel('1.2.3.4');
@@ -184,6 +185,19 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+SELECT 'a.b'::ltree ~ '!a.!a';
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 2d539f2..ae1fe10 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -60,7 +60,8 @@
      <type>lquery</type> represents a regular-expression-like pattern
      for matching <type>ltree</type> values.  A simple word matches that
      label within a path.  A star symbol (<literal>*</literal>) matches zero
-     or more labels.  For example:
+     or more labels.  These can be joined with dots to form a pattern that
+     must match the whole label path.  For example:
 <synopsis>
 foo         <lineannotation>Match the exact label path <literal>foo</literal></lineannotation>
 *.foo.*     <lineannotation>Match any label path containing the label <literal>foo</literal></lineannotation>
@@ -69,19 +70,25 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>
 
     <para>
-     Star symbols can also be quantified to restrict how many labels
-     they can match:
+     Both star symbols and simple words can be quantified to restrict how many
+     labels they can match:
 <synopsis>
 *{<replaceable>n</replaceable>}        <lineannotation>Match exactly <replaceable>n</replaceable> labels</lineannotation>
 *{<replaceable>n</replaceable>,}       <lineannotation>Match at least <replaceable>n</replaceable> labels</lineannotation>
 *{<replaceable>n</replaceable>,<replaceable>m</replaceable>}      <lineannotation>Match at least <replaceable>n</replaceable> but not more than <replaceable>m</replaceable> labels</lineannotation>
-*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels &mdash; same as </lineannotation> *{0,<replaceable>m</replaceable>}
+*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels &mdash; same as </lineannotation>*{0,<replaceable>m</replaceable>}
+foo{<replaceable>n</replaceable>,<replaceable>m</replaceable>}    <lineannotation>Match at least <replaceable>n</replaceable> but not more than <replaceable>m</replaceable> occurrences of <literal>foo</literal></lineannotation>
+foo{,}      <lineannotation>Match any number of occurrences of <literal>foo</literal>, including zero</lineannotation>
 </synopsis>
+     In the absence of any explicit quantifier, the default for a star symbol
+     is to match any number of labels (that is, <literal>{,}</literal>) while
+     the default for a non-star item is to match exactly once (that
+     is, <literal>{1}</literal>).
     </para>
 
     <para>
      There are several modifiers that can be put at the end of a non-star
-     label in <type>lquery</type> to make it match more than just the exact match:
+     <type>lquery</type> item to make it match more than just the exact match:
 <synopsis>
 @           <lineannotation>Match case-insensitively, for example <literal>a@</literal> matches <literal>A</literal></lineannotation>
 *           <lineannotation>Match any label with this prefix, for example <literal>foo*</literal> matches <literal>foobar</literal></lineannotation>
@@ -97,17 +104,20 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>
 
     <para>
-     Also, you can write several possibly-modified labels separated with
-     <literal>|</literal> (OR) to match any of those labels, and you can put
-     <literal>!</literal> (NOT) at the start to match any label that doesn't
-     match any of the alternatives.
+     Also, you can write several possibly-modified non-star items separated with
+     <literal>|</literal> (OR) to match any of those items, and you can put
+     <literal>!</literal> (NOT) at the start of a non-star group to match any
+     label that doesn't match any of the alternatives.  A quantifier, if any,
+     goes at the end of the group; it means some number of matches for the
+     group as a whole (that is, some number of labels matching or not matching
+     any of the alternatives).
     </para>
 
     <para>
      Here's an annotated example of <type>lquery</type>:
 <programlisting>
-Top.*{0,2}.sport*@.!football|tennis.Russ*|Spain
-a.  b.     c.      d.               e.
+Top.*{0,2}.sport*@.!football|tennis{1,}.Russ*|Spain
+a.  b.     c.      d.                   e.
 </programlisting>
      This query will match any label path that:
     </para>
@@ -129,8 +139,8 @@ a.  b.     c.      d.               e.
      </listitem>
      <listitem>
       <para>
-       then a label not matching <literal>football</literal> nor
-       <literal>tennis</literal>
+       then has one or more labels, none of which
+       match <literal>football</literal> nor <literal>tennis</literal>
       </para>
      </listitem>
      <listitem>
@@ -603,7 +613,7 @@ ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.Astronomy.*';
  Top.Collections.Pictures.Astronomy.Astronauts
 (7 rows)
 
-ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.*.Astronomy.*';
+ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.Astronomy.*';
                 path
 ------------------------------------
  Top.Science.Astronomy

#17

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Tom Lane (#16)

2 attachment(s)

Re: fix for BUG #3720: wrong results at using ltree

I wrote:

Hence, attached are two revised patches that attack the problem
this way. The first one is somewhat unrelated to the original
point --- it's trying to clean up the error messages in ltree_in
and lquery_in so that they are more consistent and agree with
the terminology used in the documentation. (Notably, the term
"level" is used nowhere in the ltree docs, except in the legacy
function name nlevel().)

One thing I changed in that patch was to change the syntax error
reports to say "at character %d" not "in position %d", because
I thought the latter was pretty confusing --- it's not obvious
whether it's counting characters or labels or what. However,
I now notice that what the code is providing is a zero-based
character index, which is out of line with our practice
elsewhere: core parser errors are reported using 1-based indexes.
If we reword these messages then we should adopt that practice too.
Hence, new patch versions that do it like that. (0002 is unchanged.)

regards, tom lane

Attachments:

0001-rationalize-ltree-input-errors-2.patchtext/x-diff; charset=us-ascii; name=0001-rationalize-ltree-input-errors-2.patchDownload

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index c78d372..610cb6f 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -464,7 +464,7 @@ SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
 (1 row)
 
 SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
-ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of ltree labels (65536) exceeds the maximum allowed (65535)
 SELECT nlevel(('1' || repeat('.1', 65534))::ltree || '1');
 ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
 SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
@@ -474,7 +474,7 @@ SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
 (1 row)
 
 SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
-ERROR:  number of lquery levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of lquery items (65536) exceeds the maximum allowed (65535)
 SELECT '*{65535}'::lquery;
   lquery  
 ----------
@@ -485,7 +485,7 @@ SELECT '*{65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{65536}'::lquery;
                ^
-DETAIL:  Low limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  Low limit (65536) exceeds the maximum allowed (65535), at character 3.
 SELECT '*{,65534}'::lquery;
   lquery   
 -----------
@@ -502,7 +502,12 @@ SELECT '*{,65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{,65536}'::lquery;
                ^
-DETAIL:  High limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  High limit (65536) exceeds the maximum allowed (65535), at character 4.
+SELECT '*{4,3}'::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT '*{4,3}'::lquery;
+               ^
+DETAIL:  Low limit (4) is greater than high limit (3), at character 5.
 SELECT '1.2'::ltree  < '2.2'::ltree;
  ?column? 
 ----------
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 2503d47..e806a14 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -17,12 +17,6 @@ PG_FUNCTION_INFO_V1(lquery_in);
 PG_FUNCTION_INFO_V1(lquery_out);
 
 
-#define UNCHAR ereport(ERROR, \
-					   (errcode(ERRCODE_SYNTAX_ERROR), \
-						errmsg("syntax error at position %d", \
-						pos)));
-
-
 typedef struct
 {
 	char	   *start;
@@ -47,7 +41,12 @@ ltree_in(PG_FUNCTION_ARGS)
 	ltree	   *result;
 	ltree_level *curlevel;
 	int			charlen;
-	int			pos = 0;
+	int			pos = 1;		/* character position for error messages */
+
+#define UNCHAR ereport(ERROR, \
+					   errcode(ERRCODE_SYNTAX_ERROR), \
+					   errmsg("ltree syntax error at character %d", \
+							  pos))
 
 	ptr = buf;
 	while (*ptr)
@@ -61,7 +60,7 @@ ltree_in(PG_FUNCTION_ARGS)
 	if (num + 1 > LTREE_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+				 errmsg("number of ltree labels (%d) exceeds the maximum allowed (%d)",
 						num + 1, LTREE_MAX_LEVELS)));
 	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
 	ptr = buf;
@@ -88,10 +87,10 @@ ltree_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
 				lptr++;
@@ -115,10 +114,9 @@ ltree_in(PG_FUNCTION_ARGS)
 		if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 			ereport(ERROR,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("name of level is too long"),
-					 errdetail("Name length is %d, must "
-							   "be < 256, in position %d.",
-							   lptr->wlen, pos)));
+					 errmsg("label string is too long"),
+					 errdetail("Label length is %d, must be at most %d, at character %d.",
+							   lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
 
 		totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
 		lptr++;
@@ -126,8 +124,8 @@ ltree_in(PG_FUNCTION_ARGS)
 	else if (!(state == LTPRS_WAITNAME && lptr == list))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("syntax error"),
-				 errdetail("Unexpected end of line.")));
+				 errmsg("ltree syntax error"),
+				 errdetail("Unexpected end of input.")));
 
 	result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
 	SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
@@ -144,6 +142,8 @@ ltree_in(PG_FUNCTION_ARGS)
 
 	pfree(list);
 	PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }
 
 Datum
@@ -208,7 +208,12 @@ lquery_in(PG_FUNCTION_ARGS)
 	bool		hasnot = false;
 	bool		wasbad = false;
 	int			charlen;
-	int			pos = 0;
+	int			pos = 1;		/* character position for error messages */
+
+#define UNCHAR ereport(ERROR, \
+					   errcode(ERRCODE_SYNTAX_ERROR), \
+					   errmsg("lquery syntax error at character %d", \
+							  pos))
 
 	ptr = buf;
 	while (*ptr)
@@ -230,7 +235,7 @@ lquery_in(PG_FUNCTION_ARGS)
 	if (num > LQUERY_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
+				 errmsg("number of lquery items (%d) exceeds the maximum allowed (%d)",
 						num, LQUERY_MAX_LEVELS)));
 	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
 	ptr = buf;
@@ -305,10 +310,10 @@ lquery_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				state = LQPRS_WAITVAR;
 			}
@@ -321,10 +326,10 @@ lquery_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				state = LQPRS_WAITLEVEL;
 				curqlevel = NEXTLEV(curqlevel);
@@ -361,10 +366,10 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				if (low < 0 || low > LTREE_MAX_LEVELS)
 					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 							 errmsg("lquery syntax error"),
-							 errdetail("Low limit (%d) exceeds the maximum allowed (%d).",
-									   low, LTREE_MAX_LEVELS)));
+							 errdetail("Low limit (%d) exceeds the maximum allowed (%d), at character %d.",
+									   low, LTREE_MAX_LEVELS, pos)));
 
 				curqlevel->low = (uint16) low;
 				state = LQPRS_WAITND;
@@ -380,10 +385,16 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				if (high < 0 || high > LTREE_MAX_LEVELS)
 					ereport(ERROR,
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+							 errmsg("lquery syntax error"),
+							 errdetail("High limit (%d) exceeds the maximum allowed (%d), at character %d.",
+									   high, LTREE_MAX_LEVELS, pos)));
+				else if (curqlevel->low > high)
+					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("lquery syntax error"),
-							 errdetail("High limit (%d) exceeds the maximum allowed (%d).",
-									   high, LTREE_MAX_LEVELS)));
+							 errdetail("Low limit (%d) is greater than high limit (%d), at character %d.",
+									   curqlevel->low, high, pos)));
 
 				curqlevel->high = (uint16) high;
 				state = LQPRS_WAITCLOSE;
@@ -441,7 +452,7 @@ lquery_in(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("lquery syntax error"),
-					 errdetail("Unexpected end of line.")));
+					 errdetail("Unexpected end of input.")));
 
 		lptr->len = ptr - lptr->start -
 			((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
@@ -451,15 +462,14 @@ lquery_in(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("lquery syntax error"),
-					 errdetail("Unexpected end of line.")));
+					 errdetail("Unexpected end of input.")));
 
 		if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 			ereport(ERROR,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("name of level is too long"),
-					 errdetail("Name length is %d, must "
-							   "be < 256, in position %d.",
-							   lptr->wlen, pos)));
+					 errmsg("label string is too long"),
+					 errdetail("Label length is %d, must be at most %d, at character %d.",
+							   lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
 	}
 	else if (state == LQPRS_WAITOPEN)
 		curqlevel->high = LTREE_MAX_LEVELS;
@@ -467,7 +477,7 @@ lquery_in(PG_FUNCTION_ARGS)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("lquery syntax error"),
-				 errdetail("Unexpected end of line.")));
+				 errdetail("Unexpected end of input.")));
 
 	curqlevel = tmpql;
 	totallen = LQUERY_HDRSIZE;
@@ -483,13 +493,6 @@ lquery_in(PG_FUNCTION_ARGS)
 				lptr++;
 			}
 		}
-		else if (curqlevel->low > curqlevel->high)
-			ereport(ERROR,
-					(errcode(ERRCODE_SYNTAX_ERROR),
-					 errmsg("lquery syntax error"),
-					 errdetail("Low limit (%d) is greater than upper (%d).",
-							   curqlevel->low, curqlevel->high)));
-
 		curqlevel = NEXTLEV(curqlevel);
 	}
 
@@ -543,6 +546,8 @@ lquery_in(PG_FUNCTION_ARGS)
 
 	pfree(tmpql);
 	PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }
 
 Datum
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index d8489cb..f6d73b8 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -100,6 +100,7 @@ SELECT '*{65536}'::lquery;
 SELECT '*{,65534}'::lquery;
 SELECT '*{,65535}'::lquery;
 SELECT '*{,65536}'::lquery;
+SELECT '*{4,3}'::lquery;
 
 SELECT '1.2'::ltree  < '2.2'::ltree;
 SELECT '1.2'::ltree  <= '2.2'::ltree;
diff --git a/contrib/ltree_plpython/expected/ltree_plpython.out b/contrib/ltree_plpython/expected/ltree_plpython.out
index 4779755..f28897f 100644
--- a/contrib/ltree_plpython/expected/ltree_plpython.out
+++ b/contrib/ltree_plpython/expected/ltree_plpython.out
@@ -38,6 +38,6 @@ $$;
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
+ERROR:  ltree syntax error at character 1
 CONTEXT:  while creating return value
 PL/Python function "test2"

0002-ltree-not-fixes-and-better-quantifiers-2.patchtext/x-diff; charset=us-ascii; name=0002-ltree-not-fixes-and-better-quantifiers-2.patchDownload

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 610cb6f..5d9102c 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -445,6 +445,12 @@ SELECT '1.*.4|3|2.*{1}'::lquery;
  1.*.4|3|2.*{1}
 (1 row)
 
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
+               lquery               
+------------------------------------
+ foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}
+(1 row)
+
 SELECT 'qwerty%@*.tu'::lquery;
     lquery    
 --------------
@@ -727,7 +733,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d';
@@ -757,7 +763,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!e';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!e.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!e';
@@ -775,7 +781,7 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!f.*';
@@ -793,7 +799,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.!d.*';
@@ -817,13 +823,13 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
@@ -835,7 +841,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*.c.*';
@@ -883,31 +889,31 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*.e';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.!c.*';
@@ -937,19 +943,19 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
@@ -961,7 +967,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
@@ -988,6 +994,78 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
  f
 (1 row)
 
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b'::ltree ~ '!a.!a';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
+ ?column? 
+----------
+ t
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column? 
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 5c7afe5..e7a7f7f 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -9,6 +9,7 @@
 
 #include "catalog/pg_collation.h"
 #include "ltree.h"
+#include "miscadmin.h"
 #include "utils/formatting.h"
 
 PG_FUNCTION_INFO_V1(ltq_regex);
@@ -19,16 +20,6 @@ PG_FUNCTION_INFO_V1(lt_q_rregex);
 
 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 
-typedef struct
-{
-	lquery_level *q;
-	int			nq;
-	ltree_level *t;
-	int			nt;
-	int			posq;
-	int			post;
-} FieldNot;
-
 static char *
 getlexeme(char *start, char *end, int *len)
 {
@@ -99,238 +90,145 @@ ltree_strncasecmp(const char *a, const char *b, size_t s)
 }
 
 /*
- * See if a (non-star) lquery_level matches an ltree_level
+ * See if an lquery_level matches an ltree_level
  *
- * Does not consider level's possible LQL_NOT flag.
+ * This accounts for all flags including LQL_NOT, but does not
+ * consider repetition counts.
  */
 static bool
 checkLevel(lquery_level *curq, ltree_level *curt)
 {
-	int			(*cmpptr) (const char *, const char *, size_t);
 	lquery_variant *curvar = LQL_FIRST(curq);
-	int			i;
+	bool		success;
+
+	success = (curq->flag & LQL_NOT) ? false : true;
+
+	/* numvar == 0 means '*' which matches anything */
+	if (curq->numvar == 0)
+		return success;
 
-	for (i = 0; i < curq->numvar; i++)
+	for (int i = 0; i < curq->numvar; i++)
 	{
+		int			(*cmpptr) (const char *, const char *, size_t);
+
 		cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
 
 		if (curvar->flag & LVAR_SUBLEXEME)
 		{
-			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr, (curvar->flag & LVAR_ANYEND)))
-				return true;
+			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+								(curvar->flag & LVAR_ANYEND)))
+				return success;
 		}
 		else if ((curvar->len == curt->len ||
 				  (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
 				 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
-		{
+			return success;
 
-			return true;
-		}
 		curvar = LVAR_NEXT(curvar);
 	}
-	return false;
+	return !success;
 }
 
 /*
-void
-printFieldNot(FieldNot *fn ) {
-	while(fn->q) {
-		elog(NOTICE,"posQ:%d lenQ:%d posT:%d lenT:%d", fn->posq,fn->nq,fn->post,fn->nt);
-		fn++;
-	}
-}
-*/
-
-/*
- * Try to match an lquery (of query_numlevel items) to an ltree (of
- * tree_numlevel items)
- *
- * If the query contains any NOT flags, "ptr" must point to a FieldNot
- * workspace initialized with ptr->q == NULL.  Otherwise it can be NULL.
- * (LQL_NOT flags will be ignored if ptr == NULL.)
- *
- * high_pos is the last ltree position the first lquery item is allowed
- * to match at; it should be zero for external calls.
- *
- * force_advance must be false except in internal recursive calls.
+ * Try to match an lquery (of qlen items) to an ltree (of tlen items)
  */
 static bool
-checkCond(lquery_level *curq, int query_numlevel,
-		  ltree_level *curt, int tree_numlevel,
-		  FieldNot *ptr,
-		  uint32 high_pos,
-		  bool force_advance)
+checkCond(lquery_level *curq, int qlen,
+		  ltree_level *curt, int tlen)
 {
-	uint32		low_pos = 0,	/* first allowed ltree position for match */
-				cur_tpos = 0;	/* ltree position of curt */
-	int			tlen = tree_numlevel,	/* counts of remaining items */
-				qlen = query_numlevel;
-	lquery_level *prevq = NULL;
-
-	/* advance curq (setting up prevq) if requested */
-	if (force_advance)
-	{
-		Assert(qlen > 0);
-		prevq = curq;
-		curq = LQL_NEXT(curq);
-		qlen--;
-	}
+	/* Since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
 
-	while (tlen > 0 && qlen > 0)
+	/* Normal case where we have some query and text items to match */
+	if (tlen > 0 && qlen > 0)
 	{
-		if (curq->numvar)
+		int			low,
+					high,
+					matchcnt;
+		lquery_level *nextq;
+		ltree_level *nextt;
+
+		/*
+		 * Get min and max repetition counts for this query item, dealing with
+		 * the backwards-compatibility hack that the low/high fields aren't
+		 * meaningful for non-'*' items unless LQL_COUNT is set.
+		 */
+		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+			low = curq->low, high = curq->high;
+		else
+			low = high = 1;
+
+		/* Fail if a match of required number of items is impossible */
+		if (tlen < low || high < low)
+			return false;
+
+		/* Check minimum number of matches */
+		nextt = curt;
+		for (matchcnt = 0; matchcnt < low; matchcnt++)
+		{
+			if (!checkLevel(curq, nextt))
+				return false;
+			nextt = LEVEL_NEXT(nextt);
+		}
+
+		/*
+		 * Recursively check the rest of the pattern against each possible
+		 * start point following this item's match(es).
+		 */
+		nextq = LQL_NEXT(curq);
+		for (;;)
 		{
-			/* Current query item is not '*' */
-			ltree_level *prevt = curt;
+			/*
+			 * If the rest of the pattern matches beginning here, we're good.
+			 */
+			if (checkCond(nextq, qlen - 1,
+						  nextt, tlen - matchcnt))
+				return true;
 
-			/* skip tree items that must be ignored due to prior * items */
-			while (cur_tpos < low_pos)
+			/*
+			 * Otherwise, try to match one more text item to this query item.
+			 */
+			if (matchcnt < high && matchcnt < tlen)
 			{
-				curt = LEVEL_NEXT(curt);
-				tlen--;
-				cur_tpos++;
-				if (tlen == 0)
+				if (!checkLevel(curq, nextt))
 					return false;
-				if (ptr && ptr->q)
-					ptr->nt++;
-			}
-
-			if (ptr && (curq->flag & LQL_NOT))
-			{
-				/* Deal with a NOT item */
-				if (!(prevq && prevq->numvar == 0))
-					prevq = curq;
-				if (ptr->q == NULL)
-				{
-					ptr->t = prevt;
-					ptr->q = prevq;
-					ptr->nt = 1;
-					ptr->nq = 1 + ((prevq == curq) ? 0 : 1);
-					ptr->posq = query_numlevel - qlen - ((prevq == curq) ? 0 : 1);
-					ptr->post = cur_tpos;
-				}
-				else
-				{
-					ptr->nt++;
-					ptr->nq++;
-				}
-
-				if (qlen == 1 && ptr->q->numvar == 0)
-					ptr->nt = tree_numlevel - ptr->post;
-				curt = LEVEL_NEXT(curt);
-				tlen--;
-				cur_tpos++;
-				if (high_pos < cur_tpos)
-					high_pos++;
+				nextt = LEVEL_NEXT(nextt);
+				matchcnt++;
 			}
 			else
 			{
-				/* Not a NOT item, check for normal match */
-				bool		isok = false;
-
-				while (cur_tpos <= high_pos && tlen > 0 && !isok)
-				{
-					isok = checkLevel(curq, curt);
-					curt = LEVEL_NEXT(curt);
-					tlen--;
-					cur_tpos++;
-					if (isok && prevq && prevq->numvar == 0 &&
-						tlen > 0 && cur_tpos <= high_pos)
-					{
-						FieldNot	tmpptr;
-
-						if (ptr)
-							memcpy(&tmpptr, ptr, sizeof(FieldNot));
-						if (checkCond(prevq, qlen + 1,
-									  curt, tlen,
-									  (ptr) ? &tmpptr : NULL,
-									  high_pos - cur_tpos,
-									  true))
-							return true;
-					}
-					if (!isok && ptr && ptr->q)
-						ptr->nt++;
-				}
-				if (!isok)
-					return false;
-
-				if (ptr && ptr->q)
-				{
-					if (checkCond(ptr->q, ptr->nq,
-								  ptr->t, ptr->nt,
-								  NULL,
-								  0,
-								  false))
-						return false;
-					ptr->q = NULL;
-				}
-				low_pos = cur_tpos;
-				high_pos = cur_tpos;
-			}
-		}
-		else
-		{
-			/* Current query item is '*' */
-			low_pos += curq->low;
-
-			if (low_pos > tree_numlevel)
+				/* No more text, or max match count exceeded, so fail */
 				return false;
-
-			high_pos = Min(high_pos + curq->high, tree_numlevel);
-
-			if (ptr && ptr->q)
-			{
-				ptr->nq++;
-				if (qlen == 1)
-					ptr->nt = tree_numlevel - ptr->post;
 			}
 		}
-
-		prevq = curq;
-		curq = LQL_NEXT(curq);
-		qlen--;
 	}
 
-	/* Fail if we've already run out of ltree items */
-	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-		return false;
-
-	/* Remaining lquery items must be NOT or '*' items */
+	/*
+	 * If we're out of text, but query items remain, we can match only if all
+	 * remaining query items permit zero matches.
+	 */
 	while (qlen > 0)
 	{
-		if (curq->numvar)
-		{
-			if (!(curq->flag & LQL_NOT))
-				return false;
-		}
-		else
-		{
-			low_pos += curq->low;
+		int			low;
 
-			if (low_pos > tree_numlevel)
-				return false;
+		/* As above, extract the correct minimum match count. */
+		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+			low = curq->low;
+		else
+			low = 1;
 
-			high_pos = Min(high_pos + curq->high, tree_numlevel);
-		}
+		if (low > 0)
+			return false;
 
 		curq = LQL_NEXT(curq);
 		qlen--;
 	}
 
-	/* Fail if trailing '*'s require more ltree items than we have */
-	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-		return false;
-
-	/* Finish pending NOT check, if any */
-	if (ptr && ptr->q &&
-		checkCond(ptr->q, ptr->nq,
-				  ptr->t, ptr->nt,
-				  NULL,
-				  0,
-				  false))
-		return false;
-
-	return true;
+	/*
+	 * Once we're out of query items, we match only if there's no remaining
+	 * text either.
+	 */
+	return (tlen == 0);
 }
 
 Datum
@@ -338,28 +236,10 @@ ltq_regex(PG_FUNCTION_ARGS)
 {
 	ltree	   *tree = PG_GETARG_LTREE_P(0);
 	lquery	   *query = PG_GETARG_LQUERY_P(1);
-	bool		res = false;
-
-	if (query->flag & LQUERY_HASNOT)
-	{
-		FieldNot	fn;
+	bool		res;
 
-		fn.q = NULL;
-
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						&fn,
-						0,
-						false);
-	}
-	else
-	{
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						NULL,
-						0,
-						false);
-	}
+	res = checkCond(LQUERY_FIRST(query), query->numlevel,
+					LTREE_FIRST(tree), tree->numlevel);
 
 	PG_FREE_IF_COPY(tree, 0);
 	PG_FREE_IF_COPY(query, 1);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 429cdc8..7eac7c9 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -65,14 +65,20 @@ typedef struct
 /*
  * In an lquery_level, "flag" contains the union of the variants' flags
  * along with possible LQL_xxx flags; so those bit sets can't overlap.
+ *
+ * "low" and "high" are nominally the minimum and maximum number of matches.
+ * However, for backwards compatibility with pre-v13 on-disk lqueries,
+ * non-'*' levels (those with numvar > 0) only have valid low/high if the
+ * LQL_COUNT flag is set; otherwise those fields are zero, but the behavior
+ * is as if they were both 1.
  */
 typedef struct
 {
 	uint16		totallen;		/* total length of this level, in bytes */
 	uint16		flag;			/* see LQL_xxx and LVAR_xxx flags */
 	uint16		numvar;			/* number of variants; 0 means '*' */
-	uint16		low;			/* minimum repeat count for '*' */
-	uint16		high;			/* maximum repeat count for '*' */
+	uint16		low;			/* minimum repeat count */
+	uint16		high;			/* maximum repeat count */
 	/* Array of maxalign'd lquery_variant structs follows: */
 	char		variants[FLEXIBLE_ARRAY_MEMBER];
 } lquery_level;
@@ -82,6 +88,7 @@ typedef struct
 #define LQL_FIRST(x)	( (lquery_variant*)( ((char*)(x))+LQL_HDRSIZE ) )
 
 #define LQL_NOT		0x10		/* level has '!' (NOT) prefix */
+#define LQL_COUNT	0x20		/* level is non-'*' and has repeat counts */
 
 #ifdef LOWER_NODE
 #define FLG_CANLOOKSIGN(x) ( ( (x) & ( LQL_NOT | LVAR_ANYEND | LVAR_SUBLEXEME ) ) == 0 )
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index e806a14..c6ea5de 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -317,6 +317,23 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				state = LQPRS_WAITVAR;
 			}
+			else if (charlen == 1 && t_iseq(ptr, '{'))
+			{
+				lptr->len = ptr - lptr->start -
+					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
+					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
+					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
+				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
+					ereport(ERROR,
+							(errcode(ERRCODE_NAME_TOO_LONG),
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
+
+				curqlevel->flag |= LQL_COUNT;
+				state = LQPRS_WAITFNUM;
+			}
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
 				lptr->len = ptr - lptr->start -
@@ -348,6 +365,7 @@ lquery_in(PG_FUNCTION_ARGS)
 				state = LQPRS_WAITFNUM;
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
+				/* We only get here for '*', so these are correct defaults */
 				curqlevel->low = 0;
 				curqlevel->high = LTREE_MAX_LEVELS;
 				curqlevel = NEXTLEV(curqlevel);
@@ -567,7 +585,11 @@ lquery_out(PG_FUNCTION_ARGS)
 	{
 		totallen++;
 		if (curqlevel->numvar)
+		{
 			totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
+			if (curqlevel->flag & LQL_COUNT)
+				totallen += 2 * 11 + 3;
+		}
 		else
 			totallen += 2 * 11 + 4;
 		curqlevel = LQL_NEXT(curqlevel);
@@ -619,26 +641,37 @@ lquery_out(PG_FUNCTION_ARGS)
 		}
 		else
 		{
+			*ptr = '*';
+			ptr++;
+		}
+
+		if ((curqlevel->flag & LQL_COUNT) || curqlevel->numvar == 0)
+		{
 			if (curqlevel->low == curqlevel->high)
 			{
-				sprintf(ptr, "*{%d}", curqlevel->low);
+				sprintf(ptr, "{%d}", curqlevel->low);
 			}
 			else if (curqlevel->low == 0)
 			{
 				if (curqlevel->high == LTREE_MAX_LEVELS)
 				{
-					*ptr = '*';
-					*(ptr + 1) = '\0';
+					if (curqlevel->numvar == 0)
+					{
+						/* This is default for '*', so print nothing */
+						*ptr = '\0';
+					}
+					else
+						sprintf(ptr, "{,}");
 				}
 				else
-					sprintf(ptr, "*{,%d}", curqlevel->high);
+					sprintf(ptr, "{,%d}", curqlevel->high);
 			}
 			else if (curqlevel->high == LTREE_MAX_LEVELS)
 			{
-				sprintf(ptr, "*{%d,}", curqlevel->low);
+				sprintf(ptr, "{%d,}", curqlevel->low);
 			}
 			else
-				sprintf(ptr, "*{%d,%d}", curqlevel->low, curqlevel->high);
+				sprintf(ptr, "{%d,%d}", curqlevel->low, curqlevel->high);
 			ptr = strchr(ptr, '\0');
 		}
 
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index f6d73b8..0cf3dd6 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -87,6 +87,7 @@ SELECT '1.*.4|3|2.*{1,4}'::lquery;
 SELECT '1.*.4|3|2.*{,4}'::lquery;
 SELECT '1.*.4|3|2.*{1,}'::lquery;
 SELECT '1.*.4|3|2.*{1}'::lquery;
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
 SELECT 'qwerty%@*.tu'::lquery;
 
 SELECT nlevel('1.2.3.4');
@@ -184,6 +185,19 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+SELECT 'a.b'::ltree ~ '!a.!a';
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index ae4b33e..d7dd555 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -60,7 +60,8 @@
      <type>lquery</type> represents a regular-expression-like pattern
      for matching <type>ltree</type> values.  A simple word matches that
      label within a path.  A star symbol (<literal>*</literal>) matches zero
-     or more labels.  For example:
+     or more labels.  These can be joined with dots to form a pattern that
+     must match the whole label path.  For example:
 <synopsis>
 foo         <lineannotation>Match the exact label path <literal>foo</literal></lineannotation>
 *.foo.*     <lineannotation>Match any label path containing the label <literal>foo</literal></lineannotation>
@@ -69,19 +70,25 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>
 
     <para>
-     Star symbols can also be quantified to restrict how many labels
-     they can match:
+     Both star symbols and simple words can be quantified to restrict how many
+     labels they can match:
 <synopsis>
 *{<replaceable>n</replaceable>}        <lineannotation>Match exactly <replaceable>n</replaceable> labels</lineannotation>
 *{<replaceable>n</replaceable>,}       <lineannotation>Match at least <replaceable>n</replaceable> labels</lineannotation>
 *{<replaceable>n</replaceable>,<replaceable>m</replaceable>}      <lineannotation>Match at least <replaceable>n</replaceable> but not more than <replaceable>m</replaceable> labels</lineannotation>
-*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels &mdash; same as </lineannotation> *{0,<replaceable>m</replaceable>}
+*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels &mdash; same as </lineannotation>*{0,<replaceable>m</replaceable>}
+foo{<replaceable>n</replaceable>,<replaceable>m</replaceable>}    <lineannotation>Match at least <replaceable>n</replaceable> but not more than <replaceable>m</replaceable> occurrences of <literal>foo</literal></lineannotation>
+foo{,}      <lineannotation>Match any number of occurrences of <literal>foo</literal>, including zero</lineannotation>
 </synopsis>
+     In the absence of any explicit quantifier, the default for a star symbol
+     is to match any number of labels (that is, <literal>{,}</literal>) while
+     the default for a non-star item is to match exactly once (that
+     is, <literal>{1}</literal>).
     </para>
 
     <para>
      There are several modifiers that can be put at the end of a non-star
-     label in <type>lquery</type> to make it match more than just the exact match:
+     <type>lquery</type> item to make it match more than just the exact match:
 <synopsis>
 @           <lineannotation>Match case-insensitively, for example <literal>a@</literal> matches <literal>A</literal></lineannotation>
 *           <lineannotation>Match any label with this prefix, for example <literal>foo*</literal> matches <literal>foobar</literal></lineannotation>
@@ -97,17 +104,20 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>
 
     <para>
-     Also, you can write several possibly-modified labels separated with
-     <literal>|</literal> (OR) to match any of those labels, and you can put
-     <literal>!</literal> (NOT) at the start to match any label that doesn't
-     match any of the alternatives.
+     Also, you can write several possibly-modified non-star items separated with
+     <literal>|</literal> (OR) to match any of those items, and you can put
+     <literal>!</literal> (NOT) at the start of a non-star group to match any
+     label that doesn't match any of the alternatives.  A quantifier, if any,
+     goes at the end of the group; it means some number of matches for the
+     group as a whole (that is, some number of labels matching or not matching
+     any of the alternatives).
     </para>
 
     <para>
      Here's an annotated example of <type>lquery</type>:
 <programlisting>
-Top.*{0,2}.sport*@.!football|tennis.Russ*|Spain
-a.  b.     c.      d.               e.
+Top.*{0,2}.sport*@.!football|tennis{1,}.Russ*|Spain
+a.  b.     c.      d.                   e.
 </programlisting>
      This query will match any label path that:
     </para>
@@ -129,8 +139,8 @@ a.  b.     c.      d.               e.
      </listitem>
      <listitem>
       <para>
-       then a label not matching <literal>football</literal> nor
-       <literal>tennis</literal>
+       then has one or more labels, none of which
+       match <literal>football</literal> nor <literal>tennis</literal>
       </para>
      </listitem>
      <listitem>
@@ -632,7 +642,7 @@ ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.Astronomy.*';
  Top.Collections.Pictures.Astronomy.Astronauts
 (7 rows)
 
-ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.*.Astronomy.*';
+ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.Astronomy.*';
                 path
 ------------------------------------
  Top.Science.Astronomy

#18

Nikita Glukhov

n.gluhov@postgrespro.ru

almost 6 years ago

In reply to: Tom Lane (#17)

2 attachment(s)

Re: fix for BUG #3720: wrong results at using ltree

On 30.03.2020 21:00, Tom Lane wrote:

Hence, new patch versions that do it like that. (0002 is unchanged.)

I tried to simplify a bit loops in checkCond() by merging two of them into
one with an explicit exit condition. Also I added return statement after
this loop, so it's now clear that we can't fall into next "while" loop.

The rest code in 0001 and 0002 is unchanged.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Attachments:

0001-rationalize-ltree-input-errors-3.patchtext/x-patch; name=0001-rationalize-ltree-input-errors-3.patchDownload

From b30c07ec318edefdcc9faa6c2158f4bb56114789 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Tue, 31 Mar 2020 00:13:40 +0300
Subject: [PATCH v3 1/2] Rationalize ltree input errors

---
 contrib/ltree/expected/ltree.out                   | 13 ++-
 contrib/ltree/ltree_io.c                           | 99 ++++++++++++----------
 contrib/ltree/sql/ltree.sql                        |  1 +
 contrib/ltree_plpython/expected/ltree_plpython.out |  2 +-
 4 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index c78d372..610cb6f 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -464,7 +464,7 @@ SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
 (1 row)
 
 SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
-ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of ltree labels (65536) exceeds the maximum allowed (65535)
 SELECT nlevel(('1' || repeat('.1', 65534))::ltree || '1');
 ERROR:  number of ltree levels (65536) exceeds the maximum allowed (65535)
 SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
@@ -474,7 +474,7 @@ SELECT ('1' || repeat('.1', 65534))::lquery IS NULL;
 (1 row)
 
 SELECT ('1' || repeat('.1', 65535))::lquery IS NULL;
-ERROR:  number of lquery levels (65536) exceeds the maximum allowed (65535)
+ERROR:  number of lquery items (65536) exceeds the maximum allowed (65535)
 SELECT '*{65535}'::lquery;
   lquery  
 ----------
@@ -485,7 +485,7 @@ SELECT '*{65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{65536}'::lquery;
                ^
-DETAIL:  Low limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  Low limit (65536) exceeds the maximum allowed (65535), at character 3.
 SELECT '*{,65534}'::lquery;
   lquery   
 -----------
@@ -502,7 +502,12 @@ SELECT '*{,65536}'::lquery;
 ERROR:  lquery syntax error
 LINE 1: SELECT '*{,65536}'::lquery;
                ^
-DETAIL:  High limit (65536) exceeds the maximum allowed (65535).
+DETAIL:  High limit (65536) exceeds the maximum allowed (65535), at character 4.
+SELECT '*{4,3}'::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT '*{4,3}'::lquery;
+               ^
+DETAIL:  Low limit (4) is greater than high limit (3), at character 5.
 SELECT '1.2'::ltree  < '2.2'::ltree;
  ?column? 
 ----------
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 2503d47..e806a14 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -17,12 +17,6 @@ PG_FUNCTION_INFO_V1(lquery_in);
 PG_FUNCTION_INFO_V1(lquery_out);
 
 
-#define UNCHAR ereport(ERROR, \
-					   (errcode(ERRCODE_SYNTAX_ERROR), \
-						errmsg("syntax error at position %d", \
-						pos)));
-
-
 typedef struct
 {
 	char	   *start;
@@ -47,7 +41,12 @@ ltree_in(PG_FUNCTION_ARGS)
 	ltree	   *result;
 	ltree_level *curlevel;
 	int			charlen;
-	int			pos = 0;
+	int			pos = 1;		/* character position for error messages */
+
+#define UNCHAR ereport(ERROR, \
+					   errcode(ERRCODE_SYNTAX_ERROR), \
+					   errmsg("ltree syntax error at character %d", \
+							  pos))
 
 	ptr = buf;
 	while (*ptr)
@@ -61,7 +60,7 @@ ltree_in(PG_FUNCTION_ARGS)
 	if (num + 1 > LTREE_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
+				 errmsg("number of ltree labels (%d) exceeds the maximum allowed (%d)",
 						num + 1, LTREE_MAX_LEVELS)));
 	list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
 	ptr = buf;
@@ -88,10 +87,10 @@ ltree_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
 				lptr++;
@@ -115,10 +114,9 @@ ltree_in(PG_FUNCTION_ARGS)
 		if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 			ereport(ERROR,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("name of level is too long"),
-					 errdetail("Name length is %d, must "
-							   "be < 256, in position %d.",
-							   lptr->wlen, pos)));
+					 errmsg("label string is too long"),
+					 errdetail("Label length is %d, must be at most %d, at character %d.",
+							   lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
 
 		totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
 		lptr++;
@@ -126,8 +124,8 @@ ltree_in(PG_FUNCTION_ARGS)
 	else if (!(state == LTPRS_WAITNAME && lptr == list))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("syntax error"),
-				 errdetail("Unexpected end of line.")));
+				 errmsg("ltree syntax error"),
+				 errdetail("Unexpected end of input.")));
 
 	result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
 	SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
@@ -144,6 +142,8 @@ ltree_in(PG_FUNCTION_ARGS)
 
 	pfree(list);
 	PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }
 
 Datum
@@ -208,7 +208,12 @@ lquery_in(PG_FUNCTION_ARGS)
 	bool		hasnot = false;
 	bool		wasbad = false;
 	int			charlen;
-	int			pos = 0;
+	int			pos = 1;		/* character position for error messages */
+
+#define UNCHAR ereport(ERROR, \
+					   errcode(ERRCODE_SYNTAX_ERROR), \
+					   errmsg("lquery syntax error at character %d", \
+							  pos))
 
 	ptr = buf;
 	while (*ptr)
@@ -230,7 +235,7 @@ lquery_in(PG_FUNCTION_ARGS)
 	if (num > LQUERY_MAX_LEVELS)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("number of lquery levels (%d) exceeds the maximum allowed (%d)",
+				 errmsg("number of lquery items (%d) exceeds the maximum allowed (%d)",
 						num, LQUERY_MAX_LEVELS)));
 	curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
 	ptr = buf;
@@ -305,10 +310,10 @@ lquery_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				state = LQPRS_WAITVAR;
 			}
@@ -321,10 +326,10 @@ lquery_in(PG_FUNCTION_ARGS)
 				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 					ereport(ERROR,
 							(errcode(ERRCODE_NAME_TOO_LONG),
-							 errmsg("name of level is too long"),
-							 errdetail("Name length is %d, must "
-									   "be < 256, in position %d.",
-									   lptr->wlen, pos)));
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
 
 				state = LQPRS_WAITLEVEL;
 				curqlevel = NEXTLEV(curqlevel);
@@ -361,10 +366,10 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				if (low < 0 || low > LTREE_MAX_LEVELS)
 					ereport(ERROR,
-							(errcode(ERRCODE_SYNTAX_ERROR),
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 							 errmsg("lquery syntax error"),
-							 errdetail("Low limit (%d) exceeds the maximum allowed (%d).",
-									   low, LTREE_MAX_LEVELS)));
+							 errdetail("Low limit (%d) exceeds the maximum allowed (%d), at character %d.",
+									   low, LTREE_MAX_LEVELS, pos)));
 
 				curqlevel->low = (uint16) low;
 				state = LQPRS_WAITND;
@@ -380,10 +385,16 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				if (high < 0 || high > LTREE_MAX_LEVELS)
 					ereport(ERROR,
+							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+							 errmsg("lquery syntax error"),
+							 errdetail("High limit (%d) exceeds the maximum allowed (%d), at character %d.",
+									   high, LTREE_MAX_LEVELS, pos)));
+				else if (curqlevel->low > high)
+					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("lquery syntax error"),
-							 errdetail("High limit (%d) exceeds the maximum allowed (%d).",
-									   high, LTREE_MAX_LEVELS)));
+							 errdetail("Low limit (%d) is greater than high limit (%d), at character %d.",
+									   curqlevel->low, high, pos)));
 
 				curqlevel->high = (uint16) high;
 				state = LQPRS_WAITCLOSE;
@@ -441,7 +452,7 @@ lquery_in(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("lquery syntax error"),
-					 errdetail("Unexpected end of line.")));
+					 errdetail("Unexpected end of input.")));
 
 		lptr->len = ptr - lptr->start -
 			((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
@@ -451,15 +462,14 @@ lquery_in(PG_FUNCTION_ARGS)
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
 					 errmsg("lquery syntax error"),
-					 errdetail("Unexpected end of line.")));
+					 errdetail("Unexpected end of input.")));
 
 		if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
 			ereport(ERROR,
 					(errcode(ERRCODE_NAME_TOO_LONG),
-					 errmsg("name of level is too long"),
-					 errdetail("Name length is %d, must "
-							   "be < 256, in position %d.",
-							   lptr->wlen, pos)));
+					 errmsg("label string is too long"),
+					 errdetail("Label length is %d, must be at most %d, at character %d.",
+							   lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
 	}
 	else if (state == LQPRS_WAITOPEN)
 		curqlevel->high = LTREE_MAX_LEVELS;
@@ -467,7 +477,7 @@ lquery_in(PG_FUNCTION_ARGS)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("lquery syntax error"),
-				 errdetail("Unexpected end of line.")));
+				 errdetail("Unexpected end of input.")));
 
 	curqlevel = tmpql;
 	totallen = LQUERY_HDRSIZE;
@@ -483,13 +493,6 @@ lquery_in(PG_FUNCTION_ARGS)
 				lptr++;
 			}
 		}
-		else if (curqlevel->low > curqlevel->high)
-			ereport(ERROR,
-					(errcode(ERRCODE_SYNTAX_ERROR),
-					 errmsg("lquery syntax error"),
-					 errdetail("Low limit (%d) is greater than upper (%d).",
-							   curqlevel->low, curqlevel->high)));
-
 		curqlevel = NEXTLEV(curqlevel);
 	}
 
@@ -543,6 +546,8 @@ lquery_in(PG_FUNCTION_ARGS)
 
 	pfree(tmpql);
 	PG_RETURN_POINTER(result);
+
+#undef UNCHAR
 }
 
 Datum
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index d8489cb..f6d73b8 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -100,6 +100,7 @@ SELECT '*{65536}'::lquery;
 SELECT '*{,65534}'::lquery;
 SELECT '*{,65535}'::lquery;
 SELECT '*{,65536}'::lquery;
+SELECT '*{4,3}'::lquery;
 
 SELECT '1.2'::ltree  < '2.2'::ltree;
 SELECT '1.2'::ltree  <= '2.2'::ltree;
diff --git a/contrib/ltree_plpython/expected/ltree_plpython.out b/contrib/ltree_plpython/expected/ltree_plpython.out
index 4779755..f28897f 100644
--- a/contrib/ltree_plpython/expected/ltree_plpython.out
+++ b/contrib/ltree_plpython/expected/ltree_plpython.out
@@ -38,6 +38,6 @@ $$;
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
+ERROR:  ltree syntax error at character 1
 CONTEXT:  while creating return value
 PL/Python function "test2"
-- 
2.7.4

0002-ltree-not-fixes-and-better-quantifiers-3.patchtext/x-patch; name=0002-ltree-not-fixes-and-better-quantifiers-3.patchDownload

From 473635c0350c68bb037796131998955aeee48a06 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Tue, 31 Mar 2020 00:14:05 +0300
Subject: [PATCH v3 2/2] ltree not fixes and better quantifiers

---
 contrib/ltree/expected/ltree.out | 110 ++++++++++++--
 contrib/ltree/lquery_op.c        | 305 +++++++++++----------------------------
 contrib/ltree/ltree.h            |  11 +-
 contrib/ltree/ltree_io.c         |  45 +++++-
 contrib/ltree/sql/ltree.sql      |  14 ++
 doc/src/sgml/ltree.sgml          |  38 +++--
 6 files changed, 268 insertions(+), 255 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 610cb6f..5d9102c 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -445,6 +445,12 @@ SELECT '1.*.4|3|2.*{1}'::lquery;
  1.*.4|3|2.*{1}
 (1 row)
 
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
+               lquery               
+------------------------------------
+ foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}
+(1 row)
+
 SELECT 'qwerty%@*.tu'::lquery;
     lquery    
 --------------
@@ -727,7 +733,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!d';
@@ -757,7 +763,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!e';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!e.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!e';
@@ -775,7 +781,7 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*.!f.*';
@@ -793,7 +799,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!f.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.!d.*';
@@ -817,13 +823,13 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.!d.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.a.*.!d.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
@@ -835,7 +841,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*.c.*';
@@ -883,31 +889,31 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*.e';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*.e';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*.e';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.!c.*';
@@ -937,19 +943,19 @@ SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*{1}.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '!b.*{1}.!c.*';
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
@@ -961,7 +967,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*{1}.!c.*';
 SELECT 'a.b.c.d.e'::ltree ~ '*.!b.*.!c.*';
  ?column? 
 ----------
- f
+ t
 (1 row)
 
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
@@ -988,6 +994,78 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
  f
 (1 row)
 
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b'::ltree ~ '!a.!a';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+ ?column? 
+----------
+ f
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
+ ?column? 
+----------
+ t
+(1 row)
+
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
  ?column? 
 ----------
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index 5c7afe5..2659770 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -9,6 +9,7 @@
 
 #include "catalog/pg_collation.h"
 #include "ltree.h"
+#include "miscadmin.h"
 #include "utils/formatting.h"
 
 PG_FUNCTION_INFO_V1(ltq_regex);
@@ -19,16 +20,6 @@ PG_FUNCTION_INFO_V1(lt_q_rregex);
 
 #define NEXTVAL(x) ( (lquery*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
 
-typedef struct
-{
-	lquery_level *q;
-	int			nq;
-	ltree_level *t;
-	int			nt;
-	int			posq;
-	int			post;
-} FieldNot;
-
 static char *
 getlexeme(char *start, char *end, int *len)
 {
@@ -99,238 +90,136 @@ ltree_strncasecmp(const char *a, const char *b, size_t s)
 }
 
 /*
- * See if a (non-star) lquery_level matches an ltree_level
+ * See if an lquery_level matches an ltree_level
  *
- * Does not consider level's possible LQL_NOT flag.
+ * This accounts for all flags including LQL_NOT, but does not
+ * consider repetition counts.
  */
 static bool
 checkLevel(lquery_level *curq, ltree_level *curt)
 {
-	int			(*cmpptr) (const char *, const char *, size_t);
 	lquery_variant *curvar = LQL_FIRST(curq);
-	int			i;
+	bool		success;
+
+	success = (curq->flag & LQL_NOT) ? false : true;
+
+	/* numvar == 0 means '*' which matches anything */
+	if (curq->numvar == 0)
+		return success;
 
-	for (i = 0; i < curq->numvar; i++)
+	for (int i = 0; i < curq->numvar; i++)
 	{
+		int			(*cmpptr) (const char *, const char *, size_t);
+
 		cmpptr = (curvar->flag & LVAR_INCASE) ? ltree_strncasecmp : strncmp;
 
 		if (curvar->flag & LVAR_SUBLEXEME)
 		{
-			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr, (curvar->flag & LVAR_ANYEND)))
-				return true;
+			if (compare_subnode(curt, curvar->name, curvar->len, cmpptr,
+								(curvar->flag & LVAR_ANYEND)))
+				return success;
 		}
 		else if ((curvar->len == curt->len ||
 				  (curt->len > curvar->len && (curvar->flag & LVAR_ANYEND))) &&
 				 (*cmpptr) (curvar->name, curt->name, curvar->len) == 0)
-		{
+			return success;
 
-			return true;
-		}
 		curvar = LVAR_NEXT(curvar);
 	}
-	return false;
+	return !success;
 }
 
 /*
-void
-printFieldNot(FieldNot *fn ) {
-	while(fn->q) {
-		elog(NOTICE,"posQ:%d lenQ:%d posT:%d lenT:%d", fn->posq,fn->nq,fn->post,fn->nt);
-		fn++;
-	}
-}
-*/
-
-/*
- * Try to match an lquery (of query_numlevel items) to an ltree (of
- * tree_numlevel items)
- *
- * If the query contains any NOT flags, "ptr" must point to a FieldNot
- * workspace initialized with ptr->q == NULL.  Otherwise it can be NULL.
- * (LQL_NOT flags will be ignored if ptr == NULL.)
- *
- * high_pos is the last ltree position the first lquery item is allowed
- * to match at; it should be zero for external calls.
- *
- * force_advance must be false except in internal recursive calls.
+ * Try to match an lquery (of qlen items) to an ltree (of tlen items)
  */
 static bool
-checkCond(lquery_level *curq, int query_numlevel,
-		  ltree_level *curt, int tree_numlevel,
-		  FieldNot *ptr,
-		  uint32 high_pos,
-		  bool force_advance)
+checkCond(lquery_level *curq, int qlen,
+		  ltree_level *curt, int tlen)
 {
-	uint32		low_pos = 0,	/* first allowed ltree position for match */
-				cur_tpos = 0;	/* ltree position of curt */
-	int			tlen = tree_numlevel,	/* counts of remaining items */
-				qlen = query_numlevel;
-	lquery_level *prevq = NULL;
-
-	/* advance curq (setting up prevq) if requested */
-	if (force_advance)
-	{
-		Assert(qlen > 0);
-		prevq = curq;
-		curq = LQL_NEXT(curq);
-		qlen--;
-	}
+	/* Since this function recurses, it could be driven to stack overflow */
+	check_stack_depth();
 
-	while (tlen > 0 && qlen > 0)
+	/* Normal case where we have some query and text items to match */
+	if (tlen > 0 && qlen > 0)
 	{
-		if (curq->numvar)
-		{
-			/* Current query item is not '*' */
-			ltree_level *prevt = curt;
+		int			low,
+					high,
+					matchcnt;
+		lquery_level *nextq;
+
+		/*
+		 * Get min and max repetition counts for this query item, dealing with
+		 * the backwards-compatibility hack that the low/high fields aren't
+		 * meaningful for non-'*' items unless LQL_COUNT is set.
+		 */
+		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+			low = curq->low, high = curq->high;
+		else
+			low = high = 1;
 
-			/* skip tree items that must be ignored due to prior * items */
-			while (cur_tpos < low_pos)
-			{
-				curt = LEVEL_NEXT(curt);
-				tlen--;
-				cur_tpos++;
-				if (tlen == 0)
-					return false;
-				if (ptr && ptr->q)
-					ptr->nt++;
-			}
+		/* Fail if a match of required number of items is impossible */
+		if (tlen < low || high < low)
+			return false;
 
-			if (ptr && (curq->flag & LQL_NOT))
-			{
-				/* Deal with a NOT item */
-				if (!(prevq && prevq->numvar == 0))
-					prevq = curq;
-				if (ptr->q == NULL)
-				{
-					ptr->t = prevt;
-					ptr->q = prevq;
-					ptr->nt = 1;
-					ptr->nq = 1 + ((prevq == curq) ? 0 : 1);
-					ptr->posq = query_numlevel - qlen - ((prevq == curq) ? 0 : 1);
-					ptr->post = cur_tpos;
-				}
-				else
-				{
-					ptr->nt++;
-					ptr->nq++;
-				}
-
-				if (qlen == 1 && ptr->q->numvar == 0)
-					ptr->nt = tree_numlevel - ptr->post;
-				curt = LEVEL_NEXT(curt);
-				tlen--;
-				cur_tpos++;
-				if (high_pos < cur_tpos)
-					high_pos++;
-			}
-			else
-			{
-				/* Not a NOT item, check for normal match */
-				bool		isok = false;
-
-				while (cur_tpos <= high_pos && tlen > 0 && !isok)
-				{
-					isok = checkLevel(curq, curt);
-					curt = LEVEL_NEXT(curt);
-					tlen--;
-					cur_tpos++;
-					if (isok && prevq && prevq->numvar == 0 &&
-						tlen > 0 && cur_tpos <= high_pos)
-					{
-						FieldNot	tmpptr;
-
-						if (ptr)
-							memcpy(&tmpptr, ptr, sizeof(FieldNot));
-						if (checkCond(prevq, qlen + 1,
-									  curt, tlen,
-									  (ptr) ? &tmpptr : NULL,
-									  high_pos - cur_tpos,
-									  true))
-							return true;
-					}
-					if (!isok && ptr && ptr->q)
-						ptr->nt++;
-				}
-				if (!isok)
-					return false;
-
-				if (ptr && ptr->q)
-				{
-					if (checkCond(ptr->q, ptr->nq,
-								  ptr->t, ptr->nt,
-								  NULL,
-								  0,
-								  false))
-						return false;
-					ptr->q = NULL;
-				}
-				low_pos = cur_tpos;
-				high_pos = cur_tpos;
-			}
-		}
-		else
+		/* Limit "high" to the ltree length "tlen" */
+		if (high > tlen)
+			high = tlen;
+
+		/*
+		 * Recursively check the rest of the pattern against each possible
+		 * start point following this item's match(es).
+		 */
+		nextq = LQL_NEXT(curq);
+		qlen--;
+
+		for (matchcnt = 0; matchcnt < high; matchcnt++)
 		{
-			/* Current query item is '*' */
-			low_pos += curq->low;
+			/*
+			 * If the rest of the pattern matches beginning here, we're good.
+			 */
+			if (matchcnt >= low && checkCond(nextq, qlen, curt, tlen))
+				return true;
 
-			if (low_pos > tree_numlevel)
+			/*
+			 * Otherwise, try to match one more text item to this query item.
+			 */
+			if (!checkLevel(curq, curt))
 				return false;
 
-			high_pos = Min(high_pos + curq->high, tree_numlevel);
-
-			if (ptr && ptr->q)
-			{
-				ptr->nq++;
-				if (qlen == 1)
-					ptr->nt = tree_numlevel - ptr->post;
-			}
+			curt = LEVEL_NEXT(curt);
+			tlen--;
 		}
 
-		prevq = curq;
-		curq = LQL_NEXT(curq);
-		qlen--;
+		/* Max match count exceeded, so simply check the rest of the pattern */
+		return checkCond(nextq, qlen, curt, tlen);
 	}
 
-	/* Fail if we've already run out of ltree items */
-	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-		return false;
-
-	/* Remaining lquery items must be NOT or '*' items */
+	/*
+	 * If we're out of text, but query items remain, we can match only if all
+	 * remaining query items permit zero matches.
+	 */
 	while (qlen > 0)
 	{
-		if (curq->numvar)
-		{
-			if (!(curq->flag & LQL_NOT))
-				return false;
-		}
-		else
-		{
-			low_pos += curq->low;
+		int			low;
 
-			if (low_pos > tree_numlevel)
-				return false;
+		/* As above, extract the correct minimum match count. */
+		if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
+			low = curq->low;
+		else
+			low = 1;
 
-			high_pos = Min(high_pos + curq->high, tree_numlevel);
-		}
+		if (low > 0)
+			return false;
 
 		curq = LQL_NEXT(curq);
 		qlen--;
 	}
 
-	/* Fail if trailing '*'s require more ltree items than we have */
-	if (low_pos > tree_numlevel || tree_numlevel > high_pos)
-		return false;
-
-	/* Finish pending NOT check, if any */
-	if (ptr && ptr->q &&
-		checkCond(ptr->q, ptr->nq,
-				  ptr->t, ptr->nt,
-				  NULL,
-				  0,
-				  false))
-		return false;
-
-	return true;
+	/*
+	 * Once we're out of query items, we match only if there's no remaining
+	 * text either.
+	 */
+	return (tlen == 0);
 }
 
 Datum
@@ -338,28 +227,10 @@ ltq_regex(PG_FUNCTION_ARGS)
 {
 	ltree	   *tree = PG_GETARG_LTREE_P(0);
 	lquery	   *query = PG_GETARG_LQUERY_P(1);
-	bool		res = false;
-
-	if (query->flag & LQUERY_HASNOT)
-	{
-		FieldNot	fn;
-
-		fn.q = NULL;
+	bool		res;
 
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						&fn,
-						0,
-						false);
-	}
-	else
-	{
-		res = checkCond(LQUERY_FIRST(query), query->numlevel,
-						LTREE_FIRST(tree), tree->numlevel,
-						NULL,
-						0,
-						false);
-	}
+	res = checkCond(LQUERY_FIRST(query), query->numlevel,
+					LTREE_FIRST(tree), tree->numlevel);
 
 	PG_FREE_IF_COPY(tree, 0);
 	PG_FREE_IF_COPY(query, 1);
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 429cdc8..7eac7c9 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -65,14 +65,20 @@ typedef struct
 /*
  * In an lquery_level, "flag" contains the union of the variants' flags
  * along with possible LQL_xxx flags; so those bit sets can't overlap.
+ *
+ * "low" and "high" are nominally the minimum and maximum number of matches.
+ * However, for backwards compatibility with pre-v13 on-disk lqueries,
+ * non-'*' levels (those with numvar > 0) only have valid low/high if the
+ * LQL_COUNT flag is set; otherwise those fields are zero, but the behavior
+ * is as if they were both 1.
  */
 typedef struct
 {
 	uint16		totallen;		/* total length of this level, in bytes */
 	uint16		flag;			/* see LQL_xxx and LVAR_xxx flags */
 	uint16		numvar;			/* number of variants; 0 means '*' */
-	uint16		low;			/* minimum repeat count for '*' */
-	uint16		high;			/* maximum repeat count for '*' */
+	uint16		low;			/* minimum repeat count */
+	uint16		high;			/* maximum repeat count */
 	/* Array of maxalign'd lquery_variant structs follows: */
 	char		variants[FLEXIBLE_ARRAY_MEMBER];
 } lquery_level;
@@ -82,6 +88,7 @@ typedef struct
 #define LQL_FIRST(x)	( (lquery_variant*)( ((char*)(x))+LQL_HDRSIZE ) )
 
 #define LQL_NOT		0x10		/* level has '!' (NOT) prefix */
+#define LQL_COUNT	0x20		/* level is non-'*' and has repeat counts */
 
 #ifdef LOWER_NODE
 #define FLG_CANLOOKSIGN(x) ( ( (x) & ( LQL_NOT | LVAR_ANYEND | LVAR_SUBLEXEME ) ) == 0 )
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index e806a14..c6ea5de 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -317,6 +317,23 @@ lquery_in(PG_FUNCTION_ARGS)
 
 				state = LQPRS_WAITVAR;
 			}
+			else if (charlen == 1 && t_iseq(ptr, '{'))
+			{
+				lptr->len = ptr - lptr->start -
+					((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
+					((lptr->flag & LVAR_INCASE) ? 1 : 0) -
+					((lptr->flag & LVAR_ANYEND) ? 1 : 0);
+				if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
+					ereport(ERROR,
+							(errcode(ERRCODE_NAME_TOO_LONG),
+							 errmsg("label string is too long"),
+							 errdetail("Label length is %d, must be at most %d, at character %d.",
+									   lptr->wlen, LTREE_LABEL_MAX_CHARS,
+									   pos)));
+
+				curqlevel->flag |= LQL_COUNT;
+				state = LQPRS_WAITFNUM;
+			}
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
 				lptr->len = ptr - lptr->start -
@@ -348,6 +365,7 @@ lquery_in(PG_FUNCTION_ARGS)
 				state = LQPRS_WAITFNUM;
 			else if (charlen == 1 && t_iseq(ptr, '.'))
 			{
+				/* We only get here for '*', so these are correct defaults */
 				curqlevel->low = 0;
 				curqlevel->high = LTREE_MAX_LEVELS;
 				curqlevel = NEXTLEV(curqlevel);
@@ -567,7 +585,11 @@ lquery_out(PG_FUNCTION_ARGS)
 	{
 		totallen++;
 		if (curqlevel->numvar)
+		{
 			totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
+			if (curqlevel->flag & LQL_COUNT)
+				totallen += 2 * 11 + 3;
+		}
 		else
 			totallen += 2 * 11 + 4;
 		curqlevel = LQL_NEXT(curqlevel);
@@ -619,26 +641,37 @@ lquery_out(PG_FUNCTION_ARGS)
 		}
 		else
 		{
+			*ptr = '*';
+			ptr++;
+		}
+
+		if ((curqlevel->flag & LQL_COUNT) || curqlevel->numvar == 0)
+		{
 			if (curqlevel->low == curqlevel->high)
 			{
-				sprintf(ptr, "*{%d}", curqlevel->low);
+				sprintf(ptr, "{%d}", curqlevel->low);
 			}
 			else if (curqlevel->low == 0)
 			{
 				if (curqlevel->high == LTREE_MAX_LEVELS)
 				{
-					*ptr = '*';
-					*(ptr + 1) = '\0';
+					if (curqlevel->numvar == 0)
+					{
+						/* This is default for '*', so print nothing */
+						*ptr = '\0';
+					}
+					else
+						sprintf(ptr, "{,}");
 				}
 				else
-					sprintf(ptr, "*{,%d}", curqlevel->high);
+					sprintf(ptr, "{,%d}", curqlevel->high);
 			}
 			else if (curqlevel->high == LTREE_MAX_LEVELS)
 			{
-				sprintf(ptr, "*{%d,}", curqlevel->low);
+				sprintf(ptr, "{%d,}", curqlevel->low);
 			}
 			else
-				sprintf(ptr, "*{%d,%d}", curqlevel->low, curqlevel->high);
+				sprintf(ptr, "{%d,%d}", curqlevel->low, curqlevel->high);
 			ptr = strchr(ptr, '\0');
 		}
 
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index f6d73b8..0cf3dd6 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -87,6 +87,7 @@ SELECT '1.*.4|3|2.*{1,4}'::lquery;
 SELECT '1.*.4|3|2.*{,4}'::lquery;
 SELECT '1.*.4|3|2.*{1,}'::lquery;
 SELECT '1.*.4|3|2.*{1}'::lquery;
+SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
 SELECT 'qwerty%@*.tu'::lquery;
 
 SELECT nlevel('1.2.3.4');
@@ -184,6 +185,19 @@ SELECT 'a.b.c.d.e'::ltree ~ 'a.*{2}.*{2}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{2}.e';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{1}.*{4}';
 SELECT 'a.b.c.d.e'::ltree ~ 'a.*{5}.*';
+SELECT '5.0.1.0'::ltree ~ '5.!0.!0.0';
+SELECT 'a.b'::ltree ~ '!a.!a';
+
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{1,}.*';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,}.!a{,}';
+SELECT 'a.b.c.d.a'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ 'a{,2}.!a{1,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!x{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
+SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
 
 SELECT 'QWER_TY'::ltree ~ 'q%@*';
 SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index ae4b33e..d7dd555 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -60,7 +60,8 @@
      <type>lquery</type> represents a regular-expression-like pattern
      for matching <type>ltree</type> values.  A simple word matches that
      label within a path.  A star symbol (<literal>*</literal>) matches zero
-     or more labels.  For example:
+     or more labels.  These can be joined with dots to form a pattern that
+     must match the whole label path.  For example:
 <synopsis>
 foo         <lineannotation>Match the exact label path <literal>foo</literal></lineannotation>
 *.foo.*     <lineannotation>Match any label path containing the label <literal>foo</literal></lineannotation>
@@ -69,19 +70,25 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>
 
     <para>
-     Star symbols can also be quantified to restrict how many labels
-     they can match:
+     Both star symbols and simple words can be quantified to restrict how many
+     labels they can match:
 <synopsis>
 *{<replaceable>n</replaceable>}        <lineannotation>Match exactly <replaceable>n</replaceable> labels</lineannotation>
 *{<replaceable>n</replaceable>,}       <lineannotation>Match at least <replaceable>n</replaceable> labels</lineannotation>
 *{<replaceable>n</replaceable>,<replaceable>m</replaceable>}      <lineannotation>Match at least <replaceable>n</replaceable> but not more than <replaceable>m</replaceable> labels</lineannotation>
-*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels &mdash; same as </lineannotation> *{0,<replaceable>m</replaceable>}
+*{,<replaceable>m</replaceable>}       <lineannotation>Match at most <replaceable>m</replaceable> labels &mdash; same as </lineannotation>*{0,<replaceable>m</replaceable>}
+foo{<replaceable>n</replaceable>,<replaceable>m</replaceable>}    <lineannotation>Match at least <replaceable>n</replaceable> but not more than <replaceable>m</replaceable> occurrences of <literal>foo</literal></lineannotation>
+foo{,}      <lineannotation>Match any number of occurrences of <literal>foo</literal>, including zero</lineannotation>
 </synopsis>
+     In the absence of any explicit quantifier, the default for a star symbol
+     is to match any number of labels (that is, <literal>{,}</literal>) while
+     the default for a non-star item is to match exactly once (that
+     is, <literal>{1}</literal>).
     </para>
 
     <para>
      There are several modifiers that can be put at the end of a non-star
-     label in <type>lquery</type> to make it match more than just the exact match:
+     <type>lquery</type> item to make it match more than just the exact match:
 <synopsis>
 @           <lineannotation>Match case-insensitively, for example <literal>a@</literal> matches <literal>A</literal></lineannotation>
 *           <lineannotation>Match any label with this prefix, for example <literal>foo*</literal> matches <literal>foobar</literal></lineannotation>
@@ -97,17 +104,20 @@ foo         <lineannotation>Match the exact label path <literal>foo</literal></l
     </para>
 
     <para>
-     Also, you can write several possibly-modified labels separated with
-     <literal>|</literal> (OR) to match any of those labels, and you can put
-     <literal>!</literal> (NOT) at the start to match any label that doesn't
-     match any of the alternatives.
+     Also, you can write several possibly-modified non-star items separated with
+     <literal>|</literal> (OR) to match any of those items, and you can put
+     <literal>!</literal> (NOT) at the start of a non-star group to match any
+     label that doesn't match any of the alternatives.  A quantifier, if any,
+     goes at the end of the group; it means some number of matches for the
+     group as a whole (that is, some number of labels matching or not matching
+     any of the alternatives).
     </para>
 
     <para>
      Here's an annotated example of <type>lquery</type>:
 <programlisting>
-Top.*{0,2}.sport*@.!football|tennis.Russ*|Spain
-a.  b.     c.      d.               e.
+Top.*{0,2}.sport*@.!football|tennis{1,}.Russ*|Spain
+a.  b.     c.      d.                   e.
 </programlisting>
      This query will match any label path that:
     </para>
@@ -129,8 +139,8 @@ a.  b.     c.      d.               e.
      </listitem>
      <listitem>
       <para>
-       then a label not matching <literal>football</literal> nor
-       <literal>tennis</literal>
+       then has one or more labels, none of which
+       match <literal>football</literal> nor <literal>tennis</literal>
       </para>
      </listitem>
      <listitem>
@@ -632,7 +642,7 @@ ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.Astronomy.*';
  Top.Collections.Pictures.Astronomy.Astronauts
 (7 rows)
 
-ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.*.Astronomy.*';
+ltreetest=&gt; SELECT path FROM test WHERE path ~ '*.!pictures@.Astronomy.*';
                 path
 ------------------------------------
  Top.Science.Astronomy
-- 
2.7.4

#19

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Nikita Glukhov (#18)

Re: fix for BUG #3720: wrong results at using ltree

Nikita Glukhov <n.gluhov@postgrespro.ru> writes:

On 30.03.2020 21:00, Tom Lane wrote:

Hence, new patch versions that do it like that. (0002 is unchanged.)

I tried to simplify a bit loops in checkCond() by merging two of them into
one with an explicit exit condition. Also I added return statement after
this loop, so it's now clear that we can't fall into next "while" loop.

I dunno, that doesn't really seem clearer to me (although some of it
might be that you expended no effort on making the comments match
the new code logic).

regards, tom lane

#20

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Tom Lane (#19)

Re: fix for BUG #3720: wrong results at using ltree

I wrote:

I dunno, that doesn't really seem clearer to me (although some of it
might be that you expended no effort on making the comments match
the new code logic).

... although looking closer, this formulation does have one very nice
advantage: for the typical non-star case with high = low = 1, the
only recursive call is a tail recursion, so it ought to consume less
stack space than what I wrote.

Let me see what I can do with the comments.

regards, tom lane

#21

Nikita Glukhov

n.gluhov@postgrespro.ru

almost 6 years ago

In reply to: Tom Lane (#20)

Re: fix for BUG #3720: wrong results at using ltree

On 31.03.2020 1:12, Tom Lane wrote:

I wrote:

I dunno, that doesn't really seem clearer to me (although some of it
might be that you expended no effort on making the comments match
the new code logic).

... although looking closer, this formulation does have one very nice
advantage: for the typical non-star case with high = low = 1, the
only recursive call is a tail recursion, so it ought to consume less
stack space than what I wrote.

And we even can simply transform this tail call into a loop:

-if (tlen > 0 && qlen > 0)
+while (tlen > 0 && qlen > 0)

Let me see what I can do with the comments.

Thanks.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#22

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Nikita Glukhov (#21)

Re: fix for BUG #3720: wrong results at using ltree

Nikita Glukhov <n.gluhov@postgrespro.ru> writes:

And we even can simply transform this tail call into a loop:

-if (tlen > 0 && qlen > 0)
+while (tlen > 0 && qlen > 0)

Yeah, the same occurred to me ... and then we can drop the other loop too.
I've got it down to this now:

/*
* Try to match an lquery (of qlen items) to an ltree (of tlen items)
*/
static bool
checkCond(lquery_level *curq, int qlen,
ltree_level *curt, int tlen)
{
/* Since this function recurses, it could be driven to stack overflow */
check_stack_depth();

/* Loop while we have query items to consider */
while (qlen > 0)
{
int low,
high;
lquery_level *nextq;

/*
* Get min and max repetition counts for this query item, dealing with
* the backwards-compatibility hack that the low/high fields aren't
* meaningful for non-'*' items unless LQL_COUNT is set.
*/
if ((curq->flag & LQL_COUNT) || curq->numvar == 0)
low = curq->low, high = curq->high;
else
low = high = 1;

/*
* We may limit "high" to the remaining text length; this avoids
* separate tests below.
*/
if (high > tlen)
high = tlen;

/* Fail if a match of required number of items is impossible */
if (high < low)
return false;

/*
* Recursively check the rest of the pattern against each possible
* start point following some of this item's match(es).
*/
nextq = LQL_NEXT(curq);
qlen--;

for (int matchcnt = 0; matchcnt < high; matchcnt++)
{
/*
* If we've consumed an acceptable number of matches of this item,
* and the rest of the pattern matches beginning here, we're good.
*/
if (matchcnt >= low && checkCond(nextq, qlen, curt, tlen))
return true;

/*
* Otherwise, try to match one more text item to this query item.
*/
if (!checkLevel(curq, curt))
return false;

curt = LEVEL_NEXT(curt);
tlen--;
}

/*
* Once we've consumed "high" matches, we can succeed only if the rest
* of the pattern matches beginning here. Loop around (if you prefer,
* think of this as tail recursion).
*/
curq = nextq;
}

/*
* Once we're out of query items, we match only if there's no remaining
* text either.
*/
return (tlen == 0);
}

regards, tom lane

#23

Nikita Glukhov

n.gluhov@postgrespro.ru

almost 6 years ago

In reply to: Tom Lane (#22)

Re: fix for BUG #3720: wrong results at using ltree

On 31.03.2020 1:35, Tom Lane wrote:

Nikita Glukhov <n.gluhov@postgrespro.ru> writes:
And we even can simply transform this tail call into a loop:
-if (tlen > 0 && qlen > 0)
+while (tlen > 0 && qlen > 0)
Yeah, the same occurred to me ... and then we can drop the other loop too.

I think now it looks as simple as the whole algorithm is.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

#24

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Nikita Glukhov (#23)

Re: fix for BUG #3720: wrong results at using ltree

Nikita Glukhov <n.gluhov@postgrespro.ru> writes:

I think now it looks as simple as the whole algorithm is.

Yeah, I think we've gotten checkCond to the point of "there's no
longer anything to take away".

I've marked this RFC, and will push tomorrow unless somebody wants
to object to the loss of backwards compatibility.

regards, tom lane

#25

Tom Lane

tgl@sss.pgh.pa.us

almost 6 years ago

In reply to: Tom Lane (#24)

Re: fix for BUG #3720: wrong results at using ltree

I wrote:

I've marked this RFC, and will push tomorrow unless somebody wants
to object to the loss of backwards compatibility.

And done. I noticed in some final testing that it's possible to
make this code take a long time by forcing it to backtrack a lot:

regression=# SELECT (('1' || repeat('.1', 65534))::ltree) ~ '*.*.x';
?column?
----------
f
(1 row)

Time: 54015.421 ms (00:54.015)

so I threw in a CHECK_FOR_INTERRUPTS(). Maybe it'd be worth trying
to optimize such cases, but I'm not sure that it'd ever matter for
real-world cases with reasonable-size label strings.

The old implementation seems to handle that particular case well,
evidently because it more-or-less folds adjacent stars together.
However, before anyone starts complaining about regressions, they
should note that it's really easy to get the old code to fail
via stack overflow:

regression=# SELECT (('1' || repeat('.1', 65534))::ltree) ~ '*.!1.*';
ERROR: stack depth limit exceeded

(That's as of five minutes ago, before that it dumped core.)
So I don't feel bad about the tradeoff. At least now we have
simple, visibly correct code that could serve as a starting
point for optimization if anyone feels the need to.

regards, tom lane