contrib/ltree patches

Started by Dan Langilleabout 23 years ago8 messages
#1Dan Langille
dan@langille.org

I have been looking at contrib/ltree in the PostgreSQL repository. I've
modified the code to allow / as a node delimiter instead of . which is the
default.

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done. This
is a work in progress.

My thanks to DarbyD for assistance.

cheers

--- ltree.h.orig	Tue Nov 26 18:57:58 2002
+++ ltree.h	Tue Nov 26 20:16:40 2002
@@ -6,6 +6,8 @@
 #include "utils/palloc.h"
 #include "utils/builtins.h"
+#define	NODE_DELIMITER	'/'
+
 typedef struct
 {
 	uint8		len;
@@ -88,7 +90,7 @@
 #ifndef abs
 #define abs(a)					((a) <	(0) ? -(a) : (a))
 #endif
-#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )

/* full text query */

--- ltree_io.c	Tue Nov 26 20:23:45 2002
+++ ltree_io.c.orig	Tue Nov 26 18:57:26 2002
@@ -48,7 +48,7 @@
 	ptr = buf;
 	while (*ptr)
 	{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
 			num++;
 		ptr++;
 	}
@@ -69,7 +69,7 @@
 		}
 		else if (state == LTPRS_WAITDELIM)
 		{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
 			{
 				lptr->len = ptr - lptr->start;
 				if (lptr->len > 255)
@@ -131,7 +131,7 @@
 	{
 		if (i != 0)
 		{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
 			ptr++;
 		}
 		memcpy(ptr, curlevel->name, curlevel->len);
@@ -181,7 +181,7 @@
 	ptr = buf;
 	while (*ptr)
 	{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
 			num++;
 		else if (*ptr == '|')
 			numOR++;
@@ -265,7 +265,7 @@
 						 lptr->len, (int) (lptr->start - buf));
 				state = LQPRS_WAITVAR;
 			}
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
 			{
 				lptr->len = ptr - lptr->start -
 					((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
@@ -289,7 +289,7 @@
 		{
 			if (*ptr == '{')
 				state = LQPRS_WAITFNUM;
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
 			{
 				curqlevel->low = 0;
 				curqlevel->high = 0xffff;
@@ -347,7 +347,7 @@
 		}
 		else if (state == LQPRS_WAITEND)
 		{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
 			{
 				state = LQPRS_WAITLEVEL;
 				curqlevel = NEXTLEV(curqlevel);
@@ -471,7 +471,7 @@
 	{
 		if (i != 0)
 		{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
 			ptr++;
 		}
 		if (curqlevel->numvar)
#2Teodor Sigaev
teodor@stack.net
In reply to: Dan Langille (#1)
Re: contrib/ltree patches

Dan Langille wrote:

I have been looking at contrib/ltree in the PostgreSQL repository. I've
modified the code to allow / as a node delimiter instead of . which is the
default.

What is the reason for changing delimiter?

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done. This
is a work in progress.

It's good.

My thanks to DarbyD for assistance.

cheers

--- ltree.h.orig	Tue Nov 26 18:57:58 2002
+++ ltree.h	Tue Nov 26 20:16:40 2002
@@ -6,6 +6,8 @@
#include "utils/palloc.h"
#include "utils/builtins.h"
+#define	NODE_DELIMITER	'/'
+
typedef struct
{
uint8		len;
@@ -88,7 +90,7 @@
#ifndef abs
#define abs(a)					((a) <	(0) ? -(a) : (a))
#endif
-#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )

It seems to me that it's mistake. ISALNUM shoud define correct character in
name of node (level). Try to test
with incorrect ltree value 'a..b'.

--
Teodor Sigaev
teodor@stack.net

#3Dan Langille
dan@langille.org
In reply to: Teodor Sigaev (#2)
Re: contrib/ltree patches

On 27 Nov 2002 at 12:16, Teodor Sigaev wrote:

Dan Langille wrote:

I have been looking at contrib/ltree in the PostgreSQL repository.
I've modified the code to allow / as a node delimiter instead of .
which is the default.

What is the reason for changing delimiter?

My tree represents a file system. Here are some entries:

# select id, pathname from element_pathnames order by pathname;
77024 | doc/de_DE.ISO8859-1
77028 | doc/de_DE.ISO8859-1/books
84590 | doc/de_DE.ISO8859-1/books/Makefile.inc
77029 | doc/de_DE.ISO8859-1/books/faq
84591 | doc/de_DE.ISO8859-1/books/faq/Makefile
77030 | doc/de_DE.ISO8859-1/books/faq/book.sgml
77691 | doc/de_DE.ISO8859-1/books/handbook
77704 | doc/de_DE.ISO8859-1/books/handbook/Makefile
110592 | doc/de_DE.ISO8859-1/books/handbook/advanced-networking

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done.
This is a work in progress.

It's good.

Thank you. More patches will follow as I get closer to my objective.

-#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' ||
+#(x) == NODE_DELIMITER )

It seems to me that it's mistake. ISALNUM shoud define correct
character in name of node (level). Try to test with incorrect ltree
value 'a..b'.

I just did some simple tests and I see what you mean:

ltree_test=# select * from tree;
id | pathname
----+------------------
1 | /ports
2 | ports/security
2 | ports//security
2 | /ports//security
2 | a..b
(5 rows)

Then I removed NODE_DELIMITER from ISALNUM and tried again:

ltree_test=# insert into tree values (2, '/ports//security');
ERROR: Syntax error in position 0 near '/'
ltree_test=# insert into tree values (2, 'ports//security');
ERROR: Syntax error in position 6 near '/'
ltree_test=# insert into tree values (2, 'ports/security');
INSERT 29955201 1
ltree_test=# insert into tree values (2, 'ports/security/');
ERROR: Unexpected end of line
ltree_test=# insert into tree values (2, 'ports/security/things');
INSERT 29955202 1

ltree_test=# select * from tree;
id | pathname
----+-----------------------
1 | /ports
2 | ports/security
2 | ports//security
2 | /ports//security
2 | a..b
2 | ports/security
2 | ports/security/things
(7 rows)

Removing NODE_DELIMITER from ISALNUM makes sense. Thank you. Here
is the reason why NODE_DELIMITER was added. My initial data sample
was of the form "/usr/local/" (i.e. it started with a
NODE_DELIMITER). I have since changed my data so it does not start
with a leading / because queries were not working.

Based upon the sample data I was using (approximately 120,000 nodes
as taken from a real file system), I had to change ISALNUM as I went
along. Here is the current definition for ISALNUM:

#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ||
(x) == '-' || (x) == '.' || (x) == '+' || (x) == ':' || (x) == '~' ||
(x) == '%' || (x) == ',' || (x) == '#')

Given that I am trying to allow any valid filename, I think ISALNUM
needs to allow any ASCII character.

I also think I will need to modify the parsing within lquery_in to
allow escaping of characters it recognizes but which may be part of a
file name (e.g. :%~ may be part of a file name, but these are special
characters to lquery_in). That I think will be the biggest change.

Thank you for your interest and help.

--
Dan Langille : http://www.langille.org/

#4Teodor Sigaev
teodor@stack.net
In reply to: Dan Langille (#3)
Re: contrib/ltree patches

What is the reason for changing delimiter?

My tree represents a file system. Here are some entries:

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done.
This is a work in progress.

It's good.

#define ISALNUM(x) ( isalnum((unsigned int)(x)) || (x) == '_' ||
(x) == '-' || (x) == '.' || (x) == '+' || (x) == ':' || (x) == '~' ||
(x) == '%' || (x) == ',' || (x) == '#')

Given that I am trying to allow any valid filename, I think ISALNUM
needs to allow any ASCII character.

I also think I will need to modify the parsing within lquery_in to
allow escaping of characters it recognizes but which may be part of a
file name (e.g. :%~ may be part of a file name, but these are special
characters to lquery_in). That I think will be the biggest change.

Ok, I think it's a good extension. Let you prepare cumulative patch.
Nevertheless, we have no chance to insert this to 7.3 release :(. Only for
7.3.1 or even 7.4.

--
Teodor Sigaev
teodor@stack.net

#5Dan Langille
dan@langille.org
In reply to: Teodor Sigaev (#4)
Re: contrib/ltree patches

On 27 Nov 2002 at 19:55, Teodor Sigaev wrote:

Ok, I think it's a good extension. Let you prepare cumulative patch.
Nevertheless, we have no chance to insert this to 7.3 release :(.
Only for 7.3.1 or even 7.4.

Thanks. As for the 7.3 release, yes, it would be nice, but that was
not my goal. :)
--
Dan Langille : http://www.langille.org/

#6Bruce Momjian
pgman@candle.pha.pa.us
In reply to: Dan Langille (#1)
Re: contrib/ltree patches

Dan, is this ready to be applied to CVS?

---------------------------------------------------------------------------

Dan Langille wrote:

I have been looking at contrib/ltree in the PostgreSQL repository. I've
modified the code to allow / as a node delimiter instead of . which is the
default.

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done. This
is a work in progress.

My thanks to DarbyD for assistance.

cheers

--- ltree.h.orig	Tue Nov 26 18:57:58 2002
+++ ltree.h	Tue Nov 26 20:16:40 2002
@@ -6,6 +6,8 @@
#include "utils/palloc.h"
#include "utils/builtins.h"
+#define	NODE_DELIMITER	'/'
+
typedef struct
{
uint8		len;
@@ -88,7 +90,7 @@
#ifndef abs
#define abs(a)					((a) <	(0) ? -(a) : (a))
#endif
-#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )

/* full text query */

--- ltree_io.c	Tue Nov 26 20:23:45 2002
+++ ltree_io.c.orig	Tue Nov 26 18:57:26 2002
@@ -48,7 +48,7 @@
ptr = buf;
while (*ptr)
{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
num++;
ptr++;
}
@@ -69,7 +69,7 @@
}
else if (state == LTPRS_WAITDELIM)
{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
{
lptr->len = ptr - lptr->start;
if (lptr->len > 255)
@@ -131,7 +131,7 @@
{
if (i != 0)
{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
ptr++;
}
memcpy(ptr, curlevel->name, curlevel->len);
@@ -181,7 +181,7 @@
ptr = buf;
while (*ptr)
{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
num++;
else if (*ptr == '|')
numOR++;
@@ -265,7 +265,7 @@
lptr->len, (int) (lptr->start - buf));
state = LQPRS_WAITVAR;
}
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
@@ -289,7 +289,7 @@
{
if (*ptr == '{')
state = LQPRS_WAITFNUM;
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
{
curqlevel->low = 0;
curqlevel->high = 0xffff;
@@ -347,7 +347,7 @@
}
else if (state == LQPRS_WAITEND)
{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
{
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
@@ -471,7 +471,7 @@
{
if (i != 0)
{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
ptr++;
}
if (curqlevel->numvar)

---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
#7Teodor Sigaev
teodor@stack.net
In reply to: Bruce Momjian (#6)
Re: contrib/ltree patches

Don't do it! It's a wrong patch. Dan will prepare correct patch (with other
changes).

Bruce Momjian wrote:

Dan, is this ready to be applied to CVS?

---------------------------------------------------------------------------

Dan Langille wrote:

I have been looking at contrib/ltree in the PostgreSQL repository. I've
modified the code to allow / as a node delimiter instead of . which is the
default.

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done. This
is a work in progress.

My thanks to DarbyD for assistance.

cheers

--- ltree.h.orig	Tue Nov 26 18:57:58 2002
+++ ltree.h	Tue Nov 26 20:16:40 2002
@@ -6,6 +6,8 @@
#include "utils/palloc.h"
#include "utils/builtins.h"
+#define	NODE_DELIMITER	'/'
+
typedef struct
{
uint8		len;
@@ -88,7 +90,7 @@
#ifndef abs
#define abs(a)					((a) <	(0) ? -(a) : (a))
#endif
-#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' || (x) == NODE_DELIMITER )

/* full text query */

--- ltree_io.c	Tue Nov 26 20:23:45 2002
+++ ltree_io.c.orig	Tue Nov 26 18:57:26 2002
@@ -48,7 +48,7 @@
ptr = buf;
while (*ptr)
{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
num++;
ptr++;
}
@@ -69,7 +69,7 @@
}
else if (state == LTPRS_WAITDELIM)
{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
{
lptr->len = ptr - lptr->start;
if (lptr->len > 255)
@@ -131,7 +131,7 @@
{
if (i != 0)
{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
ptr++;
}
memcpy(ptr, curlevel->name, curlevel->len);
@@ -181,7 +181,7 @@
ptr = buf;
while (*ptr)
{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
num++;
else if (*ptr == '|')
numOR++;
@@ -265,7 +265,7 @@
lptr->len, (int) (lptr->start - buf));
state = LQPRS_WAITVAR;
}
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
@@ -289,7 +289,7 @@
{
if (*ptr == '{')
state = LQPRS_WAITFNUM;
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
{
curqlevel->low = 0;
curqlevel->high = 0xffff;
@@ -347,7 +347,7 @@
}
else if (state == LQPRS_WAITEND)
{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
{
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
@@ -471,7 +471,7 @@
{
if (i != 0)
{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
ptr++;
}
if (curqlevel->numvar)

---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

--
Teodor Sigaev
teodor@stack.net

#8Dan Langille
dan@langille.org
In reply to: Bruce Momjian (#6)
Re: contrib/ltree patches

Thanks for asking. I have been diverted to other tasks and won't be
able to get back to this for a short while. The basics work (i.e.
population and simple compares) but I know for sure that certain
functions will not work now that we allow what were previously
operators to be part of the node name. In short, the code needs to
allow for operators to be escaped if they are part of the node name.

On 5 Dec 2002 at 0:54, Bruce Momjian wrote:

Dan, is this ready to be applied to CVS?

----------------------------------------------------------------------
-----

Dan Langille wrote:

I have been looking at contrib/ltree in the PostgreSQL repository.
I've modified the code to allow / as a node delimiter instead of .
which is the default.

Below are the patches to make this change. I have also moved the
delimiter to a DEFINE so that other customizations are easily done.
This is a work in progress.

My thanks to DarbyD for assistance.

cheers

--- ltree.h.orig	Tue Nov 26 18:57:58 2002
+++ ltree.h	Tue Nov 26 20:16:40 2002
@@ -6,6 +6,8 @@
#include "utils/palloc.h"
#include "utils/builtins.h"
+#define	NODE_DELIMITER	'/'
+
typedef struct
{
uint8		len;
@@ -88,7 +90,7 @@
#ifndef abs
#define abs(a)					((a) <	(0) ? -(a) : (a))
#endif
-#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' )
+#define ISALNUM(x)	( isalnum((unsigned int)(x)) || (x) == '_' ||
+#(x) == NODE_DELIMITER )

/* full text query */

--- ltree_io.c	Tue Nov 26 20:23:45 2002
+++ ltree_io.c.orig	Tue Nov 26 18:57:26 2002
@@ -48,7 +48,7 @@
ptr = buf;
while (*ptr)
{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
num++;
ptr++;
}
@@ -69,7 +69,7 @@
}
else if (state == LTPRS_WAITDELIM)
{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
{
lptr->len = ptr - lptr->start;
if (lptr->len > 255)
@@ -131,7 +131,7 @@
{
if (i != 0)
{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
ptr++;
}
memcpy(ptr, curlevel->name, curlevel->len);
@@ -181,7 +181,7 @@
ptr = buf;
while (*ptr)
{
-		if (*ptr == NODE_DELIMITER)
+		if (*ptr == '.')
num++;
else if (*ptr == '|')
numOR++;
@@ -265,7 +265,7 @@
lptr->len, (int) (lptr->start - buf));
state = LQPRS_WAITVAR;
}
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEM) ? 1 : 0) -
@@ -289,7 +289,7 @@
{
if (*ptr == '{')
state = LQPRS_WAITFNUM;
-			else if (*ptr == NODE_DELIMITER)
+			else if (*ptr == '.')
{
curqlevel->low = 0;
curqlevel->high = 0xffff;
@@ -347,7 +347,7 @@
}
else if (state == LQPRS_WAITEND)
{
-			if (*ptr == NODE_DELIMITER)
+			if (*ptr == '.')
{
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
@@ -471,7 +471,7 @@
{
if (i != 0)
{
-			*ptr = NODE_DELIMITER;
+			*ptr = '.';
ptr++;
}
if (curqlevel->numvar)

---------------------------(end of
broadcast)--------------------------- TIP 4: Don't 'kill -9' the
postmaster

--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 359-1001 + If your
life is a hard drive, | 13 Roberts Road + Christ can be your
backup. | Newtown Square, Pennsylvania 19073

--
Dan Langille : http://www.langille.org/