Patch: Allow substring/replace() to get/set bit values

Started by Leonardo Fabout 16 years ago8 messages
#1Leonardo F
m_lists@yahoo.it
1 attachment(s)

Hi all,

attached a patch that adds the following functions for bit string:

- overlay
- get_bit
- set_bit

Some info:

1) overlay is implemented as calls to substring; given the different way substring behaves when used with strings vs bit strings:

test=# SELECT substring(B'1111000000000001' from 1 for -1);
substring
------------------
1111000000000001
(1 row)

test=# SELECT substring('1111000000000001' from 1 for -1);
ERROR: negative substring length not allowed

I don't think that this overlay implementation is what we want?

Example:

test=# SELECT overlay(B'1111' placing B'01' from 1 for 2);
overlay
---------
0111
(1 row)

(looks ok)

test=# SELECT overlay(B'1111' placing B'01' from 0 for 2);
overlay
-----------
111101111
(1 row)

????
This happens because substring(bit, pos, -1) means substring(bit, pos, length(bit string)),
and < -1 values for bit substring parameters are allowed: is this a bug in bit substring???

2) I tried implementing bit_get and bit_set as calls to overlay/substring:

DATA(insert OID = 3032 ( get_bit PGNSP PGUID 14 1 0 0 f f f t f i 2 0 23 "1560 23" _null_ _null_ _null_ _null_ "select (pg_catalog.substring($1, $2, 1))::int4" _null_ _null_ _null_ ));
DESCR("get bit");
DATA(insert OID = 3033 ( set_bit PGNSP PGUID 14 1 0 0 f f f t f i 3 0 1560 "1560 23 23" _null_ _null_ _null_ _null_ "select pg_catalog.overlay($1, $3::bit, $2, 1)" _null_ _null_ _null_ ));
DESCR("set bit");

but this doesn't give any check on the values provided:
that the bit looked for is in fact in the right range, and that the bit in set_bit is in fact a bit
(I don't like the idea of writing "select set_bit(B'01010111', B'1')" instead of
"select set_bit(B'01010111', 1)" ).
So I coded them in proper C internal functions.

Leonardo

Attachments:

patch_bit.patchapplication/octet-stream; name=patch_bit.patchDownload
Index: src/backend/utils/adt/varbit.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varbit.c,v
retrieving revision 1.61
diff -c -r1.61 varbit.c
*** src/backend/utils/adt/varbit.c	7 Jan 2010 04:53:34 -0000	1.61
--- src/backend/utils/adt/varbit.c	7 Jan 2010 11:39:37 -0000
***************
*** 1576,1578 ****
--- 1576,1674 ----
  	}
  	PG_RETURN_INT32(0);
  }
+ 
+ 
+ /* bitsetbit
+  * Given an instance of type 'bit' creates a new one with
+  * the Nth bit set to the given value.
+  */
+ Datum
+ bitsetbit(PG_FUNCTION_ARGS)
+ {
+ 	VarBit	   *arg1 = PG_GETARG_VARBIT_P(0);
+ 	int32		n = PG_GETARG_INT32(1);
+ 	int32		newBit = PG_GETARG_INT32(2);
+ 	VarBit	   *result;
+ 	int			len,
+ 				bitlen;
+ 	bits8	   *r,
+ 			   *p;
+ 	int			byteNo,
+ 				bitNo;
+ 	int			oldByte,
+ 				newByte;
+ 
+ 	bitlen = VARBITLEN(arg1);
+ 	if (n < 0 || n >= bitlen)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ 				 errmsg("index %d out of valid range, 0..%d",
+ 						n, bitlen - 1)));
+ 	/*
+ 	 * sanity check!
+ 	 */
+ 	if (newBit != 0 && newBit != 1)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("new bit must be 0 or 1")));
+ 
+ 	len = VARSIZE(arg1);
+ 	result = (VarBit *) palloc(len);
+ 	SET_VARSIZE(result, len);
+ 	VARBITLEN(result) = bitlen;
+ 
+ 	p = VARBITS(arg1);
+ 	r = VARBITS(result);
+ 
+ 	memcpy(r, p, VARBITBYTES(arg1));
+ 
+ 	byteNo = n / BITS_PER_BYTE;
+ 	bitNo = BITS_PER_BYTE - 1 - (n % BITS_PER_BYTE);
+ 
+ 	/*
+ 	 * Update the byte.
+ 	 */
+ 	oldByte = ((unsigned char *) r)[byteNo];
+ 
+ 	if (newBit == 0)
+ 		newByte = oldByte & (~(1 << bitNo));
+ 	else
+ 		newByte = oldByte | (1 << bitNo);
+ 
+ 	((unsigned char *) r)[byteNo] = newByte;
+ 
+ 	PG_RETURN_VARBIT_P(result);
+ }
+ 
+ /* bitgetbit
+  * returns the value of the Nth bit (0 or 1).
+  */
+ Datum
+ bitgetbit(PG_FUNCTION_ARGS)
+ {
+ 	VarBit	   *arg1 = PG_GETARG_VARBIT_P(0);
+ 	int32		n = PG_GETARG_INT32(1);
+ 	int			bitlen;
+ 	int			byteNo,
+ 				bitNo;
+ 	int byte;
+ 
+ 	bitlen = VARBITLEN(arg1);
+ 
+ 	if (n < 0 || n >= bitlen)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ 				 errmsg("index %d out of valid range, 0..%d",
+ 						n, bitlen - 1)));
+ 
+ 	byteNo = n / BITS_PER_BYTE;
+ 	bitNo = BITS_PER_BYTE - 1 - n % BITS_PER_BYTE;
+ 
+ 	byte = ((unsigned char *) VARBITS(arg1))[byteNo];
+ 
+ 	if (byte &(1 << bitNo))
+ 		PG_RETURN_INT32(1);
+ 	else
+ 		PG_RETURN_INT32(0);
+ }
+ 
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.559
diff -c -r1.559 pg_proc.h
*** src/include/catalog/pg_proc.h	5 Jan 2010 01:06:56 -0000	1.559
--- src/include/catalog/pg_proc.h	7 Jan 2010 11:39:42 -0000
***************
*** 2403,2408 ****
--- 2403,2416 ----
  DATA(insert OID = 1699 (  substring			PGNSP PGUID 14 1 0 0 f f f t f i 2 0 1560 "1560 23" _null_ _null_ _null_ _null_ "select pg_catalog.substring($1, $2, -1)" _null_ _null_ _null_ ));
  DESCR("return portion of bitstring");
  
+ DATA(insert OID = 3030 (  overlay			PGNSP PGUID 14 1 0 0 f f f t f i 4 0 1560 "1560 1560 23 23" _null_ _null_ _null_ _null_	"select pg_catalog.substring($1, 1, ($3 - 1)) || $2 || pg_catalog.substring($1, ($3 + $4))" _null_ _null_ _null_ ));
+ DESCR("substitute portion of bit string");
+ DATA(insert OID = 3031 (  overlay			PGNSP PGUID 14 1 0 0 f f f t f i 3 0 1560 "1560 1560 23" _null_ _null_ _null_ _null_	"select pg_catalog.substring($1, 1, ($3 - 1)) || $2 || pg_catalog.substring($1, ($3 + pg_catalog.length($2)))" _null_ _null_ _null_ ));
+ DESCR("substitute portion of bit string");
+ DATA(insert OID = 3032 (  get_bit		   PGNSP PGUID 12 1 0 0 f f f t f i 2 0 23 "1560 23" _null_ _null_ _null_ _null_ bitgetbit _null_ _null_ _null_ ));
+ DESCR("get bit");
+ DATA(insert OID = 3033 (  set_bit		   PGNSP PGUID 12 1 0 0 f f f t f i 3 0 1560 "1560 23 23" _null_ _null_ _null_ _null_ bitsetbit _null_ _null_ _null_ ));
+ DESCR("set bit");
  
  /* for mac type support */
  DATA(insert OID = 436 (  macaddr_in			PGNSP PGUID 12 1 0 0 f f f t f i 1 0 829 "2275" _null_ _null_ _null_ _null_ macaddr_in _null_ _null_ _null_ ));
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.569
diff -c -r1.569 catversion.h
*** src/include/catalog/catversion.h	6 Jan 2010 05:18:18 -0000	1.569
--- src/include/catalog/catversion.h	7 Jan 2010 11:39:37 -0000
***************
*** 53,58 ****
   */
  
  /*							yyyymmddN */
! #define CATALOG_VERSION_NO	201001061
  
  #endif
--- 53,58 ----
   */
  
  /*							yyyymmddN */
! #define CATALOG_VERSION_NO	201001081
  
  #endif
Index: src/test/regress/expected/bit.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/bit.out,v
retrieving revision 1.4
diff -c -r1.4 bit.out
*** src/test/regress/expected/bit.out	27 Jul 2003 04:53:11 -0000	1.4
--- src/test/regress/expected/bit.out	7 Jan 2010 11:39:42 -0000
***************
*** 509,511 ****
--- 509,535 ----
  
  DROP TABLE BIT_SHIFT_TABLE;
  DROP TABLE VARBIT_SHIFT_TABLE;
+ SELECT get_bit(B'0101011000100', 10);
+  get_bit 
+ ---------
+        1
+ (1 row)
+ 
+ SELECT get_bit(B'0101011', 3);
+  get_bit 
+ ---------
+        1
+ (1 row)
+ 
+ SELECT set_bit(B'0101011', 0, 1);
+  set_bit 
+ ---------
+  1101011
+ (1 row)
+ 
+ SELECT set_bit(B'010101100010010', 14, 1);
+      set_bit     
+ -----------------
+  010101100010011
+ (1 row)
+ 
Index: src/test/regress/sql/bit.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/bit.sql,v
retrieving revision 1.2
diff -c -r1.2 bit.sql
*** src/test/regress/sql/bit.sql	22 May 2001 16:37:17 -0000	1.2
--- src/test/regress/sql/bit.sql	7 Jan 2010 11:39:42 -0000
***************
*** 184,186 ****
--- 184,191 ----
  
  DROP TABLE BIT_SHIFT_TABLE;
  DROP TABLE VARBIT_SHIFT_TABLE;
+ 
+ SELECT get_bit(B'0101011000100', 10);
+ SELECT get_bit(B'0101011', 3);
+ SELECT set_bit(B'0101011', 0, 1);
+ SELECT set_bit(B'010101100010010', 14, 1);
Index: src/include/utils/varbit.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/varbit.h,v
retrieving revision 1.29
diff -c -r1.29 varbit.h
*** src/include/utils/varbit.h	2 Jan 2010 16:58:10 -0000	1.29
--- src/include/utils/varbit.h	7 Jan 2010 11:39:42 -0000
***************
*** 95,99 ****
--- 95,101 ----
  extern Datum bitfromint8(PG_FUNCTION_ARGS);
  extern Datum bittoint8(PG_FUNCTION_ARGS);
  extern Datum bitposition(PG_FUNCTION_ARGS);
+ extern Datum bitsetbit(PG_FUNCTION_ARGS);
+ extern Datum bitgetbit(PG_FUNCTION_ARGS);
  
  #endif
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.495
diff -c -r1.495 func.sgml
*** doc/src/sgml/func.sgml	19 Dec 2009 17:49:50 -0000	1.495
--- doc/src/sgml/func.sgml	7 Jan 2010 11:39:36 -0000
***************
*** 2934,2940 ****
      <literal><function>bit_length</function></literal>,
      <literal><function>octet_length</function></literal>,
      <literal><function>position</function></literal>,
!     <literal><function>substring</function></literal>.
     </para>
  
     <para>
--- 2934,2943 ----
      <literal><function>bit_length</function></literal>,
      <literal><function>octet_length</function></literal>,
      <literal><function>position</function></literal>,
!     <literal><function>substring</function></literal>,
!     <literal><function>overlay</function></literal>,
!     <literal><function>get_bit</function></literal>,
!     <literal><function>set_bit</function></literal>.
     </para>
  
     <para>
#2Robert Haas
robertmhaas@gmail.com
In reply to: Leonardo F (#1)
Re: Patch: Allow substring/replace() to get/set bit values

On Thu, Jan 7, 2010 at 7:02 AM, Leonardo F <m_lists@yahoo.it> wrote:

attached a patch that adds the following functions for bit string:

Thanks! Please add your patch here:

https://commitfest.postgresql.org/action/commitfest_view/open

The next CommitFest starts January 15th.

...Robert

#3Leonardo F
m_lists@yahoo.it
In reply to: Robert Haas (#2)
Re: Patch: Allow substring/replace() to get/set bit values

Thanks! Please add your patch here:

https://commitfest.postgresql.org/action/commitfest_view/open

Ok; but what about what I said about the difference between bit/string substring?
That affects overlay behaviour for bit...

I've even got

"ERROR: invalid memory alloc request size 4244635647"

with:

SELECT substring(B'1111000000000001' from 5 for -2);

(this is 8.4.2, windows version, not modified...)

test=# SELECT substring(B'1111000000000001' from 1 for -1);
substring
------------------
1111000000000001
(1 row)

test=# SELECT substring('1111000000000001' from 1 for -1);
ERROR: negative substring length not allowed

#4Robert Haas
robertmhaas@gmail.com
In reply to: Leonardo F (#3)
Re: Patch: Allow substring/replace() to get/set bit values

On Thu, Jan 7, 2010 at 11:05 AM, Leonardo F <m_lists@yahoo.it> wrote:

Thanks!  Please add your patch here:

https://commitfest.postgresql.org/action/commitfest_view/open

Ok; but what about what I said about the difference between bit/string substring?
That affects overlay behaviour for bit...

I've even got

"ERROR:  invalid memory alloc request size 4244635647"

with:

SELECT substring(B'1111000000000001' from 5 for -2);

(this is 8.4.2, windows version, not modified...)

test=# SELECT substring(B'1111000000000001' from 1 for -1);
substring
------------------
1111000000000001
(1 row)

test=# SELECT substring('1111000000000001' from 1 for -1);
ERROR:  negative substring length not allowed

I haven't tried to reproduce it, but that sounds like a bug.

....Robert

#5Tom Lane
tgl@sss.pgh.pa.us
In reply to: Leonardo F (#3)
Re: Patch: Allow substring/replace() to get/set bit values

Leonardo F <m_lists@yahoo.it> writes:

I've even got
"ERROR: invalid memory alloc request size 4244635647"
with:
SELECT substring(B'1111000000000001' from 5 for -2);

Hm, yeah, somebody was sloppy about exposing the three-argument
form of varbit substring and using -1 to represent the two-argument
form.

What we can do in the back branches is make the code treat any
negative value as meaning two-arg form. To throw an error we'd
need to refactor the pg_proc representation ...

regards, tom lane

#6Leonardo F
m_lists@yahoo.it
In reply to: Tom Lane (#5)
Re: Patch: Allow substring/replace() to get/set bit values

What we can do in the back branches is make the code treat any
negative value as meaning two-arg form. To throw an error we'd
need to refactor the pg_proc representation ...

I was going to fix that myself, but I think it has just been done.

How can I keep up with "who's doing what"?

#7Alvaro Herrera
alvherre@commandprompt.com
In reply to: Leonardo F (#6)
Re: Patch: Allow substring/replace() to get/set bit values

Leonardo F wrote:

What we can do in the back branches is make the code treat any
negative value as meaning two-arg form. To throw an error we'd
need to refactor the pg_proc representation ...

I was going to fix that myself, but I think it has just been done.

How can I keep up with "who's doing what"?

Read this list and pgsql-committers.

--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

#8Dimitri Fontaine
dfontaine@hi-media.com
In reply to: Alvaro Herrera (#7)
Re: Patch: Allow substring/replace() to get/set bit values

Alvaro Herrera <alvherre@commandprompt.com> writes:

Leonardo F wrote:

How can I keep up with "who's doing what"?

Read this list and pgsql-committers.

Or subscribe to the RSS feed from:

http://git.postgresql.org/gitweb?p=postgresql.git;a=summary

--
dim