Patch: Allow substring/replace() to get/set bit values
Hi all,
attached a patch that adds the following functions for bit string:
- overlay
- get_bit
- set_bit
Some info:
1) overlay is implemented as calls to substring; given the different way substring behaves when used with strings vs bit strings:
test=# SELECT substring(B'1111000000000001' from 1 for -1);
substring
------------------
1111000000000001
(1 row)
test=# SELECT substring('1111000000000001' from 1 for -1);
ERROR: negative substring length not allowed
I don't think that this overlay implementation is what we want?
Example:
test=# SELECT overlay(B'1111' placing B'01' from 1 for 2);
overlay
---------
0111
(1 row)
(looks ok)
test=# SELECT overlay(B'1111' placing B'01' from 0 for 2);
overlay
-----------
111101111
(1 row)
????
This happens because substring(bit, pos, -1) means substring(bit, pos, length(bit string)),
and < -1 values for bit substring parameters are allowed: is this a bug in bit substring???
2) I tried implementing bit_get and bit_set as calls to overlay/substring:
DATA(insert OID = 3032 ( get_bit PGNSP PGUID 14 1 0 0 f f f t f i 2 0 23 "1560 23" _null_ _null_ _null_ _null_ "select (pg_catalog.substring($1, $2, 1))::int4" _null_ _null_ _null_ ));
DESCR("get bit");
DATA(insert OID = 3033 ( set_bit PGNSP PGUID 14 1 0 0 f f f t f i 3 0 1560 "1560 23 23" _null_ _null_ _null_ _null_ "select pg_catalog.overlay($1, $3::bit, $2, 1)" _null_ _null_ _null_ ));
DESCR("set bit");
but this doesn't give any check on the values provided:
that the bit looked for is in fact in the right range, and that the bit in set_bit is in fact a bit
(I don't like the idea of writing "select set_bit(B'01010111', B'1')" instead of
"select set_bit(B'01010111', 1)" ).
So I coded them in proper C internal functions.
Leonardo
Attachments:
patch_bit.patchapplication/octet-stream; name=patch_bit.patchDownload
Index: src/backend/utils/adt/varbit.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varbit.c,v
retrieving revision 1.61
diff -c -r1.61 varbit.c
*** src/backend/utils/adt/varbit.c 7 Jan 2010 04:53:34 -0000 1.61
--- src/backend/utils/adt/varbit.c 7 Jan 2010 11:39:37 -0000
***************
*** 1576,1578 ****
--- 1576,1674 ----
}
PG_RETURN_INT32(0);
}
+
+
+ /* bitsetbit
+ * Given an instance of type 'bit' creates a new one with
+ * the Nth bit set to the given value.
+ */
+ Datum
+ bitsetbit(PG_FUNCTION_ARGS)
+ {
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ int32 n = PG_GETARG_INT32(1);
+ int32 newBit = PG_GETARG_INT32(2);
+ VarBit *result;
+ int len,
+ bitlen;
+ bits8 *r,
+ *p;
+ int byteNo,
+ bitNo;
+ int oldByte,
+ newByte;
+
+ bitlen = VARBITLEN(arg1);
+ if (n < 0 || n >= bitlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, bitlen - 1)));
+ /*
+ * sanity check!
+ */
+ if (newBit != 0 && newBit != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("new bit must be 0 or 1")));
+
+ len = VARSIZE(arg1);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen;
+
+ p = VARBITS(arg1);
+ r = VARBITS(result);
+
+ memcpy(r, p, VARBITBYTES(arg1));
+
+ byteNo = n / BITS_PER_BYTE;
+ bitNo = BITS_PER_BYTE - 1 - (n % BITS_PER_BYTE);
+
+ /*
+ * Update the byte.
+ */
+ oldByte = ((unsigned char *) r)[byteNo];
+
+ if (newBit == 0)
+ newByte = oldByte & (~(1 << bitNo));
+ else
+ newByte = oldByte | (1 << bitNo);
+
+ ((unsigned char *) r)[byteNo] = newByte;
+
+ PG_RETURN_VARBIT_P(result);
+ }
+
+ /* bitgetbit
+ * returns the value of the Nth bit (0 or 1).
+ */
+ Datum
+ bitgetbit(PG_FUNCTION_ARGS)
+ {
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ int32 n = PG_GETARG_INT32(1);
+ int bitlen;
+ int byteNo,
+ bitNo;
+ int byte;
+
+ bitlen = VARBITLEN(arg1);
+
+ if (n < 0 || n >= bitlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, bitlen - 1)));
+
+ byteNo = n / BITS_PER_BYTE;
+ bitNo = BITS_PER_BYTE - 1 - n % BITS_PER_BYTE;
+
+ byte = ((unsigned char *) VARBITS(arg1))[byteNo];
+
+ if (byte &(1 << bitNo))
+ PG_RETURN_INT32(1);
+ else
+ PG_RETURN_INT32(0);
+ }
+
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.559
diff -c -r1.559 pg_proc.h
*** src/include/catalog/pg_proc.h 5 Jan 2010 01:06:56 -0000 1.559
--- src/include/catalog/pg_proc.h 7 Jan 2010 11:39:42 -0000
***************
*** 2403,2408 ****
--- 2403,2416 ----
DATA(insert OID = 1699 ( substring PGNSP PGUID 14 1 0 0 f f f t f i 2 0 1560 "1560 23" _null_ _null_ _null_ _null_ "select pg_catalog.substring($1, $2, -1)" _null_ _null_ _null_ ));
DESCR("return portion of bitstring");
+ DATA(insert OID = 3030 ( overlay PGNSP PGUID 14 1 0 0 f f f t f i 4 0 1560 "1560 1560 23 23" _null_ _null_ _null_ _null_ "select pg_catalog.substring($1, 1, ($3 - 1)) || $2 || pg_catalog.substring($1, ($3 + $4))" _null_ _null_ _null_ ));
+ DESCR("substitute portion of bit string");
+ DATA(insert OID = 3031 ( overlay PGNSP PGUID 14 1 0 0 f f f t f i 3 0 1560 "1560 1560 23" _null_ _null_ _null_ _null_ "select pg_catalog.substring($1, 1, ($3 - 1)) || $2 || pg_catalog.substring($1, ($3 + pg_catalog.length($2)))" _null_ _null_ _null_ ));
+ DESCR("substitute portion of bit string");
+ DATA(insert OID = 3032 ( get_bit PGNSP PGUID 12 1 0 0 f f f t f i 2 0 23 "1560 23" _null_ _null_ _null_ _null_ bitgetbit _null_ _null_ _null_ ));
+ DESCR("get bit");
+ DATA(insert OID = 3033 ( set_bit PGNSP PGUID 12 1 0 0 f f f t f i 3 0 1560 "1560 23 23" _null_ _null_ _null_ _null_ bitsetbit _null_ _null_ _null_ ));
+ DESCR("set bit");
/* for mac type support */
DATA(insert OID = 436 ( macaddr_in PGNSP PGUID 12 1 0 0 f f f t f i 1 0 829 "2275" _null_ _null_ _null_ _null_ macaddr_in _null_ _null_ _null_ ));
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.569
diff -c -r1.569 catversion.h
*** src/include/catalog/catversion.h 6 Jan 2010 05:18:18 -0000 1.569
--- src/include/catalog/catversion.h 7 Jan 2010 11:39:37 -0000
***************
*** 53,58 ****
*/
/* yyyymmddN */
! #define CATALOG_VERSION_NO 201001061
#endif
--- 53,58 ----
*/
/* yyyymmddN */
! #define CATALOG_VERSION_NO 201001081
#endif
Index: src/test/regress/expected/bit.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/bit.out,v
retrieving revision 1.4
diff -c -r1.4 bit.out
*** src/test/regress/expected/bit.out 27 Jul 2003 04:53:11 -0000 1.4
--- src/test/regress/expected/bit.out 7 Jan 2010 11:39:42 -0000
***************
*** 509,511 ****
--- 509,535 ----
DROP TABLE BIT_SHIFT_TABLE;
DROP TABLE VARBIT_SHIFT_TABLE;
+ SELECT get_bit(B'0101011000100', 10);
+ get_bit
+ ---------
+ 1
+ (1 row)
+
+ SELECT get_bit(B'0101011', 3);
+ get_bit
+ ---------
+ 1
+ (1 row)
+
+ SELECT set_bit(B'0101011', 0, 1);
+ set_bit
+ ---------
+ 1101011
+ (1 row)
+
+ SELECT set_bit(B'010101100010010', 14, 1);
+ set_bit
+ -----------------
+ 010101100010011
+ (1 row)
+
Index: src/test/regress/sql/bit.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/bit.sql,v
retrieving revision 1.2
diff -c -r1.2 bit.sql
*** src/test/regress/sql/bit.sql 22 May 2001 16:37:17 -0000 1.2
--- src/test/regress/sql/bit.sql 7 Jan 2010 11:39:42 -0000
***************
*** 184,186 ****
--- 184,191 ----
DROP TABLE BIT_SHIFT_TABLE;
DROP TABLE VARBIT_SHIFT_TABLE;
+
+ SELECT get_bit(B'0101011000100', 10);
+ SELECT get_bit(B'0101011', 3);
+ SELECT set_bit(B'0101011', 0, 1);
+ SELECT set_bit(B'010101100010010', 14, 1);
Index: src/include/utils/varbit.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/varbit.h,v
retrieving revision 1.29
diff -c -r1.29 varbit.h
*** src/include/utils/varbit.h 2 Jan 2010 16:58:10 -0000 1.29
--- src/include/utils/varbit.h 7 Jan 2010 11:39:42 -0000
***************
*** 95,99 ****
--- 95,101 ----
extern Datum bitfromint8(PG_FUNCTION_ARGS);
extern Datum bittoint8(PG_FUNCTION_ARGS);
extern Datum bitposition(PG_FUNCTION_ARGS);
+ extern Datum bitsetbit(PG_FUNCTION_ARGS);
+ extern Datum bitgetbit(PG_FUNCTION_ARGS);
#endif
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.495
diff -c -r1.495 func.sgml
*** doc/src/sgml/func.sgml 19 Dec 2009 17:49:50 -0000 1.495
--- doc/src/sgml/func.sgml 7 Jan 2010 11:39:36 -0000
***************
*** 2934,2940 ****
<literal><function>bit_length</function></literal>,
<literal><function>octet_length</function></literal>,
<literal><function>position</function></literal>,
! <literal><function>substring</function></literal>.
</para>
<para>
--- 2934,2943 ----
<literal><function>bit_length</function></literal>,
<literal><function>octet_length</function></literal>,
<literal><function>position</function></literal>,
! <literal><function>substring</function></literal>,
! <literal><function>overlay</function></literal>,
! <literal><function>get_bit</function></literal>,
! <literal><function>set_bit</function></literal>.
</para>
<para>
On Thu, Jan 7, 2010 at 7:02 AM, Leonardo F <m_lists@yahoo.it> wrote:
attached a patch that adds the following functions for bit string:
Thanks! Please add your patch here:
https://commitfest.postgresql.org/action/commitfest_view/open
The next CommitFest starts January 15th.
...Robert
Thanks! Please add your patch here:
https://commitfest.postgresql.org/action/commitfest_view/open
Ok; but what about what I said about the difference between bit/string substring?
That affects overlay behaviour for bit...
I've even got
"ERROR: invalid memory alloc request size 4244635647"
with:
SELECT substring(B'1111000000000001' from 5 for -2);
(this is 8.4.2, windows version, not modified...)
test=# SELECT substring(B'1111000000000001' from 1 for -1);
substring
------------------
1111000000000001
(1 row)
test=# SELECT substring('1111000000000001' from 1 for -1);
ERROR: negative substring length not allowed
On Thu, Jan 7, 2010 at 11:05 AM, Leonardo F <m_lists@yahoo.it> wrote:
Thanks! Please add your patch here:
https://commitfest.postgresql.org/action/commitfest_view/open
Ok; but what about what I said about the difference between bit/string substring?
That affects overlay behaviour for bit...I've even got
"ERROR: invalid memory alloc request size 4244635647"
with:
SELECT substring(B'1111000000000001' from 5 for -2);
(this is 8.4.2, windows version, not modified...)
test=# SELECT substring(B'1111000000000001' from 1 for -1);
substring
------------------
1111000000000001
(1 row)test=# SELECT substring('1111000000000001' from 1 for -1);
ERROR: negative substring length not allowed
I haven't tried to reproduce it, but that sounds like a bug.
....Robert
Leonardo F <m_lists@yahoo.it> writes:
I've even got
"ERROR: invalid memory alloc request size 4244635647"
with:
SELECT substring(B'1111000000000001' from 5 for -2);
Hm, yeah, somebody was sloppy about exposing the three-argument
form of varbit substring and using -1 to represent the two-argument
form.
What we can do in the back branches is make the code treat any
negative value as meaning two-arg form. To throw an error we'd
need to refactor the pg_proc representation ...
regards, tom lane
What we can do in the back branches is make the code treat any
negative value as meaning two-arg form. To throw an error we'd
need to refactor the pg_proc representation ...
I was going to fix that myself, but I think it has just been done.
How can I keep up with "who's doing what"?
Leonardo F wrote:
What we can do in the back branches is make the code treat any
negative value as meaning two-arg form. To throw an error we'd
need to refactor the pg_proc representation ...I was going to fix that myself, but I think it has just been done.
How can I keep up with "who's doing what"?
Read this list and pgsql-committers.
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Alvaro Herrera <alvherre@commandprompt.com> writes:
Leonardo F wrote:
How can I keep up with "who's doing what"?
Read this list and pgsql-committers.
Or subscribe to the RSS feed from:
http://git.postgresql.org/gitweb?p=postgresql.git;a=summary
--
dim