From 92f3d5bf1718b1b52a5a4d792f5c75e0bcc7fb74 Mon Sep 17 00:00:00 2001 From: David Fetter Date: Wed, 30 Dec 2020 02:51:46 -0800 Subject: [PATCH v4] popcount To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.29.2" This is a multi-part message in MIME format. --------------2.29.2 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit Now it's accessible to SQL for the BIT VARYING and BYTEA types. diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index ece09699ef..45cfc22ae9 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -4030,6 +4030,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + + + count_set_bits + + count_set_bits ( bytes bytea ) + bigint + + + Counts the number of bits set in a binary string. + + + count_set_bits('\xdeadbeef'::bytea) + 24 + + + @@ -4830,6 +4847,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + + + count_set_bits + + count_set_bits ( bits bit ) + bigint + + + Counts the bits set in a bit string. + + + count_set_bits(B'101010101010101010') + 9 + + + @@ -4869,6 +4903,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); 101010001010101010 + diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c index 2235866244..c9c6c73422 100644 --- a/src/backend/utils/adt/varbit.c +++ b/src/backend/utils/adt/varbit.c @@ -36,6 +36,7 @@ #include "libpq/pqformat.h" #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" +#include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/varbit.h" @@ -1878,3 +1879,30 @@ bitgetbit(PG_FUNCTION_ARGS) else PG_RETURN_INT32(0); } + +/* + * bitpopcount + * + * Returns the number of bits set in a bit string. + * + */ +Datum +bitpopcount(PG_FUNCTION_ARGS) +{ + /* There's really no chance of an overflow here because + * to get to INT64_MAX set bits, an object would have to be + * an exbibyte long, exceeding what PostgreSQL can currently + * store by a factor of 2^28 + */ + int64 popcount; + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + bits8 *p; + int len; + + p = VARBITS(arg1); + len = (VARBITLEN(arg1) + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + + popcount = pg_popcount((char *)p, len); + + PG_RETURN_INT64(popcount); +} diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 479ed9ae54..3f1179a0e8 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -3439,6 +3439,22 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) return result; } +/* + * popcount + */ +Datum +byteapopcount(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + int len; + int64 result; + + len = VARSIZE_ANY_EXHDR(t1); + result = pg_popcount(VARDATA_ANY(t1), len); + + PG_RETURN_INT64(result); +} + /* * byteapos - * Return the position of the specified substring. diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 506689d8ac..6f0ef32b87 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1446,6 +1446,9 @@ { oid => '752', descr => 'substitute portion of string', proname => 'overlay', prorettype => 'bytea', proargtypes => 'bytea bytea int4', prosrc => 'byteaoverlay_no_len' }, +{ oid => '8436', descr => 'count set bits', + proname => 'count_set_bits', prorettype => 'int8', proargtypes => 'bytea', + prosrc => 'byteapopcount'}, { oid => '725', proname => 'dist_pl', prorettype => 'float8', proargtypes => 'point line', @@ -3876,6 +3879,9 @@ { oid => '3033', descr => 'set bit', proname => 'set_bit', prorettype => 'bit', proargtypes => 'bit int4 int4', prosrc => 'bitsetbit' }, +{ oid => '8435', descr => 'count set bits', + proname => 'count_set_bits', prorettype => 'int8', proargtypes => 'bit', + prosrc => 'bitpopcount'}, # for macaddr type support { oid => '436', descr => 'I/O', diff --git a/src/test/regress/expected/bit.out b/src/test/regress/expected/bit.out index a7f95b846d..1187076a8d 100644 --- a/src/test/regress/expected/bit.out +++ b/src/test/regress/expected/bit.out @@ -710,6 +710,19 @@ SELECT overlay(B'0101011100' placing '001' from 20); 0101011100001 (1 row) +-- Popcount +SELECT count_set_bits(B'0101011100'::bit(10)); + count_set_bits +---------------- + 5 +(1 row) + +SELECT count_set_bits(B'1111111111'::bit(10)); + count_set_bits +---------------- + 10 +(1 row) + -- This table is intentionally left around to exercise pg_dump/pg_upgrade CREATE TABLE bit_defaults( b1 bit(4) DEFAULT '1001', diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index fb4573d85f..5f27522187 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2227,3 +2227,10 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 Th\000o\x02\x03 (1 row) +SET bytea_output TO hex; +SELECT count_set_bits(E'\\xdeadbeef'::bytea); + count_set_bits +---------------- + 24 +(1 row) + diff --git a/src/test/regress/sql/bit.sql b/src/test/regress/sql/bit.sql index ea01742c4a..8893d38b0d 100644 --- a/src/test/regress/sql/bit.sql +++ b/src/test/regress/sql/bit.sql @@ -215,6 +215,10 @@ SELECT overlay(B'0101011100' placing '101' from 6); SELECT overlay(B'0101011100' placing '001' from 11); SELECT overlay(B'0101011100' placing '001' from 20); +-- Popcount +SELECT count_set_bits(B'0101011100'::bit(10)); +SELECT count_set_bits(B'1111111111'::bit(10)); + -- This table is intentionally left around to exercise pg_dump/pg_upgrade CREATE TABLE bit_defaults( b1 bit(4) DEFAULT '1001', diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 57a48c9d0b..523f960d5b 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -742,3 +742,6 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape'); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape'); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape'); + +SET bytea_output TO hex; +SELECT count_set_bits(E'\\xdeadbeef'::bytea);