BUG #19501: btree_gist: use float4/float8 comparison functions to handle NaN correctly

Started by PG Bug reporting form21 days ago1 messagesbugs
Jump to latest
#1PG Bug reporting form
noreply@postgresql.org

The following bug has been logged on the website:

Bug reference: 19501
Logged by: Man Zeng
Email address: zengman@halodbtech.com
PostgreSQL version: 18.4
Operating system: 24.04.1-Ubuntu
Description:

Hi all,

Here's an interesting case where btree_gist returns inconsistent results
between index scans and sequential scans when NaN values are involved.

```
postgres@zxm-VMware-Virtual-Platform:~/code/zengine$ psql
psql (19devel)
Type "help" for help.

test=# CREATE EXTENSION btree_gist;
CREATE EXTENSION
test=# CREATE TABLE test_nan_bug (v float8);
CREATE TABLE
test=# INSERT INTO test_nan_bug VALUES (1.0), (2.0), ('NaN'::float8), (3.0);
INSERT 0 4
test=# CREATE INDEX idx_nan_bug ON test_nan_bug USING gist(v);
CREATE INDEX
test=# SET enable_indexscan=off;
SET
test=# SELECT count(*) FROM test_nan_bug WHERE v = 'NaN'::float8;
count
-------
1
(1 row)

test=# EXPLAIN VERBOSE SELECT count(*) FROM test_nan_bug WHERE v =
'NaN'::float8;
QUERY PLAN
-------------------------------------------------------------------------
Aggregate (cost=1.05..1.06 rows=1 width=8)
Output: count(*)
-> Seq Scan on public.test_nan_bug (cost=0.00..1.05 rows=1 width=0)
Output: v
Filter: (test_nan_bug.v = 'NaN'::double precision)
(5 rows)

test=# SET enable_indexscan=on; SET enable_seqscan=off;
SET
SET
test=# SELECT count(*) FROM test_nan_bug WHERE v = 'NaN'::float8;
count
-------
0
(1 row)

test=# EXPLAIN VERBOSE SELECT count(*) FROM test_nan_bug WHERE v =
'NaN'::float8;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Aggregate (cost=8.15..8.16 rows=1 width=8)
Output: count(*)
-> Index Only Scan using idx_nan_bug on public.test_nan_bug
(cost=0.13..8.15 rows=1 width=0)
Output: v
Index Cond: (test_nan_bug.v = 'NaN'::double precision)
(5 rows)

```

Steps to reproduce:
```sql
CREATE EXTENSION btree_gist;
CREATE TABLE test_nan_bug (v float8);
INSERT INTO test_nan_bug VALUES (1.0), (2.0), ('NaN'::float8), (3.0);
CREATE INDEX idx_nan_bug ON test_nan_bug USING gist(v);

SET enable_indexscan=off;
SELECT count(*) FROM test_nan_bug WHERE v = 'NaN'::float8;

SET enable_indexscan=on; SET enable_seqscan=off;
SELECT count(*) FROM test_nan_bug WHERE v = 'NaN'::float8;
```

My fix replaces the C comparison operators with PostgreSQL's float
comparison functions (float8_gt, float8_ge, float8_eq, etc.) and
float8_cmp_internal, which handle NaN consistently.

```
diff --git a/contrib/btree_gist/btree_float4.c
b/contrib/btree_gist/btree_float4.c
index c076918fd48..eaecdac0980 100644
--- a/contrib/btree_gist/btree_float4.c
+++ b/contrib/btree_gist/btree_float4.c
@@ -29,27 +29,27 @@ PG_FUNCTION_INFO_V1(gbt_float4_sortsupport);
 static bool
 gbt_float4gt(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float4 *) a) > *((const float4 *) b));
+       return float4_gt(*(const float4 *) a, *(const float4 *) b);
 }
 static bool
 gbt_float4ge(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float4 *) a) >= *((const float4 *) b));
+       return float4_ge(*(const float4 *) a, *(const float4 *) b);
 }
 static bool
 gbt_float4eq(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float4 *) a) == *((const float4 *) b));
+       return float4_eq(*(const float4 *) a, *(const float4 *) b);
 }
 static bool
 gbt_float4le(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float4 *) a) <= *((const float4 *) b));
+       return float4_le(*(const float4 *) a, *(const float4 *) b);
 }
 static bool
 gbt_float4lt(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float4 *) a) < *((const float4 *) b));
+       return float4_lt(*(const float4 *) a, *(const float4 *) b);
 }

static int
@@ -57,16 +57,12 @@ gbt_float4key_cmp(const void *a, const void *b, FmgrInfo
*flinfo)
{
float4KEY *ia = (float4KEY *) (((const Nsrt *) a)->t);
float4KEY *ib = (float4KEY *) (((const Nsrt *) b)->t);
+ int cmp;

-       if (ia->lower == ib->lower)
-       {
-               if (ia->upper == ib->upper)
-                       return 0;
-
-               return (ia->upper > ib->upper) ? 1 : -1;
-       }
-
-       return (ia->lower > ib->lower) ? 1 : -1;
+       cmp = float4_cmp_internal(ia->lower, ib->lower);
+       if (cmp != 0)
+               return cmp;
+       return float4_cmp_internal(ia->upper, ib->upper);
 }
 static float8
diff --git a/contrib/btree_gist/btree_float8.c
b/contrib/btree_gist/btree_float8.c
index d7386e885a2..132065a648c 100644
--- a/contrib/btree_gist/btree_float8.c
+++ b/contrib/btree_gist/btree_float8.c
@@ -30,27 +30,27 @@ PG_FUNCTION_INFO_V1(gbt_float8_sortsupport);
 static bool
 gbt_float8gt(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float8 *) a) > *((const float8 *) b));
+       return float8_gt(*(const float8 *) a, *(const float8 *) b);
 }
 static bool
 gbt_float8ge(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float8 *) a) >= *((const float8 *) b));
+       return float8_ge(*(const float8 *) a, *(const float8 *) b);
 }
 static bool
 gbt_float8eq(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float8 *) a) == *((const float8 *) b));
+       return float8_eq(*(const float8 *) a, *(const float8 *) b);
 }
 static bool
 gbt_float8le(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float8 *) a) <= *((const float8 *) b));
+       return float8_le(*(const float8 *) a, *(const float8 *) b);
 }
 static bool
 gbt_float8lt(const void *a, const void *b, FmgrInfo *flinfo)
 {
-       return (*((const float8 *) a) < *((const float8 *) b));
+       return float8_lt(*(const float8 *) a, *(const float8 *) b);
 }

static int
@@ -58,16 +58,12 @@ gbt_float8key_cmp(const void *a, const void *b, FmgrInfo
*flinfo)
{
float8KEY *ia = (float8KEY *) (((const Nsrt *) a)->t);
float8KEY *ib = (float8KEY *) (((const Nsrt *) b)->t);
+ int cmp;

-       if (ia->lower == ib->lower)
-       {
-               if (ia->upper == ib->upper)
-                       return 0;
-
-               return (ia->upper > ib->upper) ? 1 : -1;
-       }
-
-       return (ia->lower > ib->lower) ? 1 : -1;
+       cmp = float8_cmp_internal(ia->lower, ib->lower);
+       if (cmp != 0)
+               return cmp;
+       return float8_cmp_internal(ia->upper, ib->upper);
 }
 static float8
diff --git a/contrib/btree_gist/expected/float4.out
b/contrib/btree_gist/expected/float4.out
index dfe732049e6..99ed6ae4668 100644
--- a/contrib/btree_gist/expected/float4.out
+++ b/contrib/btree_gist/expected/float4.out
@@ -89,3 +89,12 @@ SELECT a, a <-> '-179.0' FROM float4tmp ORDER BY a <->
'-179.0' LIMIT 3;
  -158.17741 | 20.822586
 (3 rows)
+INSERT INTO float4tmp VALUES ('NaN'), ('NaN');
+SET enable_seqscan=off;
+SELECT count(*) FROM float4tmp WHERE a = 'NaN';
+ count
+-------
+     2
+(1 row)
+
+RESET enable_seqscan;
diff --git a/contrib/btree_gist/expected/float8.out
b/contrib/btree_gist/expected/float8.out
index ebd0ef3d689..e463b8869d6 100644
--- a/contrib/btree_gist/expected/float8.out
+++ b/contrib/btree_gist/expected/float8.out
@@ -89,3 +89,12 @@ SELECT a, a <-> '-1890.0' FROM float8tmp ORDER BY a <->
'-1890.0' LIMIT 3;
   -1769.73634 | 120.26366000000007
 (3 rows)
+INSERT INTO float8tmp VALUES ('NaN'), ('NaN');
+SET enable_seqscan=off;
+SELECT count(*) FROM float8tmp WHERE a = 'NaN';
+ count
+-------
+     2
+(1 row)
+
+RESET enable_seqscan;
diff --git a/contrib/btree_gist/sql/float4.sql
b/contrib/btree_gist/sql/float4.sql
index 3da1ce953c8..0cacee08276 100644
--- a/contrib/btree_gist/sql/float4.sql
+++ b/contrib/btree_gist/sql/float4.sql
@@ -35,3 +35,11 @@ SELECT count(*) FROM float4tmp WHERE a >  -179.0::float4;
 EXPLAIN (COSTS OFF)
 SELECT a, a <-> '-179.0' FROM float4tmp ORDER BY a <-> '-179.0' LIMIT 3;
 SELECT a, a <-> '-179.0' FROM float4tmp ORDER BY a <-> '-179.0' LIMIT 3;
+
+INSERT INTO float4tmp VALUES ('NaN'), ('NaN');
+
+SET enable_seqscan=off;
+
+SELECT count(*) FROM float4tmp WHERE a = 'NaN';
+
+RESET enable_seqscan;
diff --git a/contrib/btree_gist/sql/float8.sql
b/contrib/btree_gist/sql/float8.sql
index e1e819b37f9..2d6ef9d95e7 100644
--- a/contrib/btree_gist/sql/float8.sql
+++ b/contrib/btree_gist/sql/float8.sql
@@ -35,3 +35,11 @@ SELECT count(*) FROM float8tmp WHERE a >
-1890.0::float8;
 EXPLAIN (COSTS OFF)
 SELECT a, a <-> '-1890.0' FROM float8tmp ORDER BY a <-> '-1890.0' LIMIT 3;
 SELECT a, a <-> '-1890.0' FROM float8tmp ORDER BY a <-> '-1890.0' LIMIT 3;
+
+INSERT INTO float8tmp VALUES ('NaN'), ('NaN');
+
+SET enable_seqscan=off;
+
+SELECT count(*) FROM float8tmp WHERE a = 'NaN';
+
+RESET enable_seqscan;
```
Any thoughts?

Regards,
Man Zeng