From 1ee425f4dfb35b73b68e50295ee37664618a1856 Mon Sep 17 00:00:00 2001
From: Mats Kindahl <mats@timescale.com>
Date: Wed, 29 Mar 2023 10:07:18 +0200
Subject: [PATCH] Avoid floating-point overflow in width_bucket()

The floating-point calculation in width_bucket() can lead to overflow
when values are close to DBL_MAX and -DBL_MAX, and further result in a
NaN in the computation. This is then cast to an integer value, which
produces an out-of-bounds value from width_bucket().

This is fixed by scaling down the values to avoid that the
floating-point expressions overflow.
---
 src/backend/utils/adt/float.c         | 21 ++++++++++++++-
 src/test/regress/expected/numeric.out | 38 +++++++++++++++++++++++++++
 src/test/regress/sql/numeric.sql      | 26 ++++++++++++++++++
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index aa79487a11..39a7f1a6e4 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4117,8 +4117,26 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 						 errmsg("integer out of range")));
 		}
+		/*
+		 * Since the expressions "operand - bound1" and "bound2 - bound1" can
+		 * overflow for some combinations of values that are close to -DBL_MAX
+		 * or DBL_MAX, this can lead NaN values, which when cast to integer
+		 * lead to strange values.
+		 *
+		 * To avoid this, we divide the values by 2 before performing the
+		 * subtraction if the result of either of them is infinite. This will
+		 * ensure that the expressions will not overflow and lead to a strange
+		 * result.
+		 *
+		 * Since we know from the previous checks that bound1 <= operand <=
+		 * bound2 it is sufficient to check that bound2 - bound1 is not
+		 * infinite. If it is not infinite, then operand - bound1 cannot be
+		 * infinite either.
+		 */
+		else if (isinf(bound2 - bound1))
+			result = (count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2))) + 1;
 		else
-			result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1;
+			result = (count * ((operand - bound1) / (bound2 - bound1))) + 1;
 	}
 	else if (bound1 > bound2)
 	{
@@ -4142,5 +4160,6 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 		result = 0;				/* keep the compiler quiet */
 	}
 
+	Assert(result >= 0 && result <= count + 1);
 	PG_RETURN_INT32(result);
 }
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 26930f4db4..2191110554 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1433,6 +1433,44 @@ SELECT width_bucket('Infinity'::float8, 1, 10, 10),
            11 |            0
 (1 row)
 
+WITH sample(oper, low, high, cnt) AS (
+  VALUES
+    -- Check ranges that can cause overflow in the internal
+    -- calculations. These are basically computations where difference
+    -- between operand and low and high can exceed DBL_MAX.
+    (10.5::float8, -1.797e+308::float8, 1.797e+308::float8, 1::int4),
+    (10.5::float8, -1.797e+308::float8, 1.797e+308::float8, 2),
+    (10.5::float8, -1.797e+308::float8, 1.797e+308::float8, 3),
+    (1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 10),
+    (-1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 10),
+    (1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 16),
+    (-1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 16),
+    (1.797e+308::float8, -1.797e+308::float8 / 2, 1.797e+308::float8 / 2, 10),
+    (-1.797e+308::float8, -1.797e+308::float8 / 2, 1.797e+308::float8 / 2, 10),
+    -- Check that potential underflow situations does not result in
+    -- weird values.
+    (2.22507e-308::float8, 2 * -2.22507e-308::float8, 2 * 2.22507e-308::float8, 4),
+    (-2.22507e-308::float8, 2 * -2.22507e-308::float8, 2 * 2.22507e-308::float8, 4),
+    (2.22507e-308::float8, 3 * -2.22507e-308::float8, 3 * 2.22507e-308::float8, 6),
+    (-2.22507e-308::float8, 3 * -2.22507e-308::float8, 3 * 2.22507e-308::float8, 6)
+) SELECT width_bucket(oper, low, high, cnt) FROM sample;
+ width_bucket 
+--------------
+            1
+            2
+            2
+            8
+            3
+           12
+            5
+           11
+            0
+            4
+            2
+            5
+            3
+(13 rows)
+
 DROP TABLE width_bucket_test;
 -- Simple test for roundoff error when results should be exact
 SELECT x, width_bucket(x::float8, 10, 100, 9) as flt,
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 2dddb58625..13ae07b6ff 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -900,6 +900,32 @@ SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error
 SELECT width_bucket('Infinity'::float8, 1, 10, 10),
        width_bucket('-Infinity'::float8, 1, 10, 10);
 
+WITH sample(oper, low, high, cnt) AS (
+  VALUES
+    -- Check ranges that can cause overflow in the internal
+    -- calculations. These are basically computations where difference
+    -- between operand and low and high can exceed DBL_MAX.
+    (10.5::float8, -1.797e+308::float8, 1.797e+308::float8, 1::int4),
+    (10.5::float8, -1.797e+308::float8, 1.797e+308::float8, 2),
+    (10.5::float8, -1.797e+308::float8, 1.797e+308::float8, 3),
+
+    (1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 10),
+    (-1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 10),
+
+    (1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 16),
+    (-1.797e+308::float8 / 2, -1.797e+308::float8, 1.797e+308::float8, 16),
+
+    (1.797e+308::float8, -1.797e+308::float8 / 2, 1.797e+308::float8 / 2, 10),
+    (-1.797e+308::float8, -1.797e+308::float8 / 2, 1.797e+308::float8 / 2, 10),
+
+    -- Check that potential underflow situations does not result in
+    -- weird values.
+    (2.22507e-308::float8, 2 * -2.22507e-308::float8, 2 * 2.22507e-308::float8, 4),
+    (-2.22507e-308::float8, 2 * -2.22507e-308::float8, 2 * 2.22507e-308::float8, 4),
+    (2.22507e-308::float8, 3 * -2.22507e-308::float8, 3 * 2.22507e-308::float8, 6),
+    (-2.22507e-308::float8, 3 * -2.22507e-308::float8, 3 * 2.22507e-308::float8, 6)
+) SELECT width_bucket(oper, low, high, cnt) FROM sample;
+
 DROP TABLE width_bucket_test;
 
 -- Simple test for roundoff error when results should be exact
-- 
2.34.1