From ac30b06e3ddcb57eebb380560c2f4a47430dfd74 Mon Sep 17 00:00:00 2001
From: David Christensen <david.christensen@crunchydata.com>
Date: Tue, 29 Jun 2021 10:20:05 -0500
Subject: [PATCH 1/2] Refactor pg_size_pretty and pg_size_bytes to allow for
 supported unit expansion

---
 src/backend/utils/adt/dbsize.c       | 90 ++++++++++++++++------------
 src/include/catalog/pg_proc.dat      |  2 +-
 src/test/regress/expected/dbsize.out |  4 --
 src/test/regress/sql/dbsize.sql      |  2 -
 4 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 9c39e7d3b3..df08845932 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -638,7 +638,7 @@ pg_size_pretty_numeric(PG_FUNCTION_ARGS)
 	Numeric		size = PG_GETARG_NUMERIC(0);
 	Numeric		limit,
 				limit2;
-	char	   *result;
+	char	   *result = NULL;
 
 	limit = int64_to_numeric(10 * 1024);
 	limit2 = int64_to_numeric(10 * 1024 * 2 - 1);
@@ -660,31 +660,32 @@ pg_size_pretty_numeric(PG_FUNCTION_ARGS)
 		}
 		else
 		{
-			/* size >>= 10 */
-			size = numeric_shift_right(size, 10);
-			if (numeric_is_less(numeric_absolute(size), limit2))
-			{
-				size = numeric_half_rounded(size);
-				result = psprintf("%s MB", numeric_to_cstring(size));
-			}
-			else
-			{
+			int idx, max_iter = 2; /* highest index of the table below */
+			char *output_formats[] = {
+				"%s MB",
+				"%s GB",
+				"%s TB"
+			};
+
+			for (idx = 0; idx < max_iter; idx++) {
 				/* size >>= 10 */
 				size = numeric_shift_right(size, 10);
-
 				if (numeric_is_less(numeric_absolute(size), limit2))
 				{
 					size = numeric_half_rounded(size);
-					result = psprintf("%s GB", numeric_to_cstring(size));
-				}
-				else
-				{
-					/* size >>= 10 */
-					size = numeric_shift_right(size, 10);
-					size = numeric_half_rounded(size);
-					result = psprintf("%s TB", numeric_to_cstring(size));
+					result = psprintf(output_formats[idx], numeric_to_cstring(size));
+					break;
 				}
 			}
+
+			if (!result) {
+				/* this uses the last format in the table above for anything else */
+
+				/* size >>= 10 */
+				size = numeric_shift_right(size, 10);
+				size = numeric_half_rounded(size);
+				result = psprintf(output_formats[max_iter], numeric_to_cstring(size));
+			}
 		}
 	}
 
@@ -703,7 +704,6 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 			   *endptr;
 	char		saved_char;
 	Numeric		num;
-	int64		result;
 	bool		have_digits = false;
 
 	str = text_to_cstring(arg);
@@ -786,7 +786,16 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 	/* Handle possible unit */
 	if (*strptr != '\0')
 	{
-		int64		multiplier = 0;
+		int64		multiplier = 1;
+		int         i;
+		int         unit_count = 5; /* number of entries in the units table */
+		char       *units[] = {
+			"bytes",
+			"kb",
+			"mb",
+			"gb",
+			"tb",
+		};
 
 		/* Trim any trailing whitespace */
 		endptr = str + VARSIZE_ANY_EXHDR(arg) - 1;
@@ -797,21 +806,20 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 		endptr++;
 		*endptr = '\0';
 
-		/* Parse the unit case-insensitively */
-		if (pg_strcasecmp(strptr, "bytes") == 0)
-			multiplier = (int64) 1;
-		else if (pg_strcasecmp(strptr, "kb") == 0)
-			multiplier = (int64) 1024;
-		else if (pg_strcasecmp(strptr, "mb") == 0)
-			multiplier = ((int64) 1024) * 1024;
-
-		else if (pg_strcasecmp(strptr, "gb") == 0)
-			multiplier = ((int64) 1024) * 1024 * 1024;
-
-		else if (pg_strcasecmp(strptr, "tb") == 0)
-			multiplier = ((int64) 1024) * 1024 * 1024 * 1024;
+		for (i = 0; i < unit_count; i++) {
+			printf("strptr: %s units: %s", strptr, units[i]);
+			if (pg_strcasecmp(strptr, units[i]) == 0)
+				break;
+			/*
+			 * Note: int64 isn't large enough to store the full multiplier
+			 * going past ~ 9EB, but since this is a fixed value, we can apply
+			 * it twice; thus we store 2 ** 5 = 32 per unit step here, which
+			 * yields 2 ** 10 = 1024 per step on actual conversion to numeric.
+			 */
+			multiplier *= 32;
+		}
 
-		else
+		if (i == unit_count)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 					 errmsg("invalid size: \"%s\"", text_to_cstring(arg)),
@@ -827,13 +835,19 @@ pg_size_bytes(PG_FUNCTION_ARGS)
 			num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
 													  NumericGetDatum(mul_num),
 													  NumericGetDatum(num)));
+
+			/* second application to get around int64 limitations in unit multipliers */
+			num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
+													  NumericGetDatum(mul_num),
+													  NumericGetDatum(num)));
 		}
 	}
 
-	result = DatumGetInt64(DirectFunctionCall1(numeric_int8,
-											   NumericGetDatum(num)));
+	/* now finally round up (numeric_ceil, not truncation), since this is always in integer-like units */
+	num = DatumGetNumeric(DirectFunctionCall1(numeric_ceil,
+											  NumericGetDatum(num)));
 
-	PG_RETURN_INT64(result);
+	PG_RETURN_NUMERIC(num);
 }
 
 /*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fde251fa4f..73326e3618 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7187,7 +7187,7 @@
   prosrc => 'pg_size_pretty_numeric' },
 { oid => '3334',
   descr => 'convert a size in human-readable format with size units into bytes',
-  proname => 'pg_size_bytes', prorettype => 'int8', proargtypes => 'text',
+  proname => 'pg_size_bytes', prorettype => 'numeric', proargtypes => 'text',
   prosrc => 'pg_size_bytes' },
 { oid => '2997',
   descr => 'disk space usage for the specified table, including TOAST, free space and visibility map',
diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out
index e901a2c92a..624948ccd2 100644
--- a/src/test/regress/expected/dbsize.out
+++ b/src/test/regress/expected/dbsize.out
@@ -114,10 +114,6 @@ SELECT pg_size_bytes('1 AB A    ');
 ERROR:  invalid size: "1 AB A    "
 DETAIL:  Invalid size unit: "AB A".
 HINT:  Valid units are "bytes", "kB", "MB", "GB", and "TB".
-SELECT pg_size_bytes('9223372036854775807.9');
-ERROR:  bigint out of range
-SELECT pg_size_bytes('1e100');
-ERROR:  bigint out of range
 SELECT pg_size_bytes('1e1000000000000000000');
 ERROR:  value overflows numeric format
 SELECT pg_size_bytes('1 byte');  -- the singular "byte" is not supported
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql
index d10a4d7f68..37023a01c3 100644
--- a/src/test/regress/sql/dbsize.sql
+++ b/src/test/regress/sql/dbsize.sql
@@ -34,8 +34,6 @@ SELECT size, pg_size_bytes(size) FROM
 SELECT pg_size_bytes('1 AB');
 SELECT pg_size_bytes('1 AB A');
 SELECT pg_size_bytes('1 AB A    ');
-SELECT pg_size_bytes('9223372036854775807.9');
-SELECT pg_size_bytes('1e100');
 SELECT pg_size_bytes('1e1000000000000000000');
 SELECT pg_size_bytes('1 byte');  -- the singular "byte" is not supported
 SELECT pg_size_bytes('');
-- 
2.30.1 (Apple Git-130)

