Remove pg_strtouint64(), use strtoull() directly
pg_strtouint64() is a wrapper around strtoull/strtoul/_strtoui64, but it
seems no longer necessary to have this indirection.
msvc/Solution.pm claims HAVE_STRTOULL, so the "MSVC only" part seems
unnecessary. Also, we have code in c.h to substitute alternatives for
strtoull() if not found, and that would appear to cover all currently
supported platforms, so having a further fallback in pg_strtouint64()
seems unnecessary.
(AFAICT, the only buildfarm member that does not have strtoull()
directly but relies on the code in c.h is gaur. So we can hang on to
that code for a while longer, but its utility is also fading away.)
Therefore, remove pg_strtouint64(), and use strtoull() directly in all
call sites.
(This is also useful because we have pg_strtointNN() functions that have
a different API than this pg_strtouintNN(). So removing the latter
makes this problem go away.)
Attachments:
0001-Remove-pg_strtouint64-use-strtoull-directly.patchtext/plain; charset=UTF-8; name=0001-Remove-pg_strtouint64-use-strtoull-directly.patchDownload
From f858f7045c8b526e89e3bdc1e67524180a5b6b5c Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Fri, 10 Dec 2021 07:24:32 +0100
Subject: [PATCH] Remove pg_strtouint64(), use strtoull() directly
pg_strtouint64() is a wrapper around strtoull/strtoul/_strtoui64, but
it seems no longer necessary to have this indirection.
msvc/Solution.pm claims HAVE_STRTOULL, so the "MSVC only" part seems
unnecessary. Also, we have code in c.h to substitute alternatives for
strtoull() if not found, and that would appear to cover all currently
supported platforms, so having a further fallback in pg_strtouint64()
seems unnecessary.
Therefore, remove pg_strtouint64(), and use strtoull() directly in all
call sites.
---
src/backend/nodes/readfuncs.c | 2 +-
src/backend/utils/adt/numutils.c | 22 ----------------------
src/backend/utils/adt/xid.c | 2 +-
src/backend/utils/adt/xid8funcs.c | 6 +++---
src/backend/utils/misc/guc.c | 2 +-
src/include/utils/builtins.h | 1 -
6 files changed, 6 insertions(+), 29 deletions(-)
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index dcec3b728f..0eafae0794 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -80,7 +80,7 @@
#define READ_UINT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
- local_node->fldname = pg_strtouint64(token, NULL, 10)
+ local_node->fldname = strtoull(token, NULL, 10)
/* Read a long integer field (anything written as ":fldname %ld") */
#define READ_LONG_FIELD(fldname) \
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..6a9c00fdd3 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -606,25 +606,3 @@ pg_ultostr(char *str, uint32 value)
return str + len;
}
-
-/*
- * pg_strtouint64
- * Converts 'str' into an unsigned 64-bit integer.
- *
- * This has the identical API to strtoul(3), except that it will handle
- * 64-bit ints even where "long" is narrower than that.
- *
- * For the moment it seems sufficient to assume that the platform has
- * such a function somewhere; let's not roll our own.
- */
-uint64
-pg_strtouint64(const char *str, char **endptr, int base)
-{
-#ifdef _MSC_VER /* MSVC only */
- return _strtoui64(str, endptr, base);
-#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8
- return strtoull(str, endptr, base);
-#else
- return strtoul(str, endptr, base);
-#endif
-}
diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c
index 24c1c93732..02569f61f4 100644
--- a/src/backend/utils/adt/xid.c
+++ b/src/backend/utils/adt/xid.c
@@ -158,7 +158,7 @@ xid8in(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
- PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(pg_strtouint64(str, NULL, 0)));
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(strtoull(str, NULL, 0)));
}
Datum
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index f1870a7366..6cf3979c49 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -295,12 +295,12 @@ parse_snapshot(const char *str)
char *endp;
StringInfo buf;
- xmin = FullTransactionIdFromU64(pg_strtouint64(str, &endp, 10));
+ xmin = FullTransactionIdFromU64(strtoull(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
- xmax = FullTransactionIdFromU64(pg_strtouint64(str, &endp, 10));
+ xmax = FullTransactionIdFromU64(strtoull(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
@@ -318,7 +318,7 @@ parse_snapshot(const char *str)
while (*str != '\0')
{
/* read next value */
- val = FullTransactionIdFromU64(pg_strtouint64(str, &endp, 10));
+ val = FullTransactionIdFromU64(strtoull(str, &endp, 10));
str = endp;
/* require the input to be in order */
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee6a838b3a..9df553fc77 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -12438,7 +12438,7 @@ check_recovery_target_xid(char **newval, void **extra, GucSource source)
TransactionId *myextra;
errno = 0;
- xid = (TransactionId) pg_strtouint64(*newval, NULL, 0);
+ xid = (TransactionId) strtoull(*newval, NULL, 0);
if (errno == EINVAL || errno == ERANGE)
return false;
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 40fcb0ab6d..b07eefaf1e 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -53,7 +53,6 @@ extern int pg_ltoa(int32 l, char *a);
extern int pg_lltoa(int64 ll, char *a);
extern char *pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth);
extern char *pg_ultostr(char *str, uint32 value);
-extern uint64 pg_strtouint64(const char *str, char **endptr, int base);
/* oid.c */
extern oidvector *buildoidvector(const Oid *oids, int n);
--
2.34.1
Peter Eisentraut <peter.eisentraut@enterprisedb.com> writes:
Therefore, remove pg_strtouint64(), and use strtoull() directly in all
call sites.
Our experience with the variable size of "long" has left a sufficiently
bad taste in my mouth that I'm not enthused about adding hard-wired
assumptions that "long long" is identical to int64. So this seems like
it's going in the wrong direction, and giving up portability that we
might want back someday.
I'd be okay with making pg_strtouint64 into a really thin wrapper
(ie a macro, at least on most platforms). But please let's not
give up the notational distinction.
regards, tom lane
On 10.12.21 16:25, Tom Lane wrote:
Our experience with the variable size of "long" has left a sufficiently
bad taste in my mouth that I'm not enthused about adding hard-wired
assumptions that "long long" is identical to int64. So this seems like
it's going in the wrong direction, and giving up portability that we
might want back someday.
What kind of scenario do you have in mind? Someone making their long
long int 128 bits?
Peter Eisentraut <peter.eisentraut@enterprisedb.com> writes:
On 10.12.21 16:25, Tom Lane wrote:
Our experience with the variable size of "long" has left a sufficiently
bad taste in my mouth that I'm not enthused about adding hard-wired
assumptions that "long long" is identical to int64. So this seems like
it's going in the wrong direction, and giving up portability that we
might want back someday.
What kind of scenario do you have in mind? Someone making their long
long int 128 bits?
Yeah, exactly. That seems like a natural evolution:
short -> 2 bytes
int -> 4 bytes
long -> 8 bytes
long long -> 16 bytes
so I'm surprised that vendors haven't done that already instead
of inventing hacks like __int128.
Our current hard-coded uses of long long are all written on the
assumption that it's *at least* 64 bits, so we'd survive OK on
such a platform so long as we don't start confusing it with
*exactly* 64 bits.
regards, tom lane
On Mon, Dec 13, 2021 at 9:44 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Yeah, exactly. That seems like a natural evolution:
short -> 2 bytes
int -> 4 bytes
long -> 8 bytes
long long -> 16 bytes
so I'm surprised that vendors haven't done that already instead
of inventing hacks like __int128.
I really am glad they haven't. I think it's super-annoying that we
need hacks like UINT64_FORMAT all over the place. I think it was a
mistake not to nail down the size that each type is expected to be in
the original C standard, and making more changes to the conventions
now would cause a whole bunch of unnecessary code churn, probably for
almost everybody using C. It's not like people are writing high-level
applications in C these days; it's all low-level stuff that is likely
to care about the width of a word. It seems much more sensible to
standardize on names for words of all lengths in the standard than to
do anything else. I don't really care whether the standard chooses
int128, int256, int512, etc. or long long long, long long long long,
etc. or reallylong, superlong, incrediblylong, etc. but I hope they
define new stuff instead of encouraging implementations to redefine
what's there already.
--
Robert Haas
EDB: http://www.enterprisedb.com
Robert Haas <robertmhaas@gmail.com> writes:
I really am glad they haven't. I think it's super-annoying that we
need hacks like UINT64_FORMAT all over the place. I think it was a
mistake not to nail down the size that each type is expected to be in
the original C standard,
Well, mumble. One must remember that when C was designed, there was
a LOT more variability in hardware designs than we see today. They
could not have put a language with fixed ideas about datatype widths
onto, say, PDP-10s (36-bit words) or Crays (60-bit, IIRC). But it
is a darn shame that people weren't more consistent about mapping
the C types onto machines with S/360-like addressing.
and making more changes to the conventions
now would cause a whole bunch of unnecessary code churn, probably for
almost everybody using C.
The error in your thinking is believing that there *is* a convention.
There isn't; see "long".
Anyway, my point is that we have created a set of type names that
have the semantics we want, and we should avoid confusing those with
underlying C types that are *not* guaranteed to be the same thing.
regards, tom lane
On Mon, Dec 13, 2021 at 10:46 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
Robert Haas <robertmhaas@gmail.com> writes:
I really am glad they haven't. I think it's super-annoying that we
need hacks like UINT64_FORMAT all over the place. I think it was a
mistake not to nail down the size that each type is expected to be in
the original C standard,Well, mumble. One must remember that when C was designed, there was
a LOT more variability in hardware designs than we see today. They
could not have put a language with fixed ideas about datatype widths
onto, say, PDP-10s (36-bit words) or Crays (60-bit, IIRC). But it
is a darn shame that people weren't more consistent about mapping
the C types onto machines with S/360-like addressing.
Sure.
and making more changes to the conventions
now would cause a whole bunch of unnecessary code churn, probably for
almost everybody using C.The error in your thinking is believing that there *is* a convention.
There isn't; see "long".
I mean I pretty much pointed out exactly that thing with my mention of
UINT64_FORMAT, so I'm not sure why you're making it seem like I didn't
know that.
Anyway, my point is that we have created a set of type names that
have the semantics we want, and we should avoid confusing those with
underlying C types that are *not* guaranteed to be the same thing.
I agree entirely, but it's still an annoyance when dealing with printf
format codes and other operating-system defined types whose width we
don't know. Standardization here makes it easier to write good code;
different conventions make it harder. I'm guessing that other people
have noticed that too, and that's why we're getting stuff like
__int128 instead of redefining long long.
--
Robert Haas
EDB: http://www.enterprisedb.com
On 13.12.21 15:44, Tom Lane wrote:
Our current hard-coded uses of long long are all written on the
assumption that it's*at least* 64 bits, so we'd survive OK on
such a platform so long as we don't start confusing it with
*exactly* 64 bits.
OK, makes sense. Here is an alternative patch. It introduces two
light-weight macros strtoi64() and strtou64() (compare e.g., strtoimax()
in POSIX) in c.h and removes pg_strtouint64(). This moves the
portability layer from numutils.c to c.h, so it's closer to the rest of
the int64 portability code. And that way it is available to not just
server code. And it resolves the namespace collision with the
pg_strtointNN() functions in numutils.c.
Attachments:
v2-0001-Simplify-the-general-purpose-64-bit-integer-parsi.patchtext/plain; charset=UTF-8; name=v2-0001-Simplify-the-general-purpose-64-bit-integer-parsi.patchDownload
From e72aaaa3f9480be4c466f9c3f59a75af951d94f2 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter@eisentraut.org>
Date: Tue, 14 Dec 2021 08:38:35 +0100
Subject: [PATCH v2] Simplify the general-purpose 64-bit integer parsing APIs
pg_strtouint64() is a wrapper around strtoull/strtoul/_strtoui64, but
it seems no longer necessary to have this indirection.
msvc/Solution.pm claims HAVE_STRTOULL, so the "MSVC only" part seems
unnecessary. Also, we have code in c.h to substitute alternatives for
strtoull() if not found, and that would appear to cover all currently
supported platforms, so having a further fallback in pg_strtouint64()
seems unnecessary.
Therefore, we could remove pg_strtouint64(), and use strtoull()
directly in all call sites. However, it seems useful to keep a
separate notation for parsing exactly 64-bit integers, matching the
type definition int64/uint64. For that, add new macros strtoi64() and
strtou64() in c.h as thin wrappers around strtol()/strtoul() or
strtoll()/stroull(). This makes these functions available everywhere
instead of just in the server code, and it makes the function naming
notably different from the pg_strtointNN() functions in numutils.c,
which have a different API.
---
src/backend/nodes/readfuncs.c | 2 +-
src/backend/utils/adt/numutils.c | 22 ----------------------
src/backend/utils/adt/xid.c | 2 +-
src/backend/utils/adt/xid8funcs.c | 6 +++---
src/backend/utils/misc/guc.c | 2 +-
src/include/c.h | 13 +++++++++++++
src/include/utils/builtins.h | 1 -
7 files changed, 19 insertions(+), 29 deletions(-)
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index dcec3b728f..76cff8a2b1 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -80,7 +80,7 @@
#define READ_UINT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
token = pg_strtok(&length); /* get field value */ \
- local_node->fldname = pg_strtouint64(token, NULL, 10)
+ local_node->fldname = strtou64(token, NULL, 10)
/* Read a long integer field (anything written as ":fldname %ld") */
#define READ_LONG_FIELD(fldname) \
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..6a9c00fdd3 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -606,25 +606,3 @@ pg_ultostr(char *str, uint32 value)
return str + len;
}
-
-/*
- * pg_strtouint64
- * Converts 'str' into an unsigned 64-bit integer.
- *
- * This has the identical API to strtoul(3), except that it will handle
- * 64-bit ints even where "long" is narrower than that.
- *
- * For the moment it seems sufficient to assume that the platform has
- * such a function somewhere; let's not roll our own.
- */
-uint64
-pg_strtouint64(const char *str, char **endptr, int base)
-{
-#ifdef _MSC_VER /* MSVC only */
- return _strtoui64(str, endptr, base);
-#elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8
- return strtoull(str, endptr, base);
-#else
- return strtoul(str, endptr, base);
-#endif
-}
diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c
index 24c1c93732..a09096d018 100644
--- a/src/backend/utils/adt/xid.c
+++ b/src/backend/utils/adt/xid.c
@@ -158,7 +158,7 @@ xid8in(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
- PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(pg_strtouint64(str, NULL, 0)));
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(strtou64(str, NULL, 0)));
}
Datum
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index f1870a7366..68985dca5a 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -295,12 +295,12 @@ parse_snapshot(const char *str)
char *endp;
StringInfo buf;
- xmin = FullTransactionIdFromU64(pg_strtouint64(str, &endp, 10));
+ xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
- xmax = FullTransactionIdFromU64(pg_strtouint64(str, &endp, 10));
+ xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
@@ -318,7 +318,7 @@ parse_snapshot(const char *str)
while (*str != '\0')
{
/* read next value */
- val = FullTransactionIdFromU64(pg_strtouint64(str, &endp, 10));
+ val = FullTransactionIdFromU64(strtou64(str, &endp, 10));
str = endp;
/* require the input to be in order */
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee6a838b3a..67971e8c10 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -12438,7 +12438,7 @@ check_recovery_target_xid(char **newval, void **extra, GucSource source)
TransactionId *myextra;
errno = 0;
- xid = (TransactionId) pg_strtouint64(*newval, NULL, 0);
+ xid = (TransactionId) strtou64(*newval, NULL, 0);
if (errno == EINVAL || errno == ERANGE)
return false;
diff --git a/src/include/c.h b/src/include/c.h
index 7e591171a8..98c0b053c9 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -1312,6 +1312,19 @@ extern long long strtoll(const char *str, char **endptr, int base);
extern unsigned long long strtoull(const char *str, char **endptr, int base);
#endif
+/*
+ * Thin wrappers that convert strings to exactly 64-bit integers, matching our
+ * definition of int64. (For the naming, compare that POSIX has
+ * strtoimax()/strtoumax() which return intmax_t/uintmax_t.)
+ */
+#ifdef HAVE_LONG_INT_64
+#define strtoi64(str, endptr, base) ((int64) strtol(str, endptr, base))
+#define strtou64(str, endptr, base) ((uint64) strtoul(str, endptr, base))
+#else
+#define strtoi64(str, endptr, base) ((int64) strtoll(str, endptr, base))
+#define strtou64(str, endptr, base) ((uint64) strtoull(str, endptr, base))
+#endif
+
/*
* Use "extern PGDLLIMPORT ..." to declare variables that are defined
* in the core backend and need to be accessible by loadable modules.
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 40fcb0ab6d..b07eefaf1e 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -53,7 +53,6 @@ extern int pg_ltoa(int32 l, char *a);
extern int pg_lltoa(int64 ll, char *a);
extern char *pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth);
extern char *pg_ultostr(char *str, uint32 value);
-extern uint64 pg_strtouint64(const char *str, char **endptr, int base);
/* oid.c */
extern oidvector *buildoidvector(const Oid *oids, int n);
--
2.34.1
Peter Eisentraut <peter.eisentraut@enterprisedb.com> writes:
OK, makes sense. Here is an alternative patch. It introduces two
light-weight macros strtoi64() and strtou64() (compare e.g., strtoimax()
in POSIX) in c.h and removes pg_strtouint64(). This moves the
portability layer from numutils.c to c.h, so it's closer to the rest of
the int64 portability code. And that way it is available to not just
server code. And it resolves the namespace collision with the
pg_strtointNN() functions in numutils.c.
Works for me. I'm not in a position to verify that this'll work
on Windows, but the buildfarm will tell us that quickly enough.
regards, tom lane