From 38bb61aeeefc24cdc772474b0fb4f1802d3b7577 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sun, 24 Oct 2021 21:48:26 +1300
Subject: [PATCH 1/2] A basic API for futexes.

A thin wrapper for basic 32-bit futex wait and wake.  Currently, it maps
to native support on Linux, FreeBSD, OpenBSD and macOS, with detection
via configure.  More operating systems and more operations are possible.
---
 configure                   |   4 +-
 configure.ac                |   4 ++
 src/include/pg_config.h.in  |  12 ++++
 src/include/port/pg_futex.h | 139 ++++++++++++++++++++++++++++++++++++
 4 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 src/include/port/pg_futex.h

diff --git a/configure b/configure
index 4ffefe4655..23f1cbe9d0 100755
--- a/configure
+++ b/configure
@@ -13381,7 +13381,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
 fi
 
 
-for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h
+for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h linux/futex.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/futex.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/uio.h sys/umtx.h sys/un.h termios.h ucred.h wctype.h
 do :
   as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
 ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@@ -15492,7 +15492,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink syncfs sync_file_range uselocale wcstombs_l writev
+for ac_func in __ulock_wait backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink syncfs sync_file_range uselocale wcstombs_l writev
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.ac b/configure.ac
index 44ee3ebe2f..542b46437e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1363,10 +1363,12 @@ AC_CHECK_HEADERS(m4_normalize([
 	getopt.h
 	ifaddrs.h
 	langinfo.h
+	linux/futex.h
 	mbarrier.h
 	poll.h
 	sys/epoll.h
 	sys/event.h
+	sys/futex.h
 	sys/ipc.h
 	sys/prctl.h
 	sys/procctl.h
@@ -1378,6 +1380,7 @@ AC_CHECK_HEADERS(m4_normalize([
 	sys/sockio.h
 	sys/tas.h
 	sys/uio.h
+	sys/umtx.h
 	sys/un.h
 	termios.h
 	ucred.h
@@ -1698,6 +1701,7 @@ LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
 AC_CHECK_FUNCS(m4_normalize([
+	__ulock_wait
 	backtrace_symbols
 	clock_gettime
 	copyfile
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 15ffdd895a..6bd2f7b5d8 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -367,6 +367,9 @@
 /* Define to 1 if you have the `link' function. */
 #undef HAVE_LINK
 
+/* Define to 1 if you have the <linux/futex.h> header file. */
+#undef HAVE_LINUX_FUTEX_H
+
 /* Define to 1 if the system has the type `locale_t'. */
 #undef HAVE_LOCALE_T
 
@@ -623,6 +626,9 @@
 /* Define to 1 if you have the <sys/event.h> header file. */
 #undef HAVE_SYS_EVENT_H
 
+/* Define to 1 if you have the <sys/futex.h> header file. */
+#undef HAVE_SYS_FUTEX_H
+
 /* Define to 1 if you have the <sys/ipc.h> header file. */
 #undef HAVE_SYS_IPC_H
 
@@ -665,6 +671,9 @@
 /* Define to 1 if you have the <sys/uio.h> header file. */
 #undef HAVE_SYS_UIO_H
 
+/* Define to 1 if you have the <sys/umtx.h> header file. */
+#undef HAVE_SYS_UMTX_H
+
 /* Define to 1 if you have the <sys/un.h> header file. */
 #undef HAVE_SYS_UN_H
 
@@ -779,6 +788,9 @@
 /* Define to 1 if you have the `__strtoull' function. */
 #undef HAVE___STRTOULL
 
+/* Define to 1 if you have the `__ulock_wait' function. */
+#undef HAVE___ULOCK_WAIT
+
 /* Define to the appropriate printf length modifier for 64-bit ints. */
 #undef INT64_MODIFIER
 
diff --git a/src/include/port/pg_futex.h b/src/include/port/pg_futex.h
new file mode 100644
index 0000000000..bf254a18b0
--- /dev/null
+++ b/src/include/port/pg_futex.h
@@ -0,0 +1,139 @@
+/*
+ * Minimal wrapper over futex APIs.
+ */
+
+#ifndef PG_FUTEX_H
+#define PG_FUTEX_H
+
+#if defined(HAVE_LINUX_FUTEX_H)
+
+/* https://man7.org/linux/man-pages/man2/futex.2.html */
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+
+#elif defined(HAVE_SYS_FUTEX_H)
+
+/* https://man.openbsd.org/futex, since OpenBSD 6.2. */
+
+#include <sys/time.h>
+#include <sys/futex.h>
+
+#elif defined(HAVE_SYS_UMTX_H)
+
+/* https://www.freebsd.org/cgi/man.cgi?query=_umtx_op */
+
+#include <sys/types.h>
+#include <sys/umtx.h>
+
+#elif defined(HAVE___ULOCK_WAIT)
+
+/*
+ * This interface is undocumented, but provided by libSystem.dylib since
+ * xnu-3789.1.32 (macOS 10.12, 2016) and is used by eg libc++.
+ *
+ * https://github.com/apple/darwin-xnu/blob/main/bsd/kern/sys_ulock.c
+ * https://github.com/apple/darwin-xnu/blob/main/bsd/sys/ulock.h
+ */
+
+#include <stdint.h>
+
+#define UL_COMPARE_AND_WAIT_SHARED		3	/* operation code used for both wait and wake */
+#define ULF_WAKE_ALL					0x00000100	/* flag OR'd into the wake operation */
+extern int __ulock_wait(uint32_t operation,
+						void *addr,
+						uint64_t value,
+						uint32_t timeout);	/* callers pass microseconds */
+extern int __ulock_wake(uint32_t operation,
+						void *addr,
+						uint64_t wake_value);
+
+#endif
+
+/*
+ * Wait for someone to call pg_futex_wake() for the same address, with an
+ * initial check that the value pointed to by 'fut' matches 'value' and an
+ * optional timeout (clamped to 1us..UINT32_MAX us on macOS).  Returns 0 when
+ * woken, and otherwise -1, with errno set to EAGAIN if the initial value
+ * check fails, and otherwise errors including EINTR, ETIMEDOUT and EFAULT.
+ */
+static inline int
+pg_futex_wait_u32(volatile void *fut, uint32 value, struct timespec *timeout)
+{
+#if defined(HAVE_LINUX_FUTEX_H)
+	if (syscall(SYS_futex, fut, FUTEX_WAIT, value, timeout, 0, 0) == 0)
+		return 0;
+#elif defined(HAVE_SYS_FUTEX_H)
+	if ((errno = futex(fut, FUTEX_WAIT, (int) value, timeout, NULL)) == 0)
+		return 0;
+	if (errno == ECANCELED)
+		errno = EINTR;
+#elif defined(HAVE_SYS_UMTX_H)
+	if (_umtx_op((void *) fut, UMTX_OP_WAIT_UINT, value, 0, timeout) == 0)
+		return 0;
+#elif defined (HAVE___ULOCK_WAIT)
+	uint64_t	usecs = 0;		/* 0 tells __ulock_wait() not to time out */
+
+	if (timeout)
+		usecs = (uint64_t) timeout->tv_sec * 1000000 + timeout->tv_nsec / 1000;
+	if (timeout && (usecs == 0 || usecs > UINT32_MAX))
+		usecs = usecs == 0 ? 1 : UINT32_MAX;	/* argument is uint32 usecs */
+	if (__ulock_wait(UL_COMPARE_AND_WAIT_SHARED,
+					 (void *) fut, value, (uint32_t) usecs) >= 0)
+		return 0;
+#else
+	/*
+	 * To simulate futexes on systems without them, we could link our PGPROC
+	 * into a shared memory hash table keyed by "fut" (ie address) and then
+	 * compare *fut == value: if false, unlink and fail with EAGAIN; if true,
+	 * sleep on the proc latch.  DSM segments would need variants that take
+	 * a dsm_segment and pointer and convert that to segment key + offset.
+	 */
+	errno = ENOSYS;
+#endif
+
+	Assert(errno != 0);
+
+	return -1;
+}
+
+/*
+ * Wake up to nwaiters waiters that currently wait on the same address as
+ * 'fut' (macOS wakes all of them if nwaiters > 1).  Returns 0 on success, and
+ * -1 on failure, with errno set.  Though some of these interfaces can tell us
+ * how many were woken, they can't all do that, so we hide that information.
+ */
+static inline int
+pg_futex_wake(volatile void *fut, int nwaiters)
+{
+#if defined(HAVE_LINUX_FUTEX_H)
+	if (syscall(SYS_futex, fut, FUTEX_WAKE, nwaiters, NULL, 0, 0) >= 0)
+		return 0;
+#elif defined(HAVE_SYS_FUTEX_H)
+	if (futex(fut, FUTEX_WAKE, nwaiters, NULL, NULL) >= 0)
+		return 0;
+#elif defined(HAVE_SYS_UMTX_H)
+	if (_umtx_op((void *) fut, UMTX_OP_WAKE, nwaiters, 0, 0) == 0)
+		return 0;
+#elif defined (HAVE___ULOCK_WAIT)
+	/* Can only wake one waiter or all of them; ENOENT counts as success. */
+	if (__ulock_wake(UL_COMPARE_AND_WAIT_SHARED | (nwaiters > 1 ? ULF_WAKE_ALL : 0),
+					 (void *) fut, 0) >= 0)
+		return 0;
+	if (errno == ENOENT)
+		return 0;
+#else
+	/*
+	 * If we wanted to simulate futexes on systems that don't have them, here
+	 * we could look up "fut" in a shmem hash table and set latches for any
+	 * matches.
+	 */
+	errno = ENOSYS;
+#endif
+
+	Assert(errno != 0);
+
+	return -1;
+}
+
+#endif
-- 
2.30.2

