From 6919b1c1c59a6084017ebae5a884bb6c60639364 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Thu, 22 May 2025 18:27:06 +0200
Subject: [PATCH v1 2/6] NUMA: localalloc

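Set the default allocation policy to "localalloc", which means memory is
allocated from the local NUMA node (the node the process is running on
at the time of the page fault). This is useful for process-private
memory, which is not going to be shared with other nodes, and is
relatively short-lived (so we're unlikely to have issues if the process
gets moved by the scheduler). The behavior is controlled by a new
numa_localalloc GUC (a developer option, disabled by default), applied
in InitPostmasterChild().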

This sets the default for the whole process, for all future allocations.
But that's fine, we've already populated the shared memory earlier (by
interleaving it explicitly). Otherwise the first access to a shared
memory page would trigger a page fault, and the page would be allocated
on the local node.

XXX This patch may not be necessary, as we now assign memory to nodes
using explicit numa_tonode_memory() calls, and not by interleaving. But
it's useful for experiments during development, so I'm keeping it.
---
 src/backend/utils/init/globals.c    |  1 +
 src/backend/utils/init/miscinit.c   | 16 ++++++++++++++++
 src/backend/utils/misc/guc_tables.c | 10 ++++++++++
 src/include/miscadmin.h             |  1 +
 4 files changed, 28 insertions(+)

diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 876cb64cf66..f5359db3656 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -147,6 +147,7 @@ int			MaxBackends = 0;
 
 /* NUMA stuff */
 bool		numa_buffers_interleave = false;
+bool		numa_localalloc = false;
 
 /* GUC parameters for vacuum */
 int			VacuumBufferUsageLimit = 2048;
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 43b4dbccc3d..d11936691b2 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -28,6 +28,10 @@
 #include <arpa/inet.h>
 #include <utime.h>
 
+#ifdef USE_LIBNUMA
+#include <numa.h>
+#endif
+
 #include "access/htup_details.h"
 #include "access/parallel.h"
 #include "catalog/pg_authid.h"
@@ -164,6 +168,18 @@ InitPostmasterChild(void)
 				(errcode_for_socket_access(),
 				 errmsg_internal("could not set postmaster death monitoring pipe to FD_CLOEXEC mode: %m")));
 #endif
+
+#ifdef USE_LIBNUMA
+	/*
+	 * If requested, set the default allocation policy to the local node,
+	 * i.e. the node the task is executing on at the time of a page fault.
+	 *
+	 * XXX I believe this is not necessary, now that we don't use automatic
+	 * interleaving (numa_set_interleave_mask).
+	 */
+	if (numa_localalloc)
+		numa_set_localalloc();
+#endif
 }
 
 /*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 198a57e70a5..57f2df7ab74 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2126,6 +2126,16 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"numa_localalloc", PGC_POSTMASTER, DEVELOPER_OPTIONS,
+			gettext_noop("Enables setting the default allocation policy to local node."),
+			gettext_noop("When enabled, allocate from the node where the task is executing."),
+		},
+		&numa_localalloc,
+		false,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"sync_replication_slots", PGC_SIGHUP, REPLICATION_STANDBY,
 			gettext_noop("Enables a physical standby to synchronize logical failover replication slots from the primary server."),
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 014a6079af2..692871a401f 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -179,6 +179,7 @@ extern PGDLLIMPORT int max_worker_processes;
 extern PGDLLIMPORT int max_parallel_workers;
 
 extern PGDLLIMPORT bool numa_buffers_interleave;
+extern PGDLLIMPORT bool numa_localalloc;
 
 extern PGDLLIMPORT int commit_timestamp_buffers;
 extern PGDLLIMPORT int multixact_member_buffers;
-- 
2.49.0

