From 1c1a40a806acc46d0683783b1a77ddf1e5682309 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Wed, 19 Jun 2024 12:42:24 +0200
Subject: [PATCH v20240624 1/7] Allow parallel create for GIN indexes

Add support for parallel create of GIN indexes, using an approach and
code very similar to the one used by BRIN indexes.

Each worker reads a subset of the table (from a parallel scan), and
accumulates index entries in memory. But instead of writing the results
into the index (after hitting the memory limit), the data are written
to a shared tuplesort (and sorted by index key).

The leader then reads data from the tuplesort, and combines them into
entries that get inserted into the index.
---
 src/backend/access/gin/gininsert.c         | 1449 +++++++++++++++++++-
 src/backend/access/gin/ginutil.c           |    2 +-
 src/backend/access/transam/parallel.c      |    4 +
 src/backend/utils/sort/tuplesortvariants.c |  199 +++
 src/include/access/gin.h                   |    4 +
 src/include/access/gin_tuple.h             |   31 +
 src/include/utils/tuplesort.h              |    8 +
 src/tools/pgindent/typedefs.list           |    4 +
 8 files changed, 1685 insertions(+), 16 deletions(-)
 create mode 100644 src/include/access/gin_tuple.h

diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 71f38be90c3..d8767b0fe81 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -15,14 +15,125 @@
 #include "postgres.h"
 
 #include "access/gin_private.h"
+#include "access/gin_tuple.h"
+#include "access/table.h"
 #include "access/tableam.h"
 #include "access/xloginsert.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "nodes/execnodes.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/predicate.h"
+#include "tcop/tcopprot.h"		/* pgrminclude ignore */
+#include "utils/datum.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/builtins.h"
+#include "utils/sortsupport.h"
+
+
+/* Magic numbers for parallel state sharing */
+#define PARALLEL_KEY_GIN_SHARED			UINT64CONST(0xB000000000000001)
+#define PARALLEL_KEY_TUPLESORT			UINT64CONST(0xB000000000000002)
+#define PARALLEL_KEY_QUERY_TEXT			UINT64CONST(0xB000000000000003)
+#define PARALLEL_KEY_WAL_USAGE			UINT64CONST(0xB000000000000004)
+#define PARALLEL_KEY_BUFFER_USAGE		UINT64CONST(0xB000000000000005)
+
+/*
+ * Status for index builds performed in parallel.  This is allocated in a
+ * dynamic shared memory segment.
+ */
+typedef struct GinShared
+{
+	/*
+	 * These fields are not modified during the build.  They primarily exist
+	 * for the benefit of worker processes that need to create state
+	 * corresponding to that used by the leader.
+	 */
+	Oid			heaprelid;
+	Oid			indexrelid;
+	bool		isconcurrent;
+	int			scantuplesortstates;
+
+	/*
+	 * workersdonecv is used to monitor the progress of workers.  All parallel
+	 * participants must indicate that they are done before leader can use
+	 * results built by the workers (and before leader can write the data into
+	 * the index).
+	 */
+	ConditionVariable workersdonecv;
+
+	/*
+	 * mutex protects the mutable state fields below.
+	 *
+	 * These fields contain status information of interest to GIN index builds
+	 * that must work just the same when an index is built in parallel.
+	 */
+	slock_t		mutex;
+
+	/*
+	 * Mutable state that is maintained by workers, and reported back to
+	 * leader at end of the scans.
+	 *
+	 * nparticipantsdone is number of worker processes finished.
+	 *
+	 * reltuples is the total number of input heap tuples.
+	 *
+	 * indtuples is the total number of tuples that made it into the index.
+	 */
+	int			nparticipantsdone;
+	double		reltuples;
+	double		indtuples;
+
+	/*
+	 * ParallelTableScanDescData data follows. Can't directly embed here, as
+	 * implementations of the parallel table scan desc interface might need
+	 * stronger alignment.
+	 */
+} GinShared;
+
+/*
+ * Return pointer to a GinShared's parallel table scan.
+ *
+ * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
+ * MAXALIGN.
+ */
+#define ParallelTableScanFromGinShared(shared) \
+	(ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinShared)))
+
+/*
+ * Status for leader in parallel index build.
+ */
+typedef struct GinLeader
+{
+	/* parallel context itself */
+	ParallelContext *pcxt;
+
+	/*
+	 * nparticipanttuplesorts is the exact number of worker processes
+	 * successfully launched, plus one leader process if it participates as a
+	 * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
+	 * participating as a worker).
+	 */
+	int			nparticipanttuplesorts;
+
+	/*
+	 * Leader process convenience pointers to shared state (leader avoids TOC
+	 * lookups).
+	 *
+	 * GinShared is the shared state for entire build.  sharedsort is the
+	 * shared, tuplesort-managed state passed to each process tuplesort.
+	 * snapshot is the snapshot used by the scan iff an MVCC snapshot is
+	 * required.
+	 */
+	GinShared  *ginshared;
+	Sharedsort *sharedsort;
+	Snapshot	snapshot;
+	WalUsage   *walusage;
+	BufferUsage *bufferusage;
+} GinLeader;
 
 typedef struct
 {
@@ -32,9 +143,48 @@ typedef struct
 	MemoryContext tmpCtx;
 	MemoryContext funcCtx;
 	BuildAccumulator accum;
+
+	/* FIXME likely duplicate with indtuples */
+	double		bs_numtuples;
+	double		bs_reltuples;
+
+	/*
+	 * bs_leader is only present when a parallel index build is performed, and
+	 * only in the leader process.
+	 */
+	GinLeader  *bs_leader;
+	int			bs_worker_id;
+
+	/*
+	 * The sortstate is used by workers (including the leader). It has to be
+	 * part of the build state, because that's the only thing passed to the
+	 * build callback etc.
+	 */
+	Tuplesortstate *bs_sortstate;
 } GinBuildState;
 
 
+/* parallel index builds */
+static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
+								bool isconcurrent, int request);
+static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
+static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
+static double _gin_parallel_heapscan(GinBuildState *buildstate);
+static double _gin_parallel_merge(GinBuildState *buildstate);
+static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
+											  Relation heap, Relation index);
+static void _gin_parallel_scan_and_build(GinBuildState *buildstate,
+										 GinShared *ginshared,
+										 Sharedsort *sharedsort,
+										 Relation heap, Relation index,
+										 int sortmem, bool progress);
+
+static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items);
+static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
+								  Datum key, int16 typlen, bool typbyval,
+								  ItemPointerData *items, uint32 nitems,
+								  Size *len);
+
 /*
  * Adds array of item pointers to tuple's posting list, or
  * creates posting tree and tuple pointing to tree in case
@@ -313,12 +463,109 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
 	MemoryContextSwitchTo(oldCtx);
 }
 
+/*
+ * ginBuildCallbackParallel
+ *		Callback for the parallel index build.
+ *
+ * This is very similar to the serial build callback ginBuildCallback,
+ * except that instead of writing the accumulated entries into the index,
+ * we write them into a tuplesort that is then processed by the leader.
+ *
+ * XXX Instead of writing the entries directly into the shared tuplesort,
+ * we might write them into a local one, do a sort in the worker, combine
+ * the results, and only then write the results into the shared tuplesort.
+ * For large tables with many different keys that's going to work better
+ * than the current approach where we don't get many matches in work_mem
+ * (maybe this should use 32MB, which is what we use when planning, but
+ * even that may not be sufficient). Which means we are likely to have
+ * many entries with a small number of TIDs, forcing the leader to merge
+ * the data, often amounting to ~50% of the serial part. By doing the
+ * first sort in the workers, the leader could then do fewer merges with
+ * longer TID lists, which is much cheaper. Also, the amount of data sent
+ * from workers to the leader would be lower.
+ *
+ * The disadvantage is increased disk space usage, possibly up to 2x, if
+ * no entries get combined at the worker level.
+ *
+ * It would be possible to partition the data into multiple tuplesorts
+ * per worker (by hashing) - we don't need the data produced by workers
+ * to be perfectly sorted, and we could even live with multiple entries
+ * for the same key (in case it has multiple binary representations with
+ * distinct hash values).
+ */
+static void
+ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
+						 bool *isnull, bool tupleIsAlive, void *state)
+{
+	GinBuildState *buildstate = (GinBuildState *) state;
+	MemoryContext oldCtx;
+	int			i;
+
+	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+	for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
+		ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
+							   values[i], isnull[i], tid);
+
+	/*
+	 * If we've maxed out our available memory, dump everything to the
+	 * tuplesort
+	 *
+	 * XXX It might seem this should set the memory limit to 32MB, same as
+	 * what plan_create_index_workers() uses to calculate the number of
+	 * parallel workers, but that's the limit for tuplesort. So it seems
+	 * better to keep using work_mem here.
+	 *
+	 * XXX But maybe we should calculate this as a per-worker fraction of
+	 * maintenance_work_mem. It's weird to use work_mem here, in a clearly
+	 * maintenance command.
+	 */
+	if (buildstate->accum.allocatedMemory >= (Size) work_mem * 1024L)
+	{
+		ItemPointerData *list;
+		Datum		key;
+		GinNullCategory category;
+		uint32		nlist;
+		OffsetNumber attnum;
+		TupleDesc	tdesc = RelationGetDescr(index);
+
+		ginBeginBAScan(&buildstate->accum);
+		while ((list = ginGetBAEntry(&buildstate->accum,
+									 &attnum, &key, &category, &nlist)) != NULL)
+		{
+			/* information about the index key */
+			Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1));
+
+			/* GIN tuple and tuple length that we'll use for tuplesort */
+			GinTuple   *tup;
+			Size		tuplen;
+
+			/* there could be many entries, so be willing to abort here */
+			CHECK_FOR_INTERRUPTS();
+
+			tup = _gin_build_tuple(attnum, category,
+								   key, attr->attlen, attr->attbyval,
+								   list, nlist, &tuplen);
+
+			tuplesort_putgintuple(buildstate->bs_sortstate, tup, tuplen);
+
+			pfree(tup);
+		}
+
+		MemoryContextReset(buildstate->tmpCtx);
+		ginInitBA(&buildstate->accum);
+	}
+
+	MemoryContextSwitchTo(oldCtx);
+}
+
 IndexBuildResult *
 ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 {
 	IndexBuildResult *result;
 	double		reltuples;
 	GinBuildState buildstate;
+	GinBuildState *state = &buildstate;
 	Buffer		RootBuffer,
 				MetaBuffer;
 	ItemPointerData *list;
@@ -336,6 +583,15 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	buildstate.indtuples = 0;
 	memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
 
+	/*
+	 * Initialize all the fields, not to trip valgrind.
+	 *
+	 * XXX Maybe there should be an "init" function for build state?
+	 */
+	buildstate.bs_numtuples = 0;
+	buildstate.bs_reltuples = 0;
+	buildstate.bs_leader = NULL;
+
 	/* initialize the meta page */
 	MetaBuffer = GinNewBuffer(index);
 
@@ -376,25 +632,93 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	ginInitBA(&buildstate.accum);
 
 	/*
-	 * Do the heap scan.  We disallow sync scan here because dataPlaceToPage
-	 * prefers to receive tuples in TID order.
+	 * Attempt to launch parallel worker scan when required
+	 *
+	 * XXX plan_create_index_workers makes the number of workers dependent on
+	 * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
+	 * reasonable too, because we sort the data just like btree. It does
+	 * ignore the memory used to accumulate data in memory (set by work_mem),
+	 * but there is no way to communicate that to plan_create_index_workers.
+	 */
+	if (indexInfo->ii_ParallelWorkers > 0)
+		_gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
+							indexInfo->ii_ParallelWorkers);
+
+
+	/*
+	 * If parallel build requested and at least one worker process was
+	 * successfully launched, set up coordination state, wait for workers to
+	 * complete. Then read all tuples from the shared tuplesort and insert
+	 * them into the index.
+	 *
+	 * In serial mode, simply scan the table and build the index one index
+	 * tuple at a time.
 	 */
-	reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
-									   ginBuildCallback, (void *) &buildstate,
-									   NULL);
+	if (state->bs_leader)
+	{
+		SortCoordinate coordinate;
+
+		coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
+		coordinate->isWorker = false;
+		coordinate->nParticipants =
+			state->bs_leader->nparticipanttuplesorts;
+		coordinate->sharedsort = state->bs_leader->sharedsort;
+
+		/*
+		 * Begin leader tuplesort.
+		 *
+		 * In cases where parallelism is involved, the leader receives the
+		 * same share of maintenance_work_mem as a serial sort (it is
+		 * generally treated in the same way as a serial sort once we return).
+		 * Parallel worker Tuplesortstates will have received only a fraction
+		 * of maintenance_work_mem, though.
+		 *
+		 * We rely on the lifetime of the Leader Tuplesortstate almost not
+		 * overlapping with any worker Tuplesortstate's lifetime.  There may
+		 * be some small overlap, but that's okay because we rely on leader
+		 * Tuplesortstate only allocating a small, fixed amount of memory
+		 * here. When its tuplesort_performsort() is called (by our caller),
+		 * and significant amounts of memory are likely to be used, all
+		 * workers must have already freed almost all memory held by their
+		 * Tuplesortstates (they are about to go away completely, too).  The
+		 * overall effect is that maintenance_work_mem always represents an
+		 * absolute high watermark on the amount of memory used by a CREATE
+		 * INDEX operation, regardless of the use of parallelism or any other
+		 * factor.
+		 */
+		state->bs_sortstate =
+			tuplesort_begin_index_gin(heap, index,
+									  maintenance_work_mem, coordinate,
+									  TUPLESORT_NONE);
+
+		/* scan the relation in parallel and merge per-worker results */
+		reltuples = _gin_parallel_merge(state);
 
-	/* dump remaining entries to the index */
-	oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
-	ginBeginBAScan(&buildstate.accum);
-	while ((list = ginGetBAEntry(&buildstate.accum,
-								 &attnum, &key, &category, &nlist)) != NULL)
+		_gin_end_parallel(state->bs_leader, state);
+	}
+	else						/* no parallel index build */
 	{
-		/* there could be many entries, so be willing to abort here */
-		CHECK_FOR_INTERRUPTS();
-		ginEntryInsert(&buildstate.ginstate, attnum, key, category,
-					   list, nlist, &buildstate.buildStats);
+		/*
+		 * Do the heap scan.  We disallow sync scan here because
+		 * dataPlaceToPage prefers to receive tuples in TID order.
+		 */
+		reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
+										   ginBuildCallback, (void *) &buildstate,
+										   NULL);
+
+		/* dump remaining entries to the index */
+		oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
+		ginBeginBAScan(&buildstate.accum);
+		while ((list = ginGetBAEntry(&buildstate.accum,
+									 &attnum, &key, &category, &nlist)) != NULL)
+		{
+			/* there could be many entries, so be willing to abort here */
+			CHECK_FOR_INTERRUPTS();
+			ginEntryInsert(&buildstate.ginstate, attnum, key, category,
+						   list, nlist, &buildstate.buildStats);
+		}
+		MemoryContextSwitchTo(oldCtx);
 	}
-	MemoryContextSwitchTo(oldCtx);
 
 	MemoryContextDelete(buildstate.funcCtx);
 	MemoryContextDelete(buildstate.tmpCtx);
@@ -534,3 +858,1098 @@ gininsert(Relation index, Datum *values, bool *isnull,
 
 	return false;
 }
+
+/*
+ * Create parallel context, and launch workers for leader.
+ *
+ * buildstate argument should be initialized (with the exception of the
+ * tuplesort states, which may later be created based on shared
+ * state initially set up here).
+ *
+ * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
+ *
+ * request is the target number of parallel worker processes to launch.
+ *
+ * Sets buildstate's GinLeader, which caller must use to shut down parallel
+ * mode by passing it to _gin_end_parallel() at the very end of its index
+ * build.  If not even a single worker process can be launched, this is
+ * never set, and caller should proceed with a serial index build.
+ */
+static void
+_gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
+					bool isconcurrent, int request)
+{
+	ParallelContext *pcxt;
+	int			scantuplesortstates;
+	Snapshot	snapshot;
+	Size		estginshared;
+	Size		estsort;
+	GinShared  *ginshared;
+	Sharedsort *sharedsort;
+	GinLeader  *ginleader = (GinLeader *) palloc0(sizeof(GinLeader));
+	WalUsage   *walusage;
+	BufferUsage *bufferusage;
+	bool		leaderparticipates = true;
+	int			querylen;
+
+#ifdef DISABLE_LEADER_PARTICIPATION
+	leaderparticipates = false;
+#endif
+
+	/*
+	 * Enter parallel mode, and create context for parallel build of gin index
+	 */
+	EnterParallelMode();
+	Assert(request > 0);
+	pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
+								 request);
+
+	scantuplesortstates = leaderparticipates ? request + 1 : request;
+
+	/*
+	 * Prepare for scan of the base relation.  In a normal index build, we use
+	 * SnapshotAny because we must retrieve all tuples and do our own time
+	 * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
+	 * concurrent build, we take a regular MVCC snapshot and index whatever's
+	 * live according to that.
+	 */
+	if (!isconcurrent)
+		snapshot = SnapshotAny;
+	else
+		snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+	/*
+	 * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
+	 */
+	estginshared = _gin_parallel_estimate_shared(heap, snapshot);
+	shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
+	estsort = tuplesort_estimate_shared(scantuplesortstates);
+	shm_toc_estimate_chunk(&pcxt->estimator, estsort);
+
+	shm_toc_estimate_keys(&pcxt->estimator, 2);
+
+	/*
+	 * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
+	 * and PARALLEL_KEY_BUFFER_USAGE.
+	 *
+	 * If there are no extensions loaded that care, we could skip this.  We
+	 * have no way of knowing whether anyone's looking at pgWalUsage or
+	 * pgBufferUsage, so do it unconditionally.
+	 */
+	shm_toc_estimate_chunk(&pcxt->estimator,
+						   mul_size(sizeof(WalUsage), pcxt->nworkers));
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+	shm_toc_estimate_chunk(&pcxt->estimator,
+						   mul_size(sizeof(BufferUsage), pcxt->nworkers));
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+	/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
+	if (debug_query_string)
+	{
+		querylen = strlen(debug_query_string);
+		shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
+		shm_toc_estimate_keys(&pcxt->estimator, 1);
+	}
+	else
+		querylen = 0;			/* keep compiler quiet */
+
+	/* Everyone's had a chance to ask for space, so now create the DSM */
+	InitializeParallelDSM(pcxt);
+
+	/* If no DSM segment was available, back out (do serial build) */
+	if (pcxt->seg == NULL)
+	{
+		if (IsMVCCSnapshot(snapshot))
+			UnregisterSnapshot(snapshot);
+		DestroyParallelContext(pcxt);
+		ExitParallelMode();
+		return;
+	}
+
+	/* Store shared build state, for which we reserved space */
+	ginshared = (GinShared *) shm_toc_allocate(pcxt->toc, estginshared);
+	/* Initialize immutable state */
+	ginshared->heaprelid = RelationGetRelid(heap);
+	ginshared->indexrelid = RelationGetRelid(index);
+	ginshared->isconcurrent = isconcurrent;
+	ginshared->scantuplesortstates = scantuplesortstates;
+
+	ConditionVariableInit(&ginshared->workersdonecv);
+	SpinLockInit(&ginshared->mutex);
+
+	/* Initialize mutable state */
+	ginshared->nparticipantsdone = 0;
+	ginshared->reltuples = 0.0;
+	ginshared->indtuples = 0.0;
+
+	table_parallelscan_initialize(heap,
+								  ParallelTableScanFromGinShared(ginshared),
+								  snapshot);
+
+	/*
+	 * Store shared tuplesort-private state, for which we reserved space.
+	 * Then, initialize opaque state using tuplesort routine.
+	 */
+	sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
+	tuplesort_initialize_shared(sharedsort, scantuplesortstates,
+								pcxt->seg);
+
+	/*
+	 * Store the TOC keys for the shared build state and the tuplesort,
+	 * so that worker processes can look them up.
+	 */
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
+
+	/* Store query string for workers */
+	if (debug_query_string)
+	{
+		char	   *sharedquery;
+
+		sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
+		memcpy(sharedquery, debug_query_string, querylen + 1);
+		shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
+	}
+
+	/*
+	 * Allocate space for each worker's WalUsage and BufferUsage; no need to
+	 * initialize.
+	 */
+	walusage = shm_toc_allocate(pcxt->toc,
+								mul_size(sizeof(WalUsage), pcxt->nworkers));
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
+	bufferusage = shm_toc_allocate(pcxt->toc,
+								   mul_size(sizeof(BufferUsage), pcxt->nworkers));
+	shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
+
+	/* Launch workers, saving status for leader/caller */
+	LaunchParallelWorkers(pcxt);
+	ginleader->pcxt = pcxt;
+	ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
+	if (leaderparticipates)
+		ginleader->nparticipanttuplesorts++;
+	ginleader->ginshared = ginshared;
+	ginleader->sharedsort = sharedsort;
+	ginleader->snapshot = snapshot;
+	ginleader->walusage = walusage;
+	ginleader->bufferusage = bufferusage;
+
+	/* If no workers were successfully launched, back out (do serial build) */
+	if (pcxt->nworkers_launched == 0)
+	{
+		_gin_end_parallel(ginleader, NULL);
+		return;
+	}
+
+	/* Save leader state now that it's clear build will be parallel */
+	buildstate->bs_leader = ginleader;
+
+	/* Join heap scan ourselves */
+	if (leaderparticipates)
+		_gin_leader_participate_as_worker(buildstate, heap, index);
+
+	/*
+	 * Caller needs to wait for all launched workers when we return.  Make
+	 * sure that the failure-to-start case will not hang forever.
+	 */
+	WaitForParallelWorkersToAttach(pcxt);
+}
+
+/*
+ * Shut down workers, destroy parallel context, and end parallel mode.
+ */
+static void
+_gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
+{
+	int			i;
+
+	/* Shutdown worker processes */
+	WaitForParallelWorkersToFinish(ginleader->pcxt);
+
+	/*
+	 * Next, accumulate WAL usage.  (This must wait for the workers to finish,
+	 * or we might get incomplete data.)
+	 */
+	for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
+		InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
+
+	/* Free last reference to MVCC snapshot, if one was used */
+	if (IsMVCCSnapshot(ginleader->snapshot))
+		UnregisterSnapshot(ginleader->snapshot);
+	DestroyParallelContext(ginleader->pcxt);
+	ExitParallelMode();
+}
+
+/*
+ * Within leader, wait for end of heap scan.
+ *
+ * When called, parallel heap scan started by _gin_begin_parallel() will
+ * already be underway within worker processes (when leader participates
+ * as a worker, we should end up here just as workers are finishing).
+ *
+ * Returns the total number of heap tuples scanned.
+ */
+static double
+_gin_parallel_heapscan(GinBuildState *state)
+{
+	GinShared  *ginshared = state->bs_leader->ginshared;
+	int			nparticipanttuplesorts;
+
+	nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
+	for (;;)
+	{
+		SpinLockAcquire(&ginshared->mutex);
+		if (ginshared->nparticipantsdone == nparticipanttuplesorts)
+		{
+			/* copy the data into leader state */
+			state->bs_reltuples = ginshared->reltuples;
+			state->bs_numtuples = ginshared->indtuples;
+
+			SpinLockRelease(&ginshared->mutex);
+			break;
+		}
+		SpinLockRelease(&ginshared->mutex);
+
+		ConditionVariableSleep(&ginshared->workersdonecv,
+							   WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
+	}
+
+	ConditionVariableCancelSleep();
+
+	return state->bs_reltuples;
+}
+
+/*
+ * Buffer used to accumulate TIDs from multiple GinTuples for the same key
+ * (we read these from the tuplesort, sorted by the key).
+ *
+ * This is similar to BuildAccumulator in that it's used to collect TIDs
+ * in memory before inserting them into the index, but it's much simpler
+ * as it only deals with a single index key at a time.
+ *
+ * XXX The TID values in the "items" array are not guaranteed to be sorted,
+ * we have to sort them explicitly. This is due to parallel scans being
+ * synchronized (and thus may wrap around), and when combining values from
+ * multiple workers.
+ */
+typedef struct GinBuffer
+{
+	OffsetNumber attnum;
+	GinNullCategory category;
+	Datum		key;			/* 0 if no key (and keylen == 0) */
+	Size		keylen;			/* number of bytes (not typlen) */
+
+	/* type info */
+	int16		typlen;
+	bool		typbyval;
+
+	/* array of TID values */
+	int			nitems;
+	int			maxitems;
+	SortSupport ssup;			/* for sorting/comparing keys */
+	ItemPointerData *items;
+} GinBuffer;
+
+/*
+ * Check that TID array contains valid values, and that it's sorted (if we
+ * expect it to be).
+ */
+static void
+AssertCheckItemPointers(ItemPointerData *items, int nitems, bool sorted)
+{
+#ifdef USE_ASSERT_CHECKING
+	for (int i = 0; i < nitems; i++)
+	{
+		Assert(ItemPointerIsValid(&items[i]));
+
+		if ((i == 0) || !sorted)
+			continue;
+
+		Assert(ItemPointerCompare(&items[i - 1], &items[i]) < 0);
+	}
+#endif
+}
+
+/* basic GinBuffer checks */
+static void
+AssertCheckGinBuffer(GinBuffer *buffer)
+{
+#ifdef USE_ASSERT_CHECKING
+	Assert(buffer->nitems <= buffer->maxitems);
+
+	/* if we have any items, the array must exist */
+	Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
+
+	/*
+	 * we don't know if the TID array is expected to be sorted or not
+	 *
+	 * XXX maybe we can pass that to AssertCheckGinBuffer() call?
+	 */
+	AssertCheckItemPointers(buffer->items, buffer->nitems, false);
+#endif
+}
+
+/*
+ * Initialize the buffer used to accumulate TID for a single key at a time
+ * (we process the data sorted), so we know when we received all data for
+ * a given key.
+ *
+ * Initializes sort support procedures for all index attributes.
+ */
+static GinBuffer *
+GinBufferInit(Relation index)
+{
+	GinBuffer  *buffer = palloc0(sizeof(GinBuffer));
+	int			i,
+				nKeys;
+	TupleDesc	desc = RelationGetDescr(index);
+
+	nKeys = IndexRelationGetNumberOfKeyAttributes(index);
+
+	buffer->ssup = palloc0(sizeof(SortSupportData) * nKeys);
+
+	/*
+	 * Lookup ordering operator for the index key data type, and initialize
+	 * the sort support function.
+	 */
+	for (i = 0; i < nKeys; i++)
+	{
+		SortSupport sortKey = &buffer->ssup[i];
+		Form_pg_attribute att = TupleDescAttr(desc, i);
+		TypeCacheEntry *typentry;
+
+		typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
+
+		sortKey->ssup_cxt = CurrentMemoryContext;
+		sortKey->ssup_collation = index->rd_indcollation[i];
+		sortKey->ssup_nulls_first = false;
+		sortKey->ssup_attno = i + 1;
+		sortKey->abbreviate = false;
+
+		Assert(sortKey->ssup_attno != 0);
+
+		PrepareSortSupportFromOrderingOp(typentry->lt_opr, sortKey);
+	}
+
+	return buffer;
+}
+
+/* Is the buffer empty, i.e. has no TID values in the array? */
+static bool
+GinBufferIsEmpty(GinBuffer *buffer)
+{
+	return (buffer->nitems == 0);
+}
+
+/*
+ * Compare if the tuple matches the already accumulated data in the GIN
+ * buffer. Compare scalar fields first, before the actual key.
+ *
+ * Returns true if the key matches, and the TID belongs to the buffer.
+ */
+static bool
+GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
+{
+	int			r;
+	Datum		tupkey;
+
+	AssertCheckGinBuffer(buffer);
+
+	if (tup->attrnum != buffer->attnum)
+		return false;
+
+	/* same attribute should have the same type info */
+	Assert(tup->typbyval == buffer->typbyval);
+	Assert(tup->typlen == buffer->typlen);
+
+	if (tup->category != buffer->category)
+		return false;
+
+	/*
+	 * For NULL/empty keys, this means equality, for normal keys we need to
+	 * compare the actual key value.
+	 */
+	if (buffer->category != GIN_CAT_NORM_KEY)
+		return true;
+
+	/*
+	 * For the tuple, get either the first sizeof(Datum) bytes for byval
+	 * types, or a pointer to the beginning of the data array.
+	 */
+	tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
+
+	r = ApplySortComparator(buffer->key, false,
+							tupkey, false,
+							&buffer->ssup[buffer->attnum - 1]);
+
+	return (r == 0);
+}
+
+/*
+ * GinBufferStoreTuple
+ *		Add data (especially TID list) from a GIN tuple to the buffer.
+ *
+ * The buffer is expected to be empty (in which case it's initialized), or
+ * having the same key. The TID values from the tuple are simply appended
+ * to the array, without sorting.
+ *
+ * XXX We expect the tuples to contain sorted TID lists, so maybe we should
+ * check that's true with an assert. And we could also check if the values
+ * are already in sorted order, in which case we can skip the sort later.
+ * But it seems like a waste of time, because it won't be unnecessary after
+ * switching to mergesort in a later patch, and also because it's reasonable
+ * to expect the arrays to overlap.
+ */
+static void
+GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
+{
+	ItemPointerData *items;
+	Datum		key;
+
+	AssertCheckGinBuffer(buffer);
+
+	key = _gin_parse_tuple(tup, &items);
+
+	/* if the buffer is empty, set the fields (and copy the key) */
+	if (GinBufferIsEmpty(buffer))
+	{
+		buffer->category = tup->category;
+		buffer->keylen = tup->keylen;
+		buffer->attnum = tup->attrnum;
+
+		buffer->typlen = tup->typlen;
+		buffer->typbyval = tup->typbyval;
+
+		if (tup->category == GIN_CAT_NORM_KEY)
+			buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
+		else
+			buffer->key = (Datum) 0;
+	}
+
+	/* enlarge the TID buffer, if needed */
+	if (buffer->nitems + tup->nitems > buffer->maxitems)
+	{
+		/* 64 seems like a good init value */
+		buffer->maxitems = Max(buffer->maxitems, 64);
+
+		while (buffer->nitems + tup->nitems > buffer->maxitems)
+			buffer->maxitems *= 2;
+
+		if (buffer->items == NULL)
+			buffer->items = palloc(buffer->maxitems * sizeof(ItemPointerData));
+		else
+			buffer->items = repalloc(buffer->items,
+									 buffer->maxitems * sizeof(ItemPointerData));
+	}
+
+	/* now we should be guaranteed to have enough space for all the TIDs */
+	Assert(buffer->nitems + tup->nitems <= buffer->maxitems);
+
+	/* copy the new TIDs into the buffer */
+	memcpy(&buffer->items[buffer->nitems], items, sizeof(ItemPointerData) * tup->nitems);
+	buffer->nitems += tup->nitems;
+
+	/* we simply append the TID values, so don't check sorting */
+	AssertCheckItemPointers(buffer->items, buffer->nitems, false);
+}
+
+/* TID comparator for qsort */
+static int
+tid_cmp(const void *a, const void *b)
+{
+	return ItemPointerCompare((ItemPointer) a, (ItemPointer) b);
+}
+
+/*
+ * GinBufferSortItems
+ *		Sort the TID values stored in the TID buffer.
+ */
+static void
+GinBufferSortItems(GinBuffer *buffer)
+{
+	/* we should not have a buffer with no TIDs to sort */
+	Assert(buffer->items != NULL);
+	Assert(buffer->nitems > 0);
+
+	pg_qsort(buffer->items, buffer->nitems, sizeof(ItemPointerData), tid_cmp);
+
+	AssertCheckItemPointers(buffer->items, buffer->nitems, true);
+}
+
+/*
+ * GinBufferReset
+ *		Reset the buffer into a state as if it contains no data.
+ *
+ * The TID array itself is kept allocated (only nitems is cleared), so it
+ * can be reused for the next key without repalloc churn.
+ *
+ * XXX Should we do something if the array of TIDs gets too large? It may
+ * grow too much, and we'll not free it until the worker finishes building.
+ * But it's better to not let the array grow arbitrarily large, and enforce
+ * work_mem as memory limit by flushing the buffer into the tuplestore.
+ *
+ * XXX Might be better to have a separate memory context for the buffer.
+ */
+static void
+GinBufferReset(GinBuffer *buffer)
+{
+	Assert(!GinBufferIsEmpty(buffer));
+
+	/* release byref values, do nothing for by-val ones */
+	if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
+		pfree(DatumGetPointer(buffer->key));
+
+	/*
+	 * Not required, but makes it more likely to trigger NULL dereference if
+	 * using the value incorrectly, etc.
+	 */
+	buffer->key = (Datum) 0;
+
+	buffer->attnum = 0;
+	buffer->category = 0;
+	buffer->keylen = 0;
+	buffer->nitems = 0;
+
+	buffer->typlen = 0;
+	buffer->typbyval = 0;
+}
+
+/*
+ * GinBufferCanAddKey
+ *		Check if a given GIN tuple can be added to the current buffer.
+ *
+ * Returns true if the buffer is either empty or for the same index key.
+ *
+ * XXX This could / should also enforce a memory limit by checking the size of
+ * the TID array, and returning false if it's too large (more than work_mem,
+ * for example).
+ */
+static bool
+GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
+{
+	/* empty buffer can accept data for any key */
+	if (GinBufferIsEmpty(buffer))
+		return true;
+
+	/* otherwise just data for the same key */
+	return GinBufferKeyEquals(buffer, tup);
+}
+
+/*
+ * Within leader, wait for end of heap scan and merge per-worker results.
+ *
+ * After waiting for all workers to finish, read the GIN tuples produced by
+ * the workers from the shared tuplesort (sorted by attnum, category and key),
+ * combine the runs of tuples for the same index key into a single entry,
+ * and insert the merged entries into the index.
+ *
+ * Returns the total number of heap tuples scanned.
+ *
+ * FIXME Maybe should have local memory contexts similar to what
+ * _brin_parallel_merge does?
+ */
+static double
+_gin_parallel_merge(GinBuildState *state)
+{
+	GinTuple   *tup;
+	Size		tuplen;
+	double		reltuples = 0;
+	GinBuffer  *buffer;
+
+	/* wait for workers to scan table and produce partial results */
+	reltuples = _gin_parallel_heapscan(state);
+
+	/* do the actual sort in the leader */
+	tuplesort_performsort(state->bs_sortstate);
+
+	/* initialize buffer to combine entries for the same key */
+	buffer = GinBufferInit(state->ginstate.index);
+
+	/*
+	 * Read the GIN tuples from the shared tuplesort, sorted by category and
+	 * key. That probably gives us order matching how data is organized in the
+	 * index.
+	 *
+	 * We don't insert the GIN tuples right away, but instead accumulate as
+	 * many TIDs for the same key as possible, and then insert that at once.
+	 * This way we don't need to decompress/recompress the posting lists, etc.
+	 *
+	 * XXX Maybe we should sort by key first, then by category? The idea is
+	 * that if this matches the order of the keys in the index, we'd insert
+	 * the entries in order better matching the index.
+	 */
+	while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If the buffer can accept the new GIN tuple, just store it there and
+		 * we're done. If it's a different key (or maybe too much data) flush
+		 * the current contents into the index first.
+		 */
+		if (!GinBufferCanAddKey(buffer, tup))
+		{
+			/*
+			 * Buffer is not empty and it's storing a different key - flush
+			 * the data into the insert, and start a new entry for current
+			 * GinTuple.
+			 */
+			GinBufferSortItems(buffer);
+
+			ginEntryInsert(&state->ginstate,
+						   buffer->attnum, buffer->key, buffer->category,
+						   buffer->items, buffer->nitems, &state->buildStats);
+
+			/* discard the existing data */
+			GinBufferReset(buffer);
+		}
+
+		/* now remember the new key */
+		GinBufferStoreTuple(buffer, tup);
+	}
+
+	/* flush data remaining in the buffer (for the last key) */
+	if (!GinBufferIsEmpty(buffer))
+	{
+		GinBufferSortItems(buffer);
+
+		ginEntryInsert(&state->ginstate,
+					   buffer->attnum, buffer->key, buffer->category,
+					   buffer->items, buffer->nitems, &state->buildStats);
+
+		/* discard the existing data */
+		GinBufferReset(buffer);
+	}
+
+	tuplesort_end(state->bs_sortstate);
+
+	return reltuples;
+}
+
+/*
+ * Returns size of shared memory required to store state for a parallel
+ * gin index build based on the snapshot its parallel scan will use.
+ *
+ * The estimate covers the GinShared struct plus the parallel table scan
+ * descriptor that is stored immediately after it in the DSM segment.
+ */
+static Size
+_gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
+{
+	/* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
+	return add_size(BUFFERALIGN(sizeof(GinShared)),
+					table_parallelscan_estimate(heap, snapshot));
+}
+
+/*
+ * Within leader, participate as a parallel worker.
+ *
+ * The leader joins the parallel scan just like a regular worker, using its
+ * own share of maintenance_work_mem for the partial tuplesort.
+ */
+static void
+_gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
+{
+	GinLeader  *ginleader = buildstate->bs_leader;
+	int			sortmem;
+
+	/*
+	 * Might as well use reliable figure when doling out maintenance_work_mem
+	 * (when requested number of workers were not launched, this will be
+	 * somewhat higher than it is for other workers).
+	 */
+	sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
+
+	/* Perform work common to all participants */
+	_gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
+								 ginleader->sharedsort, heap, index, sortmem, true);
+}
+
+/*
+ * Perform a worker's portion of a parallel sort.
+ *
+ * This generates a tuplesort for the worker portion of the table. Entries
+ * accumulated in memory are serialized as GinTuples and written into the
+ * shared tuplesort; any entries still buffered when the scan completes are
+ * flushed before the partial sort is performed.
+ *
+ * sortmem is the amount of working memory to use within each worker,
+ * expressed in KBs.
+ *
+ * When this returns, workers are done, and need only release resources.
+ */
+static void
+_gin_parallel_scan_and_build(GinBuildState *state,
+							 GinShared *ginshared, Sharedsort *sharedsort,
+							 Relation heap, Relation index,
+							 int sortmem, bool progress)
+{
+	SortCoordinate coordinate;
+	TableScanDesc scan;
+	double		reltuples;
+	IndexInfo  *indexInfo;
+
+	/* Initialize local tuplesort coordination state */
+	coordinate = palloc0(sizeof(SortCoordinateData));
+	coordinate->isWorker = true;
+	coordinate->nParticipants = -1;
+	coordinate->sharedsort = sharedsort;
+
+	/* Begin "partial" tuplesort */
+	state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
+													sortmem, coordinate,
+													TUPLESORT_NONE);
+
+	/* Join parallel scan */
+	indexInfo = BuildIndexInfo(index);
+	indexInfo->ii_Concurrent = ginshared->isconcurrent;
+
+	scan = table_beginscan_parallel(heap,
+									ParallelTableScanFromGinShared(ginshared));
+
+	reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
+									   ginBuildCallbackParallel, state, scan);
+
+	/* write remaining accumulated entries */
+	{
+		ItemPointerData *list;
+		Datum		key;
+		GinNullCategory category;
+		uint32		nlist;
+		OffsetNumber attnum;
+		TupleDesc	tdesc = RelationGetDescr(index);
+
+		ginBeginBAScan(&state->accum);
+		while ((list = ginGetBAEntry(&state->accum,
+									 &attnum, &key, &category, &nlist)) != NULL)
+		{
+			/* information about the key */
+			Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1));
+
+			GinTuple   *tup;
+			Size		len;
+
+			/* there could be many entries, so be willing to abort here */
+			CHECK_FOR_INTERRUPTS();
+
+			/* serialize the accumulated entry into a GinTuple */
+			tup = _gin_build_tuple(attnum, category,
+								   key, attr->attlen, attr->attbyval,
+								   list, nlist, &len);
+
+			tuplesort_putgintuple(state->bs_sortstate, tup, len);
+
+			pfree(tup);
+		}
+
+		MemoryContextReset(state->tmpCtx);
+		ginInitBA(&state->accum);
+	}
+
+	/* sort the GIN tuples built by this worker */
+	tuplesort_performsort(state->bs_sortstate);
+
+	state->bs_reltuples += reltuples;
+
+	/*
+	 * Done.  Record ambuild statistics.
+	 */
+	SpinLockAcquire(&ginshared->mutex);
+	ginshared->nparticipantsdone++;
+	ginshared->reltuples += state->bs_reltuples;
+	ginshared->indtuples += state->bs_numtuples;
+	SpinLockRelease(&ginshared->mutex);
+
+	/* Notify leader */
+	ConditionVariableSignal(&ginshared->workersdonecv);
+
+	tuplesort_end(state->bs_sortstate);
+}
+
+/*
+ * Perform work within a launched parallel process.
+ *
+ * Attaches to the shared build state, opens the heap/index with the lock
+ * modes obtained by index.c, runs the worker portion of the scan+sort, and
+ * reports WAL/buffer usage back to the leader.
+ */
+void
+_gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
+{
+	char	   *sharedquery;
+	GinShared  *ginshared;
+	Sharedsort *sharedsort;
+	GinBuildState buildstate;
+	Relation	heapRel;
+	Relation	indexRel;
+	LOCKMODE	heapLockmode;
+	LOCKMODE	indexLockmode;
+	WalUsage   *walusage;
+	BufferUsage *bufferusage;
+	int			sortmem;
+
+	/*
+	 * The only possible status flag that can be set to the parallel worker is
+	 * PROC_IN_SAFE_IC.
+	 */
+	Assert((MyProc->statusFlags == 0) ||
+		   (MyProc->statusFlags == PROC_IN_SAFE_IC));
+
+	/* Set debug_query_string for individual workers first */
+	sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
+	debug_query_string = sharedquery;
+
+	/* Report the query string from leader */
+	pgstat_report_activity(STATE_RUNNING, debug_query_string);
+
+	/* Look up gin shared state */
+	ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
+
+	/* Open relations using lock modes known to be obtained by index.c */
+	if (!ginshared->isconcurrent)
+	{
+		heapLockmode = ShareLock;
+		indexLockmode = AccessExclusiveLock;
+	}
+	else
+	{
+		heapLockmode = ShareUpdateExclusiveLock;
+		indexLockmode = RowExclusiveLock;
+	}
+
+	/* Open relations within worker */
+	heapRel = table_open(ginshared->heaprelid, heapLockmode);
+	indexRel = index_open(ginshared->indexrelid, indexLockmode);
+
+	/* initialize the GIN build state */
+	initGinState(&buildstate.ginstate, indexRel);
+	buildstate.indtuples = 0;
+	memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
+
+	/*
+	 * Initialize the tuple counters accumulated by the scan callback and by
+	 * _gin_parallel_scan_and_build - they are incremented (+=) there, so
+	 * leaving them uninitialized would add garbage to the shared totals.
+	 */
+	buildstate.bs_numtuples = 0;
+	buildstate.bs_reltuples = 0;
+
+	/*
+	 * create a temporary memory context that is used to hold data not yet
+	 * dumped out to the index
+	 */
+	buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
+											  "Gin build temporary context",
+											  ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * create a temporary memory context that is used for calling
+	 * ginExtractEntries(), and can be reset after each tuple
+	 */
+	buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
+											   "Gin build temporary context for user-defined function",
+											   ALLOCSET_DEFAULT_SIZES);
+
+	buildstate.accum.ginstate = &buildstate.ginstate;
+	ginInitBA(&buildstate.accum);
+
+
+	/* Look up shared state private to tuplesort.c */
+	sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
+	tuplesort_attach_shared(sharedsort, seg);
+
+	/* Prepare to track buffer usage during parallel execution */
+	InstrStartParallelQuery();
+
+	/*
+	 * Might as well use reliable figure when doling out maintenance_work_mem
+	 * (when requested number of workers were not launched, this will be
+	 * somewhat higher than it is for other workers).
+	 */
+	sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
+
+	_gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
+								 heapRel, indexRel, sortmem, false);
+
+	/* Report WAL/buffer usage during parallel execution */
+	bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
+	walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
+	InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
+						  &walusage[ParallelWorkerNumber]);
+
+	index_close(indexRel, indexLockmode);
+	table_close(heapRel, heapLockmode);
+}
+
+/*
+ * _gin_build_tuple
+ *		Serialize the state for an index key into a tuple for tuplesort.
+ *
+ * The tuple starts with scalar header fields (mostly matching the build
+ * state), followed by a data array holding the key value and then the list
+ * of TIDs. The TID list starts at a MAXALIGN'ed offset so that it can be
+ * accessed directly as an ItemPointerData array.
+ *
+ * For by-reference types the actual key bytes are stored (with varlena
+ * header / terminating \0 included, so the value is usable in-line). For
+ * by-value types the whole Datum is copied verbatim, which sidesteps any
+ * concerns about endianness; the space saved by storing fewer bytes would
+ * be lost to the MAXALIGN padding anyway.
+ */
+static GinTuple *
+_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
+				 Datum key, int16 typlen, bool typbyval,
+				 ItemPointerData *items, uint32 nitems,
+				 Size *len)
+{
+	GinTuple   *tuple;
+	char	   *tidptr;
+	Size		tuplen;
+	int			keylen;
+
+	/*
+	 * Compute the size of the serialized key. Only GIN_CAT_NORM_KEY entries
+	 * carry an actual key value; all other categories store nothing.
+	 */
+	if (category != GIN_CAT_NORM_KEY)
+		keylen = 0;
+	else if (typbyval)
+		keylen = sizeof(Datum); /* whole Datum, regardless of typlen */
+	else if (typlen > 0)
+		keylen = typlen;		/* byref, fixed length */
+	else if (typlen == -1)
+		keylen = VARSIZE_ANY(key);	/* varlena, header included */
+	else if (typlen == -2)
+		keylen = strlen(DatumGetPointer(key)) + 1;	/* cstring incl. \0 */
+	else
+		elog(ERROR, "invalid typlen");
+
+	/*
+	 * Total tuple length, with the key padded so the TID list that follows
+	 * is properly aligned for direct access.
+	 */
+	tuplen = MAXALIGN(offsetof(GinTuple, data) + keylen) +
+		(sizeof(ItemPointerData) * nitems);
+
+	*len = tuplen;
+
+	/*
+	 * Allocate space for the whole GIN tuple.
+	 *
+	 * XXX palloc0 so that valgrind does not complain about uninitialized
+	 * bytes in writetup_index_gin, likely because of padding
+	 */
+	tuple = palloc0(tuplen);
+
+	tuple->tuplen = tuplen;
+	tuple->attrnum = attrnum;
+	tuple->category = category;
+	tuple->keylen = keylen;
+	tuple->nitems = nitems;
+
+	/* key type info */
+	tuple->typlen = typlen;
+	tuple->typbyval = typbyval;
+
+	/*
+	 * Copy the key value to the start of the data array. For by-value types
+	 * we copy the Datum itself; for all by-reference cases keylen already
+	 * equals the number of bytes to copy from the pointed-to value.
+	 */
+	if (category == GIN_CAT_NORM_KEY)
+	{
+		if (typbyval)
+			memcpy(tuple->data, &key, sizeof(Datum));
+		else
+			memcpy(tuple->data, DatumGetPointer(key), keylen);
+	}
+
+	/* finally, copy the TIDs into the (aligned) tail of the tuple */
+	tidptr = (char *) tuple + MAXALIGN(offsetof(GinTuple, data) + keylen);
+
+	memcpy(tidptr, items, sizeof(ItemPointerData) * nitems);
+
+	return tuple;
+}
+
+/*
+ * _gin_parse_tuple
+ *		Deserialize the tuple from the tuplestore representation.
+ *
+ * Most of the fields are actually directly accessible, the only thing that
+ * needs more care is the key and the TID list.
+ *
+ * For the key, this returns a regular Datum representing it. It's either the
+ * actual key value, or a pointer to the beginning of the data array (which is
+ * where the data was copied by _gin_build_tuple).
+ *
+ * The pointer to the TID list is returned through 'items' (which is simply
+ * a pointer to the data array). Pass NULL if the caller only needs the key
+ * (as _gin_compare_tuples does).
+ */
+static Datum
+_gin_parse_tuple(GinTuple *a, ItemPointerData **items)
+{
+	Datum		key;
+
+	if (items)
+	{
+		/* TID list starts at the MAXALIGN'ed offset past the key */
+		char	   *ptr = (char *) a + MAXALIGN(offsetof(GinTuple, data) + a->keylen);
+
+		*items = (ItemPointerData *) ptr;
+	}
+
+	/* non-NORM categories (NULL key etc.) have no key value at all */
+	if (a->category != GIN_CAT_NORM_KEY)
+		return (Datum) 0;
+
+	if (a->typbyval)
+	{
+		/* reconstruct the Datum that _gin_build_tuple copied verbatim */
+		memcpy(&key, a->data, a->keylen);
+		return key;
+	}
+
+	/* byref: the serialized bytes serve as the value directly */
+	return PointerGetDatum(a->data);
+}
+
+/*
+ * _gin_compare_tuples
+ *		Compare GIN tuples, used by tuplesort during parallel index build.
+ *
+ * The scalar fields (attrnum, category) are compared first, the key value is
+ * compared last. The comparisons are done using type-specific sort support
+ * functions. Only GIN_CAT_NORM_KEY entries have a key to compare; for all
+ * other categories the category comparison fully determines the order.
+ *
+ * XXX We might try using memcmp(), based on the assumption that if we get
+ * two keys that are two different representations of a logically equal
+ * value, it'll get merged by the index build. But it's not clear that's
+ * safe, because for keys with multiple binary representations we might
+ * end with overlapping lists. Which might affect performance by requiring
+ * full merge of the TID lists, and perhaps even failures (e.g. errors like
+ * "could not split GIN page; all old items didn't fit" when inserting data
+ * into the index).
+ */
+int
+_gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
+{
+	Datum		keya,
+				keyb;
+
+	if (a->attrnum < b->attrnum)
+		return -1;
+
+	if (a->attrnum > b->attrnum)
+		return 1;
+
+	if (a->category < b->category)
+		return -1;
+
+	if (a->category > b->category)
+		return 1;
+
+	if ((a->category == GIN_CAT_NORM_KEY) &&
+		(b->category == GIN_CAT_NORM_KEY))
+	{
+		keya = _gin_parse_tuple(a, NULL);
+		keyb = _gin_parse_tuple(b, NULL);
+
+		/* ssup is indexed by attribute, pick the one for this key */
+		return ApplySortComparator(keya, false,
+								   keyb, false,
+								   &ssup[a->attrnum - 1]);
+	}
+
+	return 0;
+}
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 5747ae6a4ca..dd22b44aca9 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -53,7 +53,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->amclusterable = false;
 	amroutine->ampredlocks = true;
 	amroutine->amcanparallel = false;
-	amroutine->amcanbuildparallel = false;
+	amroutine->amcanbuildparallel = true;
 	amroutine->amcaninclude = false;
 	amroutine->amusemaintenanceworkmem = true;
 	amroutine->amsummarizing = false;
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 8613fc6fb54..c9ea769afb5 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/brin.h"
+#include "access/gin.h"
 #include "access/nbtree.h"
 #include "access/parallel.h"
 #include "access/session.h"
@@ -146,6 +147,9 @@ static const struct
 	{
 		"_brin_parallel_build_main", _brin_parallel_build_main
 	},
+	{
+		"_gin_parallel_build_main", _gin_parallel_build_main
+	},
 	{
 		"parallel_vacuum_main", parallel_vacuum_main
 	}
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 05a853caa36..ed6084960b8 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -20,6 +20,7 @@
 #include "postgres.h"
 
 #include "access/brin_tuple.h"
+#include "access/gin_tuple.h"
 #include "access/hash.h"
 #include "access/htup_details.h"
 #include "access/nbtree.h"
@@ -46,6 +47,8 @@ static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
 							   int count);
 static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups,
 									int count);
+static void removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups,
+								   int count);
 static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
 							   int count);
 static int	comparetup_heap(const SortTuple *a, const SortTuple *b,
@@ -74,6 +77,8 @@ static int	comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b
 										   Tuplesortstate *state);
 static int	comparetup_index_brin(const SortTuple *a, const SortTuple *b,
 								  Tuplesortstate *state);
+static int	comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+								 Tuplesortstate *state);
 static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
 						   SortTuple *stup);
 static void readtup_index(Tuplesortstate *state, SortTuple *stup,
@@ -82,6 +87,10 @@ static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape,
 								SortTuple *stup);
 static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
 							   LogicalTape *tape, unsigned int len);
+static void writetup_index_gin(Tuplesortstate *state, LogicalTape *tape,
+							   SortTuple *stup);
+static void readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+							  LogicalTape *tape, unsigned int len);
 static int	comparetup_datum(const SortTuple *a, const SortTuple *b,
 							 Tuplesortstate *state);
 static int	comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b,
@@ -580,6 +589,79 @@ tuplesort_begin_index_brin(int workMem,
 	return state;
 }
 
+/*
+ * Initialize a tuplesort for GinTuples, used by parallel GIN index builds.
+ *
+ * XXX Maybe we should pass the ordering functions, not the heap/index?
+ */
+Tuplesortstate *
+tuplesort_begin_index_gin(Relation heapRel,
+						  Relation indexRel,
+						  int workMem, SortCoordinate coordinate,
+						  int sortopt)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   sortopt);
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	MemoryContext oldcontext;
+	int			i;
+	TupleDesc	desc = RelationGetDescr(indexRel);
+
+	oldcontext = MemoryContextSwitchTo(base->maincontext);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin index sort: workMem = %d, randomAccess = %c",
+			 workMem,
+			 sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+	/*
+	 * Multi-column GIN indexes expand the row into a separate index entry for
+	 * each attribute, and that's what we write into the tuplesort. But we
+	 * still need to initialize sortsupport for all the attributes.
+	 */
+	base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+	/* Prepare SortSupport data for each column */
+	base->sortKeys = (SortSupport) palloc0(base->nKeys *
+										   sizeof(SortSupportData));
+
+	for (i = 0; i < base->nKeys; i++)
+	{
+		SortSupport sortKey = base->sortKeys + i;
+		Form_pg_attribute att = TupleDescAttr(desc, i);
+		TypeCacheEntry *typentry;
+
+		sortKey->ssup_cxt = CurrentMemoryContext;
+		sortKey->ssup_collation = indexRel->rd_indcollation[i];
+		sortKey->ssup_nulls_first = false;
+		sortKey->ssup_attno = i + 1;
+		/* abbreviation is not supported for GinTuples */
+		sortKey->abbreviate = false;
+
+		Assert(sortKey->ssup_attno != 0);
+
+		/*
+		 * Look for an ordering for the index key data type, and then the
+		 * sort support function.
+		 *
+		 * XXX does this use the right opckeytype/opcintype for GIN?
+		 */
+		typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
+		PrepareSortSupportFromOrderingOp(typentry->lt_opr, sortKey);
+	}
+
+	base->removeabbrev = removeabbrev_index_gin;
+	base->comparetup = comparetup_index_gin;
+	base->writetup = writetup_index_gin;
+	base->readtup = readtup_index_gin;
+	/* GinTuples don't cache a leading key in datum1 */
+	base->haveDatum1 = false;
+	base->arg = NULL;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return state;
+}
+
 Tuplesortstate *
 tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 					  bool nullsFirstFlag, int workMem,
@@ -817,6 +899,37 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
 	MemoryContextSwitchTo(oldcontext);
 }
 
+/*
+ * Accept one GinTuple while collecting input data for sort.
+ *
+ * Note that the input tuple is always copied; the caller need not save it.
+ */
+void
+tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
+{
+	SortTuple	stup;
+	GinTuple   *ctup;
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+	Size		tuplen;
+
+	/* copy the GinTuple into the right memory context */
+	ctup = palloc(size);
+	memcpy(ctup, tuple, size);
+
+	stup.tuple = ctup;
+	stup.datum1 = (Datum) 0;
+	stup.isnull1 = false;
+
+	/* GetMemoryChunkSpace is not supported for bump contexts */
+	if (TupleSortUseBumpTupleCxt(base->sortopt))
+		tuplen = MAXALIGN(size);
+	else
+		tuplen = GetMemoryChunkSpace(ctup);
+
+	tuplesort_puttuple_common(state, &stup,
+							  base->sortKeys &&
+							  base->sortKeys->abbrev_converter &&
+							  !stup.isnull1, tuplen);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
 /*
  * Accept one Datum while collecting input data for sort.
  *
@@ -989,6 +1102,29 @@ tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
 	return &btup->tuple;
 }
 
+/*
+ * Fetch the next GinTuple in either forward or back direction.
+ * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
+ * context. Caller may not rely on tuple remaining valid after any further
+ * manipulation of tuplesort. On success, the tuple length is returned
+ * through *len.
+ */
+GinTuple *
+tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
+{
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+	SortTuple	stup;
+	GinTuple   *tup;
+
+	if (!tuplesort_gettuple_common(state, forward, &stup))
+		stup.tuple = NULL;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	/* end of data: return NULL, not the bool 'false' of the original code */
+	if (!stup.tuple)
+		return NULL;
+
+	tup = (GinTuple *) stup.tuple;
+
+	*len = tup->tuplen;
+
+	return tup;
+}
+
 /*
  * Fetch the next Datum in either forward or back direction.
  * Returns false if no more datums.
@@ -1777,6 +1913,69 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
 	stup->datum1 = tuple->tuple.bt_blkno;
 }
 
+/*
+ * Routines specialized for GIN case
+ */
+
+/*
+ * GIN sort tuples never use abbreviated keys (abbreviate is set to false in
+ * tuplesort_begin_index_gin), so this callback must never be reached.
+ */
+static void
+removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, int count)
+{
+	Assert(false);
+	elog(ERROR, "removeabbrev_index_gin not implemented");
+}
+
+/* Compare two GIN sort tuples by delegating to _gin_compare_tuples(). */
+static int
+comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+					 Tuplesortstate *state)
+{
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	GinTuple   *ta = (GinTuple *) a->tuple;
+	GinTuple   *tb = (GinTuple *) b->tuple;
+
+	/* datum1 caching is disabled for GIN sorts */
+	Assert(!base->haveDatum1);
+
+	return _gin_compare_tuples(ta, tb, base->sortKeys);
+}
+
+/*
+ * Write a GIN sort tuple to tape: a leading length word (which includes its
+ * own size), the serialized GinTuple, and - for random access - a trailing
+ * copy of the length word so the tape can be read backwards.
+ */
+static void
+writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	GinTuple   *tuple = (GinTuple *) stup->tuple;
+	unsigned int tuplen = tuple->tuplen;
+
+	tuplen = tuplen + sizeof(tuplen);
+	LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+	LogicalTapeWrite(tape, tuple, tuple->tuplen);
+	if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+		LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+}
+
+/*
+ * Read a GIN sort tuple back from tape; 'len' is the on-tape length word,
+ * which includes its own size (see writetup_index_gin).
+ */
+static void
+readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+				  LogicalTape *tape, unsigned int len)
+{
+	GinTuple   *tuple;
+	TuplesortPublic *base = TuplesortstateGetPublic(state);
+	unsigned int tuplen = len - sizeof(unsigned int);
+
+	/*
+	 * Allocate space for the GIN sort tuple, which already has the proper
+	 * length included in the header.
+	 */
+	tuple = (GinTuple *) tuplesort_readtup_alloc(state, tuplen);
+
+	/*
+	 * NOTE(review): this store appears redundant - the read below overwrites
+	 * the whole struct, including the tuplen field, with the same value.
+	 */
+	tuple->tuplen = tuplen;
+
+	LogicalTapeReadExact(tape, tuple, tuplen);
+	if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+		LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+	stup->tuple = (void *) tuple;
+
+	/* no abbreviations (FIXME maybe use attrnum for this?) */
+	stup->datum1 = (Datum) 0;
+}
+
 /*
  * Routines specialized for DatumTuple case
  */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 25983b7a505..be76d8446f4 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -12,6 +12,8 @@
 
 #include "access/xlogreader.h"
 #include "lib/stringinfo.h"
+#include "nodes/execnodes.h"
+#include "storage/shm_toc.h"
 #include "storage/block.h"
 #include "utils/relcache.h"
 
@@ -88,4 +90,6 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
 extern void ginUpdateStats(Relation index, const GinStatsData *stats,
 						   bool is_build);
 
+extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
+
 #endif							/* GIN_H */
diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h
new file mode 100644
index 00000000000..6f529a5aaf0
--- /dev/null
+++ b/src/include/access/gin_tuple.h
@@ -0,0 +1,31 @@
+/*--------------------------------------------------------------------------
+ * gin_tuple.h
+ *	  Declarations of the serialized tuple format used by parallel GIN
+ *	  index builds.
+ *
+ *	Copyright (c) 2006-2024, PostgreSQL Global Development Group
+ *
+ *	src/include/access/gin_tuple.h
+ *--------------------------------------------------------------------------
+ */
+#ifndef GIN_TUPLE_
+#define GIN_TUPLE_
+
+#include "storage/itemptr.h"
+#include "utils/sortsupport.h"
+
+/*
+ * Serialized format of one GIN index entry, as written into the shared
+ * tuplesort by parallel build workers. The data array holds the key value
+ * first (keylen bytes), followed - at a MAXALIGN'ed offset - by the array
+ * of nitems ItemPointerData values.
+ *
+ * XXX do we still need all the fields now that we use SortSupport?
+ */
+typedef struct GinTuple
+{
+	Size		tuplen;			/* length of the whole tuple */
+	Size		keylen;			/* bytes in data for key value */
+	int16		typlen;			/* typlen for key */
+	bool		typbyval;		/* typbyval for key */
+	OffsetNumber attrnum;		/* attnum of index key */
+	signed char category;		/* category: normal or NULL? */
+	int			nitems;			/* number of TIDs in the data */
+	char		data[FLEXIBLE_ARRAY_MEMBER];
+} GinTuple;
+
+extern int	_gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup);
+
+#endif							/* GIN_TUPLE_ */
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index cde83f62015..0ed71ae922a 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -22,6 +22,7 @@
 #define TUPLESORT_H
 
 #include "access/brin_tuple.h"
+#include "access/gin_tuple.h"
 #include "access/itup.h"
 #include "executor/tuptable.h"
 #include "storage/dsm.h"
@@ -443,6 +444,10 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
 												  int sortopt);
 extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate,
 												  int sortopt);
+extern Tuplesortstate *tuplesort_begin_index_gin(Relation heapRel,
+												 Relation indexRel,
+												 int workMem, SortCoordinate coordinate,
+												 int sortopt);
 extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
 											 Oid sortOperator, Oid sortCollation,
 											 bool nullsFirstFlag,
@@ -456,6 +461,7 @@ extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
 										  Relation rel, ItemPointer self,
 										  const Datum *values, const bool *isnull);
 extern void tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size);
+extern void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size);
 extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
 							   bool isNull);
 
@@ -465,6 +471,8 @@ extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
 extern BrinTuple *tuplesort_getbrintuple(Tuplesortstate *state, Size *len,
 										 bool forward);
+extern GinTuple *tuplesort_getgintuple(Tuplesortstate *state, Size *len,
+									   bool forward);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy,
 							   Datum *val, bool *isNull, Datum *abbrev);
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 61ad417cde6..af86c22093e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1015,11 +1015,13 @@ GinBtreeData
 GinBtreeDataLeafInsertData
 GinBtreeEntryInsertData
 GinBtreeStack
+GinBuffer
 GinBuildState
 GinChkVal
 GinEntries
 GinEntryAccumulator
 GinIndexStat
+GinLeader
 GinMetaPageData
 GinNullCategory
 GinOptions
@@ -1032,9 +1034,11 @@ GinScanEntry
 GinScanKey
 GinScanOpaque
 GinScanOpaqueData
+GinShared
 GinState
 GinStatsData
 GinTernaryValue
+GinTuple
 GinTupleCollector
 GinVacuumState
 GistBuildMode
-- 
2.45.2

